]> git.proxmox.com Git - pve-qemu-kvm.git/blame - debian/patches/0004-introduce-new-vma-archive-format.patch
bump version to 1.4-4
[pve-qemu-kvm.git] / debian / patches / 0004-introduce-new-vma-archive-format.patch
CommitLineData
89af8a77 1From 5476ae43806488e74cd293bbaa17f130aa53d402 Mon Sep 17 00:00:00 2001
5ad5891c
DM
2From: Dietmar Maurer <dietmar@proxmox.com>
3Date: Tue, 13 Nov 2012 11:11:38 +0100
89af8a77 4Subject: [PATCH v4 4/6] introduce new vma archive format
5ad5891c
DM
5
6This is a very simple archive format, see docs/specs/vma_spec.txt
7
8Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
9---
10 Makefile | 3 +-
11 Makefile.objs | 2 +-
92bf040c 12 backup.h | 1 +
5ad5891c
DM
13 blockdev.c | 6 +-
14 docs/specs/vma_spec.txt | 24 ++
92bf040c
DM
15 vma-reader.c | 799 ++++++++++++++++++++++++++++++++++++++++
16 vma-writer.c | 932 +++++++++++++++++++++++++++++++++++++++++++++++
17 vma.c | 559 ++++++++++++++++++++++++++++
55827521 18 vma.h | 145 ++++++++
92bf040c 19 9 files changed, 2467 insertions(+), 4 deletions(-)
5ad5891c
DM
20 create mode 100644 docs/specs/vma_spec.txt
21 create mode 100644 vma-reader.c
22 create mode 100644 vma-writer.c
23 create mode 100644 vma.c
24 create mode 100644 vma.h
25
26diff --git a/Makefile b/Makefile
92bf040c 27index 0d9099a..16f1c25 100644
5ad5891c
DM
28--- a/Makefile
29+++ b/Makefile
92bf040c
DM
30@@ -115,7 +115,7 @@ ifeq ($(CONFIG_SMARTCARD_NSS),y)
31 include $(SRC_PATH)/libcacard/Makefile
32 endif
5ad5891c
DM
33
34-all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all
35+all: $(DOCS) $(TOOLS) vma$(EXESUF) $(HELPERS-y) recurse-all
36
37 config-host.h: config-host.h-timestamp
38 config-host.h-timestamp: config-host.mak
92bf040c
DM
39@@ -167,6 +167,7 @@ qemu-img.o: qemu-img-cmds.h
40 qemu-img$(EXESUF): qemu-img.o $(block-obj-y) libqemuutil.a libqemustub.a
41 qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) libqemuutil.a libqemustub.a
42 qemu-io$(EXESUF): qemu-io.o cmd.o $(block-obj-y) libqemuutil.a libqemustub.a
43+vma$(EXESUF): vma.o vma-writer.o vma-reader.o $(block-obj-y) libqemuutil.a libqemustub.a
5ad5891c
DM
44
45 qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o
46
47diff --git a/Makefile.objs b/Makefile.objs
89af8a77 48index df64f70..91f133b 100644
5ad5891c
DM
49--- a/Makefile.objs
50+++ b/Makefile.objs
92bf040c
DM
51@@ -13,7 +13,7 @@ block-obj-$(CONFIG_POSIX) += aio-posix.o
52 block-obj-$(CONFIG_WIN32) += aio-win32.o
53 block-obj-y += block/
54 block-obj-y += qapi-types.o qapi-visit.o
5ad5891c
DM
55-block-obj-y += backup.o
56+block-obj-y += vma-writer.o backup.o
92bf040c
DM
57
58 block-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
59 block-obj-y += qemu-coroutine-sleep.o
60diff --git a/backup.h b/backup.h
89af8a77 61index c8ba153..406f011 100644
92bf040c
DM
62--- a/backup.h
63+++ b/backup.h
64@@ -15,6 +15,7 @@
65 #define QEMU_BACKUP_H
66
67 #include <uuid/uuid.h>
68+#include "block/block.h"
69
70 #define BACKUP_CLUSTER_BITS 16
71 #define BACKUP_CLUSTER_SIZE (1<<BACKUP_CLUSTER_BITS)
5ad5891c 72diff --git a/blockdev.c b/blockdev.c
89af8a77 73index c340fde..1cfc780 100644
5ad5891c
DM
74--- a/blockdev.c
75+++ b/blockdev.c
76@@ -21,6 +21,7 @@
77 #include "trace.h"
92bf040c 78 #include "sysemu/arch_init.h"
5ad5891c
DM
79 #include "backup.h"
80+#include "vma.h"
81
82 static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives);
83
89af8a77 84@@ -1530,10 +1531,11 @@ char *qmp_backup(const char *backup_file, bool has_format, BackupFormat format,
5ad5891c 85 /* Todo: try to auto-detect format based on file name */
3055eeb4 86 format = has_format ? format : BACKUP_FORMAT_VMA;
5ad5891c
DM
87
88- /* fixme: find driver for specifued format */
89 const BackupDriver *driver = NULL;
90
91- if (!driver) {
3055eeb4 92+ if (format == BACKUP_FORMAT_VMA) {
5ad5891c
DM
93+ driver = &backup_vma_driver;
94+ } else {
3055eeb4 95 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
5ad5891c 96 return NULL;
3055eeb4 97 }
5ad5891c
DM
98diff --git a/docs/specs/vma_spec.txt b/docs/specs/vma_spec.txt
99new file mode 100644
100index 0000000..052c629
101--- /dev/null
102+++ b/docs/specs/vma_spec.txt
103@@ -0,0 +1,24 @@
104+=Virtual Machine Archive format (VMA)=
105+
106+This format contains a header which includes the VM configuration as
107+binary blobs, and a list of devices (dev_id, name).
108+
109+The actual VM image data is stored inside extents. An extent contains
110+up to 64 clusters, and starts with a 512 byte header containing
111+additional information for those clusters.
112+
113+We use a cluster size of 65536, and use 8 bytes for each
114+cluster in the header to store the following information:
115+
116+* 1 byte dev_id (to identify the drive)
117+* 2 bytes zero indicator (mark zero regions (16x4096))
118+* 4 bytes cluster number
119+* 1 byte not used (reserved)
120+
121+We only store non-zero blocks (each such block is 4096 bytes).
122+
123+Each archive is marked with a uuid. The archive header and all
124+extent headers include that uuid and an MD5 checksum (over header
125+data).
126+
127+
128diff --git a/vma-reader.c b/vma-reader.c
129new file mode 100644
92bf040c 130index 0000000..7e81847
5ad5891c
DM
131--- /dev/null
132+++ b/vma-reader.c
92bf040c 133@@ -0,0 +1,799 @@
5ad5891c
DM
134+/*
135+ * VMA: Virtual Machine Archive
136+ *
137+ * Copyright (C) 2012 Proxmox Server Solutions
138+ *
139+ * Authors:
140+ * Dietmar Maurer (dietmar@proxmox.com)
141+ *
142+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
143+ * See the COPYING file in the top-level directory.
144+ *
145+ */
146+
147+#include <stdio.h>
148+#include <errno.h>
149+#include <unistd.h>
150+#include <stdio.h>
151+#include <string.h>
152+#include <sys/types.h>
153+#include <sys/stat.h>
154+#include <fcntl.h>
155+#include <glib.h>
156+#include <uuid/uuid.h>
157+
158+#include "qemu-common.h"
92bf040c 159+#include "qemu/timer.h"
5ad5891c
DM
160+#include "qemu/ratelimit.h"
161+#include "vma.h"
92bf040c 162+#include "block/block.h"
5ad5891c
DM
163+
164+#define BITS_PER_LONG (sizeof(unsigned long) * 8)
165+
166+static unsigned char zero_vma_block[VMA_BLOCK_SIZE];
167+
168+typedef struct VmaRestoreState {
169+ BlockDriverState *bs;
170+ bool write_zeroes;
171+ unsigned long *bitmap;
172+ int bitmap_size;
173+} VmaRestoreState;
174+
175+struct VmaReader {
176+ int fd;
177+ GChecksum *md5csum;
178+ GHashTable *blob_hash;
179+ unsigned char *head_data;
180+ VmaDeviceInfo devinfo[256];
181+ VmaRestoreState rstate[256];
182+ GList *cdata_list;
183+ guint8 vmstate_stream;
184+ uint32_t vmstate_clusters;
4244016d
DM
185+ /* to show restore percentage if run with -v */
186+ time_t start_time;
187+ int64_t cluster_count;
188+ int64_t clusters_read;
189+ int clusters_read_per;
5ad5891c
DM
190+};
191+
192+static guint
193+g_int32_hash(gconstpointer v)
194+{
195+ return *(const uint32_t *)v;
196+}
197+
198+static gboolean
199+g_int32_equal(gconstpointer v1, gconstpointer v2)
200+{
201+ return *((const uint32_t *)v1) == *((const uint32_t *)v2);
202+}
203+
204+static int vma_reader_get_bitmap(VmaRestoreState *rstate, int64_t cluster_num)
205+{
206+ assert(rstate);
207+ assert(rstate->bitmap);
208+
209+ unsigned long val, idx, bit;
210+
211+ idx = cluster_num / BITS_PER_LONG;
212+
213+ assert(rstate->bitmap_size > idx);
214+
215+ bit = cluster_num % BITS_PER_LONG;
216+ val = rstate->bitmap[idx];
217+
218+ return !!(val & (1UL << bit));
219+}
220+
221+static void vma_reader_set_bitmap(VmaRestoreState *rstate, int64_t cluster_num,
222+ int dirty)
223+{
224+ assert(rstate);
225+ assert(rstate->bitmap);
226+
227+ unsigned long val, idx, bit;
228+
229+ idx = cluster_num / BITS_PER_LONG;
230+
231+ assert(rstate->bitmap_size > idx);
232+
233+ bit = cluster_num % BITS_PER_LONG;
234+ val = rstate->bitmap[idx];
235+ if (dirty) {
236+ if (!(val & (1UL << bit))) {
237+ val |= 1UL << bit;
238+ }
239+ } else {
240+ if (val & (1UL << bit)) {
241+ val &= ~(1UL << bit);
242+ }
243+ }
244+ rstate->bitmap[idx] = val;
245+}
246+
247+typedef struct VmaBlob {
248+ uint32_t start;
249+ uint32_t len;
250+ void *data;
251+} VmaBlob;
252+
253+static const VmaBlob *get_header_blob(VmaReader *vmar, uint32_t pos)
254+{
255+ assert(vmar);
256+ assert(vmar->blob_hash);
257+
258+ return g_hash_table_lookup(vmar->blob_hash, &pos);
259+}
260+
261+static const char *get_header_str(VmaReader *vmar, uint32_t pos)
262+{
263+ const VmaBlob *blob = get_header_blob(vmar, pos);
264+ if (!blob) {
265+ return NULL;
266+ }
267+ const char *res = (char *)blob->data;
268+ if (res[blob->len-1] != '\0') {
269+ return NULL;
270+ }
271+ return res;
272+}
273+
274+static ssize_t
275+safe_read(int fd, unsigned char *buf, size_t count)
276+{
277+ ssize_t n;
278+
279+ do {
280+ n = read(fd, buf, count);
281+ } while (n < 0 && errno == EINTR);
282+
283+ return n;
284+}
285+
286+static ssize_t
287+full_read(int fd, unsigned char *buf, size_t len)
288+{
289+ ssize_t n;
290+ size_t total;
291+
292+ total = 0;
293+
294+ while (len > 0) {
295+ n = safe_read(fd, buf, len);
296+
297+ if (n == 0) {
298+ return total;
299+ }
300+
301+ if (n <= 0) {
302+ break;
303+ }
304+
305+ buf += n;
306+ total += n;
307+ len -= n;
308+ }
309+
310+ if (len) {
311+ return -1;
312+ }
313+
314+ return total;
315+}
316+
317+void vma_reader_destroy(VmaReader *vmar)
318+{
319+ assert(vmar);
320+
321+ if (vmar->fd >= 0) {
322+ close(vmar->fd);
323+ }
324+
325+ if (vmar->cdata_list) {
326+ g_list_free(vmar->cdata_list);
327+ }
328+
329+ int i;
330+ for (i = 1; i < 256; i++) {
331+ if (vmar->rstate[i].bitmap) {
332+ g_free(vmar->rstate[i].bitmap);
333+ }
334+ }
335+
336+ if (vmar->md5csum) {
337+ g_checksum_free(vmar->md5csum);
338+ }
339+
340+ if (vmar->blob_hash) {
341+ g_hash_table_destroy(vmar->blob_hash);
342+ }
343+
344+ if (vmar->head_data) {
345+ g_free(vmar->head_data);
346+ }
347+
348+ g_free(vmar);
349+
350+};
351+
352+static int vma_reader_read_head(VmaReader *vmar, Error **errp)
353+{
354+ assert(vmar);
355+ assert(errp);
356+ assert(*errp == NULL);
357+
358+ unsigned char md5sum[16];
359+ int i;
360+ int ret = 0;
361+
362+ vmar->head_data = g_malloc(sizeof(VmaHeader));
363+
364+ if (full_read(vmar->fd, vmar->head_data, sizeof(VmaHeader)) !=
365+ sizeof(VmaHeader)) {
366+ error_setg(errp, "can't read vma header - %s",
367+ errno ? strerror(errno) : "got EOF");
368+ return -1;
369+ }
370+
371+ VmaHeader *h = (VmaHeader *)vmar->head_data;
372+
373+ if (h->magic != VMA_MAGIC) {
374+ error_setg(errp, "not a vma file - wrong magic number");
375+ return -1;
376+ }
377+
378+ uint32_t header_size = GUINT32_FROM_BE(h->header_size);
379+ int need = header_size - sizeof(VmaHeader);
380+ if (need <= 0) {
381+ error_setg(errp, "wrong vma header size %d", header_size);
382+ return -1;
383+ }
384+
385+ vmar->head_data = g_realloc(vmar->head_data, header_size);
386+ h = (VmaHeader *)vmar->head_data;
387+
388+ if (full_read(vmar->fd, vmar->head_data + sizeof(VmaHeader), need) !=
389+ need) {
390+ error_setg(errp, "can't read vma header data - %s",
391+ errno ? strerror(errno) : "got EOF");
392+ return -1;
393+ }
394+
395+ memcpy(md5sum, h->md5sum, 16);
396+ memset(h->md5sum, 0, 16);
397+
398+ g_checksum_reset(vmar->md5csum);
399+ g_checksum_update(vmar->md5csum, vmar->head_data, header_size);
400+ gsize csize = 16;
401+ g_checksum_get_digest(vmar->md5csum, (guint8 *)(h->md5sum), &csize);
402+
403+ if (memcmp(md5sum, h->md5sum, 16) != 0) {
404+ error_setg(errp, "wrong vma header chechsum");
405+ return -1;
406+ }
407+
408+ /* we can modify header data after checksum verify */
409+ h->header_size = header_size;
410+
411+ h->version = GUINT32_FROM_BE(h->version);
412+ if (h->version != 1) {
413+ error_setg(errp, "wrong vma version %d", h->version);
414+ return -1;
415+ }
416+
417+ h->ctime = GUINT64_FROM_BE(h->ctime);
418+ h->blob_buffer_offset = GUINT32_FROM_BE(h->blob_buffer_offset);
419+ h->blob_buffer_size = GUINT32_FROM_BE(h->blob_buffer_size);
420+
421+ uint32_t bstart = h->blob_buffer_offset + 1;
422+ uint32_t bend = h->blob_buffer_offset + h->blob_buffer_size;
423+
424+ if (bstart <= sizeof(VmaHeader)) {
425+ error_setg(errp, "wrong vma blob buffer offset %d",
426+ h->blob_buffer_offset);
427+ return -1;
428+ }
429+
430+ if (bend > header_size) {
431+ error_setg(errp, "wrong vma blob buffer size %d/%d",
432+ h->blob_buffer_offset, h->blob_buffer_size);
433+ return -1;
434+ }
435+
436+ while ((bstart + 2) <= bend) {
437+ uint32_t size = vmar->head_data[bstart] +
438+ (vmar->head_data[bstart+1] << 8);
439+ if ((bstart + size + 2) <= bend) {
440+ VmaBlob *blob = g_new0(VmaBlob, 1);
441+ blob->start = bstart - h->blob_buffer_offset;
442+ blob->len = size;
443+ blob->data = vmar->head_data + bstart + 2;
444+ g_hash_table_insert(vmar->blob_hash, &blob->start, blob);
445+ }
446+ bstart += size + 2;
447+ }
448+
449+
450+ int count = 0;
451+ for (i = 1; i < 256; i++) {
452+ VmaDeviceInfoHeader *dih = &h->dev_info[i];
453+ uint32_t devname_ptr = GUINT32_FROM_BE(dih->devname_ptr);
454+ uint64_t size = GUINT64_FROM_BE(dih->size);
455+ const char *devname = get_header_str(vmar, devname_ptr);
456+
457+ if (size && devname) {
458+ count++;
459+ vmar->devinfo[i].size = size;
460+ vmar->devinfo[i].devname = devname;
461+
462+ if (strcmp(devname, "vmstate") == 0) {
463+ vmar->vmstate_stream = i;
464+ }
465+ }
466+ }
467+
468+ if (!count) {
469+ error_setg(errp, "vma does not contain data");
470+ return -1;
471+ }
472+
473+ for (i = 0; i < VMA_MAX_CONFIGS; i++) {
474+ uint32_t name_ptr = GUINT32_FROM_BE(h->config_names[i]);
475+ uint32_t data_ptr = GUINT32_FROM_BE(h->config_data[i]);
476+
477+ if (!(name_ptr && data_ptr)) {
478+ continue;
479+ }
480+ const char *name = get_header_str(vmar, name_ptr);
481+ const VmaBlob *blob = get_header_blob(vmar, data_ptr);
482+
483+ if (!(name && blob)) {
484+ error_setg(errp, "vma contains invalid data pointers");
485+ return -1;
486+ }
487+
488+ VmaConfigData *cdata = g_new0(VmaConfigData, 1);
489+ cdata->name = name;
490+ cdata->data = blob->data;
491+ cdata->len = blob->len;
492+
493+ vmar->cdata_list = g_list_append(vmar->cdata_list, cdata);
494+ }
495+
496+ return ret;
497+};
498+
499+VmaReader *vma_reader_create(const char *filename, Error **errp)
500+{
501+ assert(filename);
502+ assert(errp);
503+
504+ VmaReader *vmar = g_new0(VmaReader, 1);
505+
3f350e0d
DM
506+ if (strcmp(filename, "-") == 0) {
507+ vmar->fd = dup(0);
508+ } else {
509+ vmar->fd = open(filename, O_RDONLY);
510+ }
5ad5891c
DM
511+
512+ if (vmar->fd < 0) {
513+ error_setg(errp, "can't open file %s - %s\n", filename,
514+ strerror(errno));
515+ goto err;
516+ }
517+
518+ vmar->md5csum = g_checksum_new(G_CHECKSUM_MD5);
519+ if (!vmar->md5csum) {
520+ error_setg(errp, "can't allocate cmsum\n");
521+ goto err;
522+ }
523+
524+ vmar->blob_hash = g_hash_table_new_full(g_int32_hash, g_int32_equal,
525+ NULL, g_free);
526+
527+ if (vma_reader_read_head(vmar, errp) < 0) {
528+ goto err;
529+ }
530+
531+ return vmar;
532+
533+err:
534+ if (vmar) {
535+ vma_reader_destroy(vmar);
536+ }
537+
538+ return NULL;
539+}
540+
541+VmaHeader *vma_reader_get_header(VmaReader *vmar)
542+{
543+ assert(vmar);
544+ assert(vmar->head_data);
545+
546+ return (VmaHeader *)(vmar->head_data);
547+}
548+
549+GList *vma_reader_get_config_data(VmaReader *vmar)
550+{
551+ assert(vmar);
552+ assert(vmar->head_data);
553+
554+ return vmar->cdata_list;
555+}
556+
557+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id)
558+{
559+ assert(vmar);
560+ assert(dev_id);
561+
562+ if (vmar->devinfo[dev_id].size && vmar->devinfo[dev_id].devname) {
563+ return &vmar->devinfo[dev_id];
564+ }
565+
566+ return NULL;
567+}
568+
569+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, BlockDriverState *bs,
570+ bool write_zeroes, Error **errp)
571+{
572+ assert(vmar);
573+ assert(bs != NULL);
574+ assert(dev_id);
575+ assert(vmar->rstate[dev_id].bs == NULL);
576+
577+ int64_t size = bdrv_getlength(bs);
578+ if (size != vmar->devinfo[dev_id].size) {
579+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
580+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
581+ size, vmar->devinfo[dev_id].size);
582+ return -1;
583+ }
584+
585+ vmar->rstate[dev_id].bs = bs;
586+ vmar->rstate[dev_id].write_zeroes = write_zeroes;
587+
588+ int64_t bitmap_size = (size/BDRV_SECTOR_SIZE) +
589+ (VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE) * BITS_PER_LONG - 1;
590+ bitmap_size /= (VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE) * BITS_PER_LONG;
591+
592+ vmar->rstate[dev_id].bitmap_size = bitmap_size;
593+ vmar->rstate[dev_id].bitmap = g_new0(unsigned long, bitmap_size);
594+
4244016d
DM
595+ vmar->cluster_count += size/VMA_CLUSTER_SIZE;
596+
5ad5891c
DM
597+ return 0;
598+}
599+
600+static ssize_t safe_write(int fd, void *buf, size_t count)
601+{
602+ ssize_t n;
603+
604+ do {
605+ n = write(fd, buf, count);
606+ } while (n < 0 && errno == EINTR);
607+
608+ return n;
609+}
610+
611+static size_t full_write(int fd, void *buf, size_t len)
612+{
613+ ssize_t n;
614+ size_t total;
615+
616+ total = 0;
617+
618+ while (len > 0) {
619+ n = safe_write(fd, buf, len);
620+ if (n < 0) {
621+ return n;
622+ }
623+ buf += n;
624+ total += n;
625+ len -= n;
626+ }
627+
628+ if (len) {
629+ /* incomplete write ? */
630+ return -1;
631+ }
632+
633+ return total;
634+}
635+
636+static int restore_write_data(VmaReader *vmar, guint8 dev_id,
637+ BlockDriverState *bs, int vmstate_fd,
638+ unsigned char *buf, int64_t sector_num,
639+ int nb_sectors, Error **errp)
640+{
641+ assert(vmar);
642+
643+ if (dev_id == vmar->vmstate_stream) {
644+ if (vmstate_fd >= 0) {
645+ int len = nb_sectors * BDRV_SECTOR_SIZE;
646+ int res = full_write(vmstate_fd, buf, len);
647+ if (res < 0) {
648+ error_setg(errp, "write vmstate failed %d", res);
649+ return -1;
650+ }
651+ }
652+ } else {
653+ int res = bdrv_write(bs, sector_num, buf, nb_sectors);
654+ if (res < 0) {
655+ error_setg(errp, "bdrv_write to %s failed (%d)",
656+ bdrv_get_device_name(bs), res);
657+ return -1;
658+ }
659+ }
660+ return 0;
661+}
662+static int restore_extent(VmaReader *vmar, unsigned char *buf,
309874bd 663+ int extent_size, int vmstate_fd,
4244016d 664+ bool verbose, Error **errp)
5ad5891c
DM
665+{
666+ assert(vmar);
667+ assert(buf);
668+
669+ VmaExtentHeader *ehead = (VmaExtentHeader *)buf;
670+ int start = VMA_EXTENT_HEADER_SIZE;
671+ int i;
672+
673+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
674+ uint64_t block_info = GUINT64_FROM_BE(ehead->blockinfo[i]);
4244016d
DM
675+ uint64_t cluster_num = block_info & 0xffffffff;
676+ uint8_t dev_id = (block_info >> 32) & 0xff;
5ad5891c
DM
677+ uint16_t mask = block_info >> (32+16);
678+ int64_t max_sector;
679+
680+ if (!dev_id) {
681+ continue;
682+ }
683+
684+ VmaRestoreState *rstate = &vmar->rstate[dev_id];
685+ BlockDriverState *bs = NULL;
686+
687+ if (dev_id != vmar->vmstate_stream) {
688+ bs = rstate->bs;
689+ if (!bs) {
690+ error_setg(errp, "got wrong dev id %d", dev_id);
691+ return -1;
692+ }
693+
694+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
4244016d 695+ error_setg(errp, "found duplicated cluster %zd for stream %s",
5ad5891c
DM
696+ cluster_num, vmar->devinfo[dev_id].devname);
697+ return -1;
698+ }
699+ vma_reader_set_bitmap(rstate, cluster_num, 1);
700+
701+ max_sector = vmar->devinfo[dev_id].size/BDRV_SECTOR_SIZE;
702+ } else {
703+ max_sector = G_MAXINT64;
704+ if (cluster_num != vmar->vmstate_clusters) {
705+ error_setg(errp, "found out of order vmstate data");
706+ return -1;
707+ }
708+ vmar->vmstate_clusters++;
709+ }
710+
4244016d
DM
711+ vmar->clusters_read++;
712+
713+ if (verbose) {
714+ time_t duration = time(NULL) - vmar->start_time;
715+ int percent = (vmar->clusters_read*100)/vmar->cluster_count;
716+ if (percent != vmar->clusters_read_per) {
309874bd 717+ printf("progress %d%% (read %zd bytes, duration %zd sec)\n",
4244016d
DM
718+ percent, vmar->clusters_read*VMA_CLUSTER_SIZE,
719+ duration);
720+ fflush(stdout);
721+ vmar->clusters_read_per = percent;
722+ }
723+ }
724+
5ad5891c
DM
725+ /* try to write whole clusters to speedup restore */
726+ if (mask == 0xffff) {
727+ if ((start + VMA_CLUSTER_SIZE) > extent_size) {
728+ error_setg(errp, "short vma extent - too many blocks");
729+ return -1;
730+ }
731+ int64_t sector_num = (cluster_num * VMA_CLUSTER_SIZE) /
732+ BDRV_SECTOR_SIZE;
733+ int64_t end_sector = sector_num +
734+ VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE;
735+
736+ if (end_sector > max_sector) {
737+ end_sector = max_sector;
738+ }
739+
740+ if (end_sector <= sector_num) {
741+ error_setg(errp, "got wrong block address - write bejond end");
742+ return -1;
743+ }
744+
745+ int nb_sectors = end_sector - sector_num;
746+ if (restore_write_data(vmar, dev_id, bs, vmstate_fd, buf + start,
747+ sector_num, nb_sectors, errp) < 0) {
748+ return -1;
749+ }
750+
751+ start += VMA_CLUSTER_SIZE;
752+ } else {
753+ int j;
754+ int bit = 1;
755+
756+ for (j = 0; j < 16; j++) {
757+ int64_t sector_num = (cluster_num*VMA_CLUSTER_SIZE +
758+ j*VMA_BLOCK_SIZE)/BDRV_SECTOR_SIZE;
759+
760+ int64_t end_sector = sector_num +
761+ VMA_BLOCK_SIZE/BDRV_SECTOR_SIZE;
762+ if (end_sector > max_sector) {
763+ end_sector = max_sector;
764+ }
765+
766+ if (mask & bit) {
767+ if ((start + VMA_BLOCK_SIZE) > extent_size) {
768+ error_setg(errp, "short vma extent - too many blocks");
769+ return -1;
770+ }
771+
772+ if (end_sector <= sector_num) {
773+ error_setg(errp, "got wrong block address - "
774+ "write bejond end");
775+ return -1;
776+ }
777+
778+ int nb_sectors = end_sector - sector_num;
779+ if (restore_write_data(vmar, dev_id, bs, vmstate_fd,
780+ buf + start, sector_num,
781+ nb_sectors, errp) < 0) {
782+ return -1;
783+ }
784+
785+ start += VMA_BLOCK_SIZE;
786+
787+ } else {
788+
4244016d 789+ if (rstate->write_zeroes && (end_sector > sector_num)) {
5ad5891c
DM
790+ /* Todo: use bdrv_co_write_zeroes (but that needs to
791+ * be run inside coroutine?)
792+ */
793+ int nb_sectors = end_sector - sector_num;
794+ if (restore_write_data(vmar, dev_id, bs, vmstate_fd,
795+ zero_vma_block, sector_num,
796+ nb_sectors, errp) < 0) {
797+ return -1;
798+ }
799+ }
800+ }
801+
802+ bit = bit << 1;
803+ }
804+ }
805+ }
806+
807+ if (start != extent_size) {
808+ error_setg(errp, "vma extent error - missing blocks");
809+ return -1;
810+ }
811+
812+ return 0;
813+}
814+
4244016d 815+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
309874bd 816+ Error **errp)
5ad5891c
DM
817+{
818+ assert(vmar);
819+ assert(vmar->head_data);
820+
821+ int ret = 0;
822+ unsigned char buf[VMA_MAX_EXTENT_SIZE];
823+ int buf_pos = 0;
824+ unsigned char md5sum[16];
825+ VmaHeader *h = (VmaHeader *)vmar->head_data;
826+
4244016d 827+ vmar->start_time = time(NULL);
5ad5891c
DM
828+
829+ while (1) {
830+ int bytes = full_read(vmar->fd, buf + buf_pos, sizeof(buf) - buf_pos);
831+ if (bytes < 0) {
832+ error_setg(errp, "read failed - %s", strerror(errno));
833+ return -1;
834+ }
835+
836+ buf_pos += bytes;
837+
838+ if (!buf_pos) {
839+ break; /* EOF */
840+ }
841+
842+ if (buf_pos < VMA_EXTENT_HEADER_SIZE) {
843+ error_setg(errp, "read short extent (%d bytes)", buf_pos);
844+ return -1;
845+ }
846+
847+ VmaExtentHeader *ehead = (VmaExtentHeader *)buf;
848+
849+ /* extract md5sum */
850+ memcpy(md5sum, ehead->md5sum, sizeof(ehead->md5sum));
851+ memset(ehead->md5sum, 0, sizeof(ehead->md5sum));
852+
853+ g_checksum_reset(vmar->md5csum);
854+ g_checksum_update(vmar->md5csum, buf, VMA_EXTENT_HEADER_SIZE);
855+ gsize csize = 16;
856+ g_checksum_get_digest(vmar->md5csum, ehead->md5sum, &csize);
857+
858+ if (memcmp(md5sum, ehead->md5sum, 16) != 0) {
859+ error_setg(errp, "wrong vma extent header chechsum");
860+ return -1;
861+ }
862+
863+ if (memcmp(h->uuid, ehead->uuid, sizeof(ehead->uuid)) != 0) {
864+ error_setg(errp, "wrong vma extent uuid");
865+ return -1;
866+ }
867+
868+ if (ehead->magic != VMA_EXTENT_MAGIC || ehead->reserved1 != 0) {
869+ error_setg(errp, "wrong vma extent header magic");
870+ return -1;
871+ }
872+
873+ int block_count = GUINT16_FROM_BE(ehead->block_count);
874+ int extent_size = VMA_EXTENT_HEADER_SIZE + block_count*VMA_BLOCK_SIZE;
875+
876+ if (buf_pos < extent_size) {
877+ error_setg(errp, "short vma extent (%d < %d)", buf_pos,
878+ extent_size);
879+ return -1;
880+ }
881+
309874bd 882+ if (restore_extent(vmar, buf, extent_size, vmstate_fd, verbose,
4244016d 883+ errp) < 0) {
5ad5891c
DM
884+ return -1;
885+ }
886+
887+ if (buf_pos > extent_size) {
888+ memmove(buf, buf + extent_size, buf_pos - extent_size);
889+ buf_pos = buf_pos - extent_size;
890+ } else {
891+ buf_pos = 0;
892+ }
893+ }
894+
895+ bdrv_drain_all();
896+
897+ int i;
898+ for (i = 1; i < 256; i++) {
899+ VmaRestoreState *rstate = &vmar->rstate[i];
900+ if (!rstate->bs) {
901+ continue;
902+ }
903+
904+ if (bdrv_flush(rstate->bs) < 0) {
905+ error_setg(errp, "vma bdrv_flush %s failed",
906+ vmar->devinfo[i].devname);
907+ return -1;
908+ }
909+
910+ if (vmar->devinfo[i].size &&
911+ (strcmp(vmar->devinfo[i].devname, "vmstate") != 0)) {
912+ assert(rstate->bitmap);
913+
914+ int64_t cluster_num, end;
915+
916+ end = (vmar->devinfo[i].size + VMA_CLUSTER_SIZE - 1) /
917+ VMA_CLUSTER_SIZE;
918+
919+ for (cluster_num = 0; cluster_num < end; cluster_num++) {
920+ if (!vma_reader_get_bitmap(rstate, cluster_num)) {
921+ error_setg(errp, "detected missing cluster %zd "
922+ "for stream %s", cluster_num,
923+ vmar->devinfo[i].devname);
924+ return -1;
925+ }
926+ }
927+ }
928+ }
929+
930+ return ret;
931+}
932+
933diff --git a/vma-writer.c b/vma-writer.c
934new file mode 100644
89af8a77 935index 0000000..761d7ca
5ad5891c
DM
936--- /dev/null
937+++ b/vma-writer.c
92bf040c 938@@ -0,0 +1,932 @@
5ad5891c
DM
939+/*
940+ * VMA: Virtual Machine Archive
941+ *
942+ * Copyright (C) 2012 Proxmox Server Solutions
943+ *
944+ * Authors:
945+ * Dietmar Maurer (dietmar@proxmox.com)
946+ *
947+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
948+ * See the COPYING file in the top-level directory.
949+ *
950+ */
951+
952+#include <stdio.h>
953+#include <errno.h>
954+#include <unistd.h>
955+#include <stdio.h>
956+#include <string.h>
957+#include <sys/types.h>
958+#include <sys/stat.h>
959+#include <fcntl.h>
960+#include <glib.h>
961+#include <uuid/uuid.h>
962+
963+#include "qemu-common.h"
5ad5891c 964+#include "vma.h"
92bf040c
DM
965+#include "block/block.h"
966+#include "monitor/monitor.h"
5ad5891c
DM
967+
968+#define DEBUG_VMA 0
969+
970+#define DPRINTF(fmt, ...)\
971+ do { if (DEBUG_VMA) { printf("vma: " fmt, ## __VA_ARGS__); } } while (0)
972+
973+#define WRITE_BUFFERS 5
974+
975+typedef struct VmaAIOCB VmaAIOCB;
976+struct VmaAIOCB {
5ad5891c 977+ unsigned char buffer[VMA_MAX_EXTENT_SIZE];
2dfd543c 978+ VmaWriter *vmaw;
5ad5891c
DM
979+ size_t bytes;
980+ Coroutine *co;
981+};
982+
983+struct VmaWriter {
984+ int fd;
985+ FILE *cmd;
986+ int status;
987+ char errmsg[8192];
988+ uuid_t uuid;
989+ bool header_written;
990+ bool closed;
991+
992+ /* we always write extents */
993+ unsigned char outbuf[VMA_MAX_EXTENT_SIZE];
994+ int outbuf_pos; /* in bytes */
995+ int outbuf_count; /* in VMA_BLOCKS */
996+ uint64_t outbuf_block_info[VMA_BLOCKS_PER_EXTENT];
997+
2dfd543c 998+ VmaAIOCB *aiocbs[WRITE_BUFFERS];
5ad5891c
DM
999+ CoQueue wqueue;
1000+
1001+ GChecksum *md5csum;
1002+ CoMutex writer_lock;
1003+ CoMutex flush_lock;
1004+ Coroutine *co_writer;
5ad5891c
DM
1005+
1006+ /* drive information */
1007+ VmaStreamInfo stream_info[256];
1008+ guint stream_count;
1009+
1010+ guint8 vmstate_stream;
1011+ uint32_t vmstate_clusters;
1012+
1013+ /* header blob table */
1014+ char *header_blob_table;
1015+ uint32_t header_blob_table_size;
1016+ uint32_t header_blob_table_pos;
1017+
1018+ /* store for config blobs */
1019+ uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
1020+ uint32_t config_data[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
1021+ uint32_t config_count;
1022+};
1023+
1024+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...)
1025+{
1026+ va_list ap;
1027+
1028+ if (vmaw->status < 0) {
1029+ return;
1030+ }
1031+
1032+ vmaw->status = -1;
1033+
1034+ va_start(ap, fmt);
1035+ g_vsnprintf(vmaw->errmsg, sizeof(vmaw->errmsg), fmt, ap);
1036+ va_end(ap);
1037+
1038+ DPRINTF("vma_writer_set_error: %s\n", vmaw->errmsg);
1039+}
1040+
1041+static uint32_t allocate_header_blob(VmaWriter *vmaw, const char *data,
1042+ size_t len)
1043+{
1044+ if (len > 65535) {
1045+ return 0;
1046+ }
1047+
1048+ if (!vmaw->header_blob_table ||
1049+ (vmaw->header_blob_table_size <
1050+ (vmaw->header_blob_table_pos + len + 2))) {
1051+ int newsize = vmaw->header_blob_table_size + ((len + 2 + 511)/512)*512;
1052+
1053+ vmaw->header_blob_table = g_realloc(vmaw->header_blob_table, newsize);
1054+ memset(vmaw->header_blob_table + vmaw->header_blob_table_size,
1055+ 0, newsize - vmaw->header_blob_table_size);
1056+ vmaw->header_blob_table_size = newsize;
1057+ }
1058+
1059+ uint32_t cpos = vmaw->header_blob_table_pos;
1060+ vmaw->header_blob_table[cpos] = len & 255;
1061+ vmaw->header_blob_table[cpos+1] = (len >> 8) & 255;
1062+ memcpy(vmaw->header_blob_table + cpos + 2, data, len);
1063+ vmaw->header_blob_table_pos += len + 2;
1064+ return cpos;
1065+}
1066+
1067+static uint32_t allocate_header_string(VmaWriter *vmaw, const char *str)
1068+{
1069+ assert(vmaw);
1070+
1071+ size_t len = strlen(str) + 1;
1072+
1073+ return allocate_header_blob(vmaw, str, len);
1074+}
1075+
1076+int vma_writer_add_config(VmaWriter *vmaw, const char *name, gpointer data,
1077+ gsize len)
1078+{
1079+ assert(vmaw);
1080+ assert(!vmaw->header_written);
1081+ assert(vmaw->config_count < VMA_MAX_CONFIGS);
1082+ assert(name);
1083+ assert(data);
1084+ assert(len);
1085+
1086+ uint32_t name_ptr = allocate_header_string(vmaw, name);
1087+ if (!name_ptr) {
1088+ return -1;
1089+ }
1090+
1091+ uint32_t data_ptr = allocate_header_blob(vmaw, data, len);
1092+ if (!data_ptr) {
1093+ return -1;
1094+ }
1095+
1096+ vmaw->config_names[vmaw->config_count] = name_ptr;
1097+ vmaw->config_data[vmaw->config_count] = data_ptr;
1098+
1099+ vmaw->config_count++;
1100+
1101+ return 0;
1102+}
1103+
1104+int vma_writer_register_stream(VmaWriter *vmaw, const char *devname,
1105+ size_t size)
1106+{
1107+ assert(vmaw);
1108+ assert(devname);
1109+ assert(!vmaw->status);
1110+
1111+ if (vmaw->header_written) {
1112+ vma_writer_set_error(vmaw, "vma_writer_register_stream: header "
1113+ "already written");
1114+ return -1;
1115+ }
1116+
1117+ guint n = vmaw->stream_count + 1;
1118+
1119+ /* we can have dev_ids form 1 to 255 (0 reserved)
1120+ * 255(-1) reseverd for safety
1121+ */
1122+ if (n > 254) {
1123+ vma_writer_set_error(vmaw, "vma_writer_register_stream: "
1124+ "too many drives");
1125+ return -1;
1126+ }
1127+
1128+ if (size <= 0) {
1129+ vma_writer_set_error(vmaw, "vma_writer_register_stream: "
1130+ "got strange size %zd", size);
1131+ return -1;
1132+ }
1133+
1134+ DPRINTF("vma_writer_register_stream %s %zu %d\n", devname, size, n);
1135+
1136+ vmaw->stream_info[n].devname = g_strdup(devname);
1137+ vmaw->stream_info[n].size = size;
1138+
1139+ vmaw->stream_info[n].cluster_count = (size + VMA_CLUSTER_SIZE - 1) /
1140+ VMA_CLUSTER_SIZE;
1141+
1142+ vmaw->stream_count = n;
1143+
1144+ if (strcmp(devname, "vmstate") == 0) {
1145+ vmaw->vmstate_stream = n;
1146+ }
1147+
1148+ return n;
1149+}
1150+
1151+static void vma_co_continue_write(void *opaque)
1152+{
1153+ VmaWriter *vmaw = opaque;
1154+
5ad5891c
DM
1155+ DPRINTF("vma_co_continue_write\n");
1156+ qemu_coroutine_enter(vmaw->co_writer, NULL);
1157+}
1158+
2dfd543c
DM
1159+static int vma_co_write_finished(void *opaque)
1160+{
1161+ VmaWriter *vmaw = opaque;
89af8a77 1162+
2dfd543c
DM
1163+ return (vmaw->co_writer != 0);
1164+}
1165+
5ad5891c
DM
1166+static ssize_t coroutine_fn
1167+vma_co_write(VmaWriter *vmaw, const void *buf, size_t bytes)
1168+{
1169+ size_t done = 0;
1170+ ssize_t ret;
1171+
1172+ /* atomic writes (we cannot interleave writes) */
1173+ qemu_co_mutex_lock(&vmaw->writer_lock);
1174+
1175+ DPRINTF("vma_co_write enter %zd\n", bytes);
1176+
2dfd543c
DM
1177+ assert(vmaw->co_writer == NULL);
1178+
c6a99f54
DM
1179+ vmaw->co_writer = qemu_coroutine_self();
1180+
1181+ qemu_aio_set_fd_handler(vmaw->fd, NULL, vma_co_continue_write,
89af8a77 1182+ vma_co_write_finished, vmaw);
c6a99f54
DM
1183+
1184+ DPRINTF("vma_co_write wait until writable\n");
1185+ qemu_coroutine_yield();
1186+ DPRINTF("vma_co_write starting %zd\n", bytes);
89af8a77 1187+
5ad5891c 1188+ while (done < bytes) {
c6a99f54 1189+ ret = write(vmaw->fd, buf + done, bytes - done);
5ad5891c
DM
1190+ if (ret > 0) {
1191+ done += ret;
1192+ DPRINTF("vma_co_write written %zd %zd\n", done, ret);
1193+ } else if (ret < 0) {
1194+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
1195+ DPRINTF("vma_co_write yield %zd\n", done);
5ad5891c
DM
1196+ qemu_coroutine_yield();
1197+ DPRINTF("vma_co_write restart %zd\n", done);
1198+ } else {
1199+ vma_writer_set_error(vmaw, "vma_co_write write error - %s",
1200+ strerror(errno));
1201+ done = -1; /* always return failure for partial writes */
1202+ break;
1203+ }
1204+ } else if (ret == 0) {
1205+ /* should not happen - simply try again */
1206+ }
1207+ }
1208+
c6a99f54
DM
1209+ qemu_aio_set_fd_handler(vmaw->fd, NULL, NULL, NULL, NULL);
1210+
2dfd543c
DM
1211+ vmaw->co_writer = NULL;
1212+
5ad5891c
DM
1213+ qemu_co_mutex_unlock(&vmaw->writer_lock);
1214+
1215+ DPRINTF("vma_co_write leave %zd\n", done);
1216+ return done;
1217+}
1218+
1219+static void coroutine_fn vma_co_writer_task(void *opaque)
1220+{
1221+ VmaAIOCB *cb = opaque;
1222+
1223+ DPRINTF("vma_co_writer_task start\n");
1224+
1225+ int64_t done = vma_co_write(cb->vmaw, cb->buffer, cb->bytes);
1226+ DPRINTF("vma_co_writer_task write done %zd\n", done);
1227+
1228+ if (done != cb->bytes) {
1229+ DPRINTF("vma_co_writer_task failed write %zd %zd", cb->bytes, done);
1230+ vma_writer_set_error(cb->vmaw, "vma_co_writer_task failed write %zd",
1231+ done);
1232+ }
1233+
1234+ cb->bytes = 0;
1235+
1236+ qemu_co_queue_next(&cb->vmaw->wqueue);
1237+
1238+ DPRINTF("vma_co_writer_task end\n");
1239+}
1240+
1241+static void coroutine_fn vma_queue_flush(VmaWriter *vmaw)
1242+{
1243+ DPRINTF("vma_queue_flush enter\n");
1244+
1245+ assert(vmaw);
1246+
1247+ while (1) {
1248+ int i;
1249+ VmaAIOCB *cb = NULL;
1250+ for (i = 0; i < WRITE_BUFFERS; i++) {
2dfd543c
DM
1251+ if (vmaw->aiocbs[i]->bytes) {
1252+ cb = vmaw->aiocbs[i];
5ad5891c 1253+ DPRINTF("FOUND USED AIO BUFFER %d %zd\n", i,
2dfd543c 1254+ vmaw->aiocbs[i]->bytes);
5ad5891c
DM
1255+ break;
1256+ }
1257+ }
1258+ if (!cb) {
1259+ break;
1260+ }
1261+ qemu_co_queue_wait(&vmaw->wqueue);
1262+ }
1263+
1264+ DPRINTF("vma_queue_flush leave\n");
1265+}
1266+
1267+/**
1268+ * NOTE: pipe buffer size in only 4096 bytes on linux (see 'ulimit -a')
1269+ * So we need to create a coroutione to allow 'parallel' execution.
1270+ */
1271+static ssize_t coroutine_fn
1272+vma_queue_write(VmaWriter *vmaw, const void *buf, size_t bytes)
1273+{
1274+ DPRINTF("vma_queue_write enter %zd\n", bytes);
1275+
1276+ assert(vmaw);
1277+ assert(buf);
1278+ assert(bytes <= VMA_MAX_EXTENT_SIZE);
1279+
1280+ VmaAIOCB *cb = NULL;
1281+ while (!cb) {
1282+ int i;
1283+ for (i = 0; i < WRITE_BUFFERS; i++) {
2dfd543c
DM
1284+ if (!vmaw->aiocbs[i]->bytes) {
1285+ cb = vmaw->aiocbs[i];
5ad5891c
DM
1286+ break;
1287+ }
1288+ }
1289+ if (!cb) {
1290+ qemu_co_queue_wait(&vmaw->wqueue);
1291+ }
1292+ }
1293+
1294+ memcpy(cb->buffer, buf, bytes);
1295+ cb->bytes = bytes;
1296+ cb->vmaw = vmaw;
1297+
1298+ DPRINTF("vma_queue_write start %zd\n", bytes);
1299+ cb->co = qemu_coroutine_create(vma_co_writer_task);
1300+ qemu_coroutine_enter(cb->co, cb);
1301+
1302+ DPRINTF("vma_queue_write leave\n");
1303+
1304+ return bytes;
1305+}
1306+
55827521 1307+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp)
5ad5891c
DM
1308+{
1309+ const char *p;
1310+
1311+ assert(sizeof(VmaHeader) == (4096 + 8192));
1312+ assert(sizeof(VmaExtentHeader) == 512);
1313+
1314+ VmaWriter *vmaw = g_new0(VmaWriter, 1);
1315+ vmaw->fd = -1;
1316+
1317+ vmaw->md5csum = g_checksum_new(G_CHECKSUM_MD5);
1318+ if (!vmaw->md5csum) {
1319+ error_setg(errp, "can't allocate cmsum\n");
1320+ goto err;
1321+ }
1322+
1323+ if (strstart(filename, "exec:", &p)) {
1324+ vmaw->cmd = popen(p, "w");
1325+ if (vmaw->cmd == NULL) {
1326+ error_setg(errp, "can't popen command '%s' - %s\n", p,
1327+ strerror(errno));
1328+ goto err;
1329+ }
1330+ vmaw->fd = fileno(vmaw->cmd);
2dfd543c
DM
1331+
1332+ /* try to use O_NONBLOCK and O_DIRECT */
1333+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
1334+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_DIRECT);
5ad5891c
DM
1335+
1336+ } else {
efa8e5de
DM
1337+ struct stat st;
1338+ int oflags;
309874bd 1339+ const char *tmp_id_str;
3f350e0d 1340+
efa8e5de 1341+ if ((stat(filename, &st) == 0) && S_ISFIFO(st.st_mode)) {
2dfd543c 1342+ oflags = O_NONBLOCK|O_DIRECT|O_WRONLY;
309874bd 1343+ vmaw->fd = qemu_open(filename, oflags, 0644);
3f350e0d 1344+ } else if (strstart(filename, "/dev/fdset/", &tmp_id_str)) {
2dfd543c 1345+ oflags = O_NONBLOCK|O_DIRECT|O_WRONLY;
3f350e0d 1346+ vmaw->fd = qemu_open(filename, oflags, 0644);
309874bd 1347+ } else if (strstart(filename, "/dev/fdname/", &tmp_id_str)) {
3f350e0d
DM
1348+ vmaw->fd = monitor_get_fd(cur_mon, tmp_id_str, errp);
1349+ if (vmaw->fd < 0) {
1350+ goto err;
1351+ }
2dfd543c
DM
1352+ /* try to use O_NONBLOCK and O_DIRECT */
1353+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
1354+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_DIRECT);
309874bd 1355+ } else {
2dfd543c 1356+ oflags = O_NONBLOCK|O_DIRECT|O_WRONLY|O_CREAT|O_EXCL;
309874bd 1357+ vmaw->fd = qemu_open(filename, oflags, 0644);
efa8e5de 1358+ }
3f350e0d 1359+
5ad5891c
DM
1360+ if (vmaw->fd < 0) {
1361+ error_setg(errp, "can't open file %s - %s\n", filename,
1362+ strerror(errno));
1363+ goto err;
1364+ }
1365+ }
1366+
2dfd543c
DM
1367+ /* we use O_DIRECT, so we need to align IO buffers */
1368+ int i;
1369+ for (i = 0; i < WRITE_BUFFERS; i++) {
1370+ vmaw->aiocbs[i] = qemu_memalign(512, sizeof(VmaAIOCB));
1371+ memset(vmaw->aiocbs[i], 0, sizeof(VmaAIOCB));
1372+ }
1373+
5ad5891c
DM
1374+ vmaw->outbuf_count = 0;
1375+ vmaw->outbuf_pos = VMA_EXTENT_HEADER_SIZE;
1376+
1377+ vmaw->header_blob_table_pos = 1; /* start at pos 1 */
1378+
1379+ qemu_co_mutex_init(&vmaw->writer_lock);
1380+ qemu_co_mutex_init(&vmaw->flush_lock);
1381+ qemu_co_queue_init(&vmaw->wqueue);
1382+
1383+ uuid_copy(vmaw->uuid, uuid);
1384+
5ad5891c
DM
1385+ return vmaw;
1386+
1387+err:
1388+ if (vmaw) {
1389+ if (vmaw->cmd) {
1390+ pclose(vmaw->cmd);
1391+ } else if (vmaw->fd >= 0) {
1392+ close(vmaw->fd);
1393+ }
1394+
1395+ if (vmaw->md5csum) {
1396+ g_checksum_free(vmaw->md5csum);
1397+ }
1398+
1399+ g_free(vmaw);
1400+ }
1401+
1402+ return NULL;
1403+}
1404+
1405+static int coroutine_fn vma_write_header(VmaWriter *vmaw)
1406+{
1407+ assert(vmaw);
1408+ int header_clusters = 8;
1409+ char buf[65536*header_clusters];
1410+ VmaHeader *head = (VmaHeader *)buf;
1411+
1412+ int i;
1413+
1414+ DPRINTF("VMA WRITE HEADER\n");
1415+
1416+ if (vmaw->status < 0) {
1417+ return vmaw->status;
1418+ }
1419+
1420+ memset(buf, 0, sizeof(buf));
1421+
1422+ head->magic = VMA_MAGIC;
1423+ head->version = GUINT32_TO_BE(1); /* v1 */
1424+ memcpy(head->uuid, vmaw->uuid, 16);
1425+
1426+ time_t ctime = time(NULL);
1427+ head->ctime = GUINT64_TO_BE(ctime);
1428+
1429+ if (!vmaw->stream_count) {
1430+ return -1;
1431+ }
1432+
1433+ for (i = 0; i < VMA_MAX_CONFIGS; i++) {
1434+ head->config_names[i] = GUINT32_TO_BE(vmaw->config_names[i]);
1435+ head->config_data[i] = GUINT32_TO_BE(vmaw->config_data[i]);
1436+ }
1437+
1438+ /* 32 bytes per device (12 used currently) = 8192 bytes max */
1439+ for (i = 1; i <= 254; i++) {
1440+ VmaStreamInfo *si = &vmaw->stream_info[i];
1441+ if (si->size) {
1442+ assert(si->devname);
1443+ uint32_t devname_ptr = allocate_header_string(vmaw, si->devname);
1444+ if (!devname_ptr) {
1445+ return -1;
1446+ }
1447+ head->dev_info[i].devname_ptr = GUINT32_TO_BE(devname_ptr);
1448+ head->dev_info[i].size = GUINT64_TO_BE(si->size);
1449+ }
1450+ }
1451+
1452+ uint32_t header_size = sizeof(VmaHeader) + vmaw->header_blob_table_size;
1453+ head->header_size = GUINT32_TO_BE(header_size);
1454+
1455+ if (header_size > sizeof(buf)) {
1456+ return -1; /* just to be sure */
1457+ }
1458+
1459+ uint32_t blob_buffer_offset = sizeof(VmaHeader);
1460+ memcpy(buf + blob_buffer_offset, vmaw->header_blob_table,
1461+ vmaw->header_blob_table_size);
1462+ head->blob_buffer_offset = GUINT32_TO_BE(blob_buffer_offset);
1463+ head->blob_buffer_size = GUINT32_TO_BE(vmaw->header_blob_table_pos);
1464+
1465+ g_checksum_reset(vmaw->md5csum);
1466+ g_checksum_update(vmaw->md5csum, (const guchar *)buf, header_size);
1467+ gsize csize = 16;
1468+ g_checksum_get_digest(vmaw->md5csum, (guint8 *)(head->md5sum), &csize);
1469+
1470+ return vma_queue_write(vmaw, buf, header_size);
1471+}
1472+
1473+static int coroutine_fn vma_writer_flush(VmaWriter *vmaw)
1474+{
1475+ assert(vmaw);
1476+
1477+ int ret;
1478+ int i;
1479+
1480+ if (vmaw->status < 0) {
1481+ return vmaw->status;
1482+ }
1483+
1484+ if (!vmaw->header_written) {
1485+ vmaw->header_written = true;
1486+ ret = vma_write_header(vmaw);
1487+ if (ret < 0) {
1488+ vma_writer_set_error(vmaw, "vma_writer_flush: write header failed");
1489+ return ret;
1490+ }
1491+ }
1492+
1493+ DPRINTF("VMA WRITE FLUSH %d %d\n", vmaw->outbuf_count, vmaw->outbuf_pos);
1494+
1495+
1496+ VmaExtentHeader *ehead = (VmaExtentHeader *)vmaw->outbuf;
1497+
1498+ ehead->magic = VMA_EXTENT_MAGIC;
1499+ ehead->reserved1 = 0;
1500+
1501+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
1502+ ehead->blockinfo[i] = GUINT64_TO_BE(vmaw->outbuf_block_info[i]);
1503+ }
1504+
1505+ guint16 block_count = (vmaw->outbuf_pos - VMA_EXTENT_HEADER_SIZE) /
1506+ VMA_BLOCK_SIZE;
1507+
1508+ ehead->block_count = GUINT16_TO_BE(block_count);
1509+
1510+ memcpy(ehead->uuid, vmaw->uuid, sizeof(ehead->uuid));
1511+ memset(ehead->md5sum, 0, sizeof(ehead->md5sum));
1512+
1513+ g_checksum_reset(vmaw->md5csum);
1514+ g_checksum_update(vmaw->md5csum, vmaw->outbuf, VMA_EXTENT_HEADER_SIZE);
1515+ gsize csize = 16;
1516+ g_checksum_get_digest(vmaw->md5csum, ehead->md5sum, &csize);
1517+
1518+ int bytes = vmaw->outbuf_pos;
1519+ ret = vma_queue_write(vmaw, vmaw->outbuf, bytes);
1520+ if (ret != bytes) {
1521+ vma_writer_set_error(vmaw, "vma_writer_flush: failed write");
1522+ }
1523+
1524+ vmaw->outbuf_count = 0;
1525+ vmaw->outbuf_pos = VMA_EXTENT_HEADER_SIZE;
1526+
1527+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
1528+ vmaw->outbuf_block_info[i] = 0;
1529+ }
1530+
1531+ return vmaw->status;
1532+}
1533+
1534+static int vma_count_open_streams(VmaWriter *vmaw)
1535+{
1536+ g_assert(vmaw != NULL);
1537+
1538+ int i;
1539+ int open_drives = 0;
1540+ for (i = 0; i <= 255; i++) {
1541+ if (vmaw->stream_info[i].size && !vmaw->stream_info[i].finished) {
1542+ open_drives++;
1543+ }
1544+ }
1545+
1546+ return open_drives;
1547+}
1548+
1549+/**
1550+ * all jobs should call this when there is no more data
1551+ * Returns: number of remaining stream (0 ==> finished)
1552+ */
1553+int coroutine_fn
1554+vma_writer_close_stream(VmaWriter *vmaw, uint8_t dev_id)
1555+{
1556+ g_assert(vmaw != NULL);
1557+
1558+ DPRINTF("vma_writer_set_status %d\n", dev_id);
1559+ if (!vmaw->stream_info[dev_id].size) {
1560+ vma_writer_set_error(vmaw, "vma_writer_close_stream: "
1561+ "no such stream %d", dev_id);
1562+ return -1;
1563+ }
1564+ if (vmaw->stream_info[dev_id].finished) {
1565+ vma_writer_set_error(vmaw, "vma_writer_close_stream: "
1566+ "stream already closed %d", dev_id);
1567+ return -1;
1568+ }
1569+
1570+ vmaw->stream_info[dev_id].finished = true;
1571+
1572+ int open_drives = vma_count_open_streams(vmaw);
1573+
1574+ if (open_drives <= 0) {
1575+ DPRINTF("vma_writer_set_status all drives completed\n");
1576+ qemu_co_mutex_lock(&vmaw->flush_lock);
1577+ int ret = vma_writer_flush(vmaw);
1578+ qemu_co_mutex_unlock(&vmaw->flush_lock);
1579+ if (ret < 0) {
1580+ vma_writer_set_error(vmaw, "vma_writer_close_stream: flush failed");
1581+ }
1582+ }
1583+
1584+ return open_drives;
1585+}
1586+
1587+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status)
1588+{
1589+ int i;
1590+
1591+ g_assert(vmaw != NULL);
1592+
1593+ if (status) {
1594+ status->status = vmaw->status;
1595+ g_strlcpy(status->errmsg, vmaw->errmsg, sizeof(status->errmsg));
1596+ for (i = 0; i <= 255; i++) {
1597+ status->stream_info[i] = vmaw->stream_info[i];
1598+ }
1599+
1600+ uuid_unparse_lower(vmaw->uuid, status->uuid_str);
1601+ }
1602+
1603+ status->closed = vmaw->closed;
1604+
1605+ return vmaw->status;
1606+}
1607+
2dfd543c 1608+static int vma_writer_get_buffer(VmaWriter *vmaw)
5ad5891c 1609+{
309874bd 1610+ int ret = 0;
5ad5891c 1611+
309874bd 1612+ qemu_co_mutex_lock(&vmaw->flush_lock);
5ad5891c 1613+
309874bd
DM
1614+ /* wait until buffer is available */
1615+ while (vmaw->outbuf_count >= (VMA_BLOCKS_PER_EXTENT - 1)) {
1616+ ret = vma_writer_flush(vmaw);
1617+ if (ret < 0) {
5ad5891c 1618+ vma_writer_set_error(vmaw, "vma_writer_get_buffer: flush failed");
309874bd 1619+ break;
5ad5891c
DM
1620+ }
1621+ }
1622+
309874bd
DM
1623+ qemu_co_mutex_unlock(&vmaw->flush_lock);
1624+
1625+ return ret;
5ad5891c
DM
1626+}
1627+
1628+
1629+int64_t coroutine_fn
1630+vma_writer_write(VmaWriter *vmaw, uint8_t dev_id, int64_t cluster_num,
1631+ unsigned char *buf, size_t *zero_bytes)
1632+{
1633+ g_assert(vmaw != NULL);
1634+ g_assert(zero_bytes != NULL);
1635+
1636+ *zero_bytes = 0;
1637+
1638+ if (vmaw->status < 0) {
1639+ return vmaw->status;
1640+ }
1641+
1642+ if (!dev_id || !vmaw->stream_info[dev_id].size) {
1643+ vma_writer_set_error(vmaw, "vma_writer_write: "
1644+ "no such stream %d", dev_id);
1645+ return -1;
1646+ }
1647+
1648+ if (vmaw->stream_info[dev_id].finished) {
1649+ vma_writer_set_error(vmaw, "vma_writer_write: "
1650+ "stream already closed %d", dev_id);
1651+ return -1;
1652+ }
1653+
1654+
1655+ if (cluster_num >= (((uint64_t)1)<<32)) {
1656+ vma_writer_set_error(vmaw, "vma_writer_write: "
1657+ "cluster number out of range");
1658+ return -1;
1659+ }
1660+
1661+ if (dev_id == vmaw->vmstate_stream) {
1662+ if (cluster_num != vmaw->vmstate_clusters) {
1663+ vma_writer_set_error(vmaw, "vma_writer_write: "
1664+ "non sequential vmstate write");
1665+ }
1666+ vmaw->vmstate_clusters++;
1667+ } else if (cluster_num >= vmaw->stream_info[dev_id].cluster_count) {
1668+ vma_writer_set_error(vmaw, "vma_writer_write: cluster number too big");
1669+ return -1;
1670+ }
1671+
1672+ /* wait until buffer is available */
2dfd543c 1673+ if (vma_writer_get_buffer(vmaw) < 0) {
5ad5891c
DM
1674+ vma_writer_set_error(vmaw, "vma_writer_write: "
1675+ "vma_writer_get_buffer failed");
1676+ return -1;
1677+ }
1678+
309874bd 1679+ DPRINTF("VMA WRITE %d %zd\n", dev_id, cluster_num);
5ad5891c 1680+
2dfd543c
DM
1681+ int i;
1682+ int bit = 1;
1683+ uint16_t mask = 0;
5ad5891c
DM
1684+ for (i = 0; i < 16; i++) {
1685+ unsigned char *vmablock = buf + (i*VMA_BLOCK_SIZE);
2dfd543c
DM
1686+ if (!buffer_is_zero(vmablock, VMA_BLOCK_SIZE)) {
1687+ mask |= bit;
309874bd
DM
1688+ memcpy(vmaw->outbuf + vmaw->outbuf_pos, vmablock, VMA_BLOCK_SIZE);
1689+ vmaw->outbuf_pos += VMA_BLOCK_SIZE;
1690+ } else {
5ad5891c
DM
1691+ DPRINTF("VMA WRITE %zd ZERO BLOCK %d\n", cluster_num, i);
1692+ vmaw->stream_info[dev_id].zero_bytes += VMA_BLOCK_SIZE;
1693+ *zero_bytes += VMA_BLOCK_SIZE;
5ad5891c
DM
1694+ }
1695+
1696+ bit = bit << 1;
1697+ }
1698+
1699+ uint64_t block_info = ((uint64_t)mask) << (32+16);
1700+ block_info |= ((uint64_t)dev_id) << 32;
1701+ block_info |= (cluster_num & 0xffffffff);
309874bd 1702+ vmaw->outbuf_block_info[vmaw->outbuf_count] = block_info;
5ad5891c
DM
1703+
1704+ DPRINTF("VMA WRITE MASK %zd %zx\n", cluster_num, block_info);
1705+
1706+ vmaw->outbuf_count++;
1707+
1708+ /** NOTE: We allways write whole clusters, but we correctly set
1709+ * transferred bytes. So transferred == size when when everything
1710+ * went OK.
1711+ */
1712+ size_t transferred = VMA_CLUSTER_SIZE;
1713+
1714+ if (dev_id != vmaw->vmstate_stream) {
1715+ uint64_t last = (cluster_num + 1) * VMA_CLUSTER_SIZE;
1716+ if (last > vmaw->stream_info[dev_id].size) {
1717+ uint64_t diff = last - vmaw->stream_info[dev_id].size;
1718+ if (diff >= VMA_CLUSTER_SIZE) {
1719+ vma_writer_set_error(vmaw, "vma_writer_write: "
1720+ "read after last cluster");
1721+ return -1;
1722+ }
1723+ transferred -= diff;
1724+ }
1725+ }
1726+
1727+ vmaw->stream_info[dev_id].transferred += transferred;
1728+
1729+ return transferred;
1730+}
1731+
1732+int vma_writer_close(VmaWriter *vmaw, Error **errp)
1733+{
1734+ g_assert(vmaw != NULL);
1735+
1736+ int i;
1737+
1738+ vma_queue_flush(vmaw);
1739+
1740+ /* this should not happen - just to be sure */
1741+ while (!qemu_co_queue_empty(&vmaw->wqueue)) {
1742+ DPRINTF("vma_writer_close wait\n");
1743+ co_sleep_ns(rt_clock, 1000000);
1744+ }
1745+
1746+ if (vmaw->cmd) {
1747+ if (pclose(vmaw->cmd) < 0) {
1748+ vma_writer_set_error(vmaw, "vma_writer_close: "
1749+ "pclose failed - %s", strerror(errno));
1750+ }
1751+ } else {
1752+ if (close(vmaw->fd) < 0) {
1753+ vma_writer_set_error(vmaw, "vma_writer_close: "
1754+ "close failed - %s", strerror(errno));
1755+ }
1756+ }
1757+
1758+ for (i = 0; i <= 255; i++) {
1759+ VmaStreamInfo *si = &vmaw->stream_info[i];
1760+ if (si->size) {
1761+ if (!si->finished) {
1762+ vma_writer_set_error(vmaw, "vma_writer_close: "
1763+ "detected open stream '%s'", si->devname);
1764+ } else if ((si->transferred != si->size) &&
1765+ (i != vmaw->vmstate_stream)) {
1766+ vma_writer_set_error(vmaw, "vma_writer_close: "
1767+ "incomplete stream '%s' (%zd != %zd)",
1768+ si->devname, si->transferred, si->size);
1769+ }
1770+ }
1771+ }
1772+
1773+ for (i = 0; i <= 255; i++) {
1774+ vmaw->stream_info[i].finished = 1; /* mark as closed */
1775+ }
1776+
1777+ vmaw->closed = 1;
1778+
1779+ if (vmaw->status < 0 && *errp == NULL) {
1780+ error_setg(errp, "%s", vmaw->errmsg);
1781+ }
1782+
1783+ return vmaw->status;
1784+}
1785+
1786+void vma_writer_destroy(VmaWriter *vmaw)
1787+{
1788+ assert(vmaw);
1789+
1790+ int i;
1791+
1792+ for (i = 0; i <= 255; i++) {
1793+ if (vmaw->stream_info[i].devname) {
1794+ g_free(vmaw->stream_info[i].devname);
1795+ }
1796+ }
1797+
1798+ if (vmaw->md5csum) {
1799+ g_checksum_free(vmaw->md5csum);
1800+ }
1801+
2dfd543c
DM
1802+ for (i = 0; i < WRITE_BUFFERS; i++) {
1803+ free(vmaw->aiocbs[i]);
1804+ }
1805+
5ad5891c
DM
1806+ g_free(vmaw);
1807+}
1808+
1809+/* backup driver plugin */
1810+
/* BackupDriver dump callback: @opaque is the VmaWriter; forwards the
 * cluster to vma_writer_write() and returns its result as int.
 */
static int vma_dump_cb(void *opaque, uint8_t dev_id, int64_t cluster_num,
                       unsigned char *buf, size_t *zero_bytes)
{
    return vma_writer_write(opaque, dev_id, cluster_num, buf, zero_bytes);
}
1818+
1819+static int vma_close_cb(void *opaque, Error **errp)
1820+{
1821+ VmaWriter *vmaw = opaque;
1822+
1823+ int res = vma_writer_close(vmaw, errp);
1824+ vma_writer_destroy(vmaw);
1825+
1826+ return res;
1827+}
1828+
1829+static int vma_complete_cb(void *opaque, uint8_t dev_id, int ret)
1830+{
1831+ VmaWriter *vmaw = opaque;
1832+
1833+ if (ret < 0) {
1834+ vma_writer_set_error(vmaw, "backup_complete_cb %d", ret);
1835+ }
1836+
1837+ return vma_writer_close_stream(vmaw, dev_id);
1838+}
1839+
/* BackupDriver callback: registers stream @devname with the VmaWriter
 * passed as @opaque.
 */
static int vma_register_stream_cb(void *opaque, const char *devname,
                                  size_t size)
{
    return vma_writer_register_stream(opaque, devname, size);
}
1847+
1848+static int vma_register_config_cb(void *opaque, const char *name,
1849+ gpointer data, size_t data_len)
1850+{
1851+ VmaWriter *vmaw = opaque;
1852+
1853+ return vma_writer_add_config(vmaw, name, data, data_len);
1854+}
1855+
55827521 1856+static void *vma_open_cb(const char *filename, uuid_t uuid, Error **errp)
5ad5891c 1857+{
55827521 1858+ return vma_writer_create(filename, uuid, errp);
5ad5891c
DM
1859+}
1860+
1861+const BackupDriver backup_vma_driver = {
1862+ .format = "vma",
1863+ .open_cb = vma_open_cb,
1864+ .close_cb = vma_close_cb,
1865+ .register_config_cb = vma_register_config_cb,
1866+ .register_stream_cb = vma_register_stream_cb,
1867+ .dump_cb = vma_dump_cb,
1868+ .complete_cb = vma_complete_cb,
1869+};
1870+
1871diff --git a/vma.c b/vma.c
1872new file mode 100644
89af8a77 1873index 0000000..b2e276c
5ad5891c
DM
1874--- /dev/null
1875+++ b/vma.c
92bf040c 1876@@ -0,0 +1,559 @@
5ad5891c
DM
1877+/*
1878+ * VMA: Virtual Machine Archive
1879+ *
1880+ * Copyright (C) 2012 Proxmox Server Solutions
1881+ *
1882+ * Authors:
1883+ * Dietmar Maurer (dietmar@proxmox.com)
1884+ *
1885+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
1886+ * See the COPYING file in the top-level directory.
1887+ *
1888+ */
1889+
1890+#include <stdio.h>
1891+#include <errno.h>
1892+#include <unistd.h>
1893+#include <stdio.h>
1894+#include <string.h>
1895+#include <sys/types.h>
1896+#include <sys/stat.h>
1897+#include <fcntl.h>
1898+#include <glib.h>
1899+
1900+#include "qemu-common.h"
92bf040c 1901+#include "qemu/error-report.h"
5ad5891c 1902+#include "vma.h"
92bf040c 1903+#include "block/block.h"
5ad5891c
DM
1904+
/* Print the usage text to stdout and terminate with exit status 1. */
static void help(void)
{
    static const char usage[] =
        "usage: vma command [command options]\n"
        "\n"
        "vma list <filename>\n"
        "vma create <filename> [-c config] <archive> pathname ...\n"
        "vma extract <filename> [-r] <targetdir>\n";

    fputs(usage, stdout);
    exit(1);
}
1918+
/**
 * Split an argument of the form "devname=path".
 *
 * With a '=' present, *@devname receives a newly allocated copy of the
 * text before the first '=' and the text after it is returned. Without
 * one, @path is returned unchanged and *@devname becomes "disk<index>"
 * for @index >= 0, or NULL otherwise.
 */
static const char *extract_devname(const char *path, char **devname, int index)
{
    assert(path);

    const char *eq = strchr(path, '=');

    if (eq != NULL) {
        *devname = g_strndup(path, eq - path);
        return eq + 1;
    }

    *devname = (index >= 0) ? g_strdup_printf("disk%d", index) : NULL;

    return path;
}
1938+
1939+static void print_content(VmaReader *vmar)
1940+{
1941+ assert(vmar);
1942+
1943+ VmaHeader *head = vma_reader_get_header(vmar);
1944+
5ad5891c
DM
1945+ GList *l = vma_reader_get_config_data(vmar);
1946+ while (l && l->data) {
1947+ VmaConfigData *cdata = (VmaConfigData *)l->data;
1948+ l = g_list_next(l);
1949+ printf("CFG: size: %d name: %s\n", cdata->len, cdata->name);
1950+ }
1951+
1952+ int i;
1953+ VmaDeviceInfo *di;
1954+ for (i = 1; i < 255; i++) {
1955+ di = vma_reader_get_device_info(vmar, i);
1956+ if (di) {
1957+ if (strcmp(di->devname, "vmstate") == 0) {
1958+ printf("VMSTATE: dev_id=%d memory: %zd\n", i, di->size);
1959+ } else {
1960+ printf("DEV: dev_id=%d size: %zd devname: %s\n",
1961+ i, di->size, di->devname);
1962+ }
1963+ }
1964+ }
d95c8c31
DM
1965+ /* ctime is the last entry we print */
1966+ printf("CTIME: %s", ctime(&head->ctime));
1967+ fflush(stdout);
5ad5891c
DM
1968+}
1969+
1970+static int list_content(int argc, char **argv)
1971+{
1972+ int c, ret = 0;
1973+ const char *filename;
1974+
1975+ for (;;) {
1976+ c = getopt(argc, argv, "h");
1977+ if (c == -1) {
1978+ break;
1979+ }
1980+ switch (c) {
1981+ case '?':
1982+ case 'h':
1983+ help();
1984+ break;
1985+ default:
1986+ g_assert_not_reached();
1987+ }
1988+ }
1989+
1990+ /* Get the filename */
1991+ if ((optind + 1) != argc) {
1992+ help();
1993+ }
1994+ filename = argv[optind++];
1995+
1996+ Error *errp = NULL;
1997+ VmaReader *vmar = vma_reader_create(filename, &errp);
1998+
1999+ if (!vmar) {
2000+ g_error("%s", error_get_pretty(errp));
2001+ }
2002+
2003+ print_content(vmar);
2004+
2005+ vma_reader_destroy(vmar);
2006+
2007+ return ret;
2008+}
2009+
/* One line of the restore map read by "vma extract -r". */
typedef struct RestoreMap RestoreMap;

struct RestoreMap {
    char *devname;   /* device name, also the key in the lookup table */
    char *path;      /* target the device is restored to */
    bool write_zero; /* passed on to vma_reader_register_bs() */
};
2015+
2016+static int extract_content(int argc, char **argv)
2017+{
2018+ int c, ret = 0;
4244016d 2019+ int verbose = 0;
5ad5891c
DM
2020+ const char *filename;
2021+ const char *dirname;
3f350e0d 2022+ const char *readmap = NULL;
5ad5891c
DM
2023+
2024+ for (;;) {
4244016d 2025+ c = getopt(argc, argv, "hvr:");
5ad5891c
DM
2026+ if (c == -1) {
2027+ break;
2028+ }
2029+ switch (c) {
2030+ case '?':
2031+ case 'h':
2032+ help();
2033+ break;
2034+ case 'r':
3f350e0d 2035+ readmap = optarg;
5ad5891c 2036+ break;
309874bd 2037+ case 'v':
4244016d
DM
2038+ verbose = 1;
2039+ break;
5ad5891c
DM
2040+ default:
2041+ help();
2042+ }
2043+ }
2044+
2045+ /* Get the filename */
2046+ if ((optind + 2) != argc) {
2047+ help();
2048+ }
2049+ filename = argv[optind++];
2050+ dirname = argv[optind++];
2051+
2052+ Error *errp = NULL;
2053+ VmaReader *vmar = vma_reader_create(filename, &errp);
2054+
2055+ if (!vmar) {
2056+ g_error("%s", error_get_pretty(errp));
2057+ }
2058+
2059+ if (mkdir(dirname, 0777) < 0) {
2060+ g_error("unable to create target directory %s - %s",
2061+ dirname, strerror(errno));
2062+ }
2063+
2064+ GList *l = vma_reader_get_config_data(vmar);
2065+ while (l && l->data) {
2066+ VmaConfigData *cdata = (VmaConfigData *)l->data;
2067+ l = g_list_next(l);
2068+ char *cfgfn = g_strdup_printf("%s/%s", dirname, cdata->name);
2069+ GError *err = NULL;
2070+ if (!g_file_set_contents(cfgfn, (gchar *)cdata->data, cdata->len,
2071+ &err)) {
3f350e0d 2072+ g_error("unable to write file: %s", err->message);
5ad5891c
DM
2073+ }
2074+ }
2075+
2076+ GHashTable *devmap = g_hash_table_new(g_str_hash, g_str_equal);
2077+
2078+ if (readmap) {
2079+ print_content(vmar);
2080+
3f350e0d
DM
2081+ FILE *map = fopen(readmap, "r");
2082+ if (!map) {
2083+ g_error("unable to open fifo %s - %s", readmap, strerror(errno));
2084+ }
2085+
5ad5891c
DM
2086+ while (1) {
2087+ char inbuf[8192];
3f350e0d 2088+ char *line = fgets(inbuf, sizeof(inbuf), map);
5ad5891c
DM
2089+ if (!line || line[0] == '\0' || !strcmp(line, "done\n")) {
2090+ break;
2091+ }
2092+ int len = strlen(line);
2093+ if (line[len - 1] == '\n') {
2094+ line[len - 1] = '\0';
2095+ if (len == 1) {
2096+ break;
2097+ }
2098+ }
2099+
2100+ const char *path;
2101+ bool write_zero;
2102+ if (line[0] == '0' && line[1] == ':') {
2103+ path = inbuf + 2;
2104+ write_zero = false;
2105+ } else if (line[0] == '1' && line[1] == ':') {
2106+ path = inbuf + 2;
2107+ write_zero = true;
3f350e0d 2108+ } else {
5ad5891c
DM
2109+ g_error("read map failed - parse error ('%s')", inbuf);
2110+ }
2111+
2112+ char *devname = NULL;
2113+ path = extract_devname(path, &devname, -1);
2114+ if (!devname) {
2115+ g_error("read map failed - no dev name specified ('%s')",
2116+ inbuf);
2117+ }
2118+
5ad5891c
DM
2119+ RestoreMap *map = g_new0(RestoreMap, 1);
2120+ map->devname = g_strdup(devname);
2121+ map->path = g_strdup(path);
2122+ map->write_zero = write_zero;
2123+
2124+ g_hash_table_insert(devmap, map->devname, map);
2125+
2126+ };
2127+ }
2128+
2129+ int i;
2130+ int vmstate_fd = -1;
2131+ guint8 vmstate_stream = 0;
2132+
2133+ for (i = 1; i < 255; i++) {
2134+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
2135+ if (di && (strcmp(di->devname, "vmstate") == 0)) {
2136+ vmstate_stream = i;
2137+ char *statefn = g_strdup_printf("%s/vmstate.bin", dirname);
2138+ vmstate_fd = open(statefn, O_WRONLY|O_CREAT|O_EXCL, 0644);
2139+ if (vmstate_fd < 0) {
2140+ g_error("create vmstate file '%s' failed - %s", statefn,
2141+ strerror(errno));
2142+ }
2143+ g_free(statefn);
2144+ } else if (di) {
2145+ char *devfn = NULL;
2146+ int flags = BDRV_O_RDWR|BDRV_O_CACHE_WB;
2147+ bool write_zero = true;
2148+
2149+ if (readmap) {
2150+ RestoreMap *map;
2151+ map = (RestoreMap *)g_hash_table_lookup(devmap, di->devname);
2152+ if (map == NULL) {
2153+ g_error("no device name mapping for %s", di->devname);
2154+ }
2155+ devfn = map->path;
2156+ write_zero = map->write_zero;
2157+ } else {
2158+ devfn = g_strdup_printf("%s/tmp-disk-%s.raw",
2159+ dirname, di->devname);
2160+ printf("DEVINFO %s %zd\n", devfn, di->size);
2161+
89af8a77 2162+ bdrv_img_create(devfn, "raw", NULL, NULL, NULL, di->size,
92bf040c
DM
2163+ flags, &errp);
2164+ if (error_is_set(&errp)) {
89af8a77 2165+ g_error("can't create file %s: %s", devfn,
92bf040c 2166+ error_get_pretty(errp));
5ad5891c
DM
2167+ }
2168+
2169+ /* Note: we created an empty file above, so there is no
2170+ * need to write zeroes (so we generate a sparse file)
2171+ */
2172+ write_zero = false;
2173+ }
2174+
9de54cd3
DM
2175+ BlockDriverState *bs = bdrv_new(di->devname);
2176+ if (bdrv_open(bs, devfn, flags, NULL)) {
5ad5891c
DM
2177+ g_error("can't open file %s", devfn);
2178+ }
2179+ if (vma_reader_register_bs(vmar, i, bs, write_zero, &errp) < 0) {
2180+ g_error("%s", error_get_pretty(errp));
2181+ }
2182+
2183+ if (!readmap) {
2184+ g_free(devfn);
2185+ }
2186+ }
2187+ }
2188+
4244016d 2189+ if (vma_reader_restore(vmar, vmstate_fd, verbose, &errp) < 0) {
5ad5891c
DM
2190+ g_error("restore failed - %s", error_get_pretty(errp));
2191+ }
2192+
2193+ if (!readmap) {
2194+ for (i = 1; i < 255; i++) {
2195+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
2196+ if (di && (i != vmstate_stream)) {
2197+ char *tmpfn = g_strdup_printf("%s/tmp-disk-%s.raw",
2198+ dirname, di->devname);
2199+ char *fn = g_strdup_printf("%s/disk-%s.raw",
2200+ dirname, di->devname);
2201+ if (rename(tmpfn, fn) != 0) {
2202+ g_error("rename %s to %s failed - %s",
2203+ tmpfn, fn, strerror(errno));
2204+ }
2205+ }
2206+ }
2207+ }
2208+
2209+ vma_reader_destroy(vmar);
2210+
2211+ bdrv_close_all();
2212+
2213+ return ret;
2214+}
2215+
2216+typedef struct BackupCB {
2217+ VmaWriter *vmaw;
2218+ uint8_t dev_id;
2219+} BackupCB;
2220+
2221+static int backup_dump_cb(void *opaque, BlockDriverState *bs,
2222+ int64_t cluster_num, unsigned char *buf)
2223+{
2224+ BackupCB *bcb = opaque;
2225+ size_t zb = 0;
2226+ if (vma_writer_write(bcb->vmaw, bcb->dev_id, cluster_num, buf, &zb) < 0) {
2227+ g_warning("backup_dump_cb vma_writer_write failed");
2228+ return -1;
2229+ }
2230+
2231+ return 0;
2232+}
2233+
2234+static void backup_complete_cb(void *opaque, int ret)
2235+{
2236+ BackupCB *bcb = opaque;
2237+
2238+ if (ret < 0) {
2239+ vma_writer_set_error(bcb->vmaw, "backup_complete_cb %d", ret);
2240+ }
2241+
2242+ if (vma_writer_close_stream(bcb->vmaw, bcb->dev_id) <= 0) {
2243+ Error *err = NULL;
2244+ if (vma_writer_close(bcb->vmaw, &err) != 0) {
2245+ g_warning("vma_writer_close failed %s", error_get_pretty(err));
2246+ }
2247+ }
2248+}
2249+
2250+static int create_archive(int argc, char **argv)
2251+{
2252+ int i, c, res;
2253+ int verbose = 0;
2254+ const char *archivename;
2255+ GList *config_files = NULL;
2256+
2257+ for (;;) {
2258+ c = getopt(argc, argv, "hvc:");
2259+ if (c == -1) {
2260+ break;
2261+ }
2262+ switch (c) {
2263+ case '?':
2264+ case 'h':
2265+ help();
2266+ break;
2267+ case 'c':
2268+ config_files = g_list_append(config_files, optarg);
2269+ break;
2270+ case 'v':
2271+ verbose = 1;
2272+ break;
2273+ default:
2274+ g_assert_not_reached();
2275+ }
2276+ }
2277+
2278+
2279+ /* make sure we have archive name and at least one path */
2280+ if ((optind + 2) > argc) {
2281+ help();
2282+ }
2283+
2284+ archivename = argv[optind++];
2285+
2286+ uuid_t uuid;
2287+ uuid_generate(uuid);
2288+
2289+ Error *local_err = NULL;
55827521 2290+ VmaWriter *vmaw = vma_writer_create(archivename, uuid, &local_err);
5ad5891c
DM
2291+
2292+ if (vmaw == NULL) {
2293+ g_error("%s", error_get_pretty(local_err));
2294+ }
2295+
2296+ GList *l = config_files;
2297+ while (l && l->data) {
2298+ char *name = l->data;
2299+ char *cdata = NULL;
2300+ gsize clen = 0;
2301+ GError *err = NULL;
2302+ if (!g_file_get_contents(name, &cdata, &clen, &err)) {
2303+ unlink(archivename);
2304+ g_error("Unable to read file: %s", err->message);
2305+ }
2306+
2307+ if (vma_writer_add_config(vmaw, name, cdata, clen) != 0) {
2308+ unlink(archivename);
2309+ g_error("Unable to append config data %s (len = %zd)",
2310+ name, clen);
2311+ }
2312+ l = g_list_next(l);
2313+ }
2314+
2315+ int ind = 0;
2316+ while (optind < argc) {
2317+ const char *path = argv[optind++];
2318+ char *devname = NULL;
2319+ path = extract_devname(path, &devname, ind++);
2320+
2321+ BlockDriver *drv = NULL;
2322+ BlockDriverState *bs = bdrv_new(devname);
2323+
2324+ res = bdrv_open(bs, path, BDRV_O_CACHE_WB , drv);
2325+ if (res < 0) {
2326+ unlink(archivename);
2327+ g_error("bdrv_open '%s' failed", path);
2328+ }
2329+ int64_t size = bdrv_getlength(bs);
2330+ int dev_id = vma_writer_register_stream(vmaw, devname, size);
2331+ if (dev_id <= 0) {
2332+ unlink(archivename);
2333+ g_error("vma_writer_register_stream '%s' failed", devname);
2334+ }
2335+
2336+ BackupCB *bcb = g_new0(BackupCB, 1);
2337+ bcb->vmaw = vmaw;
2338+ bcb->dev_id = dev_id;
2339+
89af8a77
DM
2340+ if (backup_job_create(bs, backup_dump_cb, backup_complete_cb,
2341+ bcb, 0) < 0) {
5ad5891c
DM
2342+ unlink(archivename);
2343+ g_error("backup_job_start failed");
309874bd 2344+ } else {
89af8a77 2345+ backup_job_start(bs, false);
5ad5891c
DM
2346+ }
2347+ }
2348+
2349+ VmaStatus vmastat;
2350+ int percent = 0;
2351+ int last_percent = -1;
2352+
2353+ while (1) {
2354+ main_loop_wait(false);
2355+ vma_writer_get_status(vmaw, &vmastat);
2356+
2357+ if (verbose) {
2358+
2359+ uint64_t total = 0;
2360+ uint64_t transferred = 0;
2361+ uint64_t zero_bytes = 0;
2362+
2363+ int i;
2364+ for (i = 0; i < 256; i++) {
2365+ if (vmastat.stream_info[i].size) {
2366+ total += vmastat.stream_info[i].size;
2367+ transferred += vmastat.stream_info[i].transferred;
2368+ zero_bytes += vmastat.stream_info[i].zero_bytes;
2369+ }
2370+ }
2371+ percent = (transferred*100)/total;
2372+ if (percent != last_percent) {
2373+ printf("progress %d%% %zd/%zd %zd\n", percent,
2374+ transferred, total, zero_bytes);
2375+
2376+ last_percent = percent;
2377+ }
2378+ }
2379+
2380+ if (vmastat.closed) {
2381+ break;
2382+ }
2383+ }
2384+
2385+ bdrv_drain_all();
2386+
2387+ vma_writer_get_status(vmaw, &vmastat);
2388+
2389+ if (verbose) {
2390+ for (i = 0; i < 256; i++) {
2391+ VmaStreamInfo *si = &vmastat.stream_info[i];
2392+ if (si->size) {
2393+ printf("image %s: size=%zd zeros=%zd saved=%zd\n", si->devname,
2394+ si->size, si->zero_bytes, si->size - si->zero_bytes);
2395+ }
2396+ }
2397+ }
2398+
2399+ if (vmastat.status < 0) {
2400+ unlink(archivename);
2401+ g_error("creating vma archive failed");
2402+ }
2403+
2404+ return 0;
2405+}
2406+
/*
 * Program entry point.
 *
 * Calls error_set_progname(), qemu_init_main_loop() and bdrv_init(), then
 * dispatches to the handler for the sub-command named in argv[1]
 * ("list", "create" or "extract"). Any other sub-command, or none at all,
 * ends in help().
 */
int main(int argc, char **argv)
{
    error_set_progname(argv[0]);

    qemu_init_main_loop();

    bdrv_init();

    if (argc < 2) {
        help();
    }

    const char *subcmd = argv[1];

    /* shift by one so the handler sees the sub-command as its argv[0] */
    ++argv;
    --argc;

    if (strcmp(subcmd, "list") == 0) {
        return list_content(argc, argv);
    }
    if (strcmp(subcmd, "create") == 0) {
        return create_archive(argc, argv);
    }
    if (strcmp(subcmd, "extract") == 0) {
        return extract_content(argc, argv);
    }

    /* unknown sub-command */
    help();
    return 0;
}
2436diff --git a/vma.h b/vma.h
2437new file mode 100644
55827521 2438index 0000000..76d0dc8
5ad5891c
DM
2439--- /dev/null
2440+++ b/vma.h
55827521 2441@@ -0,0 +1,145 @@
5ad5891c
DM
2442+/*
2443+ * VMA: Virtual Machine Archive
2444+ *
2445+ * Copyright (C) Proxmox Server Solutions
2446+ *
2447+ * Authors:
2448+ * Dietmar Maurer (dietmar@proxmox.com)
2449+ *
2450+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
2451+ * See the COPYING file in the top-level directory.
2452+ *
2453+ */
2454+
2455+#ifndef BACKUP_VMA_H
2456+#define BACKUP_VMA_H
2457+
2458+#include "backup.h"
2459+#include "error.h"
2460+
/* Block and cluster geometry: 4 KiB blocks, 16 blocks per 64 KiB cluster. */
#define VMA_BLOCK_BITS 12
#define VMA_BLOCK_SIZE (1<<VMA_BLOCK_BITS)
#define VMA_CLUSTER_BITS (VMA_BLOCK_BITS+4)
#define VMA_CLUSTER_SIZE (1<<VMA_CLUSTER_BITS)

#if VMA_CLUSTER_SIZE != 65536
#error unexpected cluster size
#endif

#define VMA_EXTENT_HEADER_SIZE 512
#define VMA_BLOCKS_PER_EXTENT 59
#define VMA_MAX_CONFIGS 256

/* Largest extent: one extent header followed by 59 full clusters. */
#define VMA_MAX_EXTENT_SIZE \
    (VMA_EXTENT_HEADER_SIZE+VMA_CLUSTER_SIZE*VMA_BLOCKS_PER_EXTENT)
#if VMA_MAX_EXTENT_SIZE != 3867136
#error unexpected VMA_EXTENT_SIZE
#endif

/* File Format Definitions */

/* Magic numbers spell "VMA\0" and "VMAE", stored big-endian. */
#define VMA_MAGIC (GUINT32_TO_BE(('V'<<24)|('M'<<16)|('A'<<8)|0x00))
#define VMA_EXTENT_MAGIC (GUINT32_TO_BE(('V'<<24)|('M'<<16)|('A'<<8)|'E'))

/* Per-device entry of VmaHeader.dev_info[]; 32 bytes. */
typedef struct VmaDeviceInfoHeader {
    uint32_t devname_ptr; /* offset into blob_buffer table */
    uint32_t reserved0;
    uint64_t size; /* device size in bytes */
    uint64_t reserved1;
    uint64_t reserved2;
} VmaDeviceInfoHeader;

/*
 * Archive header.
 *
 * Every field sits at its natural alignment without relying on
 * compiler-inserted padding, so the layout is the same on every ABI:
 * config_names starts at byte 2044, dev_info at byte 4096, and the whole
 * structure is 12288 bytes.
 */
typedef struct VmaHeader {
    uint32_t magic;
    uint32_t version;
    unsigned char uuid[16];
    int64_t ctime;
    unsigned char md5sum[16];

    uint32_t blob_buffer_offset;
    uint32_t blob_buffer_size;
    uint32_t header_size;

    unsigned char reserved[1984];

    uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
    uint32_t config_data[VMA_MAX_CONFIGS];  /* offset into blob_buffer table */

    /*
     * Explicit padding up to byte 4096. Compilers that align uint64_t to
     * 8 bytes insert these 4 bytes implicitly before dev_info; ABIs that
     * align it to 4 bytes (e.g. i386) do not, which would shift dev_info
     * and shrink the header.
     */
    uint32_t reserved1;

    VmaDeviceInfoHeader dev_info[256];
} VmaHeader;

/*
 * Extent header; the fields add up to exactly VMA_EXTENT_HEADER_SIZE
 * (512) bytes.
 */
typedef struct VmaExtentHeader {
    uint32_t magic;
    uint16_t reserved1;
    uint16_t block_count;
    unsigned char uuid[16];
    unsigned char md5sum[16];
    uint64_t blockinfo[VMA_BLOCKS_PER_EXTENT];
} VmaExtentHeader;
2520+
2521+/* functions/definitions to read/write vma files */
2522+
2523+typedef struct VmaReader VmaReader; /* opaque handle; only forward-declared in this header */
2524+
2525+typedef struct VmaWriter VmaWriter; /* opaque handle; only forward-declared in this header */
2526+
2527+typedef struct VmaConfigData { /* name/data/len triple describing one config entry */
2528+ const char *name;
2529+ const void *data;
2530+ uint32_t len; /* NOTE(review): presumably the size of data in bytes - confirm */
2531+} VmaConfigData; /* NOTE(review): presumably the element type of the list from vma_reader_get_config_data() - confirm */
2532+
2533+typedef struct VmaStreamInfo { /* per-stream counters; element type of VmaStatus.stream_info[] */
2534+ uint64_t size; /* create_archive() in vma.c treats 0 as "slot not in use" */
2535+ uint64_t cluster_count; /* NOTE(review): presumably size expressed in clusters - confirm */
2536+ uint64_t transferred; /* summed over all streams for the progress line in create_archive() */
2537+ uint64_t zero_bytes; /* create_archive() reports size - zero_bytes as "saved" */
2538+ int finished; /* NOTE(review): presumably non-zero once the stream is closed - confirm */
2539+ char *devname; /* printed as the image name by create_archive() */
2540+} VmaStreamInfo;
2541+
2542+typedef struct VmaStatus { /* snapshot filled in by vma_writer_get_status() */
2543+ int status; /* create_archive() in vma.c treats a negative value as failure */
2544+ bool closed; /* create_archive() keeps running the main loop until this is set */
2545+ char errmsg[8192]; /* NOTE(review): presumably the text recorded via vma_writer_set_error() - confirm */
2546+ char uuid_str[37]; /* NOTE(review): 37 bytes fit a 36-character textual UUID plus NUL - confirm */
2547+ VmaStreamInfo stream_info[256]; /* create_archive() scans all 256 slots and skips those with size == 0 */
2548+} VmaStatus;
2549+
2550+typedef struct VmaDeviceInfo { /* result type of vma_reader_get_device_info() */
2551+ uint64_t size; /* device size in bytes */
2552+ const char *devname; /* NOTE(review): ownership and lifetime of this string are not visible in this header - confirm */
2553+} VmaDeviceInfo;
2554+
2555+extern const BackupDriver backup_vma_driver;
2556+
55827521 2557+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp); /* create_archive() in vma.c treats NULL as failure and reads *errp */
5ad5891c
DM
2558+int vma_writer_close(VmaWriter *vmaw, Error **errp); /* callers in vma.c treat any non-zero return as failure */
2559+void vma_writer_destroy(VmaWriter *vmaw);
2560+int vma_writer_add_config(VmaWriter *vmaw, const char *name, gpointer data,
2561+ size_t len); /* create_archive() treats any non-zero return as failure */
2562+int vma_writer_register_stream(VmaWriter *vmaw, const char *devname,
2563+ size_t size); /* result is used as dev_id; <= 0 is treated as failure. NOTE(review): create_archive() passes an int64_t length, which a 32-bit size_t would truncate - confirm */
2564+
2565+int64_t coroutine_fn vma_writer_write(VmaWriter *vmaw, uint8_t dev_id,
2566+ int64_t cluster_num, unsigned char *buf,
2567+ size_t *zero_bytes); /* NOTE(review): presumably writes one cluster of buf and reports zero bytes through *zero_bytes - confirm */
2568+
2569+int coroutine_fn vma_writer_close_stream(VmaWriter *vmaw, uint8_t dev_id); /* backup_complete_cb() in vma.c closes the whole writer when this returns <= 0 */
2570+
2571+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status); /* fills *status; create_archive() polls it from its main loop */
2572+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...); /* printf-style format, e.g. "backup_complete_cb %d" in vma.c */
2573+
2574+
2575+VmaReader *vma_reader_create(const char *filename, Error **errp); /* NOTE(review): presumably returns NULL and sets *errp on failure, like vma_writer_create() - confirm */
2576+void vma_reader_destroy(VmaReader *vmar);
2577+VmaHeader *vma_reader_get_header(VmaReader *vmar);
2578+GList *vma_reader_get_config_data(VmaReader *vmar); /* NOTE(review): presumably a list of VmaConfigData - confirm */
2579+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id);
2580+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id,
2581+ BlockDriverState *bs, bool write_zeroes,
2582+ Error **errp); /* NOTE(review): presumably sets bs as the restore target for stream dev_id - confirm */
309874bd
DM
2583+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
2584+ Error **errp); /* NOTE(review): return convention is not visible in this header - confirm */
5ad5891c
DM
2585+
2586+#endif /* BACKUP_VMA_H */
2587--
25881.7.2.5
2589