git.proxmox.com Git - pve-qemu.git/blame - debian/patches/pve/0027-PVE-Backup-add-vma-backup-format-code.patch
backup: improve error when copy-before-write fails for fleecing
[pve-qemu.git] / debian / patches / pve / 0027-PVE-Backup-add-vma-backup-format-code.patch
CommitLineData
23102ed6 1From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
6402d961 2From: Dietmar Maurer <dietmar@proxmox.com>
83faa3fe
TL
3Date: Mon, 6 Apr 2020 12:16:57 +0200
4Subject: [PATCH] PVE-Backup: add vma backup format code
95259824 5
db5d2a4b
FE
6Notes about partial restoring: skipping a certain drive is done via a
7map line of the form skip=drive-scsi0. Since in PVE, most archives are
8compressed and piped to vma for restore, it's not easily possible to
9skip reads.
10
ddbf7a87 11Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
db5d2a4b
FE
12[FE: improvements during create
13 allow partial restore]
14Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
95259824 15---
817b7667
SR
16 block/meson.build | 2 +
17 meson.build | 5 +
f1eed34a 18 vma-reader.c | 870 ++++++++++++++++++++++++++++++++++++++++++++
76394996 19 vma-writer.c | 818 +++++++++++++++++++++++++++++++++++++++++
f1eed34a 20 vma.c | 901 ++++++++++++++++++++++++++++++++++++++++++++++
817b7667 21 vma.h | 150 ++++++++
f1eed34a 22 6 files changed, 2746 insertions(+)
95259824
WB
23 create mode 100644 vma-reader.c
24 create mode 100644 vma-writer.c
25 create mode 100644 vma.c
26 create mode 100644 vma.h
27
817b7667 28diff --git a/block/meson.build b/block/meson.build
4fbd50e2 29index b530e117b5..b245daa98e 100644
817b7667
SR
30--- a/block/meson.build
31+++ b/block/meson.build
f1eed34a 32@@ -42,6 +42,8 @@ block_ss.add(files(
817b7667 33 'zeroinit.c',
8dca018b 34 ), zstd, zlib, gnutls)
6838f038 35
817b7667
SR
36+block_ss.add(files('../vma-writer.c'), libuuid)
37+
10e10933
FE
38 system_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
39 system_ss.add(files('block-ram-registrar.c'))
95259824 40
817b7667 41diff --git a/meson.build b/meson.build
4fbd50e2 42index 91a0aa64c6..620cc594b2 100644
817b7667
SR
43--- a/meson.build
44+++ b/meson.build
4fbd50e2 45@@ -1922,6 +1922,8 @@ endif
95259824 46
817b7667 47 has_gettid = cc.has_function('gettid')
95259824 48
817b7667
SR
49+libuuid = cc.find_library('uuid', required: true)
50+
4567474e
FE
51 # libselinux
52 selinux = dependency('libselinux',
53 required: get_option('selinux'),
4fbd50e2 54@@ -4023,6 +4025,9 @@ if have_tools
4567474e
FE
55 dependencies: [blockdev, qemuutil, gnutls, selinux],
56 install: true)
95259824 57
c5e8e7c9 58+ vma = executable('vma', files('vma.c', 'vma-reader.c') + genh,
817b7667
SR
59+ dependencies: [authz, block, crypto, io, qom], install: true)
60+
61 subdir('storage-daemon')
4fbd50e2
FE
62
63 foreach exe: [ 'qemu-img', 'qemu-io', 'qemu-nbd', 'qemu-storage-daemon']
95259824
WB
64diff --git a/vma-reader.c b/vma-reader.c
65new file mode 100644
f1eed34a 66index 0000000000..d0b6721812
95259824
WB
67--- /dev/null
68+++ b/vma-reader.c
f1eed34a 69@@ -0,0 +1,870 @@
95259824
WB
70+/*
71+ * VMA: Virtual Machine Archive
72+ *
73+ * Copyright (C) 2012 Proxmox Server Solutions
74+ *
75+ * Authors:
76+ * Dietmar Maurer (dietmar@proxmox.com)
77+ *
78+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
79+ * See the COPYING file in the top-level directory.
80+ *
81+ */
82+
83+#include "qemu/osdep.h"
84+#include <glib.h>
85+#include <uuid/uuid.h>
86+
95259824
WB
87+#include "qemu/timer.h"
88+#include "qemu/ratelimit.h"
89+#include "vma.h"
90+#include "block/block.h"
f1eed34a 91+#include "block/graph-lock.h"
95259824
WB
92+#include "sysemu/block-backend.h"
93+
94+static unsigned char zero_vma_block[VMA_BLOCK_SIZE];
95+
96+typedef struct VmaRestoreState {
67af0fa4 97+ BlockBackend *target;
95259824
WB
98+ bool write_zeroes;
99+ unsigned long *bitmap;
100+ int bitmap_size;
db5d2a4b 101+ bool skip;
95259824
WB
102+} VmaRestoreState;
103+
104+struct VmaReader {
105+ int fd;
106+ GChecksum *md5csum;
107+ GHashTable *blob_hash;
108+ unsigned char *head_data;
109+ VmaDeviceInfo devinfo[256];
110+ VmaRestoreState rstate[256];
111+ GList *cdata_list;
112+ guint8 vmstate_stream;
113+ uint32_t vmstate_clusters;
114+ /* to show restore percentage if run with -v */
115+ time_t start_time;
116+ int64_t cluster_count;
117+ int64_t clusters_read;
67af0fa4
WB
118+ int64_t zero_cluster_data;
119+ int64_t partial_zero_cluster_data;
95259824
WB
120+ int clusters_read_per;
121+};
122+
123+static guint
124+g_int32_hash(gconstpointer v)
125+{
126+ return *(const uint32_t *)v;
127+}
128+
129+static gboolean
130+g_int32_equal(gconstpointer v1, gconstpointer v2)
131+{
132+ return *((const uint32_t *)v1) == *((const uint32_t *)v2);
133+}
134+
135+static int vma_reader_get_bitmap(VmaRestoreState *rstate, int64_t cluster_num)
136+{
137+ assert(rstate);
138+ assert(rstate->bitmap);
139+
140+ unsigned long val, idx, bit;
141+
142+ idx = cluster_num / BITS_PER_LONG;
143+
144+ assert(rstate->bitmap_size > idx);
145+
146+ bit = cluster_num % BITS_PER_LONG;
147+ val = rstate->bitmap[idx];
148+
149+ return !!(val & (1UL << bit));
150+}
151+
152+static void vma_reader_set_bitmap(VmaRestoreState *rstate, int64_t cluster_num,
153+ int dirty)
154+{
155+ assert(rstate);
156+ assert(rstate->bitmap);
157+
158+ unsigned long val, idx, bit;
159+
160+ idx = cluster_num / BITS_PER_LONG;
161+
162+ assert(rstate->bitmap_size > idx);
163+
164+ bit = cluster_num % BITS_PER_LONG;
165+ val = rstate->bitmap[idx];
166+ if (dirty) {
167+ if (!(val & (1UL << bit))) {
168+ val |= 1UL << bit;
169+ }
170+ } else {
171+ if (val & (1UL << bit)) {
172+ val &= ~(1UL << bit);
173+ }
174+ }
175+ rstate->bitmap[idx] = val;
176+}
177+
/* One length-prefixed entry of the header's blob buffer. */
typedef struct VmaBlob {
    /* Offset of the entry relative to the blob buffer; also its hash key. */
    uint32_t start;
    /* Payload length in bytes (the two length bytes are not counted). */
    uint32_t len;
    /* Payload; points into the reader's header buffer, not owned. */
    void *data;
} VmaBlob;
183+
184+static const VmaBlob *get_header_blob(VmaReader *vmar, uint32_t pos)
185+{
186+ assert(vmar);
187+ assert(vmar->blob_hash);
188+
189+ return g_hash_table_lookup(vmar->blob_hash, &pos);
190+}
191+
192+static const char *get_header_str(VmaReader *vmar, uint32_t pos)
193+{
194+ const VmaBlob *blob = get_header_blob(vmar, pos);
195+ if (!blob) {
196+ return NULL;
197+ }
198+ const char *res = (char *)blob->data;
199+ if (res[blob->len-1] != '\0') {
200+ return NULL;
201+ }
202+ return res;
203+}
204+
/*
 * read(2) that is restarted for as long as it fails with EINTR.
 * Returns whatever the final read() call returned.
 */
static ssize_t
safe_read(int fd, unsigned char *buf, size_t count)
{
    for (;;) {
        ssize_t got = read(fd, buf, count);

        if (got >= 0 || errno != EINTR) {
            return got;
        }
    }
}
216+
/*
 * Keep reading until @len bytes have arrived, EOF is hit or a read fails.
 *
 * Returns the number of bytes stored in @buf (less than @len only when EOF
 * was reached first), or -1 if a read failed.
 */
static ssize_t
full_read(int fd, unsigned char *buf, size_t len)
{
    unsigned char *cursor = buf;
    size_t remaining = len;

    while (remaining > 0) {
        ssize_t got = safe_read(fd, cursor, remaining);

        if (got == 0) {
            /* EOF: hand back the short count */
            return cursor - buf;
        }
        if (got < 0) {
            return -1;
        }

        cursor += got;
        remaining -= got;
    }

    return cursor - buf;
}
247+
248+void vma_reader_destroy(VmaReader *vmar)
249+{
250+ assert(vmar);
251+
252+ if (vmar->fd >= 0) {
253+ close(vmar->fd);
254+ }
255+
256+ if (vmar->cdata_list) {
257+ g_list_free(vmar->cdata_list);
258+ }
259+
260+ int i;
261+ for (i = 1; i < 256; i++) {
262+ if (vmar->rstate[i].bitmap) {
263+ g_free(vmar->rstate[i].bitmap);
264+ }
2653a5f0
TL
265+ if (vmar->rstate[i].target) {
266+ blk_unref(vmar->rstate[i].target);
267+ }
95259824
WB
268+ }
269+
270+ if (vmar->md5csum) {
271+ g_checksum_free(vmar->md5csum);
272+ }
273+
274+ if (vmar->blob_hash) {
275+ g_hash_table_destroy(vmar->blob_hash);
276+ }
277+
278+ if (vmar->head_data) {
279+ g_free(vmar->head_data);
280+ }
281+
282+ g_free(vmar);
283+
284+};
285+
286+static int vma_reader_read_head(VmaReader *vmar, Error **errp)
287+{
288+ assert(vmar);
289+ assert(errp);
290+ assert(*errp == NULL);
291+
292+ unsigned char md5sum[16];
293+ int i;
294+ int ret = 0;
295+
296+ vmar->head_data = g_malloc(sizeof(VmaHeader));
297+
298+ if (full_read(vmar->fd, vmar->head_data, sizeof(VmaHeader)) !=
299+ sizeof(VmaHeader)) {
300+ error_setg(errp, "can't read vma header - %s",
301+ errno ? g_strerror(errno) : "got EOF");
302+ return -1;
303+ }
304+
305+ VmaHeader *h = (VmaHeader *)vmar->head_data;
306+
307+ if (h->magic != VMA_MAGIC) {
308+ error_setg(errp, "not a vma file - wrong magic number");
309+ return -1;
310+ }
311+
312+ uint32_t header_size = GUINT32_FROM_BE(h->header_size);
313+ int need = header_size - sizeof(VmaHeader);
314+ if (need <= 0) {
315+ error_setg(errp, "wrong vma header size %d", header_size);
316+ return -1;
317+ }
318+
319+ vmar->head_data = g_realloc(vmar->head_data, header_size);
320+ h = (VmaHeader *)vmar->head_data;
321+
322+ if (full_read(vmar->fd, vmar->head_data + sizeof(VmaHeader), need) !=
323+ need) {
324+ error_setg(errp, "can't read vma header data - %s",
325+ errno ? g_strerror(errno) : "got EOF");
326+ return -1;
327+ }
328+
329+ memcpy(md5sum, h->md5sum, 16);
330+ memset(h->md5sum, 0, 16);
331+
332+ g_checksum_reset(vmar->md5csum);
333+ g_checksum_update(vmar->md5csum, vmar->head_data, header_size);
334+ gsize csize = 16;
335+ g_checksum_get_digest(vmar->md5csum, (guint8 *)(h->md5sum), &csize);
336+
337+ if (memcmp(md5sum, h->md5sum, 16) != 0) {
338+ error_setg(errp, "wrong vma header chechsum");
339+ return -1;
340+ }
341+
342+ /* we can modify header data after checksum verify */
343+ h->header_size = header_size;
344+
345+ h->version = GUINT32_FROM_BE(h->version);
346+ if (h->version != 1) {
347+ error_setg(errp, "wrong vma version %d", h->version);
348+ return -1;
349+ }
350+
351+ h->ctime = GUINT64_FROM_BE(h->ctime);
352+ h->blob_buffer_offset = GUINT32_FROM_BE(h->blob_buffer_offset);
353+ h->blob_buffer_size = GUINT32_FROM_BE(h->blob_buffer_size);
354+
355+ uint32_t bstart = h->blob_buffer_offset + 1;
356+ uint32_t bend = h->blob_buffer_offset + h->blob_buffer_size;
357+
358+ if (bstart <= sizeof(VmaHeader)) {
359+ error_setg(errp, "wrong vma blob buffer offset %d",
360+ h->blob_buffer_offset);
361+ return -1;
362+ }
363+
364+ if (bend > header_size) {
365+ error_setg(errp, "wrong vma blob buffer size %d/%d",
366+ h->blob_buffer_offset, h->blob_buffer_size);
367+ return -1;
368+ }
369+
370+ while ((bstart + 2) <= bend) {
371+ uint32_t size = vmar->head_data[bstart] +
372+ (vmar->head_data[bstart+1] << 8);
373+ if ((bstart + size + 2) <= bend) {
374+ VmaBlob *blob = g_new0(VmaBlob, 1);
375+ blob->start = bstart - h->blob_buffer_offset;
376+ blob->len = size;
377+ blob->data = vmar->head_data + bstart + 2;
378+ g_hash_table_insert(vmar->blob_hash, &blob->start, blob);
379+ }
380+ bstart += size + 2;
381+ }
382+
383+
384+ int count = 0;
385+ for (i = 1; i < 256; i++) {
386+ VmaDeviceInfoHeader *dih = &h->dev_info[i];
387+ uint32_t devname_ptr = GUINT32_FROM_BE(dih->devname_ptr);
388+ uint64_t size = GUINT64_FROM_BE(dih->size);
389+ const char *devname = get_header_str(vmar, devname_ptr);
390+
391+ if (size && devname) {
392+ count++;
393+ vmar->devinfo[i].size = size;
394+ vmar->devinfo[i].devname = devname;
395+
396+ if (strcmp(devname, "vmstate") == 0) {
397+ vmar->vmstate_stream = i;
398+ }
399+ }
400+ }
401+
95259824
WB
402+ for (i = 0; i < VMA_MAX_CONFIGS; i++) {
403+ uint32_t name_ptr = GUINT32_FROM_BE(h->config_names[i]);
404+ uint32_t data_ptr = GUINT32_FROM_BE(h->config_data[i]);
405+
406+ if (!(name_ptr && data_ptr)) {
407+ continue;
408+ }
409+ const char *name = get_header_str(vmar, name_ptr);
410+ const VmaBlob *blob = get_header_blob(vmar, data_ptr);
411+
412+ if (!(name && blob)) {
413+ error_setg(errp, "vma contains invalid data pointers");
414+ return -1;
415+ }
416+
417+ VmaConfigData *cdata = g_new0(VmaConfigData, 1);
418+ cdata->name = name;
419+ cdata->data = blob->data;
420+ cdata->len = blob->len;
421+
422+ vmar->cdata_list = g_list_append(vmar->cdata_list, cdata);
423+ }
424+
425+ return ret;
426+};
427+
428+VmaReader *vma_reader_create(const char *filename, Error **errp)
429+{
430+ assert(filename);
431+ assert(errp);
432+
433+ VmaReader *vmar = g_new0(VmaReader, 1);
434+
435+ if (strcmp(filename, "-") == 0) {
436+ vmar->fd = dup(0);
437+ } else {
438+ vmar->fd = open(filename, O_RDONLY);
439+ }
440+
441+ if (vmar->fd < 0) {
442+ error_setg(errp, "can't open file %s - %s\n", filename,
443+ g_strerror(errno));
444+ goto err;
445+ }
446+
447+ vmar->md5csum = g_checksum_new(G_CHECKSUM_MD5);
448+ if (!vmar->md5csum) {
449+ error_setg(errp, "can't allocate cmsum\n");
450+ goto err;
451+ }
452+
453+ vmar->blob_hash = g_hash_table_new_full(g_int32_hash, g_int32_equal,
454+ NULL, g_free);
455+
456+ if (vma_reader_read_head(vmar, errp) < 0) {
457+ goto err;
458+ }
459+
460+ return vmar;
461+
462+err:
463+ if (vmar) {
464+ vma_reader_destroy(vmar);
465+ }
466+
467+ return NULL;
468+}
469+
470+VmaHeader *vma_reader_get_header(VmaReader *vmar)
471+{
472+ assert(vmar);
473+ assert(vmar->head_data);
474+
475+ return (VmaHeader *)(vmar->head_data);
476+}
477+
478+GList *vma_reader_get_config_data(VmaReader *vmar)
479+{
480+ assert(vmar);
481+ assert(vmar->head_data);
482+
483+ return vmar->cdata_list;
484+}
485+
486+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id)
487+{
488+ assert(vmar);
489+ assert(dev_id);
490+
491+ if (vmar->devinfo[dev_id].size && vmar->devinfo[dev_id].devname) {
492+ return &vmar->devinfo[dev_id];
493+ }
494+
495+ return NULL;
496+}
497+
67af0fa4 498+static void allocate_rstate(VmaReader *vmar, guint8 dev_id,
db5d2a4b 499+ BlockBackend *target, bool write_zeroes, bool skip)
67af0fa4
WB
500+{
501+ assert(vmar);
502+ assert(dev_id);
503+
504+ vmar->rstate[dev_id].target = target;
505+ vmar->rstate[dev_id].write_zeroes = write_zeroes;
db5d2a4b 506+ vmar->rstate[dev_id].skip = skip;
67af0fa4
WB
507+
508+ int64_t size = vmar->devinfo[dev_id].size;
509+
510+ int64_t bitmap_size = (size/BDRV_SECTOR_SIZE) +
511+ (VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE) * BITS_PER_LONG - 1;
512+ bitmap_size /= (VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE) * BITS_PER_LONG;
513+
514+ vmar->rstate[dev_id].bitmap_size = bitmap_size;
515+ vmar->rstate[dev_id].bitmap = g_new0(unsigned long, bitmap_size);
516+
517+ vmar->cluster_count += size/VMA_CLUSTER_SIZE;
518+}
519+
520+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, BlockBackend *target,
db5d2a4b 521+ bool write_zeroes, bool skip, Error **errp)
95259824
WB
522+{
523+ assert(vmar);
db5d2a4b 524+ assert(target != NULL || skip);
95259824 525+ assert(dev_id);
db5d2a4b 526+ assert(vmar->rstate[dev_id].target == NULL && !vmar->rstate[dev_id].skip);
95259824 527+
db5d2a4b
FE
528+ if (target != NULL) {
529+ int64_t size = blk_getlength(target);
530+ int64_t size_diff = size - vmar->devinfo[dev_id].size;
531+
532+ /* storage types can have different size restrictions, so it
533+ * is not always possible to create an image with exact size.
534+ * So we tolerate a size difference up to 4MB.
535+ */
536+ if ((size_diff < 0) || (size_diff > 4*1024*1024)) {
537+ error_setg(errp, "vma_reader_register_bs for stream %s failed - "
538+ "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
539+ size, vmar->devinfo[dev_id].size);
540+ return -1;
541+ }
95259824
WB
542+ }
543+
db5d2a4b 544+ allocate_rstate(vmar, dev_id, target, write_zeroes, skip);
95259824
WB
545+
546+ return 0;
547+}
548+
/*
 * write(2) that is restarted for as long as it fails with EINTR.
 * Returns whatever the final write() call returned.
 */
static ssize_t safe_write(int fd, void *buf, size_t count)
{
    ssize_t n;

    do {
        n = write(fd, buf, count);
    } while (n < 0 && errno == EINTR);

    return n;
}

/*
 * Write all @len bytes of @buf to @fd, continuing after short writes.
 *
 * Returns @len on success or a negative value if a write failed. The return
 * type is signed so that the failure value really is negative instead of a
 * huge unsigned count.
 */
static ssize_t full_write(int fd, void *buf, size_t len)
{
    /* byte pointer: arithmetic on void * is a GNU extension */
    unsigned char *cursor = buf;
    size_t remaining = len;

    while (remaining > 0) {
        ssize_t n = safe_write(fd, cursor, remaining);

        if (n < 0) {
            return n;
        }

        cursor += n;
        remaining -= n;
    }

    return len;
}
584+
585+static int restore_write_data(VmaReader *vmar, guint8 dev_id,
67af0fa4 586+ BlockBackend *target, int vmstate_fd,
95259824
WB
587+ unsigned char *buf, int64_t sector_num,
588+ int nb_sectors, Error **errp)
589+{
590+ assert(vmar);
591+
592+ if (dev_id == vmar->vmstate_stream) {
593+ if (vmstate_fd >= 0) {
594+ int len = nb_sectors * BDRV_SECTOR_SIZE;
595+ int res = full_write(vmstate_fd, buf, len);
596+ if (res < 0) {
597+ error_setg(errp, "write vmstate failed %d", res);
598+ return -1;
599+ }
600+ }
601+ } else {
5b15e2ec 602+ int res = blk_pwrite(target, sector_num * BDRV_SECTOR_SIZE, nb_sectors * BDRV_SECTOR_SIZE, buf, 0);
95259824 603+ if (res < 0) {
f1eed34a 604+ bdrv_graph_rdlock_main_loop();
67af0fa4
WB
605+ error_setg(errp, "blk_pwrite to %s failed (%d)",
606+ bdrv_get_device_name(blk_bs(target)), res);
f1eed34a 607+ bdrv_graph_rdunlock_main_loop();
95259824
WB
608+ return -1;
609+ }
610+ }
611+ return 0;
612+}
67af0fa4 613+
95259824
WB
614+static int restore_extent(VmaReader *vmar, unsigned char *buf,
615+ int extent_size, int vmstate_fd,
67af0fa4 616+ bool verbose, bool verify, Error **errp)
95259824
WB
617+{
618+ assert(vmar);
619+ assert(buf);
620+
621+ VmaExtentHeader *ehead = (VmaExtentHeader *)buf;
622+ int start = VMA_EXTENT_HEADER_SIZE;
623+ int i;
624+
625+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
626+ uint64_t block_info = GUINT64_FROM_BE(ehead->blockinfo[i]);
627+ uint64_t cluster_num = block_info & 0xffffffff;
628+ uint8_t dev_id = (block_info >> 32) & 0xff;
629+ uint16_t mask = block_info >> (32+16);
630+ int64_t max_sector;
631+
632+ if (!dev_id) {
633+ continue;
634+ }
635+
636+ VmaRestoreState *rstate = &vmar->rstate[dev_id];
67af0fa4 637+ BlockBackend *target = NULL;
95259824 638+
db5d2a4b
FE
639+ bool skip = rstate->skip;
640+
95259824 641+ if (dev_id != vmar->vmstate_stream) {
67af0fa4 642+ target = rstate->target;
db5d2a4b 643+ if (!verify && !target && !skip) {
95259824
WB
644+ error_setg(errp, "got wrong dev id %d", dev_id);
645+ return -1;
646+ }
647+
db5d2a4b
FE
648+ if (!skip) {
649+ if (vma_reader_get_bitmap(rstate, cluster_num)) {
650+ error_setg(errp, "found duplicated cluster %zd for stream %s",
651+ cluster_num, vmar->devinfo[dev_id].devname);
652+ return -1;
653+ }
654+ vma_reader_set_bitmap(rstate, cluster_num, 1);
95259824 655+ }
95259824
WB
656+
657+ max_sector = vmar->devinfo[dev_id].size/BDRV_SECTOR_SIZE;
658+ } else {
659+ max_sector = G_MAXINT64;
660+ if (cluster_num != vmar->vmstate_clusters) {
661+ error_setg(errp, "found out of order vmstate data");
662+ return -1;
663+ }
664+ vmar->vmstate_clusters++;
665+ }
666+
667+ vmar->clusters_read++;
668+
669+ if (verbose) {
670+ time_t duration = time(NULL) - vmar->start_time;
671+ int percent = (vmar->clusters_read*100)/vmar->cluster_count;
672+ if (percent != vmar->clusters_read_per) {
673+ printf("progress %d%% (read %zd bytes, duration %zd sec)\n",
674+ percent, vmar->clusters_read*VMA_CLUSTER_SIZE,
675+ duration);
676+ fflush(stdout);
677+ vmar->clusters_read_per = percent;
678+ }
679+ }
680+
681+ /* try to write whole clusters to speedup restore */
682+ if (mask == 0xffff) {
683+ if ((start + VMA_CLUSTER_SIZE) > extent_size) {
684+ error_setg(errp, "short vma extent - too many blocks");
685+ return -1;
686+ }
687+ int64_t sector_num = (cluster_num * VMA_CLUSTER_SIZE) /
688+ BDRV_SECTOR_SIZE;
689+ int64_t end_sector = sector_num +
690+ VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE;
691+
692+ if (end_sector > max_sector) {
693+ end_sector = max_sector;
694+ }
695+
696+ if (end_sector <= sector_num) {
9b05d1d4 697+ error_setg(errp, "got wrong block address - write beyond end");
95259824
WB
698+ return -1;
699+ }
700+
db5d2a4b 701+ if (!verify && !skip) {
67af0fa4
WB
702+ int nb_sectors = end_sector - sector_num;
703+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
704+ buf + start, sector_num, nb_sectors,
705+ errp) < 0) {
706+ return -1;
707+ }
95259824
WB
708+ }
709+
710+ start += VMA_CLUSTER_SIZE;
711+ } else {
712+ int j;
713+ int bit = 1;
714+
715+ for (j = 0; j < 16; j++) {
716+ int64_t sector_num = (cluster_num*VMA_CLUSTER_SIZE +
717+ j*VMA_BLOCK_SIZE)/BDRV_SECTOR_SIZE;
718+
719+ int64_t end_sector = sector_num +
720+ VMA_BLOCK_SIZE/BDRV_SECTOR_SIZE;
721+ if (end_sector > max_sector) {
722+ end_sector = max_sector;
723+ }
724+
725+ if (mask & bit) {
726+ if ((start + VMA_BLOCK_SIZE) > extent_size) {
727+ error_setg(errp, "short vma extent - too many blocks");
728+ return -1;
729+ }
730+
731+ if (end_sector <= sector_num) {
732+ error_setg(errp, "got wrong block address - "
9b05d1d4 733+ "write beyond end");
95259824
WB
734+ return -1;
735+ }
736+
db5d2a4b 737+ if (!verify && !skip) {
67af0fa4
WB
738+ int nb_sectors = end_sector - sector_num;
739+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
740+ buf + start, sector_num,
741+ nb_sectors, errp) < 0) {
742+ return -1;
743+ }
95259824
WB
744+ }
745+
746+ start += VMA_BLOCK_SIZE;
747+
748+ } else {
749+
67af0fa4
WB
750+
751+ if (end_sector > sector_num) {
95259824
WB
752+ /* Todo: use bdrv_co_write_zeroes (but that need to
753+ * be run inside coroutine?)
754+ */
755+ int nb_sectors = end_sector - sector_num;
67af0fa4
WB
756+ int zero_size = BDRV_SECTOR_SIZE*nb_sectors;
757+ vmar->zero_cluster_data += zero_size;
758+ if (mask != 0) {
759+ vmar->partial_zero_cluster_data += zero_size;
760+ }
761+
db5d2a4b 762+ if (rstate->write_zeroes && !verify && !skip) {
67af0fa4
WB
763+ if (restore_write_data(vmar, dev_id, target, vmstate_fd,
764+ zero_vma_block, sector_num,
765+ nb_sectors, errp) < 0) {
766+ return -1;
767+ }
95259824
WB
768+ }
769+ }
770+ }
771+
772+ bit = bit << 1;
773+ }
774+ }
775+ }
776+
777+ if (start != extent_size) {
778+ error_setg(errp, "vma extent error - missing blocks");
779+ return -1;
780+ }
781+
782+ return 0;
783+}
784+
67af0fa4
WB
785+static int vma_reader_restore_full(VmaReader *vmar, int vmstate_fd,
786+ bool verbose, bool verify,
787+ Error **errp)
95259824
WB
788+{
789+ assert(vmar);
790+ assert(vmar->head_data);
791+
792+ int ret = 0;
793+ unsigned char buf[VMA_MAX_EXTENT_SIZE];
794+ int buf_pos = 0;
795+ unsigned char md5sum[16];
796+ VmaHeader *h = (VmaHeader *)vmar->head_data;
797+
798+ vmar->start_time = time(NULL);
799+
800+ while (1) {
801+ int bytes = full_read(vmar->fd, buf + buf_pos, sizeof(buf) - buf_pos);
802+ if (bytes < 0) {
803+ error_setg(errp, "read failed - %s", g_strerror(errno));
804+ return -1;
805+ }
806+
807+ buf_pos += bytes;
808+
809+ if (!buf_pos) {
810+ break; /* EOF */
811+ }
812+
813+ if (buf_pos < VMA_EXTENT_HEADER_SIZE) {
814+ error_setg(errp, "read short extent (%d bytes)", buf_pos);
815+ return -1;
816+ }
817+
818+ VmaExtentHeader *ehead = (VmaExtentHeader *)buf;
819+
820+ /* extract md5sum */
821+ memcpy(md5sum, ehead->md5sum, sizeof(ehead->md5sum));
822+ memset(ehead->md5sum, 0, sizeof(ehead->md5sum));
823+
824+ g_checksum_reset(vmar->md5csum);
825+ g_checksum_update(vmar->md5csum, buf, VMA_EXTENT_HEADER_SIZE);
826+ gsize csize = 16;
827+ g_checksum_get_digest(vmar->md5csum, ehead->md5sum, &csize);
828+
829+ if (memcmp(md5sum, ehead->md5sum, 16) != 0) {
830+ error_setg(errp, "wrong vma extent header chechsum");
831+ return -1;
832+ }
833+
834+ if (memcmp(h->uuid, ehead->uuid, sizeof(ehead->uuid)) != 0) {
835+ error_setg(errp, "wrong vma extent uuid");
836+ return -1;
837+ }
838+
839+ if (ehead->magic != VMA_EXTENT_MAGIC || ehead->reserved1 != 0) {
840+ error_setg(errp, "wrong vma extent header magic");
841+ return -1;
842+ }
843+
844+ int block_count = GUINT16_FROM_BE(ehead->block_count);
845+ int extent_size = VMA_EXTENT_HEADER_SIZE + block_count*VMA_BLOCK_SIZE;
846+
847+ if (buf_pos < extent_size) {
848+ error_setg(errp, "short vma extent (%d < %d)", buf_pos,
849+ extent_size);
850+ return -1;
851+ }
852+
853+ if (restore_extent(vmar, buf, extent_size, vmstate_fd, verbose,
67af0fa4 854+ verify, errp) < 0) {
95259824
WB
855+ return -1;
856+ }
857+
858+ if (buf_pos > extent_size) {
859+ memmove(buf, buf + extent_size, buf_pos - extent_size);
860+ buf_pos = buf_pos - extent_size;
861+ } else {
862+ buf_pos = 0;
863+ }
864+ }
865+
866+ bdrv_drain_all();
867+
868+ int i;
869+ for (i = 1; i < 256; i++) {
870+ VmaRestoreState *rstate = &vmar->rstate[i];
67af0fa4 871+ if (!rstate->target) {
95259824
WB
872+ continue;
873+ }
874+
67af0fa4
WB
875+ if (blk_flush(rstate->target) < 0) {
876+ error_setg(errp, "vma blk_flush %s failed",
95259824
WB
877+ vmar->devinfo[i].devname);
878+ return -1;
879+ }
880+
881+ if (vmar->devinfo[i].size &&
882+ (strcmp(vmar->devinfo[i].devname, "vmstate") != 0)) {
883+ assert(rstate->bitmap);
884+
885+ int64_t cluster_num, end;
886+
887+ end = (vmar->devinfo[i].size + VMA_CLUSTER_SIZE - 1) /
888+ VMA_CLUSTER_SIZE;
889+
890+ for (cluster_num = 0; cluster_num < end; cluster_num++) {
891+ if (!vma_reader_get_bitmap(rstate, cluster_num)) {
892+ error_setg(errp, "detected missing cluster %zd "
893+ "for stream %s", cluster_num,
894+ vmar->devinfo[i].devname);
895+ return -1;
896+ }
897+ }
898+ }
899+ }
900+
67af0fa4
WB
901+ if (verbose) {
902+ if (vmar->clusters_read) {
903+ printf("total bytes read %zd, sparse bytes %zd (%.3g%%)\n",
904+ vmar->clusters_read*VMA_CLUSTER_SIZE,
905+ vmar->zero_cluster_data,
906+ (double)(100.0*vmar->zero_cluster_data)/
907+ (vmar->clusters_read*VMA_CLUSTER_SIZE));
908+
909+ int64_t datasize = vmar->clusters_read*VMA_CLUSTER_SIZE-vmar->zero_cluster_data;
910+ if (datasize) { // this does not make sense for empty files
911+ printf("space reduction due to 4K zero blocks %.3g%%\n",
912+ (double)(100.0*vmar->partial_zero_cluster_data) / datasize);
913+ }
914+ } else {
915+ printf("vma archive contains no image data\n");
916+ }
917+ }
95259824
WB
918+ return ret;
919+}
920+
67af0fa4
WB
921+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
922+ Error **errp)
923+{
924+ return vma_reader_restore_full(vmar, vmstate_fd, verbose, false, errp);
925+}
926+
927+int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp)
928+{
929+ guint8 dev_id;
930+
931+ for (dev_id = 1; dev_id < 255; dev_id++) {
932+ if (vma_reader_get_device_info(vmar, dev_id)) {
db5d2a4b 933+ allocate_rstate(vmar, dev_id, NULL, false, false);
67af0fa4
WB
934+ }
935+ }
936+
937+ return vma_reader_restore_full(vmar, -1, verbose, true, errp);
938+}
939+
95259824
WB
940diff --git a/vma-writer.c b/vma-writer.c
941new file mode 100644
76394996 942index 0000000000..126b296647
95259824
WB
943--- /dev/null
944+++ b/vma-writer.c
76394996 945@@ -0,0 +1,818 @@
95259824
WB
946+/*
947+ * VMA: Virtual Machine Archive
948+ *
949+ * Copyright (C) 2012 Proxmox Server Solutions
950+ *
951+ * Authors:
952+ * Dietmar Maurer (dietmar@proxmox.com)
953+ *
954+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
955+ * See the COPYING file in the top-level directory.
956+ *
957+ */
958+
959+#include "qemu/osdep.h"
960+#include <glib.h>
76394996
FE
961+#include <linux/magic.h>
962+#include <sys/vfs.h>
95259824
WB
963+#include <uuid/uuid.h>
964+
965+#include "vma.h"
966+#include "block/block.h"
967+#include "monitor/monitor.h"
968+#include "qemu/main-loop.h"
969+#include "qemu/coroutine.h"
970+#include "qemu/cutils.h"
76394996 971+#include "qemu/error-report.h"
dc9827a6 972+#include "qemu/memalign.h"
95259824
WB
973+
/* Set to 1 to get debug output from DPRINTF() on stdout. */
#define DEBUG_VMA 0

/* printf-style debug trace, prefixed with "vma: "; inert unless DEBUG_VMA. */
#define DPRINTF(fmt, ...)\
    do { if (DEBUG_VMA) { printf("vma: " fmt, ## __VA_ARGS__); } } while (0)

#define WRITE_BUFFERS 5
#define HEADER_CLUSTERS 8
/* Size of the header buffer in bytes. */
#define HEADERBUF_SIZE (VMA_CLUSTER_SIZE*HEADER_CLUSTERS)
95259824
WB
982+
983+struct VmaWriter {
984+ int fd;
985+ FILE *cmd;
986+ int status;
987+ char errmsg[8192];
988+ uuid_t uuid;
989+ bool header_written;
990+ bool closed;
991+
992+ /* we always write extents */
67af0fa4 993+ unsigned char *outbuf;
95259824
WB
994+ int outbuf_pos; /* in bytes */
995+ int outbuf_count; /* in VMA_BLOCKS */
996+ uint64_t outbuf_block_info[VMA_BLOCKS_PER_EXTENT];
997+
67af0fa4 998+ unsigned char *headerbuf;
95259824
WB
999+
1000+ GChecksum *md5csum;
95259824
WB
1001+ CoMutex flush_lock;
1002+ Coroutine *co_writer;
1003+
1004+ /* drive informations */
1005+ VmaStreamInfo stream_info[256];
1006+ guint stream_count;
1007+
1008+ guint8 vmstate_stream;
1009+ uint32_t vmstate_clusters;
1010+
1011+ /* header blob table */
1012+ char *header_blob_table;
1013+ uint32_t header_blob_table_size;
1014+ uint32_t header_blob_table_pos;
1015+
1016+ /* store for config blobs */
1017+ uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
1018+ uint32_t config_data[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
1019+ uint32_t config_count;
1020+};
1021+
1022+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...)
1023+{
1024+ va_list ap;
1025+
1026+ if (vmaw->status < 0) {
1027+ return;
1028+ }
1029+
1030+ vmaw->status = -1;
1031+
1032+ va_start(ap, fmt);
1033+ g_vsnprintf(vmaw->errmsg, sizeof(vmaw->errmsg), fmt, ap);
1034+ va_end(ap);
1035+
1036+ DPRINTF("vma_writer_set_error: %s\n", vmaw->errmsg);
1037+}
1038+
1039+static uint32_t allocate_header_blob(VmaWriter *vmaw, const char *data,
1040+ size_t len)
1041+{
1042+ if (len > 65535) {
1043+ return 0;
1044+ }
1045+
1046+ if (!vmaw->header_blob_table ||
1047+ (vmaw->header_blob_table_size <
1048+ (vmaw->header_blob_table_pos + len + 2))) {
1049+ int newsize = vmaw->header_blob_table_size + ((len + 2 + 511)/512)*512;
1050+
1051+ vmaw->header_blob_table = g_realloc(vmaw->header_blob_table, newsize);
1052+ memset(vmaw->header_blob_table + vmaw->header_blob_table_size,
1053+ 0, newsize - vmaw->header_blob_table_size);
1054+ vmaw->header_blob_table_size = newsize;
1055+ }
1056+
1057+ uint32_t cpos = vmaw->header_blob_table_pos;
1058+ vmaw->header_blob_table[cpos] = len & 255;
1059+ vmaw->header_blob_table[cpos+1] = (len >> 8) & 255;
1060+ memcpy(vmaw->header_blob_table + cpos + 2, data, len);
1061+ vmaw->header_blob_table_pos += len + 2;
1062+ return cpos;
1063+}
1064+
1065+static uint32_t allocate_header_string(VmaWriter *vmaw, const char *str)
1066+{
1067+ assert(vmaw);
1068+
1069+ size_t len = strlen(str) + 1;
1070+
1071+ return allocate_header_blob(vmaw, str, len);
1072+}
1073+
1074+int vma_writer_add_config(VmaWriter *vmaw, const char *name, gpointer data,
1075+ gsize len)
1076+{
1077+ assert(vmaw);
1078+ assert(!vmaw->header_written);
1079+ assert(vmaw->config_count < VMA_MAX_CONFIGS);
1080+ assert(name);
1081+ assert(data);
95259824
WB
1082+
1083+ gchar *basename = g_path_get_basename(name);
1084+ uint32_t name_ptr = allocate_header_string(vmaw, basename);
1085+ g_free(basename);
1086+
1087+ if (!name_ptr) {
1088+ return -1;
1089+ }
1090+
1091+ uint32_t data_ptr = allocate_header_blob(vmaw, data, len);
1092+ if (!data_ptr) {
1093+ return -1;
1094+ }
1095+
1096+ vmaw->config_names[vmaw->config_count] = name_ptr;
1097+ vmaw->config_data[vmaw->config_count] = data_ptr;
1098+
1099+ vmaw->config_count++;
1100+
1101+ return 0;
1102+}
1103+
1104+int vma_writer_register_stream(VmaWriter *vmaw, const char *devname,
1105+ size_t size)
1106+{
1107+ assert(vmaw);
1108+ assert(devname);
1109+ assert(!vmaw->status);
1110+
1111+ if (vmaw->header_written) {
1112+ vma_writer_set_error(vmaw, "vma_writer_register_stream: header "
1113+ "already written");
1114+ return -1;
1115+ }
1116+
1117+ guint n = vmaw->stream_count + 1;
1118+
1119+ /* we can have dev_ids form 1 to 255 (0 reserved)
1120+ * 255(-1) reseverd for safety
1121+ */
1122+ if (n > 254) {
1123+ vma_writer_set_error(vmaw, "vma_writer_register_stream: "
1124+ "too many drives");
1125+ return -1;
1126+ }
1127+
1128+ if (size <= 0) {
1129+ vma_writer_set_error(vmaw, "vma_writer_register_stream: "
1130+ "got strange size %zd", size);
1131+ return -1;
1132+ }
1133+
1134+ DPRINTF("vma_writer_register_stream %s %zu %d\n", devname, size, n);
1135+
1136+ vmaw->stream_info[n].devname = g_strdup(devname);
1137+ vmaw->stream_info[n].size = size;
1138+
1139+ vmaw->stream_info[n].cluster_count = (size + VMA_CLUSTER_SIZE - 1) /
1140+ VMA_CLUSTER_SIZE;
1141+
1142+ vmaw->stream_count = n;
1143+
1144+ if (strcmp(devname, "vmstate") == 0) {
1145+ vmaw->vmstate_stream = n;
1146+ }
1147+
1148+ return n;
1149+}
1150+
6402d961 1151+static void coroutine_fn yield_until_fd_writable(int fd)
95259824 1152+{
6402d961
TL
1153+ assert(qemu_in_coroutine());
1154+ AioContext *ctx = qemu_get_current_aio_context();
10e10933
FE
1155+ aio_set_fd_handler(ctx, fd, NULL, (IOHandler *)qemu_coroutine_enter, NULL,
1156+ NULL, qemu_coroutine_self());
6402d961 1157+ qemu_coroutine_yield();
10e10933 1158+ aio_set_fd_handler(ctx, fd, NULL, NULL, NULL, NULL, NULL);
95259824
WB
1159+}
1160+
1161+static ssize_t coroutine_fn
67af0fa4 1162+vma_queue_write(VmaWriter *vmaw, const void *buf, size_t bytes)
95259824 1163+{
67af0fa4 1164+ DPRINTF("vma_queue_write enter %zd\n", bytes);
95259824 1165+
67af0fa4
WB
1166+ assert(vmaw);
1167+ assert(buf);
1168+ assert(bytes <= VMA_MAX_EXTENT_SIZE);
95259824 1169+
67af0fa4
WB
1170+ size_t done = 0;
1171+ ssize_t ret;
95259824
WB
1172+
1173+ assert(vmaw->co_writer == NULL);
1174+
1175+ vmaw->co_writer = qemu_coroutine_self();
1176+
95259824 1177+ while (done < bytes) {
67af0fa4
WB
1178+ if (vmaw->status < 0) {
1179+ DPRINTF("vma_queue_write detected canceled backup\n");
1180+ done = -1;
1181+ break;
1182+ }
6402d961 1183+ yield_until_fd_writable(vmaw->fd);
95259824
WB
1184+ ret = write(vmaw->fd, buf + done, bytes - done);
1185+ if (ret > 0) {
1186+ done += ret;
67af0fa4 1187+ DPRINTF("vma_queue_write written %zd %zd\n", done, ret);
95259824
WB
1188+ } else if (ret < 0) {
1189+ if (errno == EAGAIN || errno == EWOULDBLOCK) {
67af0fa4
WB
1190+ /* try again */
1191+ } else {
1192+ vma_writer_set_error(vmaw, "vma_queue_write: write error - %s",
95259824
WB
1193+ g_strerror(errno));
1194+ done = -1; /* always return failure for partial writes */
1195+ break;
1196+ }
1197+ } else if (ret == 0) {
1198+ /* should not happen - simply try again */
1199+ }
1200+ }
1201+
95259824
WB
1202+ vmaw->co_writer = NULL;
1203+
67af0fa4 1204+ return (done == bytes) ? bytes : -1;
95259824
WB
1205+}
1206+
76394996
FE
1207+static bool is_path_tmpfs(const char *path) {
1208+ struct statfs fs;
1209+ int ret;
1210+
1211+ do {
1212+ ret = statfs(path, &fs);
1213+ } while (ret != 0 && errno == EINTR);
1214+
1215+ if (ret != 0) {
1216+ warn_report("statfs call for %s failed, assuming not tmpfs - %s\n",
1217+ path, strerror(errno));
1218+ return false;
1219+ }
1220+
1221+ return fs.f_type == TMPFS_MAGIC;
1222+}
1223+
95259824
WB
1224+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp)
1225+{
1226+ const char *p;
1227+
1228+ assert(sizeof(VmaHeader) == (4096 + 8192));
1229+ assert(G_STRUCT_OFFSET(VmaHeader, config_names) == 2044);
1230+ assert(G_STRUCT_OFFSET(VmaHeader, config_data) == 3068);
1231+ assert(G_STRUCT_OFFSET(VmaHeader, dev_info) == 4096);
1232+ assert(sizeof(VmaExtentHeader) == 512);
1233+
1234+ VmaWriter *vmaw = g_new0(VmaWriter, 1);
1235+ vmaw->fd = -1;
1236+
1237+ vmaw->md5csum = g_checksum_new(G_CHECKSUM_MD5);
1238+ if (!vmaw->md5csum) {
1239+ error_setg(errp, "can't allocate cmsum\n");
1240+ goto err;
1241+ }
1242+
1243+ if (strstart(filename, "exec:", &p)) {
1244+ vmaw->cmd = popen(p, "w");
1245+ if (vmaw->cmd == NULL) {
1246+ error_setg(errp, "can't popen command '%s' - %s\n", p,
1247+ g_strerror(errno));
1248+ goto err;
1249+ }
1250+ vmaw->fd = fileno(vmaw->cmd);
1251+
67af0fa4 1252+ /* try to use O_NONBLOCK */
95259824 1253+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
95259824
WB
1254+
1255+ } else {
1256+ struct stat st;
1257+ int oflags;
1258+ const char *tmp_id_str;
1259+
1260+ if ((stat(filename, &st) == 0) && S_ISFIFO(st.st_mode)) {
67af0fa4 1261+ oflags = O_NONBLOCK|O_WRONLY;
817b7667 1262+ vmaw->fd = qemu_open(filename, oflags, errp);
95259824 1263+ } else if (strstart(filename, "/dev/fdset/", &tmp_id_str)) {
67af0fa4 1264+ oflags = O_NONBLOCK|O_WRONLY;
817b7667 1265+ vmaw->fd = qemu_open(filename, oflags, errp);
95259824 1266+ } else if (strstart(filename, "/dev/fdname/", &tmp_id_str)) {
817b7667 1267+ vmaw->fd = monitor_get_fd(monitor_cur(), tmp_id_str, errp);
95259824
WB
1268+ if (vmaw->fd < 0) {
1269+ goto err;
1270+ }
67af0fa4 1271+ /* try to use O_NONBLOCK */
95259824 1272+ fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
76394996
FE
1273+ } else {
1274+ gchar *dirname = g_path_get_dirname(filename);
1275+ oflags = O_NONBLOCK|O_WRONLY|O_EXCL;
1276+ if (!is_path_tmpfs(dirname)) {
1277+ oflags |= O_DIRECT;
1278+ }
1279+ g_free(dirname);
817b7667 1280+ vmaw->fd = qemu_create(filename, oflags, 0644, errp);
95259824
WB
1281+ }
1282+
1283+ if (vmaw->fd < 0) {
db5d2a4b
FE
1284+ error_free(*errp);
1285+ *errp = NULL;
95259824
WB
1286+ error_setg(errp, "can't open file %s - %s\n", filename,
1287+ g_strerror(errno));
1288+ goto err;
1289+ }
1290+ }
1291+
1292+ /* we use O_DIRECT, so we need to align IO buffers */
67af0fa4
WB
1293+
1294+ vmaw->outbuf = qemu_memalign(512, VMA_MAX_EXTENT_SIZE);
1295+ vmaw->headerbuf = qemu_memalign(512, HEADERBUF_SIZE);
95259824
WB
1296+
1297+ vmaw->outbuf_count = 0;
1298+ vmaw->outbuf_pos = VMA_EXTENT_HEADER_SIZE;
1299+
1300+ vmaw->header_blob_table_pos = 1; /* start at pos 1 */
1301+
95259824 1302+ qemu_co_mutex_init(&vmaw->flush_lock);
95259824
WB
1303+
1304+ uuid_copy(vmaw->uuid, uuid);
1305+
1306+ return vmaw;
1307+
1308+err:
1309+ if (vmaw) {
1310+ if (vmaw->cmd) {
1311+ pclose(vmaw->cmd);
1312+ } else if (vmaw->fd >= 0) {
1313+ close(vmaw->fd);
1314+ }
1315+
1316+ if (vmaw->md5csum) {
1317+ g_checksum_free(vmaw->md5csum);
1318+ }
1319+
1320+ g_free(vmaw);
1321+ }
1322+
1323+ return NULL;
1324+}
1325+
1326+static int coroutine_fn vma_write_header(VmaWriter *vmaw)
1327+{
1328+ assert(vmaw);
67af0fa4 1329+ unsigned char *buf = vmaw->headerbuf;
95259824
WB
1330+ VmaHeader *head = (VmaHeader *)buf;
1331+
1332+ int i;
1333+
1334+ DPRINTF("VMA WRITE HEADER\n");
1335+
1336+ if (vmaw->status < 0) {
1337+ return vmaw->status;
1338+ }
1339+
67af0fa4 1340+ memset(buf, 0, HEADERBUF_SIZE);
95259824
WB
1341+
1342+ head->magic = VMA_MAGIC;
1343+ head->version = GUINT32_TO_BE(1); /* v1 */
1344+ memcpy(head->uuid, vmaw->uuid, 16);
1345+
1346+ time_t ctime = time(NULL);
1347+ head->ctime = GUINT64_TO_BE(ctime);
1348+
95259824
WB
1349+ for (i = 0; i < VMA_MAX_CONFIGS; i++) {
1350+ head->config_names[i] = GUINT32_TO_BE(vmaw->config_names[i]);
1351+ head->config_data[i] = GUINT32_TO_BE(vmaw->config_data[i]);
1352+ }
1353+
1354+ /* 32 bytes per device (12 used currently) = 8192 bytes max */
1355+ for (i = 1; i <= 254; i++) {
1356+ VmaStreamInfo *si = &vmaw->stream_info[i];
1357+ if (si->size) {
1358+ assert(si->devname);
1359+ uint32_t devname_ptr = allocate_header_string(vmaw, si->devname);
1360+ if (!devname_ptr) {
1361+ return -1;
1362+ }
1363+ head->dev_info[i].devname_ptr = GUINT32_TO_BE(devname_ptr);
1364+ head->dev_info[i].size = GUINT64_TO_BE(si->size);
1365+ }
1366+ }
1367+
1368+ uint32_t header_size = sizeof(VmaHeader) + vmaw->header_blob_table_size;
1369+ head->header_size = GUINT32_TO_BE(header_size);
1370+
67af0fa4 1371+ if (header_size > HEADERBUF_SIZE) {
95259824
WB
1372+ return -1; /* just to be sure */
1373+ }
1374+
1375+ uint32_t blob_buffer_offset = sizeof(VmaHeader);
1376+ memcpy(buf + blob_buffer_offset, vmaw->header_blob_table,
1377+ vmaw->header_blob_table_size);
1378+ head->blob_buffer_offset = GUINT32_TO_BE(blob_buffer_offset);
1379+ head->blob_buffer_size = GUINT32_TO_BE(vmaw->header_blob_table_pos);
1380+
1381+ g_checksum_reset(vmaw->md5csum);
1382+ g_checksum_update(vmaw->md5csum, (const guchar *)buf, header_size);
1383+ gsize csize = 16;
1384+ g_checksum_get_digest(vmaw->md5csum, (guint8 *)(head->md5sum), &csize);
1385+
1386+ return vma_queue_write(vmaw, buf, header_size);
1387+}
1388+
1389+static int coroutine_fn vma_writer_flush(VmaWriter *vmaw)
1390+{
1391+ assert(vmaw);
1392+
1393+ int ret;
1394+ int i;
1395+
1396+ if (vmaw->status < 0) {
1397+ return vmaw->status;
1398+ }
1399+
1400+ if (!vmaw->header_written) {
1401+ vmaw->header_written = true;
1402+ ret = vma_write_header(vmaw);
1403+ if (ret < 0) {
1404+ vma_writer_set_error(vmaw, "vma_writer_flush: write header failed");
1405+ return ret;
1406+ }
1407+ }
1408+
1409+ DPRINTF("VMA WRITE FLUSH %d %d\n", vmaw->outbuf_count, vmaw->outbuf_pos);
1410+
1411+
1412+ VmaExtentHeader *ehead = (VmaExtentHeader *)vmaw->outbuf;
1413+
1414+ ehead->magic = VMA_EXTENT_MAGIC;
1415+ ehead->reserved1 = 0;
1416+
1417+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
1418+ ehead->blockinfo[i] = GUINT64_TO_BE(vmaw->outbuf_block_info[i]);
1419+ }
1420+
1421+ guint16 block_count = (vmaw->outbuf_pos - VMA_EXTENT_HEADER_SIZE) /
1422+ VMA_BLOCK_SIZE;
1423+
1424+ ehead->block_count = GUINT16_TO_BE(block_count);
1425+
1426+ memcpy(ehead->uuid, vmaw->uuid, sizeof(ehead->uuid));
1427+ memset(ehead->md5sum, 0, sizeof(ehead->md5sum));
1428+
1429+ g_checksum_reset(vmaw->md5csum);
1430+ g_checksum_update(vmaw->md5csum, vmaw->outbuf, VMA_EXTENT_HEADER_SIZE);
1431+ gsize csize = 16;
1432+ g_checksum_get_digest(vmaw->md5csum, ehead->md5sum, &csize);
1433+
1434+ int bytes = vmaw->outbuf_pos;
1435+ ret = vma_queue_write(vmaw, vmaw->outbuf, bytes);
1436+ if (ret != bytes) {
1437+ vma_writer_set_error(vmaw, "vma_writer_flush: failed write");
1438+ }
1439+
1440+ vmaw->outbuf_count = 0;
1441+ vmaw->outbuf_pos = VMA_EXTENT_HEADER_SIZE;
1442+
1443+ for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
1444+ vmaw->outbuf_block_info[i] = 0;
1445+ }
1446+
1447+ return vmaw->status;
1448+}
1449+
1450+static int vma_count_open_streams(VmaWriter *vmaw)
1451+{
1452+ g_assert(vmaw != NULL);
1453+
1454+ int i;
1455+ int open_drives = 0;
1456+ for (i = 0; i <= 255; i++) {
1457+ if (vmaw->stream_info[i].size && !vmaw->stream_info[i].finished) {
1458+ open_drives++;
1459+ }
1460+ }
1461+
1462+ return open_drives;
1463+}
1464+
67af0fa4
WB
1465+
1466+/**
1467+ * You need to call this if the vma archive does not contain
1468+ * any data stream.
1469+ */
1470+int coroutine_fn
1471+vma_writer_flush_output(VmaWriter *vmaw)
1472+{
1473+ qemu_co_mutex_lock(&vmaw->flush_lock);
1474+ int ret = vma_writer_flush(vmaw);
1475+ qemu_co_mutex_unlock(&vmaw->flush_lock);
1476+ if (ret < 0) {
1477+ vma_writer_set_error(vmaw, "vma_writer_flush_header failed");
1478+ }
1479+ return ret;
1480+}
1481+
95259824
WB
1482+/**
1483+ * all jobs should call this when there is no more data
1484+ * Returns: number of remaining stream (0 ==> finished)
1485+ */
1486+int coroutine_fn
1487+vma_writer_close_stream(VmaWriter *vmaw, uint8_t dev_id)
1488+{
1489+ g_assert(vmaw != NULL);
1490+
1491+ DPRINTF("vma_writer_set_status %d\n", dev_id);
1492+ if (!vmaw->stream_info[dev_id].size) {
1493+ vma_writer_set_error(vmaw, "vma_writer_close_stream: "
1494+ "no such stream %d", dev_id);
1495+ return -1;
1496+ }
1497+ if (vmaw->stream_info[dev_id].finished) {
1498+ vma_writer_set_error(vmaw, "vma_writer_close_stream: "
1499+ "stream already closed %d", dev_id);
1500+ return -1;
1501+ }
1502+
1503+ vmaw->stream_info[dev_id].finished = true;
1504+
1505+ int open_drives = vma_count_open_streams(vmaw);
1506+
1507+ if (open_drives <= 0) {
1508+ DPRINTF("vma_writer_set_status all drives completed\n");
67af0fa4 1509+ vma_writer_flush_output(vmaw);
95259824
WB
1510+ }
1511+
1512+ return open_drives;
1513+}
1514+
1515+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status)
1516+{
1517+ int i;
1518+
1519+ g_assert(vmaw != NULL);
1520+
1521+ if (status) {
1522+ status->status = vmaw->status;
1523+ g_strlcpy(status->errmsg, vmaw->errmsg, sizeof(status->errmsg));
1524+ for (i = 0; i <= 255; i++) {
1525+ status->stream_info[i] = vmaw->stream_info[i];
1526+ }
1527+
1528+ uuid_unparse_lower(vmaw->uuid, status->uuid_str);
1529+ }
1530+
1531+ status->closed = vmaw->closed;
1532+
1533+ return vmaw->status;
1534+}
1535+
1536+static int vma_writer_get_buffer(VmaWriter *vmaw)
1537+{
1538+ int ret = 0;
1539+
1540+ qemu_co_mutex_lock(&vmaw->flush_lock);
1541+
1542+ /* wait until buffer is available */
1543+ while (vmaw->outbuf_count >= (VMA_BLOCKS_PER_EXTENT - 1)) {
1544+ ret = vma_writer_flush(vmaw);
1545+ if (ret < 0) {
1546+ vma_writer_set_error(vmaw, "vma_writer_get_buffer: flush failed");
1547+ break;
1548+ }
1549+ }
1550+
1551+ qemu_co_mutex_unlock(&vmaw->flush_lock);
1552+
1553+ return ret;
1554+}
1555+
1556+
1557+int64_t coroutine_fn
1558+vma_writer_write(VmaWriter *vmaw, uint8_t dev_id, int64_t cluster_num,
6838f038 1559+ const unsigned char *buf, size_t *zero_bytes)
95259824
WB
1560+{
1561+ g_assert(vmaw != NULL);
1562+ g_assert(zero_bytes != NULL);
1563+
1564+ *zero_bytes = 0;
1565+
1566+ if (vmaw->status < 0) {
1567+ return vmaw->status;
1568+ }
1569+
1570+ if (!dev_id || !vmaw->stream_info[dev_id].size) {
1571+ vma_writer_set_error(vmaw, "vma_writer_write: "
1572+ "no such stream %d", dev_id);
1573+ return -1;
1574+ }
1575+
1576+ if (vmaw->stream_info[dev_id].finished) {
1577+ vma_writer_set_error(vmaw, "vma_writer_write: "
1578+ "stream already closed %d", dev_id);
1579+ return -1;
1580+ }
1581+
1582+
1583+ if (cluster_num >= (((uint64_t)1)<<32)) {
1584+ vma_writer_set_error(vmaw, "vma_writer_write: "
1585+ "cluster number out of range");
1586+ return -1;
1587+ }
1588+
1589+ if (dev_id == vmaw->vmstate_stream) {
1590+ if (cluster_num != vmaw->vmstate_clusters) {
1591+ vma_writer_set_error(vmaw, "vma_writer_write: "
1592+ "non sequential vmstate write");
1593+ }
1594+ vmaw->vmstate_clusters++;
1595+ } else if (cluster_num >= vmaw->stream_info[dev_id].cluster_count) {
1596+ vma_writer_set_error(vmaw, "vma_writer_write: cluster number too big");
1597+ return -1;
1598+ }
1599+
1600+ /* wait until buffer is available */
1601+ if (vma_writer_get_buffer(vmaw) < 0) {
1602+ vma_writer_set_error(vmaw, "vma_writer_write: "
1603+ "vma_writer_get_buffer failed");
1604+ return -1;
1605+ }
1606+
1607+ DPRINTF("VMA WRITE %d %zd\n", dev_id, cluster_num);
1608+
d7f4e01a 1609+ uint64_t dev_size = vmaw->stream_info[dev_id].size;
95259824
WB
1610+ uint16_t mask = 0;
1611+
1612+ if (buf) {
1613+ int i;
1614+ int bit = 1;
d7f4e01a 1615+ uint64_t byte_offset = cluster_num * VMA_CLUSTER_SIZE;
95259824 1616+ for (i = 0; i < 16; i++) {
6838f038 1617+ const unsigned char *vmablock = buf + (i*VMA_BLOCK_SIZE);
d7f4e01a
TL
1618+
1619+ // Note: If the source is not 64k-aligned, we might reach 4k blocks
1620+ // after the end of the device. Always mark these as zero in the
1621+ // mask, so the restore handles them correctly.
1622+ if (byte_offset < dev_size &&
1623+ !buffer_is_zero(vmablock, VMA_BLOCK_SIZE))
1624+ {
95259824
WB
1625+ mask |= bit;
1626+ memcpy(vmaw->outbuf + vmaw->outbuf_pos, vmablock,
1627+ VMA_BLOCK_SIZE);
d7f4e01a
TL
1628+
1629+ // prevent memory leakage on unaligned last block
1630+ if (byte_offset + VMA_BLOCK_SIZE > dev_size) {
1631+ uint64_t real_data_in_block = dev_size - byte_offset;
1632+ memset(vmaw->outbuf + vmaw->outbuf_pos + real_data_in_block,
1633+ 0, VMA_BLOCK_SIZE - real_data_in_block);
1634+ }
1635+
95259824
WB
1636+ vmaw->outbuf_pos += VMA_BLOCK_SIZE;
1637+ } else {
1638+ DPRINTF("VMA WRITE %zd ZERO BLOCK %d\n", cluster_num, i);
1639+ vmaw->stream_info[dev_id].zero_bytes += VMA_BLOCK_SIZE;
1640+ *zero_bytes += VMA_BLOCK_SIZE;
1641+ }
1642+
d7f4e01a 1643+ byte_offset += VMA_BLOCK_SIZE;
95259824
WB
1644+ bit = bit << 1;
1645+ }
1646+ } else {
1647+ DPRINTF("VMA WRITE %zd ZERO CLUSTER\n", cluster_num);
1648+ vmaw->stream_info[dev_id].zero_bytes += VMA_CLUSTER_SIZE;
1649+ *zero_bytes += VMA_CLUSTER_SIZE;
1650+ }
1651+
1652+ uint64_t block_info = ((uint64_t)mask) << (32+16);
1653+ block_info |= ((uint64_t)dev_id) << 32;
1654+ block_info |= (cluster_num & 0xffffffff);
1655+ vmaw->outbuf_block_info[vmaw->outbuf_count] = block_info;
1656+
1657+ DPRINTF("VMA WRITE MASK %zd %zx\n", cluster_num, block_info);
1658+
1659+ vmaw->outbuf_count++;
1660+
1661+ /** NOTE: We allways write whole clusters, but we correctly set
1662+ * transferred bytes. So transferred == size when when everything
1663+ * went OK.
1664+ */
1665+ size_t transferred = VMA_CLUSTER_SIZE;
1666+
1667+ if (dev_id != vmaw->vmstate_stream) {
1668+ uint64_t last = (cluster_num + 1) * VMA_CLUSTER_SIZE;
d7f4e01a
TL
1669+ if (last > dev_size) {
1670+ uint64_t diff = last - dev_size;
95259824
WB
1671+ if (diff >= VMA_CLUSTER_SIZE) {
1672+ vma_writer_set_error(vmaw, "vma_writer_write: "
1673+ "read after last cluster");
1674+ return -1;
1675+ }
1676+ transferred -= diff;
1677+ }
1678+ }
1679+
1680+ vmaw->stream_info[dev_id].transferred += transferred;
1681+
1682+ return transferred;
1683+}
1684+
67af0fa4
WB
1685+void vma_writer_error_propagate(VmaWriter *vmaw, Error **errp)
1686+{
1687+ if (vmaw->status < 0 && *errp == NULL) {
1688+ error_setg(errp, "%s", vmaw->errmsg);
1689+ }
1690+}
1691+
95259824
WB
1692+int vma_writer_close(VmaWriter *vmaw, Error **errp)
1693+{
1694+ g_assert(vmaw != NULL);
1695+
1696+ int i;
1697+
6402d961 1698+ qemu_co_mutex_lock(&vmaw->flush_lock); // wait for pending writes
95259824 1699+
67af0fa4
WB
1700+ assert(vmaw->co_writer == NULL);
1701+
95259824
WB
1702+ if (vmaw->cmd) {
1703+ if (pclose(vmaw->cmd) < 0) {
1704+ vma_writer_set_error(vmaw, "vma_writer_close: "
1705+ "pclose failed - %s", g_strerror(errno));
1706+ }
1707+ } else {
1708+ if (close(vmaw->fd) < 0) {
1709+ vma_writer_set_error(vmaw, "vma_writer_close: "
1710+ "close failed - %s", g_strerror(errno));
1711+ }
1712+ }
1713+
1714+ for (i = 0; i <= 255; i++) {
1715+ VmaStreamInfo *si = &vmaw->stream_info[i];
1716+ if (si->size) {
1717+ if (!si->finished) {
1718+ vma_writer_set_error(vmaw, "vma_writer_close: "
1719+ "detected open stream '%s'", si->devname);
1720+ } else if ((si->transferred != si->size) &&
1721+ (i != vmaw->vmstate_stream)) {
1722+ vma_writer_set_error(vmaw, "vma_writer_close: "
1723+ "incomplete stream '%s' (%zd != %zd)",
1724+ si->devname, si->transferred, si->size);
1725+ }
1726+ }
1727+ }
1728+
1729+ for (i = 0; i <= 255; i++) {
1730+ vmaw->stream_info[i].finished = 1; /* mark as closed */
1731+ }
1732+
1733+ vmaw->closed = 1;
1734+
1735+ if (vmaw->status < 0 && *errp == NULL) {
1736+ error_setg(errp, "%s", vmaw->errmsg);
1737+ }
1738+
6402d961
TL
1739+ qemu_co_mutex_unlock(&vmaw->flush_lock);
1740+
95259824
WB
1741+ return vmaw->status;
1742+}
1743+
1744+void vma_writer_destroy(VmaWriter *vmaw)
1745+{
1746+ assert(vmaw);
1747+
1748+ int i;
1749+
1750+ for (i = 0; i <= 255; i++) {
1751+ if (vmaw->stream_info[i].devname) {
1752+ g_free(vmaw->stream_info[i].devname);
1753+ }
1754+ }
1755+
1756+ if (vmaw->md5csum) {
1757+ g_checksum_free(vmaw->md5csum);
1758+ }
1759+
d7f4e01a
TL
1760+ qemu_vfree(vmaw->headerbuf);
1761+ qemu_vfree(vmaw->outbuf);
95259824
WB
1762+ g_free(vmaw);
1763+}
1764diff --git a/vma.c b/vma.c
1765new file mode 100644
f1eed34a 1766index 0000000000..bb715e9061
95259824
WB
1767--- /dev/null
1768+++ b/vma.c
f1eed34a 1769@@ -0,0 +1,901 @@
95259824
WB
1770+/*
1771+ * VMA: Virtual Machine Archive
1772+ *
1773+ * Copyright (C) 2012-2013 Proxmox Server Solutions
1774+ *
1775+ * Authors:
1776+ * Dietmar Maurer (dietmar@proxmox.com)
1777+ *
1778+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
1779+ * See the COPYING file in the top-level directory.
1780+ *
1781+ */
1782+
1783+#include "qemu/osdep.h"
1784+#include <glib.h>
1785+
1786+#include "vma.h"
6402d961 1787+#include "qemu/module.h"
95259824
WB
1788+#include "qemu/error-report.h"
1789+#include "qemu/main-loop.h"
6402d961 1790+#include "qemu/cutils.h"
dc9827a6 1791+#include "qemu/memalign.h"
6402d961 1792+#include "qapi/qmp/qdict.h"
67af0fa4 1793+#include "sysemu/block-backend.h"
95259824
WB
1794+
/*
 * Print the usage summary to stdout and terminate with exit status 1.
 */
static void help(void)
{
    static const char usage_text[] =
        "usage: vma command [command options]\n"
        "\n"
        "vma list <filename>\n"
        "vma config <filename> [-c config]\n"
        "vma create <filename> [-c config] pathname ...\n"
        "vma extract <filename> [-d <drive-list>] [-r <fifo>] <targetdir>\n"
        "vma verify <filename> [-v]\n";

    printf("%s", usage_text);
    exit(1);
}
1810+
/*
 * Split an optional "<devname>=" prefix off @path.
 *
 * If @path contains '=', the part before the first '=' is returned as a new
 * string in @devname and the part after it is returned. Otherwise @path is
 * returned unchanged and @devname is set to "disk<index>" for a non-negative
 * @index, or to NULL for a negative one.
 *
 * A non-NULL @devname is newly allocated.
 */
static const char *extract_devname(const char *path, char **devname, int index)
{
    assert(path);

    const char *eq = strchr(path, '=');

    if (eq != NULL) {
        *devname = g_strndup(path, eq - path);
        return eq + 1;
    }

    *devname = (index >= 0) ? g_strdup_printf("disk%d", index) : NULL;

    return path;
}
1830+
1831+static void print_content(VmaReader *vmar)
1832+{
1833+ assert(vmar);
1834+
1835+ VmaHeader *head = vma_reader_get_header(vmar);
1836+
1837+ GList *l = vma_reader_get_config_data(vmar);
1838+ while (l && l->data) {
1839+ VmaConfigData *cdata = (VmaConfigData *)l->data;
1840+ l = g_list_next(l);
1841+ printf("CFG: size: %d name: %s\n", cdata->len, cdata->name);
1842+ }
1843+
1844+ int i;
1845+ VmaDeviceInfo *di;
1846+ for (i = 1; i < 255; i++) {
1847+ di = vma_reader_get_device_info(vmar, i);
1848+ if (di) {
1849+ if (strcmp(di->devname, "vmstate") == 0) {
1850+ printf("VMSTATE: dev_id=%d memory: %zd\n", i, di->size);
1851+ } else {
1852+ printf("DEV: dev_id=%d size: %zd devname: %s\n",
1853+ i, di->size, di->devname);
1854+ }
1855+ }
1856+ }
1857+ /* ctime is the last entry we print */
1858+ printf("CTIME: %s", ctime(&head->ctime));
1859+ fflush(stdout);
1860+}
1861+
1862+static int list_content(int argc, char **argv)
1863+{
1864+ int c, ret = 0;
1865+ const char *filename;
1866+
1867+ for (;;) {
1868+ c = getopt(argc, argv, "h");
1869+ if (c == -1) {
1870+ break;
1871+ }
1872+ switch (c) {
1873+ case '?':
1874+ case 'h':
1875+ help();
1876+ break;
1877+ default:
1878+ g_assert_not_reached();
1879+ }
1880+ }
1881+
1882+ /* Get the filename */
1883+ if ((optind + 1) != argc) {
1884+ help();
1885+ }
1886+ filename = argv[optind++];
1887+
1888+ Error *errp = NULL;
1889+ VmaReader *vmar = vma_reader_create(filename, &errp);
1890+
1891+ if (!vmar) {
1892+ g_error("%s", error_get_pretty(errp));
1893+ }
1894+
1895+ print_content(vmar);
1896+
1897+ vma_reader_destroy(vmar);
1898+
1899+ return ret;
1900+}
1901+
/*
 * One entry of the restore map read by "vma extract -r": where and how a
 * single archived device gets restored.
 */
typedef struct RestoreMap {
    char *devname;           /* device name, also used as hash table key */
    char *path;              /* restore target path */
    char *format;            /* explicit block driver name, or NULL */
    uint64_t throttling_bps; /* write limit in bytes per second, 0 = none */
    char *throttling_group;  /* throttle group name, or NULL */
    char *cache;             /* cache mode string, or NULL */
    bool write_zero;         /* passed to vma_reader_register_bs() */
    bool skip;               /* do not restore this device */
} RestoreMap;
1912+
6402d961
TL
/*
 * Try to consume an "<optname>=<value>:" prefix from *@line.
 *
 * On a match the value is stored in *@out as a new string and *@line is
 * advanced past the terminating ':'. A duplicate option (*@out already set)
 * or a missing ':' is reported with g_error(); @inbuf is only used for that
 * message.
 *
 * Returns: true if the option was consumed, false if *@line does not start
 * with "<optname>=".
 */
static bool try_parse_option(char **line, const char *optname, char **out,
                             const char *inbuf)
{
    const size_t name_len = strlen(optname);
    char *cursor = *line;

    if (strncmp(cursor, optname, name_len) != 0 || cursor[name_len] != '=') {
        return false;
    }
    if (*out) {
        g_error("read map failed - duplicate value for option '%s'", optname);
    }

    char *value = cursor + name_len + 1; /* including a '=' */
    char *terminator = strchr(value, ':');
    if (!terminator) {
        g_error("read map failed - option '%s' not terminated ('%s')",
                optname, inbuf);
    }

    *line = terminator + 1;
    *out = g_strndup(value, terminator - value);
    return true;
}
1931+
/*
 * Parse @text as an unsigned 64 bit number (base auto-detected by
 * qemu_strtou64()). Anything that is not a complete number is reported with
 * g_error().
 */
static uint64_t verify_u64(const char *text)
{
    uint64_t parsed;
    const char *rest = NULL;

    /* conversion must succeed and consume the whole string */
    if (qemu_strtou64(text, &rest, 0, &parsed) != 0 ||
        rest == NULL || *rest != '\0') {
        g_error("read map failed - not a number: %s", text);
    }

    return parsed;
}
1940+
95259824
WB
1941+static int extract_content(int argc, char **argv)
1942+{
1943+ int c, ret = 0;
1944+ int verbose = 0;
1945+ const char *filename;
1946+ const char *dirname;
1947+ const char *readmap = NULL;
ef3308db 1948+ gchar **drive_list = NULL;
95259824
WB
1949+
1950+ for (;;) {
0cff91a0 1951+ c = getopt(argc, argv, "hvd:r:");
95259824
WB
1952+ if (c == -1) {
1953+ break;
1954+ }
1955+ switch (c) {
1956+ case '?':
1957+ case 'h':
1958+ help();
1959+ break;
0cff91a0
FS
1960+ case 'd':
1961+ drive_list = g_strsplit(optarg, ",", 254);
1962+ break;
95259824
WB
1963+ case 'r':
1964+ readmap = optarg;
1965+ break;
1966+ case 'v':
1967+ verbose = 1;
1968+ break;
1969+ default:
1970+ help();
1971+ }
1972+ }
1973+
1974+ /* Get the filename */
1975+ if ((optind + 2) != argc) {
1976+ help();
1977+ }
1978+ filename = argv[optind++];
1979+ dirname = argv[optind++];
1980+
1981+ Error *errp = NULL;
1982+ VmaReader *vmar = vma_reader_create(filename, &errp);
1983+
1984+ if (!vmar) {
1985+ g_error("%s", error_get_pretty(errp));
1986+ }
1987+
1988+ if (mkdir(dirname, 0777) < 0) {
1989+ g_error("unable to create target directory %s - %s",
1990+ dirname, g_strerror(errno));
1991+ }
1992+
1993+ GList *l = vma_reader_get_config_data(vmar);
1994+ while (l && l->data) {
1995+ VmaConfigData *cdata = (VmaConfigData *)l->data;
1996+ l = g_list_next(l);
1997+ char *cfgfn = g_strdup_printf("%s/%s", dirname, cdata->name);
1998+ GError *err = NULL;
1999+ if (!g_file_set_contents(cfgfn, (gchar *)cdata->data, cdata->len,
2000+ &err)) {
2001+ g_error("unable to write file: %s", err->message);
2002+ }
2003+ }
2004+
2005+ GHashTable *devmap = g_hash_table_new(g_str_hash, g_str_equal);
2006+
2007+ if (readmap) {
2008+ print_content(vmar);
2009+
2010+ FILE *map = fopen(readmap, "r");
2011+ if (!map) {
2012+ g_error("unable to open fifo %s - %s", readmap, g_strerror(errno));
2013+ }
2014+
2015+ while (1) {
2016+ char inbuf[8192];
2017+ char *line = fgets(inbuf, sizeof(inbuf), map);
6402d961
TL
2018+ char *format = NULL;
2019+ char *bps = NULL;
2020+ char *group = NULL;
2021+ char *cache = NULL;
db5d2a4b
FE
2022+ char *devname = NULL;
2023+ bool skip = false;
2024+ uint64_t bps_value = 0;
2025+ const char *path = NULL;
2026+ bool write_zero = true;
2027+
95259824
WB
2028+ if (!line || line[0] == '\0' || !strcmp(line, "done\n")) {
2029+ break;
2030+ }
2031+ int len = strlen(line);
2032+ if (line[len - 1] == '\n') {
2033+ line[len - 1] = '\0';
db5d2a4b
FE
2034+ len = len - 1;
2035+ if (len == 0) {
95259824
WB
2036+ break;
2037+ }
2038+ }
2039+
db5d2a4b
FE
2040+ if (strncmp(line, "skip", 4) == 0) {
2041+ if (len < 6 || line[4] != '=') {
2042+ g_error("read map failed - option 'skip' has no value ('%s')",
2043+ inbuf);
2044+ } else {
2045+ devname = line + 5;
2046+ skip = true;
2047+ }
2048+ } else {
2049+ while (1) {
2050+ if (!try_parse_option(&line, "format", &format, inbuf) &&
2051+ !try_parse_option(&line, "throttling.bps", &bps, inbuf) &&
2052+ !try_parse_option(&line, "throttling.group", &group, inbuf) &&
2053+ !try_parse_option(&line, "cache", &cache, inbuf))
2054+ {
2055+ break;
2056+ }
67af0fa4 2057+ }
6402d961 2058+
db5d2a4b
FE
2059+ if (bps) {
2060+ bps_value = verify_u64(bps);
2061+ g_free(bps);
2062+ }
67af0fa4 2063+
db5d2a4b
FE
2064+ if (line[0] == '0' && line[1] == ':') {
2065+ path = line + 2;
2066+ write_zero = false;
2067+ } else if (line[0] == '1' && line[1] == ':') {
2068+ path = line + 2;
2069+ write_zero = true;
2070+ } else {
2071+ g_error("read map failed - parse error ('%s')", inbuf);
2072+ }
2073+
2074+ path = extract_devname(path, &devname, -1);
95259824
WB
2075+ }
2076+
95259824
WB
2077+ if (!devname) {
2078+ g_error("read map failed - no dev name specified ('%s')",
2079+ inbuf);
2080+ }
2081+
f1eed34a
FE
2082+ RestoreMap *restore_map = g_new0(RestoreMap, 1);
2083+ restore_map->devname = g_strdup(devname);
2084+ restore_map->path = g_strdup(path);
2085+ restore_map->format = format;
2086+ restore_map->throttling_bps = bps_value;
2087+ restore_map->throttling_group = group;
2088+ restore_map->cache = cache;
2089+ restore_map->write_zero = write_zero;
2090+ restore_map->skip = skip;
95259824 2091+
f1eed34a 2092+ g_hash_table_insert(devmap, restore_map->devname, restore_map);
95259824
WB
2093+
2094+ };
2095+ }
2096+
2097+ int i;
2098+ int vmstate_fd = -1;
0cff91a0
FS
2099+ bool drive_rename_bitmap[255];
2100+ memset(drive_rename_bitmap, 0, sizeof(drive_rename_bitmap));
95259824
WB
2101+
2102+ for (i = 1; i < 255; i++) {
2103+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
2104+ if (di && (strcmp(di->devname, "vmstate") == 0)) {
95259824
WB
2105+ char *statefn = g_strdup_printf("%s/vmstate.bin", dirname);
2106+ vmstate_fd = open(statefn, O_WRONLY|O_CREAT|O_EXCL, 0644);
2107+ if (vmstate_fd < 0) {
2108+ g_error("create vmstate file '%s' failed - %s", statefn,
2109+ g_strerror(errno));
2110+ }
2111+ g_free(statefn);
2112+ } else if (di) {
2113+ char *devfn = NULL;
67af0fa4 2114+ const char *format = NULL;
6402d961
TL
2115+ uint64_t throttling_bps = 0;
2116+ const char *throttling_group = NULL;
2117+ const char *cache = NULL;
2118+ int flags = BDRV_O_RDWR;
95259824 2119+ bool write_zero = true;
db5d2a4b 2120+ bool skip = false;
95259824 2121+
2653a5f0
TL
2122+ BlockBackend *blk = NULL;
2123+
0cff91a0
FS
2124+ if (drive_list) {
2125+ skip = true;
2126+ int j;
2127+ for (j = 0; drive_list[j]; j++) {
2128+ if (strcmp(drive_list[j], di->devname) == 0) {
2129+ skip = false;
2130+ drive_rename_bitmap[i] = true;
2131+ break;
2132+ }
2133+ }
2134+ } else {
2135+ drive_rename_bitmap[i] = true;
2136+ }
2137+
2138+ if (!skip && readmap) {
95259824
WB
2139+ RestoreMap *map;
2140+ map = (RestoreMap *)g_hash_table_lookup(devmap, di->devname);
2141+ if (map == NULL) {
2142+ g_error("no device name mapping for %s", di->devname);
2143+ }
2144+ devfn = map->path;
67af0fa4 2145+ format = map->format;
6402d961
TL
2146+ throttling_bps = map->throttling_bps;
2147+ throttling_group = map->throttling_group;
2148+ cache = map->cache;
95259824 2149+ write_zero = map->write_zero;
db5d2a4b 2150+ skip = map->skip;
0cff91a0 2151+ } else if (!skip) {
95259824
WB
2152+ devfn = g_strdup_printf("%s/tmp-disk-%s.raw",
2153+ dirname, di->devname);
2154+ printf("DEVINFO %s %zd\n", devfn, di->size);
2155+
2156+ bdrv_img_create(devfn, "raw", NULL, NULL, NULL, di->size,
6838f038 2157+ flags, true, &errp);
95259824
WB
2158+ if (errp) {
2159+ g_error("can't create file %s: %s", devfn,
2160+ error_get_pretty(errp));
2161+ }
2162+
2163+ /* Note: we created an empty file above, so there is no
2164+ * need to write zeroes (so we generate a sparse file)
2165+ */
2166+ write_zero = false;
2167+ }
2168+
db5d2a4b
FE
2169+ if (!skip) {
2170+ size_t devlen = strlen(devfn);
2171+ QDict *options = NULL;
2172+ bool writethrough;
2173+ if (format) {
2174+ /* explicit format from commandline */
2175+ options = qdict_new();
2176+ qdict_put_str(options, "driver", format);
2177+ } else if ((devlen > 4 && strcmp(devfn+devlen-4, ".raw") == 0) ||
2178+ strncmp(devfn, "/dev/", 5) == 0)
2179+ {
2180+ /* This part is now deprecated for PVE as well (just as qemu
2181+ * deprecated not specifying an explicit raw format, too.
2182+ */
2183+ /* explicit raw format */
2184+ options = qdict_new();
2185+ qdict_put_str(options, "driver", "raw");
2186+ }
67af0fa4 2187+
db5d2a4b
FE
2188+ if (cache && bdrv_parse_cache_mode(cache, &flags, &writethrough)) {
2189+ g_error("invalid cache option: %s\n", cache);
2190+ }
67af0fa4 2191+
db5d2a4b
FE
2192+ if (errp || !(blk = blk_new_open(devfn, NULL, options, flags, &errp))) {
2193+ g_error("can't open file %s - %s", devfn,
2194+ error_get_pretty(errp));
2195+ }
6402d961 2196+
db5d2a4b
FE
2197+ if (cache) {
2198+ blk_set_enable_write_cache(blk, !writethrough);
2199+ }
6402d961 2200+
db5d2a4b
FE
2201+ if (throttling_group) {
2202+ blk_io_limits_enable(blk, throttling_group);
6402d961
TL
2203+ }
2204+
db5d2a4b
FE
2205+ if (throttling_bps) {
2206+ if (!throttling_group) {
2207+ blk_io_limits_enable(blk, devfn);
2208+ }
2209+
2210+ ThrottleConfig cfg;
2211+ throttle_config_init(&cfg);
2212+ cfg.buckets[THROTTLE_BPS_WRITE].avg = throttling_bps;
2213+ Error *err = NULL;
2214+ if (!throttle_is_valid(&cfg, &err)) {
2215+ error_report_err(err);
2216+ g_error("failed to apply throttling");
2217+ }
2218+ blk_set_io_limits(blk, &cfg);
6402d961 2219+ }
6402d961
TL
2220+ }
2221+
db5d2a4b 2222+ if (vma_reader_register_bs(vmar, i, blk, write_zero, skip, &errp) < 0) {
95259824
WB
2223+ g_error("%s", error_get_pretty(errp));
2224+ }
2225+
2226+ if (!readmap) {
2227+ g_free(devfn);
2228+ }
2229+ }
2230+ }
2231+
0cff91a0
FS
2232+ if (drive_list) {
2233+ g_strfreev(drive_list);
2234+ }
2235+
95259824
WB
2236+ if (vma_reader_restore(vmar, vmstate_fd, verbose, &errp) < 0) {
2237+ g_error("restore failed - %s", error_get_pretty(errp));
2238+ }
2239+
2240+ if (!readmap) {
2241+ for (i = 1; i < 255; i++) {
2242+ VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
0cff91a0 2243+ if (di && drive_rename_bitmap[i]) {
95259824
WB
2244+ char *tmpfn = g_strdup_printf("%s/tmp-disk-%s.raw",
2245+ dirname, di->devname);
2246+ char *fn = g_strdup_printf("%s/disk-%s.raw",
2247+ dirname, di->devname);
2248+ if (rename(tmpfn, fn) != 0) {
2249+ g_error("rename %s to %s failed - %s",
2250+ tmpfn, fn, g_strerror(errno));
2251+ }
2252+ }
2253+ }
2254+ }
2255+
2256+ vma_reader_destroy(vmar);
2257+
67af0fa4
WB
2258+ bdrv_close_all();
2259+
2260+ return ret;
2261+}
2262+
2263+static int verify_content(int argc, char **argv)
2264+{
2265+ int c, ret = 0;
2266+ int verbose = 0;
2267+ const char *filename;
2268+
2269+ for (;;) {
2270+ c = getopt(argc, argv, "hv");
2271+ if (c == -1) {
2272+ break;
2273+ }
2274+ switch (c) {
2275+ case '?':
2276+ case 'h':
2277+ help();
2278+ break;
2279+ case 'v':
2280+ verbose = 1;
2281+ break;
2282+ default:
2283+ help();
2284+ }
2285+ }
2286+
2287+ /* Get the filename */
2288+ if ((optind + 1) != argc) {
2289+ help();
2290+ }
2291+ filename = argv[optind++];
2292+
2293+ Error *errp = NULL;
2294+ VmaReader *vmar = vma_reader_create(filename, &errp);
2295+
2296+ if (!vmar) {
2297+ g_error("%s", error_get_pretty(errp));
2298+ }
2299+
2300+ if (verbose) {
2301+ print_content(vmar);
2302+ }
2303+
2304+ if (vma_reader_verify(vmar, verbose, &errp) < 0) {
2305+ g_error("verify failed - %s", error_get_pretty(errp));
2306+ }
2307+
2308+ vma_reader_destroy(vmar);
2309+
95259824
WB
2310+ bdrv_close_all();
2311+
2312+ return ret;
2313+}
2314+
2315+typedef struct BackupJob {
67af0fa4 2316+ BlockBackend *target;
95259824
WB
2317+ int64_t len;
2318+ VmaWriter *vmaw;
2319+ uint8_t dev_id;
2320+} BackupJob;
2321+
2322+#define BACKUP_SECTORS_PER_CLUSTER (VMA_CLUSTER_SIZE / BDRV_SECTOR_SIZE)
2323+
67af0fa4
WB
2324+static void coroutine_fn backup_run_empty(void *opaque)
2325+{
2326+ VmaWriter *vmaw = (VmaWriter *)opaque;
2327+
2328+ vma_writer_flush_output(vmaw);
2329+
2330+ Error *err = NULL;
2331+ if (vma_writer_close(vmaw, &err) != 0) {
2332+ g_warning("vma_writer_close failed %s", error_get_pretty(err));
2333+ }
2334+}
2335+
95259824
WB
2336+static void coroutine_fn backup_run(void *opaque)
2337+{
2338+ BackupJob *job = (BackupJob *)opaque;
2339+ struct iovec iov;
2340+ QEMUIOVector qiov;
2341+
db5d2a4b 2342+ int64_t start, end, readlen;
95259824
WB
2343+ int ret = 0;
2344+
67af0fa4 2345+ unsigned char *buf = blk_blockalign(job->target, VMA_CLUSTER_SIZE);
95259824
WB
2346+
2347+ start = 0;
2348+ end = DIV_ROUND_UP(job->len / BDRV_SECTOR_SIZE,
2349+ BACKUP_SECTORS_PER_CLUSTER);
2350+
2351+ for (; start < end; start++) {
2352+ iov.iov_base = buf;
2353+ iov.iov_len = VMA_CLUSTER_SIZE;
2354+ qemu_iovec_init_external(&qiov, &iov, 1);
2355+
db5d2a4b
FE
2356+ if (start + 1 == end) {
2357+ memset(buf, 0, VMA_CLUSTER_SIZE);
2358+ readlen = job->len - start * VMA_CLUSTER_SIZE;
2359+ assert(readlen > 0 && readlen <= VMA_CLUSTER_SIZE);
2360+ } else {
2361+ readlen = VMA_CLUSTER_SIZE;
2362+ }
2363+
67af0fa4 2364+ ret = blk_co_preadv(job->target, start * VMA_CLUSTER_SIZE,
db5d2a4b 2365+ readlen, &qiov, 0);
95259824 2366+ if (ret < 0) {
b8b4ce04 2367+ vma_writer_set_error(job->vmaw, "read error");
95259824
WB
2368+ goto out;
2369+ }
2370+
2371+ size_t zb = 0;
2372+ if (vma_writer_write(job->vmaw, job->dev_id, start, buf, &zb) < 0) {
b8b4ce04 2373+ vma_writer_set_error(job->vmaw, "backup_dump_cb vma_writer_write failed");
95259824
WB
2374+ goto out;
2375+ }
2376+ }
2377+
2378+
2379+out:
2380+ if (vma_writer_close_stream(job->vmaw, job->dev_id) <= 0) {
2381+ Error *err = NULL;
2382+ if (vma_writer_close(job->vmaw, &err) != 0) {
2383+ g_warning("vma_writer_close failed %s", error_get_pretty(err));
2384+ }
2385+ }
d7f4e01a 2386+ qemu_vfree(buf);
95259824
WB
2387+}
2388+
2389+static int create_archive(int argc, char **argv)
2390+{
f1eed34a 2391+ int c;
95259824
WB
2392+ int verbose = 0;
2393+ const char *archivename;
2bf61c3e 2394+ GList *backup_coroutines = NULL;
95259824
WB
2395+ GList *config_files = NULL;
2396+
2397+ for (;;) {
2398+ c = getopt(argc, argv, "hvc:");
2399+ if (c == -1) {
2400+ break;
2401+ }
2402+ switch (c) {
2403+ case '?':
2404+ case 'h':
2405+ help();
2406+ break;
2407+ case 'c':
2408+ config_files = g_list_append(config_files, optarg);
2409+ break;
2410+ case 'v':
2411+ verbose = 1;
2412+ break;
2413+ default:
2414+ g_assert_not_reached();
2415+ }
2416+ }
2417+
2418+
67af0fa4
WB
2419+ /* make sure we an archive name */
2420+ if ((optind + 1) > argc) {
95259824
WB
2421+ help();
2422+ }
2423+
2424+ archivename = argv[optind++];
2425+
2426+ uuid_t uuid;
2427+ uuid_generate(uuid);
2428+
2429+ Error *local_err = NULL;
2430+ VmaWriter *vmaw = vma_writer_create(archivename, uuid, &local_err);
2431+
2432+ if (vmaw == NULL) {
2433+ g_error("%s", error_get_pretty(local_err));
2434+ }
2435+
2436+ GList *l = config_files;
2437+ while (l && l->data) {
2438+ char *name = l->data;
2439+ char *cdata = NULL;
2440+ gsize clen = 0;
2441+ GError *err = NULL;
2442+ if (!g_file_get_contents(name, &cdata, &clen, &err)) {
2443+ unlink(archivename);
2444+ g_error("Unable to read file: %s", err->message);
2445+ }
2446+
2447+ if (vma_writer_add_config(vmaw, name, cdata, clen) != 0) {
2448+ unlink(archivename);
2449+ g_error("Unable to append config data %s (len = %zd)",
2450+ name, clen);
2451+ }
2452+ l = g_list_next(l);
2453+ }
2454+
67af0fa4 2455+ int devcount = 0;
95259824
WB
2456+ while (optind < argc) {
2457+ const char *path = argv[optind++];
2458+ char *devname = NULL;
67af0fa4 2459+ path = extract_devname(path, &devname, devcount++);
95259824
WB
2460+
2461+ Error *errp = NULL;
67af0fa4 2462+ BlockBackend *target;
95259824 2463+
67af0fa4
WB
2464+ target = blk_new_open(path, NULL, NULL, 0, &errp);
2465+ if (!target) {
95259824
WB
2466+ unlink(archivename);
2467+ g_error("bdrv_open '%s' failed - %s", path, error_get_pretty(errp));
2468+ }
67af0fa4 2469+ int64_t size = blk_getlength(target);
95259824
WB
2470+ int dev_id = vma_writer_register_stream(vmaw, devname, size);
2471+ if (dev_id <= 0) {
2472+ unlink(archivename);
83faa3fe 2473+ g_error("vma_writer_register_stream '%s' failed", devname);
95259824
WB
2474+ }
2475+
2476+ BackupJob *job = g_new0(BackupJob, 1);
2477+ job->len = size;
67af0fa4 2478+ job->target = target;
95259824
WB
2479+ job->vmaw = vmaw;
2480+ job->dev_id = dev_id;
2481+
2482+ Coroutine *co = qemu_coroutine_create(backup_run, job);
2bf61c3e
FE
2483+ // Don't enter coroutine yet, because it might write the header before
2484+ // all streams can be registered.
2485+ backup_coroutines = g_list_append(backup_coroutines, co);
95259824
WB
2486+ }
2487+
83faa3fe 2488+ VmaStatus vmastat;
95259824
WB
2489+ int percent = 0;
2490+ int last_percent = -1;
2491+
67af0fa4 2492+ if (devcount) {
2bf61c3e
FE
2493+ GList *entry = backup_coroutines;
2494+ while (entry && entry->data) {
2495+ Coroutine *co = entry->data;
2496+ qemu_coroutine_enter(co);
2497+ entry = g_list_next(entry);
2498+ }
2499+
67af0fa4
WB
2500+ while (1) {
2501+ main_loop_wait(false);
2502+ vma_writer_get_status(vmaw, &vmastat);
95259824 2503+
67af0fa4 2504+ if (verbose) {
95259824 2505+
67af0fa4
WB
2506+ uint64_t total = 0;
2507+ uint64_t transferred = 0;
2508+ uint64_t zero_bytes = 0;
95259824 2509+
67af0fa4
WB
2510+ int i;
2511+ for (i = 0; i < 256; i++) {
2512+ if (vmastat.stream_info[i].size) {
2513+ total += vmastat.stream_info[i].size;
2514+ transferred += vmastat.stream_info[i].transferred;
2515+ zero_bytes += vmastat.stream_info[i].zero_bytes;
2516+ }
95259824 2517+ }
67af0fa4
WB
2518+ percent = (transferred*100)/total;
2519+ if (percent != last_percent) {
2520+ fprintf(stderr, "progress %d%% %zd/%zd %zd\n", percent,
2521+ transferred, total, zero_bytes);
2522+ fflush(stderr);
95259824 2523+
67af0fa4
WB
2524+ last_percent = percent;
2525+ }
95259824 2526+ }
95259824 2527+
67af0fa4
WB
2528+ if (vmastat.closed) {
2529+ break;
2530+ }
95259824
WB
2531+ }
2532+ } else {
2533+ Coroutine *co = qemu_coroutine_create(backup_run_empty, vmaw);
2534+ qemu_coroutine_enter(co);
2535+ while (1) {
2536+ main_loop_wait(false);
2537+ vma_writer_get_status(vmaw, &vmastat);
2538+ if (vmastat.closed) {
2539+ break;
2540+ }
2541+ }
2542+ }
2543+
2544+ bdrv_drain_all();
2545+
2546+ vma_writer_get_status(vmaw, &vmastat);
2547+
2548+ if (verbose) {
f1eed34a 2549+ int i;
95259824
WB
2550+ for (i = 0; i < 256; i++) {
2551+ VmaStreamInfo *si = &vmastat.stream_info[i];
2552+ if (si->size) {
2553+ fprintf(stderr, "image %s: size=%zd zeros=%zd saved=%zd\n",
2554+ si->devname, si->size, si->zero_bytes,
2555+ si->size - si->zero_bytes);
2556+ }
2557+ }
2558+ }
2559+
2560+ if (vmastat.status < 0) {
2561+ unlink(archivename);
2562+ g_error("creating vma archive failed");
2563+ }
2564+
2bf61c3e
FE
2565+ g_list_free(backup_coroutines);
2566+ g_list_free(config_files);
d7f4e01a 2567+ vma_writer_destroy(vmaw);
95259824
WB
2568+ return 0;
2569+}
2570+
67af0fa4
WB
2571+static int dump_config(int argc, char **argv)
2572+{
2573+ int c, ret = 0;
2574+ const char *filename;
2575+ const char *config_name = "qemu-server.conf";
2576+
2577+ for (;;) {
2578+ c = getopt(argc, argv, "hc:");
2579+ if (c == -1) {
2580+ break;
2581+ }
2582+ switch (c) {
2583+ case '?':
2584+ case 'h':
2585+ help();
2586+ break;
2587+ case 'c':
2588+ config_name = optarg;
2589+ break;
2590+ default:
2591+ help();
2592+ }
2593+ }
2594+
2595+ /* Get the filename */
2596+ if ((optind + 1) != argc) {
2597+ help();
2598+ }
2599+ filename = argv[optind++];
2600+
2601+ Error *errp = NULL;
2602+ VmaReader *vmar = vma_reader_create(filename, &errp);
2603+
2604+ if (!vmar) {
2605+ g_error("%s", error_get_pretty(errp));
2606+ }
2607+
2608+ int found = 0;
2609+ GList *l = vma_reader_get_config_data(vmar);
2610+ while (l && l->data) {
2611+ VmaConfigData *cdata = (VmaConfigData *)l->data;
2612+ l = g_list_next(l);
2613+ if (strcmp(cdata->name, config_name) == 0) {
2614+ found = 1;
2615+ fwrite(cdata->data, cdata->len, 1, stdout);
2616+ break;
2617+ }
2618+ }
2619+
2620+ vma_reader_destroy(vmar);
2621+
2622+ bdrv_close_all();
2623+
2624+ if (!found) {
2625+ fprintf(stderr, "unable to find configuration data '%s'\n", config_name);
2626+ return -1;
2627+ }
2628+
2629+ return ret;
2630+}
2631+
95259824
WB
2632+int main(int argc, char **argv)
2633+{
2634+ const char *cmdname;
2635+ Error *main_loop_err = NULL;
2636+
6402d961
TL
2637+ error_init(argv[0]);
2638+ module_call_init(MODULE_INIT_TRACE);
2639+ qemu_init_exec_dir(argv[0]);
95259824
WB
2640+
2641+ if (qemu_init_main_loop(&main_loop_err)) {
2642+ g_error("%s", error_get_pretty(main_loop_err));
2643+ }
2644+
2645+ bdrv_init();
6402d961 2646+ module_call_init(MODULE_INIT_QOM);
95259824
WB
2647+
2648+ if (argc < 2) {
2649+ help();
2650+ }
2651+
2652+ cmdname = argv[1];
2653+ argc--; argv++;
2654+
2655+
2656+ if (!strcmp(cmdname, "list")) {
2657+ return list_content(argc, argv);
2658+ } else if (!strcmp(cmdname, "create")) {
2659+ return create_archive(argc, argv);
2660+ } else if (!strcmp(cmdname, "extract")) {
2661+ return extract_content(argc, argv);
67af0fa4
WB
2662+ } else if (!strcmp(cmdname, "verify")) {
2663+ return verify_content(argc, argv);
2664+ } else if (!strcmp(cmdname, "config")) {
2665+ return dump_config(argc, argv);
95259824
WB
2666+ }
2667+
2668+ help();
2669+ return 0;
2670+}
2671diff --git a/vma.h b/vma.h
2672new file mode 100644
c36e3f9d 2673index 0000000000..86d2873aa5
95259824
WB
2674--- /dev/null
2675+++ b/vma.h
6838f038 2676@@ -0,0 +1,150 @@
95259824
WB
2677+/*
2678+ * VMA: Virtual Machine Archive
2679+ *
2680+ * Copyright (C) Proxmox Server Solutions
2681+ *
2682+ * Authors:
2683+ * Dietmar Maurer (dietmar@proxmox.com)
2684+ *
2685+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
2686+ * See the COPYING file in the top-level directory.
2687+ *
2688+ */
2689+
2690+#ifndef BACKUP_VMA_H
2691+#define BACKUP_VMA_H
2692+
2693+#include <uuid/uuid.h>
2694+#include "qapi/error.h"
2695+#include "block/block.h"
2696+
/* Block and cluster geometry: a cluster is 16 (1 << 4) blocks. */
#define VMA_BLOCK_BITS 12
#define VMA_BLOCK_SIZE (1 << VMA_BLOCK_BITS)
#define VMA_CLUSTER_BITS (VMA_BLOCK_BITS + 4)
#define VMA_CLUSTER_SIZE (1 << VMA_CLUSTER_BITS)

#if VMA_CLUSTER_SIZE != 65536
#error unexpected cluster size
#endif

/* Extent layout limits and the number of config slots in the header. */
#define VMA_EXTENT_HEADER_SIZE 512
#define VMA_BLOCKS_PER_EXTENT 59
#define VMA_MAX_CONFIGS 256

/* Largest possible extent: the extent header plus a full set of clusters. */
#define VMA_MAX_EXTENT_SIZE \
    (VMA_EXTENT_HEADER_SIZE + VMA_CLUSTER_SIZE * VMA_BLOCKS_PER_EXTENT)
#if VMA_MAX_EXTENT_SIZE != 3867136
#error unexpected VMA_EXTENT_SIZE
#endif

/* File Format Definitions */

/* Magic values: the ASCII bytes "VMA\0" and "VMAE", passed through GUINT32_TO_BE. */
#define VMA_MAGIC \
    (GUINT32_TO_BE(('V' << 24) | ('M' << 16) | ('A' << 8) | 0x00))
#define VMA_EXTENT_MAGIC \
    (GUINT32_TO_BE(('V' << 24) | ('M' << 16) | ('A' << 8) | 'E'))
2720+
/*
 * Per-device entry of the archive header (see VmaHeader.dev_info).
 * Part of the file format: field order and widths must not change.
 */
typedef struct VmaDeviceInfoHeader {
    uint32_t devname_ptr;   /* offset into blob_buffer table */
    uint32_t reserved0;
    uint64_t size;          /* device size in bytes */
    uint64_t reserved1;
    uint64_t reserved2;
} VmaDeviceInfoHeader;
2728+
2729+typedef struct VmaHeader {
2730+ uint32_t magic;
2731+ uint32_t version;
2732+ unsigned char uuid[16];
2733+ int64_t ctime;
2734+ unsigned char md5sum[16];
2735+
2736+ uint32_t blob_buffer_offset;
2737+ uint32_t blob_buffer_size;
2738+ uint32_t header_size;
2739+
2740+ unsigned char reserved[1984];
2741+
2742+ uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
2743+ uint32_t config_data[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
2744+
2745+ uint32_t reserved1;
2746+
2747+ VmaDeviceInfoHeader dev_info[256];
2748+} VmaHeader;
2749+
2750+typedef struct VmaExtentHeader {
2751+ uint32_t magic;
2752+ uint16_t reserved1;
2753+ uint16_t block_count;
2754+ unsigned char uuid[16];
2755+ unsigned char md5sum[16];
2756+ uint64_t blockinfo[VMA_BLOCKS_PER_EXTENT];
2757+} VmaExtentHeader;
2758+
2759+/* functions/definitions to read/write vma files */
2760+
2761+typedef struct VmaReader VmaReader;
2762+
2763+typedef struct VmaWriter VmaWriter;
2764+
/*
 * One configuration blob, as found in the list returned by
 * vma_reader_get_config_data().
 */
typedef struct VmaConfigData {
    const char *name;   /* name the blob is looked up by */
    const void *data;   /* blob content */
    uint32_t len;       /* number of bytes in data */
} VmaConfigData;
2770+
/*
 * Per-stream counters reported through VmaStatus. A slot whose size is 0
 * is skipped by the progress and summary output of the vma tool.
 */
typedef struct VmaStreamInfo {
    uint64_t size;          /* stream (device) size */
    uint64_t cluster_count;
    uint64_t transferred;   /* summed up for the progress display */
    uint64_t zero_bytes;    /* summed up as "zeros" in the summary */
    int finished;
    char *devname;          /* device name shown in the summary */
} VmaStreamInfo;
2779+
2780+typedef struct VmaStatus {
2781+ int status;
2782+ bool closed;
2783+ char errmsg[8192];
2784+ char uuid_str[37];
2785+ VmaStreamInfo stream_info[256];
2786+} VmaStatus;
2787+
/*
 * Description of one archived device, as returned by
 * vma_reader_get_device_info().
 */
typedef struct VmaDeviceInfo {
    uint64_t size;          /* device size in bytes */
    const char *devname;    /* device name */
} VmaDeviceInfo;
2792+
2793+VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp);
2794+int vma_writer_close(VmaWriter *vmaw, Error **errp);
67af0fa4 2795+void vma_writer_error_propagate(VmaWriter *vmaw, Error **errp);
95259824
WB
2796+void vma_writer_destroy(VmaWriter *vmaw);
2797+int vma_writer_add_config(VmaWriter *vmaw, const char *name, gpointer data,
2798+ size_t len);
2799+int vma_writer_register_stream(VmaWriter *vmaw, const char *devname,
2800+ size_t size);
2801+
2802+int64_t coroutine_fn vma_writer_write(VmaWriter *vmaw, uint8_t dev_id,
6838f038
WB
2803+ int64_t cluster_num,
2804+ const unsigned char *buf,
95259824
WB
2805+ size_t *zero_bytes);
2806+
2807+int coroutine_fn vma_writer_close_stream(VmaWriter *vmaw, uint8_t dev_id);
67af0fa4 2808+int coroutine_fn vma_writer_flush_output(VmaWriter *vmaw);
95259824
WB
2809+
2810+int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status);
b8b4ce04 2811+void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...) G_GNUC_PRINTF(2, 3);
95259824
WB
2812+
2813+
2814+VmaReader *vma_reader_create(const char *filename, Error **errp);
2815+void vma_reader_destroy(VmaReader *vmar);
2816+VmaHeader *vma_reader_get_header(VmaReader *vmar);
2817+GList *vma_reader_get_config_data(VmaReader *vmar);
2818+VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id);
2819+int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id,
67af0fa4 2820+ BlockBackend *target, bool write_zeroes,
db5d2a4b 2821+ bool skip, Error **errp);
95259824
WB
2822+int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
2823+ Error **errp);
67af0fa4 2824+int vma_reader_verify(VmaReader *vmar, bool verbose, Error **errp);
95259824
WB
2825+
2826+#endif /* BACKUP_VMA_H */