]> git.proxmox.com Git - pve-qemu-kvm.git/blob - debian/patches/0004-introduce-new-vma-archive-format.patch
Two more fixes
[pve-qemu-kvm.git] / debian / patches / 0004-introduce-new-vma-archive-format.patch
1 From 6289a43696ca6f713a5d3bb9f95a5adb608a5e13 Mon Sep 17 00:00:00 2001
2 From: Dietmar Maurer <dietmar@proxmox.com>
3 Date: Tue, 13 Nov 2012 11:11:38 +0100
4 Subject: [PATCH v5 4/6] introduce new vma archive format
5
6 This is a very simple archive format, see docs/specs/vma_spec.txt
7
8 Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
9 ---
10 Makefile | 3 +-
11 Makefile.objs | 2 +-
12 blockdev.c | 6 +-
13 docs/specs/vma_spec.txt | 24 ++
14 vma-reader.c | 799 ++++++++++++++++++++++++++++++++++++++++
15 vma-writer.c | 940 +++++++++++++++++++++++++++++++++++++++++++++++
16 vma.c | 561 ++++++++++++++++++++++++++++
17 vma.h | 145 ++++++++
18 8 files changed, 2476 insertions(+), 4 deletions(-)
19 create mode 100644 docs/specs/vma_spec.txt
20 create mode 100644 vma-reader.c
21 create mode 100644 vma-writer.c
22 create mode 100644 vma.c
23 create mode 100644 vma.h
24
25 diff --git a/Makefile b/Makefile
26 index 0d9099a..16f1c25 100644
27 --- a/Makefile
28 +++ b/Makefile
29 @@ -115,7 +115,7 @@ ifeq ($(CONFIG_SMARTCARD_NSS),y)
30 include $(SRC_PATH)/libcacard/Makefile
31 endif
32
33 -all: $(DOCS) $(TOOLS) $(HELPERS-y) recurse-all
34 +all: $(DOCS) $(TOOLS) vma$(EXESUF) $(HELPERS-y) recurse-all
35
36 config-host.h: config-host.h-timestamp
37 config-host.h-timestamp: config-host.mak
38 @@ -167,6 +167,7 @@ qemu-img.o: qemu-img-cmds.h
39 qemu-img$(EXESUF): qemu-img.o $(block-obj-y) libqemuutil.a libqemustub.a
40 qemu-nbd$(EXESUF): qemu-nbd.o $(block-obj-y) libqemuutil.a libqemustub.a
41 qemu-io$(EXESUF): qemu-io.o cmd.o $(block-obj-y) libqemuutil.a libqemustub.a
42 +vma$(EXESUF): vma.o vma-writer.o vma-reader.o $(block-obj-y) libqemuutil.a libqemustub.a
43
44 qemu-bridge-helper$(EXESUF): qemu-bridge-helper.o
45
46 diff --git a/Makefile.objs b/Makefile.objs
47 index df64f70..91f133b 100644
48 --- a/Makefile.objs
49 +++ b/Makefile.objs
50 @@ -13,7 +13,7 @@ block-obj-$(CONFIG_POSIX) += aio-posix.o
51 block-obj-$(CONFIG_WIN32) += aio-win32.o
52 block-obj-y += block/
53 block-obj-y += qapi-types.o qapi-visit.o
54 -block-obj-y += backup.o
55 +block-obj-y += vma-writer.o backup.o
56
57 block-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
58 block-obj-y += qemu-coroutine-sleep.o
59 diff --git a/blockdev.c b/blockdev.c
60 index 84f598d..683f7da 100644
61 --- a/blockdev.c
62 +++ b/blockdev.c
63 @@ -21,6 +21,7 @@
64 #include "trace.h"
65 #include "sysemu/arch_init.h"
66 #include "backup.h"
67 +#include "vma.h"
68
69 static QTAILQ_HEAD(drivelist, DriveInfo) drives = QTAILQ_HEAD_INITIALIZER(drives);
70
71 @@ -1530,10 +1531,11 @@ char *qmp_backup(const char *backup_file, bool has_format, BackupFormat format,
72 /* Todo: try to auto-detect format based on file name */
73 format = has_format ? format : BACKUP_FORMAT_VMA;
74
75 - /* fixme: find driver for specifued format */
76 const BackupDriver *driver = NULL;
77
78 - if (!driver) {
79 + if (format == BACKUP_FORMAT_VMA) {
80 + driver = &backup_vma_driver;
81 + } else {
82 error_set(errp, ERROR_CLASS_GENERIC_ERROR, "unknown backup format");
83 return NULL;
84 }
85 diff --git a/docs/specs/vma_spec.txt b/docs/specs/vma_spec.txt
86 new file mode 100644
87 index 0000000..9b715f2
88 --- /dev/null
89 +++ b/docs/specs/vma_spec.txt
90 @@ -0,0 +1,24 @@
91 +=Virtual Machine Archive format (VMA)=
92 +
93 +This format contains a header which includes the VM configuration as
94 +binary blobs, and a list of devices (dev_id, name).
95 +
96 +The actual VM image data is stored inside extents. An extent contains
97 +up to 64 clusters, and starts with a 512 byte header containing
98 +additional information for those clusters.
99 +
100 +We use a cluster size of 65536, and use 8 bytes for each
101 +cluster in the header to store the following information:
102 +
103 +* 1 byte dev_id (to identify the drive)
104 +* 1 byte not used (reserved)
105 +* 2 bytes zero indicator (mark zero regions (16x4096))
106 +* 4 bytes cluster number
107 +
108 +We only store non-zero blocks (each such block is 4096 bytes).
109 +
110 +Each archive is marked with a uuid. The archive header and all
111 +extent headers include that uuid and an MD5 checksum (over header
112 +data).
113 +
114 +
115 diff --git a/vma-reader.c b/vma-reader.c
116 new file mode 100644
117 index 0000000..bc36cba
118 --- /dev/null
119 +++ b/vma-reader.c
120 @@ -0,0 +1,799 @@
121 +/*
122 + * VMA: Virtual Machine Archive
123 + *
124 + * Copyright (C) 2012 Proxmox Server Solutions
125 + *
126 + * Authors:
127 + * Dietmar Maurer (dietmar@proxmox.com)
128 + *
129 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
130 + * See the COPYING file in the top-level directory.
131 + *
132 + */
133 +
134 +#include <stdio.h>
135 +#include <errno.h>
136 +#include <unistd.h>
137 +#include <stdio.h>
138 +#include <string.h>
139 +#include <sys/types.h>
140 +#include <sys/stat.h>
141 +#include <fcntl.h>
142 +#include <glib.h>
143 +#include <uuid/uuid.h>
144 +
145 +#include "qemu-common.h"
146 +#include "qemu/timer.h"
147 +#include "qemu/ratelimit.h"
148 +#include "vma.h"
149 +#include "block/block.h"
150 +
151 +#define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)
152 +
153 +static unsigned char zero_vma_block[VMA_BLOCK_SIZE];
154 +
155 +typedef struct VmaRestoreState { /* Per-device restore state; VmaReader keeps one slot per dev_id. */
156 + BlockDriverState *bs; /* restore target, set by vma_reader_register_bs() */
157 + bool write_zeroes; /* write zero blocks explicitly for 4K blocks not stored in the archive */
158 + unsigned long *bitmap; /* one bit per cluster that was already restored */
159 + int bitmap_size; /* number of unsigned long words in bitmap */
160 +} VmaRestoreState;
161 +
162 +struct VmaReader { /* State of one VMA archive opened for reading. */
163 + int fd; /* archive file descriptor */
164 + GChecksum *md5csum; /* MD5 context reused for archive header and extent header checks */
165 + GHashTable *blob_hash; /* blob start offset (uint32_t) -> VmaBlob */
166 + unsigned char *head_data; /* raw archive header; blob data pointers refer into this buffer */
167 + VmaDeviceInfo devinfo[256]; /* indexed by dev_id */
168 + VmaRestoreState rstate[256]; /* indexed by dev_id */
169 + GList *cdata_list; /* list of VmaConfigData built while parsing the header */
170 + guint8 vmstate_stream; /* dev_id of the stream named "vmstate"; stays 0 if there is none */
171 + uint32_t vmstate_clusters; /* next expected vmstate cluster number */
172 + /* to show restore percentage if run with -v */
173 + time_t start_time;
174 + int64_t cluster_count;
175 + int64_t clusters_read;
176 + int clusters_read_per;
177 +};
178 +
179 +static guint
180 +g_int32_hash(gconstpointer v)
181 +{ /* Hash callback for blob_hash: the 32-bit key value pointed to by v is used directly as the hash. */
182 + return *(const uint32_t *)v;
183 +}
184 +
185 +static gboolean
186 +g_int32_equal(gconstpointer v1, gconstpointer v2)
187 +{ /* Key-equality callback for blob_hash: compares the two 32-bit values pointed to. */
188 + return *((const uint32_t *)v1) == *((const uint32_t *)v2);
189 +}
190 +
191 +static int vma_reader_get_bitmap(VmaRestoreState *rstate, int64_t cluster_num)
192 +{ /* Return 1 if the bit for cluster_num is set in rstate->bitmap, otherwise 0. */
193 + assert(rstate);
194 + assert(rstate->bitmap);
195 +
196 + unsigned long val, idx, bit;
197 +
198 + idx = cluster_num / BITS_PER_LONG; /* word holding the bit */
199 +
200 + assert(rstate->bitmap_size > idx); /* out-of-range cluster numbers abort here */
201 +
202 + bit = cluster_num % BITS_PER_LONG; /* bit position inside that word */
203 + val = rstate->bitmap[idx];
204 +
205 + return !!(val & (1UL << bit));
206 +}
207 +
208 +static void vma_reader_set_bitmap(VmaRestoreState *rstate, int64_t cluster_num,
209 + int dirty)
210 +{ /* Set (dirty != 0) or clear (dirty == 0) the bit for cluster_num in rstate->bitmap. */
211 + assert(rstate);
212 + assert(rstate->bitmap);
213 +
214 + unsigned long val, idx, bit;
215 +
216 + idx = cluster_num / BITS_PER_LONG; /* word holding the bit */
217 +
218 + assert(rstate->bitmap_size > idx); /* out-of-range cluster numbers abort here */
219 +
220 + bit = cluster_num % BITS_PER_LONG; /* bit position inside that word */
221 + val = rstate->bitmap[idx];
222 + if (dirty) {
223 + if (!(val & (1UL << bit))) {
224 + val |= 1UL << bit;
225 + }
226 + } else {
227 + if (val & (1UL << bit)) {
228 + val &= ~(1UL << bit);
229 + }
230 + }
231 + rstate->bitmap[idx] = val;
232 +}
233 +
234 +typedef struct VmaBlob { /* One entry of the header blob buffer, as indexed by vma_reader_read_head(). */
235 + uint32_t start; /* offset of the blob relative to blob_buffer_offset; hash table key */
236 + uint32_t len; /* payload length in bytes (without the 2-byte length prefix) */
237 + void *data; /* payload; points into VmaReader.head_data, not separately allocated */
238 +} VmaBlob;
239 +
240 +static const VmaBlob *get_header_blob(VmaReader *vmar, uint32_t pos)
241 +{ /* Look up the header blob whose start offset is pos; returns NULL if there is none. */
242 + assert(vmar);
243 + assert(vmar->blob_hash);
244 +
245 + return g_hash_table_lookup(vmar->blob_hash, &pos);
246 +}
247 +
248 +static const char *get_header_str(VmaReader *vmar, uint32_t pos)
249 +{ /* Return the NUL-terminated string stored in the header blob at pos, or NULL if no such blob exists or it is not a terminated string. */
250 + const VmaBlob *blob = get_header_blob(vmar, pos);
251 + if (!blob) {
252 + return NULL;
253 + }
254 + const char *res = (char *)blob->data;
255 + if (!blob->len || res[blob->len-1] != '\0') { /* len == 0 must be rejected first: len-1 would wrap and index out of bounds */
256 + return NULL;
257 + }
258 + return res;
259 +}
260 +
261 +static ssize_t
262 +safe_read(int fd, unsigned char *buf, size_t count)
263 +{ /* read() wrapper that retries while the call fails with EINTR; returns the final read() result. */
264 + ssize_t n;
265 +
266 + do {
267 + n = read(fd, buf, count);
268 + } while (n < 0 && errno == EINTR);
269 +
270 + return n;
271 +}
272 +
273 +static ssize_t
274 +full_read(int fd, unsigned char *buf, size_t len)
275 +{ /* Read until len bytes were read or EOF is hit. Returns the number of bytes read (less than len on EOF), or -1 on read error. */
276 + ssize_t n;
277 + size_t total;
278 +
279 + total = 0;
280 +
281 + while (len > 0) {
282 + n = safe_read(fd, buf, len);
283 +
284 + if (n == 0) { /* EOF: report what was read so far */
285 + return total;
286 + }
287 +
288 + if (n <= 0) { /* only n < 0 reaches this point: read error */
289 + break;
290 + }
291 +
292 + buf += n;
293 + total += n;
294 + len -= n;
295 + }
296 +
297 + if (len) { /* loop was left by the error branch */
298 + return -1;
299 + }
300 +
301 + return total;
302 +}
303 +
304 +void vma_reader_destroy(VmaReader *vmar)
305 +{ /* Close the archive fd and free everything owned by vmar, including vmar itself. */
306 + assert(vmar);
307 +
308 + if (vmar->fd >= 0) {
309 + close(vmar->fd);
310 + }
311 +
312 + if (vmar->cdata_list) {
313 + g_list_free_full(vmar->cdata_list, g_free); /* free the VmaConfigData elements too, not only the list nodes */
314 + }
315 +
316 + int i;
317 + for (i = 1; i < 256; i++) { /* dev_id 0 is never used */
318 + if (vmar->rstate[i].bitmap) {
319 + g_free(vmar->rstate[i].bitmap);
320 + }
321 + }
322 +
323 + if (vmar->md5csum) {
324 + g_checksum_free(vmar->md5csum);
325 + }
326 +
327 + if (vmar->blob_hash) {
328 + g_hash_table_destroy(vmar->blob_hash); /* frees the VmaBlob values via the g_free value destructor */
329 + }
330 +
331 + if (vmar->head_data) {
332 + g_free(vmar->head_data);
333 + }
334 +
335 + g_free(vmar);
336 +
337 +}
338 +
339 +static int vma_reader_read_head(VmaReader *vmar, Error **errp)
340 +{ /* Read and verify the archive header from vmar->fd, then index blobs, devices and config entries. Returns 0 on success, -1 with *errp set on failure. */
341 + assert(vmar);
342 + assert(errp);
343 + assert(*errp == NULL);
344 +
345 + unsigned char md5sum[16];
346 + int i;
347 + int ret = 0;
348 +
349 + vmar->head_data = g_malloc(sizeof(VmaHeader));
350 +
351 + if (full_read(vmar->fd, vmar->head_data, sizeof(VmaHeader)) !=
352 + sizeof(VmaHeader)) {
353 + error_setg(errp, "can't read vma header - %s",
354 + errno ? g_strerror(errno) : "got EOF");
355 + return -1;
356 + }
357 +
358 + VmaHeader *h = (VmaHeader *)vmar->head_data;
359 +
360 + if (h->magic != VMA_MAGIC) {
361 + error_setg(errp, "not a vma file - wrong magic number");
362 + return -1;
363 + }
364 +
365 + uint32_t header_size = GUINT32_FROM_BE(h->header_size);
366 + int need = header_size - sizeof(VmaHeader); /* bytes of header still to read */
367 + if (need <= 0) {
368 + error_setg(errp, "wrong vma header size %u", header_size);
369 + return -1;
370 + }
371 +
372 + vmar->head_data = g_realloc(vmar->head_data, header_size);
373 + h = (VmaHeader *)vmar->head_data; /* buffer may have moved */
374 +
375 + if (full_read(vmar->fd, vmar->head_data + sizeof(VmaHeader), need) !=
376 + need) {
377 + error_setg(errp, "can't read vma header data - %s",
378 + errno ? g_strerror(errno) : "got EOF");
379 + return -1;
380 + }
381 +
382 + memcpy(md5sum, h->md5sum, 16);
383 + memset(h->md5sum, 0, 16); /* checksum is computed with the md5sum field zeroed */
384 +
385 + g_checksum_reset(vmar->md5csum);
386 + g_checksum_update(vmar->md5csum, vmar->head_data, header_size);
387 + gsize csize = 16;
388 + g_checksum_get_digest(vmar->md5csum, (guint8 *)(h->md5sum), &csize);
389 +
390 + if (memcmp(md5sum, h->md5sum, 16) != 0) {
391 + error_setg(errp, "wrong vma header chechsum");
392 + return -1;
393 + }
394 +
395 + /* we can modify header data after checksum verify */
396 + h->header_size = header_size;
397 +
398 + h->version = GUINT32_FROM_BE(h->version);
399 + if (h->version != 1) {
400 + error_setg(errp, "wrong vma version %d", h->version);
401 + return -1;
402 + }
403 +
404 + h->ctime = GUINT64_FROM_BE(h->ctime);
405 + h->blob_buffer_offset = GUINT32_FROM_BE(h->blob_buffer_offset);
406 + h->blob_buffer_size = GUINT32_FROM_BE(h->blob_buffer_size);
407 +
408 + uint32_t bstart = h->blob_buffer_offset + 1; /* offset 0 of the blob buffer is skipped, so 0 never names a blob */
409 + uint32_t bend = h->blob_buffer_offset + h->blob_buffer_size;
410 +
411 + if (bstart <= sizeof(VmaHeader) || bstart > header_size) { /* also rejects offsets past the header, so the arithmetic below cannot wrap */
412 + error_setg(errp, "wrong vma blob buffer offset %d",
413 + h->blob_buffer_offset);
414 + return -1;
415 + }
416 +
417 + if (h->blob_buffer_size > header_size - h->blob_buffer_offset) { /* same as bend > header_size, but immune to uint32_t wrap-around */
418 + error_setg(errp, "wrong vma blob buffer size %d/%d",
419 + h->blob_buffer_offset, h->blob_buffer_size);
420 + return -1;
421 + }
422 +
423 + while ((bstart + 2) <= bend) { /* each blob: 2-byte little-endian length followed by the payload */
424 + uint32_t size = vmar->head_data[bstart] +
425 + (vmar->head_data[bstart+1] << 8);
426 + if ((bstart + size + 2) <= bend) {
427 + VmaBlob *blob = g_new0(VmaBlob, 1);
428 + blob->start = bstart - h->blob_buffer_offset;
429 + blob->len = size;
430 + blob->data = vmar->head_data + bstart + 2;
431 + g_hash_table_insert(vmar->blob_hash, &blob->start, blob);
432 + }
433 + bstart += size + 2;
434 + }
435 +
436 +
437 + int count = 0;
438 + for (i = 1; i < 256; i++) { /* collect devices that have both a size and a valid name */
439 + VmaDeviceInfoHeader *dih = &h->dev_info[i];
440 + uint32_t devname_ptr = GUINT32_FROM_BE(dih->devname_ptr);
441 + uint64_t size = GUINT64_FROM_BE(dih->size);
442 + const char *devname = get_header_str(vmar, devname_ptr);
443 +
444 + if (size && devname) {
445 + count++;
446 + vmar->devinfo[i].size = size;
447 + vmar->devinfo[i].devname = devname;
448 +
449 + if (strcmp(devname, "vmstate") == 0) {
450 + vmar->vmstate_stream = i;
451 + }
452 + }
453 + }
454 +
455 + if (!count) {
456 + error_setg(errp, "vma does not contain data");
457 + return -1;
458 + }
459 +
460 + for (i = 0; i < VMA_MAX_CONFIGS; i++) { /* collect config blobs; slots with a zero pointer are unused */
461 + uint32_t name_ptr = GUINT32_FROM_BE(h->config_names[i]);
462 + uint32_t data_ptr = GUINT32_FROM_BE(h->config_data[i]);
463 +
464 + if (!(name_ptr && data_ptr)) {
465 + continue;
466 + }
467 + const char *name = get_header_str(vmar, name_ptr);
468 + const VmaBlob *blob = get_header_blob(vmar, data_ptr);
469 +
470 + if (!(name && blob)) {
471 + error_setg(errp, "vma contains invalid data pointers");
472 + return -1;
473 + }
474 +
475 + VmaConfigData *cdata = g_new0(VmaConfigData, 1);
476 + cdata->name = name;
477 + cdata->data = blob->data;
478 + cdata->len = blob->len;
479 +
480 + vmar->cdata_list = g_list_append(vmar->cdata_list, cdata);
481 + }
482 +
483 + return ret;
484 +}
485 +
486 +VmaReader *vma_reader_create(const char *filename, Error **errp)
487 +{ /* Open filename ("-" means a dup of fd 0) and parse the archive header. Returns a new reader, or NULL with *errp set. */
488 + assert(filename);
489 + assert(errp);
490 +
491 + VmaReader *vmar = g_new0(VmaReader, 1);
492 +
493 + if (strcmp(filename, "-") == 0) {
494 + vmar->fd = dup(0);
495 + } else {
496 + vmar->fd = open(filename, O_RDONLY);
497 + }
498 +
499 + if (vmar->fd < 0) {
500 + error_setg(errp, "can't open file %s - %s\n", filename,
501 + g_strerror(errno));
502 + goto err;
503 + }
504 +
505 + vmar->md5csum = g_checksum_new(G_CHECKSUM_MD5);
506 + if (!vmar->md5csum) {
507 + error_setg(errp, "can't allocate cmsum\n");
508 + goto err;
509 + }
510 +
511 + vmar->blob_hash = g_hash_table_new_full(g_int32_hash, g_int32_equal,
512 + NULL, g_free); /* keys live inside the VmaBlob values, so only values are freed */
513 +
514 + if (vma_reader_read_head(vmar, errp) < 0) {
515 + goto err;
516 + }
517 +
518 + return vmar;
519 +
520 +err:
521 + if (vmar) {
522 + vma_reader_destroy(vmar); /* releases whatever was set up so far */
523 + }
524 +
525 + return NULL;
526 +}
527 +
528 +VmaHeader *vma_reader_get_header(VmaReader *vmar)
529 +{ /* Return the parsed archive header; the pointer refers to memory owned by vmar. */
530 + assert(vmar);
531 + assert(vmar->head_data);
532 +
533 + return (VmaHeader *)(vmar->head_data);
534 +}
535 +
536 +GList *vma_reader_get_config_data(VmaReader *vmar)
537 +{ /* Return the list of VmaConfigData found in the header; the list stays owned by vmar. */
538 + assert(vmar);
539 + assert(vmar->head_data);
540 +
541 + return vmar->cdata_list;
542 +}
543 +
544 +VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id)
545 +{ /* Return the device info for dev_id (must be non-zero), or NULL if the archive has no such device. */
546 + assert(vmar);
547 + assert(dev_id);
548 +
549 + if (vmar->devinfo[dev_id].size && vmar->devinfo[dev_id].devname) {
550 + return &vmar->devinfo[dev_id];
551 + }
552 +
553 + return NULL;
554 +}
555 +
556 +int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id, BlockDriverState *bs,
557 + bool write_zeroes, Error **errp)
558 +{ /* Register bs as restore target for dev_id; its length must equal the size recorded in the archive. Returns 0, or -1 with *errp set. */
559 + assert(vmar);
560 + assert(bs != NULL);
561 + assert(dev_id);
562 + assert(vmar->rstate[dev_id].bs == NULL);
563 +
564 + int64_t size = bdrv_getlength(bs);
565 + if (size != vmar->devinfo[dev_id].size) {
566 + error_setg(errp, "vma_reader_register_bs for stream %s failed - "
567 + "unexpected size %zd != %zd", vmar->devinfo[dev_id].devname,
568 + size, vmar->devinfo[dev_id].size);
569 + return -1;
570 + }
571 +
572 + vmar->rstate[dev_id].bs = bs;
573 + vmar->rstate[dev_id].write_zeroes = write_zeroes;
574 +
575 + int64_t bitmap_size = (size/BDRV_SECTOR_SIZE) +
576 + (VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE) * BITS_PER_LONG - 1;
577 + bitmap_size /= (VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE) * BITS_PER_LONG; /* words needed for one bit per cluster, rounded up */
578 +
579 + vmar->rstate[dev_id].bitmap_size = bitmap_size;
580 + vmar->rstate[dev_id].bitmap = g_new0(unsigned long, bitmap_size);
581 +
582 + vmar->cluster_count += (size + VMA_CLUSTER_SIZE - 1)/VMA_CLUSTER_SIZE; /* round up: a trailing partial cluster is restored too */
583 +
584 + return 0;
585 +}
586 +
587 +static ssize_t safe_write(int fd, void *buf, size_t count)
588 +{ /* write() wrapper that retries while the call fails with EINTR; returns the final write() result. */
589 + ssize_t n;
590 +
591 + do {
592 + n = write(fd, buf, count);
593 + } while (n < 0 && errno == EINTR);
594 +
595 + return n;
596 +}
597 +
598 +static ssize_t full_write(int fd, void *buf, size_t len)
599 +{ /* Write all len bytes of buf to fd. Returns the number of bytes written, or a negative value on error (hence the signed return type). */
600 + ssize_t n;
601 + size_t total;
602 +
603 + total = 0;
604 +
605 + while (len > 0) {
606 + n = safe_write(fd, buf, len);
607 + if (n < 0) {
608 + return n;
609 + }
610 + buf += n;
611 + total += n;
612 + len -= n;
613 + }
614 +
615 + if (len) {
616 + /* incomplete write ? */
617 + return -1;
618 + }
619 +
620 + return total;
621 +}
622 +
623 +static int restore_write_data(VmaReader *vmar, guint8 dev_id,
624 + BlockDriverState *bs, int vmstate_fd,
625 + unsigned char *buf, int64_t sector_num,
626 + int nb_sectors, Error **errp)
627 +{ /* Write nb_sectors from buf: vmstate data goes to vmstate_fd (dropped if fd < 0), everything else to bs at sector_num. Returns 0, or -1 with *errp set. */
628 + assert(vmar);
629 +
630 + if (dev_id == vmar->vmstate_stream) {
631 + if (vmstate_fd >= 0) {
632 + int len = nb_sectors * BDRV_SECTOR_SIZE;
633 + int res = full_write(vmstate_fd, buf, len);
634 + if (res < 0) {
635 + error_setg(errp, "write vmstate failed %d", res);
636 + return -1;
637 + }
638 + }
639 + } else {
640 + int res = bdrv_write(bs, sector_num, buf, nb_sectors);
641 + if (res < 0) {
642 + error_setg(errp, "bdrv_write to %s failed (%d)",
643 + bdrv_get_device_name(bs), res);
644 + return -1;
645 + }
646 + }
647 + return 0;
648 +}
649 +static int restore_extent(VmaReader *vmar, unsigned char *buf,
650 + int extent_size, int vmstate_fd,
651 + bool verbose, Error **errp)
652 +{ /* Restore one extent (header plus data in buf, extent_size bytes in total). Returns 0, or -1 with *errp set. */
653 + assert(vmar);
654 + assert(buf);
655 +
656 + VmaExtentHeader *ehead = (VmaExtentHeader *)buf;
657 + int start = VMA_EXTENT_HEADER_SIZE; /* offset in buf of the next unread data block */
658 + int i;
659 +
660 + for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
661 + uint64_t block_info = GUINT64_FROM_BE(ehead->blockinfo[i]);
662 + uint64_t cluster_num = block_info & 0xffffffff; /* low 32 bits */
663 + uint8_t dev_id = (block_info >> 32) & 0xff;
664 + uint16_t mask = block_info >> (32+16); /* one bit per 4K block stored for this cluster */
665 + int64_t max_sector;
666 +
667 + if (!dev_id) { /* unused slot */
668 + continue;
669 + }
670 +
671 + VmaRestoreState *rstate = &vmar->rstate[dev_id];
672 + BlockDriverState *bs = NULL;
673 +
674 + if (dev_id != vmar->vmstate_stream) {
675 + bs = rstate->bs;
676 + if (!bs) {
677 + error_setg(errp, "got wrong dev id %d", dev_id);
678 + return -1;
679 + }
680 +
681 + if (vma_reader_get_bitmap(rstate, cluster_num)) {
682 + error_setg(errp, "found duplicated cluster %zd for stream %s",
683 + cluster_num, vmar->devinfo[dev_id].devname);
684 + return -1;
685 + }
686 + vma_reader_set_bitmap(rstate, cluster_num, 1);
687 +
688 + max_sector = vmar->devinfo[dev_id].size/BDRV_SECTOR_SIZE;
689 + } else {
690 + max_sector = G_MAXINT64;
691 + if (cluster_num != vmar->vmstate_clusters) {
692 + error_setg(errp, "found out of order vmstate data");
693 + return -1;
694 + }
695 + vmar->vmstate_clusters++;
696 + }
697 +
698 + vmar->clusters_read++;
699 +
700 + if (verbose && vmar->cluster_count) { /* skip progress when the total is 0: avoids a division by zero */
701 + time_t duration = time(NULL) - vmar->start_time;
702 + int percent = (vmar->clusters_read*100)/vmar->cluster_count;
703 + if (percent != vmar->clusters_read_per) {
704 + printf("progress %d%% (read %zd bytes, duration %zd sec)\n",
705 + percent, vmar->clusters_read*VMA_CLUSTER_SIZE,
706 + duration);
707 + fflush(stdout);
708 + vmar->clusters_read_per = percent;
709 + }
710 + }
711 +
712 + /* try to write whole clusters to speedup restore */
713 + if (mask == 0xffff) {
714 + if ((start + VMA_CLUSTER_SIZE) > extent_size) {
715 + error_setg(errp, "short vma extent - too many blocks");
716 + return -1;
717 + }
718 + int64_t sector_num = (cluster_num * VMA_CLUSTER_SIZE) /
719 + BDRV_SECTOR_SIZE;
720 + int64_t end_sector = sector_num +
721 + VMA_CLUSTER_SIZE/BDRV_SECTOR_SIZE;
722 +
723 + if (end_sector > max_sector) { /* clip the last cluster to the device size */
724 + end_sector = max_sector;
725 + }
726 +
727 + if (end_sector <= sector_num) {
728 + error_setg(errp, "got wrong block address - write bejond end");
729 + return -1;
730 + }
731 +
732 + int nb_sectors = end_sector - sector_num;
733 + if (restore_write_data(vmar, dev_id, bs, vmstate_fd, buf + start,
734 + sector_num, nb_sectors, errp) < 0) {
735 + return -1;
736 + }
737 +
738 + start += VMA_CLUSTER_SIZE;
739 + } else {
740 + int j;
741 + int bit = 1;
742 +
743 + for (j = 0; j < 16; j++) { /* 16 blocks of VMA_BLOCK_SIZE per cluster */
744 + int64_t sector_num = (cluster_num*VMA_CLUSTER_SIZE +
745 + j*VMA_BLOCK_SIZE)/BDRV_SECTOR_SIZE;
746 +
747 + int64_t end_sector = sector_num +
748 + VMA_BLOCK_SIZE/BDRV_SECTOR_SIZE;
749 + if (end_sector > max_sector) {
750 + end_sector = max_sector;
751 + }
752 +
753 + if (mask & bit) { /* block is stored in the extent */
754 + if ((start + VMA_BLOCK_SIZE) > extent_size) {
755 + error_setg(errp, "short vma extent - too many blocks");
756 + return -1;
757 + }
758 +
759 + if (end_sector <= sector_num) {
760 + error_setg(errp, "got wrong block address - "
761 + "write bejond end");
762 + return -1;
763 + }
764 +
765 + int nb_sectors = end_sector - sector_num;
766 + if (restore_write_data(vmar, dev_id, bs, vmstate_fd,
767 + buf + start, sector_num,
768 + nb_sectors, errp) < 0) {
769 + return -1;
770 + }
771 +
772 + start += VMA_BLOCK_SIZE;
773 +
774 + } else { /* zero block: not stored, written only on request */
775 +
776 + if (rstate->write_zeroes && (end_sector > sector_num)) {
777 + /* Todo: use bdrv_co_write_zeroes (but that need to
778 + * be run inside coroutine?)
779 + */
780 + int nb_sectors = end_sector - sector_num;
781 + if (restore_write_data(vmar, dev_id, bs, vmstate_fd,
782 + zero_vma_block, sector_num,
783 + nb_sectors, errp) < 0) {
784 + return -1;
785 + }
786 + }
787 + }
788 +
789 + bit = bit << 1;
790 + }
791 + }
792 + }
793 +
794 + if (start != extent_size) { /* all data announced by the header must have been consumed */
795 + error_setg(errp, "vma extent error - missing blocks");
796 + return -1;
797 + }
798 +
799 + return 0;
800 +}
801 +
802 +int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
803 + Error **errp)
804 +{ /* Read all extents from the archive and restore them to the registered devices, then check that no cluster is missing. Returns 0, or -1 with *errp set. */
805 + assert(vmar);
806 + assert(vmar->head_data);
807 +
808 + int ret = 0;
809 + unsigned char buf[VMA_MAX_EXTENT_SIZE]; /* NOTE(review): large on-stack buffer */
810 + int buf_pos = 0; /* number of valid bytes in buf */
811 + unsigned char md5sum[16];
812 + VmaHeader *h = (VmaHeader *)vmar->head_data;
813 +
814 + vmar->start_time = time(NULL);
815 +
816 + while (1) {
817 + int bytes = full_read(vmar->fd, buf + buf_pos, sizeof(buf) - buf_pos);
818 + if (bytes < 0) {
819 + error_setg(errp, "read failed - %s", g_strerror(errno));
820 + return -1;
821 + }
822 +
823 + buf_pos += bytes;
824 +
825 + if (!buf_pos) {
826 + break; /* EOF */
827 + }
828 +
829 + if (buf_pos < VMA_EXTENT_HEADER_SIZE) {
830 + error_setg(errp, "read short extent (%d bytes)", buf_pos);
831 + return -1;
832 + }
833 +
834 + VmaExtentHeader *ehead = (VmaExtentHeader *)buf;
835 +
836 + /* extract md5sum */
837 + memcpy(md5sum, ehead->md5sum, sizeof(ehead->md5sum));
838 + memset(ehead->md5sum, 0, sizeof(ehead->md5sum)); /* checksum is computed with the md5sum field zeroed */
839 +
840 + g_checksum_reset(vmar->md5csum);
841 + g_checksum_update(vmar->md5csum, buf, VMA_EXTENT_HEADER_SIZE);
842 + gsize csize = 16;
843 + g_checksum_get_digest(vmar->md5csum, ehead->md5sum, &csize);
844 +
845 + if (memcmp(md5sum, ehead->md5sum, 16) != 0) {
846 + error_setg(errp, "wrong vma extent header chechsum");
847 + return -1;
848 + }
849 +
850 + if (memcmp(h->uuid, ehead->uuid, sizeof(ehead->uuid)) != 0) { /* extent must belong to this archive */
851 + error_setg(errp, "wrong vma extent uuid");
852 + return -1;
853 + }
854 +
855 + if (ehead->magic != VMA_EXTENT_MAGIC || ehead->reserved1 != 0) {
856 + error_setg(errp, "wrong vma extent header magic");
857 + return -1;
858 + }
859 +
860 + int block_count = GUINT16_FROM_BE(ehead->block_count);
861 + int extent_size = VMA_EXTENT_HEADER_SIZE + block_count*VMA_BLOCK_SIZE;
862 +
863 + if (buf_pos < extent_size) {
864 + error_setg(errp, "short vma extent (%d < %d)", buf_pos,
865 + extent_size);
866 + return -1;
867 + }
868 +
869 + if (restore_extent(vmar, buf, extent_size, vmstate_fd, verbose,
870 + errp) < 0) {
871 + return -1;
872 + }
873 +
874 + if (buf_pos > extent_size) { /* keep bytes of the following extent at the buffer start */
875 + memmove(buf, buf + extent_size, buf_pos - extent_size);
876 + buf_pos = buf_pos - extent_size;
877 + } else {
878 + buf_pos = 0;
879 + }
880 + }
881 +
882 + bdrv_drain_all();
883 +
884 + int i;
885 + for (i = 1; i < 256; i++) { /* flush each registered device and verify its cluster bitmap */
886 + VmaRestoreState *rstate = &vmar->rstate[i];
887 + if (!rstate->bs) {
888 + continue;
889 + }
890 +
891 + if (bdrv_flush(rstate->bs) < 0) {
892 + error_setg(errp, "vma bdrv_flush %s failed",
893 + vmar->devinfo[i].devname);
894 + return -1;
895 + }
896 +
897 + if (vmar->devinfo[i].size &&
898 + (strcmp(vmar->devinfo[i].devname, "vmstate") != 0)) {
899 + assert(rstate->bitmap);
900 +
901 + int64_t cluster_num, end;
902 +
903 + end = (vmar->devinfo[i].size + VMA_CLUSTER_SIZE - 1) /
904 + VMA_CLUSTER_SIZE;
905 +
906 + for (cluster_num = 0; cluster_num < end; cluster_num++) {
907 + if (!vma_reader_get_bitmap(rstate, cluster_num)) {
908 + error_setg(errp, "detected missing cluster %zd "
909 + "for stream %s", cluster_num,
910 + vmar->devinfo[i].devname);
911 + return -1;
912 + }
913 + }
914 + }
915 + }
916 +
917 + return ret;
918 +}
919 +
920 diff --git a/vma-writer.c b/vma-writer.c
921 new file mode 100644
922 index 0000000..9228ca6
923 --- /dev/null
924 +++ b/vma-writer.c
925 @@ -0,0 +1,940 @@
926 +/*
927 + * VMA: Virtual Machine Archive
928 + *
929 + * Copyright (C) 2012 Proxmox Server Solutions
930 + *
931 + * Authors:
932 + * Dietmar Maurer (dietmar@proxmox.com)
933 + *
934 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
935 + * See the COPYING file in the top-level directory.
936 + *
937 + */
938 +
939 +#include <stdio.h>
940 +#include <errno.h>
941 +#include <unistd.h>
942 +#include <stdio.h>
943 +#include <string.h>
944 +#include <sys/types.h>
945 +#include <sys/stat.h>
946 +#include <fcntl.h>
947 +#include <glib.h>
948 +#include <uuid/uuid.h>
949 +
950 +#include "qemu-common.h"
951 +#include "vma.h"
952 +#include "block/block.h"
953 +#include "monitor/monitor.h"
954 +
955 +#define DEBUG_VMA 0
956 +
957 +#define DPRINTF(fmt, ...)\
958 + do { if (DEBUG_VMA) { printf("vma: " fmt, ## __VA_ARGS__); } } while (0)
959 +
960 +#define WRITE_BUFFERS 5
961 +
962 +typedef struct VmaAIOCB VmaAIOCB;
963 +struct VmaAIOCB { /* One write buffer handed to vma_co_writer_task(). */
964 + unsigned char buffer[VMA_MAX_EXTENT_SIZE]; /* data to write */
965 + VmaWriter *vmaw; /* owning writer */
966 + size_t bytes; /* number of valid bytes in buffer; reset to 0 by vma_co_writer_task() after the write */
967 + Coroutine *co;
968 +};
969 +
970 +struct VmaWriter { /* State of one VMA archive being written. */
971 + int fd; /* output file descriptor used by vma_co_write() */
972 + FILE *cmd;
973 + int status; /* 0 while ok; set to -1 by vma_writer_set_error() */
974 + char errmsg[8192]; /* message of the first recorded error */
975 + uuid_t uuid;
976 + bool header_written; /* no streams or config blobs may be added once set */
977 + bool closed;
978 +
979 + /* we always write extents */
980 + unsigned char outbuf[VMA_MAX_EXTENT_SIZE];
981 + int outbuf_pos; /* in bytes */
982 + int outbuf_count; /* in VMA_BLOCKS */
983 + uint64_t outbuf_block_info[VMA_BLOCKS_PER_EXTENT];
984 +
985 + VmaAIOCB *aiocbs[WRITE_BUFFERS];
986 + CoQueue wqueue;
987 +
988 + GChecksum *md5csum;
989 + CoMutex writer_lock; /* held for the duration of vma_co_write() so writes are not interleaved */
990 + CoMutex flush_lock;
991 + Coroutine *co_writer; /* coroutine currently inside vma_co_write(), else NULL */
992 +
993 + /* drive informations */
994 + VmaStreamInfo stream_info[256];
995 + guint stream_count;
996 +
997 + guint8 vmstate_stream; /* stream number of the stream named "vmstate" */
998 + uint32_t vmstate_clusters;
999 +
1000 + /* header blob table */
1001 + char *header_blob_table;
1002 + uint32_t header_blob_table_size; /* allocated size in bytes */
1003 + uint32_t header_blob_table_pos; /* offset at which the next blob is stored */
1004 +
1005 + /* store for config blobs */
1006 + uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
1007 + uint32_t config_data[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
1008 + uint32_t config_count;
1009 +};
1010 +
1011 +void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...)
1012 +{ /* Record a printf-style error message and set status to -1; only the first error is kept. */
1013 + va_list ap;
1014 +
1015 + if (vmaw->status < 0) { /* an earlier error is already recorded */
1016 + return;
1017 + }
1018 +
1019 + vmaw->status = -1;
1020 +
1021 + va_start(ap, fmt);
1022 + g_vsnprintf(vmaw->errmsg, sizeof(vmaw->errmsg), fmt, ap);
1023 + va_end(ap);
1024 +
1025 + DPRINTF("vma_writer_set_error: %s\n", vmaw->errmsg);
1026 +}
1027 +
1028 +static uint32_t allocate_header_blob(VmaWriter *vmaw, const char *data,
1029 + size_t len)
1030 +{ /* Append data (len bytes) to the header blob table and return its offset in the table; returns 0 if len does not fit into the 2-byte length prefix. */
1031 + if (len > 65535) {
1032 + return 0;
1033 + }
1034 +
1035 + if (!vmaw->header_blob_table ||
1036 + (vmaw->header_blob_table_size <
1037 + (vmaw->header_blob_table_pos + len + 2))) {
1038 + int newsize = vmaw->header_blob_table_size + ((len + 2 + 511)/512)*512; /* grow in multiples of 512 bytes */
1039 +
1040 + vmaw->header_blob_table = g_realloc(vmaw->header_blob_table, newsize);
1041 + memset(vmaw->header_blob_table + vmaw->header_blob_table_size,
1042 + 0, newsize - vmaw->header_blob_table_size); /* zero the newly added part */
1043 + vmaw->header_blob_table_size = newsize;
1044 + }
1045 +
1046 + uint32_t cpos = vmaw->header_blob_table_pos;
1047 + vmaw->header_blob_table[cpos] = len & 255; /* length prefix, low byte first */
1048 + vmaw->header_blob_table[cpos+1] = (len >> 8) & 255;
1049 + memcpy(vmaw->header_blob_table + cpos + 2, data, len);
1050 + vmaw->header_blob_table_pos += len + 2;
1051 + return cpos;
1052 +}
1053 +
1054 +static uint32_t allocate_header_string(VmaWriter *vmaw, const char *str)
1055 +{ /* Store str, including its terminating NUL, as a header blob; returns the value of allocate_header_blob(). */
1056 + assert(vmaw);
1057 +
1058 + size_t len = strlen(str) + 1;
1059 +
1060 + return allocate_header_blob(vmaw, str, len);
1061 +}
1062 +
1063 +int vma_writer_add_config(VmaWriter *vmaw, const char *name, gpointer data,
1064 + gsize len)
1065 +{ /* Add a named config blob to the archive header; must be called before the header is written. Returns 0 on success, -1 if a blob could not be stored. */
1066 + assert(vmaw);
1067 + assert(!vmaw->header_written);
1068 + assert(vmaw->config_count < VMA_MAX_CONFIGS);
1069 + assert(name);
1070 + assert(data);
1071 + assert(len);
1072 +
1073 + uint32_t name_ptr = allocate_header_string(vmaw, name);
1074 + if (!name_ptr) {
1075 + return -1;
1076 + }
1077 +
1078 + uint32_t data_ptr = allocate_header_blob(vmaw, data, len);
1079 + if (!data_ptr) {
1080 + return -1;
1081 + }
1082 +
1083 + vmaw->config_names[vmaw->config_count] = name_ptr;
1084 + vmaw->config_data[vmaw->config_count] = data_ptr;
1085 +
1086 + vmaw->config_count++;
1087 +
1088 + return 0;
1089 +}
1090 +
1091 +int vma_writer_register_stream(VmaWriter *vmaw, const char *devname,
1092 + size_t size)
1093 +{ /* Register a new stream (device) of the given size; returns its stream number (>= 1), or -1 after recording an error. */
1094 + assert(vmaw);
1095 + assert(devname);
1096 + assert(!vmaw->status);
1097 +
1098 + if (vmaw->header_written) {
1099 + vma_writer_set_error(vmaw, "vma_writer_register_stream: header "
1100 + "already written");
1101 + return -1;
1102 + }
1103 +
1104 + guint n = vmaw->stream_count + 1;
1105 +
1106 + /* we can have dev_ids from 1 to 255 (0 reserved)
1107 + * 255(-1) reserved for safety
1108 + */
1109 + if (n > 254) {
1110 + vma_writer_set_error(vmaw, "vma_writer_register_stream: "
1111 + "too many drives");
1112 + return -1;
1113 + }
1114 +
1115 + if (size <= 0) {
1116 + vma_writer_set_error(vmaw, "vma_writer_register_stream: "
1117 + "got strange size %zd", size);
1118 + return -1;
1119 + }
1120 +
1121 + DPRINTF("vma_writer_register_stream %s %zu %d\n", devname, size, n);
1122 +
1123 + vmaw->stream_info[n].devname = g_strdup(devname);
1124 + vmaw->stream_info[n].size = size;
1125 +
1126 + vmaw->stream_info[n].cluster_count = (size + VMA_CLUSTER_SIZE - 1) /
1127 + VMA_CLUSTER_SIZE; /* rounded up */
1128 +
1129 + vmaw->stream_count = n;
1130 +
1131 + if (strcmp(devname, "vmstate") == 0) {
1132 + vmaw->vmstate_stream = n;
1133 + }
1134 +
1135 + return n;
1136 +}
1137 +
1138 +static void vma_co_continue_write(void *opaque)
1139 +{ /* fd write handler installed by vma_co_write(): re-enters the coroutine stored in co_writer. */
1140 + VmaWriter *vmaw = opaque;
1141 +
1142 + DPRINTF("vma_co_continue_write\n");
1143 + qemu_coroutine_enter(vmaw->co_writer, NULL);
1144 +}
1145 +
1146 +static int vma_co_write_finished(void *opaque)
1147 +{ /* Callback passed to qemu_aio_set_fd_handler() by vma_co_write(): returns non-zero while a coroutine is registered in co_writer. */
1148 + VmaWriter *vmaw = opaque;
1149 +
1150 + return (vmaw->co_writer != 0);
1151 +}
1152 +
1153 +static ssize_t coroutine_fn
1154 +vma_co_write(VmaWriter *vmaw, const void *buf, size_t bytes)
1155 +{ /* Write bytes from buf to vmaw->fd, yielding whenever the fd would block. Returns bytes on success, -1 after recording a write error. */
1156 + size_t done = 0;
1157 + ssize_t ret;
1158 +
1159 + /* atomic writes (we cannot interleave writes) */
1160 + qemu_co_mutex_lock(&vmaw->writer_lock);
1161 +
1162 + DPRINTF("vma_co_write enter %zd\n", bytes);
1163 +
1164 + assert(vmaw->co_writer == NULL);
1165 +
1166 + vmaw->co_writer = qemu_coroutine_self();
1167 +
1168 + qemu_aio_set_fd_handler(vmaw->fd, NULL, vma_co_continue_write,
1169 + vma_co_write_finished, vmaw);
1170 +
1171 + DPRINTF("vma_co_write wait until writable\n");
1172 + qemu_coroutine_yield(); /* resumed by vma_co_continue_write() */
1173 + DPRINTF("vma_co_write starting %zd\n", bytes);
1174 +
1175 + while (done < bytes) {
1176 + ret = write(vmaw->fd, buf + done, bytes - done);
1177 + if (ret > 0) {
1178 + done += ret;
1179 + DPRINTF("vma_co_write written %zd %zd\n", done, ret);
1180 + } else if (ret < 0) {
1181 + if (errno == EAGAIN || errno == EWOULDBLOCK) {
1182 + DPRINTF("vma_co_write yield %zd\n", done);
1183 + qemu_coroutine_yield();
1184 + DPRINTF("vma_co_write restart %zd\n", done);
1185 + } else {
1186 + vma_writer_set_error(vmaw, "vma_co_write write error - %s",
1187 + g_strerror(errno));
1188 + done = -1; /* always return failure for partial writes */
1189 + break;
1190 + }
1191 + } else if (ret == 0) {
1192 + /* should not happen - simply try again */
1193 + }
1194 + }
1195 +
1196 + qemu_aio_set_fd_handler(vmaw->fd, NULL, NULL, NULL, NULL); /* remove the handler again */
1197 +
1198 + vmaw->co_writer = NULL;
1199 +
1200 + qemu_co_mutex_unlock(&vmaw->writer_lock);
1201 +
1202 + DPRINTF("vma_co_write leave %zd\n", done);
1203 + return done;
1204 +}
1205 +
1206 +static void coroutine_fn vma_co_writer_task(void *opaque)
1207 +{
1208 + VmaAIOCB *cb = opaque;
1209 +
1210 + DPRINTF("vma_co_writer_task start\n");
1211 +
1212 + ssize_t done = vma_co_write(cb->vmaw, cb->buffer, cb->bytes);
1213 + DPRINTF("vma_co_writer_task write done %zd\n", done);
1214 +
1215 + if (done < 0 || (size_t)done != cb->bytes) { /* error or short write */
1216 + DPRINTF("vma_co_writer_task failed write %zd %zd", cb->bytes, done);
1217 + vma_writer_set_error(cb->vmaw, "vma_co_writer_task failed write %zd",
1218 + done);
1219 + }
1220 +
1221 + cb->bytes = 0; /* bytes == 0 marks this buffer as free again */
1222 +
1223 + qemu_co_queue_next(&cb->vmaw->wqueue); /* wake one waiter on wqueue */
1224 +
1225 + DPRINTF("vma_co_writer_task end\n");
1226 +}
1227 +
1228 +static void coroutine_fn vma_queue_flush(VmaWriter *vmaw)
1229 +{
1230 + DPRINTF("vma_queue_flush enter\n");
1231 +
1232 + assert(vmaw);
1233 +
1234 + while (1) {
1235 + int i;
1236 + VmaAIOCB *cb = NULL;
1237 + for (i = 0; i < WRITE_BUFFERS; i++) {
1238 + if (vmaw->aiocbs[i]->bytes) {
1239 + cb = vmaw->aiocbs[i];
1240 + DPRINTF("FOUND USED AIO BUFFER %d %zd\n", i,
1241 + vmaw->aiocbs[i]->bytes);
1242 + break;
1243 + }
1244 + }
1245 + if (!cb) {
1246 + break;
1247 + }
1248 + qemu_co_queue_wait(&vmaw->wqueue);
1249 + }
1250 +
1251 + DPRINTF("vma_queue_flush leave\n");
1252 +}
1253 +
1254 +/**
1255 + * NOTE: pipe buffer size is only 4096 bytes on Linux (see 'ulimit -a'),
1256 + * so we need to create a coroutine to allow 'parallel' execution.
1257 + */
1258 +static ssize_t coroutine_fn
1259 +vma_queue_write(VmaWriter *vmaw, const void *buf, size_t bytes)
1260 +{
1261 + DPRINTF("vma_queue_write enter %zd\n", bytes);
1262 +
1263 + assert(vmaw);
1264 + assert(buf);
1265 + assert(bytes <= VMA_MAX_EXTENT_SIZE);
1266 +
1267 + VmaAIOCB *cb = NULL;
1268 + while (!cb) {
1269 + int i;
1270 + for (i = 0; i < WRITE_BUFFERS; i++) {
1271 + if (!vmaw->aiocbs[i]->bytes) {
1272 + cb = vmaw->aiocbs[i];
1273 + break;
1274 + }
1275 + }
1276 + if (!cb) {
1277 + qemu_co_queue_wait(&vmaw->wqueue);
1278 + }
1279 + }
1280 +
1281 + memcpy(cb->buffer, buf, bytes);
1282 + cb->bytes = bytes;
1283 + cb->vmaw = vmaw;
1284 +
1285 + DPRINTF("vma_queue_write start %zd\n", bytes);
1286 + cb->co = qemu_coroutine_create(vma_co_writer_task);
1287 + qemu_coroutine_enter(cb->co, cb);
1288 +
1289 + DPRINTF("vma_queue_write leave\n");
1290 +
1291 + return bytes;
1292 +}
1293 +
1294 +VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp)
1295 +{
1296 + const char *p;
1297 +
1298 + assert(sizeof(VmaHeader) == (4096 + 8192));
1299 + assert(sizeof(VmaExtentHeader) == 512);
1300 +
1301 + VmaWriter *vmaw = g_new0(VmaWriter, 1);
1302 + vmaw->fd = -1;
1303 +
1304 + vmaw->md5csum = g_checksum_new(G_CHECKSUM_MD5);
1305 + if (!vmaw->md5csum) {
1306 + error_setg(errp, "can't allocate cmsum\n");
1307 + goto err;
1308 + }
1309 +
1310 + if (strstart(filename, "exec:", &p)) {
1311 + vmaw->cmd = popen(p, "w");
1312 + if (vmaw->cmd == NULL) {
1313 + error_setg(errp, "can't popen command '%s' - %s\n", p,
1314 + g_strerror(errno));
1315 + goto err;
1316 + }
1317 + vmaw->fd = fileno(vmaw->cmd);
1318 +
1319 + /* try to use O_NONBLOCK and O_DIRECT */
1320 + fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
1321 + fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_DIRECT);
1322 +
1323 + } else {
1324 + struct stat st;
1325 + int oflags;
1326 + const char *tmp_id_str;
1327 +
1328 + if ((stat(filename, &st) == 0) && S_ISFIFO(st.st_mode)) {
1329 + oflags = O_NONBLOCK|O_DIRECT|O_WRONLY;
1330 + vmaw->fd = qemu_open(filename, oflags, 0644);
1331 + } else if (strstart(filename, "/dev/fdset/", &tmp_id_str)) {
1332 + oflags = O_NONBLOCK|O_DIRECT|O_WRONLY;
1333 + vmaw->fd = qemu_open(filename, oflags, 0644);
1334 + } else if (strstart(filename, "/dev/fdname/", &tmp_id_str)) {
1335 + vmaw->fd = monitor_get_fd(cur_mon, tmp_id_str, errp);
1336 + if (vmaw->fd < 0) {
1337 + goto err;
1338 + }
1339 + /* try to use O_NONBLOCK and O_DIRECT */
1340 + fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_NONBLOCK);
1341 + fcntl(vmaw->fd, F_SETFL, fcntl(vmaw->fd, F_GETFL)|O_DIRECT);
1342 + } else {
1343 + oflags = O_NONBLOCK|O_DIRECT|O_WRONLY|O_CREAT|O_EXCL;
1344 + vmaw->fd = qemu_open(filename, oflags, 0644);
1345 + }
1346 +
1347 + if (vmaw->fd < 0) {
1348 + error_setg(errp, "can't open file %s - %s\n", filename,
1349 + g_strerror(errno));
1350 + goto err;
1351 + }
1352 + }
1353 +
1354 + /* we use O_DIRECT, so we need to align IO buffers */
1355 + int i;
1356 + for (i = 0; i < WRITE_BUFFERS; i++) {
1357 + vmaw->aiocbs[i] = qemu_memalign(512, sizeof(VmaAIOCB));
1358 + memset(vmaw->aiocbs[i], 0, sizeof(VmaAIOCB));
1359 + }
1360 +
1361 + vmaw->outbuf_count = 0;
1362 + vmaw->outbuf_pos = VMA_EXTENT_HEADER_SIZE;
1363 +
1364 + vmaw->header_blob_table_pos = 1; /* start at pos 1 */
1365 +
1366 + qemu_co_mutex_init(&vmaw->writer_lock);
1367 + qemu_co_mutex_init(&vmaw->flush_lock);
1368 + qemu_co_queue_init(&vmaw->wqueue);
1369 +
1370 + uuid_copy(vmaw->uuid, uuid);
1371 +
1372 + return vmaw;
1373 +
1374 +err:
1375 + if (vmaw) {
1376 + if (vmaw->cmd) {
1377 + pclose(vmaw->cmd);
1378 + } else if (vmaw->fd >= 0) {
1379 + close(vmaw->fd);
1380 + }
1381 +
1382 + if (vmaw->md5csum) {
1383 + g_checksum_free(vmaw->md5csum);
1384 + }
1385 +
1386 + g_free(vmaw);
1387 + }
1388 +
1389 + return NULL;
1390 +}
1391 +
1392 +static int coroutine_fn vma_write_header(VmaWriter *vmaw)
1393 +{
1394 + assert(vmaw);
1395 + int header_clusters = 8;
1396 + char buf[65536*header_clusters];
1397 + VmaHeader *head = (VmaHeader *)buf;
1398 +
1399 + int i;
1400 +
1401 + DPRINTF("VMA WRITE HEADER\n");
1402 +
1403 + if (vmaw->status < 0) {
1404 + return vmaw->status;
1405 + }
1406 +
1407 + memset(buf, 0, sizeof(buf));
1408 +
1409 + head->magic = VMA_MAGIC;
1410 + head->version = GUINT32_TO_BE(1); /* v1 */
1411 + memcpy(head->uuid, vmaw->uuid, 16);
1412 +
1413 + time_t ctime = time(NULL);
1414 + head->ctime = GUINT64_TO_BE(ctime);
1415 +
1416 + if (!vmaw->stream_count) {
1417 + return -1;
1418 + }
1419 +
1420 + for (i = 0; i < VMA_MAX_CONFIGS; i++) {
1421 + head->config_names[i] = GUINT32_TO_BE(vmaw->config_names[i]);
1422 + head->config_data[i] = GUINT32_TO_BE(vmaw->config_data[i]);
1423 + }
1424 +
1425 + /* 32 bytes per device (12 used currently) = 8192 bytes max */
1426 + for (i = 1; i <= 254; i++) {
1427 + VmaStreamInfo *si = &vmaw->stream_info[i];
1428 + if (si->size) {
1429 + assert(si->devname);
1430 + uint32_t devname_ptr = allocate_header_string(vmaw, si->devname);
1431 + if (!devname_ptr) {
1432 + return -1;
1433 + }
1434 + head->dev_info[i].devname_ptr = GUINT32_TO_BE(devname_ptr);
1435 + head->dev_info[i].size = GUINT64_TO_BE(si->size);
1436 + }
1437 + }
1438 +
1439 + uint32_t header_size = sizeof(VmaHeader) + vmaw->header_blob_table_size;
1440 + head->header_size = GUINT32_TO_BE(header_size);
1441 +
1442 + if (header_size > sizeof(buf)) {
1443 + return -1; /* just to be sure */
1444 + }
1445 +
1446 + uint32_t blob_buffer_offset = sizeof(VmaHeader);
1447 + memcpy(buf + blob_buffer_offset, vmaw->header_blob_table,
1448 + vmaw->header_blob_table_size);
1449 + head->blob_buffer_offset = GUINT32_TO_BE(blob_buffer_offset);
1450 + head->blob_buffer_size = GUINT32_TO_BE(vmaw->header_blob_table_pos);
1451 +
1452 + g_checksum_reset(vmaw->md5csum);
1453 + g_checksum_update(vmaw->md5csum, (const guchar *)buf, header_size);
1454 + gsize csize = 16;
1455 + g_checksum_get_digest(vmaw->md5csum, (guint8 *)(head->md5sum), &csize);
1456 +
1457 + return vma_queue_write(vmaw, buf, header_size);
1458 +}
1459 +
1460 +static int coroutine_fn vma_writer_flush(VmaWriter *vmaw)
1461 +{
1462 + assert(vmaw);
1463 +
1464 + int ret;
1465 + int i;
1466 +
1467 + if (vmaw->status < 0) {
1468 + return vmaw->status;
1469 + }
1470 +
1471 + if (!vmaw->header_written) {
1472 + vmaw->header_written = true;
1473 + ret = vma_write_header(vmaw);
1474 + if (ret < 0) {
1475 + vma_writer_set_error(vmaw, "vma_writer_flush: write header failed");
1476 + return ret;
1477 + }
1478 + }
1479 +
1480 + DPRINTF("VMA WRITE FLUSH %d %d\n", vmaw->outbuf_count, vmaw->outbuf_pos);
1481 +
1482 +
1483 + VmaExtentHeader *ehead = (VmaExtentHeader *)vmaw->outbuf;
1484 +
1485 + ehead->magic = VMA_EXTENT_MAGIC;
1486 + ehead->reserved1 = 0;
1487 +
1488 + for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
1489 + ehead->blockinfo[i] = GUINT64_TO_BE(vmaw->outbuf_block_info[i]);
1490 + }
1491 +
1492 + guint16 block_count = (vmaw->outbuf_pos - VMA_EXTENT_HEADER_SIZE) /
1493 + VMA_BLOCK_SIZE;
1494 +
1495 + ehead->block_count = GUINT16_TO_BE(block_count);
1496 +
1497 + memcpy(ehead->uuid, vmaw->uuid, sizeof(ehead->uuid));
1498 + memset(ehead->md5sum, 0, sizeof(ehead->md5sum));
1499 +
1500 + g_checksum_reset(vmaw->md5csum);
1501 + g_checksum_update(vmaw->md5csum, vmaw->outbuf, VMA_EXTENT_HEADER_SIZE);
1502 + gsize csize = 16;
1503 + g_checksum_get_digest(vmaw->md5csum, ehead->md5sum, &csize);
1504 +
1505 + int bytes = vmaw->outbuf_pos;
1506 + ret = vma_queue_write(vmaw, vmaw->outbuf, bytes);
1507 + if (ret != bytes) {
1508 + vma_writer_set_error(vmaw, "vma_writer_flush: failed write");
1509 + }
1510 +
1511 + vmaw->outbuf_count = 0;
1512 + vmaw->outbuf_pos = VMA_EXTENT_HEADER_SIZE;
1513 +
1514 + for (i = 0; i < VMA_BLOCKS_PER_EXTENT; i++) {
1515 + vmaw->outbuf_block_info[i] = 0;
1516 + }
1517 +
1518 + return vmaw->status;
1519 +}
1520 +
1521 +static int vma_count_open_streams(VmaWriter *vmaw)
1522 +{
1523 + int pending = 0; /* registered streams not yet marked finished */
1524 + int idx;
1525 +
1526 + g_assert(vmaw != NULL);
1527 + for (idx = 0; idx < 256; idx++) {
1528 + const VmaStreamInfo *si = &vmaw->stream_info[idx];
1529 + if (si->size != 0 && !si->finished) {
1530 + pending++;
1531 + }
1532 + }
1533 + return pending;
1534 +}
1535 +
1536 +/**
1537 + * all jobs should call this when there is no more data
1538 + * Returns: number of remaining stream (0 ==> finished)
1539 + */
1540 +int coroutine_fn
1541 +vma_writer_close_stream(VmaWriter *vmaw, uint8_t dev_id)
1542 +{
1543 + g_assert(vmaw != NULL);
1544 +
1545 + DPRINTF("vma_writer_set_status %d\n", dev_id);
1546 + if (!vmaw->stream_info[dev_id].size) {
1547 + vma_writer_set_error(vmaw, "vma_writer_close_stream: "
1548 + "no such stream %d", dev_id);
1549 + return -1;
1550 + }
1551 + if (vmaw->stream_info[dev_id].finished) {
1552 + vma_writer_set_error(vmaw, "vma_writer_close_stream: "
1553 + "stream already closed %d", dev_id);
1554 + return -1;
1555 + }
1556 +
1557 + vmaw->stream_info[dev_id].finished = true;
1558 +
1559 + int open_drives = vma_count_open_streams(vmaw);
1560 +
1561 + if (open_drives <= 0) {
1562 + DPRINTF("vma_writer_set_status all drives completed\n");
1563 + qemu_co_mutex_lock(&vmaw->flush_lock);
1564 + int ret = vma_writer_flush(vmaw);
1565 + qemu_co_mutex_unlock(&vmaw->flush_lock);
1566 + if (ret < 0) {
1567 + vma_writer_set_error(vmaw, "vma_writer_close_stream: flush failed");
1568 + }
1569 + }
1570 +
1571 + return open_drives;
1572 +}
1573 +
1574 +int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status)
1575 +{
1576 + int i;
1577 +
1578 + g_assert(vmaw != NULL);
1579 +
1580 + if (status) {
1581 + status->status = vmaw->status;
1582 + g_strlcpy(status->errmsg, vmaw->errmsg, sizeof(status->errmsg));
1583 + for (i = 0; i <= 255; i++) {
1584 + status->stream_info[i] = vmaw->stream_info[i];
1585 + }
1586 + uuid_unparse_lower(vmaw->uuid, status->uuid_str);
1587 + /* must stay inside the NULL check, like every other write to *status */
1588 + status->closed = vmaw->closed;
1589 + }
1590 +
1591 + /* the writer's status code is returned even when status is NULL */
1592 + return vmaw->status;
1593 +}
1594 +
1595 +static int vma_writer_get_buffer(VmaWriter *vmaw)
1596 +{
1597 + int ret = 0;
1598 +
1599 + qemu_co_mutex_lock(&vmaw->flush_lock);
1600 +
1601 + /* wait until buffer is available */
1602 + while (vmaw->outbuf_count >= (VMA_BLOCKS_PER_EXTENT - 1)) {
1603 + ret = vma_writer_flush(vmaw);
1604 + if (ret < 0) {
1605 + vma_writer_set_error(vmaw, "vma_writer_get_buffer: flush failed");
1606 + break;
1607 + }
1608 + }
1609 +
1610 + qemu_co_mutex_unlock(&vmaw->flush_lock);
1611 +
1612 + return ret;
1613 +}
1614 +
1615 +
1616 +int64_t coroutine_fn
1617 +vma_writer_write(VmaWriter *vmaw, uint8_t dev_id, int64_t cluster_num,
1618 + unsigned char *buf, size_t *zero_bytes)
1619 +{
1620 + g_assert(vmaw != NULL);
1621 + g_assert(zero_bytes != NULL);
1622 +
1623 + *zero_bytes = 0;
1624 +
1625 + if (vmaw->status < 0) {
1626 + return vmaw->status;
1627 + }
1628 +
1629 + if (!dev_id || !vmaw->stream_info[dev_id].size) {
1630 + vma_writer_set_error(vmaw, "vma_writer_write: "
1631 + "no such stream %d", dev_id);
1632 + return -1;
1633 + }
1634 +
1635 + if (vmaw->stream_info[dev_id].finished) {
1636 + vma_writer_set_error(vmaw, "vma_writer_write: "
1637 + "stream already closed %d", dev_id);
1638 + return -1;
1639 + }
1640 +
1641 +
1642 + if (cluster_num >= (((uint64_t)1)<<32)) {
1643 + vma_writer_set_error(vmaw, "vma_writer_write: "
1644 + "cluster number out of range");
1645 + return -1;
1646 + }
1647 +
1648 + if (dev_id == vmaw->vmstate_stream) {
1649 + if (cluster_num != vmaw->vmstate_clusters) {
1650 + vma_writer_set_error(vmaw, "vma_writer_write: "
1651 + "non sequential vmstate write");
1652 + }
1653 + vmaw->vmstate_clusters++;
1654 + } else if (cluster_num >= vmaw->stream_info[dev_id].cluster_count) {
1655 + vma_writer_set_error(vmaw, "vma_writer_write: cluster number too big");
1656 + return -1;
1657 + }
1658 +
1659 + /* wait until buffer is available */
1660 + if (vma_writer_get_buffer(vmaw) < 0) {
1661 + vma_writer_set_error(vmaw, "vma_writer_write: "
1662 + "vma_writer_get_buffer failed");
1663 + return -1;
1664 + }
1665 +
1666 + DPRINTF("VMA WRITE %d %zd\n", dev_id, cluster_num);
1667 +
1668 + uint16_t mask = 0;
1669 +
1670 + if (buf) {
1671 + int i;
1672 + int bit = 1;
1673 + for (i = 0; i < 16; i++) {
1674 + unsigned char *vmablock = buf + (i*VMA_BLOCK_SIZE);
1675 + if (!buffer_is_zero(vmablock, VMA_BLOCK_SIZE)) {
1676 + mask |= bit;
1677 + memcpy(vmaw->outbuf + vmaw->outbuf_pos, vmablock,
1678 + VMA_BLOCK_SIZE);
1679 + vmaw->outbuf_pos += VMA_BLOCK_SIZE;
1680 + } else {
1681 + DPRINTF("VMA WRITE %zd ZERO BLOCK %d\n", cluster_num, i);
1682 + vmaw->stream_info[dev_id].zero_bytes += VMA_BLOCK_SIZE;
1683 + *zero_bytes += VMA_BLOCK_SIZE;
1684 + }
1685 +
1686 + bit = bit << 1;
1687 + }
1688 + } else {
1689 + DPRINTF("VMA WRITE %zd ZERO CLUSTER\n", cluster_num);
1690 + vmaw->stream_info[dev_id].zero_bytes += VMA_CLUSTER_SIZE;
1691 + *zero_bytes += VMA_CLUSTER_SIZE;
1692 + }
1693 +
1694 + uint64_t block_info = ((uint64_t)mask) << (32+16);
1695 + block_info |= ((uint64_t)dev_id) << 32;
1696 + block_info |= (cluster_num & 0xffffffff);
1697 + vmaw->outbuf_block_info[vmaw->outbuf_count] = block_info;
1698 +
1699 + DPRINTF("VMA WRITE MASK %zd %zx\n", cluster_num, block_info);
1700 +
1701 + vmaw->outbuf_count++;
1702 +
1703 + /** NOTE: We always write whole clusters, but we correctly set
1704 + * transferred bytes. So transferred == size when everything
1705 + * went OK.
1706 + */
1707 + size_t transferred = VMA_CLUSTER_SIZE;
1708 +
1709 + if (dev_id != vmaw->vmstate_stream) {
1710 + uint64_t last = (cluster_num + 1) * VMA_CLUSTER_SIZE;
1711 + if (last > vmaw->stream_info[dev_id].size) {
1712 + uint64_t diff = last - vmaw->stream_info[dev_id].size;
1713 + if (diff >= VMA_CLUSTER_SIZE) {
1714 + vma_writer_set_error(vmaw, "vma_writer_write: "
1715 + "read after last cluster");
1716 + return -1;
1717 + }
1718 + transferred -= diff;
1719 + }
1720 + }
1721 +
1722 + vmaw->stream_info[dev_id].transferred += transferred;
1723 +
1724 + return transferred;
1725 +}
1726 +
1727 +int vma_writer_close(VmaWriter *vmaw, Error **errp)
1728 +{
1729 + g_assert(vmaw != NULL);
1730 +
1731 + int i;
1732 +
1733 + vma_queue_flush(vmaw); /* wait until no write buffer is in use */
1734 +
1735 + /* this should not happen - just to be sure */
1736 + while (!qemu_co_queue_empty(&vmaw->wqueue)) {
1737 + DPRINTF("vma_writer_close wait\n");
1738 + co_sleep_ns(rt_clock, 1000000);
1739 + }
1740 +
1741 + if (vmaw->cmd) {
1742 + if (pclose(vmaw->cmd) < 0) {
1743 + vma_writer_set_error(vmaw, "vma_writer_close: "
1744 + "pclose failed - %s", g_strerror(errno));
1745 + }
1746 + } else {
1747 + if (close(vmaw->fd) < 0) {
1748 + vma_writer_set_error(vmaw, "vma_writer_close: "
1749 + "close failed - %s", g_strerror(errno));
1750 + }
1751 + }
1752 +
1753 + for (i = 0; i <= 255; i++) {
1754 + VmaStreamInfo *si = &vmaw->stream_info[i];
1755 + if (si->size) {
1756 + if (!si->finished) {
1757 + vma_writer_set_error(vmaw, "vma_writer_close: "
1758 + "detected open stream '%s'", si->devname);
1759 + } else if ((si->transferred != si->size) &&
1760 + (i != vmaw->vmstate_stream)) {
1761 + vma_writer_set_error(vmaw, "vma_writer_close: "
1762 + "incomplete stream '%s' (%zd != %zd)",
1763 + si->devname, si->transferred, si->size);
1764 + }
1765 + }
1766 + }
1767 +
1768 + for (i = 0; i <= 255; i++) {
1769 + vmaw->stream_info[i].finished = 1; /* mark as closed */
1770 + }
1771 +
1772 + vmaw->closed = 1;
1773 + /* errp may be NULL; an error already stored in *errp is left untouched */
1774 + if (vmaw->status < 0 && errp && *errp == NULL) {
1775 + error_setg(errp, "%s", vmaw->errmsg);
1776 + }
1777 +
1778 + return vmaw->status;
1779 +}
1780 +
1781 +void vma_writer_destroy(VmaWriter *vmaw)
1782 +{
1783 + assert(vmaw);
1784 +
1785 + int i;
1786 + /* releases memory only; nothing here closes the output fd or pipe */
1787 + for (i = 0; i <= 255; i++) {
1788 + if (vmaw->stream_info[i].devname) {
1789 + g_free(vmaw->stream_info[i].devname);
1790 + }
1791 + }
1792 +
1793 + if (vmaw->md5csum) {
1794 + g_checksum_free(vmaw->md5csum);
1795 + }
1796 +
1797 + for (i = 0; i < WRITE_BUFFERS; i++) {
1798 + qemu_vfree(vmaw->aiocbs[i]); /* allocated with qemu_memalign() */
1799 + }
1800 +
1801 + g_free(vmaw);
1802 +}
1803 +
1804 +/* backup driver plugin */
1805 +
1806 +static int vma_dump_cb(void *opaque, uint8_t dev_id, int64_t cluster_num,
1807 + unsigned char *buf, size_t *zero_bytes)
1808 +{
1809 + VmaWriter *vmaw = opaque;
1810 +
1811 + return vma_writer_write(vmaw, dev_id, cluster_num, buf, zero_bytes);
1812 +}
1813 +
1814 +static int vma_close_cb(void *opaque, Error **errp)
1815 +{
1816 + VmaWriter *vmaw = opaque;
1817 +
1818 + int res = vma_writer_close(vmaw, errp);
1819 + vma_writer_destroy(vmaw);
1820 +
1821 + return res;
1822 +}
1823 +
1824 +static int vma_complete_cb(void *opaque, uint8_t dev_id, int ret)
1825 +{
1826 + VmaWriter *vmaw = opaque;
1827 +
1828 + if (ret < 0) {
1829 + vma_writer_set_error(vmaw, "backup_complete_cb %d", ret);
1830 + }
1831 +
1832 + return vma_writer_close_stream(vmaw, dev_id);
1833 +}
1834 +
1835 +static int vma_register_stream_cb(void *opaque, const char *devname,
1836 + size_t size)
1837 +{
1838 + VmaWriter *vmaw = opaque;
1839 +
1840 + return vma_writer_register_stream(vmaw, devname, size);
1841 +}
1842 +
1843 +static int vma_register_config_cb(void *opaque, const char *name,
1844 + gpointer data, size_t data_len)
1845 +{
1846 + VmaWriter *vmaw = opaque;
1847 +
1848 + return vma_writer_add_config(vmaw, name, data, data_len);
1849 +}
1850 +
1851 +static void *vma_open_cb(const char *filename, uuid_t uuid, Error **errp)
1852 +{
1853 + return vma_writer_create(filename, uuid, errp);
1854 +}
1855 +
1856 +const BackupDriver backup_vma_driver = {
1857 + .format = "vma",
1858 + .open = vma_open_cb,
1859 + .close = vma_close_cb,
1860 + .register_config = vma_register_config_cb,
1861 + .register_stream = vma_register_stream_cb,
1862 + .dump = vma_dump_cb,
1863 + .complete = vma_complete_cb,
1864 +};
1865 +
1866 diff --git a/vma.c b/vma.c
1867 new file mode 100644
1868 index 0000000..6633aa5
1869 --- /dev/null
1870 +++ b/vma.c
1871 @@ -0,0 +1,561 @@
1872 +/*
1873 + * VMA: Virtual Machine Archive
1874 + *
1875 + * Copyright (C) 2012 Proxmox Server Solutions
1876 + *
1877 + * Authors:
1878 + * Dietmar Maurer (dietmar@proxmox.com)
1879 + *
1880 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
1881 + * See the COPYING file in the top-level directory.
1882 + *
1883 + */
1884 +
1885 +#include <stdio.h>
1886 +#include <errno.h>
1887 +#include <unistd.h>
1888 +#include <stdio.h>
1889 +#include <string.h>
1890 +#include <sys/types.h>
1891 +#include <sys/stat.h>
1892 +#include <fcntl.h>
1893 +#include <glib.h>
1894 +
1895 +#include "qemu-common.h"
1896 +#include "qemu/error-report.h"
1897 +#include "vma.h"
1898 +#include "block/block.h"
1899 +
1900 +static void help(void)
1901 +{
1902 + const char *help_msg =
1903 + "usage: vma command [command options]\n"
1904 + "\n"
1905 + "vma list <filename>\n"
1906 + "vma create <filename> [-c config] <archive> pathname ...\n"
1907 + "vma extract <filename> [-r] <targetdir>\n"
1908 + ;
1909 +
1910 + printf("%s", help_msg);
1911 + exit(1);
1912 +}
1913 +
1914 +static const char *extract_devname(const char *path, char **devname, int index)
1915 +{
1916 + const char *eq;
1917 +
1918 + assert(path);
1919 + eq = strchr(path, '=');
1920 +
1921 + /* "name=path": split at the first '=' and hand back the path part */
1922 + if (eq != NULL) {
1923 + *devname = g_strndup(path, eq - path);
1924 + return eq + 1;
1925 + }
1926 +
1927 + /* bare path: synthesize "disk<index>" as the name, or leave the
1928 + * name NULL when the caller passed a negative index */
1929 + *devname = (index >= 0) ? g_strdup_printf("disk%d", index) : NULL;
1930 +
1931 + return path;
1932 +}
1933 +
1934 +static void print_content(VmaReader *vmar)
1935 +{
1936 + assert(vmar);
1937 +
1938 + VmaHeader *head = vma_reader_get_header(vmar);
1939 +
1940 + GList *l = vma_reader_get_config_data(vmar);
1941 + while (l && l->data) {
1942 + VmaConfigData *cdata = (VmaConfigData *)l->data;
1943 + l = g_list_next(l);
1944 + printf("CFG: size: %d name: %s\n", cdata->len, cdata->name);
1945 + }
1946 +
1947 + int i;
1948 + VmaDeviceInfo *di;
1949 + for (i = 1; i < 255; i++) {
1950 + di = vma_reader_get_device_info(vmar, i);
1951 + if (di) {
1952 + if (strcmp(di->devname, "vmstate") == 0) {
1953 + printf("VMSTATE: dev_id=%d memory: %zd\n", i, di->size);
1954 + } else {
1955 + printf("DEV: dev_id=%d size: %zd devname: %s\n",
1956 + i, di->size, di->devname);
1957 + }
1958 + }
1959 + }
1960 + /* ctime is the last entry we print */
1961 + printf("CTIME: %s", ctime(&head->ctime));
1962 + fflush(stdout);
1963 +}
1964 +
1965 +static int list_content(int argc, char **argv)
1966 +{
1967 + int c, ret = 0;
1968 + const char *filename;
1969 +
1970 + for (;;) {
1971 + c = getopt(argc, argv, "h");
1972 + if (c == -1) {
1973 + break;
1974 + }
1975 + switch (c) {
1976 + case '?':
1977 + case 'h':
1978 + help();
1979 + break;
1980 + default:
1981 + g_assert_not_reached();
1982 + }
1983 + }
1984 +
1985 + /* Get the filename */
1986 + if ((optind + 1) != argc) {
1987 + help();
1988 + }
1989 + filename = argv[optind++];
1990 +
1991 + Error *errp = NULL;
1992 + VmaReader *vmar = vma_reader_create(filename, &errp);
1993 +
1994 + if (!vmar) {
1995 + g_error("%s", error_get_pretty(errp));
1996 + }
1997 +
1998 + print_content(vmar);
1999 +
2000 + vma_reader_destroy(vmar);
2001 +
2002 + return ret;
2003 +}
2004 +
2005 +typedef struct RestoreMap {
2006 + char *devname;
2007 + char *path;
2008 + bool write_zero;
2009 +} RestoreMap;
2010 +
2011 +static int extract_content(int argc, char **argv)
2012 +{
2013 + int c, ret = 0;
2014 + int verbose = 0;
2015 + const char *filename;
2016 + const char *dirname;
2017 + const char *readmap = NULL;
2018 +
2019 + for (;;) {
2020 + c = getopt(argc, argv, "hvr:");
2021 + if (c == -1) {
2022 + break;
2023 + }
2024 + switch (c) {
2025 + case '?':
2026 + case 'h':
2027 + help();
2028 + break;
2029 + case 'r':
2030 + readmap = optarg;
2031 + break;
2032 + case 'v':
2033 + verbose = 1;
2034 + break;
2035 + default:
2036 + help();
2037 + }
2038 + }
2039 +
2040 + /* Get the filename */
2041 + if ((optind + 2) != argc) {
2042 + help();
2043 + }
2044 + filename = argv[optind++];
2045 + dirname = argv[optind++];
2046 +
2047 + Error *errp = NULL;
2048 + VmaReader *vmar = vma_reader_create(filename, &errp);
2049 +
2050 + if (!vmar) {
2051 + g_error("%s", error_get_pretty(errp));
2052 + }
2053 +
2054 + if (mkdir(dirname, 0777) < 0) {
2055 + g_error("unable to create target directory %s - %s",
2056 + dirname, g_strerror(errno));
2057 + }
2058 +
2059 + GList *l = vma_reader_get_config_data(vmar);
2060 + while (l && l->data) {
2061 + VmaConfigData *cdata = (VmaConfigData *)l->data;
2062 + l = g_list_next(l);
2063 + char *cfgfn = g_strdup_printf("%s/%s", dirname, cdata->name);
2064 + GError *err = NULL;
2065 + if (!g_file_set_contents(cfgfn, (gchar *)cdata->data, cdata->len,
2066 + &err)) {
2067 + g_error("unable to write file: %s", err->message);
2068 + }
2069 + }
2070 +
2071 + GHashTable *devmap = g_hash_table_new(g_str_hash, g_str_equal);
2072 +
2073 + if (readmap) {
2074 + print_content(vmar);
2075 +
2076 + FILE *map = fopen(readmap, "r");
2077 + if (!map) {
2078 + g_error("unable to open fifo %s - %s", readmap, g_strerror(errno));
2079 + }
2080 +
2081 + while (1) {
2082 + char inbuf[8192];
2083 + char *line = fgets(inbuf, sizeof(inbuf), map);
2084 + if (!line || line[0] == '\0' || !strcmp(line, "done\n")) {
2085 + break;
2086 + }
2087 + int len = strlen(line);
2088 + if (line[len - 1] == '\n') {
2089 + line[len - 1] = '\0';
2090 + if (len == 1) {
2091 + break;
2092 + }
2093 + }
2094 +
2095 + const char *path;
2096 + bool write_zero;
2097 + if (line[0] == '0' && line[1] == ':') {
2098 + path = inbuf + 2;
2099 + write_zero = false;
2100 + } else if (line[0] == '1' && line[1] == ':') {
2101 + path = inbuf + 2;
2102 + write_zero = true;
2103 + } else {
2104 + g_error("read map failed - parse error ('%s')", inbuf);
2105 + }
2106 +
2107 + char *devname = NULL;
2108 + path = extract_devname(path, &devname, -1);
2109 + if (!devname) {
2110 + g_error("read map failed - no dev name specified ('%s')",
2111 + inbuf);
2112 + }
2113 +
2114 + RestoreMap *map = g_new0(RestoreMap, 1);
2115 + map->devname = g_strdup(devname);
2116 + map->path = g_strdup(path);
2117 + map->write_zero = write_zero;
2118 +
2119 + g_hash_table_insert(devmap, map->devname, map);
2120 +
2121 + };
2122 + }
2123 +
2124 + int i;
2125 + int vmstate_fd = -1;
2126 + guint8 vmstate_stream = 0;
2127 +
2128 + for (i = 1; i < 255; i++) {
2129 + VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
2130 + if (di && (strcmp(di->devname, "vmstate") == 0)) {
2131 + vmstate_stream = i;
2132 + char *statefn = g_strdup_printf("%s/vmstate.bin", dirname);
2133 + vmstate_fd = open(statefn, O_WRONLY|O_CREAT|O_EXCL, 0644);
2134 + if (vmstate_fd < 0) {
2135 + g_error("create vmstate file '%s' failed - %s", statefn,
2136 + g_strerror(errno));
2137 + }
2138 + g_free(statefn);
2139 + } else if (di) {
2140 + char *devfn = NULL;
2141 + int flags = BDRV_O_RDWR|BDRV_O_CACHE_WB;
2142 + bool write_zero = true;
2143 +
2144 + if (readmap) {
2145 + RestoreMap *map;
2146 + map = (RestoreMap *)g_hash_table_lookup(devmap, di->devname);
2147 + if (map == NULL) {
2148 + g_error("no device name mapping for %s", di->devname);
2149 + }
2150 + devfn = map->path;
2151 + write_zero = map->write_zero;
2152 + } else {
2153 + devfn = g_strdup_printf("%s/tmp-disk-%s.raw",
2154 + dirname, di->devname);
2155 + printf("DEVINFO %s %zd\n", devfn, di->size);
2156 +
2157 + bdrv_img_create(devfn, "raw", NULL, NULL, NULL, di->size,
2158 + flags, &errp);
2159 + if (error_is_set(&errp)) {
2160 + g_error("can't create file %s: %s", devfn,
2161 + error_get_pretty(errp));
2162 + }
2163 +
2164 + /* Note: we created an empty file above, so there is no
2165 + * need to write zeroes (so we generate a sparse file)
2166 + */
2167 + write_zero = false;
2168 + }
2169 +
2170 + BlockDriverState *bs = bdrv_new(di->devname);
2171 + if (bdrv_open(bs, devfn, flags, NULL)) {
2172 + g_error("can't open file %s", devfn);
2173 + }
2174 + if (vma_reader_register_bs(vmar, i, bs, write_zero, &errp) < 0) {
2175 + g_error("%s", error_get_pretty(errp));
2176 + }
2177 +
2178 + if (!readmap) {
2179 + g_free(devfn);
2180 + }
2181 + }
2182 + }
2183 +
2184 + if (vma_reader_restore(vmar, vmstate_fd, verbose, &errp) < 0) {
2185 + g_error("restore failed - %s", error_get_pretty(errp));
2186 + }
2187 +
2188 + if (!readmap) {
2189 + for (i = 1; i < 255; i++) {
2190 + VmaDeviceInfo *di = vma_reader_get_device_info(vmar, i);
2191 + if (di && (i != vmstate_stream)) {
2192 + char *tmpfn = g_strdup_printf("%s/tmp-disk-%s.raw",
2193 + dirname, di->devname);
2194 + char *fn = g_strdup_printf("%s/disk-%s.raw",
2195 + dirname, di->devname);
2196 + if (rename(tmpfn, fn) != 0) {
2197 + g_error("rename %s to %s failed - %s",
2198 + tmpfn, fn, g_strerror(errno));
2199 + }
2200 + }
2201 + }
2202 + }
2203 +
2204 + vma_reader_destroy(vmar);
2205 +
2206 + bdrv_close_all();
2207 +
2208 + return ret;
2209 +}
2210 +
2211 +typedef struct BackupCB {
2212 + VmaWriter *vmaw;
2213 + uint8_t dev_id;
2214 +} BackupCB;
2215 +
2216 +static int backup_dump_cb(void *opaque, BlockDriverState *bs,
2217 + int64_t cluster_num, unsigned char *buf)
2218 +{
2219 + BackupCB *bcb = opaque;
2220 + size_t zb = 0;
2221 + if (vma_writer_write(bcb->vmaw, bcb->dev_id, cluster_num, buf, &zb) < 0) {
2222 + g_warning("backup_dump_cb vma_writer_write failed");
2223 + return -1;
2224 + }
2225 +
2226 + return 0;
2227 +}
2228 +
2229 +static void backup_complete_cb(void *opaque, int ret)
2230 +{
2231 + BackupCB *bcb = opaque;
2232 +
2233 + if (ret < 0) {
2234 + vma_writer_set_error(bcb->vmaw, "backup_complete_cb %d", ret);
2235 + }
2236 +
2237 + if (vma_writer_close_stream(bcb->vmaw, bcb->dev_id) <= 0) {
2238 + Error *err = NULL;
2239 + if (vma_writer_close(bcb->vmaw, &err) != 0) {
2240 + g_warning("vma_writer_close failed %s", error_get_pretty(err));
2241 + }
2242 + }
2243 +}
2244 +
2245 +static int create_archive(int argc, char **argv)
2246 +{
2247 + int i, c, res;
2248 + int verbose = 0;
2249 + const char *archivename;
2250 + GList *config_files = NULL;
2251 +
2252 + for (;;) {
2253 + c = getopt(argc, argv, "hvc:");
2254 + if (c == -1) {
2255 + break;
2256 + }
2257 + switch (c) {
2258 + case '?':
2259 + case 'h':
2260 + help();
2261 + break;
2262 + case 'c':
2263 + config_files = g_list_append(config_files, optarg);
2264 + break;
2265 + case 'v':
2266 + verbose = 1;
2267 + break;
2268 + default:
2269 + g_assert_not_reached();
2270 + }
2271 + }
2272 +
2273 +
2274 + /* make sure we have archive name and at least one path */
2275 + if ((optind + 2) > argc) {
2276 + help();
2277 + }
2278 +
2279 + archivename = argv[optind++];
2280 +
2281 + uuid_t uuid;
2282 + uuid_generate(uuid);
2283 +
2284 + Error *local_err = NULL;
2285 + VmaWriter *vmaw = vma_writer_create(archivename, uuid, &local_err);
2286 +
2287 + if (vmaw == NULL) {
2288 + g_error("%s", error_get_pretty(local_err));
2289 + }
2290 +
2291 + GList *l = config_files;
2292 + while (l && l->data) {
2293 + char *name = l->data;
2294 + char *cdata = NULL;
2295 + gsize clen = 0;
2296 + GError *err = NULL;
2297 + if (!g_file_get_contents(name, &cdata, &clen, &err)) {
2298 + unlink(archivename);
2299 + g_error("Unable to read file: %s", err->message);
2300 + }
2301 +
2302 + if (vma_writer_add_config(vmaw, name, cdata, clen) != 0) {
2303 + unlink(archivename);
2304 + g_error("Unable to append config data %s (len = %zd)",
2305 + name, clen);
2306 + }
2307 + l = g_list_next(l);
2308 + }
2309 +
2310 + int ind = 0;
2311 + while (optind < argc) {
2312 + const char *path = argv[optind++];
2313 + char *devname = NULL;
2314 + path = extract_devname(path, &devname, ind++);
2315 +
2316 + BlockDriver *drv = NULL;
2317 + BlockDriverState *bs = bdrv_new(devname);
2318 +
2319 + res = bdrv_open(bs, path, BDRV_O_CACHE_WB , drv);
2320 + if (res < 0) {
2321 + unlink(archivename);
2322 + g_error("bdrv_open '%s' failed", path);
2323 + }
2324 + int64_t size = bdrv_getlength(bs);
2325 + int dev_id = vma_writer_register_stream(vmaw, devname, size);
2326 + if (dev_id <= 0) {
2327 + unlink(archivename);
2328 + g_error("vma_writer_register_stream '%s' failed", devname);
2329 + }
2330 +
2331 + BackupCB *bcb = g_new0(BackupCB, 1);
2332 + bcb->vmaw = vmaw;
2333 + bcb->dev_id = dev_id;
2334 +
2335 + if (backup_job_create(bs, backup_dump_cb, backup_complete_cb,
2336 + bcb, 0) < 0) {
2337 + unlink(archivename);
2338 + g_error("backup_job_start failed");
2339 + } else {
2340 + backup_job_start(bs, false);
2341 + }
2342 + }
2343 +
2344 + VmaStatus vmastat;
2345 + int percent = 0;
2346 + int last_percent = -1;
2347 +
2348 + while (1) {
2349 + main_loop_wait(false);
2350 + vma_writer_get_status(vmaw, &vmastat);
2351 +
2352 + if (verbose) {
2353 +
2354 + uint64_t total = 0;
2355 + uint64_t transferred = 0;
2356 + uint64_t zero_bytes = 0;
2357 +
2358 + int i;
2359 + for (i = 0; i < 256; i++) {
2360 + if (vmastat.stream_info[i].size) {
2361 + total += vmastat.stream_info[i].size;
2362 + transferred += vmastat.stream_info[i].transferred;
2363 + zero_bytes += vmastat.stream_info[i].zero_bytes;
2364 + }
2365 + }
2366 + percent = (transferred*100)/total;
2367 + if (percent != last_percent) {
2368 + fprintf(stderr, "progress %d%% %zd/%zd %zd\n", percent,
2369 + transferred, total, zero_bytes);
2370 + fflush(stderr);
2371 +
2372 + last_percent = percent;
2373 + }
2374 + }
2375 +
2376 + if (vmastat.closed) {
2377 + break;
2378 + }
2379 + }
2380 +
2381 + bdrv_drain_all();
2382 +
2383 + vma_writer_get_status(vmaw, &vmastat);
2384 +
2385 + if (verbose) {
2386 + for (i = 0; i < 256; i++) {
2387 + VmaStreamInfo *si = &vmastat.stream_info[i];
2388 + if (si->size) {
2389 + fprintf(stderr, "image %s: size=%zd zeros=%zd saved=%zd\n",
2390 + si->devname, si->size, si->zero_bytes,
2391 + si->size - si->zero_bytes);
2392 + }
2393 + }
2394 + }
2395 +
2396 + if (vmastat.status < 0) {
2397 + unlink(archivename);
2398 + g_error("creating vma archive failed");
2399 + }
2400 +
2401 + return 0;
2402 +}
2403 +
/*
 * Entry point of the vma tool.
 *
 * Sets the program name used for error reporting, initialises the main loop
 * and the block layer, then dispatches to the handler of the sub-command
 * named by the first argument ("list", "create" or "extract").
 */
int main(int argc, char **argv)
{
    const char *subcmd;

    error_set_progname(argv[0]);
    qemu_init_main_loop();
    bdrv_init();

    if (argc < 2) {
        help();
    }

    /* Shift the arguments so each handler sees the sub-command as argv[0]. */
    subcmd = argv[1];
    argv += 1;
    argc -= 1;

    if (strcmp(subcmd, "list") == 0) {
        return list_content(argc, argv);
    }
    if (strcmp(subcmd, "create") == 0) {
        return create_archive(argc, argv);
    }
    if (strcmp(subcmd, "extract") == 0) {
        return extract_content(argc, argv);
    }

    /* Not a known sub-command. */
    help();
    return 0;
}
2433 diff --git a/vma.h b/vma.h
2434 new file mode 100644
2435 index 0000000..76d0dc8
2436 --- /dev/null
2437 +++ b/vma.h
2438 @@ -0,0 +1,145 @@
2439 +/*
2440 + * VMA: Virtual Machine Archive
2441 + *
2442 + * Copyright (C) Proxmox Server Solutions
2443 + *
2444 + * Authors:
2445 + * Dietmar Maurer (dietmar@proxmox.com)
2446 + *
2447 + * This work is licensed under the terms of the GNU GPL, version 2 or later.
2448 + * See the COPYING file in the top-level directory.
2449 + *
2450 + */
2451 +
2452 +#ifndef BACKUP_VMA_H
2453 +#define BACKUP_VMA_H
2454 +
2455 +#include "backup.h"
2456 +#include "error.h"
2457 +
/* A block is 4096 bytes; a cluster is 16 blocks (65536 bytes). */
#define VMA_BLOCK_BITS 12
#define VMA_BLOCK_SIZE (1<<VMA_BLOCK_BITS)
#define VMA_CLUSTER_BITS (VMA_BLOCK_BITS+4)
#define VMA_CLUSTER_SIZE (1<<VMA_CLUSTER_BITS)

#if VMA_CLUSTER_SIZE != 65536
#error unexpected cluster size
#endif

#define VMA_EXTENT_HEADER_SIZE 512
#define VMA_BLOCKS_PER_EXTENT 59
#define VMA_MAX_CONFIGS 256

/*
 * Largest possible extent: the extent header followed by
 * VMA_BLOCKS_PER_EXTENT entries of VMA_CLUSTER_SIZE bytes each.
 */
#define VMA_MAX_EXTENT_SIZE \
    (VMA_EXTENT_HEADER_SIZE+VMA_CLUSTER_SIZE*VMA_BLOCKS_PER_EXTENT)
#if VMA_MAX_EXTENT_SIZE != 3867136
#error unexpected VMA_EXTENT_SIZE
#endif

/* File Format Definitions */

#define VMA_MAGIC (GUINT32_TO_BE(('V'<<24)|('M'<<16)|('A'<<8)|0x00))
#define VMA_EXTENT_MAGIC (GUINT32_TO_BE(('V'<<24)|('M'<<16)|('A'<<8)|'E'))

/* Per-device entry of the archive header (32 bytes). */
typedef struct VmaDeviceInfoHeader {
    uint32_t devname_ptr; /* offset into blob_buffer table */
    uint32_t reserved0;
    uint64_t size; /* device size in bytes */
    uint64_t reserved1;
    uint64_t reserved2;
} VmaDeviceInfoHeader;

/*
 * Archive header.
 *
 * The members up to and including config_data occupy 4092 bytes, while
 * dev_info contains 64-bit members. Without the explicit reserved1 member
 * the offset of dev_info would be left to compiler padding: 4096 where
 * 64-bit integers are 8-byte aligned, but 4092 on ABIs that align them to
 * 4 bytes inside structs (e.g. i386 System V). Spelling the padding out
 * keeps the layout identical on both, and unchanged where the compiler
 * already inserted it.
 */
typedef struct VmaHeader {
    uint32_t magic;
    uint32_t version;
    unsigned char uuid[16];
    int64_t ctime;
    unsigned char md5sum[16];

    uint32_t blob_buffer_offset;
    uint32_t blob_buffer_size;
    uint32_t header_size;

    unsigned char reserved[1984];

    uint32_t config_names[VMA_MAX_CONFIGS]; /* offset into blob_buffer table */
    uint32_t config_data[VMA_MAX_CONFIGS];  /* offset into blob_buffer table */

    uint32_t reserved1; /* explicit padding, dev_info starts at offset 4096 */

    VmaDeviceInfoHeader dev_info[256];
} VmaHeader;

/* Header preceding the data of one extent (VMA_EXTENT_HEADER_SIZE bytes). */
typedef struct VmaExtentHeader {
    uint32_t magic;
    uint16_t reserved1;
    uint16_t block_count;
    unsigned char uuid[16];
    unsigned char md5sum[16];
    uint64_t blockinfo[VMA_BLOCKS_PER_EXTENT];
} VmaExtentHeader;

/*
 * Compile-time checks of the struct sizes: a negative array size makes the
 * build fail if a layout ever deviates from the expected size.
 */
typedef char vma_check_device_info_header_size[
    (sizeof(VmaDeviceInfoHeader) == 32) ? 1 : -1];
typedef char vma_check_header_size[
    (sizeof(VmaHeader) == 12288) ? 1 : -1];
typedef char vma_check_extent_header_size[
    (sizeof(VmaExtentHeader) == VMA_EXTENT_HEADER_SIZE) ? 1 : -1];
2517 +
/* functions/definitions to read/write vma files */

/* Reader and writer handles; only declared here as incomplete types. */
typedef struct VmaReader VmaReader;
typedef struct VmaWriter VmaWriter;

typedef struct VmaConfigData VmaConfigData;
typedef struct VmaStreamInfo VmaStreamInfo;
typedef struct VmaStatus VmaStatus;
typedef struct VmaDeviceInfo VmaDeviceInfo;

/* A named piece of config data. */
struct VmaConfigData {
    const char *name;
    const void *data;
    uint32_t len;           /* presumably the length of data - confirm */
};

/* Progress counters of a single stream (device). */
struct VmaStreamInfo {
    uint64_t size;          /* 0 is treated by vma.c as "slot not in use" */
    uint64_t cluster_count;
    uint64_t transferred;
    uint64_t zero_bytes;
    int finished;
    char *devname;
};

/* Snapshot of the writer state filled in by vma_writer_get_status(). */
struct VmaStatus {
    int status;             /* vma.c treats a negative value as failure */
    bool closed;            /* vma.c stops polling once this is set */
    char errmsg[8192];
    char uuid_str[37];      /* presumably 36 character UUID plus NUL - confirm */
    VmaStreamInfo stream_info[256];
};

struct VmaDeviceInfo {
    uint64_t size; /* device size in bytes */
    const char *devname;
};
2551 +
2552 +extern const BackupDriver backup_vma_driver;
2553 +
2554 +VmaWriter *vma_writer_create(const char *filename, uuid_t uuid, Error **errp);
2555 +int vma_writer_close(VmaWriter *vmaw, Error **errp);
2556 +void vma_writer_destroy(VmaWriter *vmaw);
2557 +int vma_writer_add_config(VmaWriter *vmaw, const char *name, gpointer data,
2558 + size_t len);
2559 +int vma_writer_register_stream(VmaWriter *vmaw, const char *devname,
2560 + size_t size);
2561 +
2562 +int64_t coroutine_fn vma_writer_write(VmaWriter *vmaw, uint8_t dev_id,
2563 + int64_t cluster_num, unsigned char *buf,
2564 + size_t *zero_bytes);
2565 +
2566 +int coroutine_fn vma_writer_close_stream(VmaWriter *vmaw, uint8_t dev_id);
2567 +
2568 +int vma_writer_get_status(VmaWriter *vmaw, VmaStatus *status);
2569 +void vma_writer_set_error(VmaWriter *vmaw, const char *fmt, ...);
2570 +
2571 +
2572 +VmaReader *vma_reader_create(const char *filename, Error **errp);
2573 +void vma_reader_destroy(VmaReader *vmar);
2574 +VmaHeader *vma_reader_get_header(VmaReader *vmar);
2575 +GList *vma_reader_get_config_data(VmaReader *vmar);
2576 +VmaDeviceInfo *vma_reader_get_device_info(VmaReader *vmar, guint8 dev_id);
2577 +int vma_reader_register_bs(VmaReader *vmar, guint8 dev_id,
2578 + BlockDriverState *bs, bool write_zeroes,
2579 + Error **errp);
2580 +int vma_reader_restore(VmaReader *vmar, int vmstate_fd, bool verbose,
2581 + Error **errp);
2582 +
2583 +#endif /* BACKUP_VMA_H */
2584 --
2585 1.7.2.5
2586