]> git.proxmox.com Git - mirror_qemu.git/blob - block/qcow2-snapshot.c
qcow2: Employ metadata overlap checks
[mirror_qemu.git] / block / qcow2-snapshot.c
1 /*
2 * Block driver for the QCOW version 2 format
3 *
4 * Copyright (c) 2004-2006 Fabrice Bellard
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
25 #include "qemu-common.h"
26 #include "block/block_int.h"
27 #include "block/qcow2.h"
28
29 typedef struct QEMU_PACKED QCowSnapshotHeader {
30 /* header is 8 byte aligned */
31 uint64_t l1_table_offset;
32
33 uint32_t l1_size;
34 uint16_t id_str_size;
35 uint16_t name_size;
36
37 uint32_t date_sec;
38 uint32_t date_nsec;
39
40 uint64_t vm_clock_nsec;
41
42 uint32_t vm_state_size;
43 uint32_t extra_data_size; /* for extension */
44 /* extra data follows */
45 /* id_str follows */
46 /* name follows */
47 } QCowSnapshotHeader;
48
49 typedef struct QEMU_PACKED QCowSnapshotExtraData {
50 uint64_t vm_state_size_large;
51 uint64_t disk_size;
52 } QCowSnapshotExtraData;
53
54 void qcow2_free_snapshots(BlockDriverState *bs)
55 {
56 BDRVQcowState *s = bs->opaque;
57 int i;
58
59 for(i = 0; i < s->nb_snapshots; i++) {
60 g_free(s->snapshots[i].name);
61 g_free(s->snapshots[i].id_str);
62 }
63 g_free(s->snapshots);
64 s->snapshots = NULL;
65 s->nb_snapshots = 0;
66 }
67
68 int qcow2_read_snapshots(BlockDriverState *bs)
69 {
70 BDRVQcowState *s = bs->opaque;
71 QCowSnapshotHeader h;
72 QCowSnapshotExtraData extra;
73 QCowSnapshot *sn;
74 int i, id_str_size, name_size;
75 int64_t offset;
76 uint32_t extra_data_size;
77 int ret;
78
79 if (!s->nb_snapshots) {
80 s->snapshots = NULL;
81 s->snapshots_size = 0;
82 return 0;
83 }
84
85 offset = s->snapshots_offset;
86 s->snapshots = g_malloc0(s->nb_snapshots * sizeof(QCowSnapshot));
87
88 for(i = 0; i < s->nb_snapshots; i++) {
89 /* Read statically sized part of the snapshot header */
90 offset = align_offset(offset, 8);
91 ret = bdrv_pread(bs->file, offset, &h, sizeof(h));
92 if (ret < 0) {
93 goto fail;
94 }
95
96 offset += sizeof(h);
97 sn = s->snapshots + i;
98 sn->l1_table_offset = be64_to_cpu(h.l1_table_offset);
99 sn->l1_size = be32_to_cpu(h.l1_size);
100 sn->vm_state_size = be32_to_cpu(h.vm_state_size);
101 sn->date_sec = be32_to_cpu(h.date_sec);
102 sn->date_nsec = be32_to_cpu(h.date_nsec);
103 sn->vm_clock_nsec = be64_to_cpu(h.vm_clock_nsec);
104 extra_data_size = be32_to_cpu(h.extra_data_size);
105
106 id_str_size = be16_to_cpu(h.id_str_size);
107 name_size = be16_to_cpu(h.name_size);
108
109 /* Read extra data */
110 ret = bdrv_pread(bs->file, offset, &extra,
111 MIN(sizeof(extra), extra_data_size));
112 if (ret < 0) {
113 goto fail;
114 }
115 offset += extra_data_size;
116
117 if (extra_data_size >= 8) {
118 sn->vm_state_size = be64_to_cpu(extra.vm_state_size_large);
119 }
120
121 if (extra_data_size >= 16) {
122 sn->disk_size = be64_to_cpu(extra.disk_size);
123 } else {
124 sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
125 }
126
127 /* Read snapshot ID */
128 sn->id_str = g_malloc(id_str_size + 1);
129 ret = bdrv_pread(bs->file, offset, sn->id_str, id_str_size);
130 if (ret < 0) {
131 goto fail;
132 }
133 offset += id_str_size;
134 sn->id_str[id_str_size] = '\0';
135
136 /* Read snapshot name */
137 sn->name = g_malloc(name_size + 1);
138 ret = bdrv_pread(bs->file, offset, sn->name, name_size);
139 if (ret < 0) {
140 goto fail;
141 }
142 offset += name_size;
143 sn->name[name_size] = '\0';
144 }
145
146 s->snapshots_size = offset - s->snapshots_offset;
147 return 0;
148
149 fail:
150 qcow2_free_snapshots(bs);
151 return ret;
152 }
153
154 /* add at the end of the file a new list of snapshots */
155 static int qcow2_write_snapshots(BlockDriverState *bs)
156 {
157 BDRVQcowState *s = bs->opaque;
158 QCowSnapshot *sn;
159 QCowSnapshotHeader h;
160 QCowSnapshotExtraData extra;
161 int i, name_size, id_str_size, snapshots_size;
162 struct {
163 uint32_t nb_snapshots;
164 uint64_t snapshots_offset;
165 } QEMU_PACKED header_data;
166 int64_t offset, snapshots_offset;
167 int ret;
168
169 /* compute the size of the snapshots */
170 offset = 0;
171 for(i = 0; i < s->nb_snapshots; i++) {
172 sn = s->snapshots + i;
173 offset = align_offset(offset, 8);
174 offset += sizeof(h);
175 offset += sizeof(extra);
176 offset += strlen(sn->id_str);
177 offset += strlen(sn->name);
178 }
179 snapshots_size = offset;
180
181 /* Allocate space for the new snapshot list */
182 snapshots_offset = qcow2_alloc_clusters(bs, snapshots_size);
183 offset = snapshots_offset;
184 if (offset < 0) {
185 return offset;
186 }
187 ret = bdrv_flush(bs);
188 if (ret < 0) {
189 return ret;
190 }
191
192 /* The snapshot list position has not yet been updated, so these clusters
193 * must indeed be completely free */
194 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, offset,
195 s->snapshots_size);
196 if (ret < 0) {
197 return ret;
198 }
199
200
201 /* Write all snapshots to the new list */
202 for(i = 0; i < s->nb_snapshots; i++) {
203 sn = s->snapshots + i;
204 memset(&h, 0, sizeof(h));
205 h.l1_table_offset = cpu_to_be64(sn->l1_table_offset);
206 h.l1_size = cpu_to_be32(sn->l1_size);
207 /* If it doesn't fit in 32 bit, older implementations should treat it
208 * as a disk-only snapshot rather than truncate the VM state */
209 if (sn->vm_state_size <= 0xffffffff) {
210 h.vm_state_size = cpu_to_be32(sn->vm_state_size);
211 }
212 h.date_sec = cpu_to_be32(sn->date_sec);
213 h.date_nsec = cpu_to_be32(sn->date_nsec);
214 h.vm_clock_nsec = cpu_to_be64(sn->vm_clock_nsec);
215 h.extra_data_size = cpu_to_be32(sizeof(extra));
216
217 memset(&extra, 0, sizeof(extra));
218 extra.vm_state_size_large = cpu_to_be64(sn->vm_state_size);
219 extra.disk_size = cpu_to_be64(sn->disk_size);
220
221 id_str_size = strlen(sn->id_str);
222 name_size = strlen(sn->name);
223 h.id_str_size = cpu_to_be16(id_str_size);
224 h.name_size = cpu_to_be16(name_size);
225 offset = align_offset(offset, 8);
226
227 ret = bdrv_pwrite(bs->file, offset, &h, sizeof(h));
228 if (ret < 0) {
229 goto fail;
230 }
231 offset += sizeof(h);
232
233 ret = bdrv_pwrite(bs->file, offset, &extra, sizeof(extra));
234 if (ret < 0) {
235 goto fail;
236 }
237 offset += sizeof(extra);
238
239 ret = bdrv_pwrite(bs->file, offset, sn->id_str, id_str_size);
240 if (ret < 0) {
241 goto fail;
242 }
243 offset += id_str_size;
244
245 ret = bdrv_pwrite(bs->file, offset, sn->name, name_size);
246 if (ret < 0) {
247 goto fail;
248 }
249 offset += name_size;
250 }
251
252 /*
253 * Update the header to point to the new snapshot table. This requires the
254 * new table and its refcounts to be stable on disk.
255 */
256 ret = bdrv_flush(bs);
257 if (ret < 0) {
258 goto fail;
259 }
260
261 QEMU_BUILD_BUG_ON(offsetof(QCowHeader, snapshots_offset) !=
262 offsetof(QCowHeader, nb_snapshots) + sizeof(header_data.nb_snapshots));
263
264 header_data.nb_snapshots = cpu_to_be32(s->nb_snapshots);
265 header_data.snapshots_offset = cpu_to_be64(snapshots_offset);
266
267 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, nb_snapshots),
268 &header_data, sizeof(header_data));
269 if (ret < 0) {
270 goto fail;
271 }
272
273 /* free the old snapshot table */
274 qcow2_free_clusters(bs, s->snapshots_offset, s->snapshots_size,
275 QCOW2_DISCARD_SNAPSHOT);
276 s->snapshots_offset = snapshots_offset;
277 s->snapshots_size = snapshots_size;
278 return 0;
279
280 fail:
281 return ret;
282 }
283
284 static void find_new_snapshot_id(BlockDriverState *bs,
285 char *id_str, int id_str_size)
286 {
287 BDRVQcowState *s = bs->opaque;
288 QCowSnapshot *sn;
289 int i, id, id_max = 0;
290
291 for(i = 0; i < s->nb_snapshots; i++) {
292 sn = s->snapshots + i;
293 id = strtoul(sn->id_str, NULL, 10);
294 if (id > id_max)
295 id_max = id;
296 }
297 snprintf(id_str, id_str_size, "%d", id_max + 1);
298 }
299
300 static int find_snapshot_by_id(BlockDriverState *bs, const char *id_str)
301 {
302 BDRVQcowState *s = bs->opaque;
303 int i;
304
305 for(i = 0; i < s->nb_snapshots; i++) {
306 if (!strcmp(s->snapshots[i].id_str, id_str))
307 return i;
308 }
309 return -1;
310 }
311
312 static int find_snapshot_by_id_or_name(BlockDriverState *bs, const char *name)
313 {
314 BDRVQcowState *s = bs->opaque;
315 int i, ret;
316
317 ret = find_snapshot_by_id(bs, name);
318 if (ret >= 0)
319 return ret;
320 for(i = 0; i < s->nb_snapshots; i++) {
321 if (!strcmp(s->snapshots[i].name, name))
322 return i;
323 }
324 return -1;
325 }
326
327 /* if no id is provided, a new one is constructed */
328 int qcow2_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
329 {
330 BDRVQcowState *s = bs->opaque;
331 QCowSnapshot *new_snapshot_list = NULL;
332 QCowSnapshot *old_snapshot_list = NULL;
333 QCowSnapshot sn1, *sn = &sn1;
334 int i, ret;
335 uint64_t *l1_table = NULL;
336 int64_t l1_table_offset;
337
338 memset(sn, 0, sizeof(*sn));
339
340 /* Generate an ID if it wasn't passed */
341 if (sn_info->id_str[0] == '\0') {
342 find_new_snapshot_id(bs, sn_info->id_str, sizeof(sn_info->id_str));
343 }
344
345 /* Check that the ID is unique */
346 if (find_snapshot_by_id(bs, sn_info->id_str) >= 0) {
347 return -EEXIST;
348 }
349
350 /* Populate sn with passed data */
351 sn->id_str = g_strdup(sn_info->id_str);
352 sn->name = g_strdup(sn_info->name);
353
354 sn->disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
355 sn->vm_state_size = sn_info->vm_state_size;
356 sn->date_sec = sn_info->date_sec;
357 sn->date_nsec = sn_info->date_nsec;
358 sn->vm_clock_nsec = sn_info->vm_clock_nsec;
359
360 /* Allocate the L1 table of the snapshot and copy the current one there. */
361 l1_table_offset = qcow2_alloc_clusters(bs, s->l1_size * sizeof(uint64_t));
362 if (l1_table_offset < 0) {
363 ret = l1_table_offset;
364 goto fail;
365 }
366
367 sn->l1_table_offset = l1_table_offset;
368 sn->l1_size = s->l1_size;
369
370 l1_table = g_malloc(s->l1_size * sizeof(uint64_t));
371 for(i = 0; i < s->l1_size; i++) {
372 l1_table[i] = cpu_to_be64(s->l1_table[i]);
373 }
374
375 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
376 sn->l1_table_offset, s->l1_size * sizeof(uint64_t));
377 if (ret < 0) {
378 goto fail;
379 }
380
381 ret = bdrv_pwrite(bs->file, sn->l1_table_offset, l1_table,
382 s->l1_size * sizeof(uint64_t));
383 if (ret < 0) {
384 goto fail;
385 }
386
387 g_free(l1_table);
388 l1_table = NULL;
389
390 /*
391 * Increase the refcounts of all clusters and make sure everything is
392 * stable on disk before updating the snapshot table to contain a pointer
393 * to the new L1 table.
394 */
395 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 1);
396 if (ret < 0) {
397 goto fail;
398 }
399
400 /* Append the new snapshot to the snapshot list */
401 new_snapshot_list = g_malloc((s->nb_snapshots + 1) * sizeof(QCowSnapshot));
402 if (s->snapshots) {
403 memcpy(new_snapshot_list, s->snapshots,
404 s->nb_snapshots * sizeof(QCowSnapshot));
405 old_snapshot_list = s->snapshots;
406 }
407 s->snapshots = new_snapshot_list;
408 s->snapshots[s->nb_snapshots++] = *sn;
409
410 ret = qcow2_write_snapshots(bs);
411 if (ret < 0) {
412 g_free(s->snapshots);
413 s->snapshots = old_snapshot_list;
414 goto fail;
415 }
416
417 g_free(old_snapshot_list);
418
419 #ifdef DEBUG_ALLOC
420 {
421 BdrvCheckResult result = {0};
422 qcow2_check_refcounts(bs, &result, 0);
423 }
424 #endif
425 return 0;
426
427 fail:
428 g_free(sn->id_str);
429 g_free(sn->name);
430 g_free(l1_table);
431
432 return ret;
433 }
434
435 /* copy the snapshot 'snapshot_name' into the current disk image */
436 int qcow2_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
437 {
438 BDRVQcowState *s = bs->opaque;
439 QCowSnapshot *sn;
440 int i, snapshot_index;
441 int cur_l1_bytes, sn_l1_bytes;
442 int ret;
443 uint64_t *sn_l1_table = NULL;
444
445 /* Search the snapshot */
446 snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
447 if (snapshot_index < 0) {
448 return -ENOENT;
449 }
450 sn = &s->snapshots[snapshot_index];
451
452 if (sn->disk_size != bs->total_sectors * BDRV_SECTOR_SIZE) {
453 error_report("qcow2: Loading snapshots with different disk "
454 "size is not implemented");
455 ret = -ENOTSUP;
456 goto fail;
457 }
458
459 /*
460 * Make sure that the current L1 table is big enough to contain the whole
461 * L1 table of the snapshot. If the snapshot L1 table is smaller, the
462 * current one must be padded with zeros.
463 */
464 ret = qcow2_grow_l1_table(bs, sn->l1_size, true);
465 if (ret < 0) {
466 goto fail;
467 }
468
469 cur_l1_bytes = s->l1_size * sizeof(uint64_t);
470 sn_l1_bytes = sn->l1_size * sizeof(uint64_t);
471
472 /*
473 * Copy the snapshot L1 table to the current L1 table.
474 *
475 * Before overwriting the old current L1 table on disk, make sure to
476 * increase all refcounts for the clusters referenced by the new one.
477 * Decrease the refcount referenced by the old one only when the L1
478 * table is overwritten.
479 */
480 sn_l1_table = g_malloc0(cur_l1_bytes);
481
482 ret = bdrv_pread(bs->file, sn->l1_table_offset, sn_l1_table, sn_l1_bytes);
483 if (ret < 0) {
484 goto fail;
485 }
486
487 ret = qcow2_update_snapshot_refcount(bs, sn->l1_table_offset,
488 sn->l1_size, 1);
489 if (ret < 0) {
490 goto fail;
491 }
492
493 ret = qcow2_pre_write_overlap_check(bs,
494 QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1,
495 s->l1_table_offset, cur_l1_bytes);
496 if (ret < 0) {
497 goto fail;
498 }
499
500 ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset, sn_l1_table,
501 cur_l1_bytes);
502 if (ret < 0) {
503 goto fail;
504 }
505
506 /*
507 * Decrease refcount of clusters of current L1 table.
508 *
509 * At this point, the in-memory s->l1_table points to the old L1 table,
510 * whereas on disk we already have the new one.
511 *
512 * qcow2_update_snapshot_refcount special cases the current L1 table to use
513 * the in-memory data instead of really using the offset to load a new one,
514 * which is why this works.
515 */
516 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset,
517 s->l1_size, -1);
518
519 /*
520 * Now update the in-memory L1 table to be in sync with the on-disk one. We
521 * need to do this even if updating refcounts failed.
522 */
523 for(i = 0;i < s->l1_size; i++) {
524 s->l1_table[i] = be64_to_cpu(sn_l1_table[i]);
525 }
526
527 if (ret < 0) {
528 goto fail;
529 }
530
531 g_free(sn_l1_table);
532 sn_l1_table = NULL;
533
534 /*
535 * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
536 * when we decreased the refcount of the old snapshot.
537 */
538 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
539 if (ret < 0) {
540 goto fail;
541 }
542
543 #ifdef DEBUG_ALLOC
544 {
545 BdrvCheckResult result = {0};
546 qcow2_check_refcounts(bs, &result, 0);
547 }
548 #endif
549 return 0;
550
551 fail:
552 g_free(sn_l1_table);
553 return ret;
554 }
555
556 int qcow2_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
557 {
558 BDRVQcowState *s = bs->opaque;
559 QCowSnapshot sn;
560 int snapshot_index, ret;
561
562 /* Search the snapshot */
563 snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_id);
564 if (snapshot_index < 0) {
565 return -ENOENT;
566 }
567 sn = s->snapshots[snapshot_index];
568
569 /* Remove it from the snapshot list */
570 memmove(s->snapshots + snapshot_index,
571 s->snapshots + snapshot_index + 1,
572 (s->nb_snapshots - snapshot_index - 1) * sizeof(sn));
573 s->nb_snapshots--;
574 ret = qcow2_write_snapshots(bs);
575 if (ret < 0) {
576 return ret;
577 }
578
579 /*
580 * The snapshot is now unused, clean up. If we fail after this point, we
581 * won't recover but just leak clusters.
582 */
583 g_free(sn.id_str);
584 g_free(sn.name);
585
586 /*
587 * Now decrease the refcounts of clusters referenced by the snapshot and
588 * free the L1 table.
589 */
590 ret = qcow2_update_snapshot_refcount(bs, sn.l1_table_offset,
591 sn.l1_size, -1);
592 if (ret < 0) {
593 return ret;
594 }
595 qcow2_free_clusters(bs, sn.l1_table_offset, sn.l1_size * sizeof(uint64_t),
596 QCOW2_DISCARD_SNAPSHOT);
597
598 /* must update the copied flag on the current cluster offsets */
599 ret = qcow2_update_snapshot_refcount(bs, s->l1_table_offset, s->l1_size, 0);
600 if (ret < 0) {
601 return ret;
602 }
603
604 #ifdef DEBUG_ALLOC
605 {
606 BdrvCheckResult result = {0};
607 qcow2_check_refcounts(bs, &result, 0);
608 }
609 #endif
610 return 0;
611 }
612
613 int qcow2_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
614 {
615 BDRVQcowState *s = bs->opaque;
616 QEMUSnapshotInfo *sn_tab, *sn_info;
617 QCowSnapshot *sn;
618 int i;
619
620 if (!s->nb_snapshots) {
621 *psn_tab = NULL;
622 return s->nb_snapshots;
623 }
624
625 sn_tab = g_malloc0(s->nb_snapshots * sizeof(QEMUSnapshotInfo));
626 for(i = 0; i < s->nb_snapshots; i++) {
627 sn_info = sn_tab + i;
628 sn = s->snapshots + i;
629 pstrcpy(sn_info->id_str, sizeof(sn_info->id_str),
630 sn->id_str);
631 pstrcpy(sn_info->name, sizeof(sn_info->name),
632 sn->name);
633 sn_info->vm_state_size = sn->vm_state_size;
634 sn_info->date_sec = sn->date_sec;
635 sn_info->date_nsec = sn->date_nsec;
636 sn_info->vm_clock_nsec = sn->vm_clock_nsec;
637 }
638 *psn_tab = sn_tab;
639 return s->nb_snapshots;
640 }
641
642 int qcow2_snapshot_load_tmp(BlockDriverState *bs, const char *snapshot_name)
643 {
644 int i, snapshot_index;
645 BDRVQcowState *s = bs->opaque;
646 QCowSnapshot *sn;
647 uint64_t *new_l1_table;
648 int new_l1_bytes;
649 int ret;
650
651 assert(bs->read_only);
652
653 /* Search the snapshot */
654 snapshot_index = find_snapshot_by_id_or_name(bs, snapshot_name);
655 if (snapshot_index < 0) {
656 return -ENOENT;
657 }
658 sn = &s->snapshots[snapshot_index];
659
660 /* Allocate and read in the snapshot's L1 table */
661 new_l1_bytes = s->l1_size * sizeof(uint64_t);
662 new_l1_table = g_malloc0(align_offset(new_l1_bytes, 512));
663
664 ret = bdrv_pread(bs->file, sn->l1_table_offset, new_l1_table, new_l1_bytes);
665 if (ret < 0) {
666 g_free(new_l1_table);
667 return ret;
668 }
669
670 /* Switch the L1 table */
671 g_free(s->l1_table);
672
673 s->l1_size = sn->l1_size;
674 s->l1_table_offset = sn->l1_table_offset;
675 s->l1_table = new_l1_table;
676
677 for(i = 0;i < s->l1_size; i++) {
678 be64_to_cpus(&s->l1_table[i]);
679 }
680
681 return 0;
682 }