2 * Block driver for the QCOW version 2 format
4 * Copyright (c) 2004-2006 Fabrice Bellard
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 #include "qemu-common.h"
26 #include "block/block_int.h"
27 #include "block/qcow2.h"
/*
 * Fixed-size part of one entry in the snapshot table (packed struct).
 *
 * The readers and writers in this file convert every multi-byte field with
 * be*_to_cpu() / cpu_to_be*(), so the stored representation is big-endian.
 *
 * NOTE(review): only part of the field list is visible in this listing.
 * The fields l1_size, id_str_size, name_size, date_sec and date_nsec are
 * accessed through this struct elsewhere in the file; confirm their
 * declarations and order against the complete source before relying on
 * the layout shown here.
 */
29 typedef struct QEMU_PACKED QCowSnapshotHeader
{
30 /* header is 8 byte aligned */
31 uint64_t l1_table_offset
;
40 uint64_t vm_clock_nsec
;
42 uint32_t vm_state_size
;
43 uint32_t extra_data_size
; /* for extension */
44 /* extra data follows */
/*
 * Extra data that follows QCowSnapshotHeader in a snapshot table entry
 * (packed struct, fields converted with be64_to_cpu() / cpu_to_be64()).
 *
 * vm_state_size_large carries the VM state size as a 64 bit value; the
 * reader uses it to replace the 32 bit header field when at least 8 bytes
 * of extra data are present.
 *
 * NOTE(review): a disk_size member is read and written through this struct
 * elsewhere in the file, but its declaration is not visible in this
 * listing; confirm against the complete source.
 */
49 typedef struct QEMU_PACKED QCowSnapshotExtraData
{
50 uint64_t vm_state_size_large
;
52 } QCowSnapshotExtraData
;
/*
 * Release the per-snapshot strings of the in-memory snapshot list.
 *
 * For each of the s->nb_snapshots entries in s->snapshots the name and
 * id_str buffers are released with g_free().
 *
 * NOTE(review): the end of the function is not visible in this listing;
 * presumably the array itself is freed and the counters are reset as
 * well -- confirm against the complete source.
 */
54 void qcow2_free_snapshots(BlockDriverState
*bs
)
56 BDRVQcowState
*s
= bs
->opaque
;
59 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
60 g_free(s
->snapshots
[i
].name
);
61 g_free(s
->snapshots
[i
].id_str
);
/*
 * Load the snapshot table of the image into s->snapshots.
 *
 * Starting at s->snapshots_offset, each of the s->nb_snapshots entries is
 * read from bs->file with bdrv_pread() in four steps: the fixed header,
 * the extra data, the ID string and the name. Header fields are converted
 * from big-endian to host order. After the loop, s->snapshots_size is set
 * to the number of bytes between the table start and the final offset.
 *
 * NOTE(review): the checks of the bdrv_pread() results, the advance of
 * offset past the header and the name, and the return statements are not
 * visible in this listing. The trailing call to qcow2_free_snapshots()
 * presumably belongs to a failure path -- confirm.
 */
68 int qcow2_read_snapshots(BlockDriverState
*bs
)
70 BDRVQcowState
*s
= bs
->opaque
;
72 QCowSnapshotExtraData extra
;
74 int i
, id_str_size
, name_size
;
76 uint32_t extra_data_size
;
79 if (!s
->nb_snapshots
) {
/* No snapshots: the table occupies no space. */
81 s
->snapshots_size
= 0;
85 offset
= s
->snapshots_offset
;
/* Zero-filled array with one in-memory entry per on-disk snapshot. */
86 s
->snapshots
= g_malloc0(s
->nb_snapshots
* sizeof(QCowSnapshot
));
88 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
89 /* Read statically sized part of the snapshot header */
90 offset
= align_offset(offset
, 8);
91 ret
= bdrv_pread(bs
->file
, offset
, &h
, sizeof(h
));
97 sn
= s
->snapshots
+ i
;
98 sn
->l1_table_offset
= be64_to_cpu(h
.l1_table_offset
);
99 sn
->l1_size
= be32_to_cpu(h
.l1_size
);
100 sn
->vm_state_size
= be32_to_cpu(h
.vm_state_size
);
101 sn
->date_sec
= be32_to_cpu(h
.date_sec
);
102 sn
->date_nsec
= be32_to_cpu(h
.date_nsec
);
103 sn
->vm_clock_nsec
= be64_to_cpu(h
.vm_clock_nsec
);
104 extra_data_size
= be32_to_cpu(h
.extra_data_size
);
106 id_str_size
= be16_to_cpu(h
.id_str_size
);
107 name_size
= be16_to_cpu(h
.name_size
);
109 /* Read extra data */
110 ret
= bdrv_pread(bs
->file
, offset
, &extra
,
111 MIN(sizeof(extra
), extra_data_size
));
/* Skip the whole extra data area, including any part beyond sizeof(extra). */
115 offset
+= extra_data_size
;
/* A 64 bit VM state size in the extra data replaces the 32 bit header value. */
117 if (extra_data_size
>= 8) {
118 sn
->vm_state_size
= be64_to_cpu(extra
.vm_state_size_large
);
121 if (extra_data_size
>= 16) {
122 sn
->disk_size
= be64_to_cpu(extra
.disk_size
);
/* NOTE(review): presumably the fallback when no disk size is stored -- confirm. */
124 sn
->disk_size
= bs
->total_sectors
* BDRV_SECTOR_SIZE
;
127 /* Read snapshot ID */
/* One extra byte is allocated for the terminating NUL written below. */
128 sn
->id_str
= g_malloc(id_str_size
+ 1);
129 ret
= bdrv_pread(bs
->file
, offset
, sn
->id_str
, id_str_size
);
133 offset
+= id_str_size
;
134 sn
->id_str
[id_str_size
] = '\0';
136 /* Read snapshot name */
137 sn
->name
= g_malloc(name_size
+ 1);
138 ret
= bdrv_pread(bs
->file
, offset
, sn
->name
, name_size
);
143 sn
->name
[name_size
] = '\0';
146 s
->snapshots_size
= offset
- s
->snapshots_offset
;
150 qcow2_free_snapshots(bs
);
/*
 * Serialize the in-memory snapshot list into a newly allocated on-disk
 * table and make the image header point to it.
 *
 * Visible steps, in order:
 *  - compute the table size (8 byte alignment per entry, the extra data
 *    and the lengths of id_str and name are added per entry),
 *  - allocate clusters for it with qcow2_alloc_clusters(),
 *  - flush, then run qcow2_pre_write_overlap_check() on the new area,
 *  - write header, extra data, ID and name of every snapshot with
 *    bdrv_pwrite() to bs->file,
 *  - flush again, then update nb_snapshots and snapshots_offset in the
 *    image header with a single bdrv_pwrite_sync(),
 *  - free the clusters of the old table and record the new offset and
 *    size in s.
 *
 * NOTE(review): result checks, the advance of offset past the header and
 * the return statements are not visible in this listing. The final
 * qcow2_free_clusters() on the new snapshots_offset presumably belongs to
 * a failure path -- confirm.
 */
154 /* add at the end of the file a new list of snapshots */
155 static int qcow2_write_snapshots(BlockDriverState
*bs
)
157 BDRVQcowState
*s
= bs
->opaque
;
159 QCowSnapshotHeader h
;
160 QCowSnapshotExtraData extra
;
161 int i
, name_size
, id_str_size
, snapshots_size
;
163 uint32_t nb_snapshots
;
164 uint64_t snapshots_offset
;
165 } QEMU_PACKED header_data
;
166 int64_t offset
, snapshots_offset
;
169 /* compute the size of the snapshots */
171 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
172 sn
= s
->snapshots
+ i
;
173 offset
= align_offset(offset
, 8);
175 offset
+= sizeof(extra
);
176 offset
+= strlen(sn
->id_str
);
177 offset
+= strlen(sn
->name
);
179 snapshots_size
= offset
;
181 /* Allocate space for the new snapshot list */
182 snapshots_offset
= qcow2_alloc_clusters(bs
, snapshots_size
);
183 offset
= snapshots_offset
;
188 ret
= bdrv_flush(bs
);
193 /* The snapshot list position has not yet been updated, so these clusters
194 * must indeed be completely free */
195 ret
= qcow2_pre_write_overlap_check(bs
, QCOW2_OL_DEFAULT
, offset
,
202 /* Write all snapshots to the new list */
203 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
204 sn
= s
->snapshots
+ i
;
/* Start from an all-zero header so fields not set below are written as zero. */
205 memset(&h
, 0, sizeof(h
));
206 h
.l1_table_offset
= cpu_to_be64(sn
->l1_table_offset
);
207 h
.l1_size
= cpu_to_be32(sn
->l1_size
);
208 /* If it doesn't fit in 32 bit, older implementations should treat it
209 * as a disk-only snapshot rather than truncate the VM state */
210 if (sn
->vm_state_size
<= 0xffffffff) {
211 h
.vm_state_size
= cpu_to_be32(sn
->vm_state_size
);
213 h
.date_sec
= cpu_to_be32(sn
->date_sec
);
214 h
.date_nsec
= cpu_to_be32(sn
->date_nsec
);
215 h
.vm_clock_nsec
= cpu_to_be64(sn
->vm_clock_nsec
);
216 h
.extra_data_size
= cpu_to_be32(sizeof(extra
));
/* The full 64 bit VM state size and the disk size always go into the extra data. */
218 memset(&extra
, 0, sizeof(extra
));
219 extra
.vm_state_size_large
= cpu_to_be64(sn
->vm_state_size
);
220 extra
.disk_size
= cpu_to_be64(sn
->disk_size
);
222 id_str_size
= strlen(sn
->id_str
);
223 name_size
= strlen(sn
->name
);
/* Both lengths are stored in 16 bit header fields. */
224 assert(id_str_size
<= UINT16_MAX
&& name_size
<= UINT16_MAX
);
225 h
.id_str_size
= cpu_to_be16(id_str_size
);
226 h
.name_size
= cpu_to_be16(name_size
);
227 offset
= align_offset(offset
, 8);
229 ret
= bdrv_pwrite(bs
->file
, offset
, &h
, sizeof(h
));
235 ret
= bdrv_pwrite(bs
->file
, offset
, &extra
, sizeof(extra
));
239 offset
+= sizeof(extra
);
241 ret
= bdrv_pwrite(bs
->file
, offset
, sn
->id_str
, id_str_size
);
245 offset
+= id_str_size
;
247 ret
= bdrv_pwrite(bs
->file
, offset
, sn
->name
, name_size
);
255 * Update the header to point to the new snapshot table. This requires the
256 * new table and its refcounts to be stable on disk.
258 ret
= bdrv_flush(bs
);
/*
 * Compile-time check that snapshots_offset directly follows nb_snapshots
 * in QCowHeader, so that header_data can be written in one request.
 */
263 QEMU_BUILD_BUG_ON(offsetof(QCowHeader
, snapshots_offset
) !=
264 offsetof(QCowHeader
, nb_snapshots
) + sizeof(header_data
.nb_snapshots
));
266 header_data
.nb_snapshots
= cpu_to_be32(s
->nb_snapshots
);
267 header_data
.snapshots_offset
= cpu_to_be64(snapshots_offset
);
269 ret
= bdrv_pwrite_sync(bs
->file
, offsetof(QCowHeader
, nb_snapshots
),
270 &header_data
, sizeof(header_data
));
275 /* free the old snapshot table */
276 qcow2_free_clusters(bs
, s
->snapshots_offset
, s
->snapshots_size
,
277 QCOW2_DISCARD_SNAPSHOT
);
278 s
->snapshots_offset
= snapshots_offset
;
279 s
->snapshots_size
= snapshots_size
;
283 if (snapshots_offset
> 0) {
284 qcow2_free_clusters(bs
, snapshots_offset
, snapshots_size
,
285 QCOW2_DISCARD_ALWAYS
);
/*
 * Generate a snapshot ID string that is not yet in use.
 *
 * Every existing id_str in s->snapshots is parsed as a base 10 number
 * with strtoul(). The result, id_max + 1 formatted as an unsigned
 * decimal, is written to id_str with snprintf(), bounded by id_str_size.
 *
 * NOTE(review): the statement that updates id_max inside the loop is not
 * visible in this listing; presumably it keeps the largest parsed value
 * -- confirm against the complete source.
 */
290 static void find_new_snapshot_id(BlockDriverState
*bs
,
291 char *id_str
, int id_str_size
)
293 BDRVQcowState
*s
= bs
->opaque
;
296 unsigned long id
, id_max
= 0;
298 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
299 sn
= s
->snapshots
+ i
;
300 id
= strtoul(sn
->id_str
, NULL
, 10);
304 snprintf(id_str
, id_str_size
, "%lu", id_max
+ 1);
/*
 * Search s->snapshots for an entry matching an ID and/or a name.
 *
 * Three linear scans using strcmp() are visible: one requiring both
 * id_str and name to match, one comparing only id_str, and one comparing
 * only name.
 *
 * NOTE(review): the id and name parameters, the conditions that select
 * which scan runs and all return statements are not visible in this
 * listing. Callers in this file pass NULL for one of the two strings and
 * treat a negative result as not found, so presumably the scan is chosen
 * by which arguments are non-NULL and the matching index is returned --
 * confirm against the complete source.
 */
307 static int find_snapshot_by_id_and_name(BlockDriverState
*bs
,
311 BDRVQcowState
*s
= bs
->opaque
;
315 for (i
= 0; i
< s
->nb_snapshots
; i
++) {
316 if (!strcmp(s
->snapshots
[i
].id_str
, id
) &&
317 !strcmp(s
->snapshots
[i
].name
, name
)) {
322 for (i
= 0; i
< s
->nb_snapshots
; i
++) {
323 if (!strcmp(s
->snapshots
[i
].id_str
, id
)) {
328 for (i
= 0; i
< s
->nb_snapshots
; i
++) {
329 if (!strcmp(s
->snapshots
[i
].name
, name
)) {
/*
 * Look up a snapshot by a string that may be either its ID or its name.
 *
 * id_or_name is first passed to find_snapshot_by_id_and_name() as the ID
 * (with a NULL name); the final statement returns the result of a second
 * lookup that passes it as the name (with a NULL ID).
 *
 * NOTE(review): the check between the two calls is not visible in this
 * listing; presumably the first result is returned directly when the ID
 * lookup succeeded -- confirm.
 */
338 static int find_snapshot_by_id_or_name(BlockDriverState
*bs
,
339 const char *id_or_name
)
343 ret
= find_snapshot_by_id_and_name(bs
, id_or_name
, NULL
);
347 return find_snapshot_by_id_and_name(bs
, NULL
, id_or_name
);
/*
 * Create a new internal snapshot described by sn_info.
 *
 * Visible steps, in order:
 *  - generate an ID with find_new_snapshot_id() when sn_info->id_str is
 *    empty, then look the ID up among the existing snapshots,
 *  - fill a local QCowSnapshot from sn_info (strings are duplicated with
 *    g_strdup) and from the current image size,
 *  - allocate clusters for a copy of the active L1 table, convert the
 *    in-memory table to big-endian and write it to bs->file after an
 *    overlap check,
 *  - call qcow2_update_snapshot_refcount() on the active L1 table with an
 *    addend of 1,
 *  - build a snapshot array with one more entry, install it in s and call
 *    qcow2_write_snapshots(),
 *  - discard the VM state clusters from the active L1 table.
 *
 * NOTE(review): result checks, return statements and cleanup labels are
 * not visible in this listing. The statements that restore
 * old_snapshot_list presumably run only when qcow2_write_snapshots()
 * failed, and the qcow2_check_refcounts() call at the end is presumably
 * inside a debug-only guard -- confirm against the complete source.
 */
350 /* if no id is provided, a new one is constructed */
351 int qcow2_snapshot_create(BlockDriverState
*bs
, QEMUSnapshotInfo
*sn_info
)
353 BDRVQcowState
*s
= bs
->opaque
;
354 QCowSnapshot
*new_snapshot_list
= NULL
;
355 QCowSnapshot
*old_snapshot_list
= NULL
;
356 QCowSnapshot sn1
, *sn
= &sn1
;
358 uint64_t *l1_table
= NULL
;
359 int64_t l1_table_offset
;
361 memset(sn
, 0, sizeof(*sn
));
363 /* Generate an ID if it wasn't passed */
364 if (sn_info
->id_str
[0] == '\0') {
365 find_new_snapshot_id(bs
, sn_info
->id_str
, sizeof(sn_info
->id_str
));
368 /* Check that the ID is unique */
369 if (find_snapshot_by_id_and_name(bs
, sn_info
->id_str
, NULL
) >= 0) {
373 /* Populate sn with passed data */
374 sn
->id_str
= g_strdup(sn_info
->id_str
);
375 sn
->name
= g_strdup(sn_info
->name
);
377 sn
->disk_size
= bs
->total_sectors
* BDRV_SECTOR_SIZE
;
378 sn
->vm_state_size
= sn_info
->vm_state_size
;
379 sn
->date_sec
= sn_info
->date_sec
;
380 sn
->date_nsec
= sn_info
->date_nsec
;
381 sn
->vm_clock_nsec
= sn_info
->vm_clock_nsec
;
383 /* Allocate the L1 table of the snapshot and copy the current one there. */
384 l1_table_offset
= qcow2_alloc_clusters(bs
, s
->l1_size
* sizeof(uint64_t));
/* A negative value from the allocator is an error code. */
385 if (l1_table_offset
< 0) {
386 ret
= l1_table_offset
;
390 sn
->l1_table_offset
= l1_table_offset
;
391 sn
->l1_size
= s
->l1_size
;
/* Temporary big-endian copy of the in-memory L1 table for writing to disk. */
393 l1_table
= g_malloc(s
->l1_size
* sizeof(uint64_t));
394 for(i
= 0; i
< s
->l1_size
; i
++) {
395 l1_table
[i
] = cpu_to_be64(s
->l1_table
[i
]);
398 ret
= qcow2_pre_write_overlap_check(bs
, QCOW2_OL_DEFAULT
,
399 sn
->l1_table_offset
, s
->l1_size
* sizeof(uint64_t));
404 ret
= bdrv_pwrite(bs
->file
, sn
->l1_table_offset
, l1_table
,
405 s
->l1_size
* sizeof(uint64_t));
414 * Increase the refcounts of all clusters and make sure everything is
415 * stable on disk before updating the snapshot table to contain a pointer
416 * to the new L1 table.
418 ret
= qcow2_update_snapshot_refcount(bs
, s
->l1_table_offset
, s
->l1_size
, 1);
423 /* Append the new snapshot to the snapshot list */
424 new_snapshot_list
= g_malloc((s
->nb_snapshots
+ 1) * sizeof(QCowSnapshot
));
426 memcpy(new_snapshot_list
, s
->snapshots
,
427 s
->nb_snapshots
* sizeof(QCowSnapshot
));
/* Keep the previous array so it can be reinstated or freed afterwards. */
428 old_snapshot_list
= s
->snapshots
;
430 s
->snapshots
= new_snapshot_list
;
431 s
->snapshots
[s
->nb_snapshots
++] = *sn
;
433 ret
= qcow2_write_snapshots(bs
);
435 g_free(s
->snapshots
);
436 s
->snapshots
= old_snapshot_list
;
441 g_free(old_snapshot_list
);
443 /* The VM state isn't needed any more in the active L1 table; in fact, it
444 * hurts by causing expensive COW for the next snapshot. */
445 qcow2_discard_clusters(bs
, qcow2_vm_state_offset(s
),
446 align_offset(sn
->vm_state_size
, s
->cluster_size
)
448 QCOW2_DISCARD_NEVER
);
452 BdrvCheckResult result
= {0};
453 qcow2_check_refcounts(bs
, &result
, 0);
/*
 * Revert the active image state to that of an internal snapshot.
 *
 * Visible steps, in order:
 *  - find the snapshot with find_snapshot_by_id_or_name(),
 *  - report an error with error_report() when the snapshot disk size
 *    differs from the current image size,
 *  - grow the active L1 table to at least sn->l1_size entries,
 *  - read the snapshot L1 table from bs->file into a zero-filled buffer
 *    that has the size of the active table,
 *  - update refcounts for the snapshot L1 table, run an overlap check and
 *    overwrite the active L1 table on disk with bdrv_pwrite_sync(),
 *  - update refcounts for the old active table, then copy the buffer into
 *    s->l1_table with conversion to host byte order,
 *  - call qcow2_update_snapshot_refcount() on the active table with an
 *    addend of 0.
 *
 * NOTE(review): result checks, the remaining call arguments, the return
 * statements and cleanup are not visible in this listing. The
 * qcow2_check_refcounts() call at the end is presumably inside a
 * debug-only guard -- confirm against the complete source.
 */
466 /* copy the snapshot 'snapshot_name' into the current disk image */
467 int qcow2_snapshot_goto(BlockDriverState
*bs
, const char *snapshot_id
)
469 BDRVQcowState
*s
= bs
->opaque
;
471 int i
, snapshot_index
;
472 int cur_l1_bytes
, sn_l1_bytes
;
474 uint64_t *sn_l1_table
= NULL
;
476 /* Search the snapshot */
477 snapshot_index
= find_snapshot_by_id_or_name(bs
, snapshot_id
);
478 if (snapshot_index
< 0) {
481 sn
= &s
->snapshots
[snapshot_index
];
483 if (sn
->disk_size
!= bs
->total_sectors
* BDRV_SECTOR_SIZE
) {
484 error_report("qcow2: Loading snapshots with different disk "
485 "size is not implemented");
491 * Make sure that the current L1 table is big enough to contain the whole
492 * L1 table of the snapshot. If the snapshot L1 table is smaller, the
493 * current one must be padded with zeros.
495 ret
= qcow2_grow_l1_table(bs
, sn
->l1_size
, true);
500 cur_l1_bytes
= s
->l1_size
* sizeof(uint64_t);
501 sn_l1_bytes
= sn
->l1_size
* sizeof(uint64_t);
504 * Copy the snapshot L1 table to the current L1 table.
506 * Before overwriting the old current L1 table on disk, make sure to
507 * increase all refcounts for the clusters referenced by the new one.
508 * Decrease the refcount referenced by the old one only when the L1
509 * table is overwritten.
/*
 * The buffer has the size of the active table and is zero-filled, so
 * entries beyond the bytes read from the snapshot table remain zero.
 */
511 sn_l1_table
= g_malloc0(cur_l1_bytes
);
513 ret
= bdrv_pread(bs
->file
, sn
->l1_table_offset
, sn_l1_table
, sn_l1_bytes
);
518 ret
= qcow2_update_snapshot_refcount(bs
, sn
->l1_table_offset
,
/* The active L1 table is the write target, so that check is masked out. */
524 ret
= qcow2_pre_write_overlap_check(bs
,
525 QCOW2_OL_DEFAULT
& ~QCOW2_OL_ACTIVE_L1
,
526 s
->l1_table_offset
, cur_l1_bytes
);
531 ret
= bdrv_pwrite_sync(bs
->file
, s
->l1_table_offset
, sn_l1_table
,
538 * Decrease refcount of clusters of current L1 table.
540 * At this point, the in-memory s->l1_table points to the old L1 table,
541 * whereas on disk we already have the new one.
543 * qcow2_update_snapshot_refcount special cases the current L1 table to use
544 * the in-memory data instead of really using the offset to load a new one,
545 * which is why this works.
547 ret
= qcow2_update_snapshot_refcount(bs
, s
->l1_table_offset
,
551 * Now update the in-memory L1 table to be in sync with the on-disk one. We
552 * need to do this even if updating refcounts failed.
554 for(i
= 0;i
< s
->l1_size
; i
++) {
555 s
->l1_table
[i
] = be64_to_cpu(sn_l1_table
[i
]);
566 * Update QCOW_OFLAG_COPIED in the active L1 table (it may have changed
567 * when we decreased the refcount of the old snapshot.
569 ret
= qcow2_update_snapshot_refcount(bs
, s
->l1_table_offset
, s
->l1_size
, 0);
576 BdrvCheckResult result
= {0};
577 qcow2_check_refcounts(bs
, &result
, 0);
/*
 * Delete an internal snapshot.
 *
 * Visible steps, in order:
 *  - find the snapshot with find_snapshot_by_id_and_name(); an error is
 *    set through errp when the returned index is negative,
 *  - copy the entry into the local variable sn, because its array slot is
 *    overwritten by the following memmove() that closes the gap in
 *    s->snapshots,
 *  - write the updated list with qcow2_write_snapshots(),
 *  - call qcow2_update_snapshot_refcount() for the L1 table of the deleted
 *    snapshot and free the clusters of that L1 table,
 *  - call qcow2_update_snapshot_refcount() on the active L1 table with an
 *    addend of 0 to refresh the copied flags.
 *
 * Each error_setg() call fills errp with a message for the step that
 * precedes it.
 *
 * NOTE(review): the remaining parameters, the decrement of
 * s->nb_snapshots, the release of the strings owned by sn, the result
 * checks and the return statements are not visible in this listing. The
 * qcow2_check_refcounts() call at the end is presumably inside a
 * debug-only guard -- confirm against the complete source.
 */
587 int qcow2_snapshot_delete(BlockDriverState
*bs
,
588 const char *snapshot_id
,
592 BDRVQcowState
*s
= bs
->opaque
;
594 int snapshot_index
, ret
;
596 /* Search the snapshot */
597 snapshot_index
= find_snapshot_by_id_and_name(bs
, snapshot_id
, name
);
598 if (snapshot_index
< 0) {
599 error_setg(errp
, "Can't find the snapshot");
602 sn
= s
->snapshots
[snapshot_index
];
604 /* Remove it from the snapshot list */
605 memmove(s
->snapshots
+ snapshot_index
,
606 s
->snapshots
+ snapshot_index
+ 1,
607 (s
->nb_snapshots
- snapshot_index
- 1) * sizeof(sn
));
609 ret
= qcow2_write_snapshots(bs
);
611 error_setg(errp
, "Failed to remove snapshot from snapshot list");
616 * The snapshot is now unused, clean up. If we fail after this point, we
617 * won't recover but just leak clusters.
623 * Now decrease the refcounts of clusters referenced by the snapshot and
626 ret
= qcow2_update_snapshot_refcount(bs
, sn
.l1_table_offset
,
629 error_setg(errp
, "Failed to free the cluster and L1 table");
632 qcow2_free_clusters(bs
, sn
.l1_table_offset
, sn
.l1_size
* sizeof(uint64_t),
633 QCOW2_DISCARD_SNAPSHOT
);
635 /* must update the copied flag on the current cluster offsets */
636 ret
= qcow2_update_snapshot_refcount(bs
, s
->l1_table_offset
, s
->l1_size
, 0);
638 error_setg(errp
, "Failed to update snapshot status in disk");
644 BdrvCheckResult result
= {0};
645 qcow2_check_refcounts(bs
, &result
, 0);
/*
 * Build an array of QEMUSnapshotInfo describing all internal snapshots.
 *
 * A zero-filled array with s->nb_snapshots elements is allocated and, for
 * each snapshot, the ID and name are copied with pstrcpy() (bounded by the
 * size of the destination fields) together with the VM state size, the
 * date and the VM clock.
 *
 * Returns s->nb_snapshots, both on the early path taken when there are no
 * snapshots and after the array has been filled.
 *
 * NOTE(review): the assignments to *psn_tab and the source arguments of
 * the pstrcpy() calls are not visible in this listing; presumably the
 * caller receives the array through psn_tab and is responsible for
 * freeing it -- confirm against the complete source.
 */
651 int qcow2_snapshot_list(BlockDriverState
*bs
, QEMUSnapshotInfo
**psn_tab
)
653 BDRVQcowState
*s
= bs
->opaque
;
654 QEMUSnapshotInfo
*sn_tab
, *sn_info
;
658 if (!s
->nb_snapshots
) {
660 return s
->nb_snapshots
;
663 sn_tab
= g_malloc0(s
->nb_snapshots
* sizeof(QEMUSnapshotInfo
));
664 for(i
= 0; i
< s
->nb_snapshots
; i
++) {
665 sn_info
= sn_tab
+ i
;
666 sn
= s
->snapshots
+ i
;
667 pstrcpy(sn_info
->id_str
, sizeof(sn_info
->id_str
),
669 pstrcpy(sn_info
->name
, sizeof(sn_info
->name
),
671 sn_info
->vm_state_size
= sn
->vm_state_size
;
672 sn_info
->date_sec
= sn
->date_sec
;
673 sn_info
->date_nsec
= sn
->date_nsec
;
674 sn_info
->vm_clock_nsec
= sn
->vm_clock_nsec
;
677 return s
->nb_snapshots
;
680 int qcow2_snapshot_load_tmp(BlockDriverState
*bs
, const char *snapshot_name
)
682 int i
, snapshot_index
;
683 BDRVQcowState
*s
= bs
->opaque
;
685 uint64_t *new_l1_table
;
689 assert(bs
->read_only
);
691 /* Search the snapshot */
692 snapshot_index
= find_snapshot_by_id_or_name(bs
, snapshot_name
);
693 if (snapshot_index
< 0) {
696 sn
= &s
->snapshots
[snapshot_index
];
698 /* Allocate and read in the snapshot's L1 table */
699 new_l1_bytes
= s
->l1_size
* sizeof(uint64_t);
700 new_l1_table
= g_malloc0(align_offset(new_l1_bytes
, 512));
702 ret
= bdrv_pread(bs
->file
, sn
->l1_table_offset
, new_l1_table
, new_l1_bytes
);
704 g_free(new_l1_table
);
708 /* Switch the L1 table */
711 s
->l1_size
= sn
->l1_size
;
712 s
->l1_table_offset
= sn
->l1_table_offset
;
713 s
->l1_table
= new_l1_table
;
715 for(i
= 0;i
< s
->l1_size
; i
++) {
716 be64_to_cpus(&s
->l1_table
[i
]);