]> git.proxmox.com Git - mirror_qemu.git/blame - block/qcow2.c
iscsi: Don't set error if already set in iscsi_do_inquiry
[mirror_qemu.git] / block / qcow2.c
CommitLineData
585f8587
FB
1/*
2 * Block driver for the QCOW version 2 format
5fafdf24 3 *
585f8587 4 * Copyright (c) 2004-2006 Fabrice Bellard
5fafdf24 5 *
585f8587
FB
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
faf07963 24#include "qemu-common.h"
737e150e 25#include "block/block_int.h"
1de7afc9 26#include "qemu/module.h"
585f8587 27#include <zlib.h>
753d9b82 28#include "qemu/aes.h"
f7d0fe02 29#include "block/qcow2.h"
1de7afc9 30#include "qemu/error-report.h"
7b1b5d19 31#include "qapi/qmp/qerror.h"
acdfb480 32#include "qapi/qmp/qbool.h"
3cce16f4 33#include "trace.h"
585f8587
FB
34
35/*
36 Differences with QCOW:
37
38 - Support for multiple incremental snapshots.
39 - Memory management by reference counts.
40 - Clusters which have a reference count of one have the bit
41 QCOW_OFLAG_COPIED to optimize write performance.
5fafdf24 42 - Size of compressed clusters is stored in sectors to reduce bit usage
585f8587
FB
43 in the cluster offsets.
44 - Support for storing additional data (such as the VM state) in the
3b46e624 45 snapshots.
585f8587
FB
46 - If a backing store is used, the cluster size is not constrained
47 (could be backported to QCOW).
48 - L2 tables have always a size of one cluster.
49*/
50
9b80ddf3
AL
51
/*
 * Header that precedes each qcow2 header extension in the image file.
 * Both fields are byte-swapped with be32_to_cpus() after being read;
 * len is the number of payload bytes that follow this header.
 */
typedef struct {
    uint32_t magic; /* one of the QCOW2_EXT_MAGIC_* values, or unknown */
    uint32_t len;   /* payload length in bytes */
} QEMU_PACKED QCowExtension;
21d82ac9 56
7c80ab3f
JS
57#define QCOW2_EXT_MAGIC_END 0
58#define QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
cfcc4c62 59#define QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
9b80ddf3 60
7c80ab3f 61static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
585f8587
FB
62{
63 const QCowHeader *cow_header = (const void *)buf;
3b46e624 64
585f8587
FB
65 if (buf_size >= sizeof(QCowHeader) &&
66 be32_to_cpu(cow_header->magic) == QCOW_MAGIC &&
6744cbab 67 be32_to_cpu(cow_header->version) >= 2)
585f8587
FB
68 return 100;
69 else
70 return 0;
71}
72
9b80ddf3
AL
73
74/*
75 * read qcow2 extension and fill bs
76 * start reading from start_offset
77 * finish reading upon magic of value 0 or when end_offset reached
78 * unknown magic is skipped (future extension this version knows nothing about)
79 * return 0 upon success, non-0 otherwise
80 */
7c80ab3f 81static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
3ef6c40a
HR
82 uint64_t end_offset, void **p_feature_table,
83 Error **errp)
9b80ddf3 84{
75bab85c 85 BDRVQcowState *s = bs->opaque;
9b80ddf3
AL
86 QCowExtension ext;
87 uint64_t offset;
75bab85c 88 int ret;
9b80ddf3
AL
89
90#ifdef DEBUG_EXT
7c80ab3f 91 printf("qcow2_read_extensions: start=%ld end=%ld\n", start_offset, end_offset);
9b80ddf3
AL
92#endif
93 offset = start_offset;
94 while (offset < end_offset) {
95
96#ifdef DEBUG_EXT
97 /* Sanity check */
98 if (offset > s->cluster_size)
7c80ab3f 99 printf("qcow2_read_extension: suspicious offset %lu\n", offset);
9b80ddf3 100
9b2260cb 101 printf("attempting to read extended header in offset %lu\n", offset);
9b80ddf3
AL
102#endif
103
3ef6c40a
HR
104 ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
105 if (ret < 0) {
106 error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
107 "pread fail from offset %" PRIu64, offset);
9b80ddf3
AL
108 return 1;
109 }
110 be32_to_cpus(&ext.magic);
111 be32_to_cpus(&ext.len);
112 offset += sizeof(ext);
113#ifdef DEBUG_EXT
114 printf("ext.magic = 0x%x\n", ext.magic);
115#endif
64ca6aee 116 if (ext.len > end_offset - offset) {
3ef6c40a 117 error_setg(errp, "Header extension too large");
64ca6aee
KW
118 return -EINVAL;
119 }
120
9b80ddf3 121 switch (ext.magic) {
7c80ab3f 122 case QCOW2_EXT_MAGIC_END:
9b80ddf3 123 return 0;
f965509c 124
7c80ab3f 125 case QCOW2_EXT_MAGIC_BACKING_FORMAT:
f965509c 126 if (ext.len >= sizeof(bs->backing_format)) {
3ef6c40a
HR
127 error_setg(errp, "ERROR: ext_backing_format: len=%u too large"
128 " (>=%zu)", ext.len, sizeof(bs->backing_format));
f965509c
AL
129 return 2;
130 }
3ef6c40a
HR
131 ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
132 if (ret < 0) {
133 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
134 "Could not read format name");
f965509c 135 return 3;
3ef6c40a 136 }
f965509c
AL
137 bs->backing_format[ext.len] = '\0';
138#ifdef DEBUG_EXT
139 printf("Qcow2: Got format extension %s\n", bs->backing_format);
140#endif
f965509c
AL
141 break;
142
cfcc4c62
KW
143 case QCOW2_EXT_MAGIC_FEATURE_TABLE:
144 if (p_feature_table != NULL) {
145 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
146 ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
147 if (ret < 0) {
3ef6c40a
HR
148 error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
149 "Could not read table");
cfcc4c62
KW
150 return ret;
151 }
152
153 *p_feature_table = feature_table;
154 }
155 break;
156
9b80ddf3 157 default:
75bab85c
KW
158 /* unknown magic - save it in case we need to rewrite the header */
159 {
160 Qcow2UnknownHeaderExtension *uext;
161
162 uext = g_malloc0(sizeof(*uext) + ext.len);
163 uext->magic = ext.magic;
164 uext->len = ext.len;
165 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
166
167 ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
168 if (ret < 0) {
3ef6c40a
HR
169 error_setg_errno(errp, -ret, "ERROR: unknown extension: "
170 "Could not read data");
75bab85c
KW
171 return ret;
172 }
75bab85c 173 }
9b80ddf3
AL
174 break;
175 }
fd29b4bb
KW
176
177 offset += ((ext.len + 7) & ~7);
9b80ddf3
AL
178 }
179
180 return 0;
181}
182
75bab85c
KW
183static void cleanup_unknown_header_ext(BlockDriverState *bs)
184{
185 BDRVQcowState *s = bs->opaque;
186 Qcow2UnknownHeaderExtension *uext, *next;
187
188 QLIST_FOREACH_SAFE(uext, &s->unknown_header_ext, next, next) {
189 QLIST_REMOVE(uext, next);
190 g_free(uext);
191 }
192}
9b80ddf3 193
3ef6c40a
HR
194static void GCC_FMT_ATTR(3, 4) report_unsupported(BlockDriverState *bs,
195 Error **errp, const char *fmt, ...)
6744cbab
KW
196{
197 char msg[64];
198 va_list ap;
199
200 va_start(ap, fmt);
201 vsnprintf(msg, sizeof(msg), fmt, ap);
202 va_end(ap);
203
3ef6c40a
HR
204 error_set(errp, QERR_UNKNOWN_BLOCK_FORMAT_FEATURE, bs->device_name, "qcow2",
205 msg);
6744cbab
KW
206}
207
cfcc4c62 208static void report_unsupported_feature(BlockDriverState *bs,
3ef6c40a 209 Error **errp, Qcow2Feature *table, uint64_t mask)
cfcc4c62
KW
210{
211 while (table && table->name[0] != '\0') {
212 if (table->type == QCOW2_FEAT_TYPE_INCOMPATIBLE) {
213 if (mask & (1 << table->bit)) {
3ef6c40a 214 report_unsupported(bs, errp, "%.46s", table->name);
cfcc4c62
KW
215 mask &= ~(1 << table->bit);
216 }
217 }
218 table++;
219 }
220
221 if (mask) {
3ef6c40a
HR
222 report_unsupported(bs, errp, "Unknown incompatible feature: %" PRIx64,
223 mask);
cfcc4c62
KW
224 }
225}
226
bfe8043e
SH
227/*
228 * Sets the dirty bit and flushes afterwards if necessary.
229 *
230 * The incompatible_features bit is only set if the image file header was
231 * updated successfully. Therefore it is not required to check the return
232 * value of this function.
233 */
280d3735 234int qcow2_mark_dirty(BlockDriverState *bs)
bfe8043e
SH
235{
236 BDRVQcowState *s = bs->opaque;
237 uint64_t val;
238 int ret;
239
240 assert(s->qcow_version >= 3);
241
242 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
243 return 0; /* already dirty */
244 }
245
246 val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
247 ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
248 &val, sizeof(val));
249 if (ret < 0) {
250 return ret;
251 }
252 ret = bdrv_flush(bs->file);
253 if (ret < 0) {
254 return ret;
255 }
256
257 /* Only treat image as dirty if the header was updated successfully */
258 s->incompatible_features |= QCOW2_INCOMPAT_DIRTY;
259 return 0;
260}
261
c61d0004
SH
262/*
263 * Clears the dirty bit and flushes before if necessary. Only call this
264 * function when there are no pending requests, it does not guard against
265 * concurrent requests dirtying the image.
266 */
267static int qcow2_mark_clean(BlockDriverState *bs)
268{
269 BDRVQcowState *s = bs->opaque;
270
271 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
272 int ret = bdrv_flush(bs);
273 if (ret < 0) {
274 return ret;
275 }
276
277 s->incompatible_features &= ~QCOW2_INCOMPAT_DIRTY;
278 return qcow2_update_header(bs);
279 }
280 return 0;
281}
282
69c98726
HR
283/*
284 * Marks the image as corrupt.
285 */
286int qcow2_mark_corrupt(BlockDriverState *bs)
287{
288 BDRVQcowState *s = bs->opaque;
289
290 s->incompatible_features |= QCOW2_INCOMPAT_CORRUPT;
291 return qcow2_update_header(bs);
292}
293
294/*
295 * Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
296 * before if necessary.
297 */
298int qcow2_mark_consistent(BlockDriverState *bs)
299{
300 BDRVQcowState *s = bs->opaque;
301
302 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
303 int ret = bdrv_flush(bs);
304 if (ret < 0) {
305 return ret;
306 }
307
308 s->incompatible_features &= ~QCOW2_INCOMPAT_CORRUPT;
309 return qcow2_update_header(bs);
310 }
311 return 0;
312}
313
acbe5982
SH
314static int qcow2_check(BlockDriverState *bs, BdrvCheckResult *result,
315 BdrvCheckMode fix)
316{
317 int ret = qcow2_check_refcounts(bs, result, fix);
318 if (ret < 0) {
319 return ret;
320 }
321
322 if (fix && result->check_errors == 0 && result->corruptions == 0) {
24530f3e
HR
323 ret = qcow2_mark_clean(bs);
324 if (ret < 0) {
325 return ret;
326 }
327 return qcow2_mark_consistent(bs);
acbe5982
SH
328 }
329 return ret;
330}
331
8c7de283
KW
332static int validate_table_offset(BlockDriverState *bs, uint64_t offset,
333 uint64_t entries, size_t entry_len)
334{
335 BDRVQcowState *s = bs->opaque;
336 uint64_t size;
337
338 /* Use signed INT64_MAX as the maximum even for uint64_t header fields,
339 * because values will be passed to qemu functions taking int64_t. */
340 if (entries > INT64_MAX / entry_len) {
341 return -EINVAL;
342 }
343
344 size = entries * entry_len;
345
346 if (INT64_MAX - size < offset) {
347 return -EINVAL;
348 }
349
350 /* Tables must be cluster aligned */
351 if (offset & (s->cluster_size - 1)) {
352 return -EINVAL;
353 }
354
355 return 0;
356}
357
74c4510a
KW
/*
 * Runtime options accepted by the qcow2 driver.  qcow2_open() creates a
 * QemuOpts from this list and absorbs the matching entries of its options
 * QDict into it.
 */
static QemuOptsList qcow2_runtime_opts = {
    .name = "qcow2",
    .head = QTAILQ_HEAD_INITIALIZER(qcow2_runtime_opts.head),
    .desc = {
        {
            .name = QCOW2_OPT_LAZY_REFCOUNTS,
            .type = QEMU_OPT_BOOL,
            .help = "Postpone refcount updates",
        },
        /* Discard behaviour */
        {
            .name = QCOW2_OPT_DISCARD_REQUEST,
            .type = QEMU_OPT_BOOL,
            .help = "Pass guest discard requests to the layer below",
        },
        {
            .name = QCOW2_OPT_DISCARD_SNAPSHOT,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when snapshot related space "
                    "is freed",
        },
        {
            .name = QCOW2_OPT_DISCARD_OTHER,
            .type = QEMU_OPT_BOOL,
            .help = "Generate discard requests when other clusters are freed",
        },
        /* Overlap checks: a template selector plus one boolean per check */
        {
            .name = QCOW2_OPT_OVERLAP,
            .type = QEMU_OPT_STRING,
            .help = "Selects which overlap checks to perform from a range of "
                    "templates (none, constant, cached, all)",
        },
        {
            .name = QCOW2_OPT_OVERLAP_MAIN_HEADER,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the main qcow2 header",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the active L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_ACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an active L2 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the refcount table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into a refcount block",
        },
        {
            .name = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into the snapshot table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L1,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L1 table",
        },
        {
            .name = QCOW2_OPT_OVERLAP_INACTIVE_L2,
            .type = QEMU_OPT_BOOL,
            .help = "Check for unintended writes into an inactive L2 table",
        },
        { /* end of list */ }
    },
};
432
4092e99d
HR
/*
 * Maps each overlap check bit number to the name of the boolean runtime
 * option that overrides it; qcow2_open() indexes this table by bit number
 * when building s->overlap_check.
 */
static const char *overlap_bool_option_names[QCOW2_OL_MAX_BITNR] = {
    [QCOW2_OL_MAIN_HEADER_BITNR]    = QCOW2_OPT_OVERLAP_MAIN_HEADER,
    [QCOW2_OL_ACTIVE_L1_BITNR]      = QCOW2_OPT_OVERLAP_ACTIVE_L1,
    [QCOW2_OL_ACTIVE_L2_BITNR]      = QCOW2_OPT_OVERLAP_ACTIVE_L2,
    [QCOW2_OL_REFCOUNT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_TABLE,
    [QCOW2_OL_REFCOUNT_BLOCK_BITNR] = QCOW2_OPT_OVERLAP_REFCOUNT_BLOCK,
    [QCOW2_OL_SNAPSHOT_TABLE_BITNR] = QCOW2_OPT_OVERLAP_SNAPSHOT_TABLE,
    [QCOW2_OL_INACTIVE_L1_BITNR]    = QCOW2_OPT_OVERLAP_INACTIVE_L1,
    [QCOW2_OL_INACTIVE_L2_BITNR]    = QCOW2_OPT_OVERLAP_INACTIVE_L2,
};
443
015a1036
HR
444static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
445 Error **errp)
585f8587
FB
446{
447 BDRVQcowState *s = bs->opaque;
6d33e8e7
KW
448 unsigned int len, i;
449 int ret = 0;
585f8587 450 QCowHeader header;
74c4510a
KW
451 QemuOpts *opts;
452 Error *local_err = NULL;
9b80ddf3 453 uint64_t ext_end;
2cf7cfa1 454 uint64_t l1_vm_state_index;
1fa5cc83
HR
455 const char *opt_overlap_check;
456 int overlap_check_template = 0;
585f8587 457
6d85a57e
JS
458 ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
459 if (ret < 0) {
3ef6c40a 460 error_setg_errno(errp, -ret, "Could not read qcow2 header");
585f8587 461 goto fail;
6d85a57e 462 }
585f8587
FB
463 be32_to_cpus(&header.magic);
464 be32_to_cpus(&header.version);
465 be64_to_cpus(&header.backing_file_offset);
466 be32_to_cpus(&header.backing_file_size);
467 be64_to_cpus(&header.size);
468 be32_to_cpus(&header.cluster_bits);
469 be32_to_cpus(&header.crypt_method);
470 be64_to_cpus(&header.l1_table_offset);
471 be32_to_cpus(&header.l1_size);
472 be64_to_cpus(&header.refcount_table_offset);
473 be32_to_cpus(&header.refcount_table_clusters);
474 be64_to_cpus(&header.snapshots_offset);
475 be32_to_cpus(&header.nb_snapshots);
3b46e624 476
e8cdcec1 477 if (header.magic != QCOW_MAGIC) {
3ef6c40a 478 error_setg(errp, "Image is not in qcow2 format");
76abe407 479 ret = -EINVAL;
585f8587 480 goto fail;
6d85a57e 481 }
6744cbab 482 if (header.version < 2 || header.version > 3) {
3ef6c40a 483 report_unsupported(bs, errp, "QCOW version %d", header.version);
6744cbab
KW
484 ret = -ENOTSUP;
485 goto fail;
486 }
487
488 s->qcow_version = header.version;
489
24342f2c
KW
490 /* Initialise cluster size */
491 if (header.cluster_bits < MIN_CLUSTER_BITS ||
492 header.cluster_bits > MAX_CLUSTER_BITS) {
493 error_setg(errp, "Unsupported cluster size: 2^%i", header.cluster_bits);
494 ret = -EINVAL;
495 goto fail;
496 }
497
498 s->cluster_bits = header.cluster_bits;
499 s->cluster_size = 1 << s->cluster_bits;
500 s->cluster_sectors = 1 << (s->cluster_bits - 9);
501
6744cbab
KW
502 /* Initialise version 3 header fields */
503 if (header.version == 2) {
504 header.incompatible_features = 0;
505 header.compatible_features = 0;
506 header.autoclear_features = 0;
507 header.refcount_order = 4;
508 header.header_length = 72;
509 } else {
510 be64_to_cpus(&header.incompatible_features);
511 be64_to_cpus(&header.compatible_features);
512 be64_to_cpus(&header.autoclear_features);
513 be32_to_cpus(&header.refcount_order);
514 be32_to_cpus(&header.header_length);
24342f2c
KW
515
516 if (header.header_length < 104) {
517 error_setg(errp, "qcow2 header too short");
518 ret = -EINVAL;
519 goto fail;
520 }
521 }
522
523 if (header.header_length > s->cluster_size) {
524 error_setg(errp, "qcow2 header exceeds cluster size");
525 ret = -EINVAL;
526 goto fail;
6744cbab
KW
527 }
528
529 if (header.header_length > sizeof(header)) {
530 s->unknown_header_fields_size = header.header_length - sizeof(header);
531 s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
532 ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
533 s->unknown_header_fields_size);
534 if (ret < 0) {
3ef6c40a
HR
535 error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
536 "fields");
6744cbab
KW
537 goto fail;
538 }
539 }
540
a1b3955c
KW
541 if (header.backing_file_offset > s->cluster_size) {
542 error_setg(errp, "Invalid backing file offset");
543 ret = -EINVAL;
544 goto fail;
545 }
546
cfcc4c62
KW
547 if (header.backing_file_offset) {
548 ext_end = header.backing_file_offset;
549 } else {
550 ext_end = 1 << header.cluster_bits;
551 }
552
6744cbab
KW
553 /* Handle feature bits */
554 s->incompatible_features = header.incompatible_features;
555 s->compatible_features = header.compatible_features;
556 s->autoclear_features = header.autoclear_features;
557
c61d0004 558 if (s->incompatible_features & ~QCOW2_INCOMPAT_MASK) {
cfcc4c62
KW
559 void *feature_table = NULL;
560 qcow2_read_extensions(bs, header.header_length, ext_end,
3ef6c40a
HR
561 &feature_table, NULL);
562 report_unsupported_feature(bs, errp, feature_table,
c61d0004
SH
563 s->incompatible_features &
564 ~QCOW2_INCOMPAT_MASK);
6744cbab 565 ret = -ENOTSUP;
c5a33ee9 566 g_free(feature_table);
6744cbab
KW
567 goto fail;
568 }
569
69c98726
HR
570 if (s->incompatible_features & QCOW2_INCOMPAT_CORRUPT) {
571 /* Corrupt images may not be written to unless they are being repaired
572 */
573 if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) {
3ef6c40a
HR
574 error_setg(errp, "qcow2: Image is corrupt; cannot be opened "
575 "read/write");
69c98726
HR
576 ret = -EACCES;
577 goto fail;
578 }
579 }
580
6744cbab
KW
581 /* Check support for various header values */
582 if (header.refcount_order != 4) {
3ef6c40a 583 report_unsupported(bs, errp, "%d bit reference counts",
6744cbab 584 1 << header.refcount_order);
e8cdcec1
KW
585 ret = -ENOTSUP;
586 goto fail;
587 }
b6481f37 588 s->refcount_order = header.refcount_order;
6744cbab 589
6d85a57e 590 if (header.crypt_method > QCOW_CRYPT_AES) {
3ef6c40a
HR
591 error_setg(errp, "Unsupported encryption method: %i",
592 header.crypt_method);
6d85a57e 593 ret = -EINVAL;
585f8587 594 goto fail;
6d85a57e 595 }
585f8587 596 s->crypt_method_header = header.crypt_method;
6d85a57e 597 if (s->crypt_method_header) {
585f8587 598 bs->encrypted = 1;
6d85a57e 599 }
24342f2c 600
585f8587
FB
601 s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
602 s->l2_size = 1 << s->l2_bits;
603 bs->total_sectors = header.size / 512;
604 s->csize_shift = (62 - (s->cluster_bits - 8));
605 s->csize_mask = (1 << (s->cluster_bits - 8)) - 1;
606 s->cluster_offset_mask = (1LL << s->csize_shift) - 1;
5dab2fad 607
585f8587 608 s->refcount_table_offset = header.refcount_table_offset;
5fafdf24 609 s->refcount_table_size =
585f8587
FB
610 header.refcount_table_clusters << (s->cluster_bits - 3);
611
2b5d5953 612 if (header.refcount_table_clusters > qcow2_max_refcount_clusters(s)) {
5dab2fad
KW
613 error_setg(errp, "Reference count table too large");
614 ret = -EINVAL;
615 goto fail;
616 }
617
8c7de283
KW
618 ret = validate_table_offset(bs, s->refcount_table_offset,
619 s->refcount_table_size, sizeof(uint64_t));
620 if (ret < 0) {
621 error_setg(errp, "Invalid reference count table offset");
622 goto fail;
623 }
624
ce48f2f4
KW
625 /* Snapshot table offset/length */
626 if (header.nb_snapshots > QCOW_MAX_SNAPSHOTS) {
627 error_setg(errp, "Too many snapshots");
628 ret = -EINVAL;
629 goto fail;
630 }
631
632 ret = validate_table_offset(bs, header.snapshots_offset,
633 header.nb_snapshots,
634 sizeof(QCowSnapshotHeader));
635 if (ret < 0) {
636 error_setg(errp, "Invalid snapshot table offset");
637 goto fail;
638 }
639
585f8587 640 /* read the level 1 table */
6a83f8b5 641 if (header.l1_size > QCOW_MAX_L1_SIZE) {
2d51c32c
KW
642 error_setg(errp, "Active L1 table too large");
643 ret = -EFBIG;
644 goto fail;
645 }
585f8587 646 s->l1_size = header.l1_size;
2cf7cfa1
KW
647
648 l1_vm_state_index = size_to_l1(s, header.size);
649 if (l1_vm_state_index > INT_MAX) {
3ef6c40a 650 error_setg(errp, "Image is too big");
2cf7cfa1
KW
651 ret = -EFBIG;
652 goto fail;
653 }
654 s->l1_vm_state_index = l1_vm_state_index;
655
585f8587
FB
656 /* the L1 table must contain at least enough entries to put
657 header.size bytes */
6d85a57e 658 if (s->l1_size < s->l1_vm_state_index) {
3ef6c40a 659 error_setg(errp, "L1 table is too small");
6d85a57e 660 ret = -EINVAL;
585f8587 661 goto fail;
6d85a57e 662 }
2d51c32c
KW
663
664 ret = validate_table_offset(bs, header.l1_table_offset,
665 header.l1_size, sizeof(uint64_t));
666 if (ret < 0) {
667 error_setg(errp, "Invalid L1 table offset");
668 goto fail;
669 }
585f8587 670 s->l1_table_offset = header.l1_table_offset;
2d51c32c
KW
671
672
d191d12d 673 if (s->l1_size > 0) {
7267c094 674 s->l1_table = g_malloc0(
d191d12d 675 align_offset(s->l1_size * sizeof(uint64_t), 512));
6d85a57e
JS
676 ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
677 s->l1_size * sizeof(uint64_t));
678 if (ret < 0) {
3ef6c40a 679 error_setg_errno(errp, -ret, "Could not read L1 table");
d191d12d 680 goto fail;
6d85a57e 681 }
d191d12d
SW
682 for(i = 0;i < s->l1_size; i++) {
683 be64_to_cpus(&s->l1_table[i]);
684 }
585f8587 685 }
29c1a730
KW
686
687 /* alloc L2 table/refcount block cache */
6af4e9ea
PB
688 s->l2_table_cache = qcow2_cache_create(bs, L2_CACHE_SIZE);
689 s->refcount_block_cache = qcow2_cache_create(bs, REFCOUNT_CACHE_SIZE);
29c1a730 690
7267c094 691 s->cluster_cache = g_malloc(s->cluster_size);
585f8587 692 /* one more sector for decompressed data alignment */
dea43a65 693 s->cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
095a9c58 694 + 512);
585f8587 695 s->cluster_cache_offset = -1;
06d9260f 696 s->flags = flags;
3b46e624 697
6d85a57e
JS
698 ret = qcow2_refcount_init(bs);
699 if (ret != 0) {
3ef6c40a 700 error_setg_errno(errp, -ret, "Could not initialize refcount handling");
585f8587 701 goto fail;
6d85a57e 702 }
585f8587 703
72cf2d4f 704 QLIST_INIT(&s->cluster_allocs);
0b919fae 705 QTAILQ_INIT(&s->discards);
f214978a 706
9b80ddf3 707 /* read qcow2 extensions */
3ef6c40a
HR
708 if (qcow2_read_extensions(bs, header.header_length, ext_end, NULL,
709 &local_err)) {
710 error_propagate(errp, local_err);
6d85a57e 711 ret = -EINVAL;
9b80ddf3 712 goto fail;
6d85a57e 713 }
9b80ddf3 714
585f8587
FB
715 /* read the backing file name */
716 if (header.backing_file_offset != 0) {
717 len = header.backing_file_size;
6d33e8e7
KW
718 if (len > MIN(1023, s->cluster_size - header.backing_file_offset)) {
719 error_setg(errp, "Backing file name too long");
720 ret = -EINVAL;
721 goto fail;
6d85a57e
JS
722 }
723 ret = bdrv_pread(bs->file, header.backing_file_offset,
724 bs->backing_file, len);
725 if (ret < 0) {
3ef6c40a 726 error_setg_errno(errp, -ret, "Could not read backing file name");
585f8587 727 goto fail;
6d85a57e 728 }
585f8587
FB
729 bs->backing_file[len] = '\0';
730 }
42deb29f 731
11b128f4
KW
732 /* Internal snapshots */
733 s->snapshots_offset = header.snapshots_offset;
734 s->nb_snapshots = header.nb_snapshots;
735
42deb29f
KW
736 ret = qcow2_read_snapshots(bs);
737 if (ret < 0) {
3ef6c40a 738 error_setg_errno(errp, -ret, "Could not read snapshots");
585f8587 739 goto fail;
6d85a57e 740 }
585f8587 741
af7b708d 742 /* Clear unknown autoclear feature bits */
27eb6c09 743 if (!bs->read_only && !(flags & BDRV_O_INCOMING) && s->autoclear_features) {
af7b708d
SH
744 s->autoclear_features = 0;
745 ret = qcow2_update_header(bs);
746 if (ret < 0) {
3ef6c40a 747 error_setg_errno(errp, -ret, "Could not update qcow2 header");
af7b708d
SH
748 goto fail;
749 }
750 }
751
68d100e9
KW
752 /* Initialise locks */
753 qemu_co_mutex_init(&s->lock);
754
c61d0004 755 /* Repair image if dirty */
27eb6c09 756 if (!(flags & (BDRV_O_CHECK | BDRV_O_INCOMING)) && !bs->read_only &&
058f8f16 757 (s->incompatible_features & QCOW2_INCOMPAT_DIRTY)) {
c61d0004
SH
758 BdrvCheckResult result = {0};
759
acbe5982 760 ret = qcow2_check(bs, &result, BDRV_FIX_ERRORS);
c61d0004 761 if (ret < 0) {
3ef6c40a 762 error_setg_errno(errp, -ret, "Could not repair dirty image");
c61d0004
SH
763 goto fail;
764 }
765 }
766
74c4510a 767 /* Enable lazy_refcounts according to image and command line options */
87ea75d5 768 opts = qemu_opts_create(&qcow2_runtime_opts, NULL, 0, &error_abort);
74c4510a 769 qemu_opts_absorb_qdict(opts, options, &local_err);
84d18f06 770 if (local_err) {
3ef6c40a 771 error_propagate(errp, local_err);
74c4510a
KW
772 ret = -EINVAL;
773 goto fail;
774 }
775
acdfb480 776 s->use_lazy_refcounts = qemu_opt_get_bool(opts, QCOW2_OPT_LAZY_REFCOUNTS,
74c4510a
KW
777 (s->compatible_features & QCOW2_COMPAT_LAZY_REFCOUNTS));
778
67af674e
KW
779 s->discard_passthrough[QCOW2_DISCARD_NEVER] = false;
780 s->discard_passthrough[QCOW2_DISCARD_ALWAYS] = true;
781 s->discard_passthrough[QCOW2_DISCARD_REQUEST] =
782 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_REQUEST,
783 flags & BDRV_O_UNMAP);
784 s->discard_passthrough[QCOW2_DISCARD_SNAPSHOT] =
785 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_SNAPSHOT, true);
786 s->discard_passthrough[QCOW2_DISCARD_OTHER] =
787 qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
788
1fa5cc83
HR
789 opt_overlap_check = qemu_opt_get(opts, "overlap-check") ?: "cached";
790 if (!strcmp(opt_overlap_check, "none")) {
791 overlap_check_template = 0;
792 } else if (!strcmp(opt_overlap_check, "constant")) {
793 overlap_check_template = QCOW2_OL_CONSTANT;
794 } else if (!strcmp(opt_overlap_check, "cached")) {
795 overlap_check_template = QCOW2_OL_CACHED;
796 } else if (!strcmp(opt_overlap_check, "all")) {
797 overlap_check_template = QCOW2_OL_ALL;
798 } else {
799 error_setg(errp, "Unsupported value '%s' for qcow2 option "
800 "'overlap-check'. Allowed are either of the following: "
801 "none, constant, cached, all", opt_overlap_check);
802 qemu_opts_del(opts);
803 ret = -EINVAL;
804 goto fail;
805 }
806
807 s->overlap_check = 0;
808 for (i = 0; i < QCOW2_OL_MAX_BITNR; i++) {
809 /* overlap-check defines a template bitmask, but every flag may be
810 * overwritten through the associated boolean option */
811 s->overlap_check |=
812 qemu_opt_get_bool(opts, overlap_bool_option_names[i],
813 overlap_check_template & (1 << i)) << i;
814 }
3e355390 815
74c4510a
KW
816 qemu_opts_del(opts);
817
818 if (s->use_lazy_refcounts && s->qcow_version < 3) {
3ef6c40a
HR
819 error_setg(errp, "Lazy refcounts require a qcow2 image with at least "
820 "qemu 1.1 compatibility level");
74c4510a
KW
821 ret = -EINVAL;
822 goto fail;
823 }
824
585f8587 825#ifdef DEBUG_ALLOC
6cbc3031
PH
826 {
827 BdrvCheckResult result = {0};
b35278f7 828 qcow2_check_refcounts(bs, &result, 0);
6cbc3031 829 }
585f8587 830#endif
6d85a57e 831 return ret;
585f8587
FB
832
833 fail:
6744cbab 834 g_free(s->unknown_header_fields);
75bab85c 835 cleanup_unknown_header_ext(bs);
ed6ccf0f
KW
836 qcow2_free_snapshots(bs);
837 qcow2_refcount_close(bs);
7267c094 838 g_free(s->l1_table);
cf93980e
HR
839 /* else pre-write overlap checks in cache_destroy may crash */
840 s->l1_table = NULL;
29c1a730
KW
841 if (s->l2_table_cache) {
842 qcow2_cache_destroy(bs, s->l2_table_cache);
843 }
c5a33ee9
PJ
844 if (s->refcount_block_cache) {
845 qcow2_cache_destroy(bs, s->refcount_block_cache);
846 }
7267c094 847 g_free(s->cluster_cache);
dea43a65 848 qemu_vfree(s->cluster_data);
6d85a57e 849 return ret;
585f8587
FB
850}
851
d34682cd
KW
852static int qcow2_refresh_limits(BlockDriverState *bs)
853{
854 BDRVQcowState *s = bs->opaque;
855
856 bs->bl.write_zeroes_alignment = s->cluster_sectors;
857
858 return 0;
859}
860
7c80ab3f 861static int qcow2_set_key(BlockDriverState *bs, const char *key)
585f8587
FB
862{
863 BDRVQcowState *s = bs->opaque;
864 uint8_t keybuf[16];
865 int len, i;
3b46e624 866
585f8587
FB
867 memset(keybuf, 0, 16);
868 len = strlen(key);
869 if (len > 16)
870 len = 16;
871 /* XXX: we could compress the chars to 7 bits to increase
872 entropy */
873 for(i = 0;i < len;i++) {
874 keybuf[i] = key[i];
875 }
876 s->crypt_method = s->crypt_method_header;
877
878 if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
879 return -1;
880 if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
881 return -1;
882#if 0
883 /* test */
884 {
885 uint8_t in[16];
886 uint8_t out[16];
887 uint8_t tmp[16];
888 for(i=0;i<16;i++)
889 in[i] = i;
890 AES_encrypt(in, tmp, &s->aes_encrypt_key);
891 AES_decrypt(tmp, out, &s->aes_decrypt_key);
892 for(i = 0; i < 16; i++)
893 printf(" %02x", tmp[i]);
894 printf("\n");
895 for(i = 0; i < 16; i++)
896 printf(" %02x", out[i]);
897 printf("\n");
898 }
899#endif
900 return 0;
901}
902
21d82ac9
JC
903/* We have nothing to do for QCOW2 reopen, stubs just return
904 * success */
905static int qcow2_reopen_prepare(BDRVReopenState *state,
906 BlockReopenQueue *queue, Error **errp)
907{
908 return 0;
909}
910
b6b8a333 911static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
f8a2e5e3 912 int64_t sector_num, int nb_sectors, int *pnum)
585f8587 913{
f8a2e5e3 914 BDRVQcowState *s = bs->opaque;
585f8587 915 uint64_t cluster_offset;
4bc74be9
PB
916 int index_in_cluster, ret;
917 int64_t status = 0;
585f8587 918
095a9c58 919 *pnum = nb_sectors;
f8a2e5e3 920 qemu_co_mutex_lock(&s->lock);
1c46efaa 921 ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
f8a2e5e3 922 qemu_co_mutex_unlock(&s->lock);
1c46efaa 923 if (ret < 0) {
d663640c 924 return ret;
1c46efaa 925 }
095a9c58 926
4bc74be9
PB
927 if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
928 !s->crypt_method) {
929 index_in_cluster = sector_num & (s->cluster_sectors - 1);
930 cluster_offset |= (index_in_cluster << BDRV_SECTOR_BITS);
931 status |= BDRV_BLOCK_OFFSET_VALID | cluster_offset;
932 }
933 if (ret == QCOW2_CLUSTER_ZERO) {
934 status |= BDRV_BLOCK_ZERO;
935 } else if (ret != QCOW2_CLUSTER_UNALLOCATED) {
936 status |= BDRV_BLOCK_DATA;
937 }
938 return status;
585f8587
FB
939}
940
a9465922 941/* handle reading after the end of the backing file */
bd28f835
KW
942int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
943 int64_t sector_num, int nb_sectors)
a9465922
FB
944{
945 int n1;
946 if ((sector_num + nb_sectors) <= bs->total_sectors)
947 return nb_sectors;
948 if (sector_num >= bs->total_sectors)
949 n1 = 0;
950 else
951 n1 = bs->total_sectors - sector_num;
bd28f835 952
3d9b4925 953 qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
bd28f835 954
a9465922
FB
955 return n1;
956}
957
a968168c 958static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
3fc48d09 959 int remaining_sectors, QEMUIOVector *qiov)
585f8587 960{
585f8587 961 BDRVQcowState *s = bs->opaque;
a9465922 962 int index_in_cluster, n1;
68d100e9 963 int ret;
faf575c1 964 int cur_nr_sectors; /* number of sectors in current iteration */
c2bdd990 965 uint64_t cluster_offset = 0;
3fc48d09
FZ
966 uint64_t bytes_done = 0;
967 QEMUIOVector hd_qiov;
968 uint8_t *cluster_data = NULL;
585f8587 969
3fc48d09
FZ
970 qemu_iovec_init(&hd_qiov, qiov->niov);
971
972 qemu_co_mutex_lock(&s->lock);
973
974 while (remaining_sectors != 0) {
bd28f835 975
5ebaa27e 976 /* prepare next request */
3fc48d09 977 cur_nr_sectors = remaining_sectors;
5ebaa27e
FZ
978 if (s->crypt_method) {
979 cur_nr_sectors = MIN(cur_nr_sectors,
980 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
585f8587 981 }
5ebaa27e 982
3fc48d09 983 ret = qcow2_get_cluster_offset(bs, sector_num << 9,
5ebaa27e 984 &cur_nr_sectors, &cluster_offset);
8af36488 985 if (ret < 0) {
3fc48d09 986 goto fail;
8af36488 987 }
bd28f835 988
3fc48d09 989 index_in_cluster = sector_num & (s->cluster_sectors - 1);
c87c0672 990
3fc48d09 991 qemu_iovec_reset(&hd_qiov);
1b093c48 992 qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
5ebaa27e
FZ
993 cur_nr_sectors * 512);
994
68d000a3
KW
995 switch (ret) {
996 case QCOW2_CLUSTER_UNALLOCATED:
5ebaa27e
FZ
997
998 if (bs->backing_hd) {
999 /* read from the base image */
3fc48d09
FZ
1000 n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov,
1001 sector_num, cur_nr_sectors);
5ebaa27e
FZ
1002 if (n1 > 0) {
1003 BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
1004 qemu_co_mutex_unlock(&s->lock);
3fc48d09
FZ
1005 ret = bdrv_co_readv(bs->backing_hd, sector_num,
1006 n1, &hd_qiov);
5ebaa27e
FZ
1007 qemu_co_mutex_lock(&s->lock);
1008 if (ret < 0) {
3fc48d09 1009 goto fail;
5ebaa27e
FZ
1010 }
1011 }
1012 } else {
1013 /* Note: in this case, no need to wait */
3d9b4925 1014 qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
5ebaa27e 1015 }
68d000a3
KW
1016 break;
1017
6377af48 1018 case QCOW2_CLUSTER_ZERO:
3d9b4925 1019 qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
6377af48
KW
1020 break;
1021
68d000a3 1022 case QCOW2_CLUSTER_COMPRESSED:
5ebaa27e
FZ
1023 /* add AIO support for compressed blocks ? */
1024 ret = qcow2_decompress_cluster(bs, cluster_offset);
1025 if (ret < 0) {
3fc48d09 1026 goto fail;
bd28f835
KW
1027 }
1028
03396148 1029 qemu_iovec_from_buf(&hd_qiov, 0,
5ebaa27e 1030 s->cluster_cache + index_in_cluster * 512,
faf575c1 1031 512 * cur_nr_sectors);
68d000a3
KW
1032 break;
1033
1034 case QCOW2_CLUSTER_NORMAL:
5ebaa27e 1035 if ((cluster_offset & 511) != 0) {
3fc48d09
FZ
1036 ret = -EIO;
1037 goto fail;
5ebaa27e 1038 }
bd28f835 1039
5ebaa27e
FZ
1040 if (s->crypt_method) {
1041 /*
1042 * For encrypted images, read everything into a temporary
1043 * contiguous buffer on which the AES functions can work.
1044 */
3fc48d09
FZ
1045 if (!cluster_data) {
1046 cluster_data =
dea43a65 1047 qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
5ebaa27e
FZ
1048 }
1049
1050 assert(cur_nr_sectors <=
1051 QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
3fc48d09
FZ
1052 qemu_iovec_reset(&hd_qiov);
1053 qemu_iovec_add(&hd_qiov, cluster_data,
5ebaa27e
FZ
1054 512 * cur_nr_sectors);
1055 }
1056
1057 BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
1058 qemu_co_mutex_unlock(&s->lock);
1059 ret = bdrv_co_readv(bs->file,
1060 (cluster_offset >> 9) + index_in_cluster,
3fc48d09 1061 cur_nr_sectors, &hd_qiov);
5ebaa27e
FZ
1062 qemu_co_mutex_lock(&s->lock);
1063 if (ret < 0) {
3fc48d09 1064 goto fail;
5ebaa27e
FZ
1065 }
1066 if (s->crypt_method) {
3fc48d09
FZ
1067 qcow2_encrypt_sectors(s, sector_num, cluster_data,
1068 cluster_data, cur_nr_sectors, 0, &s->aes_decrypt_key);
03396148
MT
1069 qemu_iovec_from_buf(qiov, bytes_done,
1070 cluster_data, 512 * cur_nr_sectors);
5ebaa27e 1071 }
68d000a3
KW
1072 break;
1073
1074 default:
1075 g_assert_not_reached();
1076 ret = -EIO;
1077 goto fail;
faf575c1 1078 }
f141eafe 1079
3fc48d09
FZ
1080 remaining_sectors -= cur_nr_sectors;
1081 sector_num += cur_nr_sectors;
1082 bytes_done += cur_nr_sectors * 512;
5ebaa27e 1083 }
3fc48d09 1084 ret = 0;
faf575c1 1085
3fc48d09 1086fail:
68d100e9 1087 qemu_co_mutex_unlock(&s->lock);
42496d62 1088
3fc48d09 1089 qemu_iovec_destroy(&hd_qiov);
dea43a65 1090 qemu_vfree(cluster_data);
68d100e9
KW
1091
1092 return ret;
585f8587
FB
1093}
1094
/*
 * Coroutine write path of the qcow2 driver.
 *
 * @bs:                image to write to
 * @sector_num:        first guest sector to write
 * @remaining_sectors: number of sectors to write
 * @qiov:              source vector
 *
 * The request is processed in pieces.  For each piece, clusters are
 * obtained with qcow2_alloc_cluster_offset() (which may shrink the piece),
 * the payload is optionally encrypted into a bounce buffer, written to
 * bs->file, and finally every QCowL2Meta returned by the allocation is
 * linked with qcow2_alloc_cluster_link_l2() and released.
 *
 * s->lock is held except around the bdrv_co_writev() call.
 *
 * Returns 0 on success, a negative value on failure.
 */
static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
                           int64_t sector_num,
                           int remaining_sectors,
                           QEMUIOVector *qiov)
{
    BDRVQcowState *s = bs->opaque;
    int index_in_cluster;
    int ret;
    int cur_nr_sectors; /* number of sectors in current iteration */
    uint64_t cluster_offset;
    QEMUIOVector hd_qiov;
    uint64_t bytes_done = 0;
    uint8_t *cluster_data = NULL;
    QCowL2Meta *l2meta = NULL;

    trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
                                 remaining_sectors);

    qemu_iovec_init(&hd_qiov, qiov->niov);

    s->cluster_cache_offset = -1; /* disable compressed cache */

    qemu_co_mutex_lock(&s->lock);

    while (remaining_sectors != 0) {

        /* Start every iteration with an empty allocation list. */
        l2meta = NULL;

        trace_qcow2_writev_start_part(qemu_coroutine_self());
        index_in_cluster = sector_num & (s->cluster_sectors - 1);
        cur_nr_sectors = remaining_sectors;
        /*
         * Encrypted writes go through a bounce buffer of
         * QCOW_MAX_CRYPT_CLUSTERS clusters; cap the piece accordingly.
         */
        if (s->crypt_method &&
            cur_nr_sectors >
            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster) {
            cur_nr_sectors =
                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster;
        }

        ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
            &cur_nr_sectors, &cluster_offset, &l2meta);
        if (ret < 0) {
            goto fail;
        }

        assert((cluster_offset & 511) == 0);

        /* hd_qiov describes the slice of qiov handled in this iteration */
        qemu_iovec_reset(&hd_qiov);
        qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
            cur_nr_sectors * 512);

        if (s->crypt_method) {
            /* The bounce buffer is allocated lazily and reused. */
            if (!cluster_data) {
                cluster_data = qemu_blockalign(bs, QCOW_MAX_CRYPT_CLUSTERS *
                                                 s->cluster_size);
            }

            assert(hd_qiov.size <=
                   QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
            qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);

            qcow2_encrypt_sectors(s, sector_num, cluster_data,
                cluster_data, cur_nr_sectors, 1, &s->aes_encrypt_key);

            /* From here on, write the encrypted copy instead. */
            qemu_iovec_reset(&hd_qiov);
            qemu_iovec_add(&hd_qiov, cluster_data,
                cur_nr_sectors * 512);
        }

        ret = qcow2_pre_write_overlap_check(bs, 0,
                cluster_offset + index_in_cluster * BDRV_SECTOR_SIZE,
                cur_nr_sectors * BDRV_SECTOR_SIZE);
        if (ret < 0) {
            goto fail;
        }

        /* The data write itself runs without s->lock held. */
        qemu_co_mutex_unlock(&s->lock);
        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
        trace_qcow2_writev_data(qemu_coroutine_self(),
                                (cluster_offset >> 9) + index_in_cluster);
        ret = bdrv_co_writev(bs->file,
                             (cluster_offset >> 9) + index_in_cluster,
                             cur_nr_sectors, &hd_qiov);
        qemu_co_mutex_lock(&s->lock);
        if (ret < 0) {
            goto fail;
        }

        /* Link and release every allocation made for this piece. */
        while (l2meta != NULL) {
            QCowL2Meta *next;

            ret = qcow2_alloc_cluster_link_l2(bs, l2meta);
            if (ret < 0) {
                /* The remaining list is released at the fail label. */
                goto fail;
            }

            /* Take the request off the list of running requests */
            if (l2meta->nb_clusters != 0) {
                QLIST_REMOVE(l2meta, next_in_flight);
            }

            qemu_co_queue_restart_all(&l2meta->dependent_requests);

            next = l2meta->next;
            g_free(l2meta);
            l2meta = next;
        }

        remaining_sectors -= cur_nr_sectors;
        sector_num += cur_nr_sectors;
        bytes_done += cur_nr_sectors * 512;
        trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
    }
    ret = 0;

fail:
    qemu_co_mutex_unlock(&s->lock);

    /*
     * Release whatever is left on the allocation list (non-empty only on
     * error paths), waking requests that were waiting on it.
     */
    while (l2meta != NULL) {
        QCowL2Meta *next;

        if (l2meta->nb_clusters != 0) {
            QLIST_REMOVE(l2meta, next_in_flight);
        }
        qemu_co_queue_restart_all(&l2meta->dependent_requests);

        next = l2meta->next;
        g_free(l2meta);
        l2meta = next;
    }

    qemu_iovec_destroy(&hd_qiov);
    qemu_vfree(cluster_data);
    trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);

    return ret;
}
1231
/*
 * Release all in-memory qcow2 state attached to @bs.
 *
 * Unless the image was opened with BDRV_O_INCOMING, the L2 table and
 * refcount block caches are flushed and qcow2_mark_clean() is called
 * before the caches are destroyed.  Return values of the flush calls are
 * not checked here.
 */
static void qcow2_close(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    g_free(s->l1_table);
    /* else pre-write overlap checks in cache_destroy may crash */
    s->l1_table = NULL;

    if (!(bs->open_flags & BDRV_O_INCOMING)) {
        qcow2_cache_flush(bs, s->l2_table_cache);
        qcow2_cache_flush(bs, s->refcount_block_cache);

        qcow2_mark_clean(bs);
    }

    qcow2_cache_destroy(bs, s->l2_table_cache);
    qcow2_cache_destroy(bs, s->refcount_block_cache);

    /* Preserved-but-unknown header data */
    g_free(s->unknown_header_fields);
    cleanup_unknown_header_ext(bs);

    /* Buffers, refcount state and snapshot list */
    g_free(s->cluster_cache);
    qemu_vfree(s->cluster_data);
    qcow2_refcount_close(bs);
    qcow2_free_snapshots(bs);
}
1257
5a8a30db 1258static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
06d9260f
AL
1259{
1260 BDRVQcowState *s = bs->opaque;
1261 int flags = s->flags;
1262 AES_KEY aes_encrypt_key;
1263 AES_KEY aes_decrypt_key;
1264 uint32_t crypt_method = 0;
acdfb480 1265 QDict *options;
5a8a30db
KW
1266 Error *local_err = NULL;
1267 int ret;
06d9260f
AL
1268
1269 /*
1270 * Backing files are read-only which makes all of their metadata immutable,
1271 * that means we don't have to worry about reopening them here.
1272 */
1273
1274 if (s->crypt_method) {
1275 crypt_method = s->crypt_method;
1276 memcpy(&aes_encrypt_key, &s->aes_encrypt_key, sizeof(aes_encrypt_key));
1277 memcpy(&aes_decrypt_key, &s->aes_decrypt_key, sizeof(aes_decrypt_key));
1278 }
1279
1280 qcow2_close(bs);
1281
5a8a30db
KW
1282 bdrv_invalidate_cache(bs->file, &local_err);
1283 if (local_err) {
1284 error_propagate(errp, local_err);
1285 return;
1286 }
3456a8d1 1287
06d9260f 1288 memset(s, 0, sizeof(BDRVQcowState));
d475e5ac 1289 options = qdict_clone_shallow(bs->options);
5a8a30db
KW
1290
1291 ret = qcow2_open(bs, options, flags, &local_err);
1292 if (local_err) {
1293 error_setg(errp, "Could not reopen qcow2 layer: %s",
1294 error_get_pretty(local_err));
1295 error_free(local_err);
1296 return;
1297 } else if (ret < 0) {
1298 error_setg_errno(errp, -ret, "Could not reopen qcow2 layer");
1299 return;
1300 }
acdfb480
KW
1301
1302 QDECREF(options);
06d9260f
AL
1303
1304 if (crypt_method) {
1305 s->crypt_method = crypt_method;
1306 memcpy(&s->aes_encrypt_key, &aes_encrypt_key, sizeof(aes_encrypt_key));
1307 memcpy(&s->aes_decrypt_key, &aes_decrypt_key, sizeof(aes_decrypt_key));
1308 }
1309}
1310
e24e49e6
KW
1311static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
1312 size_t len, size_t buflen)
1313{
1314 QCowExtension *ext_backing_fmt = (QCowExtension*) buf;
1315 size_t ext_len = sizeof(QCowExtension) + ((len + 7) & ~7);
1316
1317 if (buflen < ext_len) {
1318 return -ENOSPC;
1319 }
1320
1321 *ext_backing_fmt = (QCowExtension) {
1322 .magic = cpu_to_be32(magic),
1323 .len = cpu_to_be32(len),
1324 };
1325 memcpy(buf + sizeof(QCowExtension), s, len);
1326
1327 return ext_len;
1328}
1329
/*
 * Updates the qcow2 header, including the variable length parts of it, i.e.
 * the backing file name and all extensions. qcow2 was not designed to allow
 * such changes, so if we run out of space (we can only use the first cluster)
 * this function may fail.
 *
 * The header is assembled in a cluster-sized scratch buffer in this order:
 * fixed header, preserved unknown header fields, backing format extension,
 * feature table extension, preserved unknown extensions, end-of-extensions
 * marker, backing file name.  The whole cluster is then written to offset 0
 * of bs->file.
 *
 * Returns 0 on success, -errno in error cases.
 */
int qcow2_update_header(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    QCowHeader *header;
    char *buf;
    size_t buflen = s->cluster_size;
    int ret;
    uint64_t total_size;
    uint32_t refcount_table_clusters;
    size_t header_length;
    Qcow2UnknownHeaderExtension *uext;

    buf = qemu_blockalign(bs, buflen);

    /* Header structure */
    /* header keeps pointing at the buffer start while buf advances */
    header = (QCowHeader*) buf;

    if (buflen < sizeof(*header)) {
        ret = -ENOSPC;
        goto fail;
    }

    header_length = sizeof(*header) + s->unknown_header_fields_size;
    total_size = bs->total_sectors * BDRV_SECTOR_SIZE;
    refcount_table_clusters = s->refcount_table_size >> (s->cluster_bits - 3);

    *header = (QCowHeader) {
        /* Version 2 fields */
        .magic                  = cpu_to_be32(QCOW_MAGIC),
        .version                = cpu_to_be32(s->qcow_version),
        .backing_file_offset    = 0,
        .backing_file_size      = 0,
        .cluster_bits           = cpu_to_be32(s->cluster_bits),
        .size                   = cpu_to_be64(total_size),
        .crypt_method           = cpu_to_be32(s->crypt_method_header),
        .l1_size                = cpu_to_be32(s->l1_size),
        .l1_table_offset        = cpu_to_be64(s->l1_table_offset),
        .refcount_table_offset  = cpu_to_be64(s->refcount_table_offset),
        .refcount_table_clusters = cpu_to_be32(refcount_table_clusters),
        .nb_snapshots           = cpu_to_be32(s->nb_snapshots),
        .snapshots_offset       = cpu_to_be64(s->snapshots_offset),

        /* Version 3 fields */
        .incompatible_features  = cpu_to_be64(s->incompatible_features),
        .compatible_features    = cpu_to_be64(s->compatible_features),
        .autoclear_features     = cpu_to_be64(s->autoclear_features),
        .refcount_order         = cpu_to_be32(s->refcount_order),
        .header_length          = cpu_to_be32(header_length),
    };

    /* For older versions, write a shorter header */
    /* ret temporarily holds the number of fixed header bytes to keep */
    switch (s->qcow_version) {
    case 2:
        ret = offsetof(QCowHeader, incompatible_features);
        break;
    case 3:
        ret = sizeof(*header);
        break;
    default:
        ret = -EINVAL;
        goto fail;
    }

    /* Skip the fixed header and clear everything after it */
    buf += ret;
    buflen -= ret;
    memset(buf, 0, buflen);

    /* Preserve any unknown field in the header */
    if (s->unknown_header_fields_size) {
        if (buflen < s->unknown_header_fields_size) {
            ret = -ENOSPC;
            goto fail;
        }

        memcpy(buf, s->unknown_header_fields, s->unknown_header_fields_size);
        buf += s->unknown_header_fields_size;
        buflen -= s->unknown_header_fields_size;
    }

    /* Backing file format header extension */
    if (*bs->backing_format) {
        ret = header_ext_add(buf, QCOW2_EXT_MAGIC_BACKING_FORMAT,
                             bs->backing_format, strlen(bs->backing_format),
                             buflen);
        if (ret < 0) {
            goto fail;
        }

        buf += ret;
        buflen -= ret;
    }

    /* Feature table */
    Qcow2Feature features[] = {
        {
            .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
            .bit  = QCOW2_INCOMPAT_DIRTY_BITNR,
            .name = "dirty bit",
        },
        {
            .type = QCOW2_FEAT_TYPE_INCOMPATIBLE,
            .bit  = QCOW2_INCOMPAT_CORRUPT_BITNR,
            .name = "corrupt bit",
        },
        {
            .type = QCOW2_FEAT_TYPE_COMPATIBLE,
            .bit  = QCOW2_COMPAT_LAZY_REFCOUNTS_BITNR,
            .name = "lazy refcounts",
        },
    };

    ret = header_ext_add(buf, QCOW2_EXT_MAGIC_FEATURE_TABLE,
                         features, sizeof(features), buflen);
    if (ret < 0) {
        goto fail;
    }
    buf += ret;
    buflen -= ret;

    /* Keep unknown header extensions */
    QLIST_FOREACH(uext, &s->unknown_header_ext, next) {
        ret = header_ext_add(buf, uext->magic, uext->data, uext->len, buflen);
        if (ret < 0) {
            goto fail;
        }

        buf += ret;
        buflen -= ret;
    }

    /* End of header extensions */
    ret = header_ext_add(buf, QCOW2_EXT_MAGIC_END, NULL, 0, buflen);
    if (ret < 0) {
        goto fail;
    }

    buf += ret;
    buflen -= ret;

    /* Backing file name */
    if (*bs->backing_file) {
        size_t backing_file_len = strlen(bs->backing_file);

        if (buflen < backing_file_len) {
            ret = -ENOSPC;
            goto fail;
        }

        /* Using strncpy is ok here, since buf is not NUL-terminated. */
        strncpy(buf, bs->backing_file, buflen);

        /* Record where the name ended up, relative to the header start */
        header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
        header->backing_file_size   = cpu_to_be32(backing_file_len);
    }

    /* Write the new header */
    ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
    if (ret < 0) {
        goto fail;
    }

    ret = 0;
fail:
    /* header still points at the start of the scratch buffer */
    qemu_vfree(header);
    return ret;
}
1504
1505static int qcow2_change_backing_file(BlockDriverState *bs,
1506 const char *backing_file, const char *backing_fmt)
1507{
e24e49e6
KW
1508 pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
1509 pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
1510
1511 return qcow2_update_header(bs);
756e6736
KW
1512}
1513
a35e1c17
KW
1514static int preallocate(BlockDriverState *bs)
1515{
a35e1c17
KW
1516 uint64_t nb_sectors;
1517 uint64_t offset;
060bee89 1518 uint64_t host_offset = 0;
a35e1c17 1519 int num;
148da7ea 1520 int ret;
f50f88b9 1521 QCowL2Meta *meta;
a35e1c17 1522
7c2bbf4a 1523 nb_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
a35e1c17
KW
1524 offset = 0;
1525
1526 while (nb_sectors) {
7c2bbf4a 1527 num = MIN(nb_sectors, INT_MAX >> BDRV_SECTOR_BITS);
16f0587e 1528 ret = qcow2_alloc_cluster_offset(bs, offset, &num,
060bee89 1529 &host_offset, &meta);
148da7ea 1530 if (ret < 0) {
19dbcbf7 1531 return ret;
a35e1c17
KW
1532 }
1533
c792707f
SH
1534 while (meta) {
1535 QCowL2Meta *next = meta->next;
1536
7c2bbf4a
HT
1537 ret = qcow2_alloc_cluster_link_l2(bs, meta);
1538 if (ret < 0) {
1539 qcow2_free_any_clusters(bs, meta->alloc_offset,
1540 meta->nb_clusters, QCOW2_DISCARD_NEVER);
1541 return ret;
1542 }
1543
1544 /* There are no dependent requests, but we need to remove our
1545 * request from the list of in-flight requests */
4e95314e 1546 QLIST_REMOVE(meta, next_in_flight);
c792707f
SH
1547
1548 g_free(meta);
1549 meta = next;
f50f88b9 1550 }
f214978a 1551
a35e1c17
KW
1552 /* TODO Preallocate data if requested */
1553
1554 nb_sectors -= num;
7c2bbf4a 1555 offset += num << BDRV_SECTOR_BITS;
a35e1c17
KW
1556 }
1557
1558 /*
1559 * It is expected that the image file is large enough to actually contain
1560 * all of the allocated clusters (otherwise we get failing reads after
1561 * EOF). Extend the image to the last allocated sector.
1562 */
060bee89 1563 if (host_offset != 0) {
7c2bbf4a
HT
1564 uint8_t buf[BDRV_SECTOR_SIZE];
1565 memset(buf, 0, BDRV_SECTOR_SIZE);
1566 ret = bdrv_write(bs->file, (host_offset >> BDRV_SECTOR_BITS) + num - 1,
1567 buf, 1);
19dbcbf7
KW
1568 if (ret < 0) {
1569 return ret;
1570 }
a35e1c17
KW
1571 }
1572
1573 return 0;
1574}
1575
7c80ab3f
JS
/*
 * Create a qcow2 image at @filename.
 *
 * @total_size:     virtual disk size in sectors (it is multiplied by
 *                  BDRV_SECTOR_SIZE before being passed to bdrv_truncate())
 * @backing_file:   backing file name, or NULL for none
 * @backing_format: backing file format, passed to bdrv_change_backing_file()
 * @flags:          BLOCK_FLAG_ENCRYPT and/or BLOCK_FLAG_LAZY_REFCOUNTS
 * @cluster_size:   cluster size in bytes; must be a power of two within the
 *                  MIN_CLUSTER_BITS..MAX_CLUSTER_BITS range
 * @prealloc:       non-zero to preallocate metadata via preallocate()
 * @options:        handed to bdrv_create_file()
 * @version:        value stored in the header's version field
 * @errp:           set on failure
 *
 * Returns 0 on success, a negative value on failure.
 */
static int qcow2_create2(const char *filename, int64_t total_size,
                         const char *backing_file, const char *backing_format,
                         int flags, size_t cluster_size, int prealloc,
                         QEMUOptionParameter *options, int version,
                         Error **errp)
{
    /* Calculate cluster_bits */
    int cluster_bits;
    cluster_bits = ffs(cluster_size) - 1;
    if (cluster_bits < MIN_CLUSTER_BITS || cluster_bits > MAX_CLUSTER_BITS ||
        (1 << cluster_bits) != cluster_size)
    {
        error_setg(errp, "Cluster size must be a power of two between %d and "
                   "%dk", 1 << MIN_CLUSTER_BITS, 1 << (MAX_CLUSTER_BITS - 10));
        return -EINVAL;
    }

    /*
     * Open the image file and write a minimal qcow2 header.
     *
     * We keep things simple and start with a zero-sized image. We also
     * do without refcount blocks or a L1 table for now. We'll fix the
     * inconsistency later.
     *
     * We do need a refcount table because growing the refcount table means
     * allocating two new refcount blocks - the second of which would be at
     * 2 GB for 64k clusters, and we don't want to have a 2 GB initial file
     * size for any qcow2 image.
     */
    BlockDriverState* bs;
    QCowHeader *header;
    uint64_t* refcount_table;
    Error *local_err = NULL;
    int ret;

    ret = bdrv_create_file(filename, options, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        return ret;
    }

    /* First pass: open at the protocol level (BDRV_O_PROTOCOL) */
    bs = NULL;
    ret = bdrv_open(&bs, filename, NULL, NULL, BDRV_O_RDWR | BDRV_O_PROTOCOL,
                    NULL, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        return ret;
    }

    /* Write the header */
    QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
    header = g_malloc0(cluster_size);
    *header = (QCowHeader) {
        .magic                      = cpu_to_be32(QCOW_MAGIC),
        .version                    = cpu_to_be32(version),
        .cluster_bits               = cpu_to_be32(cluster_bits),
        .size                       = cpu_to_be64(0),
        .l1_table_offset            = cpu_to_be64(0),
        .l1_size                    = cpu_to_be32(0),
        .refcount_table_offset      = cpu_to_be64(cluster_size),
        .refcount_table_clusters    = cpu_to_be32(1),
        .refcount_order             = cpu_to_be32(3 + REFCOUNT_SHIFT),
        .header_length              = cpu_to_be32(sizeof(*header)),
    };

    if (flags & BLOCK_FLAG_ENCRYPT) {
        header->crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
    } else {
        header->crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
    }

    if (flags & BLOCK_FLAG_LAZY_REFCOUNTS) {
        header->compatible_features |=
            cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
    }

    ret = bdrv_pwrite(bs, 0, header, cluster_size);
    g_free(header);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write qcow2 header");
        goto out;
    }

    /* Write a refcount table with one refcount block */
    /* Two clusters at offset cluster_size; entry 0 refers to 2 * cluster_size */
    refcount_table = g_malloc0(2 * cluster_size);
    refcount_table[0] = cpu_to_be64(2 * cluster_size);
    ret = bdrv_pwrite(bs, cluster_size, refcount_table, 2 * cluster_size);
    g_free(refcount_table);

    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not write refcount table");
        goto out;
    }

    bdrv_unref(bs);
    bs = NULL;

    /*
     * And now open the image and make it consistent first (i.e. increase the
     * refcount of the cluster that is occupied by the header and the refcount
     * table)
     */
    BlockDriver* drv = bdrv_find_format("qcow2");
    assert(drv != NULL);
    ret = bdrv_open(&bs, filename, NULL, NULL,
        BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH, drv, &local_err);
    if (ret < 0) {
        error_propagate(errp, local_err);
        goto out;
    }

    /* Account for the three clusters written above; expected at offset 0 */
    ret = qcow2_alloc_clusters(bs, 3 * cluster_size);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not allocate clusters for qcow2 "
                         "header and refcount table");
        goto out;

    } else if (ret != 0) {
        error_report("Huh, first cluster in empty image is already in use?");
        abort();
    }

    /* Okay, now that we have a valid image, let's give it the right size */
    ret = bdrv_truncate(bs, total_size * BDRV_SECTOR_SIZE);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Could not resize image");
        goto out;
    }

    /* Want a backing file? There you go.*/
    if (backing_file) {
        ret = bdrv_change_backing_file(bs, backing_file, backing_format);
        if (ret < 0) {
            /* NOTE(review): backing_format is printed with %s and is not
             * checked for NULL here - confirm callers never pass NULL
             * together with a backing file. */
            error_setg_errno(errp, -ret, "Could not assign backing file '%s' "
                             "with format '%s'", backing_file, backing_format);
            goto out;
        }
    }

    /* And if we're supposed to preallocate metadata, do that now */
    if (prealloc) {
        BDRVQcowState *s = bs->opaque;
        qemu_co_mutex_lock(&s->lock);
        ret = preallocate(bs);
        qemu_co_mutex_unlock(&s->lock);
        if (ret < 0) {
            error_setg_errno(errp, -ret, "Could not preallocate metadata");
            goto out;
        }
    }

    bdrv_unref(bs);
    bs = NULL;

    /* Reopen the image without BDRV_O_NO_FLUSH to flush it before returning */
    ret = bdrv_open(&bs, filename, NULL, NULL,
                    BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
                    drv, &local_err);
    if (local_err) {
        error_propagate(errp, local_err);
        goto out;
    }

    ret = 0;
out:
    /* bs is NULL whenever the last reference was already dropped above */
    if (bs) {
        bdrv_unref(bs);
    }
    return ret;
}
de5f3f40 1746
d5124c00
HR
1747static int qcow2_create(const char *filename, QEMUOptionParameter *options,
1748 Error **errp)
de5f3f40
KW
1749{
1750 const char *backing_file = NULL;
1751 const char *backing_fmt = NULL;
1752 uint64_t sectors = 0;
1753 int flags = 0;
99cce9fa 1754 size_t cluster_size = DEFAULT_CLUSTER_SIZE;
de5f3f40 1755 int prealloc = 0;
8ad1898c 1756 int version = 3;
3ef6c40a
HR
1757 Error *local_err = NULL;
1758 int ret;
de5f3f40
KW
1759
1760 /* Read out options */
1761 while (options && options->name) {
1762 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
1763 sectors = options->value.n / 512;
1764 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
1765 backing_file = options->value.s;
1766 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FMT)) {
1767 backing_fmt = options->value.s;
1768 } else if (!strcmp(options->name, BLOCK_OPT_ENCRYPT)) {
1769 flags |= options->value.n ? BLOCK_FLAG_ENCRYPT : 0;
1770 } else if (!strcmp(options->name, BLOCK_OPT_CLUSTER_SIZE)) {
1771 if (options->value.n) {
1772 cluster_size = options->value.n;
1773 }
1774 } else if (!strcmp(options->name, BLOCK_OPT_PREALLOC)) {
1775 if (!options->value.s || !strcmp(options->value.s, "off")) {
1776 prealloc = 0;
1777 } else if (!strcmp(options->value.s, "metadata")) {
1778 prealloc = 1;
1779 } else {
3ef6c40a
HR
1780 error_setg(errp, "Invalid preallocation mode: '%s'",
1781 options->value.s);
de5f3f40
KW
1782 return -EINVAL;
1783 }
6744cbab 1784 } else if (!strcmp(options->name, BLOCK_OPT_COMPAT_LEVEL)) {
9117b477
KW
1785 if (!options->value.s) {
1786 /* keep the default */
1787 } else if (!strcmp(options->value.s, "0.10")) {
6744cbab
KW
1788 version = 2;
1789 } else if (!strcmp(options->value.s, "1.1")) {
1790 version = 3;
1791 } else {
3ef6c40a
HR
1792 error_setg(errp, "Invalid compatibility level: '%s'",
1793 options->value.s);
6744cbab
KW
1794 return -EINVAL;
1795 }
bfe8043e
SH
1796 } else if (!strcmp(options->name, BLOCK_OPT_LAZY_REFCOUNTS)) {
1797 flags |= options->value.n ? BLOCK_FLAG_LAZY_REFCOUNTS : 0;
de5f3f40
KW
1798 }
1799 options++;
1800 }
1801
1802 if (backing_file && prealloc) {
3ef6c40a
HR
1803 error_setg(errp, "Backing file and preallocation cannot be used at "
1804 "the same time");
de5f3f40
KW
1805 return -EINVAL;
1806 }
1807
bfe8043e 1808 if (version < 3 && (flags & BLOCK_FLAG_LAZY_REFCOUNTS)) {
3ef6c40a
HR
1809 error_setg(errp, "Lazy refcounts only supported with compatibility "
1810 "level 1.1 and above (use compat=1.1 or greater)");
bfe8043e
SH
1811 return -EINVAL;
1812 }
1813
3ef6c40a
HR
1814 ret = qcow2_create2(filename, sectors, backing_file, backing_fmt, flags,
1815 cluster_size, prealloc, options, version, &local_err);
84d18f06 1816 if (local_err) {
3ef6c40a
HR
1817 error_propagate(errp, local_err);
1818 }
1819 return ret;
de5f3f40
KW
1820}
1821
621f0589 1822static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
aa7bfbff 1823 int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
621f0589
KW
1824{
1825 int ret;
1826 BDRVQcowState *s = bs->opaque;
1827
1828 /* Emulate misaligned zero writes */
1829 if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
1830 return -ENOTSUP;
1831 }
1832
1833 /* Whatever is left can use real zero clusters */
1834 qemu_co_mutex_lock(&s->lock);
1835 ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
1836 nb_sectors);
1837 qemu_co_mutex_unlock(&s->lock);
1838
1839 return ret;
1840}
1841
6db39ae2
PB
1842static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
1843 int64_t sector_num, int nb_sectors)
5ea929e3 1844{
6db39ae2
PB
1845 int ret;
1846 BDRVQcowState *s = bs->opaque;
1847
1848 qemu_co_mutex_lock(&s->lock);
1849 ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
670df5e3 1850 nb_sectors, QCOW2_DISCARD_REQUEST);
6db39ae2
PB
1851 qemu_co_mutex_unlock(&s->lock);
1852 return ret;
5ea929e3
KW
1853}
1854
419b19d9
SH
1855static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
1856{
1857 BDRVQcowState *s = bs->opaque;
2cf7cfa1
KW
1858 int64_t new_l1_size;
1859 int ret;
419b19d9
SH
1860
1861 if (offset & 511) {
259b2173 1862 error_report("The new size must be a multiple of 512");
419b19d9
SH
1863 return -EINVAL;
1864 }
1865
1866 /* cannot proceed if image has snapshots */
1867 if (s->nb_snapshots) {
259b2173 1868 error_report("Can't resize an image which has snapshots");
419b19d9
SH
1869 return -ENOTSUP;
1870 }
1871
1872 /* shrinking is currently not supported */
1873 if (offset < bs->total_sectors * 512) {
259b2173 1874 error_report("qcow2 doesn't support shrinking images yet");
419b19d9
SH
1875 return -ENOTSUP;
1876 }
1877
1878 new_l1_size = size_to_l1(s, offset);
72893756 1879 ret = qcow2_grow_l1_table(bs, new_l1_size, true);
419b19d9
SH
1880 if (ret < 0) {
1881 return ret;
1882 }
1883
1884 /* write updated header.size */
1885 offset = cpu_to_be64(offset);
8b3b7206
KW
1886 ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
1887 &offset, sizeof(uint64_t));
419b19d9
SH
1888 if (ret < 0) {
1889 return ret;
1890 }
1891
1892 s->l1_vm_state_index = new_l1_size;
1893 return 0;
1894}
1895
20d97356
BS
1896/* XXX: put compressed sectors first, then all the cluster aligned
1897 tables to avoid losing bytes in alignment */
7c80ab3f
JS
1898static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
1899 const uint8_t *buf, int nb_sectors)
20d97356
BS
1900{
1901 BDRVQcowState *s = bs->opaque;
1902 z_stream strm;
1903 int ret, out_len;
1904 uint8_t *out_buf;
1905 uint64_t cluster_offset;
1906
1907 if (nb_sectors == 0) {
1908 /* align end of file to a sector boundary to ease reading with
1909 sector based I/Os */
66f82cee 1910 cluster_offset = bdrv_getlength(bs->file);
20d97356 1911 cluster_offset = (cluster_offset + 511) & ~511;
66f82cee 1912 bdrv_truncate(bs->file, cluster_offset);
20d97356
BS
1913 return 0;
1914 }
1915
f4d38bef
SH
1916 if (nb_sectors != s->cluster_sectors) {
1917 ret = -EINVAL;
1918
1919 /* Zero-pad last write if image size is not cluster aligned */
1920 if (sector_num + nb_sectors == bs->total_sectors &&
1921 nb_sectors < s->cluster_sectors) {
1922 uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
1923 memset(pad_buf, 0, s->cluster_size);
1924 memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
1925 ret = qcow2_write_compressed(bs, sector_num,
1926 pad_buf, s->cluster_sectors);
1927 qemu_vfree(pad_buf);
1928 }
1929 return ret;
1930 }
20d97356 1931
7267c094 1932 out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
20d97356
BS
1933
1934 /* best compression, small window, no zlib header */
1935 memset(&strm, 0, sizeof(strm));
1936 ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
1937 Z_DEFLATED, -12,
1938 9, Z_DEFAULT_STRATEGY);
1939 if (ret != 0) {
8f1efd00
KW
1940 ret = -EINVAL;
1941 goto fail;
20d97356
BS
1942 }
1943
1944 strm.avail_in = s->cluster_size;
1945 strm.next_in = (uint8_t *)buf;
1946 strm.avail_out = s->cluster_size;
1947 strm.next_out = out_buf;
1948
1949 ret = deflate(&strm, Z_FINISH);
1950 if (ret != Z_STREAM_END && ret != Z_OK) {
20d97356 1951 deflateEnd(&strm);
8f1efd00
KW
1952 ret = -EINVAL;
1953 goto fail;
20d97356
BS
1954 }
1955 out_len = strm.next_out - out_buf;
1956
1957 deflateEnd(&strm);
1958
1959 if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
1960 /* could not compress: write normal cluster */
8f1efd00
KW
1961 ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
1962 if (ret < 0) {
1963 goto fail;
1964 }
20d97356
BS
1965 } else {
1966 cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
1967 sector_num << 9, out_len);
8f1efd00
KW
1968 if (!cluster_offset) {
1969 ret = -EIO;
1970 goto fail;
1971 }
20d97356 1972 cluster_offset &= s->cluster_offset_mask;
cf93980e 1973
231bb267 1974 ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
cf93980e
HR
1975 if (ret < 0) {
1976 goto fail;
1977 }
1978
66f82cee 1979 BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
8f1efd00
KW
1980 ret = bdrv_pwrite(bs->file, cluster_offset, out_buf, out_len);
1981 if (ret < 0) {
1982 goto fail;
20d97356
BS
1983 }
1984 }
1985
8f1efd00
KW
1986 ret = 0;
1987fail:
7267c094 1988 g_free(out_buf);
8f1efd00 1989 return ret;
20d97356
BS
1990}
1991
a968168c 1992static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
20d97356 1993{
29c1a730
KW
1994 BDRVQcowState *s = bs->opaque;
1995 int ret;
1996
8b94ff85 1997 qemu_co_mutex_lock(&s->lock);
29c1a730
KW
1998 ret = qcow2_cache_flush(bs, s->l2_table_cache);
1999 if (ret < 0) {
c95de7e2 2000 qemu_co_mutex_unlock(&s->lock);
8b94ff85 2001 return ret;
29c1a730
KW
2002 }
2003
bfe8043e
SH
2004 if (qcow2_need_accurate_refcounts(s)) {
2005 ret = qcow2_cache_flush(bs, s->refcount_block_cache);
2006 if (ret < 0) {
2007 qemu_co_mutex_unlock(&s->lock);
2008 return ret;
2009 }
29c1a730 2010 }
8b94ff85 2011 qemu_co_mutex_unlock(&s->lock);
29c1a730 2012
eb489bb1
KW
2013 return 0;
2014}
2015
7c80ab3f 2016static int qcow2_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
20d97356
BS
2017{
2018 BDRVQcowState *s = bs->opaque;
95de6d70
PB
2019 bdi->unallocated_blocks_are_zero = true;
2020 bdi->can_write_zeroes_with_unmap = (s->qcow_version >= 3);
20d97356 2021 bdi->cluster_size = s->cluster_size;
7c80ab3f 2022 bdi->vm_state_offset = qcow2_vm_state_offset(s);
20d97356
BS
2023 return 0;
2024}
2025
37764dfb
HR
2026static ImageInfoSpecific *qcow2_get_specific_info(BlockDriverState *bs)
2027{
2028 BDRVQcowState *s = bs->opaque;
2029 ImageInfoSpecific *spec_info = g_new(ImageInfoSpecific, 1);
2030
2031 *spec_info = (ImageInfoSpecific){
2032 .kind = IMAGE_INFO_SPECIFIC_KIND_QCOW2,
2033 {
2034 .qcow2 = g_new(ImageInfoSpecificQCow2, 1),
2035 },
2036 };
2037 if (s->qcow_version == 2) {
2038 *spec_info->qcow2 = (ImageInfoSpecificQCow2){
2039 .compat = g_strdup("0.10"),
2040 };
2041 } else if (s->qcow_version == 3) {
2042 *spec_info->qcow2 = (ImageInfoSpecificQCow2){
2043 .compat = g_strdup("1.1"),
2044 .lazy_refcounts = s->compatible_features &
2045 QCOW2_COMPAT_LAZY_REFCOUNTS,
2046 .has_lazy_refcounts = true,
2047 };
2048 }
2049
2050 return spec_info;
2051}
2052
20d97356
BS
#if 0
/*
 * Debugging aid (compiled out): print the refcounts of all clusters of the
 * image file, collapsing consecutive clusters with the same refcount into
 * one "<first cluster>: refcount=<value> nb=<run length>" line.
 */
static void dump_refcounts(BlockDriverState *bs)
{
    BDRVQcowState *s = bs->opaque;
    int64_t nb_clusters, k, k1, size;
    int refcount;

    size = bdrv_getlength(bs->file);
    nb_clusters = size_to_clusters(s, size);
    for (k = 0; k < nb_clusters;) {
        k1 = k;
        refcount = get_refcount(bs, k);
        k++;
        while (k < nb_clusters && get_refcount(bs, k) == refcount) {
            k++;
        }
        /* label the run with its first cluster (k1), not the index one past
         * its end (k) */
        printf("%" PRId64 ": refcount=%d nb=%" PRId64 "\n", k1, refcount,
               k - k1);
    }
}
#endif
2073
cf8074b3
KW
2074static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
2075 int64_t pos)
20d97356
BS
2076{
2077 BDRVQcowState *s = bs->opaque;
eedff66f 2078 int64_t total_sectors = bs->total_sectors;
20d97356 2079 int growable = bs->growable;
6e13610a 2080 bool zero_beyond_eof = bs->zero_beyond_eof;
20d97356
BS
2081 int ret;
2082
66f82cee 2083 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
20d97356 2084 bs->growable = 1;
6e13610a 2085 bs->zero_beyond_eof = false;
8d3b1a2d 2086 ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
20d97356 2087 bs->growable = growable;
6e13610a 2088 bs->zero_beyond_eof = zero_beyond_eof;
20d97356 2089
eedff66f
HR
2090 /* bdrv_co_do_writev will have increased the total_sectors value to include
2091 * the VM state - the VM state is however not an actual part of the block
2092 * device, therefore, we need to restore the old value. */
2093 bs->total_sectors = total_sectors;
2094
20d97356
BS
2095 return ret;
2096}
2097
7c80ab3f
JS
2098static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
2099 int64_t pos, int size)
20d97356
BS
2100{
2101 BDRVQcowState *s = bs->opaque;
2102 int growable = bs->growable;
0d51b4de 2103 bool zero_beyond_eof = bs->zero_beyond_eof;
20d97356
BS
2104 int ret;
2105
66f82cee 2106 BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
20d97356 2107 bs->growable = 1;
0d51b4de 2108 bs->zero_beyond_eof = false;
7c80ab3f 2109 ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size);
20d97356 2110 bs->growable = growable;
0d51b4de 2111 bs->zero_beyond_eof = zero_beyond_eof;
20d97356
BS
2112
2113 return ret;
2114}
2115
9296b3ed
HR
2116/*
2117 * Downgrades an image's version. To achieve this, any incompatible features
2118 * have to be removed.
2119 */
2120static int qcow2_downgrade(BlockDriverState *bs, int target_version)
2121{
2122 BDRVQcowState *s = bs->opaque;
2123 int current_version = s->qcow_version;
2124 int ret;
2125
2126 if (target_version == current_version) {
2127 return 0;
2128 } else if (target_version > current_version) {
2129 return -EINVAL;
2130 } else if (target_version != 2) {
2131 return -EINVAL;
2132 }
2133
2134 if (s->refcount_order != 4) {
2135 /* we would have to convert the image to a refcount_order == 4 image
2136 * here; however, since qemu (at the time of writing this) does not
2137 * support anything different than 4 anyway, there is no point in doing
2138 * so right now; however, we should error out (if qemu supports this in
2139 * the future and this code has not been adapted) */
9e3f0892 2140 error_report("qcow2_downgrade: Image refcount orders other than 4 are "
9296b3ed
HR
2141 "currently not supported.");
2142 return -ENOTSUP;
2143 }
2144
2145 /* clear incompatible features */
2146 if (s->incompatible_features & QCOW2_INCOMPAT_DIRTY) {
2147 ret = qcow2_mark_clean(bs);
2148 if (ret < 0) {
2149 return ret;
2150 }
2151 }
2152
2153 /* with QCOW2_INCOMPAT_CORRUPT, it is pretty much impossible to get here in
2154 * the first place; if that happens nonetheless, returning -ENOTSUP is the
2155 * best thing to do anyway */
2156
2157 if (s->incompatible_features) {
2158 return -ENOTSUP;
2159 }
2160
2161 /* since we can ignore compatible features, we can set them to 0 as well */
2162 s->compatible_features = 0;
2163 /* if lazy refcounts have been used, they have already been fixed through
2164 * clearing the dirty flag */
2165
2166 /* clearing autoclear features is trivial */
2167 s->autoclear_features = 0;
2168
2169 ret = qcow2_expand_zero_clusters(bs);
2170 if (ret < 0) {
2171 return ret;
2172 }
2173
2174 s->qcow_version = target_version;
2175 ret = qcow2_update_header(bs);
2176 if (ret < 0) {
2177 s->qcow_version = current_version;
2178 return ret;
2179 }
2180 return 0;
2181}
2182
2183static int qcow2_amend_options(BlockDriverState *bs,
2184 QEMUOptionParameter *options)
2185{
2186 BDRVQcowState *s = bs->opaque;
2187 int old_version = s->qcow_version, new_version = old_version;
2188 uint64_t new_size = 0;
2189 const char *backing_file = NULL, *backing_format = NULL;
2190 bool lazy_refcounts = s->use_lazy_refcounts;
2191 int ret;
2192 int i;
2193
2194 for (i = 0; options[i].name; i++)
2195 {
2196 if (!options[i].assigned) {
2197 /* only change explicitly defined options */
2198 continue;
2199 }
2200
2201 if (!strcmp(options[i].name, "compat")) {
2202 if (!options[i].value.s) {
2203 /* preserve default */
2204 } else if (!strcmp(options[i].value.s, "0.10")) {
2205 new_version = 2;
2206 } else if (!strcmp(options[i].value.s, "1.1")) {
2207 new_version = 3;
2208 } else {
2209 fprintf(stderr, "Unknown compatibility level %s.\n",
2210 options[i].value.s);
2211 return -EINVAL;
2212 }
2213 } else if (!strcmp(options[i].name, "preallocation")) {
2214 fprintf(stderr, "Cannot change preallocation mode.\n");
2215 return -ENOTSUP;
2216 } else if (!strcmp(options[i].name, "size")) {
2217 new_size = options[i].value.n;
2218 } else if (!strcmp(options[i].name, "backing_file")) {
2219 backing_file = options[i].value.s;
2220 } else if (!strcmp(options[i].name, "backing_fmt")) {
2221 backing_format = options[i].value.s;
2222 } else if (!strcmp(options[i].name, "encryption")) {
2223 if ((options[i].value.n != !!s->crypt_method)) {
2224 fprintf(stderr, "Changing the encryption flag is not "
2225 "supported.\n");
2226 return -ENOTSUP;
2227 }
2228 } else if (!strcmp(options[i].name, "cluster_size")) {
2229 if (options[i].value.n != s->cluster_size) {
2230 fprintf(stderr, "Changing the cluster size is not "
2231 "supported.\n");
2232 return -ENOTSUP;
2233 }
2234 } else if (!strcmp(options[i].name, "lazy_refcounts")) {
2235 lazy_refcounts = options[i].value.n;
2236 } else {
2237 /* if this assertion fails, this probably means a new option was
2238 * added without having it covered here */
2239 assert(false);
2240 }
2241 }
2242
2243 if (new_version != old_version) {
2244 if (new_version > old_version) {
2245 /* Upgrade */
2246 s->qcow_version = new_version;
2247 ret = qcow2_update_header(bs);
2248 if (ret < 0) {
2249 s->qcow_version = old_version;
2250 return ret;
2251 }
2252 } else {
2253 ret = qcow2_downgrade(bs, new_version);
2254 if (ret < 0) {
2255 return ret;
2256 }
2257 }
2258 }
2259
2260 if (backing_file || backing_format) {
2261 ret = qcow2_change_backing_file(bs, backing_file ?: bs->backing_file,
2262 backing_format ?: bs->backing_format);
2263 if (ret < 0) {
2264 return ret;
2265 }
2266 }
2267
2268 if (s->use_lazy_refcounts != lazy_refcounts) {
2269 if (lazy_refcounts) {
2270 if (s->qcow_version < 3) {
2271 fprintf(stderr, "Lazy refcounts only supported with compatibility "
2272 "level 1.1 and above (use compat=1.1 or greater)\n");
2273 return -EINVAL;
2274 }
2275 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
2276 ret = qcow2_update_header(bs);
2277 if (ret < 0) {
2278 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
2279 return ret;
2280 }
2281 s->use_lazy_refcounts = true;
2282 } else {
2283 /* make image clean first */
2284 ret = qcow2_mark_clean(bs);
2285 if (ret < 0) {
2286 return ret;
2287 }
2288 /* now disallow lazy refcounts */
2289 s->compatible_features &= ~QCOW2_COMPAT_LAZY_REFCOUNTS;
2290 ret = qcow2_update_header(bs);
2291 if (ret < 0) {
2292 s->compatible_features |= QCOW2_COMPAT_LAZY_REFCOUNTS;
2293 return ret;
2294 }
2295 s->use_lazy_refcounts = false;
2296 }
2297 }
2298
2299 if (new_size) {
2300 ret = bdrv_truncate(bs, new_size);
2301 if (ret < 0) {
2302 return ret;
2303 }
2304 }
2305
2306 return 0;
2307}
2308
7c80ab3f 2309static QEMUOptionParameter qcow2_create_options[] = {
20d97356
BS
2310 {
2311 .name = BLOCK_OPT_SIZE,
2312 .type = OPT_SIZE,
2313 .help = "Virtual disk size"
2314 },
6744cbab
KW
2315 {
2316 .name = BLOCK_OPT_COMPAT_LEVEL,
2317 .type = OPT_STRING,
2318 .help = "Compatibility level (0.10 or 1.1)"
2319 },
20d97356
BS
2320 {
2321 .name = BLOCK_OPT_BACKING_FILE,
2322 .type = OPT_STRING,
2323 .help = "File name of a base image"
2324 },
2325 {
2326 .name = BLOCK_OPT_BACKING_FMT,
2327 .type = OPT_STRING,
2328 .help = "Image format of the base image"
2329 },
2330 {
2331 .name = BLOCK_OPT_ENCRYPT,
2332 .type = OPT_FLAG,
2333 .help = "Encrypt the image"
2334 },
2335 {
2336 .name = BLOCK_OPT_CLUSTER_SIZE,
2337 .type = OPT_SIZE,
99cce9fa
KW
2338 .help = "qcow2 cluster size",
2339 .value = { .n = DEFAULT_CLUSTER_SIZE },
20d97356
BS
2340 },
2341 {
2342 .name = BLOCK_OPT_PREALLOC,
2343 .type = OPT_STRING,
2344 .help = "Preallocation mode (allowed values: off, metadata)"
2345 },
bfe8043e
SH
2346 {
2347 .name = BLOCK_OPT_LAZY_REFCOUNTS,
2348 .type = OPT_FLAG,
2349 .help = "Postpone refcount updates",
2350 },
20d97356
BS
2351 { NULL }
2352};
2353
2354static BlockDriver bdrv_qcow2 = {
7c80ab3f
JS
2355 .format_name = "qcow2",
2356 .instance_size = sizeof(BDRVQcowState),
2357 .bdrv_probe = qcow2_probe,
2358 .bdrv_open = qcow2_open,
2359 .bdrv_close = qcow2_close,
21d82ac9 2360 .bdrv_reopen_prepare = qcow2_reopen_prepare,
7c80ab3f 2361 .bdrv_create = qcow2_create,
3ac21627 2362 .bdrv_has_zero_init = bdrv_has_zero_init_1,
b6b8a333 2363 .bdrv_co_get_block_status = qcow2_co_get_block_status,
7c80ab3f 2364 .bdrv_set_key = qcow2_set_key,
7c80ab3f 2365
c68b89ac
KW
2366 .bdrv_co_readv = qcow2_co_readv,
2367 .bdrv_co_writev = qcow2_co_writev,
eb489bb1 2368 .bdrv_co_flush_to_os = qcow2_co_flush_to_os,
419b19d9 2369
621f0589 2370 .bdrv_co_write_zeroes = qcow2_co_write_zeroes,
6db39ae2 2371 .bdrv_co_discard = qcow2_co_discard,
419b19d9 2372 .bdrv_truncate = qcow2_truncate,
7c80ab3f 2373 .bdrv_write_compressed = qcow2_write_compressed,
20d97356
BS
2374
2375 .bdrv_snapshot_create = qcow2_snapshot_create,
2376 .bdrv_snapshot_goto = qcow2_snapshot_goto,
2377 .bdrv_snapshot_delete = qcow2_snapshot_delete,
2378 .bdrv_snapshot_list = qcow2_snapshot_list,
51ef6727 2379 .bdrv_snapshot_load_tmp = qcow2_snapshot_load_tmp,
7c80ab3f 2380 .bdrv_get_info = qcow2_get_info,
37764dfb 2381 .bdrv_get_specific_info = qcow2_get_specific_info,
20d97356 2382
7c80ab3f
JS
2383 .bdrv_save_vmstate = qcow2_save_vmstate,
2384 .bdrv_load_vmstate = qcow2_load_vmstate,
20d97356
BS
2385
2386 .bdrv_change_backing_file = qcow2_change_backing_file,
2387
d34682cd 2388 .bdrv_refresh_limits = qcow2_refresh_limits,
06d9260f
AL
2389 .bdrv_invalidate_cache = qcow2_invalidate_cache,
2390
7c80ab3f
JS
2391 .create_options = qcow2_create_options,
2392 .bdrv_check = qcow2_check,
9296b3ed 2393 .bdrv_amend_options = qcow2_amend_options,
20d97356
BS
2394};
2395
5efa9d5a
AL
2396static void bdrv_qcow2_init(void)
2397{
2398 bdrv_register(&bdrv_qcow2);
2399}
2400
2401block_init(bdrv_qcow2_init);