]> git.proxmox.com Git - mirror_qemu.git/blobdiff - block/qcow2.c
block/gluster: improve defense over string to int conversion
[mirror_qemu.git] / block / qcow2.c
index 642802971cc607830860d093eeee99573194f3ce..6d5689a23cf27fef7efaddeee94bd293e3567382 100644 (file)
@@ -36,6 +36,7 @@
 #include "trace.h"
 #include "qemu/option_int.h"
 #include "qemu/cutils.h"
+#include "qemu/bswap.h"
 
 /*
   Differences with QCOW:
@@ -106,7 +107,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
         printf("attempting to read extended header in offset %lu\n", offset);
 #endif
 
-        ret = bdrv_pread(bs->file->bs, offset, &ext, sizeof(ext));
+        ret = bdrv_pread(bs->file, offset, &ext, sizeof(ext));
         if (ret < 0) {
             error_setg_errno(errp, -ret, "qcow2_read_extension: ERROR: "
                              "pread fail from offset %" PRIu64, offset);
@@ -134,7 +135,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                            sizeof(bs->backing_format));
                 return 2;
             }
-            ret = bdrv_pread(bs->file->bs, offset, bs->backing_format, ext.len);
+            ret = bdrv_pread(bs->file, offset, bs->backing_format, ext.len);
             if (ret < 0) {
                 error_setg_errno(errp, -ret, "ERROR: ext_backing_format: "
                                  "Could not read format name");
@@ -150,7 +151,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
         case QCOW2_EXT_MAGIC_FEATURE_TABLE:
             if (p_feature_table != NULL) {
                 void* feature_table = g_malloc0(ext.len + 2 * sizeof(Qcow2Feature));
-                ret = bdrv_pread(bs->file->bs, offset , feature_table, ext.len);
+                ret = bdrv_pread(bs->file, offset , feature_table, ext.len);
                 if (ret < 0) {
                     error_setg_errno(errp, -ret, "ERROR: ext_feature_table: "
                                      "Could not read table");
@@ -171,7 +172,7 @@ static int qcow2_read_extensions(BlockDriverState *bs, uint64_t start_offset,
                 uext->len = ext.len;
                 QLIST_INSERT_HEAD(&s->unknown_header_ext, uext, next);
 
-                ret = bdrv_pread(bs->file->bs, offset , uext->data, uext->len);
+                ret = bdrv_pread(bs->file, offset , uext->data, uext->len);
                 if (ret < 0) {
                     error_setg_errno(errp, -ret, "ERROR: unknown extension: "
                                      "Could not read data");
@@ -248,7 +249,7 @@ int qcow2_mark_dirty(BlockDriverState *bs)
     }
 
     val = cpu_to_be64(s->incompatible_features | QCOW2_INCOMPAT_DIRTY);
-    ret = bdrv_pwrite(bs->file->bs, offsetof(QCowHeader, incompatible_features),
+    ret = bdrv_pwrite(bs->file, offsetof(QCowHeader, incompatible_features),
                       &val, sizeof(val));
     if (ret < 0) {
         return ret;
@@ -816,7 +817,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     uint64_t ext_end;
     uint64_t l1_vm_state_index;
 
-    ret = bdrv_pread(bs->file->bs, 0, &header, sizeof(header));
+    ret = bdrv_pread(bs->file, 0, &header, sizeof(header));
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not read qcow2 header");
         goto fail;
@@ -891,7 +892,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
     if (header.header_length > sizeof(header)) {
         s->unknown_header_fields_size = header.header_length - sizeof(header);
         s->unknown_header_fields = g_malloc(s->unknown_header_fields_size);
-        ret = bdrv_pread(bs->file->bs, sizeof(header), s->unknown_header_fields,
+        ret = bdrv_pread(bs->file, sizeof(header), s->unknown_header_fields,
                          s->unknown_header_fields_size);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Could not read unknown qcow2 header "
@@ -958,14 +959,29 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
         ret = -EINVAL;
         goto fail;
     }
-    if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128)) {
+    if (!qcrypto_cipher_supports(QCRYPTO_CIPHER_ALG_AES_128,
+                                 QCRYPTO_CIPHER_MODE_CBC)) {
         error_setg(errp, "AES cipher not available");
         ret = -EINVAL;
         goto fail;
     }
     s->crypt_method_header = header.crypt_method;
     if (s->crypt_method_header) {
-        bs->encrypted = 1;
+        if (bdrv_uses_whitelist() &&
+            s->crypt_method_header == QCOW_CRYPT_AES) {
+            error_setg(errp,
+                       "Use of AES-CBC encrypted qcow2 images is no longer "
+                       "supported in system emulators");
+            error_append_hint(errp,
+                              "You can use 'qemu-img convert' to convert your "
+                              "image to an alternative supported format, such "
+                              "as unencrypted qcow2, or raw with the LUKS "
+                              "format instead.\n");
+            ret = -ENOSYS;
+            goto fail;
+        }
+
+        bs->encrypted = true;
     }
 
     s->l2_bits = s->cluster_bits - 3; /* L2 is always one cluster */
@@ -1051,7 +1067,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
             ret = -ENOMEM;
             goto fail;
         }
-        ret = bdrv_pread(bs->file->bs, s->l1_table_offset, s->l1_table,
+        ret = bdrv_pread(bs->file, s->l1_table_offset, s->l1_table,
                          s->l1_size * sizeof(uint64_t));
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Could not read L1 table");
@@ -1107,7 +1123,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
             ret = -EINVAL;
             goto fail;
         }
-        ret = bdrv_pread(bs->file->bs, header.backing_file_offset,
+        ret = bdrv_pread(bs->file, header.backing_file_offset,
                          bs->backing_file, len);
         if (ret < 0) {
             error_setg_errno(errp, -ret, "Could not read backing file name");
@@ -1139,6 +1155,7 @@ static int qcow2_open(BlockDriverState *bs, QDict *options, int flags,
 
     /* Initialise locks */
     qemu_co_mutex_init(&s->lock);
+    bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
 
     /* Repair image if dirty */
     if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
@@ -1184,7 +1201,11 @@ static void qcow2_refresh_limits(BlockDriverState *bs, Error **errp)
 {
     BDRVQcow2State *s = bs->opaque;
 
-    bs->bl.write_zeroes_alignment = s->cluster_sectors;
+    if (bs->encrypted) {
+        /* Encryption works on a sector granularity */
+        bs->bl.request_alignment = BDRV_SECTOR_SIZE;
+    }
+    bs->bl.pwrite_zeroes_alignment = s->cluster_size;
 }
 
 static int qcow2_set_key(BlockDriverState *bs, const char *key)
@@ -1322,16 +1343,20 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
     BDRVQcow2State *s = bs->opaque;
     uint64_t cluster_offset;
     int index_in_cluster, ret;
+    unsigned int bytes;
     int64_t status = 0;
 
-    *pnum = nb_sectors;
+    bytes = MIN(INT_MAX, nb_sectors * BDRV_SECTOR_SIZE);
     qemu_co_mutex_lock(&s->lock);
-    ret = qcow2_get_cluster_offset(bs, sector_num << 9, pnum, &cluster_offset);
+    ret = qcow2_get_cluster_offset(bs, sector_num << 9, &bytes,
+                                   &cluster_offset);
     qemu_co_mutex_unlock(&s->lock);
     if (ret < 0) {
         return ret;
     }
 
+    *pnum = bytes >> BDRV_SECTOR_BITS;
+
     if (cluster_offset != 0 && ret != QCOW2_CLUSTER_COMPRESSED &&
         !s->cipher) {
         index_in_cluster = sector_num & (s->cluster_sectors - 1);
@@ -1349,28 +1374,34 @@ static int64_t coroutine_fn qcow2_co_get_block_status(BlockDriverState *bs,
 
 /* handle reading after the end of the backing file */
 int qcow2_backing_read1(BlockDriverState *bs, QEMUIOVector *qiov,
-                  int64_t sector_num, int nb_sectors)
+                        int64_t offset, int bytes)
 {
+    uint64_t bs_size = bs->total_sectors * BDRV_SECTOR_SIZE;
     int n1;
-    if ((sector_num + nb_sectors) <= bs->total_sectors)
-        return nb_sectors;
-    if (sector_num >= bs->total_sectors)
+
+    if ((offset + bytes) <= bs_size) {
+        return bytes;
+    }
+
+    if (offset >= bs_size) {
         n1 = 0;
-    else
-        n1 = bs->total_sectors - sector_num;
+    } else {
+        n1 = bs_size - offset;
+    }
 
-    qemu_iovec_memset(qiov, 512 * n1, 0, 512 * (nb_sectors - n1));
+    qemu_iovec_memset(qiov, n1, 0, bytes - n1);
 
     return n1;
 }
 
-static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
-                          int remaining_sectors, QEMUIOVector *qiov)
+static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
+                                        uint64_t bytes, QEMUIOVector *qiov,
+                                        int flags)
 {
     BDRVQcow2State *s = bs->opaque;
-    int index_in_cluster, n1;
+    int offset_in_cluster, n1;
     int ret;
-    int cur_nr_sectors; /* number of sectors in current iteration */
+    unsigned int cur_bytes; /* number of bytes in current iteration */
     uint64_t cluster_offset = 0;
     uint64_t bytes_done = 0;
     QEMUIOVector hd_qiov;
@@ -1380,26 +1411,24 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
 
     qemu_co_mutex_lock(&s->lock);
 
-    while (remaining_sectors != 0) {
+    while (bytes != 0) {
 
         /* prepare next request */
-        cur_nr_sectors = remaining_sectors;
+        cur_bytes = MIN(bytes, INT_MAX);
         if (s->cipher) {
-            cur_nr_sectors = MIN(cur_nr_sectors,
-                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+            cur_bytes = MIN(cur_bytes,
+                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
         }
 
-        ret = qcow2_get_cluster_offset(bs, sector_num << 9,
-            &cur_nr_sectors, &cluster_offset);
+        ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
         if (ret < 0) {
             goto fail;
         }
 
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
+        offset_in_cluster = offset_into_cluster(s, offset);
 
         qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
-            cur_nr_sectors * 512);
+        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
 
         switch (ret) {
         case QCOW2_CLUSTER_UNALLOCATED:
@@ -1407,18 +1436,17 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
             if (bs->backing) {
                 /* read from the base image */
                 n1 = qcow2_backing_read1(bs->backing->bs, &hd_qiov,
-                    sector_num, cur_nr_sectors);
+                                         offset, cur_bytes);
                 if (n1 > 0) {
                     QEMUIOVector local_qiov;
 
                     qemu_iovec_init(&local_qiov, hd_qiov.niov);
-                    qemu_iovec_concat(&local_qiov, &hd_qiov, 0,
-                                      n1 * BDRV_SECTOR_SIZE);
+                    qemu_iovec_concat(&local_qiov, &hd_qiov, 0, n1);
 
                     BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
                     qemu_co_mutex_unlock(&s->lock);
-                    ret = bdrv_co_readv(bs->backing->bs, sector_num,
-                                        n1, &local_qiov);
+                    ret = bdrv_co_preadv(bs->backing, offset, n1,
+                                         &local_qiov, 0);
                     qemu_co_mutex_lock(&s->lock);
 
                     qemu_iovec_destroy(&local_qiov);
@@ -1429,12 +1457,12 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                 }
             } else {
                 /* Note: in this case, no need to wait */
-                qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+                qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
             }
             break;
 
         case QCOW2_CLUSTER_ZERO:
-            qemu_iovec_memset(&hd_qiov, 0, 0, 512 * cur_nr_sectors);
+            qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
             break;
 
         case QCOW2_CLUSTER_COMPRESSED:
@@ -1445,8 +1473,8 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
             }
 
             qemu_iovec_from_buf(&hd_qiov, 0,
-                s->cluster_cache + index_in_cluster * 512,
-                512 * cur_nr_sectors);
+                                s->cluster_cache + offset_in_cluster,
+                                cur_bytes);
             break;
 
         case QCOW2_CLUSTER_NORMAL:
@@ -1473,34 +1501,34 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
                     }
                 }
 
-                assert(cur_nr_sectors <=
-                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors);
+                assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
                 qemu_iovec_reset(&hd_qiov);
-                qemu_iovec_add(&hd_qiov, cluster_data,
-                    512 * cur_nr_sectors);
+                qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
             }
 
             BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
             qemu_co_mutex_unlock(&s->lock);
-            ret = bdrv_co_readv(bs->file->bs,
-                                (cluster_offset >> 9) + index_in_cluster,
-                                cur_nr_sectors, &hd_qiov);
+            ret = bdrv_co_preadv(bs->file,
+                                 cluster_offset + offset_in_cluster,
+                                 cur_bytes, &hd_qiov, 0);
             qemu_co_mutex_lock(&s->lock);
             if (ret < 0) {
                 goto fail;
             }
             if (bs->encrypted) {
                 assert(s->cipher);
+                assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
+                assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
                 Error *err = NULL;
-                if (qcow2_encrypt_sectors(s, sector_num,  cluster_data,
-                                          cluster_data, cur_nr_sectors, false,
-                                          &err) < 0) {
+                if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS,
+                                          cluster_data, cluster_data,
+                                          cur_bytes >> BDRV_SECTOR_BITS,
+                                          false, &err) < 0) {
                     error_free(err);
                     ret = -EIO;
                     goto fail;
                 }
-                qemu_iovec_from_buf(qiov, bytes_done,
-                    cluster_data, 512 * cur_nr_sectors);
+                qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
             }
             break;
 
@@ -1510,9 +1538,9 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num,
             goto fail;
         }
 
-        remaining_sectors -= cur_nr_sectors;
-        sector_num += cur_nr_sectors;
-        bytes_done += cur_nr_sectors * 512;
+        bytes -= cur_bytes;
+        offset += cur_bytes;
+        bytes_done += cur_bytes;
     }
     ret = 0;
 
@@ -1525,23 +1553,21 @@ fail:
     return ret;
 }
 
-static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
-                           int64_t sector_num,
-                           int remaining_sectors,
-                           QEMUIOVector *qiov)
+static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
+                                         uint64_t bytes, QEMUIOVector *qiov,
+                                         int flags)
 {
     BDRVQcow2State *s = bs->opaque;
-    int index_in_cluster;
+    int offset_in_cluster;
     int ret;
-    int cur_nr_sectors; /* number of sectors in current iteration */
+    unsigned int cur_bytes; /* number of sectors in current iteration */
     uint64_t cluster_offset;
     QEMUIOVector hd_qiov;
     uint64_t bytes_done = 0;
     uint8_t *cluster_data = NULL;
     QCowL2Meta *l2meta = NULL;
 
-    trace_qcow2_writev_start_req(qemu_coroutine_self(), sector_num,
-                                 remaining_sectors);
+    trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
 
     qemu_iovec_init(&hd_qiov, qiov->niov);
 
@@ -1549,22 +1575,21 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
 
     qemu_co_mutex_lock(&s->lock);
 
-    while (remaining_sectors != 0) {
+    while (bytes != 0) {
 
         l2meta = NULL;
 
         trace_qcow2_writev_start_part(qemu_coroutine_self());
-        index_in_cluster = sector_num & (s->cluster_sectors - 1);
-        cur_nr_sectors = remaining_sectors;
-        if (bs->encrypted &&
-            cur_nr_sectors >
-            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster) {
-            cur_nr_sectors =
-                QCOW_MAX_CRYPT_CLUSTERS * s->cluster_sectors - index_in_cluster;
+        offset_in_cluster = offset_into_cluster(s, offset);
+        cur_bytes = MIN(bytes, INT_MAX);
+        if (bs->encrypted) {
+            cur_bytes = MIN(cur_bytes,
+                            QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size
+                            - offset_in_cluster);
         }
 
-        ret = qcow2_alloc_cluster_offset(bs, sector_num << 9,
-            &cur_nr_sectors, &cluster_offset, &l2meta);
+        ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
+                                         &cluster_offset, &l2meta);
         if (ret < 0) {
             goto fail;
         }
@@ -1572,8 +1597,7 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         assert((cluster_offset & 511) == 0);
 
         qemu_iovec_reset(&hd_qiov);
-        qemu_iovec_concat(&hd_qiov, qiov, bytes_done,
-            cur_nr_sectors * 512);
+        qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
 
         if (bs->encrypted) {
             Error *err = NULL;
@@ -1592,8 +1616,9 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
             qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
 
-            if (qcow2_encrypt_sectors(s, sector_num, cluster_data,
-                                      cluster_data, cur_nr_sectors,
+            if (qcow2_encrypt_sectors(s, offset >> BDRV_SECTOR_BITS,
+                                      cluster_data, cluster_data,
+                                      cur_bytes >>BDRV_SECTOR_BITS,
                                       true, &err) < 0) {
                 error_free(err);
                 ret = -EIO;
@@ -1601,13 +1626,11 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
             }
 
             qemu_iovec_reset(&hd_qiov);
-            qemu_iovec_add(&hd_qiov, cluster_data,
-                cur_nr_sectors * 512);
+            qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
         }
 
         ret = qcow2_pre_write_overlap_check(bs, 0,
-                cluster_offset + index_in_cluster * BDRV_SECTOR_SIZE,
-                cur_nr_sectors * BDRV_SECTOR_SIZE);
+                cluster_offset + offset_in_cluster, cur_bytes);
         if (ret < 0) {
             goto fail;
         }
@@ -1615,10 +1638,10 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
         qemu_co_mutex_unlock(&s->lock);
         BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
         trace_qcow2_writev_data(qemu_coroutine_self(),
-                                (cluster_offset >> 9) + index_in_cluster);
-        ret = bdrv_co_writev(bs->file->bs,
-                             (cluster_offset >> 9) + index_in_cluster,
-                             cur_nr_sectors, &hd_qiov);
+                                cluster_offset + offset_in_cluster);
+        ret = bdrv_co_pwritev(bs->file,
+                              cluster_offset + offset_in_cluster,
+                              cur_bytes, &hd_qiov, 0);
         qemu_co_mutex_lock(&s->lock);
         if (ret < 0) {
             goto fail;
@@ -1644,10 +1667,10 @@ static coroutine_fn int qcow2_co_writev(BlockDriverState *bs,
             l2meta = next;
         }
 
-        remaining_sectors -= cur_nr_sectors;
-        sector_num += cur_nr_sectors;
-        bytes_done += cur_nr_sectors * 512;
-        trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_nr_sectors);
+        bytes -= cur_bytes;
+        offset += cur_bytes;
+        bytes_done += cur_bytes;
+        trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
     }
     ret = 0;
 
@@ -1749,13 +1772,6 @@ static void qcow2_invalidate_cache(BlockDriverState *bs, Error **errp)
 
     qcow2_close(bs);
 
-    bdrv_invalidate_cache(bs->file->bs, &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-        bs->drv = NULL;
-        return;
-    }
-
     memset(s, 0, sizeof(BDRVQcow2State));
     options = qdict_clone_shallow(bs->options);
 
@@ -1790,7 +1806,10 @@ static size_t header_ext_add(char *buf, uint32_t magic, const void *s,
         .magic  = cpu_to_be32(magic),
         .len    = cpu_to_be32(len),
     };
-    memcpy(buf + sizeof(QCowExtension), s, len);
+
+    if (len) {
+        memcpy(buf + sizeof(QCowExtension), s, len);
+    }
 
     return ext_len;
 }
@@ -1962,7 +1981,7 @@ int qcow2_update_header(BlockDriverState *bs)
     }
 
     /* Write the new header */
-    ret = bdrv_pwrite(bs->file->bs, 0, header, s->cluster_size);
+    ret = bdrv_pwrite(bs->file, 0, header, s->cluster_size);
     if (ret < 0) {
         goto fail;
     }
@@ -1978,6 +1997,10 @@ static int qcow2_change_backing_file(BlockDriverState *bs,
 {
     BDRVQcow2State *s = bs->opaque;
 
+    if (backing_file && strlen(backing_file) > 1023) {
+        return -EINVAL;
+    }
+
     pstrcpy(bs->backing_file, sizeof(bs->backing_file), backing_file ?: "");
     pstrcpy(bs->backing_format, sizeof(bs->backing_format), backing_fmt ?: "");
 
@@ -1992,19 +2015,19 @@ static int qcow2_change_backing_file(BlockDriverState *bs,
 
 static int preallocate(BlockDriverState *bs)
 {
-    uint64_t nb_sectors;
+    uint64_t bytes;
     uint64_t offset;
     uint64_t host_offset = 0;
-    int num;
+    unsigned int cur_bytes;
     int ret;
     QCowL2Meta *meta;
 
-    nb_sectors = bdrv_nb_sectors(bs);
+    bytes = bdrv_getlength(bs);
     offset = 0;
 
-    while (nb_sectors) {
-        num = MIN(nb_sectors, INT_MAX >> BDRV_SECTOR_BITS);
-        ret = qcow2_alloc_cluster_offset(bs, offset, &num,
+    while (bytes) {
+        cur_bytes = MIN(bytes, INT_MAX);
+        ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
                                          &host_offset, &meta);
         if (ret < 0) {
             return ret;
@@ -2030,8 +2053,8 @@ static int preallocate(BlockDriverState *bs)
 
         /* TODO Preallocate data if requested */
 
-        nb_sectors -= num;
-        offset += num << BDRV_SECTOR_BITS;
+        bytes -= cur_bytes;
+        offset += cur_bytes;
     }
 
     /*
@@ -2040,11 +2063,9 @@ static int preallocate(BlockDriverState *bs)
      * EOF). Extend the image to the last allocated sector.
      */
     if (host_offset != 0) {
-        uint8_t buf[BDRV_SECTOR_SIZE];
-        memset(buf, 0, BDRV_SECTOR_SIZE);
-        ret = bdrv_write(bs->file->bs,
-                         (host_offset >> BDRV_SECTOR_BITS) + num - 1,
-                         buf, 1);
+        uint8_t data = 0;
+        ret = bdrv_pwrite(bs->file, (host_offset + cur_bytes) - 1,
+                          &data, 1);
         if (ret < 0) {
             return ret;
         }
@@ -2160,8 +2181,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     }
 
     blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_PROTOCOL,
-                       &local_err);
+                       BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
     if (blk == NULL) {
         error_propagate(errp, local_err);
         return -EIO;
@@ -2196,7 +2216,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
             cpu_to_be64(QCOW2_COMPAT_LAZY_REFCOUNTS);
     }
 
-    ret = blk_pwrite(blk, 0, header, cluster_size);
+    ret = blk_pwrite(blk, 0, header, cluster_size, 0);
     g_free(header);
     if (ret < 0) {
         error_setg_errno(errp, -ret, "Could not write qcow2 header");
@@ -2206,7 +2226,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     /* Write a refcount table with one refcount block */
     refcount_table = g_malloc0(2 * cluster_size);
     refcount_table[0] = cpu_to_be64(2 * cluster_size);
-    ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size);
+    ret = blk_pwrite(blk, cluster_size, refcount_table, 2 * cluster_size, 0);
     g_free(refcount_table);
 
     if (ret < 0) {
@@ -2225,8 +2245,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     options = qdict_new();
     qdict_put(options, "driver", qstring_from_str("qcow2"));
     blk = blk_new_open(filename, NULL, options,
-                       BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH,
-                       &local_err);
+                       BDRV_O_RDWR | BDRV_O_NO_FLUSH, &local_err);
     if (blk == NULL) {
         error_propagate(errp, local_err);
         ret = -EIO;
@@ -2287,8 +2306,7 @@ static int qcow2_create2(const char *filename, int64_t total_size,
     options = qdict_new();
     qdict_put(options, "driver", qstring_from_str("qcow2"));
     blk = blk_new_open(filename, NULL, options,
-                       BDRV_O_RDWR | BDRV_O_CACHE_WB | BDRV_O_NO_BACKING,
-                       &local_err);
+                       BDRV_O_RDWR | BDRV_O_NO_BACKING, &local_err);
     if (blk == NULL) {
         error_propagate(errp, local_err);
         ret = -EIO;
@@ -2391,9 +2409,7 @@ static int qcow2_create(const char *filename, QemuOpts *opts, Error **errp)
     ret = qcow2_create2(filename, size, backing_file, backing_fmt, flags,
                         cluster_size, prealloc, opts, version, refcount_order,
                         &local_err);
-    if (local_err) {
-        error_propagate(errp, local_err);
-    }
+    error_propagate(errp, local_err);
 
 finish:
     g_free(backing_file);
@@ -2402,35 +2418,81 @@ finish:
     return ret;
 }
 
-static coroutine_fn int qcow2_co_write_zeroes(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors, BdrvRequestFlags flags)
+
+static bool is_zero_sectors(BlockDriverState *bs, int64_t start,
+                            uint32_t count)
+{
+    int nr;
+    BlockDriverState *file;
+    int64_t res;
+
+    if (!count) {
+        return true;
+    }
+    res = bdrv_get_block_status_above(bs, NULL, start, count,
+                                      &nr, &file);
+    return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == count;
+}
+
+static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
+    int64_t offset, int count, BdrvRequestFlags flags)
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
 
-    /* Emulate misaligned zero writes */
-    if (sector_num % s->cluster_sectors || nb_sectors % s->cluster_sectors) {
-        return -ENOTSUP;
+    uint32_t head = offset % s->cluster_size;
+    uint32_t tail = (offset + count) % s->cluster_size;
+
+    trace_qcow2_pwrite_zeroes_start_req(qemu_coroutine_self(), offset, count);
+
+    if (head || tail) {
+        int64_t cl_start = (offset - head) >> BDRV_SECTOR_BITS;
+        uint64_t off;
+        unsigned int nr;
+
+        assert(head + count <= s->cluster_size);
+
+        /* check whether remainder of cluster already reads as zero */
+        if (!(is_zero_sectors(bs, cl_start,
+                              DIV_ROUND_UP(head, BDRV_SECTOR_SIZE)) &&
+              is_zero_sectors(bs, (offset + count) >> BDRV_SECTOR_BITS,
+                              DIV_ROUND_UP(-tail & (s->cluster_size - 1),
+                                           BDRV_SECTOR_SIZE)))) {
+            return -ENOTSUP;
+        }
+
+        qemu_co_mutex_lock(&s->lock);
+        /* We can have new write after previous check */
+        offset = cl_start << BDRV_SECTOR_BITS;
+        count = s->cluster_size;
+        nr = s->cluster_size;
+        ret = qcow2_get_cluster_offset(bs, offset, &nr, &off);
+        if (ret != QCOW2_CLUSTER_UNALLOCATED && ret != QCOW2_CLUSTER_ZERO) {
+            qemu_co_mutex_unlock(&s->lock);
+            return -ENOTSUP;
+        }
+    } else {
+        qemu_co_mutex_lock(&s->lock);
     }
 
+    trace_qcow2_pwrite_zeroes(qemu_coroutine_self(), offset, count);
+
     /* Whatever is left can use real zero clusters */
-    qemu_co_mutex_lock(&s->lock);
-    ret = qcow2_zero_clusters(bs, sector_num << BDRV_SECTOR_BITS,
-        nb_sectors);
+    ret = qcow2_zero_clusters(bs, offset, count >> BDRV_SECTOR_BITS, flags);
     qemu_co_mutex_unlock(&s->lock);
 
     return ret;
 }
 
-static coroutine_fn int qcow2_co_discard(BlockDriverState *bs,
-    int64_t sector_num, int nb_sectors)
+static coroutine_fn int qcow2_co_pdiscard(BlockDriverState *bs,
+                                          int64_t offset, int count)
 {
     int ret;
     BDRVQcow2State *s = bs->opaque;
 
     qemu_co_mutex_lock(&s->lock);
-    ret = qcow2_discard_clusters(bs, sector_num << BDRV_SECTOR_BITS,
-        nb_sectors, QCOW2_DISCARD_REQUEST, false);
+    ret = qcow2_discard_clusters(bs, offset, count >> BDRV_SECTOR_BITS,
+                                 QCOW2_DISCARD_REQUEST, false);
     qemu_co_mutex_unlock(&s->lock);
     return ret;
 }
@@ -2466,7 +2528,7 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
 
     /* write updated header.size */
     offset = cpu_to_be64(offset);
-    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, size),
+    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, size),
                            &offset, sizeof(uint64_t));
     if (ret < 0) {
         return ret;
@@ -2478,39 +2540,39 @@ static int qcow2_truncate(BlockDriverState *bs, int64_t offset)
 
 /* XXX: put compressed sectors first, then all the cluster aligned
    tables to avoid losing bytes in alignment */
-static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
-                                  const uint8_t *buf, int nb_sectors)
+static coroutine_fn int
+qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
+                            uint64_t bytes, QEMUIOVector *qiov)
 {
     BDRVQcow2State *s = bs->opaque;
+    QEMUIOVector hd_qiov;
+    struct iovec iov;
     z_stream strm;
     int ret, out_len;
-    uint8_t *out_buf;
+    uint8_t *buf, *out_buf;
     uint64_t cluster_offset;
 
-    if (nb_sectors == 0) {
+    if (bytes == 0) {
         /* align end of file to a sector boundary to ease reading with
            sector based I/Os */
         cluster_offset = bdrv_getlength(bs->file->bs);
         return bdrv_truncate(bs->file->bs, cluster_offset);
     }
 
-    if (nb_sectors != s->cluster_sectors) {
-        ret = -EINVAL;
-
-        /* Zero-pad last write if image size is not cluster aligned */
-        if (sector_num + nb_sectors == bs->total_sectors &&
-            nb_sectors < s->cluster_sectors) {
-            uint8_t *pad_buf = qemu_blockalign(bs, s->cluster_size);
-            memset(pad_buf, 0, s->cluster_size);
-            memcpy(pad_buf, buf, nb_sectors * BDRV_SECTOR_SIZE);
-            ret = qcow2_write_compressed(bs, sector_num,
-                                         pad_buf, s->cluster_sectors);
-            qemu_vfree(pad_buf);
+    buf = qemu_blockalign(bs, s->cluster_size);
+    if (bytes != s->cluster_size) {
+        if (bytes > s->cluster_size ||
+            offset + bytes != bs->total_sectors << BDRV_SECTOR_BITS)
+        {
+            qemu_vfree(buf);
+            return -EINVAL;
         }
-        return ret;
+        /* Zero-pad last write if image size is not cluster aligned */
+        memset(buf + bytes, 0, s->cluster_size - bytes);
     }
+    qemu_iovec_to_buf(qiov, 0, buf, bytes);
 
-    out_buf = g_malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
+    out_buf = g_malloc(s->cluster_size);
 
     /* best compression, small window, no zlib header */
     memset(&strm, 0, sizeof(strm));
@@ -2539,33 +2601,44 @@ static int qcow2_write_compressed(BlockDriverState *bs, int64_t sector_num,
 
     if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
         /* could not compress: write normal cluster */
-        ret = bdrv_write(bs, sector_num, buf, s->cluster_sectors);
+        ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
         if (ret < 0) {
             goto fail;
         }
-    } else {
-        cluster_offset = qcow2_alloc_compressed_cluster_offset(bs,
-            sector_num << 9, out_len);
-        if (!cluster_offset) {
-            ret = -EIO;
-            goto fail;
-        }
-        cluster_offset &= s->cluster_offset_mask;
+        goto success;
+    }
 
-        ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
-        if (ret < 0) {
-            goto fail;
-        }
+    qemu_co_mutex_lock(&s->lock);
+    cluster_offset =
+        qcow2_alloc_compressed_cluster_offset(bs, offset, out_len);
+    if (!cluster_offset) {
+        qemu_co_mutex_unlock(&s->lock);
+        ret = -EIO;
+        goto fail;
+    }
+    cluster_offset &= s->cluster_offset_mask;
 
-        BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
-        ret = bdrv_pwrite(bs->file->bs, cluster_offset, out_buf, out_len);
-        if (ret < 0) {
-            goto fail;
-        }
+    ret = qcow2_pre_write_overlap_check(bs, 0, cluster_offset, out_len);
+    qemu_co_mutex_unlock(&s->lock);
+    if (ret < 0) {
+        goto fail;
     }
 
+    iov = (struct iovec) {
+        .iov_base   = out_buf,
+        .iov_len    = out_len,
+    };
+    qemu_iovec_init_external(&hd_qiov, &iov, 1);
+
+    BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
+    ret = bdrv_co_pwritev(bs->file, cluster_offset, out_len, &hd_qiov, 0);
+    if (ret < 0) {
+        goto fail;
+    }
+success:
     ret = 0;
 fail:
+    qemu_vfree(buf);
     g_free(out_buf);
     return ret;
 }
@@ -2607,8 +2680,8 @@ static int make_completely_empty(BlockDriverState *bs)
     /* After this call, neither the in-memory nor the on-disk refcount
      * information accurately describe the actual references */
 
-    ret = bdrv_write_zeroes(bs->file->bs, s->l1_table_offset / BDRV_SECTOR_SIZE,
-                            l1_clusters * s->cluster_sectors, 0);
+    ret = bdrv_pwrite_zeroes(bs->file, s->l1_table_offset,
+                             l1_clusters * s->cluster_size, 0);
     if (ret < 0) {
         goto fail_broken_refcounts;
     }
@@ -2621,9 +2694,8 @@ static int make_completely_empty(BlockDriverState *bs)
      * overwrite parts of the existing refcount and L1 table, which is not
      * an issue because the dirty flag is set, complete data loss is in fact
      * desired and partial data loss is consequently fine as well */
-    ret = bdrv_write_zeroes(bs->file->bs, s->cluster_size / BDRV_SECTOR_SIZE,
-                            (2 + l1_clusters) * s->cluster_size /
-                            BDRV_SECTOR_SIZE, 0);
+    ret = bdrv_pwrite_zeroes(bs->file, s->cluster_size,
+                             (2 + l1_clusters) * s->cluster_size, 0);
     /* This call (even if it failed overall) may have overwritten on-disk
      * refcount structures; in that case, the in-memory refcount information
      * will probably differ from the on-disk information which makes the BDS
@@ -2638,10 +2710,10 @@ static int make_completely_empty(BlockDriverState *bs)
     /* "Create" an empty reftable (one cluster) directly after the image
      * header and an empty L1 table three clusters after the image header;
      * the cluster between those two will be used as the first refblock */
-    cpu_to_be64w(&l1_ofs_rt_ofs_cls.l1_offset, 3 * s->cluster_size);
-    cpu_to_be64w(&l1_ofs_rt_ofs_cls.reftable_offset, s->cluster_size);
-    cpu_to_be32w(&l1_ofs_rt_ofs_cls.reftable_clusters, 1);
-    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_table_offset),
+    l1_ofs_rt_ofs_cls.l1_offset = cpu_to_be64(3 * s->cluster_size);
+    l1_ofs_rt_ofs_cls.reftable_offset = cpu_to_be64(s->cluster_size);
+    l1_ofs_rt_ofs_cls.reftable_clusters = cpu_to_be32(1);
+    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_table_offset),
                            &l1_ofs_rt_ofs_cls, sizeof(l1_ofs_rt_ofs_cls));
     if (ret < 0) {
         goto fail_broken_refcounts;
@@ -2672,7 +2744,7 @@ static int make_completely_empty(BlockDriverState *bs)
 
     /* Enter the first refblock into the reftable */
     rt_entry = cpu_to_be64(2 * s->cluster_size);
-    ret = bdrv_pwrite_sync(bs->file->bs, s->cluster_size,
+    ret = bdrv_pwrite_sync(bs->file, s->cluster_size,
                            &rt_entry, sizeof(rt_entry));
     if (ret < 0) {
         goto fail_broken_refcounts;
@@ -2765,14 +2837,14 @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
     int ret;
 
     qemu_co_mutex_lock(&s->lock);
-    ret = qcow2_cache_flush(bs, s->l2_table_cache);
+    ret = qcow2_cache_write(bs, s->l2_table_cache);
     if (ret < 0) {
         qemu_co_mutex_unlock(&s->lock);
         return ret;
     }
 
     if (qcow2_need_accurate_refcounts(s)) {
-        ret = qcow2_cache_flush(bs, s->refcount_block_cache);
+        ret = qcow2_cache_write(bs, s->refcount_block_cache);
         if (ret < 0) {
             qemu_co_mutex_unlock(&s->lock);
             return ret;
@@ -2852,36 +2924,20 @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
                               int64_t pos)
 {
     BDRVQcow2State *s = bs->opaque;
-    int64_t total_sectors = bs->total_sectors;
-    bool zero_beyond_eof = bs->zero_beyond_eof;
-    int ret;
 
     BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
-    bs->zero_beyond_eof = false;
-    ret = bdrv_pwritev(bs, qcow2_vm_state_offset(s) + pos, qiov);
-    bs->zero_beyond_eof = zero_beyond_eof;
-
-    /* bdrv_co_do_writev will have increased the total_sectors value to include
-     * the VM state - the VM state is however not an actual part of the block
-     * device, therefore, we need to restore the old value. */
-    bs->total_sectors = total_sectors;
-
-    return ret;
+    return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos,
+                                    qiov->size, qiov, 0);
 }
 
-static int qcow2_load_vmstate(BlockDriverState *bs, uint8_t *buf,
-                              int64_t pos, int size)
+static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+                              int64_t pos)
 {
     BDRVQcow2State *s = bs->opaque;
-    bool zero_beyond_eof = bs->zero_beyond_eof;
-    int ret;
 
     BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
-    bs->zero_beyond_eof = false;
-    ret = bdrv_pread(bs, qcow2_vm_state_offset(s) + pos, buf, size);
-    bs->zero_beyond_eof = zero_beyond_eof;
-
-    return ret;
+    return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos,
+                                   qiov->size, qiov, 0);
 }
 
 /*
@@ -3320,14 +3376,14 @@ BlockDriver bdrv_qcow2 = {
     .bdrv_co_get_block_status = qcow2_co_get_block_status,
     .bdrv_set_key       = qcow2_set_key,
 
-    .bdrv_co_readv          = qcow2_co_readv,
-    .bdrv_co_writev         = qcow2_co_writev,
+    .bdrv_co_preadv         = qcow2_co_preadv,
+    .bdrv_co_pwritev        = qcow2_co_pwritev,
     .bdrv_co_flush_to_os    = qcow2_co_flush_to_os,
 
-    .bdrv_co_write_zeroes   = qcow2_co_write_zeroes,
-    .bdrv_co_discard        = qcow2_co_discard,
+    .bdrv_co_pwrite_zeroes  = qcow2_co_pwrite_zeroes,
+    .bdrv_co_pdiscard       = qcow2_co_pdiscard,
     .bdrv_truncate          = qcow2_truncate,
-    .bdrv_write_compressed  = qcow2_write_compressed,
+    .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
     .bdrv_make_empty        = qcow2_make_empty,
 
     .bdrv_snapshot_create   = qcow2_snapshot_create,