]> git.proxmox.com Git - mirror_qemu.git/blobdiff - block/qcow2-cluster.c
include/qemu/osdep.h: Don't include qapi/error.h
[mirror_qemu.git] / block / qcow2-cluster.c
index f7dd8c0985a6783662d42a38e2015595f298a59e..31ecc1030452be1e536c468ab5ef3e6f12d6be1d 100644 (file)
  * THE SOFTWARE.
  */
 
+#include "qemu/osdep.h"
 #include <zlib.h>
 
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
 #include "block/qcow2.h"
@@ -32,7 +34,7 @@
 int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
                         bool exact_size)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     int new_l1_size2, ret, i;
     uint64_t *new_l1_table;
     int64_t old_l1_table_offset, old_l1_size;
@@ -72,7 +74,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
 #endif
 
     new_l1_size2 = sizeof(uint64_t) * new_l1_size;
-    new_l1_table = qemu_try_blockalign(bs->file,
+    new_l1_table = qemu_try_blockalign(bs->file->bs,
                                        align_offset(new_l1_size2, 512));
     if (new_l1_table == NULL) {
         return -ENOMEM;
@@ -105,7 +107,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
     for(i = 0; i < s->l1_size; i++)
         new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
-    ret = bdrv_pwrite_sync(bs->file, new_l1_table_offset, new_l1_table, new_l1_size2);
+    ret = bdrv_pwrite_sync(bs->file->bs, new_l1_table_offset,
+                           new_l1_table, new_l1_size2);
     if (ret < 0)
         goto fail;
     for(i = 0; i < s->l1_size; i++)
@@ -115,7 +118,8 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
     BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_ACTIVATE_TABLE);
     cpu_to_be32w((uint32_t*)data, new_l1_size);
     stq_be_p(data + 4, new_l1_table_offset);
-    ret = bdrv_pwrite_sync(bs->file, offsetof(QCowHeader, l1_size), data,sizeof(data));
+    ret = bdrv_pwrite_sync(bs->file->bs, offsetof(QCowHeader, l1_size),
+                           data, sizeof(data));
     if (ret < 0) {
         goto fail;
     }
@@ -148,7 +152,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
 static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
     uint64_t **l2_table)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     int ret;
 
     ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset, (void**) l2_table);
@@ -163,13 +167,15 @@ static int l2_load(BlockDriverState *bs, uint64_t l2_offset,
 #define L1_ENTRIES_PER_SECTOR (512 / 8)
 int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
 {
-    BDRVQcowState *s = bs->opaque;
-    uint64_t buf[L1_ENTRIES_PER_SECTOR];
+    BDRVQcow2State *s = bs->opaque;
+    uint64_t buf[L1_ENTRIES_PER_SECTOR] = { 0 };
     int l1_start_index;
     int i, ret;
 
     l1_start_index = l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
-    for (i = 0; i < L1_ENTRIES_PER_SECTOR; i++) {
+    for (i = 0; i < L1_ENTRIES_PER_SECTOR && l1_start_index + i < s->l1_size;
+         i++)
+    {
         buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
     }
 
@@ -180,8 +186,9 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
-    ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
-        buf, sizeof(buf));
+    ret = bdrv_pwrite_sync(bs->file->bs,
+                           s->l1_table_offset + 8 * l1_start_index,
+                           buf, sizeof(buf));
     if (ret < 0) {
         return ret;
     }
@@ -201,7 +208,7 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
 
 static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     uint64_t old_l2_offset;
     uint64_t *l2_table = NULL;
     int64_t l2_offset;
@@ -251,17 +258,14 @@ static int l2_allocate(BlockDriverState *bs, int l1_index, uint64_t **table)
 
         memcpy(l2_table, old_table, s->cluster_size);
 
-        ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &old_table);
-        if (ret < 0) {
-            goto fail;
-        }
+        qcow2_cache_put(bs, s->l2_table_cache, (void **) &old_table);
     }
 
     /* write the l2 table to the file */
     BLKDBG_EVENT(bs->file, BLKDBG_L2_ALLOC_WRITE);
 
     trace_qcow2_l2_allocate_write_l2(bs, l1_index);
-    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
     ret = qcow2_cache_flush(bs, s->l2_table_cache);
     if (ret < 0) {
         goto fail;
@@ -299,7 +303,7 @@ fail:
  * as contiguous. (This allows it, for example, to stop at the first compressed
  * cluster which may require a different handling)
  */
-static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
+static int count_contiguous_clusters(int nb_clusters, int cluster_size,
         uint64_t *l2_table, uint64_t stop_flags)
 {
     int i;
@@ -310,7 +314,7 @@ static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
     if (!offset)
         return 0;
 
-    assert(qcow2_get_cluster_type(first_entry) != QCOW2_CLUSTER_COMPRESSED);
+    assert(qcow2_get_cluster_type(first_entry) == QCOW2_CLUSTER_NORMAL);
 
     for (i = 0; i < nb_clusters; i++) {
         uint64_t l2_entry = be64_to_cpu(l2_table[i]) & mask;
@@ -322,14 +326,16 @@ static int count_contiguous_clusters(uint64_t nb_clusters, int cluster_size,
        return i;
 }
 
-static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_table)
+static int count_contiguous_clusters_by_type(int nb_clusters,
+                                             uint64_t *l2_table,
+                                             int wanted_type)
 {
     int i;
 
     for (i = 0; i < nb_clusters; i++) {
         int type = qcow2_get_cluster_type(be64_to_cpu(l2_table[i]));
 
-        if (type != QCOW2_CLUSTER_UNALLOCATED) {
+        if (type != wanted_type) {
             break;
         }
     }
@@ -340,26 +346,47 @@ static int count_contiguous_free_clusters(uint64_t nb_clusters, uint64_t *l2_tab
 /* The crypt function is compatible with the linux cryptoloop
    algorithm for < 4 GB images. NOTE: out_buf == in_buf is
    supported */
-void qcow2_encrypt_sectors(BDRVQcowState *s, int64_t sector_num,
-                           uint8_t *out_buf, const uint8_t *in_buf,
-                           int nb_sectors, int enc,
-                           const AES_KEY *key)
+int qcow2_encrypt_sectors(BDRVQcow2State *s, int64_t sector_num,
+                          uint8_t *out_buf, const uint8_t *in_buf,
+                          int nb_sectors, bool enc,
+                          Error **errp)
 {
     union {
         uint64_t ll[2];
         uint8_t b[16];
     } ivec;
     int i;
+    int ret;
 
     for(i = 0; i < nb_sectors; i++) {
         ivec.ll[0] = cpu_to_le64(sector_num);
         ivec.ll[1] = 0;
-        AES_cbc_encrypt(in_buf, out_buf, 512, key,
-                        ivec.b, enc);
+        if (qcrypto_cipher_setiv(s->cipher,
+                                 ivec.b, G_N_ELEMENTS(ivec.b),
+                                 errp) < 0) {
+            return -1;
+        }
+        if (enc) {
+            ret = qcrypto_cipher_encrypt(s->cipher,
+                                         in_buf,
+                                         out_buf,
+                                         512,
+                                         errp);
+        } else {
+            ret = qcrypto_cipher_decrypt(s->cipher,
+                                         in_buf,
+                                         out_buf,
+                                         512,
+                                         errp);
+        }
+        if (ret < 0) {
+            return -1;
+        }
         sector_num++;
         in_buf += 512;
         out_buf += 512;
     }
+    return 0;
 }
 
 static int coroutine_fn copy_sectors(BlockDriverState *bs,
@@ -367,7 +394,7 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
                                      uint64_t cluster_offset,
                                      int n_start, int n_end)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     QEMUIOVector qiov;
     struct iovec iov;
     int n, ret;
@@ -401,10 +428,16 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
         goto out;
     }
 
-    if (s->crypt_method) {
-        qcow2_encrypt_sectors(s, start_sect + n_start,
-                        iov.iov_base, iov.iov_base, n, 1,
-                        &s->aes_encrypt_key);
+    if (bs->encrypted) {
+        Error *err = NULL;
+        assert(s->cipher);
+        if (qcow2_encrypt_sectors(s, start_sect + n_start,
+                                  iov.iov_base, iov.iov_base, n,
+                                  true, &err) < 0) {
+            ret = -EIO;
+            error_free(err);
+            goto out;
+        }
     }
 
     ret = qcow2_pre_write_overlap_check(bs, 0,
@@ -414,7 +447,8 @@ static int coroutine_fn copy_sectors(BlockDriverState *bs,
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
-    ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
+    ret = bdrv_co_writev(bs->file->bs, (cluster_offset >> 9) + n_start, n,
+                         &qiov);
     if (ret < 0) {
         goto out;
     }
@@ -443,7 +477,7 @@ out:
 int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     int *num, uint64_t *cluster_offset)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     unsigned int l2_index;
     uint64_t l1_index, l2_offset, *l2_table;
     int l1_bits, c;
@@ -469,10 +503,11 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     if (nb_needed > nb_available) {
         nb_needed = nb_available;
     }
+    assert(nb_needed <= INT_MAX);
 
     *cluster_offset = 0;
 
-    /* seek the the l2 offset in the l1 table */
+    /* seek to the l2 offset in the l1 table */
 
     l1_index = offset >> l1_bits;
     if (l1_index >= s->l1_size) {
@@ -504,6 +539,8 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
 
     l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
     *cluster_offset = be64_to_cpu(l2_table[l2_index]);
+
+    /* nb_needed <= INT_MAX, thus nb_clusters <= INT_MAX, too */
     nb_clusters = size_to_clusters(s, nb_needed << 9);
 
     ret = qcow2_get_cluster_type(*cluster_offset);
@@ -521,13 +558,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
             ret = -EIO;
             goto fail;
         }
-        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
-                &l2_table[l2_index], QCOW_OFLAG_ZERO);
+        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
+                                              QCOW2_CLUSTER_ZERO);
         *cluster_offset = 0;
         break;
     case QCOW2_CLUSTER_UNALLOCATED:
         /* how many empty clusters ? */
-        c = count_contiguous_free_clusters(nb_clusters, &l2_table[l2_index]);
+        c = count_contiguous_clusters_by_type(nb_clusters, &l2_table[l2_index],
+                                              QCOW2_CLUSTER_UNALLOCATED);
         *cluster_offset = 0;
         break;
     case QCOW2_CLUSTER_NORMAL:
@@ -580,13 +618,13 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
                              uint64_t **new_l2_table,
                              int *new_l2_index)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     unsigned int l2_index;
     uint64_t l1_index, l2_offset;
     uint64_t *l2_table = NULL;
     int ret;
 
-    /* seek the the l2 offset in the l1 table */
+    /* seek to the l2 offset in the l1 table */
 
     l1_index = offset >> (s->l2_bits + s->cluster_bits);
     if (l1_index >= s->l1_size) {
@@ -654,7 +692,7 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
                                                uint64_t offset,
                                                int compressed_size)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     int l2_index, ret;
     uint64_t *l2_table;
     int64_t cluster_offset;
@@ -690,19 +728,16 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     /* compressed clusters never have the copied flag */
 
     BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
-    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
     l2_table[l2_index] = cpu_to_be64(cluster_offset);
-    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-    if (ret < 0) {
-        return 0;
-    }
+    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
 
     return cluster_offset;
 }
 
 static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     int ret;
 
     if (r->nb_sectors == 0) {
@@ -731,7 +766,7 @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m, Qcow2COWRegion *r)
 
 int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     int i, j = 0, l2_index, ret;
     uint64_t *old_cluster, *l2_table;
     uint64_t cluster_offset = m->alloc_offset;
@@ -769,7 +804,7 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
     if (ret < 0) {
         goto err;
     }
-    qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+    qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
 
     assert(l2_index + m->nb_clusters <= s->l2_size);
     for (i = 0; i < m->nb_clusters; i++) {
@@ -787,14 +822,10 @@ int qcow2_alloc_cluster_link_l2(BlockDriverState *bs, QCowL2Meta *m)
      }
 
 
-    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-    if (ret < 0) {
-        goto err;
-    }
+    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
 
     /*
      * If this was a COW, we need to decrease the refcount of the old cluster.
-     * Also flush bs->file to get the right order for L2 and refcount update.
      *
      * Don't discard clusters that reach a refcount of 0 (e.g. compressed
      * clusters), the next write will reuse them anyway.
@@ -817,7 +848,7 @@ err:
  * write, but require COW to be performed (this includes yet unallocated space,
  * which must copy from the backing file)
  */
-static int count_cow_clusters(BDRVQcowState *s, int nb_clusters,
+static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
     uint64_t *l2_table, int l2_index)
 {
     int i;
@@ -863,7 +894,7 @@ out:
 static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
     uint64_t *cur_bytes, QCowL2Meta **m)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     QCowL2Meta *old_alloc;
     uint64_t bytes = *cur_bytes;
 
@@ -936,13 +967,13 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
 static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
     uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     int l2_index;
     uint64_t cluster_offset;
     uint64_t *l2_table;
-    unsigned int nb_clusters;
+    uint64_t nb_clusters;
     unsigned int keep_clusters;
-    int ret, pret;
+    int ret;
 
     trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
                               *bytes);
@@ -959,6 +990,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
 
     l2_index = offset_to_l2_index(s, guest_offset);
     nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+    assert(nb_clusters <= INT_MAX);
 
     /* Find L2 entry for the first involved cluster */
     ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
@@ -1009,10 +1041,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
 
     /* Cleanup */
 out:
-    pret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-    if (pret < 0) {
-        return pret;
-    }
+    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
 
     /* Only return a host offset if we actually made progress. Otherwise we
      * would make requirements for handle_alloc() that it can't fulfill */
@@ -1044,9 +1073,9 @@ out:
  * restarted, but the whole request should not be failed.
  */
 static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
-    uint64_t *host_offset, unsigned int *nb_clusters)
+                                   uint64_t *host_offset, uint64_t *nb_clusters)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
 
     trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
                                          *host_offset, *nb_clusters);
@@ -1062,7 +1091,7 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
         *host_offset = cluster_offset;
         return 0;
     } else {
-        int ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
+        int64_t ret = qcow2_alloc_clusters_at(bs, *host_offset, *nb_clusters);
         if (ret < 0) {
             return ret;
         }
@@ -1094,11 +1123,11 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
 static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
     uint64_t *host_offset, uint64_t *bytes, QCowL2Meta **m)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     int l2_index;
     uint64_t *l2_table;
     uint64_t entry;
-    unsigned int nb_clusters;
+    uint64_t nb_clusters;
     int ret;
 
     uint64_t alloc_cluster_offset;
@@ -1116,6 +1145,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
 
     l2_index = offset_to_l2_index(s, guest_offset);
     nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+    assert(nb_clusters <= INT_MAX);
 
     /* Find L2 entry for the first involved cluster */
     ret = get_cluster_table(bs, guest_offset, &l2_table, &l2_index);
@@ -1137,10 +1167,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
      * wrong with our code. */
     assert(nb_clusters > 0);
 
-    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-    if (ret < 0) {
-        return ret;
-    }
+    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
 
     /* Allocate, if necessary at a given offset in the image file */
     alloc_cluster_offset = start_of_cluster(s, *host_offset);
@@ -1249,7 +1276,7 @@ fail:
 int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
     int *num, uint64_t *host_offset, QCowL2Meta **m)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     uint64_t start, remaining;
     uint64_t cluster_offset;
     uint64_t cur_bytes;
@@ -1261,7 +1288,7 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
 
 again:
     start = offset;
-    remaining = *num << BDRV_SECTOR_BITS;
+    remaining = (uint64_t)*num << BDRV_SECTOR_BITS;
     cluster_offset = 0;
     *host_offset = 0;
     cur_bytes = 0;
@@ -1383,7 +1410,7 @@ static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
 
 int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     int ret, csize, nb_csectors, sector_offset;
     uint64_t coffset;
 
@@ -1393,7 +1420,8 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
         sector_offset = coffset & 511;
         csize = nb_csectors * 512 - sector_offset;
         BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
-        ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data, nb_csectors);
+        ret = bdrv_read(bs->file->bs, coffset >> 9, s->cluster_data,
+                        nb_csectors);
         if (ret < 0) {
             return ret;
         }
@@ -1412,9 +1440,10 @@ int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
  * clusters.
  */
 static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
-    unsigned int nb_clusters, enum qcow2_discard_type type)
+                             uint64_t nb_clusters, enum qcow2_discard_type type,
+                             bool full_discard)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     uint64_t *l2_table;
     int l2_index;
     int ret;
@@ -1427,6 +1456,7 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
 
     /* Limit nb_clusters to one L2 table */
     nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+    assert(nb_clusters <= INT_MAX);
 
     for (i = 0; i < nb_clusters; i++) {
         uint64_t old_l2_entry;
@@ -1434,23 +1464,30 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
         old_l2_entry = be64_to_cpu(l2_table[l2_index + i]);
 
         /*
-         * Make sure that a discarded area reads back as zeroes for v3 images
-         * (we cannot do it for v2 without actually writing a zero-filled
-         * buffer). We can skip the operation if the cluster is already marked
-         * as zero, or if it's unallocated and we don't have a backing file.
+         * If full_discard is false, make sure that a discarded area reads back
+         * as zeroes for v3 images (we cannot do it for v2 without actually
+         * writing a zero-filled buffer). We can skip the operation if the
+         * cluster is already marked as zero, or if it's unallocated and we
+         * don't have a backing file.
          *
          * TODO We might want to use bdrv_get_block_status(bs) here, but we're
          * holding s->lock, so that doesn't work today.
+         *
+         * If full_discard is true, the sector should not read back as zeroes,
+         * but rather fall through to the backing file.
          */
         switch (qcow2_get_cluster_type(old_l2_entry)) {
             case QCOW2_CLUSTER_UNALLOCATED:
-                if (!bs->backing_hd) {
+                if (full_discard || !bs->backing) {
                     continue;
                 }
                 break;
 
             case QCOW2_CLUSTER_ZERO:
-                continue;
+                if (!full_discard) {
+                    continue;
+                }
+                break;
 
             case QCOW2_CLUSTER_NORMAL:
             case QCOW2_CLUSTER_COMPRESSED:
@@ -1461,8 +1498,8 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
         }
 
         /* First remove L2 entries */
-        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
-        if (s->qcow_version >= 3) {
+        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
+        if (!full_discard && s->qcow_version >= 3) {
             l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
         } else {
             l2_table[l2_index + i] = cpu_to_be64(0);
@@ -1472,20 +1509,17 @@ static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
         qcow2_free_any_clusters(bs, old_l2_entry, 1, type);
     }
 
-    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-    if (ret < 0) {
-        return ret;
-    }
+    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
 
     return nb_clusters;
 }
 
 int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
-    int nb_sectors, enum qcow2_discard_type type)
+    int nb_sectors, enum qcow2_discard_type type, bool full_discard)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     uint64_t end_offset;
-    unsigned int nb_clusters;
+    uint64_t nb_clusters;
     int ret;
 
     end_offset = offset + (nb_sectors << BDRV_SECTOR_BITS);
@@ -1504,7 +1538,7 @@ int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
 
     /* Each L2 table is handled by its own loop iteration */
     while (nb_clusters > 0) {
-        ret = discard_single_l2(bs, offset, nb_clusters, type);
+        ret = discard_single_l2(bs, offset, nb_clusters, type, full_discard);
         if (ret < 0) {
             goto fail;
         }
@@ -1527,9 +1561,9 @@ fail:
  * clusters.
  */
 static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
-    unsigned int nb_clusters)
+                          uint64_t nb_clusters)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     uint64_t *l2_table;
     int l2_index;
     int ret;
@@ -1542,6 +1576,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
 
     /* Limit nb_clusters to one L2 table */
     nb_clusters = MIN(nb_clusters, s->l2_size - l2_index);
+    assert(nb_clusters <= INT_MAX);
 
     for (i = 0; i < nb_clusters; i++) {
         uint64_t old_offset;
@@ -1549,7 +1584,7 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
         old_offset = be64_to_cpu(l2_table[l2_index + i]);
 
         /* Update L2 entries */
-        qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+        qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
         if (old_offset & QCOW_OFLAG_COMPRESSED) {
             l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
             qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
@@ -1558,18 +1593,15 @@ static int zero_single_l2(BlockDriverState *bs, uint64_t offset,
         }
     }
 
-    ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
-    if (ret < 0) {
-        return ret;
-    }
+    qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
 
     return nb_clusters;
 }
 
 int qcow2_zero_clusters(BlockDriverState *bs, uint64_t offset, int nb_sectors)
 {
-    BDRVQcowState *s = bs->opaque;
-    unsigned int nb_clusters;
+    BDRVQcow2State *s = bs->opaque;
+    uint64_t nb_clusters;
     int ret;
 
     /* The zero flag is only supported by version 3 and newer */
@@ -1604,17 +1636,17 @@ fail:
  * Expands all zero clusters in a specific L1 table (or deallocates them, for
  * non-backed non-pre-allocated zero clusters).
  *
- * expanded_clusters is a bitmap where every bit corresponds to one cluster in
- * the image file; a bit gets set if the corresponding cluster has been used for
- * zero expansion (i.e., has been filled with zeroes and is referenced from an
- * L2 table). nb_clusters contains the total cluster count of the image file,
- * i.e., the number of bits in expanded_clusters.
+ * l1_entries and *visited_l1_entries are used to keep track of progress for
+ * status_cb(). l1_entries contains the total number of L1 entries and
+ * *visited_l1_entries counts all visited L1 entries.
  */
 static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
-                                      int l1_size, uint8_t **expanded_clusters,
-                                      uint64_t *nb_clusters)
+                                      int l1_size, int64_t *visited_l1_entries,
+                                      int64_t l1_entries,
+                                      BlockDriverAmendStatusCB *status_cb,
+                                      void *cb_opaque)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     bool is_active_l1 = (l1_table == s->l1_table);
     uint64_t *l2_table = NULL;
     int ret;
@@ -1623,7 +1655,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
     if (!is_active_l1) {
         /* inactive L2 tables require a buffer to be stored in when loading
          * them from disk */
-        l2_table = qemu_try_blockalign(bs->file, s->cluster_size);
+        l2_table = qemu_try_blockalign(bs->file->bs, s->cluster_size);
         if (l2_table == NULL) {
             return -ENOMEM;
         }
@@ -1632,57 +1664,56 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
     for (i = 0; i < l1_size; i++) {
         uint64_t l2_offset = l1_table[i] & L1E_OFFSET_MASK;
         bool l2_dirty = false;
+        uint64_t l2_refcount;
 
         if (!l2_offset) {
             /* unallocated */
+            (*visited_l1_entries)++;
+            if (status_cb) {
+                status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
+            }
             continue;
         }
 
+        if (offset_into_cluster(s, l2_offset)) {
+            qcow2_signal_corruption(bs, true, -1, -1, "L2 table offset %#"
+                                    PRIx64 " unaligned (L1 index: %#x)",
+                                    l2_offset, i);
+            ret = -EIO;
+            goto fail;
+        }
+
         if (is_active_l1) {
             /* get active L2 tables from cache */
             ret = qcow2_cache_get(bs, s->l2_table_cache, l2_offset,
                     (void **)&l2_table);
         } else {
             /* load inactive L2 tables from disk */
-            ret = bdrv_read(bs->file, l2_offset / BDRV_SECTOR_SIZE,
-                    (void *)l2_table, s->cluster_sectors);
+            ret = bdrv_read(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+                            (void *)l2_table, s->cluster_sectors);
         }
         if (ret < 0) {
             goto fail;
         }
 
+        ret = qcow2_get_refcount(bs, l2_offset >> s->cluster_bits,
+                                 &l2_refcount);
+        if (ret < 0) {
+            goto fail;
+        }
+
         for (j = 0; j < s->l2_size; j++) {
             uint64_t l2_entry = be64_to_cpu(l2_table[j]);
-            int64_t offset = l2_entry & L2E_OFFSET_MASK, cluster_index;
+            int64_t offset = l2_entry & L2E_OFFSET_MASK;
             int cluster_type = qcow2_get_cluster_type(l2_entry);
             bool preallocated = offset != 0;
 
-            if (cluster_type == QCOW2_CLUSTER_NORMAL) {
-                cluster_index = offset >> s->cluster_bits;
-                assert((cluster_index >= 0) && (cluster_index < *nb_clusters));
-                if ((*expanded_clusters)[cluster_index / 8] &
-                    (1 << (cluster_index % 8))) {
-                    /* Probably a shared L2 table; this cluster was a zero
-                     * cluster which has been expanded, its refcount
-                     * therefore most likely requires an update. */
-                    ret = qcow2_update_cluster_refcount(bs, cluster_index, 1,
-                                                        QCOW2_DISCARD_NEVER);
-                    if (ret < 0) {
-                        goto fail;
-                    }
-                    /* Since we just increased the refcount, the COPIED flag may
-                     * no longer be set. */
-                    l2_table[j] = cpu_to_be64(l2_entry & ~QCOW_OFLAG_COPIED);
-                    l2_dirty = true;
-                }
-                continue;
-            }
-            else if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_ZERO) {
+            if (cluster_type != QCOW2_CLUSTER_ZERO) {
                 continue;
             }
 
             if (!preallocated) {
-                if (!bs->backing_hd) {
+                if (!bs->backing) {
                     /* not backed; therefore we can simply deallocate the
                      * cluster */
                     l2_table[j] = 0;
@@ -1695,6 +1726,33 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                     ret = offset;
                     goto fail;
                 }
+
+                if (l2_refcount > 1) {
+                    /* For shared L2 tables, set the refcount accordingly (it is
+                     * already 1 and needs to be l2_refcount) */
+                    ret = qcow2_update_cluster_refcount(bs,
+                            offset >> s->cluster_bits,
+                            refcount_diff(1, l2_refcount), false,
+                            QCOW2_DISCARD_OTHER);
+                    if (ret < 0) {
+                        qcow2_free_clusters(bs, offset, s->cluster_size,
+                                            QCOW2_DISCARD_OTHER);
+                        goto fail;
+                    }
+                }
+            }
+
+            if (offset_into_cluster(s, offset)) {
+                qcow2_signal_corruption(bs, true, -1, -1, "Data cluster offset "
+                                        "%#" PRIx64 " unaligned (L2 offset: %#"
+                                        PRIx64 ", L2 index: %#x)", offset,
+                                        l2_offset, j);
+                if (!preallocated) {
+                    qcow2_free_clusters(bs, offset, s->cluster_size,
+                                        QCOW2_DISCARD_ALWAYS);
+                }
+                ret = -EIO;
+                goto fail;
             }
 
             ret = qcow2_pre_write_overlap_check(bs, 0, offset, s->cluster_size);
@@ -1706,7 +1764,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                 goto fail;
             }
 
-            ret = bdrv_write_zeroes(bs->file, offset / BDRV_SECTOR_SIZE,
+            ret = bdrv_write_zeroes(bs->file->bs, offset / BDRV_SECTOR_SIZE,
                                     s->cluster_sectors, 0);
             if (ret < 0) {
                 if (!preallocated) {
@@ -1716,41 +1774,20 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                 goto fail;
             }
 
-            l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
-            l2_dirty = true;
-
-            cluster_index = offset >> s->cluster_bits;
-
-            if (cluster_index >= *nb_clusters) {
-                uint64_t old_bitmap_size = (*nb_clusters + 7) / 8;
-                uint64_t new_bitmap_size;
-                /* The offset may lie beyond the old end of the underlying image
-                 * file for growable files only */
-                assert(bs->file->growable);
-                *nb_clusters = size_to_clusters(s, bs->file->total_sectors *
-                                                BDRV_SECTOR_SIZE);
-                new_bitmap_size = (*nb_clusters + 7) / 8;
-                *expanded_clusters = g_realloc(*expanded_clusters,
-                                               new_bitmap_size);
-                /* clear the newly allocated space */
-                memset(&(*expanded_clusters)[old_bitmap_size], 0,
-                       new_bitmap_size - old_bitmap_size);
+            if (l2_refcount == 1) {
+                l2_table[j] = cpu_to_be64(offset | QCOW_OFLAG_COPIED);
+            } else {
+                l2_table[j] = cpu_to_be64(offset);
             }
-
-            assert((cluster_index >= 0) && (cluster_index < *nb_clusters));
-            (*expanded_clusters)[cluster_index / 8] |= 1 << (cluster_index % 8);
+            l2_dirty = true;
         }
 
         if (is_active_l1) {
             if (l2_dirty) {
-                qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
+                qcow2_cache_entry_mark_dirty(bs, s->l2_table_cache, l2_table);
                 qcow2_cache_depends_on_flush(s->l2_table_cache);
             }
-            ret = qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
-            if (ret < 0) {
-                l2_table = NULL;
-                goto fail;
-            }
+            qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
         } else {
             if (l2_dirty) {
                 ret = qcow2_pre_write_overlap_check(bs,
@@ -1760,13 +1797,18 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                     goto fail;
                 }
 
-                ret = bdrv_write(bs->file, l2_offset / BDRV_SECTOR_SIZE,
-                        (void *)l2_table, s->cluster_sectors);
+                ret = bdrv_write(bs->file->bs, l2_offset / BDRV_SECTOR_SIZE,
+                                 (void *)l2_table, s->cluster_sectors);
                 if (ret < 0) {
                     goto fail;
                 }
             }
         }
+
+        (*visited_l1_entries)++;
+        if (status_cb) {
+            status_cb(bs, *visited_l1_entries, l1_entries, cb_opaque);
+        }
     }
 
     ret = 0;
@@ -1776,12 +1818,7 @@ fail:
         if (!is_active_l1) {
             qemu_vfree(l2_table);
         } else {
-            if (ret < 0) {
-                qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2_table);
-            } else {
-                ret = qcow2_cache_put(bs, s->l2_table_cache,
-                        (void **)&l2_table);
-            }
+            qcow2_cache_put(bs, s->l2_table_cache, (void **) &l2_table);
         }
     }
     return ret;
@@ -1793,25 +1830,26 @@ fail:
  * allocation for pre-allocated ones). This is important for downgrading to a
  * qcow2 version which doesn't yet support metadata zero clusters.
  */
-int qcow2_expand_zero_clusters(BlockDriverState *bs)
+int qcow2_expand_zero_clusters(BlockDriverState *bs,
+                               BlockDriverAmendStatusCB *status_cb,
+                               void *cb_opaque)
 {
-    BDRVQcowState *s = bs->opaque;
+    BDRVQcow2State *s = bs->opaque;
     uint64_t *l1_table = NULL;
-    uint64_t nb_clusters;
-    uint8_t *expanded_clusters;
+    int64_t l1_entries = 0, visited_l1_entries = 0;
     int ret;
     int i, j;
 
-    nb_clusters = size_to_clusters(s, bs->file->total_sectors *
-                                   BDRV_SECTOR_SIZE);
-    expanded_clusters = g_try_malloc0((nb_clusters + 7) / 8);
-    if (expanded_clusters == NULL) {
-        ret = -ENOMEM;
-        goto fail;
+    if (status_cb) {
+        l1_entries = s->l1_size;
+        for (i = 0; i < s->nb_snapshots; i++) {
+            l1_entries += s->snapshots[i].l1_size;
+        }
     }
 
     ret = expand_zero_clusters_in_l1(bs, s->l1_table, s->l1_size,
-                                     &expanded_clusters, &nb_clusters);
+                                     &visited_l1_entries, l1_entries,
+                                     status_cb, cb_opaque);
     if (ret < 0) {
         goto fail;
     }
@@ -1834,8 +1872,9 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs)
 
         l1_table = g_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
 
-        ret = bdrv_read(bs->file, s->snapshots[i].l1_table_offset /
-                BDRV_SECTOR_SIZE, (void *)l1_table, l1_sectors);
+        ret = bdrv_read(bs->file->bs,
+                        s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,
+                        (void *)l1_table, l1_sectors);
         if (ret < 0) {
             goto fail;
         }
@@ -1845,7 +1884,8 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs)
         }
 
         ret = expand_zero_clusters_in_l1(bs, l1_table, s->snapshots[i].l1_size,
-                                         &expanded_clusters, &nb_clusters);
+                                         &visited_l1_entries, l1_entries,
+                                         status_cb, cb_opaque);
         if (ret < 0) {
             goto fail;
         }
@@ -1854,7 +1894,6 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs)
     ret = 0;
 
 fail:
-    g_free(expanded_clusters);
     g_free(l1_table);
     return ret;
 }