git.proxmox.com Git - mirror_qemu.git/blobdiff - block/qcow2-cluster.c
CODING_STYLE: specify the indent rule for multiline code
[mirror_qemu.git] / block / qcow2-cluster.c
index e406b0f3b9ead9417aa67146ca0203130bcfffa1..974a4e8656229ec5881a7f8da179775c5e7cabde 100644 (file)
 #include "qemu/osdep.h"
 #include <zlib.h>
 
+#include "qapi/error.h"
 #include "qemu-common.h"
 #include "block/block_int.h"
-#include "block/qcow2.h"
+#include "qcow2.h"
 #include "qemu/bswap.h"
 #include "trace.h"
 
@@ -126,11 +127,11 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
 
     new_l1_size2 = sizeof(uint64_t) * new_l1_size;
     new_l1_table = qemu_try_blockalign(bs->file->bs,
-                                       align_offset(new_l1_size2, 512));
+                                       ROUND_UP(new_l1_size2, 512));
     if (new_l1_table == NULL) {
         return -ENOMEM;
     }
-    memset(new_l1_table, 0, align_offset(new_l1_size2, 512));
+    memset(new_l1_table, 0, ROUND_UP(new_l1_size2, 512));
 
     if (s->l1_size) {
         memcpy(new_l1_table, s->l1_table, s->l1_size * sizeof(uint64_t));
@@ -152,7 +153,7 @@ int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
     /* the L1 position has not yet been updated, so these clusters must
      * indeed be completely free */
     ret = qcow2_pre_write_overlap_check(bs, 0, new_l1_table_offset,
-                                        new_l1_size2);
+                                        new_l1_size2, false);
     if (ret < 0) {
         goto fail;
     }
@@ -237,7 +238,7 @@ int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
     }
 
     ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L1,
-            s->l1_table_offset + 8 * l1_start_index, sizeof(buf));
+            s->l1_table_offset + 8 * l1_start_index, sizeof(buf), false);
     if (ret < 0) {
         return ret;
     }
@@ -284,6 +285,9 @@ static int l2_allocate(BlockDriverState *bs, int l1_index)
         goto fail;
     }
 
+    /* The offset must fit in the offset field of the L1 table entry */
+    assert((l2_offset & L1E_OFFSET_MASK) == l2_offset);
+
     /* If we're allocating the table at offset 0 then something is wrong */
     if (l2_offset == 0) {
         qcow2_signal_corruption(bs, true, -1, -1, "Preventing invalid "
@@ -376,8 +380,8 @@ fail:
  * as contiguous. (This allows it, for example, to stop at the first compressed
  * cluster which may require a different handling)
  */
-static int count_contiguous_clusters(int nb_clusters, int cluster_size,
-        uint64_t *l2_slice, uint64_t stop_flags)
+static int count_contiguous_clusters(BlockDriverState *bs, int nb_clusters,
+        int cluster_size, uint64_t *l2_slice, uint64_t stop_flags)
 {
     int i;
     QCow2ClusterType first_cluster_type;
@@ -385,12 +389,12 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
     uint64_t first_entry = be64_to_cpu(l2_slice[0]);
     uint64_t offset = first_entry & mask;
 
-    if (!offset) {
+    first_cluster_type = qcow2_get_cluster_type(bs, first_entry);
+    if (first_cluster_type == QCOW2_CLUSTER_UNALLOCATED) {
         return 0;
     }
 
     /* must be allocated */
-    first_cluster_type = qcow2_get_cluster_type(first_entry);
     assert(first_cluster_type == QCOW2_CLUSTER_NORMAL ||
            first_cluster_type == QCOW2_CLUSTER_ZERO_ALLOC);
 
@@ -401,14 +405,15 @@ static int count_contiguous_clusters(int nb_clusters, int cluster_size,
         }
     }
 
-       return i;
+        return i;
 }
 
 /*
  * Checks how many consecutive unallocated clusters in a given L2
  * slice have the same cluster type.
  */
-static int count_contiguous_clusters_unallocated(int nb_clusters,
+static int count_contiguous_clusters_unallocated(BlockDriverState *bs,
+                                                 int nb_clusters,
                                                  uint64_t *l2_slice,
                                                  QCow2ClusterType wanted_type)
 {
@@ -418,7 +423,7 @@ static int count_contiguous_clusters_unallocated(int nb_clusters,
            wanted_type == QCOW2_CLUSTER_UNALLOCATED);
     for (i = 0; i < nb_clusters; i++) {
         uint64_t entry = be64_to_cpu(l2_slice[i]);
-        QCow2ClusterType type = qcow2_get_cluster_type(entry);
+        QCow2ClusterType type = qcow2_get_cluster_type(bs, entry);
 
         if (type != wanted_type) {
             break;
@@ -485,6 +490,7 @@ static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
                                              unsigned offset_in_cluster,
                                              QEMUIOVector *qiov)
 {
+    BDRVQcow2State *s = bs->opaque;
     int ret;
 
     if (qiov->size == 0) {
@@ -492,13 +498,13 @@ static int coroutine_fn do_perform_cow_write(BlockDriverState *bs,
     }
 
     ret = qcow2_pre_write_overlap_check(bs, 0,
-            cluster_offset + offset_in_cluster, qiov->size);
+            cluster_offset + offset_in_cluster, qiov->size, true);
     if (ret < 0) {
         return ret;
     }
 
     BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
-    ret = bdrv_co_pwritev(bs->file, cluster_offset + offset_in_cluster,
+    ret = bdrv_co_pwritev(s->data_file, cluster_offset + offset_in_cluster,
                           qiov->size, qiov, 0);
     if (ret < 0) {
         return ret;
@@ -591,7 +597,7 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
      * true */
     assert(nb_clusters <= INT_MAX);
 
-    type = qcow2_get_cluster_type(*cluster_offset);
+    type = qcow2_get_cluster_type(bs, *cluster_offset);
     if (s->qcow_version < 3 && (type == QCOW2_CLUSTER_ZERO_PLAIN ||
                                 type == QCOW2_CLUSTER_ZERO_ALLOC)) {
         qcow2_signal_corruption(bs, true, -1, -1, "Zero cluster entry found"
@@ -602,6 +608,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     }
     switch (type) {
     case QCOW2_CLUSTER_COMPRESSED:
+        if (has_data_file(bs)) {
+            qcow2_signal_corruption(bs, true, -1, -1, "Compressed cluster "
+                                    "entry found in image with external data "
+                                    "file (L2 offset: %#" PRIx64 ", L2 index: "
+                                    "%#x)", l2_offset, l2_index);
+            ret = -EIO;
+            goto fail;
+        }
         /* Compressed clusters can only be processed one by one */
         c = 1;
         *cluster_offset &= L2E_COMPRESSED_OFFSET_SIZE_MASK;
@@ -609,14 +623,14 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
     case QCOW2_CLUSTER_ZERO_PLAIN:
     case QCOW2_CLUSTER_UNALLOCATED:
         /* how many empty clusters ? */
-        c = count_contiguous_clusters_unallocated(nb_clusters,
+        c = count_contiguous_clusters_unallocated(bs, nb_clusters,
                                                   &l2_slice[l2_index], type);
         *cluster_offset = 0;
         break;
     case QCOW2_CLUSTER_ZERO_ALLOC:
     case QCOW2_CLUSTER_NORMAL:
         /* how many allocated clusters ? */
-        c = count_contiguous_clusters(nb_clusters, s->cluster_size,
+        c = count_contiguous_clusters(bs, nb_clusters, s->cluster_size,
                                       &l2_slice[l2_index], QCOW_OFLAG_ZERO);
         *cluster_offset &= L2E_OFFSET_MASK;
         if (offset_into_cluster(s, *cluster_offset)) {
@@ -628,6 +642,17 @@ int qcow2_get_cluster_offset(BlockDriverState *bs, uint64_t offset,
             ret = -EIO;
             goto fail;
         }
+        if (has_data_file(bs) && *cluster_offset != offset - offset_in_cluster)
+        {
+            qcow2_signal_corruption(bs, true, -1, -1,
+                                    "External data file host cluster offset %#"
+                                    PRIx64 " does not match guest cluster "
+                                    "offset: %#" PRIx64
+                                    ", L2 index: %#x)", *cluster_offset,
+                                    offset - offset_in_cluster, l2_index);
+            ret = -EIO;
+            goto fail;
+        }
         break;
     default:
         abort();
@@ -731,19 +756,16 @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
 /*
  * alloc_compressed_cluster_offset
  *
- * For a given offset of the disk image, return cluster offset in
- * qcow2 file.
- *
- * If the offset is not found, allocate a new compressed cluster.
- *
- * Return the cluster offset if successful,
- * Return 0, otherwise.
+ * For a given offset on the virtual disk, allocate a new compressed cluster
+ * and put the host offset of the cluster into *host_offset. If a cluster is
+ * already allocated at the offset, return an error.
  *
+ * Return 0 on success and -errno in error cases
  */
-
-uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
-                                               uint64_t offset,
-                                               int compressed_size)
+int qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
+                                          uint64_t offset,
+                                          int compressed_size,
+                                          uint64_t *host_offset)
 {
     BDRVQcow2State *s = bs->opaque;
     int l2_index, ret;
@@ -751,9 +773,13 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     int64_t cluster_offset;
     int nb_csectors;
 
+    if (has_data_file(bs)) {
+        return 0;
+    }
+
     ret = get_cluster_table(bs, offset, &l2_slice, &l2_index);
     if (ret < 0) {
-        return 0;
+        return ret;
     }
 
     /* Compression can't overwrite anything. Fail if the cluster was already
@@ -761,13 +787,13 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     cluster_offset = be64_to_cpu(l2_slice[l2_index]);
     if (cluster_offset & L2E_OFFSET_MASK) {
         qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
-        return 0;
+        return -EIO;
     }
 
     cluster_offset = qcow2_alloc_bytes(bs, compressed_size);
     if (cluster_offset < 0) {
         qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
-        return 0;
+        return cluster_offset;
     }
 
     nb_csectors = ((cluster_offset + compressed_size - 1) >> 9) -
@@ -785,7 +811,8 @@ uint64_t qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
     l2_slice[l2_index] = cpu_to_be64(cluster_offset);
     qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
-    return cluster_offset;
+    *host_offset = cluster_offset & s->cluster_offset_mask;
+    return 0;
 }
 
 static int perform_cow(BlockDriverState *bs, QCowL2Meta *m)
@@ -993,19 +1020,30 @@ err:
     return ret;
  }
 
+/**
+ * Releases the clusters that were allocated for a write request which has
+ * failed, because they won't actually be linked.
+ *
+ * Frees m->nb_clusters clusters starting at m->alloc_offset in the image
+ * file, without issuing a discard for them (QCOW2_DISCARD_NEVER).
+ *
+ * Nothing is freed for images with an external data file:
+ * do_alloc_cluster_offset() hands out the guest cluster offset for such
+ * images without allocating anything in the image file, so no refcounts were
+ * taken for these clusters and dropping them here would corrupt the refcounts
+ * of unrelated clusters.
+ */
+void qcow2_alloc_cluster_abort(BlockDriverState *bs, QCowL2Meta *m)
+{
+    BDRVQcow2State *s = bs->opaque;
+
+    if (has_data_file(bs)) {
+        return;
+    }
+
+    qcow2_free_clusters(bs, m->alloc_offset, m->nb_clusters << s->cluster_bits,
+                        QCOW2_DISCARD_NEVER);
+}
+
 /*
  * Returns the number of contiguous clusters that can be used for an allocating
  * write, but require COW to be performed (this includes yet unallocated space,
  * which must copy from the backing file)
  */
-static int count_cow_clusters(BDRVQcow2State *s, int nb_clusters,
+static int count_cow_clusters(BlockDriverState *bs, int nb_clusters,
     uint64_t *l2_slice, int l2_index)
 {
     int i;
 
     for (i = 0; i < nb_clusters; i++) {
         uint64_t l2_entry = be64_to_cpu(l2_slice[l2_index + i]);
-        QCow2ClusterType cluster_type = qcow2_get_cluster_type(l2_entry);
+        QCow2ClusterType cluster_type = qcow2_get_cluster_type(bs, l2_entry);
 
         switch(cluster_type) {
         case QCOW2_CLUSTER_NORMAL:
@@ -1093,9 +1131,9 @@ static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
 
 /*
  * Checks how many already allocated clusters that don't require a copy on
- * write there are at the given guest_offset (up to *bytes). If
- * *host_offset is not zero, only physically contiguous clusters beginning at
- * this host offset are counted.
+ * write there are at the given guest_offset (up to *bytes). If *host_offset is
+ * not INV_OFFSET, only physically contiguous clusters beginning at this host
+ * offset are counted.
  *
  * Note that guest_offset may not be cluster aligned. In this case, the
  * returned *host_offset points to exact byte referenced by guest_offset and
@@ -1127,8 +1165,8 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
     trace_qcow2_handle_copied(qemu_coroutine_self(), guest_offset, *host_offset,
                               *bytes);
 
-    assert(*host_offset == 0 ||    offset_into_cluster(s, guest_offset)
-                                == offset_into_cluster(s, *host_offset));
+    assert(*host_offset == INV_OFFSET || offset_into_cluster(s, guest_offset)
+                                      == offset_into_cluster(s, *host_offset));
 
     /*
      * Calculate the number of clusters to look for. We stop at L2 slice
@@ -1150,7 +1188,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
     cluster_offset = be64_to_cpu(l2_slice[l2_index]);
 
     /* Check how many clusters are already allocated and don't need COW */
-    if (qcow2_get_cluster_type(cluster_offset) == QCOW2_CLUSTER_NORMAL
+    if (qcow2_get_cluster_type(bs, cluster_offset) == QCOW2_CLUSTER_NORMAL
         && (cluster_offset & QCOW_OFLAG_COPIED))
     {
         /* If a specific host_offset is required, check it */
@@ -1166,7 +1204,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
             goto out;
         }
 
-        if (*host_offset != 0 && !offset_matches) {
+        if (*host_offset != INV_OFFSET && !offset_matches) {
             *bytes = 0;
             ret = 0;
             goto out;
@@ -1174,7 +1212,7 @@ static int handle_copied(BlockDriverState *bs, uint64_t guest_offset,
 
         /* We keep all QCOW_OFLAG_COPIED clusters */
         keep_clusters =
-            count_contiguous_clusters(nb_clusters, s->cluster_size,
+            count_contiguous_clusters(bs, nb_clusters, s->cluster_size,
                                       &l2_slice[l2_index],
                                       QCOW_OFLAG_COPIED | QCOW_OFLAG_ZERO);
         assert(keep_clusters <= nb_clusters);
@@ -1209,10 +1247,10 @@ out:
  * contain the number of clusters that have been allocated and are contiguous
  * in the image file.
  *
- * If *host_offset is non-zero, it specifies the offset in the image file at
- * which the new clusters must start. *nb_clusters can be 0 on return in this
- * case if the cluster at host_offset is already in use. If *host_offset is
- * zero, the clusters can be allocated anywhere in the image file.
+ * If *host_offset is not INV_OFFSET, it specifies the offset in the image file
+ * at which the new clusters must start. *nb_clusters can be 0 on return in
+ * this case if the cluster at host_offset is already in use. If *host_offset
+ * is INV_OFFSET, the clusters can be allocated anywhere in the image file.
  *
  * *host_offset is updated to contain the offset into the image file at which
  * the first allocated cluster starts.
@@ -1229,9 +1267,16 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
     trace_qcow2_do_alloc_clusters_offset(qemu_coroutine_self(), guest_offset,
                                          *host_offset, *nb_clusters);
 
+    if (has_data_file(bs)) {
+        assert(*host_offset == INV_OFFSET ||
+               *host_offset == start_of_cluster(s, guest_offset));
+        *host_offset = start_of_cluster(s, guest_offset);
+        return 0;
+    }
+
     /* Allocate new clusters */
     trace_qcow2_cluster_alloc_phys(qemu_coroutine_self());
-    if (*host_offset == 0) {
+    if (*host_offset == INV_OFFSET) {
         int64_t cluster_offset =
             qcow2_alloc_clusters(bs, *nb_clusters * s->cluster_size);
         if (cluster_offset < 0) {
@@ -1251,8 +1296,8 @@ static int do_alloc_cluster_offset(BlockDriverState *bs, uint64_t guest_offset,
 
 /*
  * Allocates new clusters for an area that either is yet unallocated or needs a
- * copy on write. If *host_offset is non-zero, clusters are only allocated if
- * the new allocation can match the specified host offset.
+ * copy on write. If *host_offset is not INV_OFFSET, clusters are only
+ * allocated if the new allocation can match the specified host offset.
  *
  * Note that guest_offset may not be cluster aligned. In this case, the
  * returned *host_offset points to exact byte referenced by guest_offset and
@@ -1280,7 +1325,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
     int ret;
     bool keep_old_clusters = false;
 
-    uint64_t alloc_cluster_offset = 0;
+    uint64_t alloc_cluster_offset = INV_OFFSET;
 
     trace_qcow2_handle_alloc(qemu_coroutine_self(), guest_offset, *host_offset,
                              *bytes);
@@ -1309,7 +1354,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
     if (entry & QCOW_OFLAG_COMPRESSED) {
         nb_clusters = 1;
     } else {
-        nb_clusters = count_cow_clusters(s, nb_clusters, l2_slice, l2_index);
+        nb_clusters = count_cow_clusters(bs, nb_clusters, l2_slice, l2_index);
     }
 
     /* This function is only called when there were no non-COW clusters, so if
@@ -1317,9 +1362,9 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
      * wrong with our code. */
     assert(nb_clusters > 0);
 
-    if (qcow2_get_cluster_type(entry) == QCOW2_CLUSTER_ZERO_ALLOC &&
+    if (qcow2_get_cluster_type(bs, entry) == QCOW2_CLUSTER_ZERO_ALLOC &&
         (entry & QCOW_OFLAG_COPIED) &&
-        (!*host_offset ||
+        (*host_offset == INV_OFFSET ||
          start_of_cluster(s, *host_offset) == (entry & L2E_OFFSET_MASK)))
     {
         int preallocated_nb_clusters;
@@ -1337,7 +1382,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
          * would be fine, too, but count_cow_clusters() above has limited
          * nb_clusters already to a range of COW clusters */
         preallocated_nb_clusters =
-            count_contiguous_clusters(nb_clusters, s->cluster_size,
+            count_contiguous_clusters(bs, nb_clusters, s->cluster_size,
                                       &l2_slice[l2_index], QCOW_OFLAG_COPIED);
         assert(preallocated_nb_clusters > 0);
 
@@ -1351,9 +1396,10 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
 
     qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
 
-    if (!alloc_cluster_offset) {
+    if (alloc_cluster_offset == INV_OFFSET) {
         /* Allocate, if necessary at a given offset in the image file */
-        alloc_cluster_offset = start_of_cluster(s, *host_offset);
+        alloc_cluster_offset = *host_offset == INV_OFFSET ? INV_OFFSET :
+                               start_of_cluster(s, *host_offset);
         ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
                                       &nb_clusters);
         if (ret < 0) {
@@ -1366,16 +1412,7 @@ static int handle_alloc(BlockDriverState *bs, uint64_t guest_offset,
             return 0;
         }
 
-        /* !*host_offset would overwrite the image header and is reserved for
-         * "no host offset preferred". If 0 was a valid host offset, it'd
-         * trigger the following overlap check; do that now to avoid having an
-         * invalid value in *host_offset. */
-        if (!alloc_cluster_offset) {
-            ret = qcow2_pre_write_overlap_check(bs, 0, alloc_cluster_offset,
-                                                nb_clusters * s->cluster_size);
-            assert(ret < 0);
-            goto fail;
-        }
+        assert(alloc_cluster_offset != INV_OFFSET);
     }
 
     /*
@@ -1467,14 +1504,14 @@ int qcow2_alloc_cluster_offset(BlockDriverState *bs, uint64_t offset,
 again:
     start = offset;
     remaining = *bytes;
-    cluster_offset = 0;
-    *host_offset = 0;
+    cluster_offset = INV_OFFSET;
+    *host_offset = INV_OFFSET;
     cur_bytes = 0;
     *m = NULL;
 
     while (true) {
 
-        if (!*host_offset) {
+        if (*host_offset == INV_OFFSET && cluster_offset != INV_OFFSET) {
             *host_offset = start_of_cluster(s, cluster_offset);
         }
 
@@ -1482,7 +1519,10 @@ again:
 
         start           += cur_bytes;
         remaining       -= cur_bytes;
-        cluster_offset  += cur_bytes;
+
+        if (cluster_offset != INV_OFFSET) {
+            cluster_offset += cur_bytes;
+        }
 
         if (remaining == 0) {
             break;
@@ -1554,81 +1594,11 @@ again:
 
     *bytes -= remaining;
     assert(*bytes > 0);
-    assert(*host_offset != 0);
+    assert(*host_offset != INV_OFFSET);
 
     return 0;
 }
 
-static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
-                             const uint8_t *buf, int buf_size)
-{
-    z_stream strm1, *strm = &strm1;
-    int ret, out_len;
-
-    memset(strm, 0, sizeof(*strm));
-
-    strm->next_in = (uint8_t *)buf;
-    strm->avail_in = buf_size;
-    strm->next_out = out_buf;
-    strm->avail_out = out_buf_size;
-
-    ret = inflateInit2(strm, -12);
-    if (ret != Z_OK)
-        return -1;
-    ret = inflate(strm, Z_FINISH);
-    out_len = strm->next_out - out_buf;
-    if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) ||
-        out_len != out_buf_size) {
-        inflateEnd(strm);
-        return -1;
-    }
-    inflateEnd(strm);
-    return 0;
-}
-
-int qcow2_decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
-{
-    BDRVQcow2State *s = bs->opaque;
-    int ret, csize, nb_csectors, sector_offset;
-    uint64_t coffset;
-
-    coffset = cluster_offset & s->cluster_offset_mask;
-    if (s->cluster_cache_offset != coffset) {
-        nb_csectors = ((cluster_offset >> s->csize_shift) & s->csize_mask) + 1;
-        sector_offset = coffset & 511;
-        csize = nb_csectors * 512 - sector_offset;
-
-        /* Allocate buffers on first decompress operation, most images are
-         * uncompressed and the memory overhead can be avoided.  The buffers
-         * are freed in .bdrv_close().
-         */
-        if (!s->cluster_data) {
-            /* one more sector for decompressed data alignment */
-            s->cluster_data = qemu_try_blockalign(bs->file->bs,
-                    QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size + 512);
-            if (!s->cluster_data) {
-                return -ENOMEM;
-            }
-        }
-        if (!s->cluster_cache) {
-            s->cluster_cache = g_malloc(s->cluster_size);
-        }
-
-        BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
-        ret = bdrv_read(bs->file, coffset >> 9, s->cluster_data,
-                        nb_csectors);
-        if (ret < 0) {
-            return ret;
-        }
-        if (decompress_buffer(s->cluster_cache, s->cluster_size,
-                              s->cluster_data + sector_offset, csize) < 0) {
-            return -EIO;
-        }
-        s->cluster_cache_offset = coffset;
-    }
-    return 0;
-}
-
 /*
  * This discards as many clusters of nb_clusters as possible at once (i.e.
  * all clusters in the same L2 slice) and returns the number of discarded
@@ -1671,7 +1641,7 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
          * If full_discard is true, the sector should not read back as zeroes,
          * but rather fall through to the backing file.
          */
-        switch (qcow2_get_cluster_type(old_l2_entry)) {
+        switch (qcow2_get_cluster_type(bs, old_l2_entry)) {
         case QCOW2_CLUSTER_UNALLOCATED:
             if (full_discard || !bs->backing) {
                 continue;
@@ -1784,7 +1754,7 @@ static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
          * Minimize L2 changes if the cluster already reads back as
          * zeroes with correct allocation.
          */
-        cluster_type = qcow2_get_cluster_type(old_offset);
+        cluster_type = qcow2_get_cluster_type(bs, old_offset);
         if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN ||
             (cluster_type == QCOW2_CLUSTER_ZERO_ALLOC && !unmap)) {
             continue;
@@ -1813,6 +1783,16 @@ int qcow2_cluster_zeroize(BlockDriverState *bs, uint64_t offset,
     int64_t cleared;
     int ret;
 
+    /* If we have to stay in sync with an external data file, zero out
+     * s->data_file first. */
+    if (data_file_is_raw(bs)) {
+        assert(has_data_file(bs));
+        ret = bdrv_co_pwrite_zeroes(s->data_file, offset, bytes, flags);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
     /* Caller must pass aligned values, except at image end */
     assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
     assert(QEMU_IS_ALIGNED(end_offset, s->cluster_size) ||
@@ -1926,7 +1906,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                 uint64_t l2_entry = be64_to_cpu(l2_slice[j]);
                 int64_t offset = l2_entry & L2E_OFFSET_MASK;
                 QCow2ClusterType cluster_type =
-                    qcow2_get_cluster_type(l2_entry);
+                    qcow2_get_cluster_type(bs, l2_entry);
 
                 if (cluster_type != QCOW2_CLUSTER_ZERO_PLAIN &&
                     cluster_type != QCOW2_CLUSTER_ZERO_ALLOC) {
@@ -1980,7 +1960,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                 }
 
                 ret = qcow2_pre_write_overlap_check(bs, 0, offset,
-                                                    s->cluster_size);
+                                                    s->cluster_size, true);
                 if (ret < 0) {
                     if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                         qcow2_free_clusters(bs, offset, s->cluster_size,
@@ -1989,7 +1969,8 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                     goto fail;
                 }
 
-                ret = bdrv_pwrite_zeroes(bs->file, offset, s->cluster_size, 0);
+                ret = bdrv_pwrite_zeroes(s->data_file, offset,
+                                         s->cluster_size, 0);
                 if (ret < 0) {
                     if (cluster_type == QCOW2_CLUSTER_ZERO_PLAIN) {
                         qcow2_free_clusters(bs, offset, s->cluster_size,
@@ -2016,7 +1997,7 @@ static int expand_zero_clusters_in_l1(BlockDriverState *bs, uint64_t *l1_table,
                 if (l2_dirty) {
                     ret = qcow2_pre_write_overlap_check(
                         bs, QCOW2_OL_INACTIVE_L2 | QCOW2_OL_ACTIVE_L2,
-                        slice_offset, slice_size2);
+                        slice_offset, slice_size2, false);
                     if (ret < 0) {
                         goto fail;
                     }
@@ -2092,11 +2073,21 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
     }
 
     for (i = 0; i < s->nb_snapshots; i++) {
-        int l1_sectors = DIV_ROUND_UP(s->snapshots[i].l1_size *
-                                      sizeof(uint64_t), BDRV_SECTOR_SIZE);
+        int l1_size2;
+        uint64_t *new_l1_table;
+        Error *local_err = NULL;
+
+        ret = qcow2_validate_table(bs, s->snapshots[i].l1_table_offset,
+                                   s->snapshots[i].l1_size, sizeof(uint64_t),
+                                   QCOW_MAX_L1_SIZE, "Snapshot L1 table",
+                                   &local_err);
+        if (ret < 0) {
+            error_report_err(local_err);
+            goto fail;
+        }
 
-        uint64_t *new_l1_table =
-            g_try_realloc(l1_table, l1_sectors * BDRV_SECTOR_SIZE);
+        l1_size2 = s->snapshots[i].l1_size * sizeof(uint64_t);
+        new_l1_table = g_try_realloc(l1_table, l1_size2);
 
         if (!new_l1_table) {
             ret = -ENOMEM;
@@ -2105,9 +2096,8 @@ int qcow2_expand_zero_clusters(BlockDriverState *bs,
 
         l1_table = new_l1_table;
 
-        ret = bdrv_read(bs->file,
-                        s->snapshots[i].l1_table_offset / BDRV_SECTOR_SIZE,
-                        (void *)l1_table, l1_sectors);
+        ret = bdrv_pread(bs->file, s->snapshots[i].l1_table_offset,
+                         l1_table, l1_size2);
         if (ret < 0) {
             goto fail;
         }