qcow2: add discard-no-unref option

author Jean-Louis Dupond <jean-louis@dupond.be>

Mon, 5 Jun 2023 08:45:24 +0000 (10:45 +0200)

committer Hanna Czenczek <hreitz@redhat.com>

Mon, 5 Jun 2023 11:15:42 +0000 (13:15 +0200)
author Jean-Louis Dupond <jean-louis@dupond.be>
Mon, 5 Jun 2023 08:45:24 +0000 (10:45 +0200)
committer Hanna Czenczek <hreitz@redhat.com>
Mon, 5 Jun 2023 11:15:42 +0000 (13:15 +0200)
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c

index 39cda7f907ecf2dba9f44ee188013c3d6185cc30..2e76de027c4db05c6edf24a7c6760bcbe3280b91 100644 (file)
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1925,6 +1925,10 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
          uint64_t new_l2_bitmap = old_l2_bitmap;
          QCow2ClusterType cluster_type =
              qcow2_get_cluster_type(bs, old_l2_entry);
+        bool keep_reference = (cluster_type != QCOW2_CLUSTER_COMPRESSED) &&
+                              !full_discard &&
+                              (s->discard_no_unref &&
+                               type == QCOW2_DISCARD_REQUEST);
  
          /*
           * If full_discard is true, the cluster should not read back as zeroes,
@@ -1943,10 +1947,22 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
              new_l2_entry = new_l2_bitmap = 0;
          } else if (bs->backing || qcow2_cluster_is_allocated(cluster_type)) {
              if (has_subclusters(s)) {
-                new_l2_entry = 0;
+                if (keep_reference) {
+                    new_l2_entry = old_l2_entry;
+                } else {
+                    new_l2_entry = 0;
+                }
                  new_l2_bitmap = QCOW_L2_BITMAP_ALL_ZEROES;
              } else {
-                new_l2_entry = s->qcow_version >= 3 ? QCOW_OFLAG_ZERO : 0;
+                if (s->qcow_version >= 3) {
+                    if (keep_reference) {
+                        new_l2_entry |= QCOW_OFLAG_ZERO;
+                    } else {
+                        new_l2_entry = QCOW_OFLAG_ZERO;
+                    }
+                } else {
+                    new_l2_entry = 0;
+                }
              }
          }
  
@@ -1960,8 +1976,16 @@ static int discard_in_l2_slice(BlockDriverState *bs, uint64_t offset,
          if (has_subclusters(s)) {
              set_l2_bitmap(s, l2_slice, l2_index + i, new_l2_bitmap);
          }
-        /* Then decrease the refcount */
-        qcow2_free_any_cluster(bs, old_l2_entry, type);
+        if (!keep_reference) {
+            /* Then decrease the refcount */
+            qcow2_free_any_cluster(bs, old_l2_entry, type);
+        } else if (s->discard_passthrough[type] &&
+                   (cluster_type == QCOW2_CLUSTER_NORMAL ||
+                    cluster_type == QCOW2_CLUSTER_ZERO_ALLOC)) {
+            /* If we keep the reference, pass on the discard still */
+            bdrv_pdiscard(s->data_file, old_l2_entry & L2E_OFFSET_MASK,
+                          s->cluster_size);
+       }
      }
  
      qcow2_cache_put(s->l2_table_cache, (void **) &l2_slice);
diff --git a/block/qcow2.c b/block/qcow2.c

index 7f3948360d05475b4ca2646a77395d2131e51e01..e23edd48c29eef91f95cb3c89183338c07ae2fdc 100644 (file)
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -682,6 +682,7 @@ static const char *const mutable_opts[] = {
      QCOW2_OPT_DISCARD_REQUEST,
      QCOW2_OPT_DISCARD_SNAPSHOT,
      QCOW2_OPT_DISCARD_OTHER,
+    QCOW2_OPT_DISCARD_NO_UNREF,
      QCOW2_OPT_OVERLAP,
      QCOW2_OPT_OVERLAP_TEMPLATE,
      QCOW2_OPT_OVERLAP_MAIN_HEADER,
@@ -726,6 +727,11 @@ static QemuOptsList qcow2_runtime_opts = {
              .type = QEMU_OPT_BOOL,
              .help = "Generate discard requests when other clusters are freed",
          },
+        {
+            .name = QCOW2_OPT_DISCARD_NO_UNREF,
+            .type = QEMU_OPT_BOOL,
+            .help = "Do not unreference discarded clusters",
+        },
          {
              .name = QCOW2_OPT_OVERLAP,
              .type = QEMU_OPT_STRING,
@@ -969,6 +975,7 @@ typedef struct Qcow2ReopenState {
      bool use_lazy_refcounts;
      int overlap_check;
      bool discard_passthrough[QCOW2_DISCARD_MAX];
+    bool discard_no_unref;
      uint64_t cache_clean_interval;
      QCryptoBlockOpenOptions *crypto_opts; /* Disk encryption runtime options */
  } Qcow2ReopenState;
@@ -1140,6 +1147,15 @@ static int qcow2_update_options_prepare(BlockDriverState *bs,
      r->discard_passthrough[QCOW2_DISCARD_OTHER] =
          qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_OTHER, false);
  
+    r->discard_no_unref = qemu_opt_get_bool(opts, QCOW2_OPT_DISCARD_NO_UNREF,
+                                            false);
+    if (r->discard_no_unref && s->qcow_version < 3) {
+        error_setg(errp,
+                   "discard-no-unref is only supported since qcow2 version 3");
+        ret = -EINVAL;
+        goto fail;
+    }
+
      switch (s->crypt_method_header) {
      case QCOW_CRYPT_NONE:
          if (encryptfmt) {
@@ -1220,6 +1236,8 @@ static void qcow2_update_options_commit(BlockDriverState *bs,
          s->discard_passthrough[i] = r->discard_passthrough[i];
      }
  
+    s->discard_no_unref = r->discard_no_unref;
+
      if (s->cache_clean_interval != r->cache_clean_interval) {
          cache_clean_timer_del(bs);
          s->cache_clean_interval = r->cache_clean_interval;
diff --git a/block/qcow2.h b/block/qcow2.h

index 4f67eb912ada524f6de29d4988ab1424f9ccb118..ea9adb5706da1a3b8c54aaacc9cf5696132e1ce8 100644 (file)
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -133,6 +133,7 @@
  #define QCOW2_OPT_DISCARD_REQUEST "pass-discard-request"
  #define QCOW2_OPT_DISCARD_SNAPSHOT "pass-discard-snapshot"
  #define QCOW2_OPT_DISCARD_OTHER "pass-discard-other"
+#define QCOW2_OPT_DISCARD_NO_UNREF "discard-no-unref"
  #define QCOW2_OPT_OVERLAP "overlap-check"
  #define QCOW2_OPT_OVERLAP_TEMPLATE "overlap-check.template"
  #define QCOW2_OPT_OVERLAP_MAIN_HEADER "overlap-check.main-header"
@@ -385,6 +386,8 @@ typedef struct BDRVQcow2State {
  
      bool discard_passthrough[QCOW2_DISCARD_MAX];
  
+    bool discard_no_unref;
+
      int overlap_check; /* bitmask of Qcow2MetadataOverlap values */
      bool signaled_corruption;
  
diff --git a/qapi/block-core.json b/qapi/block-core.json

index 4bf89171c6488d12b7f82b6060d1f200b5b2b53c..5dd5f7e4b0ba094b68e2cae2470e4579f4e6f3b7 100644 (file)
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -3478,6 +3478,17 @@
  # @pass-discard-other: whether discard requests for the data source
  #     should be issued on other occasions where a cluster gets freed
  #
+# @discard-no-unref: when enabled, discards from the guest will not cause
+#     cluster allocations to be relinquished. This prevents qcow2 fragmentation
+#     that would be caused by such discards. Besides potential
+#     performance degradation, such fragmentation can lead to increased
+#     allocation of clusters past the end of the image file,
+#     resulting in image files whose file length can grow much larger
+#     than their guest disk size would suggest.
+#     If image file length is of concern (e.g. when storing qcow2
+#     images directly on block devices), you should consider enabling
+#     this option. (since 8.1)
+#
  # @overlap-check: which overlap checks to perform for writes to the
  #     image, defaults to 'cached' (since 2.2)
  #
@@ -3516,6 +3527,7 @@
              '*pass-discard-request': 'bool',
              '*pass-discard-snapshot': 'bool',
              '*pass-discard-other': 'bool',
+            '*discard-no-unref': 'bool',
              '*overlap-check': 'Qcow2OverlapChecks',
              '*cache-size': 'int',
              '*l2-cache-size': 'int',
diff --git a/qemu-options.hx b/qemu-options.hx

index b37eb9662bfe804054ac9921cc1318ede020d766..b57489d7ca3b2a3b9df93ff9e6a9a42ea20cf1b7 100644 (file)
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -1431,6 +1431,18 @@ SRST
              issued on other occasions where a cluster gets freed
              (on/off; default: off)
  
+        ``discard-no-unref``
+            When enabled, discards from the guest will not cause cluster
+            allocations to be relinquished. This prevents qcow2 fragmentation
+            that would be caused by such discards. Besides potential
+            performance degradation, such fragmentation can lead to increased
+            allocation of clusters past the end of the image file,
+            resulting in image files whose file length can grow much larger
+            than their guest disk size would suggest.
+            If image file length is of concern (e.g. when storing qcow2
+            images directly on block devices), you should consider enabling
+            this option. (on/off; default: off)
+
          ``overlap-check``
              Which overlap checks to perform for writes to the image
              (none/constant/cached/all; default: cached). For details or
author	Jean-Louis Dupond <jean-louis@dupond.be>
	Mon, 5 Jun 2023 08:45:24 +0000 (10:45 +0200)
committer	Hanna Czenczek <hreitz@redhat.com>
	Mon, 5 Jun 2023 11:15:42 +0000 (13:15 +0200)
block/qcow2-cluster.c		patch \| blob \| blame \| history
block/qcow2.c		patch \| blob \| blame \| history
block/qcow2.h		patch \| blob \| blame \| history
qapi/block-core.json		patch \| blob \| blame \| history
qemu-options.hx		patch \| blob \| blame \| history