--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:22 +0200
+Subject: [PATCH] block/copy-before-write: fix permission
+
+In the case when the source node does not have any parents, the condition
+still works as required: the backup job does create the parent via
+
+ block_job_create -> block_job_add_bdrv -> bdrv_root_attach_child
+
+Still, in this case checking the @perm variable doesn't work, as the
+backup job creates the root blk with empty permissions (as it relies on
+the CBW filter to require correct permissions and doesn't want to create
+extra conflicts).
+
+So, we should not check @perm.
+
+The hack may be dropped entirely when transactional insertion of the
+filter (when we don't try to recalculate permissions in an intermediate
+state, when the filter does conflict with the original parent of the
+source node) is merged (old big series
+"[PATCH v5 00/45] Transactional block-graph modifying API"[1] and its
+current in-flight part is "[PATCH v8 0/7] blockdev-replace"[2])
+
+[1] https://patchew.org/QEMU/20220330212902.590099-1-vsementsov@openvz.org/
+[2] https://patchew.org/QEMU/20231017184444.932733-1-vsementsov@yandex-team.ru/
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/copy-before-write.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index b866e42271..a2dddf6f57 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -364,9 +364,13 @@ static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c,
+ perm, shared, nperm, nshared);
+
+ if (!QLIST_EMPTY(&bs->parents)) {
+- if (perm & BLK_PERM_WRITE) {
+- *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+- }
++ /*
++ * Note, that source child may be shared with backup job. Backup job
++ * does create own blk parent on copy-before-write node, so this
++ * works even if source node does not have any parents before backup
++ * start
++ */
++ *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+ *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+ }
+ }
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:23 +0200
+Subject: [PATCH] block/copy-before-write: support unaligned snapshot-discard
+
+The first thing that crashes on unaligned access here is
+bdrv_reset_dirty_bitmap(). The correct way is to align down the
+snapshot-discard request.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/copy-before-write.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index a2dddf6f57..0a219c2b75 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -325,14 +325,24 @@ static int coroutine_fn GRAPH_RDLOCK
+ cbw_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
+ {
+ BDRVCopyBeforeWriteState *s = bs->opaque;
++ uint32_t cluster_size = block_copy_cluster_size(s->bcs);
++ int64_t aligned_offset = QEMU_ALIGN_UP(offset, cluster_size);
++ int64_t aligned_end = QEMU_ALIGN_DOWN(offset + bytes, cluster_size);
++ int64_t aligned_bytes;
++
++ if (aligned_end <= aligned_offset) {
++ return 0;
++ }
++ aligned_bytes = aligned_end - aligned_offset;
+
+ WITH_QEMU_LOCK_GUARD(&s->lock) {
+- bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes);
++ bdrv_reset_dirty_bitmap(s->access_bitmap, aligned_offset,
++ aligned_bytes);
+ }
+
+- block_copy_reset(s->bcs, offset, bytes);
++ block_copy_reset(s->bcs, aligned_offset, aligned_bytes);
+
+- return bdrv_co_pdiscard(s->target, offset, bytes);
++ return bdrv_co_pdiscard(s->target, aligned_offset, aligned_bytes);
+ }
+
+ static void cbw_refresh_filename(BlockDriverState *bs)
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:24 +0200
+Subject: [PATCH] block/copy-before-write: create block_copy bitmap in filter
+ node
+
+Currently block_copy creates copy_bitmap in the source node. But that
+does not fit well with .independent_close=true of the copy-before-write
+filter: the source node may be detached and removed before the
+.bdrv_close() handler is called, which should call
+block_copy_state_free(), which in turn should remove copy_bitmap.
+
+That's all not ideal: it would be better if the internal bitmap of the
+block-copy object were not attached to any node. But that is not
+possible now.
+
+The simplest solution is to just create copy_bitmap in the filter node,
+where the two other bitmaps are created anyway.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/block-copy.c | 3 +-
+ block/copy-before-write.c | 2 +-
+ include/block/block-copy.h | 1 +
+ tests/qemu-iotests/257.out | 112 ++++++++++++++++++-------------------
+ 4 files changed, 60 insertions(+), 58 deletions(-)
+
+diff --git a/block/block-copy.c b/block/block-copy.c
+index e13d7bc6b6..b61685f1a2 100644
+--- a/block/block-copy.c
++++ b/block/block-copy.c
+@@ -346,6 +346,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ }
+
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
++ BlockDriverState *copy_bitmap_bs,
+ const BdrvDirtyBitmap *bitmap,
+ Error **errp)
+ {
+@@ -360,7 +361,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ return NULL;
+ }
+
+- copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
++ copy_bitmap = bdrv_create_dirty_bitmap(copy_bitmap_bs, cluster_size, NULL,
+ errp);
+ if (!copy_bitmap) {
+ return NULL;
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 0a219c2b75..d3b95bd600 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -470,7 +470,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+ bs->file->bs->supported_zero_flags);
+
+- s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
++ s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
+ if (!s->bcs) {
+ error_prepend(errp, "Cannot create block-copy-state: ");
+ ret = -EINVAL;
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index 0700953ab8..8b41643bfa 100644
+--- a/include/block/block-copy.h
++++ b/include/block/block-copy.h
+@@ -25,6 +25,7 @@ typedef struct BlockCopyState BlockCopyState;
+ typedef struct BlockCopyCallState BlockCopyCallState;
+
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
++ BlockDriverState *copy_bitmap_bs,
+ const BdrvDirtyBitmap *bitmap,
+ Error **errp);
+
+diff --git a/tests/qemu-iotests/257.out b/tests/qemu-iotests/257.out
+index aa76131ca9..c33dd7f3a9 100644
+--- a/tests/qemu-iotests/257.out
++++ b/tests/qemu-iotests/257.out
+@@ -120,16 +120,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -596,16 +596,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -865,16 +865,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -1341,16 +1341,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -1610,16 +1610,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -2086,16 +2086,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -2355,16 +2355,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -2831,16 +2831,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -3100,16 +3100,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -3576,16 +3576,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -3845,16 +3845,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -4321,16 +4321,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -4590,16 +4590,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -5066,16 +5066,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:25 +0200
+Subject: [PATCH] qapi: blockdev-backup: add discard-source parameter
+
+Add a parameter that enables discard-after-copy. That is mostly useful
+in the "push backup with fleecing" scheme, when the source is a
+snapshot-access format driver node, based on the copy-before-write
+filter's snapshot-access API:
+
+[guest] [snapshot-access] ~~ blockdev-backup ~~> [backup target]
+ | |
+ | root | file
+ v v
+[copy-before-write]
+ | |
+ | file | target
+ v v
+[active disk] [temp.img]
+
+In this case discard-after-copy does two things:
+
+ - discard data in temp.img to save disk space
+ - avoid further copy-before-write operation in discarded area
+
+Note that we have to declare WRITE permission on the source in the
+copy-before-write filter for discard to work. Still, we can't take it
+unconditionally, as it would break normal backup from an RO source. So
+we have to add a parameter and pass it through bdrv_open flags.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/backup.c | 5 +++--
+ block/block-copy.c | 9 +++++++++
+ block/copy-before-write.c | 15 +++++++++++++--
+ block/copy-before-write.h | 1 +
+ block/replication.c | 4 ++--
+ blockdev.c | 2 +-
+ include/block/block-common.h | 2 ++
+ include/block/block-copy.h | 1 +
+ include/block/block_int-global-state.h | 2 +-
+ qapi/block-core.json | 4 ++++
+ 10 files changed, 37 insertions(+), 8 deletions(-)
+
+diff --git a/block/backup.c b/block/backup.c
+index af87fa6aa9..3dc955f625 100644
+--- a/block/backup.c
++++ b/block/backup.c
+@@ -332,7 +332,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, int64_t speed,
+ MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
+ BitmapSyncMode bitmap_mode,
+- bool compress,
++ bool compress, bool discard_source,
+ const char *filter_node_name,
+ BackupPerf *perf,
+ BlockdevOnError on_source_error,
+@@ -429,7 +429,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+ goto error;
+ }
+
+- cbw = bdrv_cbw_append(bs, target, filter_node_name, &bcs, errp);
++ cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
++ &bcs, errp);
+ if (!cbw) {
+ goto error;
+ }
+diff --git a/block/block-copy.c b/block/block-copy.c
+index b61685f1a2..3c61e52bae 100644
+--- a/block/block-copy.c
++++ b/block/block-copy.c
+@@ -137,6 +137,7 @@ typedef struct BlockCopyState {
+ CoMutex lock;
+ int64_t in_flight_bytes;
+ BlockCopyMethod method;
++ bool discard_source;
+ BlockReqList reqs;
+ QLIST_HEAD(, BlockCopyCallState) calls;
+ /*
+@@ -348,6 +349,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ BlockDriverState *copy_bitmap_bs,
+ const BdrvDirtyBitmap *bitmap,
++ bool discard_source,
+ Error **errp)
+ {
+ ERRP_GUARD();
+@@ -409,6 +411,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ cluster_size),
+ };
+
++ s->discard_source = discard_source;
+ block_copy_set_copy_opts(s, false, false);
+
+ ratelimit_init(&s->rate_limit);
+@@ -580,6 +583,12 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
+ co_put_to_shres(s->mem, t->req.bytes);
+ block_copy_task_end(t, ret);
+
++ if (s->discard_source && ret == 0) {
++ int64_t nbytes =
++ MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
++ bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
++ }
++
+ return ret;
+ }
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index d3b95bd600..3503702d71 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -44,6 +44,7 @@ typedef struct BDRVCopyBeforeWriteState {
+ BdrvChild *target;
+ OnCbwError on_cbw_error;
+ uint32_t cbw_timeout_ns;
++ bool discard_source;
+
+ /*
+ * @lock: protects access to @access_bitmap, @done_bitmap and
+@@ -357,6 +358,8 @@ static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+ {
++ BDRVCopyBeforeWriteState *s = bs->opaque;
++
+ if (!(role & BDRV_CHILD_FILTERED)) {
+ /*
+ * Target child
+@@ -381,6 +384,10 @@ static void cbw_child_perm(BlockDriverState *bs, BdrvChild *c,
+ * start
+ */
+ *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
++ if (s->discard_source) {
++ *nperm = *nperm | BLK_PERM_WRITE;
++ }
++
+ *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+ }
+ }
+@@ -470,7 +477,9 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+ bs->file->bs->supported_zero_flags);
+
+- s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
++ s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
++ s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
++ flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
+ if (!s->bcs) {
+ error_prepend(errp, "Cannot create block-copy-state: ");
+ ret = -EINVAL;
+@@ -544,12 +553,14 @@ BlockDriver bdrv_cbw_filter = {
+ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+ BlockDriverState *target,
+ const char *filter_node_name,
++ bool discard_source,
+ BlockCopyState **bcs,
+ Error **errp)
+ {
+ BDRVCopyBeforeWriteState *state;
+ BlockDriverState *top;
+ QDict *opts;
++ int flags = BDRV_O_RDWR | (discard_source ? BDRV_O_CBW_DISCARD_SOURCE : 0);
+
+ assert(source->total_sectors == target->total_sectors);
+ GLOBAL_STATE_CODE();
+@@ -562,7 +573,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+ qdict_put_str(opts, "file", bdrv_get_node_name(source));
+ qdict_put_str(opts, "target", bdrv_get_node_name(target));
+
+- top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp);
++ top = bdrv_insert_node(source, opts, flags, errp);
+ if (!top) {
+ return NULL;
+ }
+diff --git a/block/copy-before-write.h b/block/copy-before-write.h
+index 6e72bb25e9..01af0cd3c4 100644
+--- a/block/copy-before-write.h
++++ b/block/copy-before-write.h
+@@ -39,6 +39,7 @@
+ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+ BlockDriverState *target,
+ const char *filter_node_name,
++ bool discard_source,
+ BlockCopyState **bcs,
+ Error **errp);
+ void bdrv_cbw_drop(BlockDriverState *bs);
+diff --git a/block/replication.c b/block/replication.c
+index ea4bf1aa80..39ad78cf98 100644
+--- a/block/replication.c
++++ b/block/replication.c
+@@ -579,8 +579,8 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
+
+ s->backup_job = backup_job_create(
+ NULL, s->secondary_disk->bs, s->hidden_disk->bs,
+- 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
+- &perf,
++ 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, false,
++ NULL, &perf,
+ BLOCKDEV_ON_ERROR_REPORT,
+ BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
+ backup_job_completed, bs, NULL, &local_err);
+diff --git a/blockdev.c b/blockdev.c
+index 7793143d76..ce3fef924c 100644
+--- a/blockdev.c
++++ b/blockdev.c
+@@ -2802,7 +2802,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
+
+ job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
+ backup->sync, bmap, backup->bitmap_mode,
+- backup->compress,
++ backup->compress, backup->discard_source,
+ backup->filter_node_name,
+ &perf,
+ backup->on_source_error,
+diff --git a/include/block/block-common.h b/include/block/block-common.h
+index e15395f2cb..913a8b259c 100644
+--- a/include/block/block-common.h
++++ b/include/block/block-common.h
+@@ -234,6 +234,8 @@ typedef enum {
+ read-write fails */
+ #define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
+
++#define BDRV_O_CBW_DISCARD_SOURCE 0x80000 /* for copy-before-write filter */
++
+ #define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
+
+
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index 8b41643bfa..bdc703bacd 100644
+--- a/include/block/block-copy.h
++++ b/include/block/block-copy.h
+@@ -27,6 +27,7 @@ typedef struct BlockCopyCallState BlockCopyCallState;
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ BlockDriverState *copy_bitmap_bs,
+ const BdrvDirtyBitmap *bitmap,
++ bool discard_source,
+ Error **errp);
+
+ /* Function should be called prior any actual copy request */
+diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
+index 32f0f9858a..546f2b5532 100644
+--- a/include/block/block_int-global-state.h
++++ b/include/block/block_int-global-state.h
+@@ -189,7 +189,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+ MirrorSyncMode sync_mode,
+ BdrvDirtyBitmap *sync_bitmap,
+ BitmapSyncMode bitmap_mode,
+- bool compress,
++ bool compress, bool discard_source,
+ const char *filter_node_name,
+ BackupPerf *perf,
+ BlockdevOnError on_source_error,
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index 09de550c95..4297e5beda 100644
+--- a/qapi/block-core.json
++++ b/qapi/block-core.json
+@@ -1816,6 +1816,9 @@
+ # node specified by @drive. If this option is not given, a node
+ # name is autogenerated. (Since: 4.2)
+ #
++# @discard-source: Discard blocks on source which are already copied
++# to the target. (Since 9.0)
++#
+ # @x-perf: Performance options. (Since 6.0)
+ #
+ # Features:
+@@ -1837,6 +1840,7 @@
+ '*on-target-error': 'BlockdevOnError',
+ '*auto-finalize': 'bool', '*auto-dismiss': 'bool',
+ '*filter-node-name': 'str',
++ '*discard-source': 'bool',
+ '*x-perf': { 'type': 'BackupPerf',
+ 'features': [ 'unstable' ] } } }
+
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:26 +0200
+Subject: [PATCH] copy-before-write: allow specifying minimum cluster size
+
+Useful to make discard-source work in the context of backup fleecing
+when the fleecing image has a larger granularity than the backup
+target.
+
+Copy-before-write operations will use at least this granularity and in
+particular, discard requests to the source node will too. If the
+granularity is too small, they will just be aligned down in
+cbw_co_pdiscard_snapshot() and thus effectively ignored.
+
+The QAPI uses uint32 so the value will be non-negative, but still fit
+into an int64_t.
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/block-copy.c | 17 +++++++++++++----
+ block/copy-before-write.c | 3 ++-
+ include/block/block-copy.h | 1 +
+ qapi/block-core.json | 8 +++++++-
+ 4 files changed, 23 insertions(+), 6 deletions(-)
+
+diff --git a/block/block-copy.c b/block/block-copy.c
+index 3c61e52bae..c9a722a5a6 100644
+--- a/block/block-copy.c
++++ b/block/block-copy.c
+@@ -310,6 +310,7 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
+ }
+
+ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
++ int64_t min_cluster_size,
+ Error **errp)
+ {
+ int ret;
+@@ -330,7 +331,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ "used. If the actual block size of the target exceeds "
+ "this default, the backup may be unusable",
+ BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+- return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
++ return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+ } else if (ret < 0 && !target_does_cow) {
+ error_setg_errno(errp, -ret,
+ "Couldn't determine the cluster size of the target image, "
+@@ -340,16 +341,18 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ return ret;
+ } else if (ret < 0 && target_does_cow) {
+ /* Not fatal; just trudge on ahead. */
+- return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
++ return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+ }
+
+- return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
++ return MAX(min_cluster_size,
++ MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size));
+ }
+
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ BlockDriverState *copy_bitmap_bs,
+ const BdrvDirtyBitmap *bitmap,
+ bool discard_source,
++ int64_t min_cluster_size,
+ Error **errp)
+ {
+ ERRP_GUARD();
+@@ -358,7 +361,13 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ BdrvDirtyBitmap *copy_bitmap;
+ bool is_fleecing;
+
+- cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
++ if (min_cluster_size && !is_power_of_2(min_cluster_size)) {
++ error_setg(errp, "min-cluster-size needs to be a power of 2");
++ return NULL;
++ }
++
++ cluster_size = block_copy_calculate_cluster_size(target->bs,
++ min_cluster_size, errp);
+ if (cluster_size < 0) {
+ return NULL;
+ }
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 3503702d71..4a8c5bdb62 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -479,7 +479,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+
+ s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
+ s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
+- flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
++ flags & BDRV_O_CBW_DISCARD_SOURCE,
++ opts->min_cluster_size, errp);
+ if (!s->bcs) {
+ error_prepend(errp, "Cannot create block-copy-state: ");
+ ret = -EINVAL;
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index bdc703bacd..77857c6c68 100644
+--- a/include/block/block-copy.h
++++ b/include/block/block-copy.h
+@@ -28,6 +28,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ BlockDriverState *copy_bitmap_bs,
+ const BdrvDirtyBitmap *bitmap,
+ bool discard_source,
++ int64_t min_cluster_size,
+ Error **errp);
+
+ /* Function should be called prior any actual copy request */
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index 4297e5beda..33e7e3c090 100644
+--- a/qapi/block-core.json
++++ b/qapi/block-core.json
+@@ -4825,12 +4825,18 @@
+ # @on-cbw-error parameter will decide how this failure is handled.
+ # Default 0. (Since 7.1)
+ #
++# @min-cluster-size: Minimum size of blocks used by copy-before-write
++# operations. Has to be a power of 2. No effect if smaller than
++# the maximum of the target's cluster size and 64 KiB. Default 0.
++# (Since 8.1)
++#
+ # Since: 6.2
+ ##
+ { 'struct': 'BlockdevOptionsCbw',
+ 'base': 'BlockdevOptionsGenericFormat',
+ 'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
+- '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
++ '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32',
++ '*min-cluster-size': 'uint32' } }
+
+ ##
+ # @BlockdevOptions:
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:27 +0200
+Subject: [PATCH] backup: add minimum cluster size to performance options
+
+Useful to make discard-source work in the context of backup fleecing
+when the fleecing image has a larger granularity than the backup
+target.
+
+Backup/block-copy will use at least this granularity for copy operations
+and in particular, discard requests to the backup source will too. If
+the granularity is too small, they will just be aligned down in
+cbw_co_pdiscard_snapshot() and thus effectively ignored.
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/backup.c | 2 +-
+ block/copy-before-write.c | 2 ++
+ block/copy-before-write.h | 1 +
+ blockdev.c | 3 +++
+ qapi/block-core.json | 9 +++++++--
+ 5 files changed, 14 insertions(+), 3 deletions(-)
+
+diff --git a/block/backup.c b/block/backup.c
+index 3dc955f625..ac5bd81338 100644
+--- a/block/backup.c
++++ b/block/backup.c
+@@ -430,7 +430,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+ }
+
+ cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
+- &bcs, errp);
++ perf->min_cluster_size, &bcs, errp);
+ if (!cbw) {
+ goto error;
+ }
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 4a8c5bdb62..9ca5ec5e5c 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -555,6 +555,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+ BlockDriverState *target,
+ const char *filter_node_name,
+ bool discard_source,
++ int64_t min_cluster_size,
+ BlockCopyState **bcs,
+ Error **errp)
+ {
+@@ -573,6 +574,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+ }
+ qdict_put_str(opts, "file", bdrv_get_node_name(source));
+ qdict_put_str(opts, "target", bdrv_get_node_name(target));
++ qdict_put_int(opts, "min-cluster-size", min_cluster_size);
+
+ top = bdrv_insert_node(source, opts, flags, errp);
+ if (!top) {
+diff --git a/block/copy-before-write.h b/block/copy-before-write.h
+index 01af0cd3c4..dc6cafe7fa 100644
+--- a/block/copy-before-write.h
++++ b/block/copy-before-write.h
+@@ -40,6 +40,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+ BlockDriverState *target,
+ const char *filter_node_name,
+ bool discard_source,
++ int64_t min_cluster_size,
+ BlockCopyState **bcs,
+ Error **errp);
+ void bdrv_cbw_drop(BlockDriverState *bs);
+diff --git a/blockdev.c b/blockdev.c
+index ce3fef924c..5ae1dde73c 100644
+--- a/blockdev.c
++++ b/blockdev.c
+@@ -2729,6 +2729,9 @@ static BlockJob *do_backup_common(BackupCommon *backup,
+ if (backup->x_perf->has_max_chunk) {
+ perf.max_chunk = backup->x_perf->max_chunk;
+ }
++ if (backup->x_perf->has_min_cluster_size) {
++ perf.min_cluster_size = backup->x_perf->min_cluster_size;
++ }
+ }
+
+ if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) ||
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index 33e7e3c090..58fd637e86 100644
+--- a/qapi/block-core.json
++++ b/qapi/block-core.json
+@@ -1757,11 +1757,16 @@
+ # it should not be less than job cluster size which is calculated
+ # as maximum of target image cluster size and 64k. Default 0.
+ #
++# @min-cluster-size: Minimum size of blocks used by copy-before-write
++# and background copy operations. Has to be a power of 2. No
++# effect if smaller than the maximum of the target's cluster size
++# and 64 KiB. Default 0. (Since 8.1)
++#
+ # Since: 6.0
+ ##
+ { 'struct': 'BackupPerf',
+- 'data': { '*use-copy-range': 'bool',
+- '*max-workers': 'int', '*max-chunk': 'int64' } }
++ 'data': { '*use-copy-range': 'bool', '*max-workers': 'int',
++ '*max-chunk': 'int64', '*min-cluster-size': 'uint32' } }
+
+ ##
+ # @BackupCommon:
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:28 +0200
+Subject: [PATCH] PVE backup: add fleecing option
+
+When a fleecing option is given, it is expected that each device has
+a corresponding "-fleecing" block device already attached, except for
+EFI disk and TPM state, where fleecing is never used.
+
+The following graph was adapted from [0] which also contains more
+details about fleecing.
+
+[guest]
+ |
+ | root
+ v file
+[copy-before-write]<------[snapshot-access]
+ | |
+ | file | target
+ v v
+[source] [fleecing]
+
+For fleecing, a copy-before-write filter is inserted on top of the
+source node, as well as a snapshot-access node pointing to the filter
+node which allows reading the consistent state of the image at the
+time it was inserted. New guest writes are passed through the
+copy-before-write filter which will first copy over old data to the
+fleecing image in case that old data is still needed by the
+snapshot-access node.
+
+The backup process will sequentially read from the snapshot access,
+which has a bitmap and knows whether to read from the original image
+or the fleecing image to get the "snapshot" state, i.e. data from the
+source image at the time when the copy-before-write filter was
+inserted. After reading, the copied sections are discarded from the
+fleecing image to reduce space usage.
+
+All of this can be restricted by an initial dirty bitmap to parts of
+the source image that are required for an incremental backup.
+
+For discard to work, it is necessary that the fleecing image does not
+have a larger cluster size than the backup job granularity. Querying
+that size does not always work (e.g. for RBD with krbd, the cluster
+size is not reported), so a minimum of 4 MiB is used. A job with a PBS
+target already has at least this granularity, so this is only relevant
+for other targets. I.e. edge cases where this minimum is not enough
+should be very rare in practice. If ever necessary in the future, a
+passed-in value for the backup QMP command can still be added as an
+override.
+
+Additionally, the cbw-timeout and on-cbw-error=break-snapshot options
+are set when installing the copy-before-write filter and
+snapshot-access. When an error or timeout occurs, the problematic (and
+each further) snapshot operation will fail and thus cancel the backup
+instead of breaking the guest write.
+
+Note that job_id cannot be inferred from the snapshot-access bs because
+it has no parent, so just pass the one from the original bs.
+
+[0]: https://www.mail-archive.com/qemu-devel@nongnu.org/msg876056.html
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/monitor/block-hmp-cmds.c | 1 +
+ pve-backup.c | 143 ++++++++++++++++++++++++++++++++-
+ qapi/block-core.json | 8 +-
+ 3 files changed, 148 insertions(+), 4 deletions(-)
+
+diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
+index 6efe28cef5..ca29cc4281 100644
+--- a/block/monitor/block-hmp-cmds.c
++++ b/block/monitor/block-hmp-cmds.c
+@@ -1064,6 +1064,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
+ NULL, NULL,
+ devlist, qdict_haskey(qdict, "speed"), speed,
+ false, 0, // BackupPerf max-workers
++ false, false, // fleecing
+ &error);
+
+ hmp_handle_error(mon, error);
+diff --git a/pve-backup.c b/pve-backup.c
+index e6b17b797e..00aaff6509 100644
+--- a/pve-backup.c
++++ b/pve-backup.c
+@@ -7,8 +7,10 @@
+ #include "sysemu/blockdev.h"
+ #include "block/block_int-global-state.h"
+ #include "block/blockjob.h"
++#include "block/copy-before-write.h"
+ #include "block/dirty-bitmap.h"
+ #include "qapi/qapi-commands-block.h"
++#include "qapi/qmp/qdict.h"
+ #include "qapi/qmp/qerror.h"
+ #include "qemu/cutils.h"
+
+@@ -80,8 +82,15 @@ static void pvebackup_init(void)
+ // initialize PVEBackupState at startup
+ opts_init(pvebackup_init);
+
++typedef struct PVEBackupFleecingInfo {
++ BlockDriverState *bs;
++ BlockDriverState *cbw;
++ BlockDriverState *snapshot_access;
++} PVEBackupFleecingInfo;
++
+ typedef struct PVEBackupDevInfo {
+ BlockDriverState *bs;
++ PVEBackupFleecingInfo fleecing;
+ size_t size;
+ uint64_t block_size;
+ uint8_t dev_id;
+@@ -361,6 +370,25 @@ static void pvebackup_complete_cb(void *opaque, int ret)
+ PVEBackupDevInfo *di = opaque;
+ di->completed_ret = ret;
+
++ /*
++ * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
++ * won't be done as a coroutine anyways:
++ * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
++ * just spawn a BH calling bdrv_unref().
++ * - For cbw, draining would need to spawn a BH.
++ *
++ * Note that the AioContext lock is already acquired by our caller, i.e.
++ * job_finalize_single_locked()
++ */
++ if (di->fleecing.snapshot_access) {
++ bdrv_unref(di->fleecing.snapshot_access);
++ di->fleecing.snapshot_access = NULL;
++ }
++ if (di->fleecing.cbw) {
++ bdrv_cbw_drop(di->fleecing.cbw);
++ di->fleecing.cbw = NULL;
++ }
++
+ /*
+ * Schedule stream cleanup in async coroutine. close_image and finish might
+ * take a while, so we can't block on them here. This way it also doesn't
+@@ -521,9 +549,82 @@ static void create_backup_jobs_bh(void *opaque) {
+
+ bdrv_drained_begin(di->bs);
+
++ BackupPerf perf = (BackupPerf){ .max_workers = backup_state.perf.max_workers };
++
++ BlockDriverState *source_bs = di->bs;
++ bool discard_source = false;
++ const char *job_id = bdrv_get_device_name(di->bs);
++ if (di->fleecing.bs) {
++ QDict *cbw_opts = qdict_new();
++ qdict_put_str(cbw_opts, "driver", "copy-before-write");
++ qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
++ qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
++
++ if (di->bitmap) {
++ /*
++ * Only guest writes to parts relevant for the backup need to be intercepted with
++ * old data being copied to the fleecing image.
++ */
++ qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
++ qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
++ }
++ /*
++ * Fleecing storage is supposed to be fast and it's better to break backup than guest
++ * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
++ * abort a bit before that.
++ */
++ qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
++ qdict_put_int(cbw_opts, "cbw-timeout", 45);
++
++ di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
++
++ if (!di->fleecing.cbw) {
++ error_setg(errp, "appending cbw node for fleecing failed: %s",
++ local_err ? error_get_pretty(local_err) : "unknown error");
++ break;
++ }
++
++ QDict *snapshot_access_opts = qdict_new();
++ qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
++ qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
++
++ /*
++ * Holding the AioContext lock here would cause a deadlock, because bdrv_open_driver()
++ * will acquire it a second time. But it's allowed to be held exactly once when polling
++ * and that happens when the bdrv_refresh_total_sectors() call is made there.
++ */
++ aio_context_release(aio_context);
++ di->fleecing.snapshot_access =
++ bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
++ aio_context_acquire(aio_context);
++ if (!di->fleecing.snapshot_access) {
++ error_setg(errp, "setting up snapshot access for fleecing failed: %s",
++ local_err ? error_get_pretty(local_err) : "unknown error");
++ break;
++ }
++ source_bs = di->fleecing.snapshot_access;
++ discard_source = true;
++
++ /*
++ * bdrv_get_info() just returns 0 (= doesn't matter) for RBD when using krbd. But discard
++ * on the fleecing image won't work if the backup job's granularity is less than the RBD
++ * object size (default 4 MiB), so it does matter. Always use at least 4 MiB. With a PBS
++ * target, the backup job granularity would already be at least this much.
++ */
++ perf.min_cluster_size = 4 * 1024 * 1024;
++ /*
++ * For discard to work, cluster size for the backup job must be at least the same as for
++ * the fleecing image.
++ */
++ BlockDriverInfo bdi;
++ if (bdrv_get_info(di->fleecing.bs, &bdi) >= 0) {
++ perf.min_cluster_size = MAX(perf.min_cluster_size, bdi.cluster_size);
++ }
++ }
++
+ BlockJob *job = backup_job_create(
+- NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+- bitmap_mode, false, NULL, &backup_state.perf, BLOCKDEV_ON_ERROR_REPORT,
++ job_id, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
++ bitmap_mode, false, discard_source, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT,
+ BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
+ &local_err);
+
+@@ -581,6 +682,14 @@ static void create_backup_jobs_bh(void *opaque) {
+ aio_co_enter(data->ctx, data->co);
+ }
+
++/*
++ * EFI disk and TPM state are small and it's just not worth setting up fleecing for them.
++ */
++static bool device_uses_fleecing(const char *device_id)
++{
++ return strncmp(device_id, "drive-efidisk", 13) && strncmp(device_id, "drive-tpmstate", 14);
++}
++
+ /*
+ * Returns a list of device infos, which needs to be freed by the caller. In
+ * case of an error, errp will be set, but the returned value might still be a
+@@ -588,6 +697,7 @@ static void create_backup_jobs_bh(void *opaque) {
+ */
+ static GList coroutine_fn *get_device_info(
+ const char *devlist,
++ bool fleecing,
+ Error **errp)
+ {
+ gchar **devs = NULL;
+@@ -611,6 +721,31 @@ static GList coroutine_fn *get_device_info(
+ }
+ PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
+ di->bs = bs;
++
++ if (fleecing && device_uses_fleecing(*d)) {
++ g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL);
++ BlockBackend *fleecing_blk = blk_by_name(fleecing_devid);
++ if (!fleecing_blk) {
++ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
++ "Device '%s' not found", fleecing_devid);
++ goto err;
++ }
++ BlockDriverState *fleecing_bs = blk_bs(fleecing_blk);
++ if (!bdrv_co_is_inserted(fleecing_bs)) {
++ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, fleecing_devid);
++ goto err;
++ }
++ /*
++  * The fleecing image must have the same size as the source to act as a cbw target.
++  */
++ if (bs->total_sectors != fleecing_bs->total_sectors) {
++     error_setg(errp, "Size mismatch for '%s' - sector count %" PRId64 " != %" PRId64,
++                fleecing_devid, fleecing_bs->total_sectors, bs->total_sectors);
++     goto err;
++ }
++ di->fleecing.bs = fleecing_bs;
++ }
++
+ di_list = g_list_append(di_list, di);
+ d++;
+ }
+@@ -660,6 +795,7 @@ UuidInfo coroutine_fn *qmp_backup(
+ const char *devlist,
+ bool has_speed, int64_t speed,
+ bool has_max_workers, int64_t max_workers,
++ bool has_fleecing, bool fleecing,
+ Error **errp)
+ {
+ assert(qemu_in_coroutine());
+@@ -687,7 +823,7 @@ UuidInfo coroutine_fn *qmp_backup(
+ /* Todo: try to auto-detect format based on file name */
+ format = has_format ? format : BACKUP_FORMAT_VMA;
+
+- di_list = get_device_info(devlist, &local_err);
++ di_list = get_device_info(devlist, has_fleecing && fleecing, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto err;
+@@ -1086,5 +1222,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+ ret->query_bitmap_info = true;
+ ret->pbs_masterkey = true;
+ ret->backup_max_workers = true;
++ ret->backup_fleecing = true;
+ return ret;
+ }
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index 58fd637e86..0bc5f42677 100644
+--- a/qapi/block-core.json
++++ b/qapi/block-core.json
+@@ -933,6 +933,10 @@
+ #
+ # @max-workers: see @BackupPerf for details. Default 16.
+ #
++# @fleecing: perform a backup with fleecing. For each device in @devlist
++#            (except EFI disk and TPM state), a corresponding '-fleecing'
++#            device with the same size already needs to be present.
++#
+ # Returns: the uuid of the backup job
+ #
+ ##
+@@ -953,7 +957,8 @@
+ '*firewall-file': 'str',
+ '*devlist': 'str',
+ '*speed': 'int',
+- '*max-workers': 'int' },
++ '*max-workers': 'int',
++ '*fleecing': 'bool' },
+ 'returns': 'UuidInfo', 'coroutine': true }
+
+ ##
+@@ -1009,6 +1014,7 @@
+ 'pbs-dirty-bitmap-migration': 'bool',
+ 'pbs-masterkey': 'bool',
+ 'pbs-library-version': 'str',
++ 'backup-fleecing': 'bool',
+ 'backup-max-workers': 'bool' } }
+
+ ##
pve/0043-alloc-track-fix-deadlock-during-drop.patch
pve/0044-migration-for-snapshots-hold-the-BQL-during-setup-ca.patch
pve/0045-savevm-async-don-t-hold-BQL-during-setup.patch
+pve/0046-block-copy-before-write-fix-permission.patch
+pve/0047-block-copy-before-write-support-unligned-snapshot-di.patch
+pve/0048-block-copy-before-write-create-block_copy-bitmap-in-.patch
+pve/0049-qapi-blockdev-backup-add-discard-source-parameter.patch
+pve/0050-copy-before-write-allow-specifying-minimum-cluster-s.patch
+pve/0051-backup-add-minimum-cluster-size-to-performance-optio.patch
+pve/0052-PVE-backup-add-fleecing-option.patch