errp);
if (!job) {
diff --git a/blockdev.c b/blockdev.c
-index 057601dcf0..8682814a7a 100644
+index 4c33c3f5f0..f3e508a6a7 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2776,6 +2776,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
has_granularity, granularity,
has_buf_size, buf_size,
diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
-index d2201e27f4..cc1387ae02 100644
+index eb2d92a226..f0c642b194 100644
--- a/include/block/block_int-global-state.h
+++ b/include/block/block_int-global-state.h
@@ -158,7 +158,9 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
diff --git a/qapi/block-core.json b/qapi/block-core.json
-index 4b18e01b85..0902b0a024 100644
+index b179d65520..905da8be72 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
-@@ -2170,6 +2170,15 @@
+@@ -2174,6 +2174,15 @@
# destination (all the disk, only the sectors allocated in the
# topmost image, or only new I/O).
#
# @granularity: granularity of the dirty bitmap, default is 64K if the
# image format doesn't have clusters, 4K if the clusters are
# smaller than that, else the cluster size. Must be a power of 2
-@@ -2212,7 +2221,9 @@
+@@ -2216,7 +2225,9 @@
{ 'struct': 'DriveMirror',
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
'*format': 'str', '*node-name': 'str', '*replaces': 'str',
'*speed': 'int', '*granularity': 'uint32',
'*buf-size': 'int', '*on-source-error': 'BlockdevOnError',
'*on-target-error': 'BlockdevOnError',
-@@ -2492,6 +2503,15 @@
+@@ -2496,6 +2507,15 @@
# destination (all the disk, only the sectors allocated in the
# topmost image, or only new I/O).
#
# @granularity: granularity of the dirty bitmap, default is 64K if the
# image format doesn't have clusters, 4K if the clusters are
# smaller than that, else the cluster size. Must be a power of 2
-@@ -2540,7 +2560,8 @@
+@@ -2544,7 +2564,8 @@
{ 'command': 'blockdev-mirror',
'data': { '*job-id': 'str', 'device': 'str', 'target': 'str',
'*replaces': 'str',
1 file changed, 3 insertions(+)
diff --git a/blockdev.c b/blockdev.c
-index 8682814a7a..5b75a085ee 100644
+index f3e508a6a7..37b8437f3e 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2873,6 +2873,9 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
if (bitmap_mode != BITMAP_SYNC_MODE_NEVER) {
diff --git a/blockdev.c b/blockdev.c
-index 5b75a085ee..d27d8c38ec 100644
+index 37b8437f3e..ed8198f351 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -2852,7 +2852,36 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:22 +0200
+Subject: [PATCH] block/copy-before-write: fix permission
+
+When the source node does not have any parents, the condition still
+works as required: the backup job does create the parent via
+
+ block_job_create -> block_job_add_bdrv -> bdrv_root_attach_child
+
+Still, in this case checking the @perm variable doesn't work, as the
+backup job creates the root blk with empty permissions (as it relies on
+the CBW filter to require correct permissions and doesn't want to
+create extra conflicts).
+
+So, we should not check @perm.
+
+The hack may be dropped entirely once transactional insertion of the
+filter (where we don't try to recalculate permissions in an intermediate
+state, in which the filter conflicts with the original parent of the
+source node) is merged (the old big series
+"[PATCH v5 00/45] Transactional block-graph modifying API"[1]; its
+current in-flight part is "[PATCH v8 0/7] blockdev-replace"[2]).
+
+[1] https://patchew.org/QEMU/20220330212902.590099-1-vsementsov@openvz.org/
+[2] https://patchew.org/QEMU/20231017184444.932733-1-vsementsov@yandex-team.ru/
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/copy-before-write.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 026fa9840f..5a9456d426 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -364,9 +364,13 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+ perm, shared, nperm, nshared);
+
+ if (!QLIST_EMPTY(&bs->parents)) {
+- if (perm & BLK_PERM_WRITE) {
+- *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+- }
++ /*
++ * Note, that source child may be shared with backup job. Backup job
++ * does create own blk parent on copy-before-write node, so this
++ * works even if source node does not have any parents before backup
++ * start
++ */
++ *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
+ *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+ }
+ }
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:23 +0200
+Subject: [PATCH] block/copy-before-write: support unligned snapshot-discard
+
+The first thing that crashes on unaligned access here is
+bdrv_reset_dirty_bitmap(). The correct way is to align down the
+snapshot-discard request.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/copy-before-write.c | 16 +++++++++++++---
+ 1 file changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 5a9456d426..c0e70669a2 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -325,14 +325,24 @@ static int coroutine_fn GRAPH_RDLOCK
+ cbw_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
+ {
+ BDRVCopyBeforeWriteState *s = bs->opaque;
++ uint32_t cluster_size = block_copy_cluster_size(s->bcs);
++ int64_t aligned_offset = QEMU_ALIGN_UP(offset, cluster_size);
++ int64_t aligned_end = QEMU_ALIGN_DOWN(offset + bytes, cluster_size);
++ int64_t aligned_bytes;
++
++ if (aligned_end <= aligned_offset) {
++ return 0;
++ }
++ aligned_bytes = aligned_end - aligned_offset;
+
+ WITH_QEMU_LOCK_GUARD(&s->lock) {
+- bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes);
++ bdrv_reset_dirty_bitmap(s->access_bitmap, aligned_offset,
++ aligned_bytes);
+ }
+
+- block_copy_reset(s->bcs, offset, bytes);
++ block_copy_reset(s->bcs, aligned_offset, aligned_bytes);
+
+- return bdrv_co_pdiscard(s->target, offset, bytes);
++ return bdrv_co_pdiscard(s->target, aligned_offset, aligned_bytes);
+ }
+
+ static void GRAPH_RDLOCK cbw_refresh_filename(BlockDriverState *bs)
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:24 +0200
+Subject: [PATCH] block/copy-before-write: create block_copy bitmap in filter
+ node
+
+Currently block_copy creates copy_bitmap in the source node. But that
+interacts badly with .independent_close=true of the copy-before-write
+filter: the source node may be detached and removed before the
+.bdrv_close() handler is called, which should call
+block_copy_state_free(), which in turn should remove copy_bitmap.
+
+None of this is ideal: it would be better if the internal bitmap of the
+block-copy object were not attached to any node. But that is not
+possible now.
+
+The simplest solution is to just create copy_bitmap in the filter node,
+where the two other bitmaps are created anyway.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/block-copy.c | 3 +-
+ block/copy-before-write.c | 2 +-
+ include/block/block-copy.h | 1 +
+ tests/qemu-iotests/257.out | 112 ++++++++++++++++++-------------------
+ 4 files changed, 60 insertions(+), 58 deletions(-)
+
+diff --git a/block/block-copy.c b/block/block-copy.c
+index 9ee3dd7ef5..8fca2c3698 100644
+--- a/block/block-copy.c
++++ b/block/block-copy.c
+@@ -351,6 +351,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ }
+
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
++ BlockDriverState *copy_bitmap_bs,
+ const BdrvDirtyBitmap *bitmap,
+ Error **errp)
+ {
+@@ -367,7 +368,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ return NULL;
+ }
+
+- copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
++ copy_bitmap = bdrv_create_dirty_bitmap(copy_bitmap_bs, cluster_size, NULL,
+ errp);
+ if (!copy_bitmap) {
+ return NULL;
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index c0e70669a2..94db31512d 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -468,7 +468,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+ bs->file->bs->supported_zero_flags);
+
+- s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
++ s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
+ if (!s->bcs) {
+ error_prepend(errp, "Cannot create block-copy-state: ");
+ return -EINVAL;
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index 0700953ab8..8b41643bfa 100644
+--- a/include/block/block-copy.h
++++ b/include/block/block-copy.h
+@@ -25,6 +25,7 @@ typedef struct BlockCopyState BlockCopyState;
+ typedef struct BlockCopyCallState BlockCopyCallState;
+
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
++ BlockDriverState *copy_bitmap_bs,
+ const BdrvDirtyBitmap *bitmap,
+ Error **errp);
+
+diff --git a/tests/qemu-iotests/257.out b/tests/qemu-iotests/257.out
+index aa76131ca9..c33dd7f3a9 100644
+--- a/tests/qemu-iotests/257.out
++++ b/tests/qemu-iotests/257.out
+@@ -120,16 +120,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -596,16 +596,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -865,16 +865,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -1341,16 +1341,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -1610,16 +1610,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -2086,16 +2086,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -2355,16 +2355,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -2831,16 +2831,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -3100,16 +3100,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -3576,16 +3576,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -3845,16 +3845,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -4321,16 +4321,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -4590,16 +4590,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
+@@ -5066,16 +5066,16 @@ write -P0x67 0x3fe0000 0x20000
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- }
+- ],
+- "drive0": [
++ },
+ {
+ "busy": false,
+ "count": 0,
+ "granularity": 65536,
+ "persistent": false,
+ "recording": false
+- },
++ }
++ ],
++ "drive0": [
+ {
+ "busy": false,
+ "count": 458752,
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Date: Thu, 11 Apr 2024 11:29:25 +0200
+Subject: [PATCH] qapi: blockdev-backup: add discard-source parameter
+
+Add a parameter that enables discard-after-copy. That is mostly useful
+in "push backup with fleecing" scheme, when source is snapshot-access
+format driver node, based on copy-before-write filter snapshot-access
+API:
+
+[guest] [snapshot-access] ~~ blockdev-backup ~~> [backup target]
+ | |
+ | root | file
+ v v
+[copy-before-write]
+ | |
+ | file | target
+ v v
+[active disk] [temp.img]
+
+In this case discard-after-copy does two things:
+
+ - discard data in temp.img to save disk space
+ - avoid further copy-before-write operation in discarded area
+
+Note that we have to declare WRITE permission on source in
+copy-before-write filter, for discard to work. Still we can't take it
+unconditionally, as it will break normal backup from RO source. So, we
+have to add a parameter and pass it through bdrv_open flags.
+
+Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/backup.c | 5 +++--
+ block/block-copy.c | 9 +++++++++
+ block/copy-before-write.c | 15 +++++++++++++--
+ block/copy-before-write.h | 1 +
+ block/replication.c | 4 ++--
+ blockdev.c | 2 +-
+ include/block/block-common.h | 2 ++
+ include/block/block-copy.h | 1 +
+ include/block/block_int-global-state.h | 2 +-
+ qapi/block-core.json | 4 ++++
+ 10 files changed, 37 insertions(+), 8 deletions(-)
+
+diff --git a/block/backup.c b/block/backup.c
+index ec29d6b810..3dd2e229d2 100644
+--- a/block/backup.c
++++ b/block/backup.c
+@@ -356,7 +356,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, int64_t speed,
+ MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
+ BitmapSyncMode bitmap_mode,
+- bool compress,
++ bool compress, bool discard_source,
+ const char *filter_node_name,
+ BackupPerf *perf,
+ BlockdevOnError on_source_error,
+@@ -457,7 +457,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+ goto error;
+ }
+
+- cbw = bdrv_cbw_append(bs, target, filter_node_name, &bcs, errp);
++ cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
++ &bcs, errp);
+ if (!cbw) {
+ goto error;
+ }
+diff --git a/block/block-copy.c b/block/block-copy.c
+index 8fca2c3698..7e3b378528 100644
+--- a/block/block-copy.c
++++ b/block/block-copy.c
+@@ -137,6 +137,7 @@ typedef struct BlockCopyState {
+ CoMutex lock;
+ int64_t in_flight_bytes;
+ BlockCopyMethod method;
++ bool discard_source;
+ BlockReqList reqs;
+ QLIST_HEAD(, BlockCopyCallState) calls;
+ /*
+@@ -353,6 +354,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ BlockDriverState *copy_bitmap_bs,
+ const BdrvDirtyBitmap *bitmap,
++ bool discard_source,
+ Error **errp)
+ {
+ ERRP_GUARD();
+@@ -418,6 +420,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ cluster_size),
+ };
+
++ s->discard_source = discard_source;
+ block_copy_set_copy_opts(s, false, false);
+
+ ratelimit_init(&s->rate_limit);
+@@ -589,6 +592,12 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
+ co_put_to_shres(s->mem, t->req.bytes);
+ block_copy_task_end(t, ret);
+
++ if (s->discard_source && ret == 0) {
++ int64_t nbytes =
++ MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
++ bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
++ }
++
+ return ret;
+ }
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 94db31512d..853e01a1eb 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -44,6 +44,7 @@ typedef struct BDRVCopyBeforeWriteState {
+ BdrvChild *target;
+ OnCbwError on_cbw_error;
+ uint64_t cbw_timeout_ns;
++ bool discard_source;
+
+ /*
+ * @lock: protects access to @access_bitmap, @done_bitmap and
+@@ -357,6 +358,8 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+ uint64_t perm, uint64_t shared,
+ uint64_t *nperm, uint64_t *nshared)
+ {
++ BDRVCopyBeforeWriteState *s = bs->opaque;
++
+ if (!(role & BDRV_CHILD_FILTERED)) {
+ /*
+ * Target child
+@@ -381,6 +384,10 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
+ * start
+ */
+ *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
++ if (s->discard_source) {
++ *nperm = *nperm | BLK_PERM_WRITE;
++ }
++
+ *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
+ }
+ }
+@@ -468,7 +475,9 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
+ bs->file->bs->supported_zero_flags);
+
+- s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
++ s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
++ s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
++ flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
+ if (!s->bcs) {
+ error_prepend(errp, "Cannot create block-copy-state: ");
+ return -EINVAL;
+@@ -535,12 +544,14 @@ static BlockDriver bdrv_cbw_filter = {
+ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+ BlockDriverState *target,
+ const char *filter_node_name,
++ bool discard_source,
+ BlockCopyState **bcs,
+ Error **errp)
+ {
+ BDRVCopyBeforeWriteState *state;
+ BlockDriverState *top;
+ QDict *opts;
++ int flags = BDRV_O_RDWR | (discard_source ? BDRV_O_CBW_DISCARD_SOURCE : 0);
+
+ assert(source->total_sectors == target->total_sectors);
+ GLOBAL_STATE_CODE();
+@@ -553,7 +564,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+ qdict_put_str(opts, "file", bdrv_get_node_name(source));
+ qdict_put_str(opts, "target", bdrv_get_node_name(target));
+
+- top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp);
++ top = bdrv_insert_node(source, opts, flags, errp);
+ if (!top) {
+ return NULL;
+ }
+diff --git a/block/copy-before-write.h b/block/copy-before-write.h
+index 6e72bb25e9..01af0cd3c4 100644
+--- a/block/copy-before-write.h
++++ b/block/copy-before-write.h
+@@ -39,6 +39,7 @@
+ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+ BlockDriverState *target,
+ const char *filter_node_name,
++ bool discard_source,
+ BlockCopyState **bcs,
+ Error **errp);
+ void bdrv_cbw_drop(BlockDriverState *bs);
+diff --git a/block/replication.c b/block/replication.c
+index ca6bd0a720..0415a5e8b7 100644
+--- a/block/replication.c
++++ b/block/replication.c
+@@ -582,8 +582,8 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
+
+ s->backup_job = backup_job_create(
+ NULL, s->secondary_disk->bs, s->hidden_disk->bs,
+- 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
+- &perf,
++ 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, false,
++ NULL, &perf,
+ BLOCKDEV_ON_ERROR_REPORT,
+ BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
+ backup_job_completed, bs, NULL, &local_err);
+diff --git a/blockdev.c b/blockdev.c
+index 057601dcf0..4c33c3f5f0 100644
+--- a/blockdev.c
++++ b/blockdev.c
+@@ -2726,7 +2726,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
+
+ job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
+ backup->sync, bmap, backup->bitmap_mode,
+- backup->compress,
++ backup->compress, backup->discard_source,
+ backup->filter_node_name,
+ &perf,
+ backup->on_source_error,
+diff --git a/include/block/block-common.h b/include/block/block-common.h
+index a846023a09..338fe5ff7a 100644
+--- a/include/block/block-common.h
++++ b/include/block/block-common.h
+@@ -243,6 +243,8 @@ typedef enum {
+ read-write fails */
+ #define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
+
++#define BDRV_O_CBW_DISCARD_SOURCE 0x80000 /* for copy-before-write filter */
++
+ #define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
+
+
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index 8b41643bfa..bdc703bacd 100644
+--- a/include/block/block-copy.h
++++ b/include/block/block-copy.h
+@@ -27,6 +27,7 @@ typedef struct BlockCopyCallState BlockCopyCallState;
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ BlockDriverState *copy_bitmap_bs,
+ const BdrvDirtyBitmap *bitmap,
++ bool discard_source,
+ Error **errp);
+
+ /* Function should be called prior any actual copy request */
+diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
+index d2201e27f4..eb2d92a226 100644
+--- a/include/block/block_int-global-state.h
++++ b/include/block/block_int-global-state.h
+@@ -193,7 +193,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+ MirrorSyncMode sync_mode,
+ BdrvDirtyBitmap *sync_bitmap,
+ BitmapSyncMode bitmap_mode,
+- bool compress,
++ bool compress, bool discard_source,
+ const char *filter_node_name,
+ BackupPerf *perf,
+ BlockdevOnError on_source_error,
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index 4b18e01b85..b179d65520 100644
+--- a/qapi/block-core.json
++++ b/qapi/block-core.json
+@@ -1610,6 +1610,9 @@
+ # node specified by @drive. If this option is not given, a node
+ # name is autogenerated. (Since: 4.2)
+ #
++# @discard-source: Discard blocks on source which are already copied
++# to the target. (Since 9.0)
++#
+ # @x-perf: Performance options. (Since 6.0)
+ #
+ # Features:
+@@ -1631,6 +1634,7 @@
+ '*on-target-error': 'BlockdevOnError',
+ '*auto-finalize': 'bool', '*auto-dismiss': 'bool',
+ '*filter-node-name': 'str',
++ '*discard-source': 'bool',
+ '*x-perf': { 'type': 'BackupPerf',
+ 'features': [ 'unstable' ] } } }
+
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Thomas Huth <thuth@redhat.com>
+Date: Tue, 18 Jun 2024 14:19:58 +0200
+Subject: [PATCH] hw/virtio: Fix the de-initialization of vhost-user devices
+
+The unrealize functions of the various vhost-user devices are
+calling the corresponding vhost_*_set_status() functions with a
+status of 0 to shut down the device correctly.
+
+Now these vhost_*_set_status() functions all follow this scheme:
+
+ bool should_start = virtio_device_should_start(vdev, status);
+
+ if (vhost_dev_is_started(&vvc->vhost_dev) == should_start) {
+ return;
+ }
+
+ if (should_start) {
+ /* ... do the initialization stuff ... */
+ } else {
+ /* ... do the cleanup stuff ... */
+ }
+
+The problem here is virtio_device_should_start(vdev, 0) currently
+always returns "true" since it internally only looks at vdev->started
+instead of looking at the "status" parameter. Thus once the device
+got started once, virtio_device_should_start() always returns true
+and thus the vhost_*_set_status() functions return early, without
+ever doing any clean-up when being called with status == 0. This
+causes problems, e.g. when trying to hot-plug and hot-unplug a
+vhost-user device multiple times, since the de-initialization step is
+completely skipped during the unplug operation.
+
+This bug has been introduced in commit 9f6bcfd99f ("hw/virtio: move
+vm_running check to virtio_device_started") which replaced
+
+ should_start = status & VIRTIO_CONFIG_S_DRIVER_OK;
+
+with
+
+ should_start = virtio_device_started(vdev, status);
+
+which later got replaced by virtio_device_should_start(). This blocked
+the possibility to set should_start to false in case the status flag
+VIRTIO_CONFIG_S_DRIVER_OK was not set.
+
+Fix it by adjusting the virtio_device_should_start() function to
+only consider the status flag instead of vdev->started. Since this
+function is only used in the various vhost_*_set_status() functions
+for exactly the same purpose, it should be fine to fix it in this
+central place there without any risk to change the behavior of other
+code.
+
+Fixes: 9f6bcfd99f ("hw/virtio: move vm_running check to virtio_device_started")
+Buglink: https://issues.redhat.com/browse/RHEL-40708
+Signed-off-by: Thomas Huth <thuth@redhat.com>
+Message-Id: <20240618121958.88673-1-thuth@redhat.com>
+Reviewed-by: Manos Pitsidianakis <manos.pitsidianakis@linaro.org>
+Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+(cherry picked from commit d72479b11797c28893e1e3fc565497a9cae5ca16)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ include/hw/virtio/virtio.h | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h
+index 7d5ffdc145..2eafad17b8 100644
+--- a/include/hw/virtio/virtio.h
++++ b/include/hw/virtio/virtio.h
+@@ -470,9 +470,9 @@ static inline bool virtio_device_started(VirtIODevice *vdev, uint8_t status)
+ * @vdev - the VirtIO device
+ * @status - the devices status bits
+ *
+- * This is similar to virtio_device_started() but also encapsulates a
+- * check on the VM status which would prevent a device starting
+- * anyway.
++ * This is similar to virtio_device_started() but ignores vdev->started
++ * and also encapsulates a check on the VM status which would prevent a
++ * device from starting anyway.
+ */
+ static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status)
+ {
+@@ -480,7 +480,7 @@ static inline bool virtio_device_should_start(VirtIODevice *vdev, uint8_t status
+ return false;
+ }
+
+- return virtio_device_started(vdev, status);
++ return status & VIRTIO_CONFIG_S_DRIVER_OK;
+ }
+
+ static inline void virtio_set_started(VirtIODevice *vdev, bool started)
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Daniyal Khan <danikhan632@gmail.com>
+Date: Wed, 17 Jul 2024 16:01:47 +1000
+Subject: [PATCH] target/arm: Use float_status copy in sme_fmopa_s
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+We made a copy above because the fp exception flags
+are not propagated back to the FPST register, but
+then failed to use the copy.
+
+Cc: qemu-stable@nongnu.org
+Fixes: 558e956c719 ("target/arm: Implement FMOPA, FMOPS (non-widening)")
+Signed-off-by: Daniyal Khan <danikhan632@gmail.com>
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Message-id: 20240717060149.204788-2-richard.henderson@linaro.org
+[rth: Split from a larger patch]
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+(cherry picked from commit 31d93fedf41c24b0badb38cd9317590d1ef74e37)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/arm/tcg/sme_helper.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/target/arm/tcg/sme_helper.c b/target/arm/tcg/sme_helper.c
+index e2e0575039..5a6dd76489 100644
+--- a/target/arm/tcg/sme_helper.c
++++ b/target/arm/tcg/sme_helper.c
+@@ -916,7 +916,7 @@ void HELPER(sme_fmopa_s)(void *vza, void *vzn, void *vzm, void *vpn,
+ if (pb & 1) {
+ uint32_t *a = vza_row + H1_4(col);
+ uint32_t *m = vzm + H1_4(col);
+- *a = float32_muladd(n, *m, *a, 0, vst);
++ *a = float32_muladd(n, *m, *a, 0, &fpst);
+ }
+ col += 4;
+ pb >>= 4;
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Richard Henderson <richard.henderson@linaro.org>
+Date: Wed, 17 Jul 2024 16:01:48 +1000
+Subject: [PATCH] target/arm: Use FPST_F16 for SME FMOPA (widening)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This operation has float16 inputs and thus must use
+the FZ16 control not the FZ control.
+
+Cc: qemu-stable@nongnu.org
+Fixes: 3916841ac75 ("target/arm: Implement FMOPA, FMOPS (widening)")
+Reported-by: Daniyal Khan <danikhan632@gmail.com>
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Message-id: 20240717060149.204788-3-richard.henderson@linaro.org
+Resolves: https://gitlab.com/qemu-project/qemu/-/issues/2374
+Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
+Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
+Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
+(cherry picked from commit 207d30b5fdb5b45a36f26eefcf52fe2c1714dd4f)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ target/arm/tcg/translate-sme.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/target/arm/tcg/translate-sme.c b/target/arm/tcg/translate-sme.c
+index 46c7fce8b4..185a8a917b 100644
+--- a/target/arm/tcg/translate-sme.c
++++ b/target/arm/tcg/translate-sme.c
+@@ -304,6 +304,7 @@ static bool do_outprod(DisasContext *s, arg_op *a, MemOp esz,
+ }
+
+ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
++ ARMFPStatusFlavour e_fpst,
+ gen_helper_gvec_5_ptr *fn)
+ {
+ int svl = streaming_vec_reg_size(s);
+@@ -319,15 +320,18 @@ static bool do_outprod_fpst(DisasContext *s, arg_op *a, MemOp esz,
+ zm = vec_full_reg_ptr(s, a->zm);
+ pn = pred_full_reg_ptr(s, a->pn);
+ pm = pred_full_reg_ptr(s, a->pm);
+- fpst = fpstatus_ptr(FPST_FPCR);
++ fpst = fpstatus_ptr(e_fpst);
+
+ fn(za, zn, zm, pn, pm, fpst, tcg_constant_i32(desc));
+ return true;
+ }
+
+-TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_h)
+-TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a, MO_32, gen_helper_sme_fmopa_s)
+-TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a, MO_64, gen_helper_sme_fmopa_d)
++TRANS_FEAT(FMOPA_h, aa64_sme, do_outprod_fpst, a,
++ MO_32, FPST_FPCR_F16, gen_helper_sme_fmopa_h)
++TRANS_FEAT(FMOPA_s, aa64_sme, do_outprod_fpst, a,
++ MO_32, FPST_FPCR, gen_helper_sme_fmopa_s)
++TRANS_FEAT(FMOPA_d, aa64_sme_f64f64, do_outprod_fpst, a,
++ MO_64, FPST_FPCR, gen_helper_sme_fmopa_d)
+
+ /* TODO: FEAT_EBF16 */
+ TRANS_FEAT(BFMOPA, aa64_sme, do_outprod, a, MO_32, gen_helper_sme_bfmopa)
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Wed, 10 Jul 2024 17:25:29 +0200
+Subject: [PATCH] scsi: fix regression and honor bootindex again for legacy
+ drives
+
+Commit 3089637461 ("scsi: Don't ignore most usb-storage properties")
+removed the call to object_property_set_int() and thus the 'set'
+method for the bootindex property was also not called anymore. Here
+that method is device_set_bootindex() (as configured by
+scsi_dev_instance_init() -> device_add_bootindex_property()) which as
+a side effect registers the device via add_boot_device_path().
+
+As reported by a downstream user [0], the bootindex property did not
+have the desired effect anymore for legacy drives. Fix the regression
+by explicitly calling the add_boot_device_path() function after
+checking that the bootindex is not yet used (to avoid
+add_boot_device_path() calling exit()).
+
+[0]: https://forum.proxmox.com/threads/149772/post-679433
+
+Cc: qemu-stable@nongnu.org
+Fixes: 3089637461 ("scsi: Don't ignore most usb-storage properties")
+Suggested-by: Kevin Wolf <kwolf@redhat.com>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Link: https://lore.kernel.org/r/20240710152529.1737407-1-f.ebner@proxmox.com
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit 57a8a80d1a5b28797b21d30bfc60601945820e51)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/scsi/scsi-bus.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
+index 9e40b0c920..53eff5dd3d 100644
+--- a/hw/scsi/scsi-bus.c
++++ b/hw/scsi/scsi-bus.c
+@@ -384,6 +384,7 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
+ DeviceState *dev;
+ SCSIDevice *s;
+ DriveInfo *dinfo;
++ Error *local_err = NULL;
+
+ if (blk_is_sg(blk)) {
+ driver = "scsi-generic";
+@@ -403,6 +404,14 @@ SCSIDevice *scsi_bus_legacy_add_drive(SCSIBus *bus, BlockBackend *blk,
+ s = SCSI_DEVICE(dev);
+ s->conf = *conf;
+
++ check_boot_index(conf->bootindex, &local_err);
++ if (local_err) {
++ object_unparent(OBJECT(dev));
++ error_propagate(errp, local_err);
++ return NULL;
++ }
++ add_boot_device_path(conf->bootindex, dev, NULL);
++
+ qdev_prop_set_uint32(dev, "scsi-id", unit);
+ if (object_property_find(OBJECT(dev), "removable")) {
+ qdev_prop_set_bit(dev, "removable", removable);
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Mon, 15 Jul 2024 15:14:03 +0200
+Subject: [PATCH] hw/scsi/lsi53c895a: bump instruction limit in scripts
+ processing to fix regression
+
+Commit 9876359990 ("hw/scsi/lsi53c895a: add timer to scripts
+processing") reduced the maximum allowed instruction count by
+a factor of 100 all the way down to 100.
+
+This causes the "Check Point R81.20 Gaia" appliance [0] to fail to
+boot after fully finishing the installation via the appliance's web
+interface (there is already one reboot before that).
+
+With a limit of 150, the appliance still fails to boot, while with a
+limit of 200, it works. Bump to 500 to fix the regression and be on
+the safe side.
+
+Originally reported in the Proxmox community forum[1].
+
+[0]: https://support.checkpoint.com/results/download/124397
+[1]: https://forum.proxmox.com/threads/149772/post-683459
+
+Cc: qemu-stable@nongnu.org
+Fixes: 9876359990 ("hw/scsi/lsi53c895a: add timer to scripts processing")
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Acked-by: Sven Schnelle <svens@stackframe.org>
+Link: https://lore.kernel.org/r/20240715131403.223239-1-f.ebner@proxmox.com
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit a4975023fb13cf229bd59c9ceec1b8cbdc5b9a20)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ hw/scsi/lsi53c895a.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
+index eb9828dd5e..f1935e5328 100644
+--- a/hw/scsi/lsi53c895a.c
++++ b/hw/scsi/lsi53c895a.c
+@@ -188,7 +188,7 @@ static const char *names[] = {
+ #define LSI_TAG_VALID (1 << 16)
+
+ /* Maximum instructions to process. */
+-#define LSI_MAX_INSN 100
++#define LSI_MAX_INSN 500
+
+ typedef struct lsi_request {
+ SCSIRequest *req;
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Kevin Wolf <kwolf@redhat.com>
+Date: Thu, 27 Jun 2024 20:12:44 +0200
+Subject: [PATCH] block-copy: Fix missing graph lock
+
+The graph lock needs to be held when calling bdrv_co_pdiscard(). Fix
+block_copy_task_entry() to take it for the call.
+
+WITH_GRAPH_RDLOCK_GUARD() was implemented in a weak way because of
+limitations in clang's Thread Safety Analysis at the time, so that it
+only asserts that the lock is held (which allows calling functions that
+require the lock), but we never deal with the unlocking (so even after
+the scope of the guard, the compiler assumes that the lock is still
+held). This is why the compiler didn't catch this locking error.
+
+Signed-off-by: Kevin Wolf <kwolf@redhat.com>
+Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
+(picked from https://lore.kernel.org/qemu-devel/20240627181245.281403-2-kwolf@redhat.com/)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ block/block-copy.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/block/block-copy.c b/block/block-copy.c
+index 7e3b378528..cc618e4561 100644
+--- a/block/block-copy.c
++++ b/block/block-copy.c
+@@ -595,7 +595,9 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
+ if (s->discard_source && ret == 0) {
+ int64_t nbytes =
+ MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
+- bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
++ WITH_GRAPH_RDLOCK_GUARD() {
++ bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
++ }
+ }
+
+ return ret;
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Sergey Dyasli <sergey.dyasli@nutanix.com>
+Date: Fri, 12 Jul 2024 09:26:59 +0000
+Subject: [PATCH] Revert "qemu-char: do not operate on sources from finalize
+ callbacks"
+
+This reverts commit 2b316774f60291f57ca9ecb6a9f0712c532cae34.
+
+After 038b4217884c ("Revert "chardev: use a child source for qio input
+source"") we've been observing the "iwp->src == NULL" assertion
+triggering periodically during the initial capabilities querying by
+libvirtd. One of the possible backtraces:
+
+Thread 1 (Thread 0x7f16cd4f0700 (LWP 43858)):
+0 __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:50
+1 0x00007f16c6c21e65 in __GI_abort () at abort.c:79
+2 0x00007f16c6c21d39 in __assert_fail_base at assert.c:92
+3 0x00007f16c6c46e86 in __GI___assert_fail (assertion=assertion@entry=0x562e9bcdaadd "iwp->src == NULL", file=file@entry=0x562e9bcdaac8 "../chardev/char-io.c", line=line@entry=99, function=function@entry=0x562e9bcdab10 <__PRETTY_FUNCTION__.20549> "io_watch_poll_finalize") at assert.c:101
+4 0x0000562e9ba20c2c in io_watch_poll_finalize (source=<optimized out>) at ../chardev/char-io.c:99
+5 io_watch_poll_finalize (source=<optimized out>) at ../chardev/char-io.c:88
+6 0x00007f16c904aae0 in g_source_unref_internal () from /lib64/libglib-2.0.so.0
+7 0x00007f16c904baf9 in g_source_destroy_internal () from /lib64/libglib-2.0.so.0
+8 0x0000562e9ba20db0 in io_remove_watch_poll (source=0x562e9d6720b0) at ../chardev/char-io.c:147
+9 remove_fd_in_watch (chr=chr@entry=0x562e9d5f3800) at ../chardev/char-io.c:153
+10 0x0000562e9ba23ffb in update_ioc_handlers (s=0x562e9d5f3800) at ../chardev/char-socket.c:592
+11 0x0000562e9ba2072f in qemu_chr_fe_set_handlers_full at ../chardev/char-fe.c:279
+12 0x0000562e9ba207a9 in qemu_chr_fe_set_handlers at ../chardev/char-fe.c:304
+13 0x0000562e9ba2ca75 in monitor_qmp_setup_handlers_bh (opaque=0x562e9d4c2c60) at ../monitor/qmp.c:509
+14 0x0000562e9bb6222e in aio_bh_poll (ctx=ctx@entry=0x562e9d4c2f20) at ../util/async.c:216
+15 0x0000562e9bb4de0a in aio_poll (ctx=0x562e9d4c2f20, blocking=blocking@entry=true) at ../util/aio-posix.c:722
+16 0x0000562e9b99dfaa in iothread_run (opaque=0x562e9d4c26f0) at ../iothread.c:63
+17 0x0000562e9bb505a4 in qemu_thread_start (args=0x562e9d4c7ea0) at ../util/qemu-thread-posix.c:543
+18 0x00007f16c70081ca in start_thread (arg=<optimized out>) at pthread_create.c:479
+19 0x00007f16c6c398d3 in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95
+
+io_remove_watch_poll(), which makes sure that iwp->src is NULL, calls
+g_source_destroy() which finds that iwp->src is not NULL in the finalize
+callback. This can only happen if another thread has managed to trigger
+io_watch_poll_prepare() callback in the meantime.
+
+Move iwp->src destruction back to the finalize callback to prevent the
+described race, and also remove the stale comment. The deadlock glib bug
+was fixed back in 2010 by b35820285668 ("gmain: move finalization of
+GSource outside of context lock").
+
+Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sergey Dyasli <sergey.dyasli@nutanix.com>
+Link: https://lore.kernel.org/r/20240712092659.216206-1-sergey.dyasli@nutanix.com
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+(cherry picked from commit e0bf95443ee9326d44031373420cf9f3513ee255)
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+---
+ chardev/char-io.c | 19 +++++--------------
+ 1 file changed, 5 insertions(+), 14 deletions(-)
+
+diff --git a/chardev/char-io.c b/chardev/char-io.c
+index dab77b112e..3be17b51ca 100644
+--- a/chardev/char-io.c
++++ b/chardev/char-io.c
+@@ -87,16 +87,12 @@ static gboolean io_watch_poll_dispatch(GSource *source, GSourceFunc callback,
+
+ static void io_watch_poll_finalize(GSource *source)
+ {
+- /*
+- * Due to a glib bug, removing the last reference to a source
+- * inside a finalize callback causes recursive locking (and a
+- * deadlock). This is not a problem inside other callbacks,
+- * including dispatch callbacks, so we call io_remove_watch_poll
+- * to remove this source. At this point, iwp->src must
+- * be NULL, or we would leak it.
+- */
+ IOWatchPoll *iwp = io_watch_poll_from_source(source);
+- assert(iwp->src == NULL);
++ if (iwp->src) {
++ g_source_destroy(iwp->src);
++ g_source_unref(iwp->src);
++ iwp->src = NULL;
++ }
+ }
+
+ static GSourceFuncs io_watch_poll_funcs = {
+@@ -139,11 +135,6 @@ static void io_remove_watch_poll(GSource *source)
+ IOWatchPoll *iwp;
+
+ iwp = io_watch_poll_from_source(source);
+- if (iwp->src) {
+- g_source_destroy(iwp->src);
+- g_source_unref(iwp->src);
+- iwp->src = NULL;
+- }
+ g_source_destroy(&iwp->parent);
+ }
+
};
return raw_co_create(&options, errp);
diff --git a/qapi/block-core.json b/qapi/block-core.json
-index 0902b0a024..0653c244cf 100644
+index 905da8be72..3db587a6e4 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
-@@ -4952,6 +4952,10 @@
+@@ -4956,6 +4956,10 @@
# @extent-size-hint: Extent size hint to add to the image file; 0 for
# not adding an extent size hint (default: 1 MB, since 5.1)
#
# Since: 2.12
##
{ 'struct': 'BlockdevCreateOptionsFile',
-@@ -4959,7 +4963,8 @@
+@@ -4963,7 +4967,8 @@
'size': 'size',
'*preallocation': 'PreallocMode',
'*nocow': 'bool',
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/block/backup.c b/block/backup.c
-index ec29d6b810..270957c0cd 100644
+index 3dd2e229d2..eba5b11493 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -237,8 +237,8 @@ static void backup_init_bcs_bitmap(BackupBlockJob *job)
if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
int64_t offset = 0;
int64_t count;
-@@ -501,6 +499,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+@@ -502,6 +500,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
&error_abort);
bdrv_graph_wrunlock();
+ return bs;
+}
diff --git a/block/backup.c b/block/backup.c
-index 270957c0cd..16d611c4ca 100644
+index eba5b11493..1963e47ab9 100644
--- a/block/backup.c
+++ b/block/backup.c
@@ -29,28 +29,6 @@
static const BlockJobDriver backup_job_driver;
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
-@@ -461,6 +439,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+@@ -462,6 +440,14 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
}
cluster_size = block_copy_cluster_size(bcs);
+ hmp_handle_error(mon, error);
+}
diff --git a/blockdev.c b/blockdev.c
-index d27d8c38ec..5e5dbc1da9 100644
+index ed8198f351..1054a69279 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -37,6 +37,7 @@
+ return ret;
+}
diff --git a/qapi/block-core.json b/qapi/block-core.json
-index 0653c244cf..dbd5d9b993 100644
+index 3db587a6e4..d05fffce1d 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -851,6 +851,239 @@
summary_info += {'libdaxctl support': libdaxctl}
summary_info += {'libudev': libudev}
diff --git a/qapi/block-core.json b/qapi/block-core.json
-index dbd5d9b993..e79775656c 100644
+index d05fffce1d..e7cf3d94f3 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
-@@ -3453,6 +3453,7 @@
+@@ -3457,6 +3457,7 @@
'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
'raw', 'rbd',
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
'ssh', 'throttle', 'vdi', 'vhdx',
{ 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
{ 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
-@@ -3539,6 +3540,33 @@
+@@ -3543,6 +3544,33 @@
{ 'struct': 'BlockdevOptionsNull',
'data': { '*size': 'int', '*latency-ns': 'uint64', '*read-zeroes': 'bool' } }
##
# @BlockdevOptionsNVMe:
#
-@@ -4973,6 +5001,7 @@
+@@ -4977,6 +5005,7 @@
'nfs': 'BlockdevOptionsNfs',
'null-aio': 'BlockdevOptionsNull',
'null-co': 'BlockdevOptionsNull',
ret->pbs_masterkey = true;
ret->backup_max_workers = true;
diff --git a/qapi/block-core.json b/qapi/block-core.json
-index e79775656c..cb58a664ef 100644
+index e7cf3d94f3..282e2e8a8c 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -1004,6 +1004,11 @@
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Date: Thu, 11 Apr 2024 11:29:22 +0200
-Subject: [PATCH] block/copy-before-write: fix permission
-
-In case when source node does not have any parents, the condition still
-works as required: backup job do create the parent by
-
- block_job_create -> block_job_add_bdrv -> bdrv_root_attach_child
-
-Still, in this case checking @perm variable doesn't work, as backup job
-creates the root blk with empty permissions (as it rely on CBW filter
-to require correct permissions and don't want to create extra
-conflicts).
-
-So, we should not check @perm.
-
-The hack may be dropped entirely when transactional insertion of
-filter (when we don't try to recalculate permissions in intermediate
-state, when filter does conflict with original parent of the source
-node) merged (old big series
-"[PATCH v5 00/45] Transactional block-graph modifying API"[1] and it's
-current in-flight part is "[PATCH v8 0/7] blockdev-replace"[2])
-
-[1] https://patchew.org/QEMU/20220330212902.590099-1-vsementsov@openvz.org/
-[2] https://patchew.org/QEMU/20231017184444.932733-1-vsementsov@yandex-team.ru/
-
-Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- block/copy-before-write.c | 10 +++++++---
- 1 file changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/block/copy-before-write.c b/block/copy-before-write.c
-index 026fa9840f..5a9456d426 100644
---- a/block/copy-before-write.c
-+++ b/block/copy-before-write.c
-@@ -364,9 +364,13 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
- perm, shared, nperm, nshared);
-
- if (!QLIST_EMPTY(&bs->parents)) {
-- if (perm & BLK_PERM_WRITE) {
-- *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
-- }
-+ /*
-+ * Note, that source child may be shared with backup job. Backup job
-+ * does create own blk parent on copy-before-write node, so this
-+ * works even if source node does not have any parents before backup
-+ * start
-+ */
-+ *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
- *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
- }
- }
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:26 +0200
+Subject: [PATCH] copy-before-write: allow specifying minimum cluster size
+
+Useful to make discard-source work in the context of backup fleecing
+when the fleecing image has a larger granularity than the backup
+target.
+
+Copy-before-write operations will use at least this granularity and in
+particular, discard requests to the source node will too. If the
+granularity is too small, they will just be aligned down in
+cbw_co_pdiscard_snapshot() and thus effectively ignored.
+
+The QAPI uses uint32 so the value will be non-negative, but still fit
+into an int64_t.
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/block-copy.c | 17 +++++++++++++----
+ block/copy-before-write.c | 3 ++-
+ include/block/block-copy.h | 1 +
+ qapi/block-core.json | 8 +++++++-
+ 4 files changed, 23 insertions(+), 6 deletions(-)
+
+diff --git a/block/block-copy.c b/block/block-copy.c
+index cc618e4561..12d662e9d4 100644
+--- a/block/block-copy.c
++++ b/block/block-copy.c
+@@ -310,6 +310,7 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
+ }
+
+ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
++ int64_t min_cluster_size,
+ Error **errp)
+ {
+ int ret;
+@@ -335,7 +336,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ "used. If the actual block size of the target exceeds "
+ "this default, the backup may be unusable",
+ BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+- return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
++ return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+ } else if (ret < 0 && !target_does_cow) {
+ error_setg_errno(errp, -ret,
+ "Couldn't determine the cluster size of the target image, "
+@@ -345,16 +346,18 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
+ return ret;
+ } else if (ret < 0 && target_does_cow) {
+ /* Not fatal; just trudge on ahead. */
+- return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
++ return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
+ }
+
+- return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
++ return MAX(min_cluster_size,
++ MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size));
+ }
+
+ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ BlockDriverState *copy_bitmap_bs,
+ const BdrvDirtyBitmap *bitmap,
+ bool discard_source,
++ int64_t min_cluster_size,
+ Error **errp)
+ {
+ ERRP_GUARD();
+@@ -365,7 +368,13 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+
+ GLOBAL_STATE_CODE();
+
+- cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
++ if (min_cluster_size && !is_power_of_2(min_cluster_size)) {
++ error_setg(errp, "min-cluster-size needs to be a power of 2");
++ return NULL;
++ }
++
++ cluster_size = block_copy_calculate_cluster_size(target->bs,
++ min_cluster_size, errp);
+ if (cluster_size < 0) {
+ return NULL;
+ }
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 853e01a1eb..47b3cdd09f 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -477,7 +477,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
+
+ s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
+ s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
+- flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
++ flags & BDRV_O_CBW_DISCARD_SOURCE,
++ opts->min_cluster_size, errp);
+ if (!s->bcs) {
+ error_prepend(errp, "Cannot create block-copy-state: ");
+ return -EINVAL;
+diff --git a/include/block/block-copy.h b/include/block/block-copy.h
+index bdc703bacd..77857c6c68 100644
+--- a/include/block/block-copy.h
++++ b/include/block/block-copy.h
+@@ -28,6 +28,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
+ BlockDriverState *copy_bitmap_bs,
+ const BdrvDirtyBitmap *bitmap,
+ bool discard_source,
++ int64_t min_cluster_size,
+ Error **errp);
+
+ /* Function should be called prior any actual copy request */
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index 282e2e8a8c..9caf04cbe9 100644
+--- a/qapi/block-core.json
++++ b/qapi/block-core.json
+@@ -4926,12 +4926,18 @@
+ # @on-cbw-error parameter will decide how this failure is handled.
+ # Default 0. (Since 7.1)
+ #
++# @min-cluster-size: Minimum size of blocks used by copy-before-write
++# operations. Has to be a power of 2. No effect if smaller than
++# the maximum of the target's cluster size and 64 KiB. Default 0.
++# (Since 8.1)
++#
+ # Since: 6.2
+ ##
+ { 'struct': 'BlockdevOptionsCbw',
+ 'base': 'BlockdevOptionsGenericFormat',
+ 'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
+- '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
++ '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32',
++ '*min-cluster-size': 'uint32' } }
+
+ ##
+ # @BlockdevOptions:
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:27 +0200
+Subject: [PATCH] backup: add minimum cluster size to performance options
+
+Useful to make discard-source work in the context of backup fleecing
+when the fleecing image has a larger granularity than the backup
+target.
+
+Backup/block-copy will use at least this granularity for copy operations
+and in particular, discard requests to the backup source will too. If
+the granularity is too small, they will just be aligned down in
+cbw_co_pdiscard_snapshot() and thus effectively ignored.
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/backup.c | 2 +-
+ block/copy-before-write.c | 2 ++
+ block/copy-before-write.h | 1 +
+ blockdev.c | 3 +++
+ qapi/block-core.json | 9 +++++++--
+ 5 files changed, 14 insertions(+), 3 deletions(-)
+
+diff --git a/block/backup.c b/block/backup.c
+index 1963e47ab9..fe69723ada 100644
+--- a/block/backup.c
++++ b/block/backup.c
+@@ -434,7 +434,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
+ }
+
+ cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
+- &bcs, errp);
++ perf->min_cluster_size, &bcs, errp);
+ if (!cbw) {
+ goto error;
+ }
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index 47b3cdd09f..bba58326d7 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -546,6 +546,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+ BlockDriverState *target,
+ const char *filter_node_name,
+ bool discard_source,
++ int64_t min_cluster_size,
+ BlockCopyState **bcs,
+ Error **errp)
+ {
+@@ -564,6 +565,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+ }
+ qdict_put_str(opts, "file", bdrv_get_node_name(source));
+ qdict_put_str(opts, "target", bdrv_get_node_name(target));
++ qdict_put_int(opts, "min-cluster-size", min_cluster_size);
+
+ top = bdrv_insert_node(source, opts, flags, errp);
+ if (!top) {
+diff --git a/block/copy-before-write.h b/block/copy-before-write.h
+index 01af0cd3c4..dc6cafe7fa 100644
+--- a/block/copy-before-write.h
++++ b/block/copy-before-write.h
+@@ -40,6 +40,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+ BlockDriverState *target,
+ const char *filter_node_name,
+ bool discard_source,
++ int64_t min_cluster_size,
+ BlockCopyState **bcs,
+ Error **errp);
+ void bdrv_cbw_drop(BlockDriverState *bs);
+diff --git a/blockdev.c b/blockdev.c
+index 1054a69279..cbe224387b 100644
+--- a/blockdev.c
++++ b/blockdev.c
+@@ -2654,6 +2654,9 @@ static BlockJob *do_backup_common(BackupCommon *backup,
+ if (backup->x_perf->has_max_chunk) {
+ perf.max_chunk = backup->x_perf->max_chunk;
+ }
++ if (backup->x_perf->has_min_cluster_size) {
++ perf.min_cluster_size = backup->x_perf->min_cluster_size;
++ }
+ }
+
+ if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) ||
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index 9caf04cbe9..df934647ed 100644
+--- a/qapi/block-core.json
++++ b/qapi/block-core.json
+@@ -1790,11 +1790,16 @@
+ # it should not be less than job cluster size which is calculated
+ # as maximum of target image cluster size and 64k. Default 0.
+ #
++# @min-cluster-size: Minimum size of blocks used by copy-before-write
++# and background copy operations. Has to be a power of 2. No
++# effect if smaller than the maximum of the target's cluster size
++# and 64 KiB. Default 0. (Since 8.1)
++#
+ # Since: 6.0
+ ##
+ { 'struct': 'BackupPerf',
+- 'data': { '*use-copy-range': 'bool',
+- '*max-workers': 'int', '*max-chunk': 'int64' } }
++ 'data': { '*use-copy-range': 'bool', '*max-workers': 'int',
++ '*max-chunk': 'int64', '*min-cluster-size': 'uint32' } }
+
+ ##
+ # @BackupCommon:
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Date: Thu, 11 Apr 2024 11:29:23 +0200
-Subject: [PATCH] block/copy-before-write: support unligned snapshot-discard
-
-First thing that crashes on unligned access here is
-bdrv_reset_dirty_bitmap(). Correct way is to align-down the
-snapshot-discard request.
-
-Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- block/copy-before-write.c | 16 +++++++++++++---
- 1 file changed, 13 insertions(+), 3 deletions(-)
-
-diff --git a/block/copy-before-write.c b/block/copy-before-write.c
-index 5a9456d426..c0e70669a2 100644
---- a/block/copy-before-write.c
-+++ b/block/copy-before-write.c
-@@ -325,14 +325,24 @@ static int coroutine_fn GRAPH_RDLOCK
- cbw_co_pdiscard_snapshot(BlockDriverState *bs, int64_t offset, int64_t bytes)
- {
- BDRVCopyBeforeWriteState *s = bs->opaque;
-+ uint32_t cluster_size = block_copy_cluster_size(s->bcs);
-+ int64_t aligned_offset = QEMU_ALIGN_UP(offset, cluster_size);
-+ int64_t aligned_end = QEMU_ALIGN_DOWN(offset + bytes, cluster_size);
-+ int64_t aligned_bytes;
-+
-+ if (aligned_end <= aligned_offset) {
-+ return 0;
-+ }
-+ aligned_bytes = aligned_end - aligned_offset;
-
- WITH_QEMU_LOCK_GUARD(&s->lock) {
-- bdrv_reset_dirty_bitmap(s->access_bitmap, offset, bytes);
-+ bdrv_reset_dirty_bitmap(s->access_bitmap, aligned_offset,
-+ aligned_bytes);
- }
-
-- block_copy_reset(s->bcs, offset, bytes);
-+ block_copy_reset(s->bcs, aligned_offset, aligned_bytes);
-
-- return bdrv_co_pdiscard(s->target, offset, bytes);
-+ return bdrv_co_pdiscard(s->target, aligned_offset, aligned_bytes);
- }
-
- static void GRAPH_RDLOCK cbw_refresh_filename(BlockDriverState *bs)
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Thu, 11 Apr 2024 11:29:28 +0200
+Subject: [PATCH] PVE backup: add fleecing option
+
+When a fleecing option is given, it is expected that each device has
+a corresponding "-fleecing" block device already attached, except for
+EFI disk and TPM state, where fleecing is never used.
+
+The following graph was adapted from [0] which also contains more
+details about fleecing.
+
+[guest]
+ |
+ | root
+ v file
+[copy-before-write]<------[snapshot-access]
+ | |
+ | file | target
+ v v
+[source] [fleecing]
+
+For fleecing, a copy-before-write filter is inserted on top of the
+source node, as well as a snapshot-access node pointing to the filter
+node which allows reading the consistent state of the image at the
+time it was inserted. New guest writes are passed through the
+copy-before-write filter which will first copy over old data to the
+fleecing image in case that old data is still needed by the
+snapshot-access node.
+
+The backup process will sequentially read from the snapshot access,
+which has a bitmap and knows whether to read from the original image
+or the fleecing image to get the "snapshot" state, i.e. data from the
+source image at the time when the copy-before-write filter was
+inserted. After reading, the copied sections are discarded from the
+fleecing image to reduce space usage.
+
+All of this can be restricted by an initial dirty bitmap to parts of
+the source image that are required for an incremental backup.
+
+For discard to work, it is necessary that the fleecing image does not
+have a larger cluster size than the backup job granularity. Querying
+that size does not always work (e.g. for RBD with krbd, the cluster
+size is not reported), so a minimum of 4 MiB is used. A job with a PBS
+target already has at least this granularity, so the minimum is only
+relevant for other targets. I.e. edge cases where this minimum is not
+enough should be very rare in practice. If ever necessary in the
+future, a passed-in value for the backup QMP command can still be
+added to override it.
+
+Additionally, the cbw-timeout and on-cbw-error=break-snapshot options
+are set when installing the copy-before-write filter and
+snapshot-access. When an error or timeout occurs, the problematic (and
+each further) snapshot operation will fail and thus cancel the backup
+instead of breaking the guest write.
+
+Note that job_id cannot be inferred from the snapshot-access bs because
+it has no parent, so just pass the one from the original bs.
+
+[0]: https://www.mail-archive.com/qemu-devel@nongnu.org/msg876056.html
+
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
+---
+ block/monitor/block-hmp-cmds.c | 1 +
+ pve-backup.c | 135 ++++++++++++++++++++++++++++++++-
+ qapi/block-core.json | 10 ++-
+ 3 files changed, 142 insertions(+), 4 deletions(-)
+
+diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
+index 5000c084c5..70b3de4c7e 100644
+--- a/block/monitor/block-hmp-cmds.c
++++ b/block/monitor/block-hmp-cmds.c
+@@ -1043,6 +1043,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
+ NULL, NULL,
+ devlist, qdict_haskey(qdict, "speed"), speed,
+ false, 0, // BackupPerf max-workers
++ false, false, // fleecing
+ &error);
+
+ hmp_handle_error(mon, error);
+diff --git a/pve-backup.c b/pve-backup.c
+index 5ebb6a3947..a747d12d3d 100644
+--- a/pve-backup.c
++++ b/pve-backup.c
+@@ -7,9 +7,11 @@
+ #include "sysemu/blockdev.h"
+ #include "block/block_int-global-state.h"
+ #include "block/blockjob.h"
++#include "block/copy-before-write.h"
+ #include "block/dirty-bitmap.h"
+ #include "block/graph-lock.h"
+ #include "qapi/qapi-commands-block.h"
++#include "qapi/qmp/qdict.h"
+ #include "qapi/qmp/qerror.h"
+ #include "qemu/cutils.h"
+
+@@ -80,8 +82,15 @@ static void pvebackup_init(void)
+ // initialize PVEBackupState at startup
+ opts_init(pvebackup_init);
+
++typedef struct PVEBackupFleecingInfo {
++ BlockDriverState *bs;
++ BlockDriverState *cbw;
++ BlockDriverState *snapshot_access;
++} PVEBackupFleecingInfo;
++
+ typedef struct PVEBackupDevInfo {
+ BlockDriverState *bs;
++ PVEBackupFleecingInfo fleecing;
+ size_t size;
+ uint64_t block_size;
+ uint8_t dev_id;
+@@ -353,6 +362,22 @@ static void pvebackup_complete_cb(void *opaque, int ret)
+ PVEBackupDevInfo *di = opaque;
+ di->completed_ret = ret;
+
++ /*
++ * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
++ * won't be done as a coroutine anyways:
++ * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
++ * just spawn a BH calling bdrv_unref().
++ * - For cbw, draining would need to spawn a BH.
++ */
++ if (di->fleecing.snapshot_access) {
++ bdrv_unref(di->fleecing.snapshot_access);
++ di->fleecing.snapshot_access = NULL;
++ }
++ if (di->fleecing.cbw) {
++ bdrv_cbw_drop(di->fleecing.cbw);
++ di->fleecing.cbw = NULL;
++ }
++
+ /*
+ * Needs to happen outside of coroutine, because it takes the graph write lock.
+ */
+@@ -519,9 +544,77 @@ static void create_backup_jobs_bh(void *opaque) {
+ }
+ bdrv_drained_begin(di->bs);
+
++ BackupPerf perf = (BackupPerf){ .max_workers = backup_state.perf.max_workers };
++
++ BlockDriverState *source_bs = di->bs;
++ bool discard_source = false;
++ bdrv_graph_co_rdlock();
++ const char *job_id = bdrv_get_device_name(di->bs);
++ bdrv_graph_co_rdunlock();
++ if (di->fleecing.bs) {
++ QDict *cbw_opts = qdict_new();
++ qdict_put_str(cbw_opts, "driver", "copy-before-write");
++ qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
++ qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
++
++ if (di->bitmap) {
++ /*
++ * Only guest writes to parts relevant for the backup need to be intercepted with
++ * old data being copied to the fleecing image.
++ */
++ qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
++ qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
++ }
++ /*
++ * Fleecing storage is supposed to be fast and it's better to break backup than guest
++ * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
++ * abort a bit before that.
++ */
++ qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
++ qdict_put_int(cbw_opts, "cbw-timeout", 45);
++
++ di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
++
++ if (!di->fleecing.cbw) {
++ error_setg(errp, "appending cbw node for fleecing failed: %s",
++ local_err ? error_get_pretty(local_err) : "unknown error");
++ break;
++ }
++
++ QDict *snapshot_access_opts = qdict_new();
++ qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
++ qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
++
++ di->fleecing.snapshot_access =
++ bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
++ if (!di->fleecing.snapshot_access) {
++ error_setg(errp, "setting up snapshot access for fleecing failed: %s",
++ local_err ? error_get_pretty(local_err) : "unknown error");
++ break;
++ }
++ source_bs = di->fleecing.snapshot_access;
++ discard_source = true;
++
++ /*
++ * bdrv_get_info() just returns 0 (= doesn't matter) for RBD when using krbd. But discard
++ * on the fleecing image won't work if the backup job's granularity is less than the RBD
++ * object size (default 4 MiB), so it does matter. Always use at least 4 MiB. With a PBS
++ * target, the backup job granularity would already be at least this much.
++ */
++ perf.min_cluster_size = 4 * 1024 * 1024;
++ /*
++ * For discard to work, cluster size for the backup job must be at least the same as for
++ * the fleecing image.
++ */
++ BlockDriverInfo bdi;
++ if (bdrv_get_info(di->fleecing.bs, &bdi) >= 0) {
++ perf.min_cluster_size = MAX(perf.min_cluster_size, bdi.cluster_size);
++ }
++ }
++
+ BlockJob *job = backup_job_create(
+- NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
+- bitmap_mode, false, NULL, &backup_state.perf, BLOCKDEV_ON_ERROR_REPORT,
++ job_id, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
++ bitmap_mode, false, discard_source, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT,
+ BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
+ &local_err);
+
+@@ -577,6 +670,14 @@ static void create_backup_jobs_bh(void *opaque) {
+ aio_co_enter(data->ctx, data->co);
+ }
+
++/*
++ * EFI disk and TPM state are small and it's just not worth setting up fleecing for them.
++ */
++static bool device_uses_fleecing(const char *device_id)
++{
++ return strncmp(device_id, "drive-efidisk", 13) && strncmp(device_id, "drive-tpmstate", 14);
++}
++
+ /*
+ * Returns a list of device infos, which needs to be freed by the caller. In
+ * case of an error, errp will be set, but the returned value might still be a
+@@ -584,6 +685,7 @@ static void create_backup_jobs_bh(void *opaque) {
+ */
+ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+ const char *devlist,
++ bool fleecing,
+ Error **errp)
+ {
+ gchar **devs = NULL;
+@@ -607,6 +709,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
+ }
+ PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
+ di->bs = bs;
++
++ if (fleecing && device_uses_fleecing(*d)) {
++ g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL);
++ BlockBackend *fleecing_blk = blk_by_name(fleecing_devid);
++ if (!fleecing_blk) {
++ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
++ "Device '%s' not found", fleecing_devid);
++ goto err;
++ }
++ BlockDriverState *fleecing_bs = blk_bs(fleecing_blk);
++ if (!bdrv_co_is_inserted(fleecing_bs)) {
++ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, fleecing_devid);
++ goto err;
++ }
++ /*
++ * Fleecing image needs to be the same size to act as a cbw target.
++ */
++ if (bs->total_sectors != fleecing_bs->total_sectors) {
++ error_setg(errp, "Size mismatch for '%s' - sector count %ld != %ld",
++ fleecing_devid, fleecing_bs->total_sectors, bs->total_sectors);
++ goto err;
++ }
++ di->fleecing.bs = fleecing_bs;
++ }
++
+ di_list = g_list_append(di_list, di);
+ d++;
+ }
+@@ -656,6 +783,7 @@ UuidInfo coroutine_fn *qmp_backup(
+ const char *devlist,
+ bool has_speed, int64_t speed,
+ bool has_max_workers, int64_t max_workers,
++ bool has_fleecing, bool fleecing,
+ Error **errp)
+ {
+ assert(qemu_in_coroutine());
+@@ -684,7 +812,7 @@ UuidInfo coroutine_fn *qmp_backup(
+ format = has_format ? format : BACKUP_FORMAT_VMA;
+
+ bdrv_graph_co_rdlock();
+- di_list = get_device_info(devlist, &local_err);
++ di_list = get_device_info(devlist, has_fleecing && fleecing, &local_err);
+ bdrv_graph_co_rdunlock();
+ if (local_err) {
+ error_propagate(errp, local_err);
+@@ -1089,5 +1217,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
+ ret->query_bitmap_info = true;
+ ret->pbs_masterkey = true;
+ ret->backup_max_workers = true;
++ ret->backup_fleecing = true;
+ return ret;
+ }
+diff --git a/qapi/block-core.json b/qapi/block-core.json
+index df934647ed..ff441d4258 100644
+--- a/qapi/block-core.json
++++ b/qapi/block-core.json
+@@ -948,6 +948,10 @@
+ #
+ # @max-workers: see @BackupPerf for details. Default 16.
+ #
++# @fleecing: perform a backup with fleecing. For each device in @devlist, a
++# corresponding '-fleecing' device with the same size already needs to
++# be present.
++#
+ # Returns: the uuid of the backup job
+ #
+ ##
+@@ -968,7 +972,8 @@
+ '*firewall-file': 'str',
+ '*devlist': 'str',
+ '*speed': 'int',
+- '*max-workers': 'int' },
++ '*max-workers': 'int',
++ '*fleecing': 'bool' },
+ 'returns': 'UuidInfo', 'coroutine': true }
+
+ ##
+@@ -1014,6 +1019,8 @@
+ #
+ # @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
+ #
++# @backup-fleecing: Whether backup fleecing is supported or not.
++#
+ # @backup-max-workers: Whether the 'max-workers' @BackupPerf setting is
+ # supported or not.
+ #
+@@ -1025,6 +1032,7 @@
+ 'pbs-dirty-bitmap-migration': 'bool',
+ 'pbs-masterkey': 'bool',
+ 'pbs-library-version': 'str',
++ 'backup-fleecing': 'bool',
+ 'backup-max-workers': 'bool' } }
+
+ ##
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Date: Thu, 11 Apr 2024 11:29:24 +0200
-Subject: [PATCH] block/copy-before-write: create block_copy bitmap in filter
- node
-
-Currently block_copy creates copy_bitmap in source node. But that is in
-bad relation with .independent_close=true of copy-before-write filter:
-source node may be detached and removed before .bdrv_close() handler
-called, which should call block_copy_state_free(), which in turn should
-remove copy_bitmap.
-
-That's all not ideal: it would be better if internal bitmap of
-block-copy object is not attached to any node. But that is not possible
-now.
-
-The simplest solution is just create copy_bitmap in filter node, where
-anyway two other bitmaps are created.
-
-Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- block/block-copy.c | 3 +-
- block/copy-before-write.c | 2 +-
- include/block/block-copy.h | 1 +
- tests/qemu-iotests/257.out | 112 ++++++++++++++++++-------------------
- 4 files changed, 60 insertions(+), 58 deletions(-)
-
-diff --git a/block/block-copy.c b/block/block-copy.c
-index 9ee3dd7ef5..8fca2c3698 100644
---- a/block/block-copy.c
-+++ b/block/block-copy.c
-@@ -351,6 +351,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
- }
-
- BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-+ BlockDriverState *copy_bitmap_bs,
- const BdrvDirtyBitmap *bitmap,
- Error **errp)
- {
-@@ -367,7 +368,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
- return NULL;
- }
-
-- copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
-+ copy_bitmap = bdrv_create_dirty_bitmap(copy_bitmap_bs, cluster_size, NULL,
- errp);
- if (!copy_bitmap) {
- return NULL;
-diff --git a/block/copy-before-write.c b/block/copy-before-write.c
-index c0e70669a2..94db31512d 100644
---- a/block/copy-before-write.c
-+++ b/block/copy-before-write.c
-@@ -468,7 +468,7 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
- ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
- bs->file->bs->supported_zero_flags);
-
-- s->bcs = block_copy_state_new(bs->file, s->target, bitmap, errp);
-+ s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
- if (!s->bcs) {
- error_prepend(errp, "Cannot create block-copy-state: ");
- return -EINVAL;
-diff --git a/include/block/block-copy.h b/include/block/block-copy.h
-index 0700953ab8..8b41643bfa 100644
---- a/include/block/block-copy.h
-+++ b/include/block/block-copy.h
-@@ -25,6 +25,7 @@ typedef struct BlockCopyState BlockCopyState;
- typedef struct BlockCopyCallState BlockCopyCallState;
-
- BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-+ BlockDriverState *copy_bitmap_bs,
- const BdrvDirtyBitmap *bitmap,
- Error **errp);
-
-diff --git a/tests/qemu-iotests/257.out b/tests/qemu-iotests/257.out
-index aa76131ca9..c33dd7f3a9 100644
---- a/tests/qemu-iotests/257.out
-+++ b/tests/qemu-iotests/257.out
-@@ -120,16 +120,16 @@ write -P0x67 0x3fe0000 0x20000
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- }
-- ],
-- "drive0": [
-+ },
- {
- "busy": false,
- "count": 0,
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- },
-+ }
-+ ],
-+ "drive0": [
- {
- "busy": false,
- "count": 458752,
-@@ -596,16 +596,16 @@ write -P0x67 0x3fe0000 0x20000
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- }
-- ],
-- "drive0": [
-+ },
- {
- "busy": false,
- "count": 0,
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- },
-+ }
-+ ],
-+ "drive0": [
- {
- "busy": false,
- "count": 458752,
-@@ -865,16 +865,16 @@ write -P0x67 0x3fe0000 0x20000
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- }
-- ],
-- "drive0": [
-+ },
- {
- "busy": false,
- "count": 0,
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- },
-+ }
-+ ],
-+ "drive0": [
- {
- "busy": false,
- "count": 458752,
-@@ -1341,16 +1341,16 @@ write -P0x67 0x3fe0000 0x20000
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- }
-- ],
-- "drive0": [
-+ },
- {
- "busy": false,
- "count": 0,
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- },
-+ }
-+ ],
-+ "drive0": [
- {
- "busy": false,
- "count": 458752,
-@@ -1610,16 +1610,16 @@ write -P0x67 0x3fe0000 0x20000
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- }
-- ],
-- "drive0": [
-+ },
- {
- "busy": false,
- "count": 0,
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- },
-+ }
-+ ],
-+ "drive0": [
- {
- "busy": false,
- "count": 458752,
-@@ -2086,16 +2086,16 @@ write -P0x67 0x3fe0000 0x20000
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- }
-- ],
-- "drive0": [
-+ },
- {
- "busy": false,
- "count": 0,
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- },
-+ }
-+ ],
-+ "drive0": [
- {
- "busy": false,
- "count": 458752,
-@@ -2355,16 +2355,16 @@ write -P0x67 0x3fe0000 0x20000
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- }
-- ],
-- "drive0": [
-+ },
- {
- "busy": false,
- "count": 0,
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- },
-+ }
-+ ],
-+ "drive0": [
- {
- "busy": false,
- "count": 458752,
-@@ -2831,16 +2831,16 @@ write -P0x67 0x3fe0000 0x20000
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- }
-- ],
-- "drive0": [
-+ },
- {
- "busy": false,
- "count": 0,
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- },
-+ }
-+ ],
-+ "drive0": [
- {
- "busy": false,
- "count": 458752,
-@@ -3100,16 +3100,16 @@ write -P0x67 0x3fe0000 0x20000
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- }
-- ],
-- "drive0": [
-+ },
- {
- "busy": false,
- "count": 0,
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- },
-+ }
-+ ],
-+ "drive0": [
- {
- "busy": false,
- "count": 458752,
-@@ -3576,16 +3576,16 @@ write -P0x67 0x3fe0000 0x20000
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- }
-- ],
-- "drive0": [
-+ },
- {
- "busy": false,
- "count": 0,
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- },
-+ }
-+ ],
-+ "drive0": [
- {
- "busy": false,
- "count": 458752,
-@@ -3845,16 +3845,16 @@ write -P0x67 0x3fe0000 0x20000
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- }
-- ],
-- "drive0": [
-+ },
- {
- "busy": false,
- "count": 0,
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- },
-+ }
-+ ],
-+ "drive0": [
- {
- "busy": false,
- "count": 458752,
-@@ -4321,16 +4321,16 @@ write -P0x67 0x3fe0000 0x20000
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- }
-- ],
-- "drive0": [
-+ },
- {
- "busy": false,
- "count": 0,
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- },
-+ }
-+ ],
-+ "drive0": [
- {
- "busy": false,
- "count": 458752,
-@@ -4590,16 +4590,16 @@ write -P0x67 0x3fe0000 0x20000
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- }
-- ],
-- "drive0": [
-+ },
- {
- "busy": false,
- "count": 0,
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- },
-+ }
-+ ],
-+ "drive0": [
- {
- "busy": false,
- "count": 458752,
-@@ -5066,16 +5066,16 @@ write -P0x67 0x3fe0000 0x20000
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- }
-- ],
-- "drive0": [
-+ },
- {
- "busy": false,
- "count": 0,
- "granularity": 65536,
- "persistent": false,
- "recording": false
-- },
-+ }
-+ ],
-+ "drive0": [
- {
- "busy": false,
- "count": 458752,
--- /dev/null
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Fiona Ebner <f.ebner@proxmox.com>
+Date: Mon, 29 Apr 2024 14:43:58 +0200
+Subject: [PATCH] PVE backup: improve error when copy-before-write fails for
+ fleecing
+
+With fleecing, failure for copy-before-write does not fail the guest
+write, but only sets the snapshot error that is associated to the
+copy-before-write filter, making further requests to the snapshot
+access fail with EACCES, which then also fails the job. But that error
+code is not the root cause of why the backup failed, so bubble up the
+original snapshot error instead.
+
+Reported-by: Friedrich Weber <f.weber@proxmox.com>
+Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
+Tested-by: Friedrich Weber <f.weber@proxmox.com>
+---
+ block/copy-before-write.c | 18 ++++++++++++------
+ block/copy-before-write.h | 1 +
+ pve-backup.c | 9 +++++++++
+ 3 files changed, 22 insertions(+), 6 deletions(-)
+
+diff --git a/block/copy-before-write.c b/block/copy-before-write.c
+index bba58326d7..50cc4c7aae 100644
+--- a/block/copy-before-write.c
++++ b/block/copy-before-write.c
+@@ -27,6 +27,7 @@
+ #include "qapi/qmp/qjson.h"
+
+ #include "sysemu/block-backend.h"
++#include "qemu/atomic.h"
+ #include "qemu/cutils.h"
+ #include "qapi/error.h"
+ #include "block/block_int.h"
+@@ -74,7 +75,8 @@ typedef struct BDRVCopyBeforeWriteState {
+ * @snapshot_error is normally zero. But on first copy-before-write failure
+ * when @on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT, @snapshot_error takes
+ * value of this error (<0). After that all in-flight and further
+- * snapshot-API requests will fail with that error.
++ * snapshot-API requests will fail with that error. To be accessed with
++ * atomics.
+ */
+ int snapshot_error;
+ } BDRVCopyBeforeWriteState;
+@@ -114,7 +116,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
+ return 0;
+ }
+
+- if (s->snapshot_error) {
++ if (qatomic_read(&s->snapshot_error)) {
+ return 0;
+ }
+
+@@ -138,9 +140,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
+ WITH_QEMU_LOCK_GUARD(&s->lock) {
+ if (ret < 0) {
+ assert(s->on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT);
+- if (!s->snapshot_error) {
+- s->snapshot_error = ret;
+- }
++ qatomic_cmpxchg(&s->snapshot_error, 0, ret);
+ } else {
+ bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
+ }
+@@ -214,7 +214,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes,
+
+ QEMU_LOCK_GUARD(&s->lock);
+
+- if (s->snapshot_error) {
++ if (qatomic_read(&s->snapshot_error)) {
+ g_free(req);
+ return NULL;
+ }
+@@ -585,6 +585,12 @@ void bdrv_cbw_drop(BlockDriverState *bs)
+ bdrv_unref(bs);
+ }
+
++int bdrv_cbw_snapshot_error(BlockDriverState *bs)
++{
++ BDRVCopyBeforeWriteState *s = bs->opaque;
++ return qatomic_read(&s->snapshot_error);
++}
++
+ static void cbw_init(void)
+ {
+ bdrv_register(&bdrv_cbw_filter);
+diff --git a/block/copy-before-write.h b/block/copy-before-write.h
+index dc6cafe7fa..a27d2d7d9f 100644
+--- a/block/copy-before-write.h
++++ b/block/copy-before-write.h
+@@ -44,5 +44,6 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
+ BlockCopyState **bcs,
+ Error **errp);
+ void bdrv_cbw_drop(BlockDriverState *bs);
++int bdrv_cbw_snapshot_error(BlockDriverState *bs);
+
+ #endif /* COPY_BEFORE_WRITE_H */
+diff --git a/pve-backup.c b/pve-backup.c
+index a747d12d3d..4e730aa3da 100644
+--- a/pve-backup.c
++++ b/pve-backup.c
+@@ -374,6 +374,15 @@ static void pvebackup_complete_cb(void *opaque, int ret)
+ di->fleecing.snapshot_access = NULL;
+ }
+ if (di->fleecing.cbw) {
++ /*
++ * With fleecing, failure for cbw does not fail the guest write, but only sets the snapshot
++ * error, making further requests to the snapshot fail with EACCES, which then also fail the
++ * job. But that code is not the root cause and just confusing, so update it.
++ */
++ int snapshot_error = bdrv_cbw_snapshot_error(di->fleecing.cbw);
++ if (di->completed_ret == -EACCES && snapshot_error) {
++ di->completed_ret = snapshot_error;
++ }
+ bdrv_cbw_drop(di->fleecing.cbw);
+ di->fleecing.cbw = NULL;
+ }
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Date: Thu, 11 Apr 2024 11:29:25 +0200
-Subject: [PATCH] qapi: blockdev-backup: add discard-source parameter
-
-Add a parameter that enables discard-after-copy. That is mostly useful
-in "push backup with fleecing" scheme, when source is snapshot-access
-format driver node, based on copy-before-write filter snapshot-access
-API:
-
-[guest] [snapshot-access] ~~ blockdev-backup ~~> [backup target]
- | |
- | root | file
- v v
-[copy-before-write]
- | |
- | file | target
- v v
-[active disk] [temp.img]
-
-In this case discard-after-copy does two things:
-
- - discard data in temp.img to save disk space
- - avoid further copy-before-write operation in discarded area
-
-Note that we have to declare WRITE permission on source in
-copy-before-write filter, for discard to work. Still we can't take it
-unconditionally, as it will break normal backup from RO source. So, we
-have to add a parameter and pass it thorough bdrv_open flags.
-
-Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@yandex-team.ru>
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- block/backup.c | 5 +++--
- block/block-copy.c | 9 +++++++++
- block/copy-before-write.c | 15 +++++++++++++--
- block/copy-before-write.h | 1 +
- block/replication.c | 4 ++--
- blockdev.c | 2 +-
- include/block/block-common.h | 2 ++
- include/block/block-copy.h | 1 +
- include/block/block_int-global-state.h | 2 +-
- qapi/block-core.json | 4 ++++
- 10 files changed, 37 insertions(+), 8 deletions(-)
-
-diff --git a/block/backup.c b/block/backup.c
-index 16d611c4ca..1963e47ab9 100644
---- a/block/backup.c
-+++ b/block/backup.c
-@@ -332,7 +332,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
- BlockDriverState *target, int64_t speed,
- MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
- BitmapSyncMode bitmap_mode,
-- bool compress,
-+ bool compress, bool discard_source,
- const char *filter_node_name,
- BackupPerf *perf,
- BlockdevOnError on_source_error,
-@@ -433,7 +433,8 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
- goto error;
- }
-
-- cbw = bdrv_cbw_append(bs, target, filter_node_name, &bcs, errp);
-+ cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
-+ &bcs, errp);
- if (!cbw) {
- goto error;
- }
-diff --git a/block/block-copy.c b/block/block-copy.c
-index 8fca2c3698..7e3b378528 100644
---- a/block/block-copy.c
-+++ b/block/block-copy.c
-@@ -137,6 +137,7 @@ typedef struct BlockCopyState {
- CoMutex lock;
- int64_t in_flight_bytes;
- BlockCopyMethod method;
-+ bool discard_source;
- BlockReqList reqs;
- QLIST_HEAD(, BlockCopyCallState) calls;
- /*
-@@ -353,6 +354,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
- BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
- BlockDriverState *copy_bitmap_bs,
- const BdrvDirtyBitmap *bitmap,
-+ bool discard_source,
- Error **errp)
- {
- ERRP_GUARD();
-@@ -418,6 +420,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
- cluster_size),
- };
-
-+ s->discard_source = discard_source;
- block_copy_set_copy_opts(s, false, false);
-
- ratelimit_init(&s->rate_limit);
-@@ -589,6 +592,12 @@ static coroutine_fn int block_copy_task_entry(AioTask *task)
- co_put_to_shres(s->mem, t->req.bytes);
- block_copy_task_end(t, ret);
-
-+ if (s->discard_source && ret == 0) {
-+ int64_t nbytes =
-+ MIN(t->req.offset + t->req.bytes, s->len) - t->req.offset;
-+ bdrv_co_pdiscard(s->source, t->req.offset, nbytes);
-+ }
-+
- return ret;
- }
-
-diff --git a/block/copy-before-write.c b/block/copy-before-write.c
-index 94db31512d..853e01a1eb 100644
---- a/block/copy-before-write.c
-+++ b/block/copy-before-write.c
-@@ -44,6 +44,7 @@ typedef struct BDRVCopyBeforeWriteState {
- BdrvChild *target;
- OnCbwError on_cbw_error;
- uint64_t cbw_timeout_ns;
-+ bool discard_source;
-
- /*
- * @lock: protects access to @access_bitmap, @done_bitmap and
-@@ -357,6 +358,8 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
- uint64_t perm, uint64_t shared,
- uint64_t *nperm, uint64_t *nshared)
- {
-+ BDRVCopyBeforeWriteState *s = bs->opaque;
-+
- if (!(role & BDRV_CHILD_FILTERED)) {
- /*
- * Target child
-@@ -381,6 +384,10 @@ cbw_child_perm(BlockDriverState *bs, BdrvChild *c, BdrvChildRole role,
- * start
- */
- *nperm = *nperm | BLK_PERM_CONSISTENT_READ;
-+ if (s->discard_source) {
-+ *nperm = *nperm | BLK_PERM_WRITE;
-+ }
-+
- *nshared &= ~(BLK_PERM_WRITE | BLK_PERM_RESIZE);
- }
- }
-@@ -468,7 +475,9 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
- ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
- bs->file->bs->supported_zero_flags);
-
-- s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap, errp);
-+ s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
-+ s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
-+ flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
- if (!s->bcs) {
- error_prepend(errp, "Cannot create block-copy-state: ");
- return -EINVAL;
-@@ -535,12 +544,14 @@ static BlockDriver bdrv_cbw_filter = {
- BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
- BlockDriverState *target,
- const char *filter_node_name,
-+ bool discard_source,
- BlockCopyState **bcs,
- Error **errp)
- {
- BDRVCopyBeforeWriteState *state;
- BlockDriverState *top;
- QDict *opts;
-+ int flags = BDRV_O_RDWR | (discard_source ? BDRV_O_CBW_DISCARD_SOURCE : 0);
-
- assert(source->total_sectors == target->total_sectors);
- GLOBAL_STATE_CODE();
-@@ -553,7 +564,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
- qdict_put_str(opts, "file", bdrv_get_node_name(source));
- qdict_put_str(opts, "target", bdrv_get_node_name(target));
-
-- top = bdrv_insert_node(source, opts, BDRV_O_RDWR, errp);
-+ top = bdrv_insert_node(source, opts, flags, errp);
- if (!top) {
- return NULL;
- }
-diff --git a/block/copy-before-write.h b/block/copy-before-write.h
-index 6e72bb25e9..01af0cd3c4 100644
---- a/block/copy-before-write.h
-+++ b/block/copy-before-write.h
-@@ -39,6 +39,7 @@
- BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
- BlockDriverState *target,
- const char *filter_node_name,
-+ bool discard_source,
- BlockCopyState **bcs,
- Error **errp);
- void bdrv_cbw_drop(BlockDriverState *bs);
-diff --git a/block/replication.c b/block/replication.c
-index ca6bd0a720..0415a5e8b7 100644
---- a/block/replication.c
-+++ b/block/replication.c
-@@ -582,8 +582,8 @@ static void replication_start(ReplicationState *rs, ReplicationMode mode,
-
- s->backup_job = backup_job_create(
- NULL, s->secondary_disk->bs, s->hidden_disk->bs,
-- 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, NULL,
-- &perf,
-+ 0, MIRROR_SYNC_MODE_NONE, NULL, 0, false, false,
-+ NULL, &perf,
- BLOCKDEV_ON_ERROR_REPORT,
- BLOCKDEV_ON_ERROR_REPORT, JOB_INTERNAL,
- backup_job_completed, bs, NULL, &local_err);
-diff --git a/blockdev.c b/blockdev.c
-index 5e5dbc1da9..1054a69279 100644
---- a/blockdev.c
-+++ b/blockdev.c
-@@ -2727,7 +2727,7 @@ static BlockJob *do_backup_common(BackupCommon *backup,
-
- job = backup_job_create(backup->job_id, bs, target_bs, backup->speed,
- backup->sync, bmap, backup->bitmap_mode,
-- backup->compress,
-+ backup->compress, backup->discard_source,
- backup->filter_node_name,
- &perf,
- backup->on_source_error,
-diff --git a/include/block/block-common.h b/include/block/block-common.h
-index a846023a09..338fe5ff7a 100644
---- a/include/block/block-common.h
-+++ b/include/block/block-common.h
-@@ -243,6 +243,8 @@ typedef enum {
- read-write fails */
- #define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
-
-+#define BDRV_O_CBW_DISCARD_SOURCE 0x80000 /* for copy-before-write filter */
-+
- #define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
-
-
-diff --git a/include/block/block-copy.h b/include/block/block-copy.h
-index 8b41643bfa..bdc703bacd 100644
---- a/include/block/block-copy.h
-+++ b/include/block/block-copy.h
-@@ -27,6 +27,7 @@ typedef struct BlockCopyCallState BlockCopyCallState;
- BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
- BlockDriverState *copy_bitmap_bs,
- const BdrvDirtyBitmap *bitmap,
-+ bool discard_source,
- Error **errp);
-
- /* Function should be called prior any actual copy request */
-diff --git a/include/block/block_int-global-state.h b/include/block/block_int-global-state.h
-index cc1387ae02..f0c642b194 100644
---- a/include/block/block_int-global-state.h
-+++ b/include/block/block_int-global-state.h
-@@ -195,7 +195,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
- MirrorSyncMode sync_mode,
- BdrvDirtyBitmap *sync_bitmap,
- BitmapSyncMode bitmap_mode,
-- bool compress,
-+ bool compress, bool discard_source,
- const char *filter_node_name,
- BackupPerf *perf,
- BlockdevOnError on_source_error,
-diff --git a/qapi/block-core.json b/qapi/block-core.json
-index cb58a664ef..282e2e8a8c 100644
---- a/qapi/block-core.json
-+++ b/qapi/block-core.json
-@@ -1849,6 +1849,9 @@
- # node specified by @drive. If this option is not given, a node
- # name is autogenerated. (Since: 4.2)
- #
-+# @discard-source: Discard blocks on source which are already copied
-+# to the target. (Since 9.0)
-+#
- # @x-perf: Performance options. (Since 6.0)
- #
- # Features:
-@@ -1870,6 +1873,7 @@
- '*on-target-error': 'BlockdevOnError',
- '*auto-finalize': 'bool', '*auto-dismiss': 'bool',
- '*filter-node-name': 'str',
-+ '*discard-source': 'bool',
- '*x-perf': { 'type': 'BackupPerf',
- 'features': [ 'unstable' ] } } }
-
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Thu, 11 Apr 2024 11:29:26 +0200
-Subject: [PATCH] copy-before-write: allow specifying minimum cluster size
-
-Useful to make discard-source work in the context of backup fleecing
-when the fleecing image has a larger granularity than the backup
-target.
-
-Copy-before-write operations will use at least this granularity and in
-particular, discard requests to the source node will too. If the
-granularity is too small, they will just be aligned down in
-cbw_co_pdiscard_snapshot() and thus effectively ignored.
-
-The QAPI uses uint32 so the value will be non-negative, but still fit
-into a uint64_t.
-
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- block/block-copy.c | 17 +++++++++++++----
- block/copy-before-write.c | 3 ++-
- include/block/block-copy.h | 1 +
- qapi/block-core.json | 8 +++++++-
- 4 files changed, 23 insertions(+), 6 deletions(-)
-
-diff --git a/block/block-copy.c b/block/block-copy.c
-index 7e3b378528..adb1cbb440 100644
---- a/block/block-copy.c
-+++ b/block/block-copy.c
-@@ -310,6 +310,7 @@ void block_copy_set_copy_opts(BlockCopyState *s, bool use_copy_range,
- }
-
- static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
-+ int64_t min_cluster_size,
- Error **errp)
- {
- int ret;
-@@ -335,7 +336,7 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
- "used. If the actual block size of the target exceeds "
- "this default, the backup may be unusable",
- BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
-- return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
-+ return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
- } else if (ret < 0 && !target_does_cow) {
- error_setg_errno(errp, -ret,
- "Couldn't determine the cluster size of the target image, "
-@@ -345,16 +346,18 @@ static int64_t block_copy_calculate_cluster_size(BlockDriverState *target,
- return ret;
- } else if (ret < 0 && target_does_cow) {
- /* Not fatal; just trudge on ahead. */
-- return BLOCK_COPY_CLUSTER_SIZE_DEFAULT;
-+ return MAX(min_cluster_size, BLOCK_COPY_CLUSTER_SIZE_DEFAULT);
- }
-
-- return MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
-+ return MAX(min_cluster_size,
-+ MAX(BLOCK_COPY_CLUSTER_SIZE_DEFAULT, bdi.cluster_size));
- }
-
- BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
- BlockDriverState *copy_bitmap_bs,
- const BdrvDirtyBitmap *bitmap,
- bool discard_source,
-+ int64_t min_cluster_size,
- Error **errp)
- {
- ERRP_GUARD();
-@@ -365,7 +368,13 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
-
- GLOBAL_STATE_CODE();
-
-- cluster_size = block_copy_calculate_cluster_size(target->bs, errp);
-+ if (min_cluster_size && !is_power_of_2(min_cluster_size)) {
-+ error_setg(errp, "min-cluster-size needs to be a power of 2");
-+ return NULL;
-+ }
-+
-+ cluster_size = block_copy_calculate_cluster_size(target->bs,
-+ min_cluster_size, errp);
- if (cluster_size < 0) {
- return NULL;
- }
-diff --git a/block/copy-before-write.c b/block/copy-before-write.c
-index 853e01a1eb..47b3cdd09f 100644
---- a/block/copy-before-write.c
-+++ b/block/copy-before-write.c
-@@ -477,7 +477,8 @@ static int cbw_open(BlockDriverState *bs, QDict *options, int flags,
-
- s->discard_source = flags & BDRV_O_CBW_DISCARD_SOURCE;
- s->bcs = block_copy_state_new(bs->file, s->target, bs, bitmap,
-- flags & BDRV_O_CBW_DISCARD_SOURCE, errp);
-+ flags & BDRV_O_CBW_DISCARD_SOURCE,
-+ opts->min_cluster_size, errp);
- if (!s->bcs) {
- error_prepend(errp, "Cannot create block-copy-state: ");
- return -EINVAL;
-diff --git a/include/block/block-copy.h b/include/block/block-copy.h
-index bdc703bacd..77857c6c68 100644
---- a/include/block/block-copy.h
-+++ b/include/block/block-copy.h
-@@ -28,6 +28,7 @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
- BlockDriverState *copy_bitmap_bs,
- const BdrvDirtyBitmap *bitmap,
- bool discard_source,
-+ int64_t min_cluster_size,
- Error **errp);
-
- /* Function should be called prior any actual copy request */
-diff --git a/qapi/block-core.json b/qapi/block-core.json
-index 282e2e8a8c..9caf04cbe9 100644
---- a/qapi/block-core.json
-+++ b/qapi/block-core.json
-@@ -4926,12 +4926,18 @@
- # @on-cbw-error parameter will decide how this failure is handled.
- # Default 0. (Since 7.1)
- #
-+# @min-cluster-size: Minimum size of blocks used by copy-before-write
-+# operations. Has to be a power of 2. No effect if smaller than
-+# the maximum of the target's cluster size and 64 KiB. Default 0.
-+# (Since 8.1)
-+#
- # Since: 6.2
- ##
- { 'struct': 'BlockdevOptionsCbw',
- 'base': 'BlockdevOptionsGenericFormat',
- 'data': { 'target': 'BlockdevRef', '*bitmap': 'BlockDirtyBitmap',
-- '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32' } }
-+ '*on-cbw-error': 'OnCbwError', '*cbw-timeout': 'uint32',
-+ '*min-cluster-size': 'uint32' } }
-
- ##
- # @BlockdevOptions:
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Thu, 11 Apr 2024 11:29:27 +0200
-Subject: [PATCH] backup: add minimum cluster size to performance options
-
-Useful to make discard-source work in the context of backup fleecing
-when the fleecing image has a larger granularity than the backup
-target.
-
-Backup/block-copy will use at least this granularity for copy operations
-and in particular, discard requests to the backup source will too. If
-the granularity is too small, they will just be aligned down in
-cbw_co_pdiscard_snapshot() and thus effectively ignored.
-
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- block/backup.c | 2 +-
- block/copy-before-write.c | 2 ++
- block/copy-before-write.h | 1 +
- blockdev.c | 3 +++
- qapi/block-core.json | 9 +++++++--
- 5 files changed, 14 insertions(+), 3 deletions(-)
-
-diff --git a/block/backup.c b/block/backup.c
-index 1963e47ab9..fe69723ada 100644
---- a/block/backup.c
-+++ b/block/backup.c
-@@ -434,7 +434,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
- }
-
- cbw = bdrv_cbw_append(bs, target, filter_node_name, discard_source,
-- &bcs, errp);
-+ perf->min_cluster_size, &bcs, errp);
- if (!cbw) {
- goto error;
- }
-diff --git a/block/copy-before-write.c b/block/copy-before-write.c
-index 47b3cdd09f..bba58326d7 100644
---- a/block/copy-before-write.c
-+++ b/block/copy-before-write.c
-@@ -546,6 +546,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
- BlockDriverState *target,
- const char *filter_node_name,
- bool discard_source,
-+ int64_t min_cluster_size,
- BlockCopyState **bcs,
- Error **errp)
- {
-@@ -564,6 +565,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
- }
- qdict_put_str(opts, "file", bdrv_get_node_name(source));
- qdict_put_str(opts, "target", bdrv_get_node_name(target));
-+ qdict_put_int(opts, "min-cluster-size", min_cluster_size);
-
- top = bdrv_insert_node(source, opts, flags, errp);
- if (!top) {
-diff --git a/block/copy-before-write.h b/block/copy-before-write.h
-index 01af0cd3c4..dc6cafe7fa 100644
---- a/block/copy-before-write.h
-+++ b/block/copy-before-write.h
-@@ -40,6 +40,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
- BlockDriverState *target,
- const char *filter_node_name,
- bool discard_source,
-+ int64_t min_cluster_size,
- BlockCopyState **bcs,
- Error **errp);
- void bdrv_cbw_drop(BlockDriverState *bs);
-diff --git a/blockdev.c b/blockdev.c
-index 1054a69279..cbe224387b 100644
---- a/blockdev.c
-+++ b/blockdev.c
-@@ -2654,6 +2654,9 @@ static BlockJob *do_backup_common(BackupCommon *backup,
- if (backup->x_perf->has_max_chunk) {
- perf.max_chunk = backup->x_perf->max_chunk;
- }
-+ if (backup->x_perf->has_min_cluster_size) {
-+ perf.min_cluster_size = backup->x_perf->min_cluster_size;
-+ }
- }
-
- if ((backup->sync == MIRROR_SYNC_MODE_BITMAP) ||
-diff --git a/qapi/block-core.json b/qapi/block-core.json
-index 9caf04cbe9..df934647ed 100644
---- a/qapi/block-core.json
-+++ b/qapi/block-core.json
-@@ -1790,11 +1790,16 @@
- # it should not be less than job cluster size which is calculated
- # as maximum of target image cluster size and 64k. Default 0.
- #
-+# @min-cluster-size: Minimum size of blocks used by copy-before-write
-+# and background copy operations. Has to be a power of 2. No
-+# effect if smaller than the maximum of the target's cluster size
-+# and 64 KiB. Default 0. (Since 8.1)
-+#
- # Since: 6.0
- ##
- { 'struct': 'BackupPerf',
-- 'data': { '*use-copy-range': 'bool',
-- '*max-workers': 'int', '*max-chunk': 'int64' } }
-+ 'data': { '*use-copy-range': 'bool', '*max-workers': 'int',
-+ '*max-chunk': 'int64', '*min-cluster-size': 'uint32' } }
-
- ##
- # @BackupCommon:
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Thu, 11 Apr 2024 11:29:28 +0200
-Subject: [PATCH] PVE backup: add fleecing option
-
-When a fleecing option is given, it is expected that each device has
-a corresponding "-fleecing" block device already attached, except for
-EFI disk and TPM state, where fleecing is never used.
-
-The following graph was adapted from [0] which also contains more
-details about fleecing.
-
-[guest]
- |
- | root
- v file
-[copy-before-write]<------[snapshot-access]
- | |
- | file | target
- v v
-[source] [fleecing]
-
-For fleecing, a copy-before-write filter is inserted on top of the
-source node, as well as a snapshot-access node pointing to the filter
-node which allows reading the consistent state of the image at the
-time it was inserted. New guest writes are passed through the
-copy-before-write filter which will first copy over old data to the
-fleecing image in case that old data is still needed by the
-snapshot-access node.
-
-The backup process will sequentially read from the snapshot access,
-which has a bitmap and knows whether to read from the original image
-or the fleecing image to get the "snapshot" state, i.e. data from the
-source image at the time when the copy-before-write filter was
-inserted. After reading, the copied sections are discarded from the
-fleecing image to reduce space usage.
-
-All of this can be restricted by an initial dirty bitmap to parts of
-the source image that are required for an incremental backup.
-
-For discard to work, it is necessary that the fleecing image does not
-have a larger cluster size than the backup job granularity. Since
-querying that size does not always work, e.g. for RBD with krbd, the
-cluster size will not be reported, a minimum of 4 MiB is used. A job
-with PBS target already has at least this granularity, so it's just
-relevant for other targets. I.e. edge cases where this minimum is not
-enough should be very rare in practice. If ever necessary in the
-future, can still add a passed-in value for the backup QMP command to
-override.
-
-Additionally, the cbw-timeout and on-cbw-error=break-snapshot options
-are set when installing the copy-before-write filter and
-snapshot-access. When an error or timeout occurs, the problematic (and
-each further) snapshot operation will fail and thus cancel the backup
-instead of breaking the guest write.
-
-Note that job_id cannot be inferred from the snapshot-access bs because
-it has no parent, so just pass the one from the original bs.
-
-[0]: https://www.mail-archive.com/qemu-devel@nongnu.org/msg876056.html
-
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
----
- block/monitor/block-hmp-cmds.c | 1 +
- pve-backup.c | 135 ++++++++++++++++++++++++++++++++-
- qapi/block-core.json | 10 ++-
- 3 files changed, 142 insertions(+), 4 deletions(-)
-
-diff --git a/block/monitor/block-hmp-cmds.c b/block/monitor/block-hmp-cmds.c
-index 5000c084c5..70b3de4c7e 100644
---- a/block/monitor/block-hmp-cmds.c
-+++ b/block/monitor/block-hmp-cmds.c
-@@ -1043,6 +1043,7 @@ void coroutine_fn hmp_backup(Monitor *mon, const QDict *qdict)
- NULL, NULL,
- devlist, qdict_haskey(qdict, "speed"), speed,
- false, 0, // BackupPerf max-workers
-+ false, false, // fleecing
- &error);
-
- hmp_handle_error(mon, error);
-diff --git a/pve-backup.c b/pve-backup.c
-index 5ebb6a3947..a747d12d3d 100644
---- a/pve-backup.c
-+++ b/pve-backup.c
-@@ -7,9 +7,11 @@
- #include "sysemu/blockdev.h"
- #include "block/block_int-global-state.h"
- #include "block/blockjob.h"
-+#include "block/copy-before-write.h"
- #include "block/dirty-bitmap.h"
- #include "block/graph-lock.h"
- #include "qapi/qapi-commands-block.h"
-+#include "qapi/qmp/qdict.h"
- #include "qapi/qmp/qerror.h"
- #include "qemu/cutils.h"
-
-@@ -80,8 +82,15 @@ static void pvebackup_init(void)
- // initialize PVEBackupState at startup
- opts_init(pvebackup_init);
-
-+typedef struct PVEBackupFleecingInfo {
-+ BlockDriverState *bs;
-+ BlockDriverState *cbw;
-+ BlockDriverState *snapshot_access;
-+} PVEBackupFleecingInfo;
-+
- typedef struct PVEBackupDevInfo {
- BlockDriverState *bs;
-+ PVEBackupFleecingInfo fleecing;
- size_t size;
- uint64_t block_size;
- uint8_t dev_id;
-@@ -353,6 +362,22 @@ static void pvebackup_complete_cb(void *opaque, int ret)
- PVEBackupDevInfo *di = opaque;
- di->completed_ret = ret;
-
-+ /*
-+ * Handle block-graph specific cleanup (for fleecing) outside of the coroutine, because the work
-+ * won't be done as a coroutine anyways:
-+ * - For snapshot_access, allows doing bdrv_unref() directly. Doing it via bdrv_co_unref() would
-+ * just spawn a BH calling bdrv_unref().
-+ * - For cbw, draining would need to spawn a BH.
-+ */
-+ if (di->fleecing.snapshot_access) {
-+ bdrv_unref(di->fleecing.snapshot_access);
-+ di->fleecing.snapshot_access = NULL;
-+ }
-+ if (di->fleecing.cbw) {
-+ bdrv_cbw_drop(di->fleecing.cbw);
-+ di->fleecing.cbw = NULL;
-+ }
-+
- /*
- * Needs to happen outside of coroutine, because it takes the graph write lock.
- */
-@@ -519,9 +544,77 @@ static void create_backup_jobs_bh(void *opaque) {
- }
- bdrv_drained_begin(di->bs);
-
-+ BackupPerf perf = (BackupPerf){ .max_workers = backup_state.perf.max_workers };
-+
-+ BlockDriverState *source_bs = di->bs;
-+ bool discard_source = false;
-+ bdrv_graph_co_rdlock();
-+ const char *job_id = bdrv_get_device_name(di->bs);
-+ bdrv_graph_co_rdunlock();
-+ if (di->fleecing.bs) {
-+ QDict *cbw_opts = qdict_new();
-+ qdict_put_str(cbw_opts, "driver", "copy-before-write");
-+ qdict_put_str(cbw_opts, "file", bdrv_get_node_name(di->bs));
-+ qdict_put_str(cbw_opts, "target", bdrv_get_node_name(di->fleecing.bs));
-+
-+ if (di->bitmap) {
-+ /*
-+ * Only guest writes to parts relevant for the backup need to be intercepted with
-+ * old data being copied to the fleecing image.
-+ */
-+ qdict_put_str(cbw_opts, "bitmap.node", bdrv_get_node_name(di->bs));
-+ qdict_put_str(cbw_opts, "bitmap.name", bdrv_dirty_bitmap_name(di->bitmap));
-+ }
-+ /*
-+ * Fleecing storage is supposed to be fast and it's better to break backup than guest
-+ * writes. Certain guest drivers like VirtIO-win have 60 seconds timeout by default, so
-+ * abort a bit before that.
-+ */
-+ qdict_put_str(cbw_opts, "on-cbw-error", "break-snapshot");
-+ qdict_put_int(cbw_opts, "cbw-timeout", 45);
-+
-+ di->fleecing.cbw = bdrv_insert_node(di->bs, cbw_opts, BDRV_O_RDWR, &local_err);
-+
-+ if (!di->fleecing.cbw) {
-+ error_setg(errp, "appending cbw node for fleecing failed: %s",
-+ local_err ? error_get_pretty(local_err) : "unknown error");
-+ break;
-+ }
-+
-+ QDict *snapshot_access_opts = qdict_new();
-+ qdict_put_str(snapshot_access_opts, "driver", "snapshot-access");
-+ qdict_put_str(snapshot_access_opts, "file", bdrv_get_node_name(di->fleecing.cbw));
-+
-+ di->fleecing.snapshot_access =
-+ bdrv_open(NULL, NULL, snapshot_access_opts, BDRV_O_RDWR | BDRV_O_UNMAP, &local_err);
-+ if (!di->fleecing.snapshot_access) {
-+ error_setg(errp, "setting up snapshot access for fleecing failed: %s",
-+ local_err ? error_get_pretty(local_err) : "unknown error");
-+ break;
-+ }
-+ source_bs = di->fleecing.snapshot_access;
-+ discard_source = true;
-+
-+ /*
-+ * bdrv_get_info() just returns 0 (= doesn't matter) for RBD when using krbd. But discard
-+ * on the fleecing image won't work if the backup job's granularity is less than the RBD
-+ * object size (default 4 MiB), so it does matter. Always use at least 4 MiB. With a PBS
-+ * target, the backup job granularity would already be at least this much.
-+ */
-+ perf.min_cluster_size = 4 * 1024 * 1024;
-+ /*
-+ * For discard to work, cluster size for the backup job must be at least the same as for
-+ * the fleecing image.
-+ */
-+ BlockDriverInfo bdi;
-+ if (bdrv_get_info(di->fleecing.bs, &bdi) >= 0) {
-+ perf.min_cluster_size = MAX(perf.min_cluster_size, bdi.cluster_size);
-+ }
-+ }
-+
- BlockJob *job = backup_job_create(
-- NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
-- bitmap_mode, false, NULL, &backup_state.perf, BLOCKDEV_ON_ERROR_REPORT,
-+ job_id, source_bs, di->target, backup_state.speed, sync_mode, di->bitmap,
-+ bitmap_mode, false, discard_source, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT,
- BLOCKDEV_ON_ERROR_REPORT, JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
- &local_err);
-
-@@ -577,6 +670,14 @@ static void create_backup_jobs_bh(void *opaque) {
- aio_co_enter(data->ctx, data->co);
- }
-
-+/*
-+ * EFI disk and TPM state are small and it's just not worth setting up fleecing for them.
-+ */
-+static bool device_uses_fleecing(const char *device_id)
-+{
-+ return strncmp(device_id, "drive-efidisk", 13) && strncmp(device_id, "drive-tpmstate", 14);
-+}
-+
- /*
- * Returns a list of device infos, which needs to be freed by the caller. In
- * case of an error, errp will be set, but the returned value might still be a
-@@ -584,6 +685,7 @@ static void create_backup_jobs_bh(void *opaque) {
- */
- static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
- const char *devlist,
-+ bool fleecing,
- Error **errp)
- {
- gchar **devs = NULL;
-@@ -607,6 +709,31 @@ static GList coroutine_fn GRAPH_RDLOCK *get_device_info(
- }
- PVEBackupDevInfo *di = g_new0(PVEBackupDevInfo, 1);
- di->bs = bs;
-+
-+ if (fleecing && device_uses_fleecing(*d)) {
-+ g_autofree gchar *fleecing_devid = g_strconcat(*d, "-fleecing", NULL);
-+ BlockBackend *fleecing_blk = blk_by_name(fleecing_devid);
-+ if (!fleecing_blk) {
-+ error_set(errp, ERROR_CLASS_DEVICE_NOT_FOUND,
-+ "Device '%s' not found", fleecing_devid);
-+ goto err;
-+ }
-+ BlockDriverState *fleecing_bs = blk_bs(fleecing_blk);
-+ if (!bdrv_co_is_inserted(fleecing_bs)) {
-+ error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, fleecing_devid);
-+ goto err;
-+ }
-+ /*
-+ * Fleecing image needs to be the same size to act as a cbw target.
-+ */
-+ if (bs->total_sectors != fleecing_bs->total_sectors) {
-+ error_setg(errp, "Size mismatch for '%s' - sector count %ld != %ld",
-+ fleecing_devid, fleecing_bs->total_sectors, bs->total_sectors);
-+ goto err;
-+ }
-+ di->fleecing.bs = fleecing_bs;
-+ }
-+
- di_list = g_list_append(di_list, di);
- d++;
- }
-@@ -656,6 +783,7 @@ UuidInfo coroutine_fn *qmp_backup(
- const char *devlist,
- bool has_speed, int64_t speed,
- bool has_max_workers, int64_t max_workers,
-+ bool has_fleecing, bool fleecing,
- Error **errp)
- {
- assert(qemu_in_coroutine());
-@@ -684,7 +812,7 @@ UuidInfo coroutine_fn *qmp_backup(
- format = has_format ? format : BACKUP_FORMAT_VMA;
-
- bdrv_graph_co_rdlock();
-- di_list = get_device_info(devlist, &local_err);
-+ di_list = get_device_info(devlist, has_fleecing && fleecing, &local_err);
- bdrv_graph_co_rdunlock();
- if (local_err) {
- error_propagate(errp, local_err);
-@@ -1089,5 +1217,6 @@ ProxmoxSupportStatus *qmp_query_proxmox_support(Error **errp)
- ret->query_bitmap_info = true;
- ret->pbs_masterkey = true;
- ret->backup_max_workers = true;
-+ ret->backup_fleecing = true;
- return ret;
- }
-diff --git a/qapi/block-core.json b/qapi/block-core.json
-index df934647ed..ff441d4258 100644
---- a/qapi/block-core.json
-+++ b/qapi/block-core.json
-@@ -948,6 +948,10 @@
- #
- # @max-workers: see @BackupPerf for details. Default 16.
- #
-+# @fleecing: perform a backup with fleecing. For each device in @devlist, a
-+# corresponding '-fleecing' device with the same size already needs to
-+# be present.
-+#
- # Returns: the uuid of the backup job
- #
- ##
-@@ -968,7 +972,8 @@
- '*firewall-file': 'str',
- '*devlist': 'str',
- '*speed': 'int',
-- '*max-workers': 'int' },
-+ '*max-workers': 'int',
-+ '*fleecing': 'bool' },
- 'returns': 'UuidInfo', 'coroutine': true }
-
- ##
-@@ -1014,6 +1019,8 @@
- #
- # @pbs-library-version: Running version of libproxmox-backup-qemu0 library.
- #
-+# @backup-fleecing: Whether backup fleecing is supported or not.
-+#
- # @backup-max-workers: Whether the 'max-workers' @BackupPerf setting is
- # supported or not.
- #
-@@ -1025,6 +1032,7 @@
- 'pbs-dirty-bitmap-migration': 'bool',
- 'pbs-masterkey': 'bool',
- 'pbs-library-version': 'str',
-+ 'backup-fleecing': 'bool',
- 'backup-max-workers': 'bool' } }
-
- ##
+++ /dev/null
-From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
-From: Fiona Ebner <f.ebner@proxmox.com>
-Date: Mon, 29 Apr 2024 14:43:58 +0200
-Subject: [PATCH] PVE backup: improve error when copy-before-write fails for
- fleecing
-
-With fleecing, failure for copy-before-write does not fail the guest
-write, but only sets the snapshot error that is associated to the
-copy-before-write filter, making further requests to the snapshot
-access fail with EACCES, which then also fails the job. But that error
-code is not the root cause of why the backup failed, so bubble up the
-original snapshot error instead.
-
-Reported-by: Friedrich Weber <f.weber@proxmox.com>
-Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
-Tested-by: Friedrich Weber <f.weber@proxmox.com>
----
- block/copy-before-write.c | 18 ++++++++++++------
- block/copy-before-write.h | 1 +
- pve-backup.c | 9 +++++++++
- 3 files changed, 22 insertions(+), 6 deletions(-)
-
-diff --git a/block/copy-before-write.c b/block/copy-before-write.c
-index bba58326d7..50cc4c7aae 100644
---- a/block/copy-before-write.c
-+++ b/block/copy-before-write.c
-@@ -27,6 +27,7 @@
- #include "qapi/qmp/qjson.h"
-
- #include "sysemu/block-backend.h"
-+#include "qemu/atomic.h"
- #include "qemu/cutils.h"
- #include "qapi/error.h"
- #include "block/block_int.h"
-@@ -74,7 +75,8 @@ typedef struct BDRVCopyBeforeWriteState {
- * @snapshot_error is normally zero. But on first copy-before-write failure
- * when @on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT, @snapshot_error takes
- * value of this error (<0). After that all in-flight and further
-- * snapshot-API requests will fail with that error.
-+ * snapshot-API requests will fail with that error. To be accessed with
-+ * atomics.
- */
- int snapshot_error;
- } BDRVCopyBeforeWriteState;
-@@ -114,7 +116,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
- return 0;
- }
-
-- if (s->snapshot_error) {
-+ if (qatomic_read(&s->snapshot_error)) {
- return 0;
- }
-
-@@ -138,9 +140,7 @@ static coroutine_fn int cbw_do_copy_before_write(BlockDriverState *bs,
- WITH_QEMU_LOCK_GUARD(&s->lock) {
- if (ret < 0) {
- assert(s->on_cbw_error == ON_CBW_ERROR_BREAK_SNAPSHOT);
-- if (!s->snapshot_error) {
-- s->snapshot_error = ret;
-- }
-+ qatomic_cmpxchg(&s->snapshot_error, 0, ret);
- } else {
- bdrv_set_dirty_bitmap(s->done_bitmap, off, end - off);
- }
-@@ -214,7 +214,7 @@ cbw_snapshot_read_lock(BlockDriverState *bs, int64_t offset, int64_t bytes,
-
- QEMU_LOCK_GUARD(&s->lock);
-
-- if (s->snapshot_error) {
-+ if (qatomic_read(&s->snapshot_error)) {
- g_free(req);
- return NULL;
- }
-@@ -585,6 +585,12 @@ void bdrv_cbw_drop(BlockDriverState *bs)
- bdrv_unref(bs);
- }
-
-+int bdrv_cbw_snapshot_error(BlockDriverState *bs)
-+{
-+ BDRVCopyBeforeWriteState *s = bs->opaque;
-+ return qatomic_read(&s->snapshot_error);
-+}
-+
- static void cbw_init(void)
- {
- bdrv_register(&bdrv_cbw_filter);
-diff --git a/block/copy-before-write.h b/block/copy-before-write.h
-index dc6cafe7fa..a27d2d7d9f 100644
---- a/block/copy-before-write.h
-+++ b/block/copy-before-write.h
-@@ -44,5 +44,6 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
- BlockCopyState **bcs,
- Error **errp);
- void bdrv_cbw_drop(BlockDriverState *bs);
-+int bdrv_cbw_snapshot_error(BlockDriverState *bs);
-
- #endif /* COPY_BEFORE_WRITE_H */
-diff --git a/pve-backup.c b/pve-backup.c
-index a747d12d3d..4e730aa3da 100644
---- a/pve-backup.c
-+++ b/pve-backup.c
-@@ -374,6 +374,15 @@ static void pvebackup_complete_cb(void *opaque, int ret)
- di->fleecing.snapshot_access = NULL;
- }
- if (di->fleecing.cbw) {
-+ /*
-+ * With fleecing, failure for cbw does not fail the guest write, but only sets the snapshot
-+ * error, making further requests to the snapshot fail with EACCES, which then also fail the
-+ * job. But that code is not the root cause and just confusing, so update it.
-+ */
-+ int snapshot_error = bdrv_cbw_snapshot_error(di->fleecing.cbw);
-+ if (di->completed_ret == -EACCES && snapshot_error) {
-+ di->completed_ret = snapshot_error;
-+ }
- bdrv_cbw_drop(di->fleecing.cbw);
- di->fleecing.cbw = NULL;
- }
extra/0004-Revert-x86-acpi-workaround-Windows-not-handling-name.patch
extra/0005-block-copy-before-write-use-uint64_t-for-timeout-in-.patch
extra/0006-Revert-virtio-pci-fix-use-of-a-released-vector.patch
+extra/0007-block-copy-before-write-fix-permission.patch
+extra/0008-block-copy-before-write-support-unligned-snapshot-di.patch
+extra/0009-block-copy-before-write-create-block_copy-bitmap-in-.patch
+extra/0010-qapi-blockdev-backup-add-discard-source-parameter.patch
+extra/0011-hw-virtio-Fix-the-de-initialization-of-vhost-user-de.patch
+extra/0012-target-arm-Use-float_status-copy-in-sme_fmopa_s.patch
+extra/0013-target-arm-Use-FPST_F16-for-SME-FMOPA-widening.patch
+extra/0014-scsi-fix-regression-and-honor-bootindex-again-for-le.patch
+extra/0015-hw-scsi-lsi53c895a-bump-instruction-limit-in-scripts.patch
+extra/0016-block-copy-Fix-missing-graph-lock.patch
+extra/0017-Revert-qemu-char-do-not-operate-on-sources-from-fina.patch
bitmap-mirror/0001-drive-mirror-add-support-for-sync-bitmap-mode-never.patch
bitmap-mirror/0002-drive-mirror-add-support-for-conditional-and-always-.patch
bitmap-mirror/0003-mirror-add-check-for-bitmap-mode-without-bitmap.patch
pve/0041-Revert-block-rbd-implement-bdrv_co_block_status.patch
pve/0042-alloc-track-error-out-when-auto-remove-is-not-set.patch
pve/0043-alloc-track-avoid-seemingly-superfluous-child-permis.patch
-pve/0044-block-copy-before-write-fix-permission.patch
-pve/0045-block-copy-before-write-support-unligned-snapshot-di.patch
-pve/0046-block-copy-before-write-create-block_copy-bitmap-in-.patch
-pve/0047-qapi-blockdev-backup-add-discard-source-parameter.patch
-pve/0048-copy-before-write-allow-specifying-minimum-cluster-s.patch
-pve/0049-backup-add-minimum-cluster-size-to-performance-optio.patch
-pve/0050-PVE-backup-add-fleecing-option.patch
-pve/0051-PVE-backup-improve-error-when-copy-before-write-fail.patch
+pve/0044-copy-before-write-allow-specifying-minimum-cluster-s.patch
+pve/0045-backup-add-minimum-cluster-size-to-performance-optio.patch
+pve/0046-PVE-backup-add-fleecing-option.patch
+pve/0047-PVE-backup-improve-error-when-copy-before-write-fail.patch