]> git.proxmox.com Git - pve-qemu.git/blob - debian/patches/pve/0053-Revert-block-rbd-implement-bdrv_co_block_status.patch
add revert to work around performance regression when backing up large RBD disk
[pve-qemu.git] / debian / patches / pve / 0053-Revert-block-rbd-implement-bdrv_co_block_status.patch
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Fabian Ebner <f.ebner@proxmox.com>
3 Date: Tue, 17 May 2022 09:46:02 +0200
4 Subject: [PATCH] Revert "block/rbd: implement bdrv_co_block_status"
5
6 During backup, bdrv_co_block_status is called for each block copy
7 chunk. When RBD is used, the current implementation with
8 rbd_diff_iterate2() using whole_object=true takes time that grows
9 roughly linearly with the image size. Since the number of chunks also
10 grows linearly with the image size, the slowdown is quadratic, becoming
11 unacceptable for large images (starting somewhere between 500-1000 GiB in my testing).
12
13 This reverts commit 0347a8fd4c3faaedf119be04c197804be40a384b as a
14 stop-gap measure, until it's clear how to make the implementation
15 more efficient.
16
17 Upstream bug report:
18 https://gitlab.com/qemu-project/qemu/-/issues/1026
19
20 Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
21 ---
22 block/rbd.c | 112 ----------------------------------------------------
23 1 file changed, 112 deletions(-)
24
25 diff --git a/block/rbd.c b/block/rbd.c
26 index a4b8fb482c..3393b06a4e 100644
27 --- a/block/rbd.c
28 +++ b/block/rbd.c
29 @@ -97,12 +97,6 @@ typedef struct RBDTask {
30 int64_t ret;
31 } RBDTask;
32
33 -typedef struct RBDDiffIterateReq {
34 - uint64_t offs;
35 - uint64_t bytes;
36 - bool exists;
37 -} RBDDiffIterateReq;
38 -
39 static int qemu_rbd_connect(rados_t *cluster, rados_ioctx_t *io_ctx,
40 BlockdevOptionsRbd *opts, bool cache,
41 const char *keypairs, const char *secretid,
42 @@ -1267,111 +1261,6 @@ static ImageInfoSpecific *qemu_rbd_get_specific_info(BlockDriverState *bs,
43 return spec_info;
44 }
45
46 -/*
47 - * rbd_diff_iterate2 allows to interrupt the exection by returning a negative
48 - * value in the callback routine. Choose a value that does not conflict with
49 - * an existing exitcode and return it if we want to prematurely stop the
50 - * execution because we detected a change in the allocation status.
51 - */
52 -#define QEMU_RBD_EXIT_DIFF_ITERATE2 -9000
53 -
54 -static int qemu_rbd_diff_iterate_cb(uint64_t offs, size_t len,
55 - int exists, void *opaque)
56 -{
57 - RBDDiffIterateReq *req = opaque;
58 -
59 - assert(req->offs + req->bytes <= offs);
60 - /*
61 - * we do not diff against a snapshot so we should never receive a callback
62 - * for a hole.
63 - */
64 - assert(exists);
65 -
66 - if (!req->exists && offs > req->offs) {
67 - /*
68 - * we started in an unallocated area and hit the first allocated
69 - * block. req->bytes must be set to the length of the unallocated area
70 - * before the allocated area. stop further processing.
71 - */
72 - req->bytes = offs - req->offs;
73 - return QEMU_RBD_EXIT_DIFF_ITERATE2;
74 - }
75 -
76 - if (req->exists && offs > req->offs + req->bytes) {
77 - /*
78 - * we started in an allocated area and jumped over an unallocated area,
79 - * req->bytes contains the length of the allocated area before the
80 - * unallocated area. stop further processing.
81 - */
82 - return QEMU_RBD_EXIT_DIFF_ITERATE2;
83 - }
84 -
85 - req->bytes += len;
86 - req->exists = true;
87 -
88 - return 0;
89 -}
90 -
91 -static int coroutine_fn qemu_rbd_co_block_status(BlockDriverState *bs,
92 - bool want_zero, int64_t offset,
93 - int64_t bytes, int64_t *pnum,
94 - int64_t *map,
95 - BlockDriverState **file)
96 -{
97 - BDRVRBDState *s = bs->opaque;
98 - int status, r;
99 - RBDDiffIterateReq req = { .offs = offset };
100 - uint64_t features, flags;
101 -
102 - assert(offset + bytes <= s->image_size);
103 -
104 - /* default to all sectors allocated */
105 - status = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
106 - *map = offset;
107 - *file = bs;
108 - *pnum = bytes;
109 -
110 - /* check if RBD image supports fast-diff */
111 - r = rbd_get_features(s->image, &features);
112 - if (r < 0) {
113 - return status;
114 - }
115 - if (!(features & RBD_FEATURE_FAST_DIFF)) {
116 - return status;
117 - }
118 -
119 - /* check if RBD fast-diff result is valid */
120 - r = rbd_get_flags(s->image, &flags);
121 - if (r < 0) {
122 - return status;
123 - }
124 - if (flags & RBD_FLAG_FAST_DIFF_INVALID) {
125 - return status;
126 - }
127 -
128 - r = rbd_diff_iterate2(s->image, NULL, offset, bytes, true, true,
129 - qemu_rbd_diff_iterate_cb, &req);
130 - if (r < 0 && r != QEMU_RBD_EXIT_DIFF_ITERATE2) {
131 - return status;
132 - }
133 - assert(req.bytes <= bytes);
134 - if (!req.exists) {
135 - if (r == 0) {
136 - /*
137 - * rbd_diff_iterate2 does not invoke callbacks for unallocated
138 - * areas. This here catches the case where no callback was
139 - * invoked at all (req.bytes == 0).
140 - */
141 - assert(req.bytes == 0);
142 - req.bytes = bytes;
143 - }
144 - status = BDRV_BLOCK_ZERO | BDRV_BLOCK_OFFSET_VALID;
145 - }
146 -
147 - *pnum = req.bytes;
148 - return status;
149 -}
150 -
151 static int64_t qemu_rbd_getlength(BlockDriverState *bs)
152 {
153 BDRVRBDState *s = bs->opaque;
154 @@ -1607,7 +1496,6 @@ static BlockDriver bdrv_rbd = {
155 #ifdef LIBRBD_SUPPORTS_WRITE_ZEROES
156 .bdrv_co_pwrite_zeroes = qemu_rbd_co_pwrite_zeroes,
157 #endif
158 - .bdrv_co_block_status = qemu_rbd_co_block_status,
159
160 .bdrv_snapshot_create = qemu_rbd_snap_create,
161 .bdrv_snapshot_delete = qemu_rbd_snap_remove,