]> git.proxmox.com Git - pve-qemu.git/blob - debian/patches/pve/0052-PVE-Backup-Use-more-coroutines-and-don-t-block-on-fi.patch
8ae58fe99b0609d513e7fce0eefaef4eee96d320
[pve-qemu.git] / debian / patches / pve / 0052-PVE-Backup-Use-more-coroutines-and-don-t-block-on-fi.patch
1 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
2 From: Stefan Reiter <s.reiter@proxmox.com>
3 Date: Mon, 28 Sep 2020 13:40:51 +0200
4 Subject: [PATCH] PVE-Backup: Use more coroutines and don't block on finishing
5
6 proxmox_backup_co_finish is already async, but previously we would wait
7 for the coroutine using block_on_coroutine_fn(). Avoid this by
8 scheduling pvebackup_co_complete_stream (and thus pvebackup_co_cleanup)
9 as a real coroutine when calling from pvebackup_complete_cb. This is ok,
10 since complete_stream uses the backup_mutex internally to synchronize,
11 and other streams can happily continue writing in the meantime anyway.
12
13 To accommodate, backup_mutex is converted to a CoMutex. This means
14 converting every user to a coroutine. This is not just useful here, but
15 will come in handy once this series[0] is merged, and QMP calls can be
16 yield-able coroutines too. Then we can also finally get rid of
17 block_on_coroutine_fn.
18
19 Cases of aio_context_acquire/release from within what is now a coroutine
20 are changed to aio_co_reschedule_self, which works since a running
21 coroutine always holds the aio lock for the context it is running in.
22
23 job_cancel_sync is changed to regular job_cancel, since job_cancel_sync
24 uses AIO_WAIT_WHILE internally, which is forbidden from Coroutines.
25
26 create_backup_jobs cannot be run from a coroutine, so it is run
27 directly. This does however mean that it runs unlocked, as it cannot
28 acquire a CoMutex (see its comment for rationale on why that's ok).
29
30 To communicate the finishing state, a new property is introduced to
31 query-backup: 'finishing'. A new state is explicitly not used, since
32 that would break compatibility with older qemu-server versions.
33
34 [0] https://lists.gnu.org/archive/html/qemu-devel/2020-09/msg03515.html
35 ---
36 proxmox-backup-client.h | 1 +
37 pve-backup.c | 124 ++++++++++++++++++++++------------------
38 qapi/block-core.json | 5 +-
39 3 files changed, 74 insertions(+), 56 deletions(-)
40
41 diff --git a/proxmox-backup-client.h b/proxmox-backup-client.h
42 index 20fd6b1719..a4781c5851 100644
43 --- a/proxmox-backup-client.h
44 +++ b/proxmox-backup-client.h
45 @@ -5,6 +5,7 @@
46 #include "qemu/coroutine.h"
47 #include "proxmox-backup-qemu.h"
48
49 +// FIXME: Remove once coroutines are supported for QMP
50 void block_on_coroutine_fn(CoroutineEntry *entry, void *entry_arg);
51
52 int coroutine_fn
53 diff --git a/pve-backup.c b/pve-backup.c
54 index 9562e9c98d..53cf23ed5a 100644
55 --- a/pve-backup.c
56 +++ b/pve-backup.c
57 @@ -33,7 +33,9 @@ const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap";
58
59 static struct PVEBackupState {
60 struct {
61 - // Everithing accessed from qmp_backup_query command is protected using lock
62 + // Everything accessed from qmp_backup_query command is protected using
63 + // this lock. Do NOT hold this lock for long times, as it is sometimes
64 + // acquired from coroutines, and thus any wait time may block the guest.
65 QemuMutex lock;
66 Error *error;
67 time_t start_time;
68 @@ -47,20 +49,21 @@ static struct PVEBackupState {
69 size_t reused;
70 size_t zero_bytes;
71 GList *bitmap_list;
72 + bool finishing;
73 } stat;
74 int64_t speed;
75 VmaWriter *vmaw;
76 ProxmoxBackupHandle *pbs;
77 GList *di_list;
78 JobTxn *txn;
79 - QemuMutex backup_mutex;
80 + CoMutex backup_mutex;
81 CoMutex dump_callback_mutex;
82 } backup_state;
83
84 static void pvebackup_init(void)
85 {
86 qemu_mutex_init(&backup_state.stat.lock);
87 - qemu_mutex_init(&backup_state.backup_mutex);
88 + qemu_co_mutex_init(&backup_state.backup_mutex);
89 qemu_co_mutex_init(&backup_state.dump_callback_mutex);
90 }
91
92 @@ -72,6 +75,7 @@ typedef struct PVEBackupDevInfo {
93 size_t size;
94 uint64_t block_size;
95 uint8_t dev_id;
96 + int completed_ret; // INT_MAX if not completed
97 char targetfile[PATH_MAX];
98 BdrvDirtyBitmap *bitmap;
99 BlockDriverState *target;
100 @@ -227,12 +231,12 @@ pvebackup_co_dump_vma_cb(
101 }
102
103 // assumes the caller holds backup_mutex
104 -static void coroutine_fn pvebackup_co_cleanup(void *unused)
105 +static void coroutine_fn pvebackup_co_cleanup(void)
106 {
107 assert(qemu_in_coroutine());
108
109 qemu_mutex_lock(&backup_state.stat.lock);
110 - backup_state.stat.end_time = time(NULL);
111 + backup_state.stat.finishing = true;
112 qemu_mutex_unlock(&backup_state.stat.lock);
113
114 if (backup_state.vmaw) {
115 @@ -261,12 +265,29 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
116
117 g_list_free(backup_state.di_list);
118 backup_state.di_list = NULL;
119 +
120 + qemu_mutex_lock(&backup_state.stat.lock);
121 + backup_state.stat.end_time = time(NULL);
122 + backup_state.stat.finishing = false;
123 + qemu_mutex_unlock(&backup_state.stat.lock);
124 }
125
126 -// assumes the caller holds backup_mutex
127 -static void coroutine_fn pvebackup_complete_stream(void *opaque)
128 +static void coroutine_fn pvebackup_co_complete_stream(void *opaque)
129 {
130 PVEBackupDevInfo *di = opaque;
131 + int ret = di->completed_ret;
132 +
133 + qemu_co_mutex_lock(&backup_state.backup_mutex);
134 +
135 + if (ret < 0) {
136 + Error *local_err = NULL;
137 + error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
138 + pvebackup_propagate_error(local_err);
139 + }
140 +
141 + di->bs = NULL;
142 +
143 + assert(di->target == NULL);
144
145 bool error_or_canceled = pvebackup_error_or_canceled();
146
147 @@ -281,27 +302,6 @@ static void coroutine_fn pvebackup_complete_stream(void *opaque)
148 pvebackup_propagate_error(local_err);
149 }
150 }
151 -}
152 -
153 -static void pvebackup_complete_cb(void *opaque, int ret)
154 -{
155 - assert(!qemu_in_coroutine());
156 -
157 - PVEBackupDevInfo *di = opaque;
158 -
159 - qemu_mutex_lock(&backup_state.backup_mutex);
160 -
161 - if (ret < 0) {
162 - Error *local_err = NULL;
163 - error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
164 - pvebackup_propagate_error(local_err);
165 - }
166 -
167 - di->bs = NULL;
168 -
169 - assert(di->target == NULL);
170 -
171 - block_on_coroutine_fn(pvebackup_complete_stream, di);
172
173 // remove self from job list
174 backup_state.di_list = g_list_remove(backup_state.di_list, di);
175 @@ -310,21 +310,36 @@ static void pvebackup_complete_cb(void *opaque, int ret)
176
177 /* call cleanup if we're the last job */
178 if (!g_list_first(backup_state.di_list)) {
179 - block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
180 + pvebackup_co_cleanup();
181 }
182
183 - qemu_mutex_unlock(&backup_state.backup_mutex);
184 + qemu_co_mutex_unlock(&backup_state.backup_mutex);
185 }
186
187 -static void pvebackup_cancel(void)
188 +static void pvebackup_complete_cb(void *opaque, int ret)
189 {
190 assert(!qemu_in_coroutine());
191
192 + PVEBackupDevInfo *di = opaque;
193 + di->completed_ret = ret;
194 +
195 + /*
196 + * Schedule stream cleanup in async coroutine. close_image and finish might
197 + * take a while, so we can't block on them here.
198 + * Note: di is a pointer to an entry in the global backup_state struct, so
199 + * it stays valid.
200 + */
201 + Coroutine *co = qemu_coroutine_create(pvebackup_co_complete_stream, di);
202 + aio_co_schedule(qemu_get_aio_context(), co);
203 +}
204 +
205 +static void coroutine_fn pvebackup_co_cancel(void *opaque)
206 +{
207 Error *cancel_err = NULL;
208 error_setg(&cancel_err, "backup canceled");
209 pvebackup_propagate_error(cancel_err);
210
211 - qemu_mutex_lock(&backup_state.backup_mutex);
212 + qemu_co_mutex_lock(&backup_state.backup_mutex);
213
214 if (backup_state.vmaw) {
215 /* make sure vma writer does not block anymore */
216 @@ -342,27 +357,16 @@ static void pvebackup_cancel(void)
217 ((PVEBackupDevInfo *)bdi->data)->job :
218 NULL;
219
220 - /* ref the job before releasing the mutex, just to be safe */
221 if (cancel_job) {
222 - job_ref(&cancel_job->job);
223 + job_cancel(&cancel_job->job, false);
224 }
225
226 - /* job_cancel_sync may enter the job, so we need to release the
227 - * backup_mutex to avoid deadlock */
228 - qemu_mutex_unlock(&backup_state.backup_mutex);
229 -
230 - if (cancel_job) {
231 - AioContext *aio_context = cancel_job->job.aio_context;
232 - aio_context_acquire(aio_context);
233 - job_cancel_sync(&cancel_job->job);
234 - job_unref(&cancel_job->job);
235 - aio_context_release(aio_context);
236 - }
237 + qemu_co_mutex_unlock(&backup_state.backup_mutex);
238 }
239
240 void qmp_backup_cancel(Error **errp)
241 {
242 - pvebackup_cancel();
243 + block_on_coroutine_fn(pvebackup_co_cancel, NULL);
244 }
245
246 // assumes the caller holds backup_mutex
247 @@ -415,6 +419,14 @@ static int coroutine_fn pvebackup_co_add_config(
248 goto out;
249 }
250
251 +/*
252 + * backup_job_create can *not* be run from a coroutine (and requires an
253 + * acquired AioContext), so this can't either.
254 + * This does imply that this function cannot run with backup_mutex acquired.
255 + * That is ok because it is only ever called between setting up the backup_state
256 + * struct and starting the jobs, and from within a QMP call. This means that no
257 + * other QMP call can interrupt, and no background job is running yet.
258 + */
259 static bool create_backup_jobs(void) {
260
261 assert(!qemu_in_coroutine());
262 @@ -523,11 +535,12 @@ typedef struct QmpBackupTask {
263 UuidInfo *result;
264 } QmpBackupTask;
265
266 -// assumes the caller holds backup_mutex
267 static void coroutine_fn pvebackup_co_prepare(void *opaque)
268 {
269 assert(qemu_in_coroutine());
270
271 + qemu_co_mutex_lock(&backup_state.backup_mutex);
272 +
273 QmpBackupTask *task = opaque;
274
275 task->result = NULL; // just to be sure
276 @@ -616,6 +629,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
277 }
278 di->size = size;
279 total += size;
280 +
281 + di->completed_ret = INT_MAX;
282 }
283
284 uuid_generate(uuid);
285 @@ -847,6 +862,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
286 backup_state.stat.dirty = total - backup_state.stat.reused;
287 backup_state.stat.transferred = 0;
288 backup_state.stat.zero_bytes = 0;
289 + backup_state.stat.finishing = false;
290
291 qemu_mutex_unlock(&backup_state.stat.lock);
292
293 @@ -861,6 +877,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque)
294 uuid_info->UUID = uuid_str;
295
296 task->result = uuid_info;
297 +
298 + qemu_co_mutex_unlock(&backup_state.backup_mutex);
299 return;
300
301 err_mutex:
302 @@ -903,6 +921,8 @@ err:
303 }
304
305 task->result = NULL;
306 +
307 + qemu_co_mutex_unlock(&backup_state.backup_mutex);
308 return;
309 }
310
311 @@ -956,22 +976,15 @@ UuidInfo *qmp_backup(
312 .errp = errp,
313 };
314
315 - qemu_mutex_lock(&backup_state.backup_mutex);
316 -
317 block_on_coroutine_fn(pvebackup_co_prepare, &task);
318
319 if (*errp == NULL) {
320 bool errors = create_backup_jobs();
321 - qemu_mutex_unlock(&backup_state.backup_mutex);
322
323 if (!errors) {
324 - /* start the first job in the transaction
325 - * note: this might directly enter the job, so we need to do this
326 - * after unlocking the backup_mutex */
327 + // start the first job in the transaction
328 job_txn_start_seq(backup_state.txn);
329 }
330 - } else {
331 - qemu_mutex_unlock(&backup_state.backup_mutex);
332 }
333
334 return task.result;
335 @@ -1025,6 +1038,7 @@ BackupStatus *qmp_query_backup(Error **errp)
336 info->transferred = backup_state.stat.transferred;
337 info->has_reused = true;
338 info->reused = backup_state.stat.reused;
339 + info->finishing = backup_state.stat.finishing;
340
341 qemu_mutex_unlock(&backup_state.stat.lock);
342
343 diff --git a/qapi/block-core.json b/qapi/block-core.json
344 index 5fc42e87f3..b31ad8d989 100644
345 --- a/qapi/block-core.json
346 +++ b/qapi/block-core.json
347 @@ -784,12 +784,15 @@
348 #
349 # @uuid: uuid for this backup job
350 #
351 +# @finishing: if status='active' and finishing=true, then the backup process is
352 +# waiting for the target to finish.
353 +#
354 ##
355 { 'struct': 'BackupStatus',
356 'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int', '*dirty': 'int',
357 '*transferred': 'int', '*zero-bytes': 'int', '*reused': 'int',
358 '*start-time': 'int', '*end-time': 'int',
359 - '*backup-file': 'str', '*uuid': 'str' } }
360 + '*backup-file': 'str', '*uuid': 'str', 'finishing': 'bool' } }
361
362 ##
363 # @BackupFormat: