]>
Commit | Line | Data |
---|---|---|
d333327a SR |
1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 |
2 | From: Stefan Reiter <s.reiter@proxmox.com> | |
3 | Date: Mon, 28 Sep 2020 13:40:51 +0200 | |
4 | Subject: [PATCH] PVE-Backup: Use more coroutines and don't block on finishing | |
5 | ||
6 | proxmox_backup_co_finish is already async, but previously we would wait | |
7 | for the coroutine using block_on_coroutine_fn(). Avoid this by | |
8 | scheduling pvebackup_co_complete_stream (and thus pvebackup_co_cleanup) | |
9 | as a real coroutine when calling from pvebackup_complete_cb. This is ok, | |
10 | since complete_stream uses the backup_mutex internally to synchronize, | |
11 | and other streams can happily continue writing in the meantime anyway. | |
12 | ||
13 | To accommodate, backup_mutex is converted to a CoMutex. This means | |
14 | converting every user to a coroutine. This is not just useful here, but | |
15 | will come in handy once this series[0] is merged, and QMP calls can be | |
16 | yield-able coroutines too. Then we can also finally get rid of | |
17 | block_on_coroutine_fn. | |
18 | ||
19 | Cases of aio_context_acquire/release from within what is now a coroutine | |
20 | are changed to aio_co_reschedule_self, which works since a running | |
21 | coroutine always holds the aio lock for the context it is running in. | |
22 | ||
23 | job_cancel_sync is changed to regular job_cancel, since job_cancel_sync | |
24 | uses AIO_WAIT_WHILE internally, which is forbidden from Coroutines. | |
25 | ||
26 | create_backup_jobs cannot be run from a coroutine, so it is run | |
27 | directly. This does however mean that it runs unlocked, as it cannot | |
28 | acquire a CoMutex (see its comment for rationale on why that's ok). | |
29 | ||
30 | To communicate the finishing state, a new property is introduced to | |
31 | query-backup: 'finishing'. A new state is explicitly not used, since | |
32 | that would break compatibility with older qemu-server versions. | |
33 | ||
34 | [0] https://lists.gnu.org/archive/html/qemu-devel/2020-09/msg03515.html | |
35 | --- | |
36 | proxmox-backup-client.h | 1 + | |
37 | pve-backup.c | 124 ++++++++++++++++++++++------------------ | |
38 | qapi/block-core.json | 5 +- | |
39 | 3 files changed, 74 insertions(+), 56 deletions(-) | |
40 | ||
41 | diff --git a/proxmox-backup-client.h b/proxmox-backup-client.h | |
42 | index 20fd6b1719..a4781c5851 100644 | |
43 | --- a/proxmox-backup-client.h | |
44 | +++ b/proxmox-backup-client.h | |
45 | @@ -5,6 +5,7 @@ | |
46 | #include "qemu/coroutine.h" | |
47 | #include "proxmox-backup-qemu.h" | |
48 | ||
49 | +// FIXME: Remove once coroutines are supported for QMP | |
50 | void block_on_coroutine_fn(CoroutineEntry *entry, void *entry_arg); | |
51 | ||
52 | int coroutine_fn | |
53 | diff --git a/pve-backup.c b/pve-backup.c | |
54 | index 9562e9c98d..53cf23ed5a 100644 | |
55 | --- a/pve-backup.c | |
56 | +++ b/pve-backup.c | |
57 | @@ -33,7 +33,9 @@ const char *PBS_BITMAP_NAME = "pbs-incremental-dirty-bitmap"; | |
58 | ||
59 | static struct PVEBackupState { | |
60 | struct { | |
61 | - // Everithing accessed from qmp_backup_query command is protected using lock | |
62 | + // Everything accessed from qmp_backup_query command is protected using | |
63 | + // this lock. Do NOT hold this lock for long times, as it is sometimes | |
64 | + // acquired from coroutines, and thus any wait time may block the guest. | |
65 | QemuMutex lock; | |
66 | Error *error; | |
67 | time_t start_time; | |
68 | @@ -47,20 +49,21 @@ static struct PVEBackupState { | |
69 | size_t reused; | |
70 | size_t zero_bytes; | |
71 | GList *bitmap_list; | |
72 | + bool finishing; | |
73 | } stat; | |
74 | int64_t speed; | |
75 | VmaWriter *vmaw; | |
76 | ProxmoxBackupHandle *pbs; | |
77 | GList *di_list; | |
78 | JobTxn *txn; | |
79 | - QemuMutex backup_mutex; | |
80 | + CoMutex backup_mutex; | |
81 | CoMutex dump_callback_mutex; | |
82 | } backup_state; | |
83 | ||
84 | static void pvebackup_init(void) | |
85 | { | |
86 | qemu_mutex_init(&backup_state.stat.lock); | |
87 | - qemu_mutex_init(&backup_state.backup_mutex); | |
88 | + qemu_co_mutex_init(&backup_state.backup_mutex); | |
89 | qemu_co_mutex_init(&backup_state.dump_callback_mutex); | |
90 | } | |
91 | ||
92 | @@ -72,6 +75,7 @@ typedef struct PVEBackupDevInfo { | |
93 | size_t size; | |
94 | uint64_t block_size; | |
95 | uint8_t dev_id; | |
96 | + int completed_ret; // INT_MAX if not completed | |
97 | char targetfile[PATH_MAX]; | |
98 | BdrvDirtyBitmap *bitmap; | |
99 | BlockDriverState *target; | |
100 | @@ -227,12 +231,12 @@ pvebackup_co_dump_vma_cb( | |
101 | } | |
102 | ||
103 | // assumes the caller holds backup_mutex | |
104 | -static void coroutine_fn pvebackup_co_cleanup(void *unused) | |
105 | +static void coroutine_fn pvebackup_co_cleanup(void) | |
106 | { | |
107 | assert(qemu_in_coroutine()); | |
108 | ||
109 | qemu_mutex_lock(&backup_state.stat.lock); | |
110 | - backup_state.stat.end_time = time(NULL); | |
111 | + backup_state.stat.finishing = true; | |
112 | qemu_mutex_unlock(&backup_state.stat.lock); | |
113 | ||
114 | if (backup_state.vmaw) { | |
115 | @@ -261,12 +265,29 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused) | |
116 | ||
117 | g_list_free(backup_state.di_list); | |
118 | backup_state.di_list = NULL; | |
119 | + | |
120 | + qemu_mutex_lock(&backup_state.stat.lock); | |
121 | + backup_state.stat.end_time = time(NULL); | |
122 | + backup_state.stat.finishing = false; | |
123 | + qemu_mutex_unlock(&backup_state.stat.lock); | |
124 | } | |
125 | ||
126 | -// assumes the caller holds backup_mutex | |
127 | -static void coroutine_fn pvebackup_complete_stream(void *opaque) | |
128 | +static void coroutine_fn pvebackup_co_complete_stream(void *opaque) | |
129 | { | |
130 | PVEBackupDevInfo *di = opaque; | |
131 | + int ret = di->completed_ret; | |
132 | + | |
133 | + qemu_co_mutex_lock(&backup_state.backup_mutex); | |
134 | + | |
135 | + if (ret < 0) { | |
136 | + Error *local_err = NULL; | |
137 | + error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret)); | |
138 | + pvebackup_propagate_error(local_err); | |
139 | + } | |
140 | + | |
141 | + di->bs = NULL; | |
142 | + | |
143 | + assert(di->target == NULL); | |
144 | ||
145 | bool error_or_canceled = pvebackup_error_or_canceled(); | |
146 | ||
147 | @@ -281,27 +302,6 @@ static void coroutine_fn pvebackup_complete_stream(void *opaque) | |
148 | pvebackup_propagate_error(local_err); | |
149 | } | |
150 | } | |
151 | -} | |
152 | - | |
153 | -static void pvebackup_complete_cb(void *opaque, int ret) | |
154 | -{ | |
155 | - assert(!qemu_in_coroutine()); | |
156 | - | |
157 | - PVEBackupDevInfo *di = opaque; | |
158 | - | |
159 | - qemu_mutex_lock(&backup_state.backup_mutex); | |
160 | - | |
161 | - if (ret < 0) { | |
162 | - Error *local_err = NULL; | |
163 | - error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret)); | |
164 | - pvebackup_propagate_error(local_err); | |
165 | - } | |
166 | - | |
167 | - di->bs = NULL; | |
168 | - | |
169 | - assert(di->target == NULL); | |
170 | - | |
171 | - block_on_coroutine_fn(pvebackup_complete_stream, di); | |
172 | ||
173 | // remove self from job list | |
174 | backup_state.di_list = g_list_remove(backup_state.di_list, di); | |
175 | @@ -310,21 +310,36 @@ static void pvebackup_complete_cb(void *opaque, int ret) | |
176 | ||
177 | /* call cleanup if we're the last job */ | |
178 | if (!g_list_first(backup_state.di_list)) { | |
179 | - block_on_coroutine_fn(pvebackup_co_cleanup, NULL); | |
180 | + pvebackup_co_cleanup(); | |
181 | } | |
182 | ||
183 | - qemu_mutex_unlock(&backup_state.backup_mutex); | |
184 | + qemu_co_mutex_unlock(&backup_state.backup_mutex); | |
185 | } | |
186 | ||
187 | -static void pvebackup_cancel(void) | |
188 | +static void pvebackup_complete_cb(void *opaque, int ret) | |
189 | { | |
190 | assert(!qemu_in_coroutine()); | |
191 | ||
192 | + PVEBackupDevInfo *di = opaque; | |
193 | + di->completed_ret = ret; | |
194 | + | |
195 | + /* | |
196 | + * Schedule stream cleanup in async coroutine. close_image and finish might | |
197 | + * take a while, so we can't block on them here. | |
198 | + * Note: di is a pointer to an entry in the global backup_state struct, so | |
199 | + * it stays valid. | |
200 | + */ | |
201 | + Coroutine *co = qemu_coroutine_create(pvebackup_co_complete_stream, di); | |
202 | + aio_co_schedule(qemu_get_aio_context(), co); | |
203 | +} | |
204 | + | |
205 | +static void coroutine_fn pvebackup_co_cancel(void *opaque) | |
206 | +{ | |
207 | Error *cancel_err = NULL; | |
208 | error_setg(&cancel_err, "backup canceled"); | |
209 | pvebackup_propagate_error(cancel_err); | |
210 | ||
211 | - qemu_mutex_lock(&backup_state.backup_mutex); | |
212 | + qemu_co_mutex_lock(&backup_state.backup_mutex); | |
213 | ||
214 | if (backup_state.vmaw) { | |
215 | /* make sure vma writer does not block anymore */ | |
216 | @@ -342,27 +357,16 @@ static void pvebackup_cancel(void) | |
217 | ((PVEBackupDevInfo *)bdi->data)->job : | |
218 | NULL; | |
219 | ||
220 | - /* ref the job before releasing the mutex, just to be safe */ | |
221 | if (cancel_job) { | |
222 | - job_ref(&cancel_job->job); | |
223 | + job_cancel(&cancel_job->job, false); | |
224 | } | |
225 | ||
226 | - /* job_cancel_sync may enter the job, so we need to release the | |
227 | - * backup_mutex to avoid deadlock */ | |
228 | - qemu_mutex_unlock(&backup_state.backup_mutex); | |
229 | - | |
230 | - if (cancel_job) { | |
231 | - AioContext *aio_context = cancel_job->job.aio_context; | |
232 | - aio_context_acquire(aio_context); | |
233 | - job_cancel_sync(&cancel_job->job); | |
234 | - job_unref(&cancel_job->job); | |
235 | - aio_context_release(aio_context); | |
236 | - } | |
237 | + qemu_co_mutex_unlock(&backup_state.backup_mutex); | |
238 | } | |
239 | ||
240 | void qmp_backup_cancel(Error **errp) | |
241 | { | |
242 | - pvebackup_cancel(); | |
243 | + block_on_coroutine_fn(pvebackup_co_cancel, NULL); | |
244 | } | |
245 | ||
246 | // assumes the caller holds backup_mutex | |
247 | @@ -415,6 +419,14 @@ static int coroutine_fn pvebackup_co_add_config( | |
248 | goto out; | |
249 | } | |
250 | ||
251 | +/* | |
252 | + * backup_job_create can *not* be run from a coroutine (and requires an | |
253 | + * acquired AioContext), so this can't either. | |
254 | + * This does imply that this function cannot run with backup_mutex acquired. | |
255 | + * That is ok because it is only ever called between setting up the backup_state | |
256 | + * struct and starting the jobs, and from within a QMP call. This means that no | |
257 | + * other QMP call can interrupt, and no background job is running yet. | |
258 | + */ | |
259 | static bool create_backup_jobs(void) { | |
260 | ||
261 | assert(!qemu_in_coroutine()); | |
262 | @@ -523,11 +535,12 @@ typedef struct QmpBackupTask { | |
263 | UuidInfo *result; | |
264 | } QmpBackupTask; | |
265 | ||
266 | -// assumes the caller holds backup_mutex | |
267 | static void coroutine_fn pvebackup_co_prepare(void *opaque) | |
268 | { | |
269 | assert(qemu_in_coroutine()); | |
270 | ||
271 | + qemu_co_mutex_lock(&backup_state.backup_mutex); | |
272 | + | |
273 | QmpBackupTask *task = opaque; | |
274 | ||
275 | task->result = NULL; // just to be sure | |
276 | @@ -616,6 +629,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque) | |
277 | } | |
278 | di->size = size; | |
279 | total += size; | |
280 | + | |
281 | + di->completed_ret = INT_MAX; | |
282 | } | |
283 | ||
284 | uuid_generate(uuid); | |
285 | @@ -847,6 +862,7 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque) | |
286 | backup_state.stat.dirty = total - backup_state.stat.reused; | |
287 | backup_state.stat.transferred = 0; | |
288 | backup_state.stat.zero_bytes = 0; | |
289 | + backup_state.stat.finishing = false; | |
290 | ||
291 | qemu_mutex_unlock(&backup_state.stat.lock); | |
292 | ||
293 | @@ -861,6 +877,8 @@ static void coroutine_fn pvebackup_co_prepare(void *opaque) | |
294 | uuid_info->UUID = uuid_str; | |
295 | ||
296 | task->result = uuid_info; | |
297 | + | |
298 | + qemu_co_mutex_unlock(&backup_state.backup_mutex); | |
299 | return; | |
300 | ||
301 | err_mutex: | |
302 | @@ -903,6 +921,8 @@ err: | |
303 | } | |
304 | ||
305 | task->result = NULL; | |
306 | + | |
307 | + qemu_co_mutex_unlock(&backup_state.backup_mutex); | |
308 | return; | |
309 | } | |
310 | ||
311 | @@ -956,22 +976,15 @@ UuidInfo *qmp_backup( | |
312 | .errp = errp, | |
313 | }; | |
314 | ||
315 | - qemu_mutex_lock(&backup_state.backup_mutex); | |
316 | - | |
317 | block_on_coroutine_fn(pvebackup_co_prepare, &task); | |
318 | ||
319 | if (*errp == NULL) { | |
320 | bool errors = create_backup_jobs(); | |
321 | - qemu_mutex_unlock(&backup_state.backup_mutex); | |
322 | ||
323 | if (!errors) { | |
324 | - /* start the first job in the transaction | |
325 | - * note: this might directly enter the job, so we need to do this | |
326 | - * after unlocking the backup_mutex */ | |
327 | + // start the first job in the transaction | |
328 | job_txn_start_seq(backup_state.txn); | |
329 | } | |
330 | - } else { | |
331 | - qemu_mutex_unlock(&backup_state.backup_mutex); | |
332 | } | |
333 | ||
334 | return task.result; | |
335 | @@ -1025,6 +1038,7 @@ BackupStatus *qmp_query_backup(Error **errp) | |
336 | info->transferred = backup_state.stat.transferred; | |
337 | info->has_reused = true; | |
338 | info->reused = backup_state.stat.reused; | |
339 | + info->finishing = backup_state.stat.finishing; | |
340 | ||
341 | qemu_mutex_unlock(&backup_state.stat.lock); | |
342 | ||
343 | diff --git a/qapi/block-core.json b/qapi/block-core.json | |
344 | index 5fc42e87f3..b31ad8d989 100644 | |
345 | --- a/qapi/block-core.json | |
346 | +++ b/qapi/block-core.json | |
347 | @@ -784,12 +784,15 @@ | |
348 | # | |
349 | # @uuid: uuid for this backup job | |
350 | # | |
351 | +# @finishing: if status='active' and finishing=true, then the backup process is | |
352 | +# waiting for the target to finish. | |
353 | +# | |
354 | ## | |
355 | { 'struct': 'BackupStatus', | |
356 | 'data': {'*status': 'str', '*errmsg': 'str', '*total': 'int', '*dirty': 'int', | |
357 | '*transferred': 'int', '*zero-bytes': 'int', '*reused': 'int', | |
358 | '*start-time': 'int', '*end-time': 'int', | |
359 | - '*backup-file': 'str', '*uuid': 'str' } } | |
360 | + '*backup-file': 'str', '*uuid': 'str', 'finishing': 'bool' } } | |
361 | ||
362 | ## | |
363 | # @BackupFormat: |