From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Dietmar Maurer <dietmar@proxmox.com>
Date: Mon, 6 Apr 2020 12:17:00 +0200
Subject: [PATCH] PVE-Backup: avoid coroutines to fix AIO freeze, cleanups

We observed various AIO pool loop freezes, so we decided to avoid
coroutines and restrict ourselves to code similar to upstream
(see blockdev.c: do_backup_common).

* avoid coroutines for job-related code (causes hangs with iothreads)
  - We now acquire/release all mutexes outside of coroutines, so we can
    correctly use a normal mutex (see the sketch after this list).

* split pvebackup_co_dump_cb into:
  - pvebackup_co_dump_pbs_cb and
  - pvebackup_co_dump_vma_cb

* new helper functions
  - pvebackup_propagate_error
  - pvebackup_error_or_canceled
  - pvebackup_add_transfered_bytes

* avoid cancel flag (not needed)

* simplify backup_cancel logic

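As a minimal illustration of the locking rule behind these changes (not
part of the patch; the function names below are made up for the example):
a plain QemuMutex must be unlocked by the thread that locked it, so it
must never be held across a qemu_coroutine_yield(), whereas a CoMutex
tracks the owning coroutine and stays safe across a yield:

    static QemuMutex lock;     /* thread-affine: unlock in locking thread */
    static CoMutex co_lock;    /* coroutine-aware: safe across yields */

    static void coroutine_fn unsafe_example(void)
    {
        qemu_mutex_lock(&lock);
        qemu_coroutine_yield();      /* may resume in a different thread */
        qemu_mutex_unlock(&lock);    /* wrong thread - crash */
    }

    static void coroutine_fn safe_example(void)
    {
        qemu_co_mutex_lock(&co_lock);
        qemu_coroutine_yield();      /* ok: CoMutex follows the coroutine */
        qemu_co_mutex_unlock(&co_lock);
    }
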
There is progress upstream on support for running QMP commands inside
coroutines, see:
https://lists.gnu.org/archive/html/qemu-devel/2020-02/msg04852.html

We should consider using that once it is available in upstream QEMU.

Signed-off-by: Dietmar Maurer <dietmar@proxmox.com>
---
 pve-backup.c | 638 ++++++++++++++++++++++++++-------------------------
 1 file changed, 320 insertions(+), 318 deletions(-)

diff --git a/pve-backup.c b/pve-backup.c
index 9ae89fb679..bb917ee972 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -11,11 +11,27 @@

 /* PVE backup state and related function */

+/*
+ * Note: A resume from a qemu_coroutine_yield can happen in a different thread,
+ * so you may not use normal mutexes within coroutines:
+ *
+ * ---bad-example---
+ * qemu_rec_mutex_lock(lock)
+ * ...
+ * qemu_coroutine_yield() // wait for something
+ * // we are now inside a different thread
+ * qemu_rec_mutex_unlock(lock) // Crash - wrong thread!!
+ * ---end-bad-example---
+ *
+ * ==> Always use CoMutex inside coroutines.
+ * ==> Never acquire/release AioContext within coroutines (because that uses a QemuRecMutex)
+ *
+ */

 static struct PVEBackupState {
     struct {
-        // Everithing accessed from qmp command, protected using rwlock
-        CoRwlock rwlock;
+        // Everything accessed from qmp_backup_query command is protected using lock
+        QemuMutex lock;
         Error *error;
         time_t start_time;
         time_t end_time;
@@ -25,19 +41,20 @@ static struct PVEBackupState {
         size_t total;
         size_t transferred;
         size_t zero_bytes;
-        bool cancel;
     } stat;
     int64_t speed;
     VmaWriter *vmaw;
     ProxmoxBackupHandle *pbs;
     GList *di_list;
-    CoMutex backup_mutex;
+    QemuMutex backup_mutex;
+    CoMutex dump_callback_mutex;
 } backup_state;

 static void pvebackup_init(void)
 {
-    qemu_co_rwlock_init(&backup_state.stat.rwlock);
-    qemu_co_mutex_init(&backup_state.backup_mutex);
+    qemu_mutex_init(&backup_state.stat.lock);
+    qemu_mutex_init(&backup_state.backup_mutex);
+    qemu_co_mutex_init(&backup_state.dump_callback_mutex);
 }

 // initialize PVEBackupState at startup
@@ -52,10 +69,54 @@ typedef struct PVEBackupDevInfo {
     BlockDriverState *target;
 } PVEBackupDevInfo;

-static void pvebackup_co_run_next_job(void);
+static void pvebackup_run_next_job(void);

+static BlockJob *
+lookup_active_block_job(PVEBackupDevInfo *di)
+{
+    if (!di->completed && di->bs) {
+        for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
+            if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
+                continue;
+            }
+
+            BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
+            if (bjob && bjob->source_bs == di->bs) {
+                return job;
+            }
+        }
+    }
+    return NULL;
+}
+
+static void pvebackup_propagate_error(Error *err)
+{
+    qemu_mutex_lock(&backup_state.stat.lock);
+    error_propagate(&backup_state.stat.error, err);
+    qemu_mutex_unlock(&backup_state.stat.lock);
+}
+
+static bool pvebackup_error_or_canceled(void)
+{
+    qemu_mutex_lock(&backup_state.stat.lock);
+    bool error_or_canceled = !!backup_state.stat.error;
+    qemu_mutex_unlock(&backup_state.stat.lock);
+
+    return error_or_canceled;
+}
+
+static void pvebackup_add_transfered_bytes(size_t transferred, size_t zero_bytes)
+{
+    qemu_mutex_lock(&backup_state.stat.lock);
+    backup_state.stat.zero_bytes += zero_bytes;
+    backup_state.stat.transferred += transferred;
+    qemu_mutex_unlock(&backup_state.stat.lock);
+}
+
+// This may get called from multiple coroutines in multiple io-threads
+// Note1: this may get called after job_cancel()
 static int coroutine_fn
-pvebackup_co_dump_cb(
+pvebackup_co_dump_pbs_cb(
     void *opaque,
     uint64_t start,
     uint64_t bytes,
@@ -67,137 +128,127 @@ pvebackup_co_dump_cb(
     const unsigned char *buf = pbuf;
     PVEBackupDevInfo *di = opaque;

-    qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-    bool cancel = backup_state.stat.cancel;
-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+    assert(backup_state.pbs);
+
+    Error *local_err = NULL;
+    int pbs_res = -1;
+
+    qemu_co_mutex_lock(&backup_state.dump_callback_mutex);

-    if (cancel) {
-        return size; // return success
+    // avoid deadlock if job is cancelled
+    if (pvebackup_error_or_canceled()) {
+        qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+        return -1;
     }

-    qemu_co_mutex_lock(&backup_state.backup_mutex);
+    pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
+    qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);

-    int ret = -1;
+    if (pbs_res < 0) {
+        pvebackup_propagate_error(local_err);
+        return pbs_res;
+    } else {
+        pvebackup_add_transfered_bytes(size, !buf ? size : 0);
+    }

-    if (backup_state.vmaw) {
-        size_t zero_bytes = 0;
-        uint64_t remaining = size;
-
-        uint64_t cluster_num = start / VMA_CLUSTER_SIZE;
-        if ((cluster_num * VMA_CLUSTER_SIZE) != start) {
-            qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-            if (!backup_state.stat.error) {
-                qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
-                error_setg(&backup_state.stat.error,
-                           "got unaligned write inside backup dump "
-                           "callback (sector %ld)", start);
-            }
-            qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-            qemu_co_mutex_unlock(&backup_state.backup_mutex);
-            return -1; // not aligned to cluster size
-        }
+    return size;
+}

-        while (remaining > 0) {
-            ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num,
-                                   buf, &zero_bytes);
-            ++cluster_num;
-            if (buf) {
-                buf += VMA_CLUSTER_SIZE;
-            }
-            if (ret < 0) {
-                qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-                if (!backup_state.stat.error) {
-                    qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
-                    vma_writer_error_propagate(backup_state.vmaw, &backup_state.stat.error);
-                }
-                qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+// This may get called from multiple coroutines in multiple io-threads
+static int coroutine_fn
+pvebackup_co_dump_vma_cb(
+    void *opaque,
+    uint64_t start,
+    uint64_t bytes,
+    const void *pbuf)
+{
+    assert(qemu_in_coroutine());

-                qemu_co_mutex_unlock(&backup_state.backup_mutex);
-                return ret;
-            } else {
-                qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-                backup_state.stat.zero_bytes += zero_bytes;
-                if (remaining >= VMA_CLUSTER_SIZE) {
-                    backup_state.stat.transferred += VMA_CLUSTER_SIZE;
-                    remaining -= VMA_CLUSTER_SIZE;
-                } else {
-                    backup_state.stat.transferred += remaining;
-                    remaining = 0;
-                }
-                qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-            }
-        }
-    } else if (backup_state.pbs) {
-        Error *local_err = NULL;
-        int pbs_res = -1;
+    const uint64_t size = bytes;
+    const unsigned char *buf = pbuf;
+    PVEBackupDevInfo *di = opaque;

-        pbs_res = proxmox_backup_co_write_data(backup_state.pbs, di->dev_id, buf, start, size, &local_err);
+    int ret = -1;

-        qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
+    assert(backup_state.vmaw);

-        if (pbs_res < 0) {
-            error_propagate(&backup_state.stat.error, local_err);
-            qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-            qemu_co_mutex_unlock(&backup_state.backup_mutex);
-            return pbs_res;
-        } else {
-            if (!buf) {
-                backup_state.stat.zero_bytes += size;
-            }
-            backup_state.stat.transferred += size;
+    uint64_t remaining = size;
+
+    uint64_t cluster_num = start / VMA_CLUSTER_SIZE;
+    if ((cluster_num * VMA_CLUSTER_SIZE) != start) {
+        Error *local_err = NULL;
+        error_setg(&local_err,
+                   "got unaligned write inside backup dump "
+                   "callback (sector %ld)", start);
+        pvebackup_propagate_error(local_err);
+        return -1; // not aligned to cluster size
+    }
+
+    while (remaining > 0) {
+        qemu_co_mutex_lock(&backup_state.dump_callback_mutex);
+        // avoid deadlock if job is cancelled
+        if (pvebackup_error_or_canceled()) {
+            qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);
+            return -1;
         }

-        qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+        size_t zero_bytes = 0;
+        ret = vma_writer_write(backup_state.vmaw, di->dev_id, cluster_num, buf, &zero_bytes);
+        qemu_co_mutex_unlock(&backup_state.dump_callback_mutex);

-    } else {
-        qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-        if (!buf) {
-            backup_state.stat.zero_bytes += size;
+        ++cluster_num;
+        if (buf) {
+            buf += VMA_CLUSTER_SIZE;
+        }
+        if (ret < 0) {
+            Error *local_err = NULL;
+            vma_writer_error_propagate(backup_state.vmaw, &local_err);
+            pvebackup_propagate_error(local_err);
+            return ret;
+        } else {
+            if (remaining >= VMA_CLUSTER_SIZE) {
+                assert(ret == VMA_CLUSTER_SIZE);
+                pvebackup_add_transfered_bytes(VMA_CLUSTER_SIZE, zero_bytes);
+                remaining -= VMA_CLUSTER_SIZE;
+            } else {
+                assert(ret == remaining);
+                pvebackup_add_transfered_bytes(remaining, zero_bytes);
+                remaining = 0;
+            }
         }
-        backup_state.stat.transferred += size;
-        qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
     }

-    qemu_co_mutex_unlock(&backup_state.backup_mutex);
-
     return size;
 }

-static void coroutine_fn pvebackup_co_cleanup(void)
+// assumes the caller holds backup_mutex
+static void coroutine_fn pvebackup_co_cleanup(void *unused)
 {
     assert(qemu_in_coroutine());

-    qemu_co_mutex_lock(&backup_state.backup_mutex);
-
-    qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
+    qemu_mutex_lock(&backup_state.stat.lock);
     backup_state.stat.end_time = time(NULL);
-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+    qemu_mutex_unlock(&backup_state.stat.lock);

     if (backup_state.vmaw) {
         Error *local_err = NULL;
         vma_writer_close(backup_state.vmaw, &local_err);

         if (local_err != NULL) {
-            qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-            error_propagate(&backup_state.stat.error, local_err);
-            qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-        }
+            pvebackup_propagate_error(local_err);
+        }

         backup_state.vmaw = NULL;
     }

     if (backup_state.pbs) {
-        qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-        bool error_or_canceled = backup_state.stat.error || backup_state.stat.cancel;
-        if (!error_or_canceled) {
+        if (!pvebackup_error_or_canceled()) {
             Error *local_err = NULL;
             proxmox_backup_co_finish(backup_state.pbs, &local_err);
             if (local_err != NULL) {
-                qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
-                error_propagate(&backup_state.stat.error, local_err);
-            }
+                pvebackup_propagate_error(local_err);
+            }
         }
-        qemu_co_rwlock_unlock(&backup_state.stat.rwlock);

         proxmox_backup_disconnect(backup_state.pbs);
         backup_state.pbs = NULL;
@@ -205,43 +256,14 @@ static void coroutine_fn pvebackup_co_cleanup(void)

     g_list_free(backup_state.di_list);
     backup_state.di_list = NULL;
-    qemu_co_mutex_unlock(&backup_state.backup_mutex);
 }

-typedef struct PVEBackupCompeteCallbackData {
-    PVEBackupDevInfo *di;
-    int result;
-} PVEBackupCompeteCallbackData;
-
-static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
+// assumes the caller holds backup_mutex
+static void coroutine_fn pvebackup_complete_stream(void *opaque)
 {
-    assert(qemu_in_coroutine());
-
-    PVEBackupCompeteCallbackData *cb_data = opaque;
-
-    qemu_co_mutex_lock(&backup_state.backup_mutex);
-
-    PVEBackupDevInfo *di = cb_data->di;
-    int ret = cb_data->result;
-
-    di->completed = true;
-
-    qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-    bool error_or_canceled = backup_state.stat.error || backup_state.stat.cancel;
-
-    if (ret < 0 && !backup_state.stat.error) {
-        qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
-        error_setg(&backup_state.stat.error, "job failed with err %d - %s",
-                   ret, strerror(-ret));
-    }
-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-
-    di->bs = NULL;
+    PVEBackupDevInfo *di = opaque;

-    if (di->target) {
-        bdrv_unref(di->target);
-        di->target = NULL;
-    }
+    bool error_or_canceled = pvebackup_error_or_canceled();

     if (backup_state.vmaw) {
         vma_writer_close_stream(backup_state.vmaw, di->dev_id);
@@ -251,110 +273,101 @@ static void coroutine_fn pvebackup_co_complete_cb(void *opaque)
         Error *local_err = NULL;
         proxmox_backup_co_close_image(backup_state.pbs, di->dev_id, &local_err);
         if (local_err != NULL) {
-            qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-            error_propagate(&backup_state.stat.error, local_err);
-            qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+            pvebackup_propagate_error(local_err);
         }
     }
+}

-    // remove self from job queue
-    backup_state.di_list = g_list_remove(backup_state.di_list, di);
-    g_free(di);
+static void pvebackup_complete_cb(void *opaque, int ret)
+{
+    assert(!qemu_in_coroutine());

-    int pending_jobs = g_list_length(backup_state.di_list);
+    PVEBackupDevInfo *di = opaque;

-    qemu_co_mutex_unlock(&backup_state.backup_mutex);
+    qemu_mutex_lock(&backup_state.backup_mutex);

-    if (pending_jobs > 0) {
-        pvebackup_co_run_next_job();
-    } else {
-        pvebackup_co_cleanup();
+    di->completed = true;
+
+    if (ret < 0) {
+        Error *local_err = NULL;
+        error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
+        pvebackup_propagate_error(local_err);
     }
-}

-static void pvebackup_complete_cb(void *opaque, int ret)
-{
-    // This can be called from the main loop, or from a coroutine
-    PVEBackupCompeteCallbackData cb_data = {
-        .di = opaque,
-        .result = ret,
-    };
+    di->bs = NULL;

-    if (qemu_in_coroutine()) {
-        pvebackup_co_complete_cb(&cb_data);
-    } else {
-        block_on_coroutine_fn(pvebackup_co_complete_cb, &cb_data);
-    }
-}
+    assert(di->target == NULL);

-static void coroutine_fn pvebackup_co_cancel(void *opaque)
-{
-    assert(qemu_in_coroutine());
+    block_on_coroutine_fn(pvebackup_complete_stream, di);

-    qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-    backup_state.stat.cancel = true;
-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+    // remove self from job queue
+    backup_state.di_list = g_list_remove(backup_state.di_list, di);

-    qemu_co_mutex_lock(&backup_state.backup_mutex);
+    g_free(di);

-    // Avoid race between block jobs and backup-cancel command:
-    if (!(backup_state.vmaw || backup_state.pbs)) {
-        qemu_co_mutex_unlock(&backup_state.backup_mutex);
-        return;
-    }
+    qemu_mutex_unlock(&backup_state.backup_mutex);

-    qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-    if (!backup_state.stat.error) {
-        qemu_co_rwlock_upgrade(&backup_state.stat.rwlock);
-        error_setg(&backup_state.stat.error, "backup cancelled");
-    }
-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+    pvebackup_run_next_job();
+}
+
+static void pvebackup_cancel(void)
+{
+    assert(!qemu_in_coroutine());
+
+    Error *cancel_err = NULL;
+    error_setg(&cancel_err, "backup canceled");
+    pvebackup_propagate_error(cancel_err);
+
+    qemu_mutex_lock(&backup_state.backup_mutex);

     if (backup_state.vmaw) {
         /* make sure vma writer does not block anymore */
-        vma_writer_set_error(backup_state.vmaw, "backup cancelled");
+        vma_writer_set_error(backup_state.vmaw, "backup canceled");
     }

     if (backup_state.pbs) {
-        proxmox_backup_abort(backup_state.pbs, "backup cancelled");
+        proxmox_backup_abort(backup_state.pbs, "backup canceled");
     }

-    bool running_jobs = 0;
-    GList *l = backup_state.di_list;
-    while (l) {
-        PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-        l = g_list_next(l);
-        if (!di->completed && di->bs) {
-            for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
-                if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
-                    continue;
-                }
+    qemu_mutex_unlock(&backup_state.backup_mutex);

-                BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
-                if (bjob && bjob->source_bs == di->bs) {
-                    AioContext *aio_context = job->job.aio_context;
-                    aio_context_acquire(aio_context);
+    for(;;) {

-                    if (!di->completed) {
-                        running_jobs += 1;
-                        job_cancel(&job->job, false);
-                    }
-                    aio_context_release(aio_context);
-                }
+        BlockJob *next_job = NULL;
+
+        qemu_mutex_lock(&backup_state.backup_mutex);
+
+        GList *l = backup_state.di_list;
+        while (l) {
+            PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+            l = g_list_next(l);
+
+            BlockJob *job = lookup_active_block_job(di);
+            if (job != NULL) {
+                next_job = job;
+                break;
             }
         }
-    }

-    qemu_co_mutex_unlock(&backup_state.backup_mutex);
+        qemu_mutex_unlock(&backup_state.backup_mutex);

-    if (running_jobs == 0) pvebackup_co_cleanup(); // else job will call completion handler
+        if (next_job) {
+            AioContext *aio_context = next_job->job.aio_context;
+            aio_context_acquire(aio_context);
+            job_cancel_sync(&next_job->job);
+            aio_context_release(aio_context);
+        } else {
+            break;
+        }
+    }
 }

 void qmp_backup_cancel(Error **errp)
 {
-    block_on_coroutine_fn(pvebackup_co_cancel, NULL);
+    pvebackup_cancel();
 }

+// assumes the caller holds backup_mutex
 static int coroutine_fn pvebackup_co_add_config(
     const char *file,
     const char *name,
@@ -406,46 +419,97 @@ static int coroutine_fn pvebackup_co_add_config(

 bool job_should_pause(Job *job);

-static void coroutine_fn pvebackup_co_run_next_job(void)
+static void pvebackup_run_next_job(void)
 {
-    assert(qemu_in_coroutine());
+    assert(!qemu_in_coroutine());

-    qemu_co_mutex_lock(&backup_state.backup_mutex);
+    qemu_mutex_lock(&backup_state.backup_mutex);

     GList *l = backup_state.di_list;
     while (l) {
         PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
         l = g_list_next(l);
-        if (!di->completed && di->bs) {
-            for (BlockJob *job = block_job_next(NULL); job; job = block_job_next(job)) {
-                if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
-                    continue;
-                }

-                BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
-                if (bjob && bjob->source_bs == di->bs) {
-                    AioContext *aio_context = job->job.aio_context;
-                    qemu_co_mutex_unlock(&backup_state.backup_mutex);
-                    aio_context_acquire(aio_context);
-
-                    if (job_should_pause(&job->job)) {
-                        qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-                        bool error_or_canceled = backup_state.stat.error || backup_state.stat.cancel;
-                        qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-
-                        if (error_or_canceled) {
-                            job_cancel(&job->job, false);
-                        } else {
-                            job_resume(&job->job);
-                        }
-                    }
-                    aio_context_release(aio_context);
-                    return;
+        BlockJob *job = lookup_active_block_job(di);
+
+        if (job) {
+            qemu_mutex_unlock(&backup_state.backup_mutex);
+
+            AioContext *aio_context = job->job.aio_context;
+            aio_context_acquire(aio_context);
+
+            if (job_should_pause(&job->job)) {
+                bool error_or_canceled = pvebackup_error_or_canceled();
+                if (error_or_canceled) {
+                    job_cancel_sync(&job->job);
+                } else {
+                    job_resume(&job->job);
                 }
             }
+            aio_context_release(aio_context);
+            return;
+        }
+    }
+
+    block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
+
+    qemu_mutex_unlock(&backup_state.backup_mutex);
+}
+
+static bool create_backup_jobs(void) {
+
+    assert(!qemu_in_coroutine());
+
+    Error *local_err = NULL;
+
+    /* create and start all jobs (paused state) */
+    GList *l = backup_state.di_list;
+    while (l) {
+        PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+        l = g_list_next(l);
+
+        assert(di->target != NULL);
+
+        AioContext *aio_context = bdrv_get_aio_context(di->bs);
+        aio_context_acquire(aio_context);
+
+        BlockJob *job = backup_job_create(
+            NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
+            BITMAP_SYNC_MODE_NEVER, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
+            JOB_DEFAULT, pvebackup_complete_cb, di, 1, NULL, &local_err);
+
+        aio_context_release(aio_context);
+
+        if (!job || local_err != NULL) {
+            Error *create_job_err = NULL;
+            error_setg(&create_job_err, "backup_job_create failed: %s",
+                       local_err ? error_get_pretty(local_err) : "null");
+
+            pvebackup_propagate_error(create_job_err);
+            break;
+        }
+        job_start(&job->job);
+
+        bdrv_unref(di->target);
+        di->target = NULL;
+    }
+
+    bool errors = pvebackup_error_or_canceled();
+
+    if (errors) {
+        l = backup_state.di_list;
+        while (l) {
+            PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
+            l = g_list_next(l);
+
+            if (di->target) {
+                bdrv_unref(di->target);
+                di->target = NULL;
+            }
         }
     }
-    qemu_co_mutex_unlock(&backup_state.backup_mutex);
+
+    return errors;
 }

 typedef struct QmpBackupTask {
@@ -476,7 +540,8 @@ typedef struct QmpBackupTask {
     UuidInfo *result;
 } QmpBackupTask;

-static void coroutine_fn pvebackup_co_start(void *opaque)
+// assumes the caller holds backup_mutex
+static void coroutine_fn pvebackup_co_prepare(void *opaque)
 {
     assert(qemu_in_coroutine());

@@ -495,16 +560,12 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
     GList *di_list = NULL;
     GList *l;
     UuidInfo *uuid_info;
-    BlockJob *job;

     const char *config_name = "qemu-server.conf";
     const char *firewall_name = "qemu-server.fw";

-    qemu_co_mutex_lock(&backup_state.backup_mutex);
-
     if (backup_state.di_list) {
-        qemu_co_mutex_unlock(&backup_state.backup_mutex);
-        error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
+        error_set(task->errp, ERROR_CLASS_GENERIC_ERROR,
                   "previous backup not finished");
         return;
     }
@@ -631,7 +692,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
         if (dev_id < 0)
             goto err;

-        if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_cb, di, task->errp))) {
+        if (!(di->target = bdrv_backup_dump_create(dump_cb_block_size, di->size, pvebackup_co_dump_pbs_cb, di, task->errp))) {
             goto err;
         }

@@ -652,7 +713,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
         PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
         l = g_list_next(l);

-        if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_cb, di, task->errp))) {
+        if (!(di->target = bdrv_backup_dump_create(VMA_CLUSTER_SIZE, di->size, pvebackup_co_dump_vma_cb, di, task->errp))) {
             goto err;
         }

@@ -717,9 +778,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
     }
     /* initialize global backup_state now */

-    qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-
-    backup_state.stat.cancel = false;
+    qemu_mutex_lock(&backup_state.stat.lock);

     if (backup_state.stat.error) {
         error_free(backup_state.stat.error);
@@ -742,7 +801,7 @@ static void coroutine_fn pvebackup_co_start(void *opaque)
     backup_state.stat.transferred = 0;
     backup_state.stat.zero_bytes = 0;

-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
+    qemu_mutex_unlock(&backup_state.stat.lock);

     backup_state.speed = (task->has_speed && task->speed > 0) ? task->speed : 0;

@@ -751,48 +810,6 @@ static void coroutine_fn pvebackup_co_start(void *opaque)

     backup_state.di_list = di_list;

-    /* start all jobs (paused state) */
-    l = di_list;
-    while (l) {
-        PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-        l = g_list_next(l);
-
-        // make sure target runs in same aoi_context as source
-        AioContext *aio_context = bdrv_get_aio_context(di->bs);
-        aio_context_acquire(aio_context);
-        GSList *ignore = NULL;
-        bdrv_set_aio_context_ignore(di->target, aio_context, &ignore);
-        g_slist_free(ignore);
-        aio_context_release(aio_context);
-
-        job = backup_job_create(NULL, di->bs, di->target, backup_state.speed, MIRROR_SYNC_MODE_FULL, NULL,
-                                BITMAP_SYNC_MODE_NEVER, false, NULL, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
-                                JOB_DEFAULT, pvebackup_complete_cb, di, 1, NULL, &local_err);
-        if (!job || local_err != NULL) {
-            qemu_co_rwlock_wrlock(&backup_state.stat.rwlock);
-            error_setg(&backup_state.stat.error, "backup_job_create failed");
-            qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-            break;
-        }
-        job_start(&job->job);
-        if (di->target) {
-            bdrv_unref(di->target);
-            di->target = NULL;
-        }
-    }
-
-    qemu_co_mutex_unlock(&backup_state.backup_mutex);
-
-    qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
-    bool no_errors = !backup_state.stat.error;
-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-
-    if (no_errors) {
-        pvebackup_co_run_next_job(); // run one job
-    } else {
-        pvebackup_co_cancel(NULL);
-    }
-
     uuid_info = g_malloc0(sizeof(*uuid_info));
     uuid_info->UUID = uuid_str;

@@ -835,8 +852,6 @@ err:
         rmdir(backup_dir);
     }

-    qemu_co_mutex_unlock(&backup_state.backup_mutex);
-
     task->result = NULL;
     return;
 }
@@ -880,32 +895,31 @@ UuidInfo *qmp_backup(
         .errp = errp,
     };

-    block_on_coroutine_fn(pvebackup_co_start, &task);
+    qemu_mutex_lock(&backup_state.backup_mutex);

-    return task.result;
-}
+    block_on_coroutine_fn(pvebackup_co_prepare, &task);

+    if (*errp == NULL) {
+        create_backup_jobs();
+        qemu_mutex_unlock(&backup_state.backup_mutex);
+        pvebackup_run_next_job();
+    } else {
+        qemu_mutex_unlock(&backup_state.backup_mutex);
+    }

-typedef struct QmpQueryBackupTask {
-    Error **errp;
-    BackupStatus *result;
-} QmpQueryBackupTask;
+    return task.result;
+}

-static void coroutine_fn pvebackup_co_query(void *opaque)
+BackupStatus *qmp_query_backup(Error **errp)
 {
-    assert(qemu_in_coroutine());
-
-    QmpQueryBackupTask *task = opaque;
-
     BackupStatus *info = g_malloc0(sizeof(*info));

-    qemu_co_rwlock_rdlock(&backup_state.stat.rwlock);
+    qemu_mutex_lock(&backup_state.stat.lock);

     if (!backup_state.stat.start_time) {
         /* not started, return {} */
-        task->result = info;
-        qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-        return;
+        qemu_mutex_unlock(&backup_state.stat.lock);
+        return info;
     }

     info->has_status = true;
@@ -941,19 +955,7 @@ static void coroutine_fn pvebackup_co_query(void *opaque)
     info->has_transferred = true;
     info->transferred = backup_state.stat.transferred;

-    task->result = info;
+    qemu_mutex_unlock(&backup_state.stat.lock);

-    qemu_co_rwlock_unlock(&backup_state.stat.rwlock);
-}
-
-BackupStatus *qmp_query_backup(Error **errp)
-{
-    QmpQueryBackupTask task = {
-        .errp = errp,
-        .result = NULL,
-    };
-
-    block_on_coroutine_fn(pvebackup_co_query, &task);
-
-    return task.result;
+    return info;
 }