From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
From: Stefan Reiter <s.reiter@proxmox.com>
Date: Thu, 20 Aug 2020 14:25:00 +0200
Subject: [PATCH] PVE-Backup: Use a transaction to synchronize job states

By using a JobTxn, we can sync dirty bitmaps only when *all* jobs were
successful - meaning we don't need to remove them when the backup fails,
since QEMU's BITMAP_SYNC_MODE_ON_SUCCESS will now handle that for us.

To keep the rate-limiting and IO impact from before, we use a sequential
transaction, so drives will still be backed up one after the other.
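
The resulting flow in create_backup_jobs() and qmp_backup() boils down
to roughly the following sketch (simplified; locking and error handling
omitted, all identifiers as used in the diff below):

    /* one sequential transaction for the whole backup */
    backup_state.txn = job_txn_new_seq();

    for (GList *l = backup_state.di_list; l; l = g_list_next(l)) {
        PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
        /* jobs are created paused and attached to the transaction
         * instead of being started individually */
        di->job = backup_job_create(
            NULL, di->bs, di->target, backup_state.speed, sync_mode,
            di->bitmap, bitmap_mode, false, NULL, &perf,
            BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
            JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn,
            &local_err);
    }

    /* start only the first job; each completed job kicks off the next,
     * and the transaction commits (syncing the dirty bitmaps) once all
     * jobs have succeeded */
    job_txn_start_seq(backup_state.txn);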

Signed-off-by: Stefan Reiter <s.reiter@proxmox.com>
Signed-off-by: Thomas Lamprecht <t.lamprecht@proxmox.com>
[FE: add new force parameter to job_cancel_sync calls
     adapt for new job lock mechanism replacing AioContext locks]
Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
---
 pve-backup.c | 163 ++++++++++++++++-----------------------------------
 1 file changed, 50 insertions(+), 113 deletions(-)

diff --git a/pve-backup.c b/pve-backup.c
index c85b2ecd83..b5fb844434 100644
--- a/pve-backup.c
+++ b/pve-backup.c
@@ -52,6 +52,7 @@ static struct PVEBackupState {
     VmaWriter *vmaw;
     ProxmoxBackupHandle *pbs;
     GList *di_list;
+    JobTxn *txn;
     QemuMutex backup_mutex;
     CoMutex dump_callback_mutex;
 } backup_state;
@@ -71,34 +72,12 @@ typedef struct PVEBackupDevInfo {
     size_t size;
     uint64_t block_size;
     uint8_t dev_id;
-    bool completed;
     char targetfile[PATH_MAX];
     BdrvDirtyBitmap *bitmap;
     BlockDriverState *target;
+    BlockJob *job;
 } PVEBackupDevInfo;
 
-static void pvebackup_run_next_job(void);
-
-static BlockJob *
-lookup_active_block_job(PVEBackupDevInfo *di)
-{
-    if (!di->completed && di->bs) {
-        WITH_JOB_LOCK_GUARD() {
-            for (BlockJob *job = block_job_next_locked(NULL); job; job = block_job_next_locked(job)) {
-                if (job->job.driver->job_type != JOB_TYPE_BACKUP) {
-                    continue;
-                }
-
-                BackupBlockJob *bjob = container_of(job, BackupBlockJob, common);
-                if (bjob && bjob->source_bs == di->bs) {
-                    return job;
-                }
-            }
-        }
-    }
-    return NULL;
-}
-
 static void pvebackup_propagate_error(Error *err)
 {
     qemu_mutex_lock(&backup_state.stat.lock);
@@ -274,18 +253,6 @@ static void coroutine_fn pvebackup_co_cleanup(void *unused)
             if (local_err != NULL) {
                 pvebackup_propagate_error(local_err);
             }
-        } else {
-            // on error or cancel we cannot ensure synchronization of dirty
-            // bitmaps with backup server, so remove all and do full backup next
-            GList *l = backup_state.di_list;
-            while (l) {
-                PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-                l = g_list_next(l);
-
-                if (di->bitmap) {
-                    bdrv_release_dirty_bitmap(di->bitmap);
-                }
-            }
         }
 
         proxmox_backup_disconnect(backup_state.pbs);
@@ -324,8 +291,6 @@ static void pvebackup_complete_cb(void *opaque, int ret)
 
     qemu_mutex_lock(&backup_state.backup_mutex);
 
-    di->completed = true;
-
     if (ret < 0) {
         Error *local_err = NULL;
         error_setg(&local_err, "job failed with err %d - %s", ret, strerror(-ret));
@@ -338,20 +303,17 @@ static void pvebackup_complete_cb(void *opaque, int ret)
 
     block_on_coroutine_fn(pvebackup_complete_stream, di);
 
-    // remove self from job queue
+    // remove self from job list
     backup_state.di_list = g_list_remove(backup_state.di_list, di);
 
-    if (di->bitmap && ret < 0) {
-        // on error or cancel we cannot ensure synchronization of dirty
-        // bitmaps with backup server, so remove all and do full backup next
-        bdrv_release_dirty_bitmap(di->bitmap);
-    }
-
     g_free(di);
 
-    qemu_mutex_unlock(&backup_state.backup_mutex);
+    /* call cleanup if we're the last job */
+    if (!g_list_first(backup_state.di_list)) {
+        block_on_coroutine_fn(pvebackup_co_cleanup, NULL);
+    }
 
-    pvebackup_run_next_job();
+    qemu_mutex_unlock(&backup_state.backup_mutex);
 }
 
 static void pvebackup_cancel(void)
@@ -373,32 +335,28 @@ static void pvebackup_cancel(void)
         proxmox_backup_abort(backup_state.pbs, "backup canceled");
     }
 
-    qemu_mutex_unlock(&backup_state.backup_mutex);
-
-    for(;;) {
-
-        BlockJob *next_job = NULL;
+    /* it's enough to cancel one job in the transaction, the rest will follow
+     * automatically */
+    GList *bdi = g_list_first(backup_state.di_list);
+    BlockJob *cancel_job = bdi && bdi->data ?
+        ((PVEBackupDevInfo *)bdi->data)->job :
+        NULL;
 
-        qemu_mutex_lock(&backup_state.backup_mutex);
-
-        GList *l = backup_state.di_list;
-        while (l) {
-            PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-            l = g_list_next(l);
-
-            BlockJob *job = lookup_active_block_job(di);
-            if (job != NULL) {
-                next_job = job;
-                break;
-            }
+    /* ref the job before releasing the mutex, just to be safe */
+    if (cancel_job) {
+        WITH_JOB_LOCK_GUARD() {
+            job_ref_locked(&cancel_job->job);
         }
+    }
 
-        qemu_mutex_unlock(&backup_state.backup_mutex);
+    /* job_cancel_sync may enter the job, so we need to release the
+     * backup_mutex to avoid deadlock */
+    qemu_mutex_unlock(&backup_state.backup_mutex);
 
-        if (next_job) {
-            job_cancel_sync(&next_job->job, true);
-        } else {
-            break;
+    if (cancel_job) {
+        WITH_JOB_LOCK_GUARD() {
+            job_cancel_sync_locked(&cancel_job->job, true);
+            job_unref_locked(&cancel_job->job);
         }
     }
 }
@@ -458,49 +416,19 @@ static int coroutine_fn pvebackup_co_add_config(
     goto out;
 }
 
-bool job_should_pause_locked(Job *job);
-
-static void pvebackup_run_next_job(void)
-{
-    assert(!qemu_in_coroutine());
-
-    qemu_mutex_lock(&backup_state.backup_mutex);
-
-    GList *l = backup_state.di_list;
-    while (l) {
-        PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
-        l = g_list_next(l);
-
-        BlockJob *job = lookup_active_block_job(di);
-
-        if (job) {
-            qemu_mutex_unlock(&backup_state.backup_mutex);
-
-            WITH_JOB_LOCK_GUARD() {
-                if (job_should_pause_locked(&job->job)) {
-                    bool error_or_canceled = pvebackup_error_or_canceled();
-                    if (error_or_canceled) {
-                        job_cancel_sync_locked(&job->job, true);
-                    } else {
-                        job_resume_locked(&job->job);
-                    }
-                }
-            }
-            return;
-        }
-    }
-
-    block_on_coroutine_fn(pvebackup_co_cleanup, NULL); // no more jobs, run cleanup
-
-    qemu_mutex_unlock(&backup_state.backup_mutex);
-}
-
 static bool create_backup_jobs(void) {
 
     assert(!qemu_in_coroutine());
 
     Error *local_err = NULL;
 
+    /* create job transaction to synchronize bitmap commit and cancel all
+     * jobs in case one errors */
+    if (backup_state.txn) {
+        job_txn_unref(backup_state.txn);
+    }
+    backup_state.txn = job_txn_new_seq();
+
     BackupPerf perf = { .max_workers = 16 };
 
     /* create and start all jobs (paused state) */
@@ -523,7 +451,7 @@ static bool create_backup_jobs(void) {
         BlockJob *job = backup_job_create(
             NULL, di->bs, di->target, backup_state.speed, sync_mode, di->bitmap,
             bitmap_mode, false, NULL, &perf, BLOCKDEV_ON_ERROR_REPORT, BLOCKDEV_ON_ERROR_REPORT,
-            JOB_DEFAULT, pvebackup_complete_cb, di, NULL, &local_err);
+            JOB_DEFAULT, pvebackup_complete_cb, di, backup_state.txn, &local_err);
 
         aio_context_release(aio_context);
 
@@ -535,7 +463,8 @@ static bool create_backup_jobs(void) {
             pvebackup_propagate_error(create_job_err);
             break;
         }
-        job_start(&job->job);
+
+        di->job = job;
 
         bdrv_unref(di->target);
         di->target = NULL;
@@ -553,6 +482,12 @@ static bool create_backup_jobs(void) {
             bdrv_unref(di->target);
             di->target = NULL;
         }
+
+        if (di->job) {
+            WITH_JOB_LOCK_GUARD() {
+                job_unref_locked(&di->job->job);
+            }
+        }
     }
 }
 
@@ -943,10 +878,6 @@ err:
         PVEBackupDevInfo *di = (PVEBackupDevInfo *)l->data;
         l = g_list_next(l);
 
-        if (di->bitmap) {
-            bdrv_release_dirty_bitmap(di->bitmap);
-        }
-
         if (di->target) {
             bdrv_unref(di->target);
         }
@@ -1035,9 +966,15 @@ UuidInfo *qmp_backup(
     block_on_coroutine_fn(pvebackup_co_prepare, &task);
 
     if (*errp == NULL) {
-        create_backup_jobs();
+        bool errors = create_backup_jobs();
         qemu_mutex_unlock(&backup_state.backup_mutex);
-        pvebackup_run_next_job();
+
+        if (!errors) {
+            /* start the first job in the transaction
+             * note: this might directly enter the job, so we need to do this
+             * after unlocking the backup_mutex */
+            job_txn_start_seq(backup_state.txn);
+        }
     } else {
         qemu_mutex_unlock(&backup_state.backup_mutex);
     }