qemu.git: block/mirror.c (blob as of commit "block: implement dirty bitmap using HBitmap")
/*
 * Image mirroring
 *
 * Copyright Red Hat, Inc. 2012
 *
 * Authors:
 *  Paolo Bonzini  <pbonzini@redhat.com>
 *
 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
 * See the COPYING.LIB file in the top-level directory.
 *
 */

#include "trace.h"
#include "block/blockjob.h"
#include "block/block_int.h"
#include "qemu/ratelimit.h"

enum {
    /*
     * Size of data buffer for populating the image file. This should be large
     * enough to process multiple clusters in a single call, so that populating
     * contiguous regions of the image is efficient.
     */
    BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */
};
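
/*
 * Illustrative arithmetic, assuming BDRV_SECTORS_PER_DIRTY_CHUNK is 2048
 * (its value in block_int.h around this time): 512 * 2048 = 1 MiB, so each
 * copy iteration below handles at most one 1 MiB dirty chunk.
 */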

#define SLICE_TIME 100000000ULL /* ns */

typedef struct MirrorBlockJob {
    BlockJob common;
    RateLimit limit;
    BlockDriverState *target;
    MirrorSyncMode mode;
    BlockdevOnError on_source_error, on_target_error;
    bool synced;
    bool should_complete;
    int64_t sector_num;
    HBitmapIter hbi;
    uint8_t *buf;
} MirrorBlockJob;
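
/*
 * Field notes, inferred from the code below: 'hbi' keeps the HBitmap iterator
 * position across calls to mirror_iteration(); 'synced' becomes true once the
 * target has caught up and BLOCK_JOB_READY has been emitted; 'should_complete'
 * is set by mirror_complete() in response to block-job-complete and tells
 * mirror_run() to finish and pivot.
 */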

static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
                                            int error)
{
    s->synced = false;
    if (read) {
        return block_job_error_action(&s->common, s->common.bs,
                                      s->on_source_error, true, error);
    } else {
        return block_job_error_action(&s->common, s->target,
                                      s->on_target_error, false, error);
    }
}
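
/*
 * block_job_error_action() maps the configured BlockdevOnError policy to the
 * BDRV_ACTION_* value the caller should take. Read errors come from the
 * source device, hence on_source_error; write errors come from the target,
 * hence on_target_error. Any error drops 'synced', so the job must catch up
 * again before it can complete.
 */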

static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
                                         BlockErrorAction *p_action)
{
    BlockDriverState *source = s->common.bs;
    BlockDriverState *target = s->target;
    QEMUIOVector qiov;
    int ret, nb_sectors;
    int64_t end;
    struct iovec iov;

    s->sector_num = hbitmap_iter_next(&s->hbi);
    if (s->sector_num < 0) {
        bdrv_dirty_iter_init(source, &s->hbi);
        s->sector_num = hbitmap_iter_next(&s->hbi);
        trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
        assert(s->sector_num >= 0);
    }
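
    /*
     * hbitmap_iter_next() returns -1 once the iterator runs off the end of
     * the bitmap. The caller only invokes mirror_iteration() when
     * bdrv_get_dirty_count() is non-zero, so after rewinding the iterator a
     * dirty bit must exist, which is what the assert above checks.
     */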

    end = s->common.len >> BDRV_SECTOR_BITS;
    nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num);
    bdrv_reset_dirty(source, s->sector_num, nb_sectors);

    /* Copy the dirty cluster.  */
    iov.iov_base = s->buf;
    iov.iov_len = nb_sectors * 512;
    qemu_iovec_init_external(&qiov, &iov, 1);

    trace_mirror_one_iteration(s, s->sector_num, nb_sectors);
    ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov);
    if (ret < 0) {
        *p_action = mirror_error_action(s, true, -ret);
        goto fail;
    }
    ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov);
    if (ret < 0) {
        *p_action = mirror_error_action(s, false, -ret);
        s->synced = false;
        goto fail;
    }
    return 0;

fail:
    /* Try again later.  */
    bdrv_set_dirty(source, s->sector_num, nb_sectors);
    return ret;
}

static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end;
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->common.len = bdrv_getlength(bs);
    if (s->common.len < 0) {
        block_job_completed(&s->common, s->common.len);
        return;
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->buf = qemu_blockalign(bs, BLOCK_SIZE);

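    /*
     * Sync-mode semantics, as implemented below: FULL scans with base == NULL,
     * so every sector allocated anywhere in the backing chain starts out
     * dirty; TOP uses the backing file as the base, so only sectors allocated
     * in the active (top) image start out dirty; NONE skips the scan and
     * mirrors only writes made while the job runs.
     */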
    if (s->mode != MIRROR_SYNC_MODE_NONE) {
        /* First part, loop on the sectors and initialize the dirty bitmap.  */
        BlockDriverState *base;
        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
        for (sector_num = 0; sector_num < end; ) {
            int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
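            /*
             * 'next' rounds sector_num up to the start of the following dirty
             * chunk. For example, assuming a 2048-sector chunk, sector_num ==
             * 5000 gives next == (5000 | 2047) + 1 == 6144, i.e. 3 * 2048.
             */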
            ret = bdrv_co_is_allocated_above(bs, base,
                                             sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                bdrv_set_dirty(bs, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }

    bdrv_dirty_iter_init(bs, &s->hbi);
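
    /*
     * Main loop, summarized: each pass copies at most one dirty chunk; once
     * the bitmap is clean the target is flushed and the job reports
     * BLOCK_JOB_READY, after which cancellation means "finish consistently"
     * rather than "abandon". The loop exits when should_complete (or
     * cancellation after sync) sees a drained, clean source.
     */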
    for (;;) {
        uint64_t delay_ns;
        int64_t cnt;
        bool should_complete;

        cnt = bdrv_get_dirty_count(bs);
        if (cnt != 0) {
            BlockErrorAction action = BDRV_ACTION_REPORT;
            ret = mirror_iteration(s, &action);
            if (ret < 0 && action == BDRV_ACTION_REPORT) {
                goto immediate_exit;
            }
            cnt = bdrv_get_dirty_count(bs);
        }

        should_complete = false;
        if (cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                s->common.offset = end * BDRV_SECTOR_SIZE;
                if (!s->synced) {
                    block_job_ready(&s->common);
                    s->synced = true;
                }

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(bs);
            }
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while the populate
             * loop at the top of this function runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain_all();
            cnt = bdrv_get_dirty_count(bs);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced);
        if (!s->synced) {
            /* Publish progress */
            s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;

            if (s->common.speed) {
                delay_ns = ratelimit_calculate_delay(&s->limit,
                                                     BDRV_SECTORS_PER_DIRTY_CHUNK);
            } else {
                delay_ns = 0;
            }

            /* Note that even when no rate limit is applied we need to yield
             * with no pending I/O here so that bdrv_drain_all() returns.
             */
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            delay_ns = (cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
    }

immediate_exit:
    qemu_vfree(s->buf);
    bdrv_set_dirty_tracking(bs, false);
    bdrv_iostatus_disable(s->target);
    if (s->should_complete && ret == 0) {
        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
        }
        bdrv_swap(s->target, s->common.bs);
    }
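    /*
     * bdrv_swap() exchanges the contents of the two BlockDriverStates, so
     * after a successful completion the guest device points at what used to
     * be the target. The reopen above first brings the target's open flags
     * (e.g. read-only vs. read-write) in line with the source's.
     */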
    bdrv_close(s->target);
    bdrv_delete(s->target);
    block_job_completed(&s->common, ret);
}

static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);

    if (speed < 0) {
        error_set(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }
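    /* 'speed' arrives in bytes per second, while the rate limiter is fed
     * sector counts (see the BDRV_SECTORS_PER_DIRTY_CHUNK passed to
     * ratelimit_calculate_delay() in mirror_run), hence the division by
     * BDRV_SECTOR_SIZE. */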
    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
}

static void mirror_iostatus_reset(BlockJob *job)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);

    bdrv_iostatus_reset(s->target);
}

static void mirror_complete(BlockJob *job, Error **errp)
{
    MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
    int ret;

    ret = bdrv_open_backing_file(s->target);
    if (ret < 0) {
        char backing_filename[PATH_MAX];
        bdrv_get_full_backing_filename(s->target, backing_filename,
                                       sizeof(backing_filename));
        error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename);
        return;
    }
    if (!s->synced) {
        error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
        return;
    }

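    /* The job has emitted BLOCK_JOB_READY (s->synced), so completion is
     * legal. Setting should_complete and resuming lets mirror_run() drain
     * outstanding I/O and pivot to the target on its next pass. */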
    s->should_complete = true;
    block_job_resume(job);
}

static BlockJobType mirror_job_type = {
    .instance_size  = sizeof(MirrorBlockJob),
    .job_type       = "mirror",
    .set_speed      = mirror_set_speed,
    .iostatus_reset = mirror_iostatus_reset,
    .complete       = mirror_complete,
};

void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                  int64_t speed, MirrorSyncMode mode,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp)
{
    MirrorBlockJob *s;

    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
        !bdrv_iostatus_is_enabled(bs)) {
        error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
        return;
    }
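    /* The 'stop' and 'enospc' policies pause the VM and report the failure
     * through the device's iostatus, so they only make sense when iostatus
     * reporting is enabled on the source. */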

    s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
    if (!s) {
        return;
    }

    s->on_source_error = on_source_error;
    s->on_target_error = on_target_error;
    s->target = target;
    s->mode = mode;
    bdrv_set_dirty_tracking(bs, true);
    bdrv_set_enable_write_cache(s->target, true);
    bdrv_set_on_error(s->target, on_target_error, on_target_error);
    bdrv_iostatus_enable(s->target);
    s->common.co = qemu_coroutine_create(mirror_run);
    trace_mirror_start(bs, s, s->common.co, opaque);
    qemu_coroutine_enter(s->common.co, s);
}