]> git.proxmox.com Git - mirror_qemu.git/blame - block/mirror.c
mirror: allow customizing the granularity
[mirror_qemu.git] / block / mirror.c
CommitLineData
893f7eba
PB
1/*
2 * Image mirroring
3 *
4 * Copyright Red Hat, Inc. 2012
5 *
6 * Authors:
7 * Paolo Bonzini <pbonzini@redhat.com>
8 *
9 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
10 * See the COPYING.LIB file in the top-level directory.
11 *
12 */
13
14#include "trace.h"
737e150e
PB
15#include "block/blockjob.h"
16#include "block/block_int.h"
893f7eba 17#include "qemu/ratelimit.h"
b812f671 18#include "qemu/bitmap.h"
893f7eba 19
893f7eba
PB
#define SLICE_TIME 100000000ULL /* ns */

typedef struct MirrorBlockJob {
    BlockJob common;            /* generic block-job state; must stay first so
                                 * container_of(job, ..., common) works */
    RateLimit limit;            /* throttling state for the "speed" option */
    BlockDriverState *target;   /* destination device of the mirroring */
    MirrorSyncMode mode;        /* which sectors to pre-dirty (FULL copies
                                 * everything, NONE skips the initial pass) */
    BlockdevOnError on_source_error, on_target_error; /* error policies for
                                 * read (source) and write (target) failures */
    bool synced;                /* true once source and target have converged;
                                 * cleared again on any I/O error */
    bool should_complete;       /* set by mirror_complete() to request that the
                                 * job finish and pivot to the target */
    int64_t sector_num;         /* dirty sector currently being processed */
    int64_t granularity;        /* copy unit in bytes; power of two (asserted
                                 * in mirror_start) */
    size_t buf_size;            /* size of buf; raised to the target cluster
                                 * size when we must do COW ourselves */
    unsigned long *cow_bitmap;  /* one bit per granularity-sized chunk already
                                 * copied at least once, or NULL if the target
                                 * can do its own COW */
    HBitmapIter hbi;            /* iterator over the source dirty bitmap */
    uint8_t *buf;               /* bounce buffer for the read/write loop */
} MirrorBlockJob;
37
b952b558
PB
38static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read,
39 int error)
40{
41 s->synced = false;
42 if (read) {
43 return block_job_error_action(&s->common, s->common.bs,
44 s->on_source_error, true, error);
45 } else {
46 return block_job_error_action(&s->common, s->target,
47 s->on_target_error, false, error);
48 }
49}
50
51static int coroutine_fn mirror_iteration(MirrorBlockJob *s,
52 BlockErrorAction *p_action)
893f7eba
PB
53{
54 BlockDriverState *source = s->common.bs;
55 BlockDriverState *target = s->target;
56 QEMUIOVector qiov;
eee13dfe 57 int ret, nb_sectors, sectors_per_chunk;
b812f671 58 int64_t end, sector_num, chunk_num;
893f7eba
PB
59 struct iovec iov;
60
8f0720ec
PB
61 s->sector_num = hbitmap_iter_next(&s->hbi);
62 if (s->sector_num < 0) {
63 bdrv_dirty_iter_init(source, &s->hbi);
64 s->sector_num = hbitmap_iter_next(&s->hbi);
65 trace_mirror_restart_iter(s, bdrv_get_dirty_count(source));
66 assert(s->sector_num >= 0);
67 }
68
b812f671
PB
69 /* If we have no backing file yet in the destination, and the cluster size
70 * is very large, we need to do COW ourselves. The first time a cluster is
71 * copied, copy it entirely.
72 *
eee13dfe
PB
73 * Because both the granularity and the cluster size are powers of two, the
74 * number of sectors to copy cannot exceed one cluster.
b812f671
PB
75 */
76 sector_num = s->sector_num;
eee13dfe
PB
77 sectors_per_chunk = nb_sectors = s->granularity >> BDRV_SECTOR_BITS;
78 chunk_num = sector_num / sectors_per_chunk;
b812f671
PB
79 if (s->cow_bitmap && !test_bit(chunk_num, s->cow_bitmap)) {
80 trace_mirror_cow(s, sector_num);
81 bdrv_round_to_clusters(s->target,
eee13dfe 82 sector_num, sectors_per_chunk,
b812f671
PB
83 &sector_num, &nb_sectors);
84 }
85
893f7eba 86 end = s->common.len >> BDRV_SECTOR_BITS;
b812f671
PB
87 nb_sectors = MIN(nb_sectors, end - sector_num);
88 bdrv_reset_dirty(source, sector_num, nb_sectors);
893f7eba
PB
89
90 /* Copy the dirty cluster. */
91 iov.iov_base = s->buf;
92 iov.iov_len = nb_sectors * 512;
93 qemu_iovec_init_external(&qiov, &iov, 1);
94
b812f671
PB
95 trace_mirror_one_iteration(s, sector_num, nb_sectors);
96 ret = bdrv_co_readv(source, sector_num, nb_sectors, &qiov);
893f7eba 97 if (ret < 0) {
b952b558
PB
98 *p_action = mirror_error_action(s, true, -ret);
99 goto fail;
100 }
b812f671 101 ret = bdrv_co_writev(target, sector_num, nb_sectors, &qiov);
b952b558
PB
102 if (ret < 0) {
103 *p_action = mirror_error_action(s, false, -ret);
104 s->synced = false;
105 goto fail;
893f7eba 106 }
b812f671 107 if (s->cow_bitmap) {
eee13dfe
PB
108 bitmap_set(s->cow_bitmap, sector_num / sectors_per_chunk,
109 nb_sectors / sectors_per_chunk);
b812f671 110 }
b952b558
PB
111 return 0;
112
113fail:
114 /* Try again later. */
b812f671 115 bdrv_set_dirty(source, sector_num, nb_sectors);
b952b558 116 return ret;
893f7eba
PB
117}
118
119static void coroutine_fn mirror_run(void *opaque)
120{
121 MirrorBlockJob *s = opaque;
122 BlockDriverState *bs = s->common.bs;
eee13dfe 123 int64_t sector_num, end, sectors_per_chunk, length;
b812f671
PB
124 BlockDriverInfo bdi;
125 char backing_filename[1024];
893f7eba
PB
126 int ret = 0;
127 int n;
893f7eba
PB
128
129 if (block_job_is_cancelled(&s->common)) {
130 goto immediate_exit;
131 }
132
133 s->common.len = bdrv_getlength(bs);
134 if (s->common.len < 0) {
135 block_job_completed(&s->common, s->common.len);
136 return;
137 }
138
b812f671
PB
139 /* If we have no backing file yet in the destination, we cannot let
140 * the destination do COW. Instead, we copy sectors around the
141 * dirty data if needed. We need a bitmap to do that.
142 */
143 bdrv_get_backing_filename(s->target, backing_filename,
144 sizeof(backing_filename));
145 if (backing_filename[0] && !s->target->backing_hd) {
146 bdrv_get_info(s->target, &bdi);
eee13dfe 147 if (s->granularity < bdi.cluster_size) {
b812f671 148 s->buf_size = bdi.cluster_size;
eee13dfe 149 length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity;
b812f671
PB
150 s->cow_bitmap = bitmap_new(length);
151 }
152 }
153
893f7eba 154 end = s->common.len >> BDRV_SECTOR_BITS;
b812f671 155 s->buf = qemu_blockalign(bs, s->buf_size);
eee13dfe 156 sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS;
893f7eba
PB
157
158 if (s->mode != MIRROR_SYNC_MODE_NONE) {
159 /* First part, loop on the sectors and initialize the dirty bitmap. */
160 BlockDriverState *base;
161 base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
162 for (sector_num = 0; sector_num < end; ) {
eee13dfe 163 int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1;
893f7eba
PB
164 ret = bdrv_co_is_allocated_above(bs, base,
165 sector_num, next - sector_num, &n);
166
167 if (ret < 0) {
168 goto immediate_exit;
169 }
170
171 assert(n > 0);
172 if (ret == 1) {
173 bdrv_set_dirty(bs, sector_num, n);
174 sector_num = next;
175 } else {
176 sector_num += n;
177 }
178 }
179 }
180
8f0720ec 181 bdrv_dirty_iter_init(bs, &s->hbi);
893f7eba
PB
182 for (;;) {
183 uint64_t delay_ns;
184 int64_t cnt;
185 bool should_complete;
186
187 cnt = bdrv_get_dirty_count(bs);
188 if (cnt != 0) {
b952b558
PB
189 BlockErrorAction action = BDRV_ACTION_REPORT;
190 ret = mirror_iteration(s, &action);
191 if (ret < 0 && action == BDRV_ACTION_REPORT) {
893f7eba
PB
192 goto immediate_exit;
193 }
194 cnt = bdrv_get_dirty_count(bs);
195 }
196
197 should_complete = false;
198 if (cnt == 0) {
199 trace_mirror_before_flush(s);
200 ret = bdrv_flush(s->target);
201 if (ret < 0) {
b952b558
PB
202 if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
203 goto immediate_exit;
204 }
205 } else {
206 /* We're out of the streaming phase. From now on, if the job
207 * is cancelled we will actually complete all pending I/O and
208 * report completion. This way, block-job-cancel will leave
209 * the target in a consistent state.
210 */
211 s->common.offset = end * BDRV_SECTOR_SIZE;
212 if (!s->synced) {
213 block_job_ready(&s->common);
214 s->synced = true;
215 }
216
217 should_complete = s->should_complete ||
218 block_job_is_cancelled(&s->common);
219 cnt = bdrv_get_dirty_count(bs);
d63ffd87 220 }
893f7eba
PB
221 }
222
223 if (cnt == 0 && should_complete) {
224 /* The dirty bitmap is not updated while operations are pending.
225 * If we're about to exit, wait for pending operations before
226 * calling bdrv_get_dirty_count(bs), or we may exit while the
227 * source has dirty data to copy!
228 *
229 * Note that I/O can be submitted by the guest while
230 * mirror_populate runs.
231 */
232 trace_mirror_before_drain(s, cnt);
233 bdrv_drain_all();
234 cnt = bdrv_get_dirty_count(bs);
235 }
236
237 ret = 0;
d63ffd87
PB
238 trace_mirror_before_sleep(s, cnt, s->synced);
239 if (!s->synced) {
893f7eba 240 /* Publish progress */
acc906c6 241 s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE;
893f7eba
PB
242
243 if (s->common.speed) {
eee13dfe 244 delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk);
893f7eba
PB
245 } else {
246 delay_ns = 0;
247 }
248
249 /* Note that even when no rate limit is applied we need to yield
c57b6656 250 * with no pending I/O here so that bdrv_drain_all() returns.
893f7eba
PB
251 */
252 block_job_sleep_ns(&s->common, rt_clock, delay_ns);
253 if (block_job_is_cancelled(&s->common)) {
254 break;
255 }
256 } else if (!should_complete) {
257 delay_ns = (cnt == 0 ? SLICE_TIME : 0);
258 block_job_sleep_ns(&s->common, rt_clock, delay_ns);
259 } else if (cnt == 0) {
260 /* The two disks are in sync. Exit and report successful
261 * completion.
262 */
263 assert(QLIST_EMPTY(&bs->tracked_requests));
264 s->common.cancelled = false;
265 break;
266 }
267 }
268
269immediate_exit:
7191bf31 270 qemu_vfree(s->buf);
b812f671 271 g_free(s->cow_bitmap);
50717e94 272 bdrv_set_dirty_tracking(bs, 0);
b952b558 273 bdrv_iostatus_disable(s->target);
d63ffd87
PB
274 if (s->should_complete && ret == 0) {
275 if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
276 bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
277 }
278 bdrv_swap(s->target, s->common.bs);
279 }
893f7eba
PB
280 bdrv_close(s->target);
281 bdrv_delete(s->target);
282 block_job_completed(&s->common, ret);
283}
284
285static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp)
286{
287 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
288
289 if (speed < 0) {
290 error_set(errp, QERR_INVALID_PARAMETER, "speed");
291 return;
292 }
293 ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
294}
295
b952b558
PB
296static void mirror_iostatus_reset(BlockJob *job)
297{
298 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
299
300 bdrv_iostatus_reset(s->target);
301}
302
d63ffd87
PB
303static void mirror_complete(BlockJob *job, Error **errp)
304{
305 MirrorBlockJob *s = container_of(job, MirrorBlockJob, common);
306 int ret;
307
308 ret = bdrv_open_backing_file(s->target);
309 if (ret < 0) {
310 char backing_filename[PATH_MAX];
311 bdrv_get_full_backing_filename(s->target, backing_filename,
312 sizeof(backing_filename));
313 error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename);
314 return;
315 }
316 if (!s->synced) {
317 error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name);
318 return;
319 }
320
321 s->should_complete = true;
322 block_job_resume(job);
323}
324
893f7eba
PB
325static BlockJobType mirror_job_type = {
326 .instance_size = sizeof(MirrorBlockJob),
327 .job_type = "mirror",
328 .set_speed = mirror_set_speed,
b952b558 329 .iostatus_reset= mirror_iostatus_reset,
d63ffd87 330 .complete = mirror_complete,
893f7eba
PB
331};
332
333void mirror_start(BlockDriverState *bs, BlockDriverState *target,
eee13dfe 334 int64_t speed, int64_t granularity, MirrorSyncMode mode,
b952b558
PB
335 BlockdevOnError on_source_error,
336 BlockdevOnError on_target_error,
893f7eba
PB
337 BlockDriverCompletionFunc *cb,
338 void *opaque, Error **errp)
339{
340 MirrorBlockJob *s;
341
eee13dfe
PB
342 if (granularity == 0) {
343 /* Choose the default granularity based on the target file's cluster
344 * size, clamped between 4k and 64k. */
345 BlockDriverInfo bdi;
346 if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) {
347 granularity = MAX(4096, bdi.cluster_size);
348 granularity = MIN(65536, granularity);
349 } else {
350 granularity = 65536;
351 }
352 }
353
354 assert ((granularity & (granularity - 1)) == 0);
355
b952b558
PB
356 if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
357 on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
358 !bdrv_iostatus_is_enabled(bs)) {
359 error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
360 return;
361 }
362
893f7eba
PB
363 s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
364 if (!s) {
365 return;
366 }
367
b952b558
PB
368 s->on_source_error = on_source_error;
369 s->on_target_error = on_target_error;
893f7eba
PB
370 s->target = target;
371 s->mode = mode;
eee13dfe
PB
372 s->granularity = granularity;
373 s->buf_size = granularity;
b812f671 374
eee13dfe 375 bdrv_set_dirty_tracking(bs, granularity);
893f7eba 376 bdrv_set_enable_write_cache(s->target, true);
b952b558
PB
377 bdrv_set_on_error(s->target, on_target_error, on_target_error);
378 bdrv_iostatus_enable(s->target);
893f7eba
PB
379 s->common.co = qemu_coroutine_create(mirror_run);
380 trace_mirror_start(bs, s, s->common.co, opaque);
381 qemu_coroutine_enter(s->common.co, s);
382}