4 * Copyright Red Hat, Inc. 2012
7 * Paolo Bonzini <pbonzini@redhat.com>
9 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
10 * See the COPYING.LIB file in the top-level directory.
15 #include "block/blockjob.h"
16 #include "block/block_int.h"
17 #include "qemu/ratelimit.h"
18 #include "qemu/bitmap.h"
20 #define BLOCK_SIZE (1 << 20)
21 #define BDRV_SECTORS_PER_DIRTY_CHUNK (BLOCK_SIZE >> BDRV_SECTOR_BITS)
23 #define SLICE_TIME 100000000ULL /* ns */
/* Per-job state for a background mirroring job (extends the generic
 * BlockJob via an embedded "common" member declared on lines elided
 * from this extraction).
 * NOTE(review): this span is incomplete -- several fields (common, hbi,
 * buf, buf_size, mode, should_complete, ...) and the closing
 * "} MirrorBlockJob;" are missing from the visible text. */
25 typedef struct MirrorBlockJob
{
/* Destination device that dirty sectors are copied to. */
28 BlockDriverState
*target
;
/* Error policies for source-read failures and target-write failures,
 * consulted by mirror_error_action(). */
30 BlockdevOnError on_source_error
, on_target_error
;
/* One bit per BLOCK_SIZE chunk of the target: set once a chunk has been
 * copied, so the manual whole-cluster copy-on-write is done only the
 * first time a chunk is touched (see mirror_iteration). */
35 unsigned long *cow_bitmap
;
/* Decide how to react to an I/O error during mirroring.
 * @read: true for a source read error, false for a target write error.
 * @error: positive errno value (callers pass -ret of a failed request).
 * Delegates to block_job_error_action() with the per-direction policy
 * stored in the job (on_source_error / on_target_error).
 * NOTE(review): the signature tail, braces and the if/else selecting
 * between the two return statements were dropped by the extraction. */
40 static BlockErrorAction
mirror_error_action(MirrorBlockJob
*s
, bool read
,
/* Source read error: policy keyed on s->common.bs. */
45 return block_job_error_action(&s
->common
, s
->common
.bs
,
46 s
->on_source_error
, true, error
);
/* Target write error: policy keyed on s->target. */
48 return block_job_error_action(&s
->common
, s
->target
,
49 s
->on_target_error
, false, error
);
53 static int coroutine_fn
mirror_iteration(MirrorBlockJob
*s
,
54 BlockErrorAction
*p_action
)
56 BlockDriverState
*source
= s
->common
.bs
;
57 BlockDriverState
*target
= s
->target
;
60 int64_t end
, sector_num
, chunk_num
;
63 s
->sector_num
= hbitmap_iter_next(&s
->hbi
);
64 if (s
->sector_num
< 0) {
65 bdrv_dirty_iter_init(source
, &s
->hbi
);
66 s
->sector_num
= hbitmap_iter_next(&s
->hbi
);
67 trace_mirror_restart_iter(s
, bdrv_get_dirty_count(source
));
68 assert(s
->sector_num
>= 0);
71 /* If we have no backing file yet in the destination, and the cluster size
72 * is very large, we need to do COW ourselves. The first time a cluster is
73 * copied, copy it entirely.
75 * Because both BDRV_SECTORS_PER_DIRTY_CHUNK and the cluster size are
76 * powers of two, the number of sectors to copy cannot exceed one cluster.
78 sector_num
= s
->sector_num
;
79 nb_sectors
= BDRV_SECTORS_PER_DIRTY_CHUNK
;
80 chunk_num
= sector_num
/ BDRV_SECTORS_PER_DIRTY_CHUNK
;
81 if (s
->cow_bitmap
&& !test_bit(chunk_num
, s
->cow_bitmap
)) {
82 trace_mirror_cow(s
, sector_num
);
83 bdrv_round_to_clusters(s
->target
,
84 sector_num
, BDRV_SECTORS_PER_DIRTY_CHUNK
,
85 §or_num
, &nb_sectors
);
88 end
= s
->common
.len
>> BDRV_SECTOR_BITS
;
89 nb_sectors
= MIN(nb_sectors
, end
- sector_num
);
90 bdrv_reset_dirty(source
, sector_num
, nb_sectors
);
92 /* Copy the dirty cluster. */
93 iov
.iov_base
= s
->buf
;
94 iov
.iov_len
= nb_sectors
* 512;
95 qemu_iovec_init_external(&qiov
, &iov
, 1);
97 trace_mirror_one_iteration(s
, sector_num
, nb_sectors
);
98 ret
= bdrv_co_readv(source
, sector_num
, nb_sectors
, &qiov
);
100 *p_action
= mirror_error_action(s
, true, -ret
);
103 ret
= bdrv_co_writev(target
, sector_num
, nb_sectors
, &qiov
);
105 *p_action
= mirror_error_action(s
, false, -ret
);
110 bitmap_set(s
->cow_bitmap
, sector_num
/ BDRV_SECTORS_PER_DIRTY_CHUNK
,
111 nb_sectors
/ BDRV_SECTORS_PER_DIRTY_CHUNK
);
116 /* Try again later. */
117 bdrv_set_dirty(source
, sector_num
, nb_sectors
);
/* Main mirroring coroutine (the body of the job), entered from
 * mirror_start() via qemu_coroutine_create(mirror_run).
 * Phase 1 (unless mode is MIRROR_SYNC_MODE_NONE): walk the device in
 * BDRV_SECTORS_PER_DIRTY_CHUNK steps and mark allocated sectors dirty.
 * Phase 2: loop calling mirror_iteration() to copy dirty chunks,
 * flushing the target, publishing progress and rate-limiting; once the
 * disks converge it calls block_job_ready(), and on completion or
 * cancellation tears down or swaps in the target and finally calls
 * block_job_completed().
 * NOTE(review): many control-flow lines (braces, returns, goto labels,
 * some declarations such as ret/n/cnt/delay_ns/bdi) were dropped by the
 * extraction; comments below describe only the surviving statements. */
121 static void coroutine_fn
mirror_run(void *opaque
)
123 MirrorBlockJob
*s
= opaque
;
124 BlockDriverState
*bs
= s
->common
.bs
;
125 int64_t sector_num
, end
, length
;
127 char backing_filename
[1024];
/* Job cancelled before doing any work: bail out early. */
131 if (block_job_is_cancelled(&s
->common
)) {
/* Cache the source length; a negative value is an error that ends the
 * job immediately via block_job_completed(). */
135 s
->common
.len
= bdrv_getlength(bs
);
136 if (s
->common
.len
< 0) {
137 block_job_completed(&s
->common
, s
->common
.len
);
141 /* If we have no backing file yet in the destination, we cannot let
142 * the destination do COW. Instead, we copy sectors around the
143 * dirty data if needed. We need a bitmap to do that.
145 bdrv_get_backing_filename(s
->target
, backing_filename
,
146 sizeof(backing_filename
));
147 if (backing_filename
[0] && !s
->target
->backing_hd
) {
/* Grow the copy buffer to at least one target cluster and allocate the
 * manual-COW bitmap, one bit per BLOCK_SIZE chunk. */
148 bdrv_get_info(s
->target
, &bdi
);
149 if (s
->buf_size
< bdi
.cluster_size
) {
150 s
->buf_size
= bdi
.cluster_size
;
151 length
= (bdrv_getlength(bs
) + BLOCK_SIZE
- 1) / BLOCK_SIZE
;
152 s
->cow_bitmap
= bitmap_new(length
);
156 end
= s
->common
.len
>> BDRV_SECTOR_BITS
;
157 s
->buf
= qemu_blockalign(bs
, s
->buf_size
);
159 if (s
->mode
!= MIRROR_SYNC_MODE_NONE
) {
160 /* First part, loop on the sectors and initialize the dirty bitmap. */
161 BlockDriverState
*base
;
/* FULL mode copies everything (base == NULL); otherwise only sectors
 * allocated above the backing file. */
162 base
= s
->mode
== MIRROR_SYNC_MODE_FULL
? NULL
: bs
->backing_hd
;
163 for (sector_num
= 0; sector_num
< end
; ) {
/* Round up to the next chunk boundary. */
164 int64_t next
= (sector_num
| (BDRV_SECTORS_PER_DIRTY_CHUNK
- 1)) + 1;
165 ret
= bdrv_co_is_allocated_above(bs
, base
,
166 sector_num
, next
- sector_num
, &n
);
174 bdrv_set_dirty(bs
, sector_num
, n
);
182 bdrv_dirty_iter_init(bs
, &s
->hbi
);
/* Second part: main copy loop. */
186 bool should_complete
;
188 cnt
= bdrv_get_dirty_count(bs
);
/* Copy one chunk; a failure with BDRV_ACTION_REPORT ends the job. */
190 BlockErrorAction action
= BDRV_ACTION_REPORT
;
191 ret
= mirror_iteration(s
, &action
);
192 if (ret
< 0 && action
== BDRV_ACTION_REPORT
) {
195 cnt
= bdrv_get_dirty_count(bs
);
198 should_complete
= false;
/* Flush the target before declaring the disks in sync; a flush error
 * goes through the target error policy. */
200 trace_mirror_before_flush(s
);
201 ret
= bdrv_flush(s
->target
);
203 if (mirror_error_action(s
, false, -ret
) == BDRV_ACTION_REPORT
) {
207 /* We're out of the streaming phase. From now on, if the job
208 * is cancelled we will actually complete all pending I/O and
209 * report completion. This way, block-job-cancel will leave
210 * the target in a consistent state.
212 s
->common
.offset
= end
* BDRV_SECTOR_SIZE
;
214 block_job_ready(&s
->common
);
218 should_complete
= s
->should_complete
||
219 block_job_is_cancelled(&s
->common
);
220 cnt
= bdrv_get_dirty_count(bs
);
224 if (cnt
== 0 && should_complete
) {
225 /* The dirty bitmap is not updated while operations are pending.
226 * If we're about to exit, wait for pending operations before
227 * calling bdrv_get_dirty_count(bs), or we may exit while the
228 * source has dirty data to copy!
230 * Note that I/O can be submitted by the guest while
231 * mirror_populate runs.
233 trace_mirror_before_drain(s
, cnt
);
235 cnt
= bdrv_get_dirty_count(bs
);
239 trace_mirror_before_sleep(s
, cnt
, s
->synced
);
241 /* Publish progress */
242 s
->common
.offset
= (end
- cnt
) * BDRV_SECTOR_SIZE
;
/* Honour the user-set speed limit, if any. */
244 if (s
->common
.speed
) {
245 delay_ns
= ratelimit_calculate_delay(&s
->limit
, BDRV_SECTORS_PER_DIRTY_CHUNK
);
250 /* Note that even when no rate limit is applied we need to yield
251 * with no pending I/O here so that bdrv_drain_all() returns.
253 block_job_sleep_ns(&s
->common
, rt_clock
, delay_ns
);
254 if (block_job_is_cancelled(&s
->common
)) {
257 } else if (!should_complete
) {
/* Idle for a slice when there is nothing dirty, otherwise retry at
 * once. */
258 delay_ns
= (cnt
== 0 ? SLICE_TIME
: 0);
259 block_job_sleep_ns(&s
->common
, rt_clock
, delay_ns
);
260 } else if (cnt
== 0) {
261 /* The two disks are in sync. Exit and report successful
/* All I/O drained at this point; clear the cancelled flag so
 * completion is reported instead of cancellation. */
264 assert(QLIST_EMPTY(&bs
->tracked_requests
));
265 s
->common
.cancelled
= false;
/* Teardown: release the COW bitmap, stop dirty tracking and the
 * target's iostatus. */
272 g_free(s
->cow_bitmap
);
273 bdrv_set_dirty_tracking(bs
, 0);
274 bdrv_iostatus_disable(s
->target
);
/* On a successful, user-requested completion, swap the target in place
 * of the source (after matching its open flags); otherwise just close
 * and delete the target. */
275 if (s
->should_complete
&& ret
== 0) {
276 if (bdrv_get_flags(s
->target
) != bdrv_get_flags(s
->common
.bs
)) {
277 bdrv_reopen(s
->target
, bdrv_get_flags(s
->common
.bs
), NULL
);
279 bdrv_swap(s
->target
, s
->common
.bs
);
281 bdrv_close(s
->target
);
282 bdrv_delete(s
->target
);
283 block_job_completed(&s
->common
, ret
);
/* BlockJobType.set_speed callback: apply a new rate limit.
 * @speed is in bytes/second; it is converted to sectors and installed
 * into the job's ratelimit state with SLICE_TIME granularity.
 * NOTE(review): the guard that rejects an invalid speed (and the return
 * after error_set) was dropped by the extraction. */
286 static void mirror_set_speed(BlockJob
*job
, int64_t speed
, Error
**errp
)
288 MirrorBlockJob
*s
= container_of(job
, MirrorBlockJob
, common
);
/* Invalid speed: report QERR_INVALID_PARAMETER for "speed". */
291 error_set(errp
, QERR_INVALID_PARAMETER
, "speed");
294 ratelimit_set_speed(&s
->limit
, speed
/ BDRV_SECTOR_SIZE
, SLICE_TIME
);
/* BlockJobType.iostatus_reset callback: clear the iostatus of the
 * mirror target (the source's iostatus is handled by the generic
 * block-job code). */
297 static void mirror_iostatus_reset(BlockJob
*job
)
299 MirrorBlockJob
*s
= container_of(job
, MirrorBlockJob
, common
);
301 bdrv_iostatus_reset(s
->target
);
/* BlockJobType.complete callback: ask the mirroring job to finish.
 * Opens the target's backing file (reporting QERR_OPEN_FILE_FAILED with
 * the full backing filename if that fails), rejects the request when
 * the job has not reported readiness (QERR_BLOCK_JOB_NOT_READY), then
 * sets should_complete and resumes the job so mirror_run can drain
 * remaining I/O and exit.
 * NOTE(review): the if/return statements around both error paths were
 * dropped by the extraction. */
304 static void mirror_complete(BlockJob
*job
, Error
**errp
)
306 MirrorBlockJob
*s
= container_of(job
, MirrorBlockJob
, common
);
/* Make sure the target can perform COW after the swap. */
309 ret
= bdrv_open_backing_file(s
->target
);
/* Failed to open the backing file: report its full name. */
311 char backing_filename
[PATH_MAX
];
312 bdrv_get_full_backing_filename(s
->target
, backing_filename
,
313 sizeof(backing_filename
));
314 error_set(errp
, QERR_OPEN_FILE_FAILED
, backing_filename
);
/* Completion is only valid once block_job_ready() has been called. */
318 error_set(errp
, QERR_BLOCK_JOB_NOT_READY
, job
->bs
->device_name
);
/* Tell mirror_run to drain and finish, and wake it up. */
322 s
->should_complete
= true;
323 block_job_resume(job
);
/* Driver table hooking the mirror job into the generic block-job core:
 * instance size, QMP job-type name, and the optional callbacks
 * implemented above.
 * NOTE(review): the closing "};" of this initializer is missing from
 * the visible span. */
326 static BlockJobType mirror_job_type
= {
327 .instance_size
= sizeof(MirrorBlockJob
),
328 .job_type
= "mirror",
329 .set_speed
= mirror_set_speed
,
330 .iostatus_reset
= mirror_iostatus_reset
,
331 .complete
= mirror_complete
,
/* Public entry point: start mirroring @bs to @target.
 * @speed: rate limit in bytes/second (0 means unlimited).
 * @mode: initial sync mode, consumed by mirror_run.
 * @on_source_error/@on_target_error: error policies; STOP or ENOSPC on
 * the source require iostatus to be enabled on @bs, otherwise
 * QERR_INVALID_PARAMETER ("on-source-error") is reported.
 * @cb/@opaque: completion callback for the generic block-job core.
 * Creates the job, enables dirty tracking at BLOCK_SIZE granularity,
 * configures the target (write cache, error policy, iostatus), then
 * enters the mirror_run coroutine.
 * NOTE(review): the declaration of s, the error-path returns, and
 * assignments such as "s->target = target" were dropped by the
 * extraction. */
334 void mirror_start(BlockDriverState
*bs
, BlockDriverState
*target
,
335 int64_t speed
, MirrorSyncMode mode
,
336 BlockdevOnError on_source_error
,
337 BlockdevOnError on_target_error
,
338 BlockDriverCompletionFunc
*cb
,
339 void *opaque
, Error
**errp
)
/* STOP/ENOSPC on the source only make sense with a working iostatus. */
343 if ((on_source_error
== BLOCKDEV_ON_ERROR_STOP
||
344 on_source_error
== BLOCKDEV_ON_ERROR_ENOSPC
) &&
345 !bdrv_iostatus_is_enabled(bs
)) {
346 error_set(errp
, QERR_INVALID_PARAMETER
, "on-source-error");
350 s
= block_job_create(&mirror_job_type
, bs
, speed
, cb
, opaque
, errp
);
355 s
->on_source_error
= on_source_error
;
356 s
->on_target_error
= on_target_error
;
/* Default copy-buffer size; mirror_run may grow it to the target's
 * cluster size. */
359 s
->buf_size
= BLOCK_SIZE
;
361 bdrv_set_dirty_tracking(bs
, BLOCK_SIZE
);
362 bdrv_set_enable_write_cache(s
->target
, true);
363 bdrv_set_on_error(s
->target
, on_target_error
, on_target_error
);
364 bdrv_iostatus_enable(s
->target
);
/* Kick off the coroutine that does the actual copying. */
365 s
->common
.co
= qemu_coroutine_create(mirror_run
);
366 trace_mirror_start(bs
, s
, s
->common
.co
, opaque
);
367 qemu_coroutine_enter(s
->common
.co
, s
);