]>
Commit | Line | Data |
---|---|---|
893f7eba PB |
1 | /* |
2 | * Image mirroring | |
3 | * | |
4 | * Copyright Red Hat, Inc. 2012 | |
5 | * | |
6 | * Authors: | |
7 | * Paolo Bonzini <pbonzini@redhat.com> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU LGPL, version 2 or later. | |
10 | * See the COPYING.LIB file in the top-level directory. | |
11 | * | |
12 | */ | |
13 | ||
14 | #include "trace.h" | |
737e150e PB |
15 | #include "block/blockjob.h" |
16 | #include "block/block_int.h" | |
893f7eba | 17 | #include "qemu/ratelimit.h" |
b812f671 | 18 | #include "qemu/bitmap.h" |
893f7eba | 19 | |
893f7eba PB |
20 | #define SLICE_TIME 100000000ULL /* ns */ |
21 | ||
22 | typedef struct MirrorBlockJob { | |
23 | BlockJob common; | |
24 | RateLimit limit; | |
25 | BlockDriverState *target; | |
26 | MirrorSyncMode mode; | |
b952b558 | 27 | BlockdevOnError on_source_error, on_target_error; |
d63ffd87 PB |
28 | bool synced; |
29 | bool should_complete; | |
893f7eba | 30 | int64_t sector_num; |
eee13dfe | 31 | int64_t granularity; |
b812f671 PB |
32 | size_t buf_size; |
33 | unsigned long *cow_bitmap; | |
8f0720ec | 34 | HBitmapIter hbi; |
893f7eba PB |
35 | uint8_t *buf; |
36 | } MirrorBlockJob; | |
37 | ||
b952b558 PB |
38 | static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, |
39 | int error) | |
40 | { | |
41 | s->synced = false; | |
42 | if (read) { | |
43 | return block_job_error_action(&s->common, s->common.bs, | |
44 | s->on_source_error, true, error); | |
45 | } else { | |
46 | return block_job_error_action(&s->common, s->target, | |
47 | s->on_target_error, false, error); | |
48 | } | |
49 | } | |
50 | ||
51 | static int coroutine_fn mirror_iteration(MirrorBlockJob *s, | |
52 | BlockErrorAction *p_action) | |
893f7eba PB |
53 | { |
54 | BlockDriverState *source = s->common.bs; | |
55 | BlockDriverState *target = s->target; | |
56 | QEMUIOVector qiov; | |
eee13dfe | 57 | int ret, nb_sectors, sectors_per_chunk; |
b812f671 | 58 | int64_t end, sector_num, chunk_num; |
893f7eba PB |
59 | struct iovec iov; |
60 | ||
8f0720ec PB |
61 | s->sector_num = hbitmap_iter_next(&s->hbi); |
62 | if (s->sector_num < 0) { | |
63 | bdrv_dirty_iter_init(source, &s->hbi); | |
64 | s->sector_num = hbitmap_iter_next(&s->hbi); | |
65 | trace_mirror_restart_iter(s, bdrv_get_dirty_count(source)); | |
66 | assert(s->sector_num >= 0); | |
67 | } | |
68 | ||
b812f671 PB |
69 | /* If we have no backing file yet in the destination, and the cluster size |
70 | * is very large, we need to do COW ourselves. The first time a cluster is | |
71 | * copied, copy it entirely. | |
72 | * | |
eee13dfe PB |
73 | * Because both the granularity and the cluster size are powers of two, the |
74 | * number of sectors to copy cannot exceed one cluster. | |
b812f671 PB |
75 | */ |
76 | sector_num = s->sector_num; | |
eee13dfe PB |
77 | sectors_per_chunk = nb_sectors = s->granularity >> BDRV_SECTOR_BITS; |
78 | chunk_num = sector_num / sectors_per_chunk; | |
b812f671 PB |
79 | if (s->cow_bitmap && !test_bit(chunk_num, s->cow_bitmap)) { |
80 | trace_mirror_cow(s, sector_num); | |
81 | bdrv_round_to_clusters(s->target, | |
eee13dfe | 82 | sector_num, sectors_per_chunk, |
b812f671 PB |
83 | §or_num, &nb_sectors); |
84 | } | |
85 | ||
893f7eba | 86 | end = s->common.len >> BDRV_SECTOR_BITS; |
b812f671 PB |
87 | nb_sectors = MIN(nb_sectors, end - sector_num); |
88 | bdrv_reset_dirty(source, sector_num, nb_sectors); | |
893f7eba PB |
89 | |
90 | /* Copy the dirty cluster. */ | |
91 | iov.iov_base = s->buf; | |
92 | iov.iov_len = nb_sectors * 512; | |
93 | qemu_iovec_init_external(&qiov, &iov, 1); | |
94 | ||
b812f671 PB |
95 | trace_mirror_one_iteration(s, sector_num, nb_sectors); |
96 | ret = bdrv_co_readv(source, sector_num, nb_sectors, &qiov); | |
893f7eba | 97 | if (ret < 0) { |
b952b558 PB |
98 | *p_action = mirror_error_action(s, true, -ret); |
99 | goto fail; | |
100 | } | |
b812f671 | 101 | ret = bdrv_co_writev(target, sector_num, nb_sectors, &qiov); |
b952b558 PB |
102 | if (ret < 0) { |
103 | *p_action = mirror_error_action(s, false, -ret); | |
104 | s->synced = false; | |
105 | goto fail; | |
893f7eba | 106 | } |
b812f671 | 107 | if (s->cow_bitmap) { |
eee13dfe PB |
108 | bitmap_set(s->cow_bitmap, sector_num / sectors_per_chunk, |
109 | nb_sectors / sectors_per_chunk); | |
b812f671 | 110 | } |
b952b558 PB |
111 | return 0; |
112 | ||
113 | fail: | |
114 | /* Try again later. */ | |
b812f671 | 115 | bdrv_set_dirty(source, sector_num, nb_sectors); |
b952b558 | 116 | return ret; |
893f7eba PB |
117 | } |
118 | ||
119 | static void coroutine_fn mirror_run(void *opaque) | |
120 | { | |
121 | MirrorBlockJob *s = opaque; | |
122 | BlockDriverState *bs = s->common.bs; | |
eee13dfe | 123 | int64_t sector_num, end, sectors_per_chunk, length; |
b812f671 PB |
124 | BlockDriverInfo bdi; |
125 | char backing_filename[1024]; | |
893f7eba PB |
126 | int ret = 0; |
127 | int n; | |
893f7eba PB |
128 | |
129 | if (block_job_is_cancelled(&s->common)) { | |
130 | goto immediate_exit; | |
131 | } | |
132 | ||
133 | s->common.len = bdrv_getlength(bs); | |
134 | if (s->common.len < 0) { | |
135 | block_job_completed(&s->common, s->common.len); | |
136 | return; | |
137 | } | |
138 | ||
b812f671 PB |
139 | /* If we have no backing file yet in the destination, we cannot let |
140 | * the destination do COW. Instead, we copy sectors around the | |
141 | * dirty data if needed. We need a bitmap to do that. | |
142 | */ | |
143 | bdrv_get_backing_filename(s->target, backing_filename, | |
144 | sizeof(backing_filename)); | |
145 | if (backing_filename[0] && !s->target->backing_hd) { | |
146 | bdrv_get_info(s->target, &bdi); | |
eee13dfe | 147 | if (s->granularity < bdi.cluster_size) { |
b812f671 | 148 | s->buf_size = bdi.cluster_size; |
eee13dfe | 149 | length = (bdrv_getlength(bs) + s->granularity - 1) / s->granularity; |
b812f671 PB |
150 | s->cow_bitmap = bitmap_new(length); |
151 | } | |
152 | } | |
153 | ||
893f7eba | 154 | end = s->common.len >> BDRV_SECTOR_BITS; |
b812f671 | 155 | s->buf = qemu_blockalign(bs, s->buf_size); |
eee13dfe | 156 | sectors_per_chunk = s->granularity >> BDRV_SECTOR_BITS; |
893f7eba PB |
157 | |
158 | if (s->mode != MIRROR_SYNC_MODE_NONE) { | |
159 | /* First part, loop on the sectors and initialize the dirty bitmap. */ | |
160 | BlockDriverState *base; | |
161 | base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd; | |
162 | for (sector_num = 0; sector_num < end; ) { | |
eee13dfe | 163 | int64_t next = (sector_num | (sectors_per_chunk - 1)) + 1; |
893f7eba PB |
164 | ret = bdrv_co_is_allocated_above(bs, base, |
165 | sector_num, next - sector_num, &n); | |
166 | ||
167 | if (ret < 0) { | |
168 | goto immediate_exit; | |
169 | } | |
170 | ||
171 | assert(n > 0); | |
172 | if (ret == 1) { | |
173 | bdrv_set_dirty(bs, sector_num, n); | |
174 | sector_num = next; | |
175 | } else { | |
176 | sector_num += n; | |
177 | } | |
178 | } | |
179 | } | |
180 | ||
8f0720ec | 181 | bdrv_dirty_iter_init(bs, &s->hbi); |
893f7eba PB |
182 | for (;;) { |
183 | uint64_t delay_ns; | |
184 | int64_t cnt; | |
185 | bool should_complete; | |
186 | ||
187 | cnt = bdrv_get_dirty_count(bs); | |
188 | if (cnt != 0) { | |
b952b558 PB |
189 | BlockErrorAction action = BDRV_ACTION_REPORT; |
190 | ret = mirror_iteration(s, &action); | |
191 | if (ret < 0 && action == BDRV_ACTION_REPORT) { | |
893f7eba PB |
192 | goto immediate_exit; |
193 | } | |
194 | cnt = bdrv_get_dirty_count(bs); | |
195 | } | |
196 | ||
197 | should_complete = false; | |
198 | if (cnt == 0) { | |
199 | trace_mirror_before_flush(s); | |
200 | ret = bdrv_flush(s->target); | |
201 | if (ret < 0) { | |
b952b558 PB |
202 | if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) { |
203 | goto immediate_exit; | |
204 | } | |
205 | } else { | |
206 | /* We're out of the streaming phase. From now on, if the job | |
207 | * is cancelled we will actually complete all pending I/O and | |
208 | * report completion. This way, block-job-cancel will leave | |
209 | * the target in a consistent state. | |
210 | */ | |
211 | s->common.offset = end * BDRV_SECTOR_SIZE; | |
212 | if (!s->synced) { | |
213 | block_job_ready(&s->common); | |
214 | s->synced = true; | |
215 | } | |
216 | ||
217 | should_complete = s->should_complete || | |
218 | block_job_is_cancelled(&s->common); | |
219 | cnt = bdrv_get_dirty_count(bs); | |
d63ffd87 | 220 | } |
893f7eba PB |
221 | } |
222 | ||
223 | if (cnt == 0 && should_complete) { | |
224 | /* The dirty bitmap is not updated while operations are pending. | |
225 | * If we're about to exit, wait for pending operations before | |
226 | * calling bdrv_get_dirty_count(bs), or we may exit while the | |
227 | * source has dirty data to copy! | |
228 | * | |
229 | * Note that I/O can be submitted by the guest while | |
230 | * mirror_populate runs. | |
231 | */ | |
232 | trace_mirror_before_drain(s, cnt); | |
233 | bdrv_drain_all(); | |
234 | cnt = bdrv_get_dirty_count(bs); | |
235 | } | |
236 | ||
237 | ret = 0; | |
d63ffd87 PB |
238 | trace_mirror_before_sleep(s, cnt, s->synced); |
239 | if (!s->synced) { | |
893f7eba | 240 | /* Publish progress */ |
acc906c6 | 241 | s->common.offset = (end - cnt) * BDRV_SECTOR_SIZE; |
893f7eba PB |
242 | |
243 | if (s->common.speed) { | |
eee13dfe | 244 | delay_ns = ratelimit_calculate_delay(&s->limit, sectors_per_chunk); |
893f7eba PB |
245 | } else { |
246 | delay_ns = 0; | |
247 | } | |
248 | ||
249 | /* Note that even when no rate limit is applied we need to yield | |
c57b6656 | 250 | * with no pending I/O here so that bdrv_drain_all() returns. |
893f7eba PB |
251 | */ |
252 | block_job_sleep_ns(&s->common, rt_clock, delay_ns); | |
253 | if (block_job_is_cancelled(&s->common)) { | |
254 | break; | |
255 | } | |
256 | } else if (!should_complete) { | |
257 | delay_ns = (cnt == 0 ? SLICE_TIME : 0); | |
258 | block_job_sleep_ns(&s->common, rt_clock, delay_ns); | |
259 | } else if (cnt == 0) { | |
260 | /* The two disks are in sync. Exit and report successful | |
261 | * completion. | |
262 | */ | |
263 | assert(QLIST_EMPTY(&bs->tracked_requests)); | |
264 | s->common.cancelled = false; | |
265 | break; | |
266 | } | |
267 | } | |
268 | ||
269 | immediate_exit: | |
7191bf31 | 270 | qemu_vfree(s->buf); |
b812f671 | 271 | g_free(s->cow_bitmap); |
50717e94 | 272 | bdrv_set_dirty_tracking(bs, 0); |
b952b558 | 273 | bdrv_iostatus_disable(s->target); |
d63ffd87 PB |
274 | if (s->should_complete && ret == 0) { |
275 | if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) { | |
276 | bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL); | |
277 | } | |
278 | bdrv_swap(s->target, s->common.bs); | |
279 | } | |
893f7eba PB |
280 | bdrv_close(s->target); |
281 | bdrv_delete(s->target); | |
282 | block_job_completed(&s->common, ret); | |
283 | } | |
284 | ||
285 | static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) | |
286 | { | |
287 | MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); | |
288 | ||
289 | if (speed < 0) { | |
290 | error_set(errp, QERR_INVALID_PARAMETER, "speed"); | |
291 | return; | |
292 | } | |
293 | ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); | |
294 | } | |
295 | ||
b952b558 PB |
296 | static void mirror_iostatus_reset(BlockJob *job) |
297 | { | |
298 | MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); | |
299 | ||
300 | bdrv_iostatus_reset(s->target); | |
301 | } | |
302 | ||
d63ffd87 PB |
303 | static void mirror_complete(BlockJob *job, Error **errp) |
304 | { | |
305 | MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); | |
306 | int ret; | |
307 | ||
308 | ret = bdrv_open_backing_file(s->target); | |
309 | if (ret < 0) { | |
310 | char backing_filename[PATH_MAX]; | |
311 | bdrv_get_full_backing_filename(s->target, backing_filename, | |
312 | sizeof(backing_filename)); | |
313 | error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename); | |
314 | return; | |
315 | } | |
316 | if (!s->synced) { | |
317 | error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name); | |
318 | return; | |
319 | } | |
320 | ||
321 | s->should_complete = true; | |
322 | block_job_resume(job); | |
323 | } | |
324 | ||
893f7eba PB |
325 | static BlockJobType mirror_job_type = { |
326 | .instance_size = sizeof(MirrorBlockJob), | |
327 | .job_type = "mirror", | |
328 | .set_speed = mirror_set_speed, | |
b952b558 | 329 | .iostatus_reset= mirror_iostatus_reset, |
d63ffd87 | 330 | .complete = mirror_complete, |
893f7eba PB |
331 | }; |
332 | ||
333 | void mirror_start(BlockDriverState *bs, BlockDriverState *target, | |
eee13dfe | 334 | int64_t speed, int64_t granularity, MirrorSyncMode mode, |
b952b558 PB |
335 | BlockdevOnError on_source_error, |
336 | BlockdevOnError on_target_error, | |
893f7eba PB |
337 | BlockDriverCompletionFunc *cb, |
338 | void *opaque, Error **errp) | |
339 | { | |
340 | MirrorBlockJob *s; | |
341 | ||
eee13dfe PB |
342 | if (granularity == 0) { |
343 | /* Choose the default granularity based on the target file's cluster | |
344 | * size, clamped between 4k and 64k. */ | |
345 | BlockDriverInfo bdi; | |
346 | if (bdrv_get_info(target, &bdi) >= 0 && bdi.cluster_size != 0) { | |
347 | granularity = MAX(4096, bdi.cluster_size); | |
348 | granularity = MIN(65536, granularity); | |
349 | } else { | |
350 | granularity = 65536; | |
351 | } | |
352 | } | |
353 | ||
354 | assert ((granularity & (granularity - 1)) == 0); | |
355 | ||
b952b558 PB |
356 | if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || |
357 | on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && | |
358 | !bdrv_iostatus_is_enabled(bs)) { | |
359 | error_set(errp, QERR_INVALID_PARAMETER, "on-source-error"); | |
360 | return; | |
361 | } | |
362 | ||
893f7eba PB |
363 | s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp); |
364 | if (!s) { | |
365 | return; | |
366 | } | |
367 | ||
b952b558 PB |
368 | s->on_source_error = on_source_error; |
369 | s->on_target_error = on_target_error; | |
893f7eba PB |
370 | s->target = target; |
371 | s->mode = mode; | |
eee13dfe PB |
372 | s->granularity = granularity; |
373 | s->buf_size = granularity; | |
b812f671 | 374 | |
eee13dfe | 375 | bdrv_set_dirty_tracking(bs, granularity); |
893f7eba | 376 | bdrv_set_enable_write_cache(s->target, true); |
b952b558 PB |
377 | bdrv_set_on_error(s->target, on_target_error, on_target_error); |
378 | bdrv_iostatus_enable(s->target); | |
893f7eba PB |
379 | s->common.co = qemu_coroutine_create(mirror_run); |
380 | trace_mirror_start(bs, s, s->common.co, opaque); | |
381 | qemu_coroutine_enter(s->common.co, s); | |
382 | } |