]>
Commit | Line | Data |
---|---|---|
893f7eba PB |
1 | /* |
2 | * Image mirroring | |
3 | * | |
4 | * Copyright Red Hat, Inc. 2012 | |
5 | * | |
6 | * Authors: | |
7 | * Paolo Bonzini <pbonzini@redhat.com> | |
8 | * | |
9 | * This work is licensed under the terms of the GNU LGPL, version 2 or later. | |
10 | * See the COPYING.LIB file in the top-level directory. | |
11 | * | |
12 | */ | |
13 | ||
14 | #include "trace.h" | |
15 | #include "blockjob.h" | |
16 | #include "block_int.h" | |
17 | #include "qemu/ratelimit.h" | |
18 | ||
enum {
    /*
     * Size of data buffer for populating the image file. This should be large
     * enough to process multiple clusters in a single call, so that populating
     * contiguous regions of the image is efficient.
     */
    BLOCK_SIZE = 512 * BDRV_SECTORS_PER_DIRTY_CHUNK, /* in bytes */
};

/* Length of one rate-limit slice, passed to ratelimit_set_speed().  */
#define SLICE_TIME 100000000ULL /* ns */

/* State of a running drive-mirror job.  */
typedef struct MirrorBlockJob {
    BlockJob common;            /* must be first: jobs are cast to/from this */
    RateLimit limit;            /* throttles copying when a speed is set */
    BlockDriverState *target;   /* destination image being populated */
    MirrorSyncMode mode;        /* full / top / none initial-copy policy */
    BlockdevOnError on_source_error, on_target_error;
    bool synced;                /* true once source and target converged */
    bool should_complete;       /* set by mirror_complete() to finish the job */
    int64_t sector_num;         /* last dirty sector handled (-1 = none yet) */
    uint8_t *buf;               /* bounce buffer of BLOCK_SIZE bytes */
} MirrorBlockJob;
41 | ||
b952b558 PB |
42 | static BlockErrorAction mirror_error_action(MirrorBlockJob *s, bool read, |
43 | int error) | |
44 | { | |
45 | s->synced = false; | |
46 | if (read) { | |
47 | return block_job_error_action(&s->common, s->common.bs, | |
48 | s->on_source_error, true, error); | |
49 | } else { | |
50 | return block_job_error_action(&s->common, s->target, | |
51 | s->on_target_error, false, error); | |
52 | } | |
53 | } | |
54 | ||
55 | static int coroutine_fn mirror_iteration(MirrorBlockJob *s, | |
56 | BlockErrorAction *p_action) | |
893f7eba PB |
57 | { |
58 | BlockDriverState *source = s->common.bs; | |
59 | BlockDriverState *target = s->target; | |
60 | QEMUIOVector qiov; | |
61 | int ret, nb_sectors; | |
62 | int64_t end; | |
63 | struct iovec iov; | |
64 | ||
65 | end = s->common.len >> BDRV_SECTOR_BITS; | |
66 | s->sector_num = bdrv_get_next_dirty(source, s->sector_num); | |
67 | nb_sectors = MIN(BDRV_SECTORS_PER_DIRTY_CHUNK, end - s->sector_num); | |
68 | bdrv_reset_dirty(source, s->sector_num, nb_sectors); | |
69 | ||
70 | /* Copy the dirty cluster. */ | |
71 | iov.iov_base = s->buf; | |
72 | iov.iov_len = nb_sectors * 512; | |
73 | qemu_iovec_init_external(&qiov, &iov, 1); | |
74 | ||
75 | trace_mirror_one_iteration(s, s->sector_num, nb_sectors); | |
76 | ret = bdrv_co_readv(source, s->sector_num, nb_sectors, &qiov); | |
77 | if (ret < 0) { | |
b952b558 PB |
78 | *p_action = mirror_error_action(s, true, -ret); |
79 | goto fail; | |
80 | } | |
81 | ret = bdrv_co_writev(target, s->sector_num, nb_sectors, &qiov); | |
82 | if (ret < 0) { | |
83 | *p_action = mirror_error_action(s, false, -ret); | |
84 | s->synced = false; | |
85 | goto fail; | |
893f7eba | 86 | } |
b952b558 PB |
87 | return 0; |
88 | ||
89 | fail: | |
90 | /* Try again later. */ | |
91 | bdrv_set_dirty(source, s->sector_num, nb_sectors); | |
92 | return ret; | |
893f7eba PB |
93 | } |
94 | ||
/* Coroutine entry point of the mirror job.
 *
 * Phase 1 (unless mode is "none"): walk the image and mark as dirty every
 * chunk that is allocated above the chosen base, so it gets copied.
 * Phase 2: loop copying dirty chunks until the target catches up, then keep
 * mirroring new guest writes until the job is cancelled or completed.
 *
 * The order of operations in the main loop is delicate: dirty counts are
 * re-read after every step that can change them, and a final drain is done
 * before declaring the disks in sync.
 */
static void coroutine_fn mirror_run(void *opaque)
{
    MirrorBlockJob *s = opaque;
    BlockDriverState *bs = s->common.bs;
    int64_t sector_num, end;
    int ret = 0;
    int n;

    if (block_job_is_cancelled(&s->common)) {
        goto immediate_exit;
    }

    s->common.len = bdrv_getlength(bs);
    if (s->common.len < 0) {
        /* Negative length doubles as the errno to report.  */
        block_job_completed(&s->common, s->common.len);
        return;
    }

    end = s->common.len >> BDRV_SECTOR_BITS;
    s->buf = qemu_blockalign(bs, BLOCK_SIZE);

    if (s->mode != MIRROR_SYNC_MODE_NONE) {
        /* First part, loop on the sectors and initialize the dirty bitmap. */
        BlockDriverState *base;
        /* FULL mode copies everything allocated anywhere in the chain;
         * otherwise only sectors allocated above the backing file.
         */
        base = s->mode == MIRROR_SYNC_MODE_FULL ? NULL : bs->backing_hd;
        for (sector_num = 0; sector_num < end; ) {
            /* Round up to the next dirty-chunk boundary.  */
            int64_t next = (sector_num | (BDRV_SECTORS_PER_DIRTY_CHUNK - 1)) + 1;
            ret = bdrv_co_is_allocated_above(bs, base,
                                             sector_num, next - sector_num, &n);

            if (ret < 0) {
                goto immediate_exit;
            }

            assert(n > 0);
            if (ret == 1) {
                /* Allocated: mark dirty and skip to the chunk boundary.  */
                bdrv_set_dirty(bs, sector_num, n);
                sector_num = next;
            } else {
                sector_num += n;
            }
        }
    }

    /* -1 means "start scanning for dirty sectors from the beginning".  */
    s->sector_num = -1;
    for (;;) {
        uint64_t delay_ns;
        int64_t cnt;
        bool should_complete;

        cnt = bdrv_get_dirty_count(bs);
        if (cnt != 0) {
            BlockErrorAction action = BDRV_ACTION_REPORT;
            ret = mirror_iteration(s, &action);
            if (ret < 0 && action == BDRV_ACTION_REPORT) {
                goto immediate_exit;
            }
            cnt = bdrv_get_dirty_count(bs);
        }

        should_complete = false;
        if (cnt == 0) {
            trace_mirror_before_flush(s);
            ret = bdrv_flush(s->target);
            if (ret < 0) {
                if (mirror_error_action(s, false, -ret) == BDRV_ACTION_REPORT) {
                    goto immediate_exit;
                }
            } else {
                /* We're out of the streaming phase.  From now on, if the job
                 * is cancelled we will actually complete all pending I/O and
                 * report completion.  This way, block-job-cancel will leave
                 * the target in a consistent state.
                 */
                s->common.offset = end * BDRV_SECTOR_SIZE;
                if (!s->synced) {
                    /* Notify management that the job can now be completed.  */
                    block_job_ready(&s->common);
                    s->synced = true;
                }

                should_complete = s->should_complete ||
                    block_job_is_cancelled(&s->common);
                cnt = bdrv_get_dirty_count(bs);
            }
        }

        if (cnt == 0 && should_complete) {
            /* The dirty bitmap is not updated while operations are pending.
             * If we're about to exit, wait for pending operations before
             * calling bdrv_get_dirty_count(bs), or we may exit while the
             * source has dirty data to copy!
             *
             * Note that I/O can be submitted by the guest while
             * mirror_populate runs.
             */
            trace_mirror_before_drain(s, cnt);
            bdrv_drain_all();
            cnt = bdrv_get_dirty_count(bs);
        }

        ret = 0;
        trace_mirror_before_sleep(s, cnt, s->synced);
        if (!s->synced) {
            /* Publish progress */
            s->common.offset = end * BDRV_SECTOR_SIZE - cnt * BLOCK_SIZE;

            if (s->common.speed) {
                delay_ns = ratelimit_calculate_delay(&s->limit, BDRV_SECTORS_PER_DIRTY_CHUNK);
            } else {
                delay_ns = 0;
            }

            /* Note that even when no rate limit is applied we need to yield
             * with no pending I/O here so that qemu_aio_flush() returns.
             */
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
            if (block_job_is_cancelled(&s->common)) {
                break;
            }
        } else if (!should_complete) {
            /* Synced but not asked to finish: idle-poll for new writes.  */
            delay_ns = (cnt == 0 ? SLICE_TIME : 0);
            block_job_sleep_ns(&s->common, rt_clock, delay_ns);
        } else if (cnt == 0) {
            /* The two disks are in sync.  Exit and report successful
             * completion.
             */
            assert(QLIST_EMPTY(&bs->tracked_requests));
            s->common.cancelled = false;
            break;
        }
    }

immediate_exit:
    g_free(s->buf);
    bdrv_set_dirty_tracking(bs, false);
    bdrv_iostatus_disable(s->target);
    if (s->should_complete && ret == 0) {
        /* On successful completion, pivot the device to the target.  The
         * target may need reopening so its flags match the source's.
         */
        if (bdrv_get_flags(s->target) != bdrv_get_flags(s->common.bs)) {
            bdrv_reopen(s->target, bdrv_get_flags(s->common.bs), NULL);
        }
        bdrv_swap(s->target, s->common.bs);
    }
    bdrv_close(s->target);
    bdrv_delete(s->target);
    block_job_completed(&s->common, ret);
}
241 | ||
242 | static void mirror_set_speed(BlockJob *job, int64_t speed, Error **errp) | |
243 | { | |
244 | MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); | |
245 | ||
246 | if (speed < 0) { | |
247 | error_set(errp, QERR_INVALID_PARAMETER, "speed"); | |
248 | return; | |
249 | } | |
250 | ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); | |
251 | } | |
252 | ||
b952b558 PB |
253 | static void mirror_iostatus_reset(BlockJob *job) |
254 | { | |
255 | MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); | |
256 | ||
257 | bdrv_iostatus_reset(s->target); | |
258 | } | |
259 | ||
d63ffd87 PB |
260 | static void mirror_complete(BlockJob *job, Error **errp) |
261 | { | |
262 | MirrorBlockJob *s = container_of(job, MirrorBlockJob, common); | |
263 | int ret; | |
264 | ||
265 | ret = bdrv_open_backing_file(s->target); | |
266 | if (ret < 0) { | |
267 | char backing_filename[PATH_MAX]; | |
268 | bdrv_get_full_backing_filename(s->target, backing_filename, | |
269 | sizeof(backing_filename)); | |
270 | error_set(errp, QERR_OPEN_FILE_FAILED, backing_filename); | |
271 | return; | |
272 | } | |
273 | if (!s->synced) { | |
274 | error_set(errp, QERR_BLOCK_JOB_NOT_READY, job->bs->device_name); | |
275 | return; | |
276 | } | |
277 | ||
278 | s->should_complete = true; | |
279 | block_job_resume(job); | |
280 | } | |
281 | ||
893f7eba PB |
282 | static BlockJobType mirror_job_type = { |
283 | .instance_size = sizeof(MirrorBlockJob), | |
284 | .job_type = "mirror", | |
285 | .set_speed = mirror_set_speed, | |
b952b558 | 286 | .iostatus_reset= mirror_iostatus_reset, |
d63ffd87 | 287 | .complete = mirror_complete, |
893f7eba PB |
288 | }; |
289 | ||
/* Create and start a mirror job copying @bs to @target.
 *
 * @speed: initial rate limit in bytes per second (0 = unlimited).
 * @mode: which sectors to copy initially (full / top / none).
 * @on_source_error / @on_target_error: error policies for the two sides.
 * @cb / @opaque: completion callback and its argument.
 * @errp: set on failure; on success the job runs until its first yield.
 */
void mirror_start(BlockDriverState *bs, BlockDriverState *target,
                  int64_t speed, MirrorSyncMode mode,
                  BlockdevOnError on_source_error,
                  BlockdevOnError on_target_error,
                  BlockDriverCompletionFunc *cb,
                  void *opaque, Error **errp)
{
    MirrorBlockJob *s;

    /* "stop"/"enospc" policies pause the VM on error, which requires the
     * device's iostatus reporting to be enabled.
     */
    if ((on_source_error == BLOCKDEV_ON_ERROR_STOP ||
         on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
        !bdrv_iostatus_is_enabled(bs)) {
        error_set(errp, QERR_INVALID_PARAMETER, "on-source-error");
        return;
    }

    s = block_job_create(&mirror_job_type, bs, speed, cb, opaque, errp);
    if (!s) {
        return;
    }

    s->on_source_error = on_source_error;
    s->on_target_error = on_target_error;
    s->target = target;
    s->mode = mode;
    /* Start tracking guest writes so phase 2 of mirror_run() sees them.  */
    bdrv_set_dirty_tracking(bs, true);
    bdrv_set_enable_write_cache(s->target, true);
    bdrv_set_on_error(s->target, on_target_error, on_target_error);
    bdrv_iostatus_enable(s->target);
    s->common.co = qemu_coroutine_create(mirror_run);
    trace_mirror_start(bs, s, s->common.co, opaque);
    /* Enter the coroutine immediately; it yields at its first sleep.  */
    qemu_coroutine_enter(s->common.co, s);
}