]>
Commit | Line | Data |
---|---|---|
e1c66c6d LE |
1 | /* BlockDriver implementation for "raw" |
2 | * | |
ad82be2f | 3 | * Copyright (C) 2010-2016 Red Hat, Inc. |
ff369a48 | 4 | * Copyright (C) 2010, Blue Swirl <blauwirbel@gmail.com> |
775d6afd | 5 | * Copyright (C) 2009, Anthony Liguori <aliguori@us.ibm.com> |
e1c66c6d LE |
6 | * |
7 | * Author: | |
8 | * Laszlo Ersek <lersek@redhat.com> | |
9 | * | |
10 | * Permission is hereby granted, free of charge, to any person obtaining a copy | |
11 | * of this software and associated documentation files (the "Software"), to | |
12 | * deal in the Software without restriction, including without limitation the | |
13 | * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or | |
14 | * sell copies of the Software, and to permit persons to whom the Software is | |
15 | * furnished to do so, subject to the following conditions: | |
16 | * | |
17 | * The above copyright notice and this permission notice shall be included in | |
18 | * all copies or substantial portions of the Software. | |
19 | * | |
20 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
21 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
22 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
23 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
24 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
25 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
26 | * IN THE SOFTWARE. | |
27 | */ | |
28 | ||
80c71a24 | 29 | #include "qemu/osdep.h" |
e1c66c6d | 30 | #include "block/block_int.h" |
da34e65c | 31 | #include "qapi/error.h" |
ff369a48 LE |
32 | #include "qemu/option.h" |
33 | ||
2fdc7045 TG |
/* Per-instance state of the "raw" driver: the window of the underlying
 * file that is presented as the image. */
typedef struct BDRVRawState {
    uint64_t offset;    /* byte position in the underlying file where the image starts */
    uint64_t size;      /* length of the image in bytes */
    bool has_size;      /* true when a "size" option was supplied; false when
                         * size was derived from the underlying file length */
} BDRVRawState;
39 | ||
/* Runtime (open/reopen) options understood by the raw driver. Both are
 * parsed in raw_read_options(). */
static QemuOptsList raw_runtime_opts = {
    .name = "raw",
    .head = QTAILQ_HEAD_INITIALIZER(raw_runtime_opts.head),
    .desc = {
        {
            /* Stored in BDRVRawState.offset; defaults to 0 when absent. */
            .name = "offset",
            .type = QEMU_OPT_SIZE,
            .help = "offset in the disk where the image starts",
        },
        {
            /* Stored in BDRVRawState.size; its presence sets has_size. */
            .name = "size",
            .type = QEMU_OPT_SIZE,
            .help = "virtual disk size",
        },
        { /* end of list */ }
    },
};
57 | ||
cd3a4cf6 CL |
/* Image creation options advertised through bdrv_raw.create_opts. */
static QemuOptsList raw_create_opts = {
    .name = "raw-create-opts",
    .head = QTAILQ_HEAD_INITIALIZER(raw_create_opts.head),
    .desc = {
        {
            .name = BLOCK_OPT_SIZE,
            .type = QEMU_OPT_SIZE,
            .help = "Virtual disk size"
        },
        { /* end of list */ }
    }
};
e1c66c6d | 70 | |
2fdc7045 TG |
71 | static int raw_read_options(QDict *options, BlockDriverState *bs, |
72 | BDRVRawState *s, Error **errp) | |
73 | { | |
74 | Error *local_err = NULL; | |
75 | QemuOpts *opts = NULL; | |
76 | int64_t real_size = 0; | |
77 | int ret; | |
78 | ||
79 | real_size = bdrv_getlength(bs->file->bs); | |
80 | if (real_size < 0) { | |
81 | error_setg_errno(errp, -real_size, "Could not get image size"); | |
82 | return real_size; | |
83 | } | |
84 | ||
85 | opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); | |
86 | qemu_opts_absorb_qdict(opts, options, &local_err); | |
87 | if (local_err) { | |
88 | error_propagate(errp, local_err); | |
89 | ret = -EINVAL; | |
90 | goto end; | |
91 | } | |
92 | ||
93 | s->offset = qemu_opt_get_size(opts, "offset", 0); | |
94 | if (qemu_opt_find(opts, "size") != NULL) { | |
95 | s->size = qemu_opt_get_size(opts, "size", 0); | |
96 | s->has_size = true; | |
97 | } else { | |
98 | s->has_size = false; | |
99 | s->size = real_size - s->offset; | |
100 | } | |
101 | ||
102 | /* Check size and offset */ | |
103 | if (real_size < s->offset || (real_size - s->offset) < s->size) { | |
104 | error_setg(errp, "The sum of offset (%" PRIu64 ") and size " | |
105 | "(%" PRIu64 ") has to be smaller or equal to the " | |
106 | " actual size of the containing file (%" PRId64 ")", | |
107 | s->offset, s->size, real_size); | |
108 | ret = -EINVAL; | |
109 | goto end; | |
110 | } | |
111 | ||
112 | /* Make sure size is multiple of BDRV_SECTOR_SIZE to prevent rounding | |
113 | * up and leaking out of the specified area. */ | |
114 | if (!QEMU_IS_ALIGNED(s->size, BDRV_SECTOR_SIZE)) { | |
115 | error_setg(errp, "Specified size is not multiple of %llu", | |
116 | BDRV_SECTOR_SIZE); | |
117 | ret = -EINVAL; | |
118 | goto end; | |
119 | } | |
120 | ||
121 | ret = 0; | |
122 | ||
123 | end: | |
124 | ||
125 | qemu_opts_del(opts); | |
126 | ||
127 | return ret; | |
128 | } | |
129 | ||
7a6d3fc5 LE |
130 | static int raw_reopen_prepare(BDRVReopenState *reopen_state, |
131 | BlockReopenQueue *queue, Error **errp) | |
e1c66c6d | 132 | { |
2fdc7045 TG |
133 | assert(reopen_state != NULL); |
134 | assert(reopen_state->bs != NULL); | |
135 | ||
136 | reopen_state->opaque = g_new0(BDRVRawState, 1); | |
137 | ||
138 | return raw_read_options( | |
139 | reopen_state->options, | |
140 | reopen_state->bs, | |
141 | reopen_state->opaque, | |
142 | errp); | |
143 | } | |
144 | ||
145 | static void raw_reopen_commit(BDRVReopenState *state) | |
146 | { | |
147 | BDRVRawState *new_s = state->opaque; | |
148 | BDRVRawState *s = state->bs->opaque; | |
149 | ||
150 | memcpy(s, new_s, sizeof(BDRVRawState)); | |
151 | ||
152 | g_free(state->opaque); | |
153 | state->opaque = NULL; | |
154 | } | |
155 | ||
156 | static void raw_reopen_abort(BDRVReopenState *state) | |
157 | { | |
158 | g_free(state->opaque); | |
159 | state->opaque = NULL; | |
e1c66c6d LE |
160 | } |
161 | ||
decaeed7 EB |
162 | static int coroutine_fn raw_co_preadv(BlockDriverState *bs, uint64_t offset, |
163 | uint64_t bytes, QEMUIOVector *qiov, | |
164 | int flags) | |
e1c66c6d | 165 | { |
2fdc7045 TG |
166 | BDRVRawState *s = bs->opaque; |
167 | ||
168 | if (offset > UINT64_MAX - s->offset) { | |
169 | return -EINVAL; | |
170 | } | |
171 | offset += s->offset; | |
172 | ||
9eaafd90 | 173 | BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); |
decaeed7 | 174 | return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); |
e1c66c6d LE |
175 | } |
176 | ||
decaeed7 EB |
177 | static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, |
178 | uint64_t bytes, QEMUIOVector *qiov, | |
179 | int flags) | |
e1c66c6d | 180 | { |
2fdc7045 | 181 | BDRVRawState *s = bs->opaque; |
38f3ef57 KW |
182 | void *buf = NULL; |
183 | BlockDriver *drv; | |
184 | QEMUIOVector local_qiov; | |
185 | int ret; | |
186 | ||
2fdc7045 TG |
187 | if (s->has_size && (offset > s->size || bytes > (s->size - offset))) { |
188 | /* There's not enough space for the data. Don't write anything and just | |
189 | * fail to prevent leaking out of the size specified in options. */ | |
190 | return -ENOSPC; | |
191 | } | |
192 | ||
193 | if (offset > UINT64_MAX - s->offset) { | |
194 | ret = -EINVAL; | |
195 | goto fail; | |
196 | } | |
197 | ||
decaeed7 EB |
198 | if (bs->probed && offset < BLOCK_PROBE_BUF_SIZE && bytes) { |
199 | /* Handling partial writes would be a pain - so we just | |
200 | * require that guests have 512-byte request alignment if | |
201 | * probing occurred */ | |
38f3ef57 KW |
202 | QEMU_BUILD_BUG_ON(BLOCK_PROBE_BUF_SIZE != 512); |
203 | QEMU_BUILD_BUG_ON(BDRV_SECTOR_SIZE != 512); | |
decaeed7 | 204 | assert(offset == 0 && bytes >= BLOCK_PROBE_BUF_SIZE); |
38f3ef57 | 205 | |
9a4f4c31 | 206 | buf = qemu_try_blockalign(bs->file->bs, 512); |
38f3ef57 KW |
207 | if (!buf) { |
208 | ret = -ENOMEM; | |
209 | goto fail; | |
210 | } | |
211 | ||
212 | ret = qemu_iovec_to_buf(qiov, 0, buf, 512); | |
213 | if (ret != 512) { | |
214 | ret = -EINVAL; | |
215 | goto fail; | |
216 | } | |
217 | ||
218 | drv = bdrv_probe_all(buf, 512, NULL); | |
219 | if (drv != bs->drv) { | |
220 | ret = -EPERM; | |
221 | goto fail; | |
222 | } | |
223 | ||
224 | /* Use the checked buffer, a malicious guest might be overwriting its | |
225 | * original buffer in the background. */ | |
226 | qemu_iovec_init(&local_qiov, qiov->niov + 1); | |
227 | qemu_iovec_add(&local_qiov, buf, 512); | |
228 | qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512); | |
229 | qiov = &local_qiov; | |
230 | } | |
231 | ||
2fdc7045 TG |
232 | offset += s->offset; |
233 | ||
9eaafd90 | 234 | BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); |
decaeed7 | 235 | ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags); |
38f3ef57 KW |
236 | |
237 | fail: | |
238 | if (qiov == &local_qiov) { | |
239 | qemu_iovec_destroy(&local_qiov); | |
240 | } | |
241 | qemu_vfree(buf); | |
242 | return ret; | |
e1c66c6d LE |
243 | } |
244 | ||
b6b8a333 PB |
245 | static int64_t coroutine_fn raw_co_get_block_status(BlockDriverState *bs, |
246 | int64_t sector_num, | |
67a0fd2a FZ |
247 | int nb_sectors, int *pnum, |
248 | BlockDriverState **file) | |
e1c66c6d | 249 | { |
2fdc7045 | 250 | BDRVRawState *s = bs->opaque; |
92bc50a5 | 251 | *pnum = nb_sectors; |
02650acb | 252 | *file = bs->file->bs; |
2fdc7045 | 253 | sector_num += s->offset / BDRV_SECTOR_SIZE; |
92bc50a5 PL |
254 | return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA | |
255 | (sector_num << BDRV_SECTOR_BITS); | |
e1c66c6d LE |
256 | } |
257 | ||
39ad937e EB |
258 | static int coroutine_fn raw_co_pwrite_zeroes(BlockDriverState *bs, |
259 | int64_t offset, int count, | |
260 | BdrvRequestFlags flags) | |
e1c66c6d | 261 | { |
2fdc7045 TG |
262 | BDRVRawState *s = bs->opaque; |
263 | if (offset > UINT64_MAX - s->offset) { | |
264 | return -EINVAL; | |
265 | } | |
266 | offset += s->offset; | |
a03ef88f | 267 | return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags); |
e1c66c6d LE |
268 | } |
269 | ||
5f61ad07 EB |
270 | static int coroutine_fn raw_co_pdiscard(BlockDriverState *bs, |
271 | int64_t offset, int count) | |
e1c66c6d | 272 | { |
2fdc7045 TG |
273 | BDRVRawState *s = bs->opaque; |
274 | if (offset > UINT64_MAX - s->offset) { | |
275 | return -EINVAL; | |
276 | } | |
277 | offset += s->offset; | |
5f61ad07 | 278 | return bdrv_co_pdiscard(bs->file->bs, offset, count); |
e1c66c6d LE |
279 | } |
280 | ||
7a6d3fc5 | 281 | static int64_t raw_getlength(BlockDriverState *bs) |
e1c66c6d | 282 | { |
2fdc7045 TG |
283 | int64_t len; |
284 | BDRVRawState *s = bs->opaque; | |
285 | ||
286 | /* Update size. It should not change unless the file was externally | |
287 | * modified. */ | |
288 | len = bdrv_getlength(bs->file->bs); | |
289 | if (len < 0) { | |
290 | return len; | |
291 | } | |
292 | ||
293 | if (len < s->offset) { | |
294 | s->size = 0; | |
295 | } else { | |
296 | if (s->has_size) { | |
297 | /* Try to honour the size */ | |
298 | s->size = MIN(s->size, len - s->offset); | |
299 | } else { | |
300 | s->size = len - s->offset; | |
301 | } | |
302 | } | |
303 | ||
304 | return s->size; | |
e1c66c6d LE |
305 | } |
306 | ||
7a6d3fc5 | 307 | static int raw_get_info(BlockDriverState *bs, BlockDriverInfo *bdi) |
e1c66c6d | 308 | { |
9a4f4c31 | 309 | return bdrv_get_info(bs->file->bs, bdi); |
e1c66c6d LE |
310 | } |
311 | ||
decaeed7 EB |
312 | static void raw_refresh_limits(BlockDriverState *bs, Error **errp) |
313 | { | |
314 | if (bs->probed) { | |
315 | /* To make it easier to protect the first sector, any probed | |
316 | * image is restricted to read-modify-write on sub-sector | |
317 | * operations. */ | |
318 | bs->bl.request_alignment = BDRV_SECTOR_SIZE; | |
319 | } | |
320 | } | |
321 | ||
7a6d3fc5 | 322 | static int raw_truncate(BlockDriverState *bs, int64_t offset) |
e1c66c6d | 323 | { |
2fdc7045 TG |
324 | BDRVRawState *s = bs->opaque; |
325 | ||
326 | if (s->has_size) { | |
327 | return -ENOTSUP; | |
328 | } | |
329 | ||
330 | if (INT64_MAX - offset < s->offset) { | |
331 | return -EINVAL; | |
332 | } | |
333 | ||
334 | s->size = offset; | |
335 | offset += s->offset; | |
9a4f4c31 | 336 | return bdrv_truncate(bs->file->bs, offset); |
e1c66c6d LE |
337 | } |
338 | ||
7a6d3fc5 | 339 | static int raw_media_changed(BlockDriverState *bs) |
e1c66c6d | 340 | { |
9a4f4c31 | 341 | return bdrv_media_changed(bs->file->bs); |
e1c66c6d LE |
342 | } |
343 | ||
7a6d3fc5 | 344 | static void raw_eject(BlockDriverState *bs, bool eject_flag) |
e1c66c6d | 345 | { |
9a4f4c31 | 346 | bdrv_eject(bs->file->bs, eject_flag); |
e1c66c6d LE |
347 | } |
348 | ||
7a6d3fc5 | 349 | static void raw_lock_medium(BlockDriverState *bs, bool locked) |
e1c66c6d | 350 | { |
9a4f4c31 | 351 | bdrv_lock_medium(bs->file->bs, locked); |
e1c66c6d LE |
352 | } |
353 | ||
151a2930 | 354 | static int raw_co_ioctl(BlockDriverState *bs, unsigned long int req, void *buf) |
e1c66c6d | 355 | { |
2fdc7045 TG |
356 | BDRVRawState *s = bs->opaque; |
357 | if (s->offset || s->has_size) { | |
358 | return -ENOTSUP; | |
359 | } | |
151a2930 | 360 | return bdrv_co_ioctl(bs->file->bs, req, buf); |
e1c66c6d LE |
361 | } |
362 | ||
7a6d3fc5 | 363 | static int raw_has_zero_init(BlockDriverState *bs) |
e1c66c6d | 364 | { |
9a4f4c31 | 365 | return bdrv_has_zero_init(bs->file->bs); |
e1c66c6d LE |
366 | } |
367 | ||
cd3a4cf6 | 368 | static int raw_create(const char *filename, QemuOpts *opts, Error **errp) |
1565262c | 369 | { |
9be38598 | 370 | return bdrv_create_file(filename, opts, errp); |
1565262c | 371 | } |
01dd96d8 | 372 | |
015a1036 HR |
373 | static int raw_open(BlockDriverState *bs, QDict *options, int flags, |
374 | Error **errp) | |
01dd96d8 | 375 | { |
2fdc7045 TG |
376 | BDRVRawState *s = bs->opaque; |
377 | int ret; | |
378 | ||
9a4f4c31 | 379 | bs->sg = bs->file->bs->sg; |
8a39b4d6 EB |
380 | bs->supported_write_flags = BDRV_REQ_FUA & |
381 | bs->file->bs->supported_write_flags; | |
382 | bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & | |
383 | bs->file->bs->supported_zero_flags; | |
38f3ef57 KW |
384 | |
385 | if (bs->probed && !bdrv_is_read_only(bs)) { | |
386 | fprintf(stderr, | |
387 | "WARNING: Image format was not specified for '%s' and probing " | |
388 | "guessed raw.\n" | |
389 | " Automatically detecting the format is dangerous for " | |
390 | "raw images, write operations on block 0 will be restricted.\n" | |
391 | " Specify the 'raw' format explicitly to remove the " | |
392 | "restrictions.\n", | |
9a4f4c31 | 393 | bs->file->bs->filename); |
38f3ef57 KW |
394 | } |
395 | ||
2fdc7045 TG |
396 | ret = raw_read_options(options, bs, s, errp); |
397 | if (ret < 0) { | |
398 | return ret; | |
399 | } | |
400 | ||
401 | if (bs->sg && (s->offset || s->has_size)) { | |
402 | error_setg(errp, "Cannot use offset/size with SCSI generic devices"); | |
403 | return -EINVAL; | |
404 | } | |
405 | ||
01dd96d8 LE |
406 | return 0; |
407 | } | |
408 | ||
7a6d3fc5 | 409 | static void raw_close(BlockDriverState *bs) |
01dd96d8 LE |
410 | { |
411 | } | |
412 | ||
/*
 * Probe callback. None of the arguments are inspected: every input gets
 * the same score.
 *
 * Returns 1, the smallest possible positive score, so that raw is chosen
 * if and only if no other block driver works.
 */
static int raw_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    enum { RAW_PROBE_SCORE = 1 };

    return RAW_PROBE_SCORE;
}
775d6afd | 420 | |
1a9335e4 ET |
421 | static int raw_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) |
422 | { | |
2fdc7045 TG |
423 | BDRVRawState *s = bs->opaque; |
424 | int ret; | |
425 | ||
426 | ret = bdrv_probe_blocksizes(bs->file->bs, bsz); | |
427 | if (ret < 0) { | |
428 | return ret; | |
429 | } | |
430 | ||
431 | if (!QEMU_IS_ALIGNED(s->offset, MAX(bsz->log, bsz->phys))) { | |
432 | return -ENOTSUP; | |
433 | } | |
434 | ||
435 | return 0; | |
1a9335e4 ET |
436 | } |
437 | ||
438 | static int raw_probe_geometry(BlockDriverState *bs, HDGeometry *geo) | |
439 | { | |
2fdc7045 TG |
440 | BDRVRawState *s = bs->opaque; |
441 | if (s->offset || s->has_size) { | |
442 | return -ENOTSUP; | |
443 | } | |
9a4f4c31 | 444 | return bdrv_probe_geometry(bs->file->bs, geo); |
1a9335e4 ET |
445 | } |
446 | ||
/* Driver description for the "raw" format: wires the callbacks defined in
 * this file into the block layer. Registered by bdrv_raw_init(). */
BlockDriver bdrv_raw = {
    .format_name          = "raw",
    .instance_size        = sizeof(BDRVRawState),
    /* Open, close, reopen and creation */
    .bdrv_probe           = &raw_probe,
    .bdrv_reopen_prepare  = &raw_reopen_prepare,
    .bdrv_reopen_commit   = &raw_reopen_commit,
    .bdrv_reopen_abort    = &raw_reopen_abort,
    .bdrv_open            = &raw_open,
    .bdrv_close           = &raw_close,
    .bdrv_create          = &raw_create,
    /* Data path */
    .bdrv_co_preadv       = &raw_co_preadv,
    .bdrv_co_pwritev      = &raw_co_pwritev,
    .bdrv_co_pwrite_zeroes = &raw_co_pwrite_zeroes,
    .bdrv_co_pdiscard     = &raw_co_pdiscard,
    .bdrv_co_get_block_status = &raw_co_get_block_status,
    /* Size, limits and device properties */
    .bdrv_truncate        = &raw_truncate,
    .bdrv_getlength       = &raw_getlength,
    .has_variable_length  = true,
    .bdrv_get_info        = &raw_get_info,
    .bdrv_refresh_limits  = &raw_refresh_limits,
    .bdrv_probe_blocksizes = &raw_probe_blocksizes,
    .bdrv_probe_geometry  = &raw_probe_geometry,
    /* Removable media and passthrough */
    .bdrv_media_changed   = &raw_media_changed,
    .bdrv_eject           = &raw_eject,
    .bdrv_lock_medium     = &raw_lock_medium,
    .bdrv_co_ioctl        = &raw_co_ioctl,
    .create_opts          = &raw_create_opts,
    .bdrv_has_zero_init   = &raw_has_zero_init
};
476 | ||
/* Register the raw driver description with the block layer. */
static void bdrv_raw_init(void)
{
    bdrv_register(&bdrv_raw);
}

/* Hand bdrv_raw_init to the block_init() registration macro. */
block_init(bdrv_raw_init);