]> git.proxmox.com Git - mirror_qemu.git/blob - block/vpc.c
block: Convert bdrv_pread(v) to BdrvChild
[mirror_qemu.git] / block / vpc.c
1 /*
2 * Block driver for Connectix / Microsoft Virtual PC images
3 *
4 * Copyright (c) 2005 Alex Beregszaszi
5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25 #include "qemu/osdep.h"
26 #include "qapi/error.h"
27 #include "qemu-common.h"
28 #include "block/block_int.h"
29 #include "sysemu/block-backend.h"
30 #include "qemu/module.h"
31 #include "migration/migration.h"
32 #include "qemu/bswap.h"
33 #if defined(CONFIG_UUID)
34 #include <uuid/uuid.h>
35 #endif
36
37 /**************************************************************/
38
/* Size in bytes of the VHD footer buffer and of each header read below */
#define HEADER_SIZE     512

/* Uncomment to compile in the (normally disabled) page-entry cache fields */
//#define CACHE
42
/* Disk type codes as stored in the `type` field of the VHD footer */
enum vhd_type {
    VHD_FIXED           = 2,
    VHD_DYNAMIC         = 3,
    VHD_DIFFERENCING    = 4,
};
48
/*
 * Footer timestamps count seconds since Jan 1, 2000 0:00:00 (UTC); this is
 * the value subtracted from time(NULL) to obtain them.
 */
#define VHD_TIMESTAMP_BASE  946684800

/* Upper bounds of the cylinders / heads / sectors geometry fields */
#define VHD_CHS_MAX_C       65535LL
#define VHD_CHS_MAX_H       16
#define VHD_CHS_MAX_S       255

/* 2040 GiB max image size */
#define VHD_MAX_SECTORS     0xff000000
/* Sector count described by the largest possible CHS geometry */
#define VHD_MAX_GEOMETRY    (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)

/* Name of the image creation option that forces the exact requested size */
#define VPC_OPT_FORCE_SIZE  "force_size"
60
61 /* always big-endian */
62 typedef struct vhd_footer {
63 char creator[8]; /* "conectix" */
64 uint32_t features;
65 uint32_t version;
66
67 /* Offset of next header structure, 0xFFFFFFFF if none */
68 uint64_t data_offset;
69
70 /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
71 uint32_t timestamp;
72
73 char creator_app[4]; /* e.g., "vpc " */
74 uint16_t major;
75 uint16_t minor;
76 char creator_os[4]; /* "Wi2k" */
77
78 uint64_t orig_size;
79 uint64_t current_size;
80
81 uint16_t cyls;
82 uint8_t heads;
83 uint8_t secs_per_cyl;
84
85 uint32_t type;
86
87 /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
88 the bytes in the footer without the checksum field") */
89 uint32_t checksum;
90
91 /* UUID used to identify a parent hard disk (backing file) */
92 uint8_t uuid[16];
93
94 uint8_t in_saved_state;
95 } QEMU_PACKED VHDFooter;
96
97 typedef struct vhd_dyndisk_header {
98 char magic[8]; /* "cxsparse" */
99
100 /* Offset of next header structure, 0xFFFFFFFF if none */
101 uint64_t data_offset;
102
103 /* Offset of the Block Allocation Table (BAT) */
104 uint64_t table_offset;
105
106 uint32_t version;
107 uint32_t max_table_entries; /* 32bit/entry */
108
109 /* 2 MB by default, must be a power of two */
110 uint32_t block_size;
111
112 uint32_t checksum;
113 uint8_t parent_uuid[16];
114 uint32_t parent_timestamp;
115 uint32_t reserved;
116
117 /* Backing file name (in UTF-16) */
118 uint8_t parent_name[512];
119
120 struct {
121 uint32_t platform;
122 uint32_t data_space;
123 uint32_t data_length;
124 uint32_t reserved;
125 uint64_t data_offset;
126 } parent_locator[8];
127 } QEMU_PACKED VHDDynDiskHeader;
128
129 typedef struct BDRVVPCState {
130 CoMutex lock;
131 uint8_t footer_buf[HEADER_SIZE];
132 uint64_t free_data_block_offset;
133 int max_table_entries;
134 uint32_t *pagetable;
135 uint64_t bat_offset;
136 uint64_t last_bitmap_offset;
137
138 uint32_t block_size;
139 uint32_t bitmap_size;
140 bool force_use_chs;
141 bool force_use_sz;
142
143 #ifdef CACHE
144 uint8_t *pageentry_u8;
145 uint32_t *pageentry_u32;
146 uint16_t *pageentry_u16;
147
148 uint64_t last_bitmap;
149 #endif
150
151 Error *migration_blocker;
152 } BDRVVPCState;
153
154 #define VPC_OPT_SIZE_CALC "force_size_calc"
155 static QemuOptsList vpc_runtime_opts = {
156 .name = "vpc-runtime-opts",
157 .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
158 .desc = {
159 {
160 .name = VPC_OPT_SIZE_CALC,
161 .type = QEMU_OPT_STRING,
162 .help = "Force disk size calculation to use either CHS geometry, "
163 "or use the disk current_size specified in the VHD footer. "
164 "{chs, current_size}"
165 },
166 { /* end of list */ }
167 }
168 };
169
/*
 * Computes the VHD checksum of a buffer: the one's complement of the
 * (32 bit, wrapping) sum of all of its bytes.
 *
 * @buf:  bytes to sum; not modified
 * @size: number of bytes in @buf
 *
 * Returns the checksum in host byte order. An empty buffer yields 0xFFFFFFFF.
 */
static uint32_t vpc_checksum(const uint8_t *buf, size_t size)
{
    uint32_t sum = 0;
    size_t i;   /* same type as @size: no signed/unsigned comparison */

    for (i = 0; i < size; i++) {
        sum += buf[i];
    }

    return ~sum;
}
180
181
/*
 * Format probe: scores 100 when the buffer starts with the "conectix"
 * footer magic and 0 otherwise. @filename is not consulted.
 */
static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    static const char vhd_magic[] = "conectix";
    const int magic_len = 8;

    if (buf_size < magic_len) {
        return 0;
    }
    if (strncmp((const char *)buf, vhd_magic, magic_len) != 0) {
        return 0;
    }
    return 100;
}
188
189 static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
190 Error **errp)
191 {
192 BDRVVPCState *s = bs->opaque;
193 const char *size_calc;
194
195 size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
196
197 if (!size_calc) {
198 /* no override, use autodetect only */
199 } else if (!strcmp(size_calc, "current_size")) {
200 s->force_use_sz = true;
201 } else if (!strcmp(size_calc, "chs")) {
202 s->force_use_chs = true;
203 } else {
204 error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
205 }
206 }
207
208 static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
209 Error **errp)
210 {
211 BDRVVPCState *s = bs->opaque;
212 int i;
213 VHDFooter *footer;
214 VHDDynDiskHeader *dyndisk_header;
215 QemuOpts *opts = NULL;
216 Error *local_err = NULL;
217 bool use_chs;
218 uint8_t buf[HEADER_SIZE];
219 uint32_t checksum;
220 uint64_t computed_size;
221 uint64_t pagetable_size;
222 int disk_type = VHD_DYNAMIC;
223 int ret;
224
225 opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
226 qemu_opts_absorb_qdict(opts, options, &local_err);
227 if (local_err) {
228 error_propagate(errp, local_err);
229 ret = -EINVAL;
230 goto fail;
231 }
232
233 vpc_parse_options(bs, opts, &local_err);
234 if (local_err) {
235 error_propagate(errp, local_err);
236 ret = -EINVAL;
237 goto fail;
238 }
239
240 ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
241 if (ret < 0) {
242 error_setg(errp, "Unable to read VHD header");
243 goto fail;
244 }
245
246 footer = (VHDFooter *) s->footer_buf;
247 if (strncmp(footer->creator, "conectix", 8)) {
248 int64_t offset = bdrv_getlength(bs->file->bs);
249 if (offset < 0) {
250 ret = offset;
251 error_setg(errp, "Invalid file size");
252 goto fail;
253 } else if (offset < HEADER_SIZE) {
254 ret = -EINVAL;
255 error_setg(errp, "File too small for a VHD header");
256 goto fail;
257 }
258
259 /* If a fixed disk, the footer is found only at the end of the file */
260 ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
261 HEADER_SIZE);
262 if (ret < 0) {
263 goto fail;
264 }
265 if (strncmp(footer->creator, "conectix", 8)) {
266 error_setg(errp, "invalid VPC image");
267 ret = -EINVAL;
268 goto fail;
269 }
270 disk_type = VHD_FIXED;
271 }
272
273 checksum = be32_to_cpu(footer->checksum);
274 footer->checksum = 0;
275 if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
276 fprintf(stderr, "block-vpc: The header checksum of '%s' is "
277 "incorrect.\n", bs->filename);
278
279 /* Write 'checksum' back to footer, or else will leave it with zero. */
280 footer->checksum = cpu_to_be32(checksum);
281
282 /* The visible size of a image in Virtual PC depends on the geometry
283 rather than on the size stored in the footer (the size in the footer
284 is too large usually) */
285 bs->total_sectors = (int64_t)
286 be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
287
288 /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
289 * VHD image sizes differently. VPC will rely on CHS geometry,
290 * while Hyper-V and disk2vhd use the size specified in the footer.
291 *
292 * We use a couple of approaches to try and determine the correct method:
293 * look at the Creator App field, and look for images that have CHS
294 * geometry that is the maximum value.
295 *
296 * If the CHS geometry is the maximum CHS geometry, then we assume that
297 * the size is the footer->current_size to avoid truncation. Otherwise,
298 * we follow the table based on footer->creator_app:
299 *
300 * Known creator apps:
301 * 'vpc ' : CHS Virtual PC (uses disk geometry)
302 * 'qemu' : CHS QEMU (uses disk geometry)
303 * 'qem2' : current_size QEMU (uses current_size)
304 * 'win ' : current_size Hyper-V
305 * 'd2v ' : current_size Disk2vhd
306 * 'tap\0' : current_size XenServer
307 * 'CTXS' : current_size XenConverter
308 *
309 * The user can override the table values via drive options, however
310 * even with an override we will still use current_size for images
311 * that have CHS geometry of the maximum size.
312 */
313 use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
314 !!strncmp(footer->creator_app, "qem2", 4) &&
315 !!strncmp(footer->creator_app, "d2v ", 4) &&
316 !!strncmp(footer->creator_app, "CTXS", 4) &&
317 !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
318
319 if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
320 bs->total_sectors = be64_to_cpu(footer->current_size) /
321 BDRV_SECTOR_SIZE;
322 }
323
324 /* Allow a maximum disk size of 2040 GiB */
325 if (bs->total_sectors > VHD_MAX_SECTORS) {
326 ret = -EFBIG;
327 goto fail;
328 }
329
330 if (disk_type == VHD_DYNAMIC) {
331 ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
332 HEADER_SIZE);
333 if (ret < 0) {
334 error_setg(errp, "Error reading dynamic VHD header");
335 goto fail;
336 }
337
338 dyndisk_header = (VHDDynDiskHeader *) buf;
339
340 if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
341 error_setg(errp, "Invalid header magic");
342 ret = -EINVAL;
343 goto fail;
344 }
345
346 s->block_size = be32_to_cpu(dyndisk_header->block_size);
347 if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
348 error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
349 ret = -EINVAL;
350 goto fail;
351 }
352 s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
353
354 s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
355
356 if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
357 error_setg(errp, "Too many blocks");
358 ret = -EINVAL;
359 goto fail;
360 }
361
362 computed_size = (uint64_t) s->max_table_entries * s->block_size;
363 if (computed_size < bs->total_sectors * 512) {
364 error_setg(errp, "Page table too small");
365 ret = -EINVAL;
366 goto fail;
367 }
368
369 if (s->max_table_entries > SIZE_MAX / 4 ||
370 s->max_table_entries > (int) INT_MAX / 4) {
371 error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
372 s->max_table_entries);
373 ret = -EINVAL;
374 goto fail;
375 }
376
377 pagetable_size = (uint64_t) s->max_table_entries * 4;
378
379 s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
380 if (s->pagetable == NULL) {
381 error_setg(errp, "Unable to allocate memory for page table");
382 ret = -ENOMEM;
383 goto fail;
384 }
385
386 s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
387
388 ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
389 pagetable_size);
390 if (ret < 0) {
391 error_setg(errp, "Error reading pagetable");
392 goto fail;
393 }
394
395 s->free_data_block_offset =
396 ROUND_UP(s->bat_offset + pagetable_size, 512);
397
398 for (i = 0; i < s->max_table_entries; i++) {
399 be32_to_cpus(&s->pagetable[i]);
400 if (s->pagetable[i] != 0xFFFFFFFF) {
401 int64_t next = (512 * (int64_t) s->pagetable[i]) +
402 s->bitmap_size + s->block_size;
403
404 if (next > s->free_data_block_offset) {
405 s->free_data_block_offset = next;
406 }
407 }
408 }
409
410 if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
411 error_setg(errp, "block-vpc: free_data_block_offset points after "
412 "the end of file. The image has been truncated.");
413 ret = -EINVAL;
414 goto fail;
415 }
416
417 s->last_bitmap_offset = (int64_t) -1;
418
419 #ifdef CACHE
420 s->pageentry_u8 = g_malloc(512);
421 s->pageentry_u32 = s->pageentry_u8;
422 s->pageentry_u16 = s->pageentry_u8;
423 s->last_pagetable = -1;
424 #endif
425 }
426
427 qemu_co_mutex_init(&s->lock);
428
429 /* Disable migration when VHD images are used */
430 error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
431 "does not support live migration",
432 bdrv_get_device_or_node_name(bs));
433 migrate_add_blocker(s->migration_blocker);
434
435 return 0;
436
437 fail:
438 qemu_vfree(s->pagetable);
439 #ifdef CACHE
440 g_free(s->pageentry_u8);
441 #endif
442 return ret;
443 }
444
445 static int vpc_reopen_prepare(BDRVReopenState *state,
446 BlockReopenQueue *queue, Error **errp)
447 {
448 return 0;
449 }
450
451 /*
452 * Returns the absolute byte offset of the given sector in the image file.
453 * If the sector is not allocated, -1 is returned instead.
454 *
455 * The parameter write must be 1 if the offset will be used for a write
456 * operation (the block bitmaps is updated then), 0 otherwise.
457 */
458 static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
459 bool write)
460 {
461 BDRVVPCState *s = bs->opaque;
462 uint64_t bitmap_offset, block_offset;
463 uint32_t pagetable_index, offset_in_block;
464
465 pagetable_index = offset / s->block_size;
466 offset_in_block = offset % s->block_size;
467
468 if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
469 return -1; /* not allocated */
470
471 bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
472 block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
473
474 /* We must ensure that we don't write to any sectors which are marked as
475 unused in the bitmap. We get away with setting all bits in the block
476 bitmap each time we write to a new block. This might cause Virtual PC to
477 miss sparse read optimization, but it's not a problem in terms of
478 correctness. */
479 if (write && (s->last_bitmap_offset != bitmap_offset)) {
480 uint8_t bitmap[s->bitmap_size];
481
482 s->last_bitmap_offset = bitmap_offset;
483 memset(bitmap, 0xff, s->bitmap_size);
484 bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size);
485 }
486
487 return block_offset;
488 }
489
490 static inline int64_t get_sector_offset(BlockDriverState *bs,
491 int64_t sector_num, bool write)
492 {
493 return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
494 }
495
496 /*
497 * Writes the footer to the end of the image file. This is needed when the
498 * file grows as it overwrites the old footer
499 *
500 * Returns 0 on success and < 0 on error
501 */
502 static int rewrite_footer(BlockDriverState* bs)
503 {
504 int ret;
505 BDRVVPCState *s = bs->opaque;
506 int64_t offset = s->free_data_block_offset;
507
508 ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE);
509 if (ret < 0)
510 return ret;
511
512 return 0;
513 }
514
515 /*
516 * Allocates a new block. This involves writing a new footer and updating
517 * the Block Allocation Table to use the space at the old end of the image
518 * file (overwriting the old footer)
519 *
520 * Returns the sectors' offset in the image file on success and < 0 on error
521 */
522 static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
523 {
524 BDRVVPCState *s = bs->opaque;
525 int64_t bat_offset;
526 uint32_t index, bat_value;
527 int ret;
528 uint8_t bitmap[s->bitmap_size];
529
530 /* Check if sector_num is valid */
531 if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
532 return -EINVAL;
533 }
534
535 /* Write entry into in-memory BAT */
536 index = offset / s->block_size;
537 assert(s->pagetable[index] == 0xFFFFFFFF);
538 s->pagetable[index] = s->free_data_block_offset / 512;
539
540 /* Initialize the block's bitmap */
541 memset(bitmap, 0xff, s->bitmap_size);
542 ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
543 s->bitmap_size);
544 if (ret < 0) {
545 return ret;
546 }
547
548 /* Write new footer (the old one will be overwritten) */
549 s->free_data_block_offset += s->block_size + s->bitmap_size;
550 ret = rewrite_footer(bs);
551 if (ret < 0)
552 goto fail;
553
554 /* Write BAT entry to disk */
555 bat_offset = s->bat_offset + (4 * index);
556 bat_value = cpu_to_be32(s->pagetable[index]);
557 ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
558 if (ret < 0)
559 goto fail;
560
561 return get_image_offset(bs, offset, false);
562
563 fail:
564 s->free_data_block_offset -= (s->block_size + s->bitmap_size);
565 return ret;
566 }
567
568 static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
569 {
570 BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
571 VHDFooter *footer = (VHDFooter *) s->footer_buf;
572
573 if (be32_to_cpu(footer->type) != VHD_FIXED) {
574 bdi->cluster_size = s->block_size;
575 }
576
577 bdi->unallocated_blocks_are_zero = true;
578 return 0;
579 }
580
581 static int coroutine_fn
582 vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
583 QEMUIOVector *qiov, int flags)
584 {
585 BDRVVPCState *s = bs->opaque;
586 int ret;
587 int64_t image_offset;
588 int64_t n_bytes;
589 int64_t bytes_done = 0;
590 VHDFooter *footer = (VHDFooter *) s->footer_buf;
591 QEMUIOVector local_qiov;
592
593 if (be32_to_cpu(footer->type) == VHD_FIXED) {
594 return bdrv_co_preadv(bs->file->bs, offset, bytes, qiov, 0);
595 }
596
597 qemu_co_mutex_lock(&s->lock);
598 qemu_iovec_init(&local_qiov, qiov->niov);
599
600 while (bytes > 0) {
601 image_offset = get_image_offset(bs, offset, false);
602 n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
603
604 if (image_offset == -1) {
605 qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
606 } else {
607 qemu_iovec_reset(&local_qiov);
608 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
609
610 ret = bdrv_co_preadv(bs->file->bs, image_offset, n_bytes,
611 &local_qiov, 0);
612 if (ret < 0) {
613 goto fail;
614 }
615 }
616
617 bytes -= n_bytes;
618 offset += n_bytes;
619 bytes_done += n_bytes;
620 }
621
622 ret = 0;
623 fail:
624 qemu_iovec_destroy(&local_qiov);
625 qemu_co_mutex_unlock(&s->lock);
626
627 return ret;
628 }
629
630 static int coroutine_fn
631 vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
632 QEMUIOVector *qiov, int flags)
633 {
634 BDRVVPCState *s = bs->opaque;
635 int64_t image_offset;
636 int64_t n_bytes;
637 int64_t bytes_done = 0;
638 int ret;
639 VHDFooter *footer = (VHDFooter *) s->footer_buf;
640 QEMUIOVector local_qiov;
641
642 if (be32_to_cpu(footer->type) == VHD_FIXED) {
643 return bdrv_co_pwritev(bs->file->bs, offset, bytes, qiov, 0);
644 }
645
646 qemu_co_mutex_lock(&s->lock);
647 qemu_iovec_init(&local_qiov, qiov->niov);
648
649 while (bytes > 0) {
650 image_offset = get_image_offset(bs, offset, true);
651 n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
652
653 if (image_offset == -1) {
654 image_offset = alloc_block(bs, offset);
655 if (image_offset < 0) {
656 ret = image_offset;
657 goto fail;
658 }
659 }
660
661 qemu_iovec_reset(&local_qiov);
662 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
663
664 ret = bdrv_co_pwritev(bs->file->bs, image_offset, n_bytes,
665 &local_qiov, 0);
666 if (ret < 0) {
667 goto fail;
668 }
669
670 bytes -= n_bytes;
671 offset += n_bytes;
672 bytes_done += n_bytes;
673 }
674
675 ret = 0;
676 fail:
677 qemu_iovec_destroy(&local_qiov);
678 qemu_co_mutex_unlock(&s->lock);
679
680 return ret;
681 }
682
683 static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
684 int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
685 {
686 BDRVVPCState *s = bs->opaque;
687 VHDFooter *footer = (VHDFooter*) s->footer_buf;
688 int64_t start, offset;
689 bool allocated;
690 int n;
691
692 if (be32_to_cpu(footer->type) == VHD_FIXED) {
693 *pnum = nb_sectors;
694 *file = bs->file->bs;
695 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
696 (sector_num << BDRV_SECTOR_BITS);
697 }
698
699 offset = get_sector_offset(bs, sector_num, 0);
700 start = offset;
701 allocated = (offset != -1);
702 *pnum = 0;
703
704 do {
705 /* All sectors in a block are contiguous (without using the bitmap) */
706 n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
707 - sector_num;
708 n = MIN(n, nb_sectors);
709
710 *pnum += n;
711 sector_num += n;
712 nb_sectors -= n;
713 /* *pnum can't be greater than one block for allocated
714 * sectors since there is always a bitmap in between. */
715 if (allocated) {
716 *file = bs->file->bs;
717 return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
718 }
719 if (nb_sectors == 0) {
720 break;
721 }
722 offset = get_sector_offset(bs, sector_num, 0);
723 } while (offset == -1);
724
725 return 0;
726 }
727
728 /*
729 * Calculates the number of cylinders, heads and sectors per cylinder
730 * based on a given number of sectors. This is the algorithm described
731 * in the VHD specification.
732 *
733 * Note that the geometry doesn't always exactly match total_sectors but
734 * may round it down.
735 *
736 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
737 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
738 * and instead allow up to 255 heads.
739 */
740 static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
741 uint8_t* heads, uint8_t* secs_per_cyl)
742 {
743 uint32_t cyls_times_heads;
744
745 total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
746
747 if (total_sectors >= 65535LL * 16 * 63) {
748 *secs_per_cyl = 255;
749 *heads = 16;
750 cyls_times_heads = total_sectors / *secs_per_cyl;
751 } else {
752 *secs_per_cyl = 17;
753 cyls_times_heads = total_sectors / *secs_per_cyl;
754 *heads = (cyls_times_heads + 1023) / 1024;
755
756 if (*heads < 4) {
757 *heads = 4;
758 }
759
760 if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
761 *secs_per_cyl = 31;
762 *heads = 16;
763 cyls_times_heads = total_sectors / *secs_per_cyl;
764 }
765
766 if (cyls_times_heads >= (*heads * 1024)) {
767 *secs_per_cyl = 63;
768 *heads = 16;
769 cyls_times_heads = total_sectors / *secs_per_cyl;
770 }
771 }
772
773 *cyls = cyls_times_heads / *heads;
774
775 return 0;
776 }
777
778 static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
779 int64_t total_sectors)
780 {
781 VHDDynDiskHeader *dyndisk_header =
782 (VHDDynDiskHeader *) buf;
783 size_t block_size, num_bat_entries;
784 int i;
785 int ret;
786 int64_t offset = 0;
787
788 /* Write the footer (twice: at the beginning and at the end) */
789 block_size = 0x200000;
790 num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
791
792 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
793 if (ret < 0) {
794 goto fail;
795 }
796
797 offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
798 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
799 if (ret < 0) {
800 goto fail;
801 }
802
803 /* Write the initial BAT */
804 offset = 3 * 512;
805
806 memset(buf, 0xFF, 512);
807 for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
808 ret = blk_pwrite(blk, offset, buf, 512, 0);
809 if (ret < 0) {
810 goto fail;
811 }
812 offset += 512;
813 }
814
815 /* Prepare the Dynamic Disk Header */
816 memset(buf, 0, 1024);
817
818 memcpy(dyndisk_header->magic, "cxsparse", 8);
819
820 /*
821 * Note: The spec is actually wrong here for data_offset, it says
822 * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
823 */
824 dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
825 dyndisk_header->table_offset = cpu_to_be64(3 * 512);
826 dyndisk_header->version = cpu_to_be32(0x00010000);
827 dyndisk_header->block_size = cpu_to_be32(block_size);
828 dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
829
830 dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
831
832 /* Write the header */
833 offset = 512;
834
835 ret = blk_pwrite(blk, offset, buf, 1024, 0);
836 if (ret < 0) {
837 goto fail;
838 }
839
840 fail:
841 return ret;
842 }
843
844 static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
845 int64_t total_size)
846 {
847 int ret;
848
849 /* Add footer to total size */
850 total_size += HEADER_SIZE;
851
852 ret = blk_truncate(blk, total_size);
853 if (ret < 0) {
854 return ret;
855 }
856
857 ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
858 if (ret < 0) {
859 return ret;
860 }
861
862 return ret;
863 }
864
865 static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
866 {
867 uint8_t buf[1024];
868 VHDFooter *footer = (VHDFooter *) buf;
869 char *disk_type_param;
870 int i;
871 uint16_t cyls = 0;
872 uint8_t heads = 0;
873 uint8_t secs_per_cyl = 0;
874 int64_t total_sectors;
875 int64_t total_size;
876 int disk_type;
877 int ret = -EIO;
878 bool force_size;
879 Error *local_err = NULL;
880 BlockBackend *blk = NULL;
881
882 /* Read out options */
883 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
884 BDRV_SECTOR_SIZE);
885 disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
886 if (disk_type_param) {
887 if (!strcmp(disk_type_param, "dynamic")) {
888 disk_type = VHD_DYNAMIC;
889 } else if (!strcmp(disk_type_param, "fixed")) {
890 disk_type = VHD_FIXED;
891 } else {
892 error_setg(errp, "Invalid disk type, %s", disk_type_param);
893 ret = -EINVAL;
894 goto out;
895 }
896 } else {
897 disk_type = VHD_DYNAMIC;
898 }
899
900 force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
901
902 ret = bdrv_create_file(filename, opts, &local_err);
903 if (ret < 0) {
904 error_propagate(errp, local_err);
905 goto out;
906 }
907
908 blk = blk_new_open(filename, NULL, NULL,
909 BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
910 if (blk == NULL) {
911 error_propagate(errp, local_err);
912 ret = -EIO;
913 goto out;
914 }
915
916 blk_set_allow_write_beyond_eof(blk, true);
917
918 /*
919 * Calculate matching total_size and geometry. Increase the number of
920 * sectors requested until we get enough (or fail). This ensures that
921 * qemu-img convert doesn't truncate images, but rather rounds up.
922 *
923 * If the image size can't be represented by a spec conformant CHS geometry,
924 * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
925 * the image size from the VHD footer to calculate total_sectors.
926 */
927 if (force_size) {
928 /* This will force the use of total_size for sector count, below */
929 cyls = VHD_CHS_MAX_C;
930 heads = VHD_CHS_MAX_H;
931 secs_per_cyl = VHD_CHS_MAX_S;
932 } else {
933 total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
934 for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
935 calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
936 }
937 }
938
939 if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
940 total_sectors = total_size / BDRV_SECTOR_SIZE;
941 /* Allow a maximum disk size of 2040 GiB */
942 if (total_sectors > VHD_MAX_SECTORS) {
943 error_setg(errp, "Disk size is too large, max size is 2040 GiB");
944 ret = -EFBIG;
945 goto out;
946 }
947 } else {
948 total_sectors = (int64_t)cyls * heads * secs_per_cyl;
949 total_size = total_sectors * BDRV_SECTOR_SIZE;
950 }
951
952 /* Prepare the Hard Disk Footer */
953 memset(buf, 0, 1024);
954
955 memcpy(footer->creator, "conectix", 8);
956 if (force_size) {
957 memcpy(footer->creator_app, "qem2", 4);
958 } else {
959 memcpy(footer->creator_app, "qemu", 4);
960 }
961 memcpy(footer->creator_os, "Wi2k", 4);
962
963 footer->features = cpu_to_be32(0x02);
964 footer->version = cpu_to_be32(0x00010000);
965 if (disk_type == VHD_DYNAMIC) {
966 footer->data_offset = cpu_to_be64(HEADER_SIZE);
967 } else {
968 footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
969 }
970 footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
971
972 /* Version of Virtual PC 2007 */
973 footer->major = cpu_to_be16(0x0005);
974 footer->minor = cpu_to_be16(0x0003);
975 footer->orig_size = cpu_to_be64(total_size);
976 footer->current_size = cpu_to_be64(total_size);
977 footer->cyls = cpu_to_be16(cyls);
978 footer->heads = heads;
979 footer->secs_per_cyl = secs_per_cyl;
980
981 footer->type = cpu_to_be32(disk_type);
982
983 #if defined(CONFIG_UUID)
984 uuid_generate(footer->uuid);
985 #endif
986
987 footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
988
989 if (disk_type == VHD_DYNAMIC) {
990 ret = create_dynamic_disk(blk, buf, total_sectors);
991 } else {
992 ret = create_fixed_disk(blk, buf, total_size);
993 }
994 if (ret < 0) {
995 error_setg(errp, "Unable to create or write VHD header");
996 }
997
998 out:
999 blk_unref(blk);
1000 g_free(disk_type_param);
1001 return ret;
1002 }
1003
1004 static int vpc_has_zero_init(BlockDriverState *bs)
1005 {
1006 BDRVVPCState *s = bs->opaque;
1007 VHDFooter *footer = (VHDFooter *) s->footer_buf;
1008
1009 if (be32_to_cpu(footer->type) == VHD_FIXED) {
1010 return bdrv_has_zero_init(bs->file->bs);
1011 } else {
1012 return 1;
1013 }
1014 }
1015
1016 static void vpc_close(BlockDriverState *bs)
1017 {
1018 BDRVVPCState *s = bs->opaque;
1019 qemu_vfree(s->pagetable);
1020 #ifdef CACHE
1021 g_free(s->pageentry_u8);
1022 #endif
1023
1024 migrate_del_blocker(s->migration_blocker);
1025 error_free(s->migration_blocker);
1026 }
1027
1028 static QemuOptsList vpc_create_opts = {
1029 .name = "vpc-create-opts",
1030 .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
1031 .desc = {
1032 {
1033 .name = BLOCK_OPT_SIZE,
1034 .type = QEMU_OPT_SIZE,
1035 .help = "Virtual disk size"
1036 },
1037 {
1038 .name = BLOCK_OPT_SUBFMT,
1039 .type = QEMU_OPT_STRING,
1040 .help =
1041 "Type of virtual hard disk format. Supported formats are "
1042 "{dynamic (default) | fixed} "
1043 },
1044 {
1045 .name = VPC_OPT_FORCE_SIZE,
1046 .type = QEMU_OPT_BOOL,
1047 .help = "Force disk size calculation to use the actual size "
1048 "specified, rather than using the nearest CHS-based "
1049 "calculation"
1050 },
1051 { /* end of list */ }
1052 }
1053 };
1054
1055 static BlockDriver bdrv_vpc = {
1056 .format_name = "vpc",
1057 .instance_size = sizeof(BDRVVPCState),
1058
1059 .bdrv_probe = vpc_probe,
1060 .bdrv_open = vpc_open,
1061 .bdrv_close = vpc_close,
1062 .bdrv_reopen_prepare = vpc_reopen_prepare,
1063 .bdrv_create = vpc_create,
1064
1065 .bdrv_co_preadv = vpc_co_preadv,
1066 .bdrv_co_pwritev = vpc_co_pwritev,
1067 .bdrv_co_get_block_status = vpc_co_get_block_status,
1068
1069 .bdrv_get_info = vpc_get_info,
1070
1071 .create_opts = &vpc_create_opts,
1072 .bdrv_has_zero_init = vpc_has_zero_init,
1073 };
1074
1075 static void bdrv_vpc_init(void)
1076 {
1077 bdrv_register(&bdrv_vpc);
1078 }
1079
1080 block_init(bdrv_vpc_init);