]> git.proxmox.com Git - mirror_qemu.git/blob - block/vpc.c
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
[mirror_qemu.git] / block / vpc.c
1 /*
2 * Block driver for Connectix / Microsoft Virtual PC images
3 *
4 * Copyright (c) 2005 Alex Beregszaszi
5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25 #include "qemu/osdep.h"
26 #include "qapi/error.h"
27 #include "qemu-common.h"
28 #include "block/block_int.h"
29 #include "sysemu/block-backend.h"
30 #include "qemu/module.h"
31 #include "migration/migration.h"
32 #if defined(CONFIG_UUID)
33 #include <uuid/uuid.h>
34 #endif
35
36 /**************************************************************/
37
/* Size in bytes of one VHD footer / header sector */
#define HEADER_SIZE 512

/* Uncomment to build the (experimental) page entry cache fields */
//#define CACHE

/* Disk types as stored in the 'type' field of the VHD footer */
enum vhd_type {
    VHD_FIXED           = 2,
    VHD_DYNAMIC         = 3,
    VHD_DIFFERENCING    = 4,
};

/*
 * VHD timestamps count seconds since Jan 1, 2000 0:00:00 (UTC); this is
 * that instant expressed in seconds since the Unix epoch.
 */
#define VHD_TIMESTAMP_BASE 946684800

/* Upper bounds of the cylinders / heads / sectors geometry fields */
#define VHD_CHS_MAX_C   65535LL
#define VHD_CHS_MAX_H   16
#define VHD_CHS_MAX_S   255

/* 2040 GiB max image size, expressed in 512-byte sectors */
#define VHD_MAX_SECTORS       0xff000000
/* Number of sectors addressable with the largest CHS geometry */
#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)

/* Name of the image creation option that forces the exact size */
#define VPC_OPT_FORCE_SIZE "force_size"
59
60 /* always big-endian */
61 typedef struct vhd_footer {
62 char creator[8]; /* "conectix" */
63 uint32_t features;
64 uint32_t version;
65
66 /* Offset of next header structure, 0xFFFFFFFF if none */
67 uint64_t data_offset;
68
69 /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
70 uint32_t timestamp;
71
72 char creator_app[4]; /* e.g., "vpc " */
73 uint16_t major;
74 uint16_t minor;
75 char creator_os[4]; /* "Wi2k" */
76
77 uint64_t orig_size;
78 uint64_t current_size;
79
80 uint16_t cyls;
81 uint8_t heads;
82 uint8_t secs_per_cyl;
83
84 uint32_t type;
85
86 /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
87 the bytes in the footer without the checksum field") */
88 uint32_t checksum;
89
90 /* UUID used to identify a parent hard disk (backing file) */
91 uint8_t uuid[16];
92
93 uint8_t in_saved_state;
94 } QEMU_PACKED VHDFooter;
95
96 typedef struct vhd_dyndisk_header {
97 char magic[8]; /* "cxsparse" */
98
99 /* Offset of next header structure, 0xFFFFFFFF if none */
100 uint64_t data_offset;
101
102 /* Offset of the Block Allocation Table (BAT) */
103 uint64_t table_offset;
104
105 uint32_t version;
106 uint32_t max_table_entries; /* 32bit/entry */
107
108 /* 2 MB by default, must be a power of two */
109 uint32_t block_size;
110
111 uint32_t checksum;
112 uint8_t parent_uuid[16];
113 uint32_t parent_timestamp;
114 uint32_t reserved;
115
116 /* Backing file name (in UTF-16) */
117 uint8_t parent_name[512];
118
119 struct {
120 uint32_t platform;
121 uint32_t data_space;
122 uint32_t data_length;
123 uint32_t reserved;
124 uint64_t data_offset;
125 } parent_locator[8];
126 } QEMU_PACKED VHDDynDiskHeader;
127
128 typedef struct BDRVVPCState {
129 CoMutex lock;
130 uint8_t footer_buf[HEADER_SIZE];
131 uint64_t free_data_block_offset;
132 int max_table_entries;
133 uint32_t *pagetable;
134 uint64_t bat_offset;
135 uint64_t last_bitmap_offset;
136
137 uint32_t block_size;
138 uint32_t bitmap_size;
139 bool force_use_chs;
140 bool force_use_sz;
141
142 #ifdef CACHE
143 uint8_t *pageentry_u8;
144 uint32_t *pageentry_u32;
145 uint16_t *pageentry_u16;
146
147 uint64_t last_bitmap;
148 #endif
149
150 Error *migration_blocker;
151 } BDRVVPCState;
152
153 #define VPC_OPT_SIZE_CALC "force_size_calc"
154 static QemuOptsList vpc_runtime_opts = {
155 .name = "vpc-runtime-opts",
156 .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
157 .desc = {
158 {
159 .name = VPC_OPT_SIZE_CALC,
160 .type = QEMU_OPT_STRING,
161 .help = "Force disk size calculation to use either CHS geometry, "
162 "or use the disk current_size specified in the VHD footer. "
163 "{chs, current_size}"
164 },
165 { /* end of list */ }
166 }
167 };
168
/*
 * Computes the VHD checksum of a buffer: the one's complement of the
 * (32-bit, wrapping) sum of all its bytes.
 *
 * @buf:  bytes to sum; not modified
 * @size: number of bytes in @buf; 0 yields 0xFFFFFFFF
 *
 * Callers that checksum a footer or header must zero the structure's own
 * checksum field beforehand.
 */
static uint32_t vpc_checksum(const uint8_t *buf, size_t size)
{
    uint32_t res = 0;
    size_t i;   /* same type as size: no signed/unsigned comparison */

    for (i = 0; i < size; i++) {
        res += buf[i];
    }

    return ~res;
}
179
180
/*
 * Format probe: scores 100 when the buffer starts with the 8-byte VHD
 * footer magic "conectix", and 0 otherwise (including buffers shorter
 * than 8 bytes). The filename is not consulted.
 */
static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    static const char magic[] = "conectix";

    if (buf_size < 8) {
        return 0;
    }

    return memcmp(buf, magic, 8) == 0 ? 100 : 0;
}
187
188 static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
189 Error **errp)
190 {
191 BDRVVPCState *s = bs->opaque;
192 const char *size_calc;
193
194 size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
195
196 if (!size_calc) {
197 /* no override, use autodetect only */
198 } else if (!strcmp(size_calc, "current_size")) {
199 s->force_use_sz = true;
200 } else if (!strcmp(size_calc, "chs")) {
201 s->force_use_chs = true;
202 } else {
203 error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
204 }
205 }
206
207 static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
208 Error **errp)
209 {
210 BDRVVPCState *s = bs->opaque;
211 int i;
212 VHDFooter *footer;
213 VHDDynDiskHeader *dyndisk_header;
214 QemuOpts *opts = NULL;
215 Error *local_err = NULL;
216 bool use_chs;
217 uint8_t buf[HEADER_SIZE];
218 uint32_t checksum;
219 uint64_t computed_size;
220 uint64_t pagetable_size;
221 int disk_type = VHD_DYNAMIC;
222 int ret;
223
224 opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
225 qemu_opts_absorb_qdict(opts, options, &local_err);
226 if (local_err) {
227 error_propagate(errp, local_err);
228 ret = -EINVAL;
229 goto fail;
230 }
231
232 vpc_parse_options(bs, opts, &local_err);
233 if (local_err) {
234 error_propagate(errp, local_err);
235 ret = -EINVAL;
236 goto fail;
237 }
238
239 ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
240 if (ret < 0) {
241 error_setg(errp, "Unable to read VHD header");
242 goto fail;
243 }
244
245 footer = (VHDFooter *) s->footer_buf;
246 if (strncmp(footer->creator, "conectix", 8)) {
247 int64_t offset = bdrv_getlength(bs->file->bs);
248 if (offset < 0) {
249 ret = offset;
250 error_setg(errp, "Invalid file size");
251 goto fail;
252 } else if (offset < HEADER_SIZE) {
253 ret = -EINVAL;
254 error_setg(errp, "File too small for a VHD header");
255 goto fail;
256 }
257
258 /* If a fixed disk, the footer is found only at the end of the file */
259 ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf,
260 HEADER_SIZE);
261 if (ret < 0) {
262 goto fail;
263 }
264 if (strncmp(footer->creator, "conectix", 8)) {
265 error_setg(errp, "invalid VPC image");
266 ret = -EINVAL;
267 goto fail;
268 }
269 disk_type = VHD_FIXED;
270 }
271
272 checksum = be32_to_cpu(footer->checksum);
273 footer->checksum = 0;
274 if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
275 fprintf(stderr, "block-vpc: The header checksum of '%s' is "
276 "incorrect.\n", bs->filename);
277
278 /* Write 'checksum' back to footer, or else will leave it with zero. */
279 footer->checksum = cpu_to_be32(checksum);
280
281 /* The visible size of a image in Virtual PC depends on the geometry
282 rather than on the size stored in the footer (the size in the footer
283 is too large usually) */
284 bs->total_sectors = (int64_t)
285 be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
286
287 /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
288 * VHD image sizes differently. VPC will rely on CHS geometry,
289 * while Hyper-V and disk2vhd use the size specified in the footer.
290 *
291 * We use a couple of approaches to try and determine the correct method:
292 * look at the Creator App field, and look for images that have CHS
293 * geometry that is the maximum value.
294 *
295 * If the CHS geometry is the maximum CHS geometry, then we assume that
296 * the size is the footer->current_size to avoid truncation. Otherwise,
297 * we follow the table based on footer->creator_app:
298 *
299 * Known creator apps:
300 * 'vpc ' : CHS Virtual PC (uses disk geometry)
301 * 'qemu' : CHS QEMU (uses disk geometry)
302 * 'qem2' : current_size QEMU (uses current_size)
303 * 'win ' : current_size Hyper-V
304 * 'd2v ' : current_size Disk2vhd
305 * 'tap\0' : current_size XenServer
306 * 'CTXS' : current_size XenConverter
307 *
308 * The user can override the table values via drive options, however
309 * even with an override we will still use current_size for images
310 * that have CHS geometry of the maximum size.
311 */
312 use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
313 !!strncmp(footer->creator_app, "qem2", 4) &&
314 !!strncmp(footer->creator_app, "d2v ", 4) &&
315 !!strncmp(footer->creator_app, "CTXS", 4) &&
316 !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
317
318 if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
319 bs->total_sectors = be64_to_cpu(footer->current_size) /
320 BDRV_SECTOR_SIZE;
321 }
322
323 /* Allow a maximum disk size of 2040 GiB */
324 if (bs->total_sectors > VHD_MAX_SECTORS) {
325 ret = -EFBIG;
326 goto fail;
327 }
328
329 if (disk_type == VHD_DYNAMIC) {
330 ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
331 HEADER_SIZE);
332 if (ret < 0) {
333 error_setg(errp, "Error reading dynamic VHD header");
334 goto fail;
335 }
336
337 dyndisk_header = (VHDDynDiskHeader *) buf;
338
339 if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
340 error_setg(errp, "Invalid header magic");
341 ret = -EINVAL;
342 goto fail;
343 }
344
345 s->block_size = be32_to_cpu(dyndisk_header->block_size);
346 if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
347 error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
348 ret = -EINVAL;
349 goto fail;
350 }
351 s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
352
353 s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
354
355 if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
356 error_setg(errp, "Too many blocks");
357 ret = -EINVAL;
358 goto fail;
359 }
360
361 computed_size = (uint64_t) s->max_table_entries * s->block_size;
362 if (computed_size < bs->total_sectors * 512) {
363 error_setg(errp, "Page table too small");
364 ret = -EINVAL;
365 goto fail;
366 }
367
368 if (s->max_table_entries > SIZE_MAX / 4 ||
369 s->max_table_entries > (int) INT_MAX / 4) {
370 error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
371 s->max_table_entries);
372 ret = -EINVAL;
373 goto fail;
374 }
375
376 pagetable_size = (uint64_t) s->max_table_entries * 4;
377
378 s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
379 if (s->pagetable == NULL) {
380 error_setg(errp, "Unable to allocate memory for page table");
381 ret = -ENOMEM;
382 goto fail;
383 }
384
385 s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
386
387 ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
388 pagetable_size);
389 if (ret < 0) {
390 error_setg(errp, "Error reading pagetable");
391 goto fail;
392 }
393
394 s->free_data_block_offset =
395 ROUND_UP(s->bat_offset + pagetable_size, 512);
396
397 for (i = 0; i < s->max_table_entries; i++) {
398 be32_to_cpus(&s->pagetable[i]);
399 if (s->pagetable[i] != 0xFFFFFFFF) {
400 int64_t next = (512 * (int64_t) s->pagetable[i]) +
401 s->bitmap_size + s->block_size;
402
403 if (next > s->free_data_block_offset) {
404 s->free_data_block_offset = next;
405 }
406 }
407 }
408
409 if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
410 error_setg(errp, "block-vpc: free_data_block_offset points after "
411 "the end of file. The image has been truncated.");
412 ret = -EINVAL;
413 goto fail;
414 }
415
416 s->last_bitmap_offset = (int64_t) -1;
417
418 #ifdef CACHE
419 s->pageentry_u8 = g_malloc(512);
420 s->pageentry_u32 = s->pageentry_u8;
421 s->pageentry_u16 = s->pageentry_u8;
422 s->last_pagetable = -1;
423 #endif
424 }
425
426 qemu_co_mutex_init(&s->lock);
427
428 /* Disable migration when VHD images are used */
429 error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
430 "does not support live migration",
431 bdrv_get_device_or_node_name(bs));
432 migrate_add_blocker(s->migration_blocker);
433
434 return 0;
435
436 fail:
437 qemu_vfree(s->pagetable);
438 #ifdef CACHE
439 g_free(s->pageentry_u8);
440 #endif
441 return ret;
442 }
443
444 static int vpc_reopen_prepare(BDRVReopenState *state,
445 BlockReopenQueue *queue, Error **errp)
446 {
447 return 0;
448 }
449
450 /*
451 * Returns the absolute byte offset of the given sector in the image file.
452 * If the sector is not allocated, -1 is returned instead.
453 *
454 * The parameter write must be 1 if the offset will be used for a write
455 * operation (the block bitmaps is updated then), 0 otherwise.
456 */
457 static inline int64_t get_sector_offset(BlockDriverState *bs,
458 int64_t sector_num, int write)
459 {
460 BDRVVPCState *s = bs->opaque;
461 uint64_t offset = sector_num * 512;
462 uint64_t bitmap_offset, block_offset;
463 uint32_t pagetable_index, pageentry_index;
464
465 pagetable_index = offset / s->block_size;
466 pageentry_index = (offset % s->block_size) / 512;
467
468 if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
469 return -1; /* not allocated */
470
471 bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
472 block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
473
474 /* We must ensure that we don't write to any sectors which are marked as
475 unused in the bitmap. We get away with setting all bits in the block
476 bitmap each time we write to a new block. This might cause Virtual PC to
477 miss sparse read optimization, but it's not a problem in terms of
478 correctness. */
479 if (write && (s->last_bitmap_offset != bitmap_offset)) {
480 uint8_t bitmap[s->bitmap_size];
481
482 s->last_bitmap_offset = bitmap_offset;
483 memset(bitmap, 0xff, s->bitmap_size);
484 bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size);
485 }
486
487 return block_offset;
488 }
489
490 /*
491 * Writes the footer to the end of the image file. This is needed when the
492 * file grows as it overwrites the old footer
493 *
494 * Returns 0 on success and < 0 on error
495 */
496 static int rewrite_footer(BlockDriverState* bs)
497 {
498 int ret;
499 BDRVVPCState *s = bs->opaque;
500 int64_t offset = s->free_data_block_offset;
501
502 ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE);
503 if (ret < 0)
504 return ret;
505
506 return 0;
507 }
508
509 /*
510 * Allocates a new block. This involves writing a new footer and updating
511 * the Block Allocation Table to use the space at the old end of the image
512 * file (overwriting the old footer)
513 *
514 * Returns the sectors' offset in the image file on success and < 0 on error
515 */
516 static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
517 {
518 BDRVVPCState *s = bs->opaque;
519 int64_t bat_offset;
520 uint32_t index, bat_value;
521 int ret;
522 uint8_t bitmap[s->bitmap_size];
523
524 /* Check if sector_num is valid */
525 if ((sector_num < 0) || (sector_num > bs->total_sectors))
526 return -1;
527
528 /* Write entry into in-memory BAT */
529 index = (sector_num * 512) / s->block_size;
530 if (s->pagetable[index] != 0xFFFFFFFF)
531 return -1;
532
533 s->pagetable[index] = s->free_data_block_offset / 512;
534
535 /* Initialize the block's bitmap */
536 memset(bitmap, 0xff, s->bitmap_size);
537 ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
538 s->bitmap_size);
539 if (ret < 0) {
540 return ret;
541 }
542
543 /* Write new footer (the old one will be overwritten) */
544 s->free_data_block_offset += s->block_size + s->bitmap_size;
545 ret = rewrite_footer(bs);
546 if (ret < 0)
547 goto fail;
548
549 /* Write BAT entry to disk */
550 bat_offset = s->bat_offset + (4 * index);
551 bat_value = cpu_to_be32(s->pagetable[index]);
552 ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
553 if (ret < 0)
554 goto fail;
555
556 return get_sector_offset(bs, sector_num, 0);
557
558 fail:
559 s->free_data_block_offset -= (s->block_size + s->bitmap_size);
560 return -1;
561 }
562
563 static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
564 {
565 BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
566 VHDFooter *footer = (VHDFooter *) s->footer_buf;
567
568 if (be32_to_cpu(footer->type) != VHD_FIXED) {
569 bdi->cluster_size = s->block_size;
570 }
571
572 bdi->unallocated_blocks_are_zero = true;
573 return 0;
574 }
575
576 static int vpc_read(BlockDriverState *bs, int64_t sector_num,
577 uint8_t *buf, int nb_sectors)
578 {
579 BDRVVPCState *s = bs->opaque;
580 int ret;
581 int64_t offset;
582 int64_t sectors, sectors_per_block;
583 VHDFooter *footer = (VHDFooter *) s->footer_buf;
584
585 if (be32_to_cpu(footer->type) == VHD_FIXED) {
586 return bdrv_read(bs->file->bs, sector_num, buf, nb_sectors);
587 }
588 while (nb_sectors > 0) {
589 offset = get_sector_offset(bs, sector_num, 0);
590
591 sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
592 sectors = sectors_per_block - (sector_num % sectors_per_block);
593 if (sectors > nb_sectors) {
594 sectors = nb_sectors;
595 }
596
597 if (offset == -1) {
598 memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
599 } else {
600 ret = bdrv_pread(bs->file->bs, offset, buf,
601 sectors * BDRV_SECTOR_SIZE);
602 if (ret != sectors * BDRV_SECTOR_SIZE) {
603 return -1;
604 }
605 }
606
607 nb_sectors -= sectors;
608 sector_num += sectors;
609 buf += sectors * BDRV_SECTOR_SIZE;
610 }
611 return 0;
612 }
613
614 static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
615 uint8_t *buf, int nb_sectors)
616 {
617 int ret;
618 BDRVVPCState *s = bs->opaque;
619 qemu_co_mutex_lock(&s->lock);
620 ret = vpc_read(bs, sector_num, buf, nb_sectors);
621 qemu_co_mutex_unlock(&s->lock);
622 return ret;
623 }
624
625 static int vpc_write(BlockDriverState *bs, int64_t sector_num,
626 const uint8_t *buf, int nb_sectors)
627 {
628 BDRVVPCState *s = bs->opaque;
629 int64_t offset;
630 int64_t sectors, sectors_per_block;
631 int ret;
632 VHDFooter *footer = (VHDFooter *) s->footer_buf;
633
634 if (be32_to_cpu(footer->type) == VHD_FIXED) {
635 return bdrv_write(bs->file->bs, sector_num, buf, nb_sectors);
636 }
637 while (nb_sectors > 0) {
638 offset = get_sector_offset(bs, sector_num, 1);
639
640 sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
641 sectors = sectors_per_block - (sector_num % sectors_per_block);
642 if (sectors > nb_sectors) {
643 sectors = nb_sectors;
644 }
645
646 if (offset == -1) {
647 offset = alloc_block(bs, sector_num);
648 if (offset < 0)
649 return -1;
650 }
651
652 ret = bdrv_pwrite(bs->file->bs, offset, buf,
653 sectors * BDRV_SECTOR_SIZE);
654 if (ret != sectors * BDRV_SECTOR_SIZE) {
655 return -1;
656 }
657
658 nb_sectors -= sectors;
659 sector_num += sectors;
660 buf += sectors * BDRV_SECTOR_SIZE;
661 }
662
663 return 0;
664 }
665
666 static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
667 const uint8_t *buf, int nb_sectors)
668 {
669 int ret;
670 BDRVVPCState *s = bs->opaque;
671 qemu_co_mutex_lock(&s->lock);
672 ret = vpc_write(bs, sector_num, buf, nb_sectors);
673 qemu_co_mutex_unlock(&s->lock);
674 return ret;
675 }
676
677 static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
678 int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
679 {
680 BDRVVPCState *s = bs->opaque;
681 VHDFooter *footer = (VHDFooter*) s->footer_buf;
682 int64_t start, offset;
683 bool allocated;
684 int n;
685
686 if (be32_to_cpu(footer->type) == VHD_FIXED) {
687 *pnum = nb_sectors;
688 *file = bs->file->bs;
689 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
690 (sector_num << BDRV_SECTOR_BITS);
691 }
692
693 offset = get_sector_offset(bs, sector_num, 0);
694 start = offset;
695 allocated = (offset != -1);
696 *pnum = 0;
697
698 do {
699 /* All sectors in a block are contiguous (without using the bitmap) */
700 n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
701 - sector_num;
702 n = MIN(n, nb_sectors);
703
704 *pnum += n;
705 sector_num += n;
706 nb_sectors -= n;
707 /* *pnum can't be greater than one block for allocated
708 * sectors since there is always a bitmap in between. */
709 if (allocated) {
710 *file = bs->file->bs;
711 return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
712 }
713 if (nb_sectors == 0) {
714 break;
715 }
716 offset = get_sector_offset(bs, sector_num, 0);
717 } while (offset == -1);
718
719 return 0;
720 }
721
722 /*
723 * Calculates the number of cylinders, heads and sectors per cylinder
724 * based on a given number of sectors. This is the algorithm described
725 * in the VHD specification.
726 *
727 * Note that the geometry doesn't always exactly match total_sectors but
728 * may round it down.
729 *
730 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
731 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
732 * and instead allow up to 255 heads.
733 */
734 static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
735 uint8_t* heads, uint8_t* secs_per_cyl)
736 {
737 uint32_t cyls_times_heads;
738
739 total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
740
741 if (total_sectors >= 65535LL * 16 * 63) {
742 *secs_per_cyl = 255;
743 *heads = 16;
744 cyls_times_heads = total_sectors / *secs_per_cyl;
745 } else {
746 *secs_per_cyl = 17;
747 cyls_times_heads = total_sectors / *secs_per_cyl;
748 *heads = (cyls_times_heads + 1023) / 1024;
749
750 if (*heads < 4) {
751 *heads = 4;
752 }
753
754 if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
755 *secs_per_cyl = 31;
756 *heads = 16;
757 cyls_times_heads = total_sectors / *secs_per_cyl;
758 }
759
760 if (cyls_times_heads >= (*heads * 1024)) {
761 *secs_per_cyl = 63;
762 *heads = 16;
763 cyls_times_heads = total_sectors / *secs_per_cyl;
764 }
765 }
766
767 *cyls = cyls_times_heads / *heads;
768
769 return 0;
770 }
771
772 static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
773 int64_t total_sectors)
774 {
775 VHDDynDiskHeader *dyndisk_header =
776 (VHDDynDiskHeader *) buf;
777 size_t block_size, num_bat_entries;
778 int i;
779 int ret;
780 int64_t offset = 0;
781
782 /* Write the footer (twice: at the beginning and at the end) */
783 block_size = 0x200000;
784 num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
785
786 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
787 if (ret < 0) {
788 goto fail;
789 }
790
791 offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
792 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
793 if (ret < 0) {
794 goto fail;
795 }
796
797 /* Write the initial BAT */
798 offset = 3 * 512;
799
800 memset(buf, 0xFF, 512);
801 for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
802 ret = blk_pwrite(blk, offset, buf, 512);
803 if (ret < 0) {
804 goto fail;
805 }
806 offset += 512;
807 }
808
809 /* Prepare the Dynamic Disk Header */
810 memset(buf, 0, 1024);
811
812 memcpy(dyndisk_header->magic, "cxsparse", 8);
813
814 /*
815 * Note: The spec is actually wrong here for data_offset, it says
816 * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
817 */
818 dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
819 dyndisk_header->table_offset = cpu_to_be64(3 * 512);
820 dyndisk_header->version = cpu_to_be32(0x00010000);
821 dyndisk_header->block_size = cpu_to_be32(block_size);
822 dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
823
824 dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
825
826 /* Write the header */
827 offset = 512;
828
829 ret = blk_pwrite(blk, offset, buf, 1024);
830 if (ret < 0) {
831 goto fail;
832 }
833
834 fail:
835 return ret;
836 }
837
838 static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
839 int64_t total_size)
840 {
841 int ret;
842
843 /* Add footer to total size */
844 total_size += HEADER_SIZE;
845
846 ret = blk_truncate(blk, total_size);
847 if (ret < 0) {
848 return ret;
849 }
850
851 ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE);
852 if (ret < 0) {
853 return ret;
854 }
855
856 return ret;
857 }
858
859 static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
860 {
861 uint8_t buf[1024];
862 VHDFooter *footer = (VHDFooter *) buf;
863 char *disk_type_param;
864 int i;
865 uint16_t cyls = 0;
866 uint8_t heads = 0;
867 uint8_t secs_per_cyl = 0;
868 int64_t total_sectors;
869 int64_t total_size;
870 int disk_type;
871 int ret = -EIO;
872 bool force_size;
873 Error *local_err = NULL;
874 BlockBackend *blk = NULL;
875
876 /* Read out options */
877 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
878 BDRV_SECTOR_SIZE);
879 disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
880 if (disk_type_param) {
881 if (!strcmp(disk_type_param, "dynamic")) {
882 disk_type = VHD_DYNAMIC;
883 } else if (!strcmp(disk_type_param, "fixed")) {
884 disk_type = VHD_FIXED;
885 } else {
886 error_setg(errp, "Invalid disk type, %s", disk_type_param);
887 ret = -EINVAL;
888 goto out;
889 }
890 } else {
891 disk_type = VHD_DYNAMIC;
892 }
893
894 force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
895
896 ret = bdrv_create_file(filename, opts, &local_err);
897 if (ret < 0) {
898 error_propagate(errp, local_err);
899 goto out;
900 }
901
902 blk = blk_new_open(filename, NULL, NULL,
903 BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
904 if (blk == NULL) {
905 error_propagate(errp, local_err);
906 ret = -EIO;
907 goto out;
908 }
909
910 blk_set_allow_write_beyond_eof(blk, true);
911
912 /*
913 * Calculate matching total_size and geometry. Increase the number of
914 * sectors requested until we get enough (or fail). This ensures that
915 * qemu-img convert doesn't truncate images, but rather rounds up.
916 *
917 * If the image size can't be represented by a spec conformant CHS geometry,
918 * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
919 * the image size from the VHD footer to calculate total_sectors.
920 */
921 if (force_size) {
922 /* This will force the use of total_size for sector count, below */
923 cyls = VHD_CHS_MAX_C;
924 heads = VHD_CHS_MAX_H;
925 secs_per_cyl = VHD_CHS_MAX_S;
926 } else {
927 total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
928 for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
929 calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
930 }
931 }
932
933 if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
934 total_sectors = total_size / BDRV_SECTOR_SIZE;
935 /* Allow a maximum disk size of 2040 GiB */
936 if (total_sectors > VHD_MAX_SECTORS) {
937 error_setg(errp, "Disk size is too large, max size is 2040 GiB");
938 ret = -EFBIG;
939 goto out;
940 }
941 } else {
942 total_sectors = (int64_t)cyls * heads * secs_per_cyl;
943 total_size = total_sectors * BDRV_SECTOR_SIZE;
944 }
945
946 /* Prepare the Hard Disk Footer */
947 memset(buf, 0, 1024);
948
949 memcpy(footer->creator, "conectix", 8);
950 if (force_size) {
951 memcpy(footer->creator_app, "qem2", 4);
952 } else {
953 memcpy(footer->creator_app, "qemu", 4);
954 }
955 memcpy(footer->creator_os, "Wi2k", 4);
956
957 footer->features = cpu_to_be32(0x02);
958 footer->version = cpu_to_be32(0x00010000);
959 if (disk_type == VHD_DYNAMIC) {
960 footer->data_offset = cpu_to_be64(HEADER_SIZE);
961 } else {
962 footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
963 }
964 footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
965
966 /* Version of Virtual PC 2007 */
967 footer->major = cpu_to_be16(0x0005);
968 footer->minor = cpu_to_be16(0x0003);
969 footer->orig_size = cpu_to_be64(total_size);
970 footer->current_size = cpu_to_be64(total_size);
971 footer->cyls = cpu_to_be16(cyls);
972 footer->heads = heads;
973 footer->secs_per_cyl = secs_per_cyl;
974
975 footer->type = cpu_to_be32(disk_type);
976
977 #if defined(CONFIG_UUID)
978 uuid_generate(footer->uuid);
979 #endif
980
981 footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
982
983 if (disk_type == VHD_DYNAMIC) {
984 ret = create_dynamic_disk(blk, buf, total_sectors);
985 } else {
986 ret = create_fixed_disk(blk, buf, total_size);
987 }
988 if (ret < 0) {
989 error_setg(errp, "Unable to create or write VHD header");
990 }
991
992 out:
993 blk_unref(blk);
994 g_free(disk_type_param);
995 return ret;
996 }
997
998 static int vpc_has_zero_init(BlockDriverState *bs)
999 {
1000 BDRVVPCState *s = bs->opaque;
1001 VHDFooter *footer = (VHDFooter *) s->footer_buf;
1002
1003 if (be32_to_cpu(footer->type) == VHD_FIXED) {
1004 return bdrv_has_zero_init(bs->file->bs);
1005 } else {
1006 return 1;
1007 }
1008 }
1009
1010 static void vpc_close(BlockDriverState *bs)
1011 {
1012 BDRVVPCState *s = bs->opaque;
1013 qemu_vfree(s->pagetable);
1014 #ifdef CACHE
1015 g_free(s->pageentry_u8);
1016 #endif
1017
1018 migrate_del_blocker(s->migration_blocker);
1019 error_free(s->migration_blocker);
1020 }
1021
1022 static QemuOptsList vpc_create_opts = {
1023 .name = "vpc-create-opts",
1024 .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
1025 .desc = {
1026 {
1027 .name = BLOCK_OPT_SIZE,
1028 .type = QEMU_OPT_SIZE,
1029 .help = "Virtual disk size"
1030 },
1031 {
1032 .name = BLOCK_OPT_SUBFMT,
1033 .type = QEMU_OPT_STRING,
1034 .help =
1035 "Type of virtual hard disk format. Supported formats are "
1036 "{dynamic (default) | fixed} "
1037 },
1038 {
1039 .name = VPC_OPT_FORCE_SIZE,
1040 .type = QEMU_OPT_BOOL,
1041 .help = "Force disk size calculation to use the actual size "
1042 "specified, rather than using the nearest CHS-based "
1043 "calculation"
1044 },
1045 { /* end of list */ }
1046 }
1047 };
1048
1049 static BlockDriver bdrv_vpc = {
1050 .format_name = "vpc",
1051 .instance_size = sizeof(BDRVVPCState),
1052
1053 .bdrv_probe = vpc_probe,
1054 .bdrv_open = vpc_open,
1055 .bdrv_close = vpc_close,
1056 .bdrv_reopen_prepare = vpc_reopen_prepare,
1057 .bdrv_create = vpc_create,
1058
1059 .bdrv_read = vpc_co_read,
1060 .bdrv_write = vpc_co_write,
1061 .bdrv_co_get_block_status = vpc_co_get_block_status,
1062
1063 .bdrv_get_info = vpc_get_info,
1064
1065 .create_opts = &vpc_create_opts,
1066 .bdrv_has_zero_init = vpc_has_zero_init,
1067 };
1068
1069 static void bdrv_vpc_init(void)
1070 {
1071 bdrv_register(&bdrv_vpc);
1072 }
1073
1074 block_init(bdrv_vpc_init);