]> git.proxmox.com Git - mirror_qemu.git/blame - block/vpc.c
block: Convert bdrv_pread(v) to BdrvChild
[mirror_qemu.git] / block / vpc.c
CommitLineData
6a0f9e82 1/*
cc2040f8 2 * Block driver for Connectix / Microsoft Virtual PC images
5fafdf24 3 *
6a0f9e82 4 * Copyright (c) 2005 Alex Beregszaszi
15d35bc5 5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
5fafdf24 6 *
6a0f9e82
FB
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
80c71a24 25#include "qemu/osdep.h"
da34e65c 26#include "qapi/error.h"
faf07963 27#include "qemu-common.h"
737e150e 28#include "block/block_int.h"
b8f45cdf 29#include "sysemu/block-backend.h"
1de7afc9 30#include "qemu/module.h"
caf71f86 31#include "migration/migration.h"
58369e22 32#include "qemu/bswap.h"
1fe1fa51
CA
33#if defined(CONFIG_UUID)
34#include <uuid/uuid.h>
35#endif
6a0f9e82
FB
36
37/**************************************************************/
38
/* Size in bytes of the footer buffer and of each footer/header read */
#define HEADER_SIZE 512

/* Define to enable the CACHE-guarded fields and setup code in this file */
//#define CACHE

/* Disk type values stored in the "type" field of the VHD footer */
enum vhd_type {
    VHD_FIXED           = 2,
    VHD_DYNAMIC         = 3,
    VHD_DIFFERENCING    = 4,
};

/*
 * VHD timestamps count seconds since Jan 1, 2000 0:00:00 (UTC); this is
 * that instant expressed as a Unix time, so it is subtracted from time()
 * values when a footer timestamp is produced.
 */
#define VHD_TIMESTAMP_BASE 946684800

/* Largest cylinder, head and sector counts used for a CHS geometry */
#define VHD_CHS_MAX_C   65535LL
#define VHD_CHS_MAX_H   16
#define VHD_CHS_MAX_S   255

#define VHD_MAX_SECTORS       0xff000000    /* 2040 GiB max image size */

/* Number of sectors addressed by the largest CHS geometry */
#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)

/* Name of the creation option that forces the use of the requested size */
#define VPC_OPT_FORCE_SIZE "force_size"
97f1c45c 60
9c057d0b 61/* always big-endian */
e54835c0 62typedef struct vhd_footer {
9c057d0b 63 char creator[8]; /* "conectix" */
2cfacb62
AL
64 uint32_t features;
65 uint32_t version;
66
9c057d0b 67 /* Offset of next header structure, 0xFFFFFFFF if none */
2cfacb62
AL
68 uint64_t data_offset;
69
9c057d0b 70 /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
2cfacb62
AL
71 uint32_t timestamp;
72
9c057d0b 73 char creator_app[4]; /* e.g., "vpc " */
2cfacb62
AL
74 uint16_t major;
75 uint16_t minor;
9c057d0b 76 char creator_os[4]; /* "Wi2k" */
2cfacb62
AL
77
78 uint64_t orig_size;
03671ded 79 uint64_t current_size;
2cfacb62
AL
80
81 uint16_t cyls;
82 uint8_t heads;
83 uint8_t secs_per_cyl;
84
85 uint32_t type;
86
9c057d0b
JC
87 /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
88 the bytes in the footer without the checksum field") */
2cfacb62
AL
89 uint32_t checksum;
90
9c057d0b 91 /* UUID used to identify a parent hard disk (backing file) */
2cfacb62
AL
92 uint8_t uuid[16];
93
94 uint8_t in_saved_state;
e54835c0 95} QEMU_PACKED VHDFooter;
b9fa33a6 96
e54835c0 97typedef struct vhd_dyndisk_header {
9c057d0b 98 char magic[8]; /* "cxsparse" */
2cfacb62 99
9c057d0b 100 /* Offset of next header structure, 0xFFFFFFFF if none */
2cfacb62
AL
101 uint64_t data_offset;
102
9c057d0b 103 /* Offset of the Block Allocation Table (BAT) */
2cfacb62
AL
104 uint64_t table_offset;
105
106 uint32_t version;
9c057d0b 107 uint32_t max_table_entries; /* 32bit/entry */
2cfacb62 108
9c057d0b 109 /* 2 MB by default, must be a power of two */
2cfacb62
AL
110 uint32_t block_size;
111
112 uint32_t checksum;
113 uint8_t parent_uuid[16];
114 uint32_t parent_timestamp;
115 uint32_t reserved;
116
9c057d0b 117 /* Backing file name (in UTF-16) */
2cfacb62
AL
118 uint8_t parent_name[512];
119
120 struct {
121 uint32_t platform;
122 uint32_t data_space;
123 uint32_t data_length;
124 uint32_t reserved;
125 uint64_t data_offset;
126 } parent_locator[8];
e54835c0 127} QEMU_PACKED VHDDynDiskHeader;
6a0f9e82
FB
128
129typedef struct BDRVVPCState {
848c66e8 130 CoMutex lock;
15d35bc5
AL
131 uint8_t footer_buf[HEADER_SIZE];
132 uint64_t free_data_block_offset;
2cfacb62 133 int max_table_entries;
6a0f9e82 134 uint32_t *pagetable;
15d35bc5
AL
135 uint64_t bat_offset;
136 uint64_t last_bitmap_offset;
6a0f9e82 137
2cfacb62 138 uint32_t block_size;
15d35bc5 139 uint32_t bitmap_size;
c540d53a
JC
140 bool force_use_chs;
141 bool force_use_sz;
15d35bc5 142
6a0f9e82
FB
143#ifdef CACHE
144 uint8_t *pageentry_u8;
145 uint32_t *pageentry_u32;
146 uint16_t *pageentry_u16;
3b46e624 147
6a0f9e82
FB
148 uint64_t last_bitmap;
149#endif
612ff3d8
KW
150
151 Error *migration_blocker;
6a0f9e82
FB
152} BDRVVPCState;
153
c540d53a
JC
154#define VPC_OPT_SIZE_CALC "force_size_calc"
155static QemuOptsList vpc_runtime_opts = {
156 .name = "vpc-runtime-opts",
157 .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
158 .desc = {
159 {
160 .name = VPC_OPT_SIZE_CALC,
161 .type = QEMU_OPT_STRING,
162 .help = "Force disk size calculation to use either CHS geometry, "
163 "or use the disk current_size specified in the VHD footer. "
164 "{chs, current_size}"
165 },
166 { /* end of list */ }
167 }
168};
169
/*
 * Computes the VHD checksum of a buffer: the one's complement of the sum of
 * all of its bytes.
 *
 * @buf:  bytes to sum; it is only read
 * @size: number of bytes in @buf
 *
 * The caller is expected to have zeroed the checksum field inside the buffer
 * beforehand, so that it does not contribute to the sum.
 *
 * Returns the checksum in host byte order (0xFFFFFFFF for an empty buffer).
 */
static uint32_t vpc_checksum(const uint8_t *buf, size_t size)
{
    uint32_t res = 0;
    size_t i;   /* same type as size: no signed/unsigned comparison */

    for (i = 0; i < size; i++) {
        res += buf[i];
    }

    return ~res;
}
180
181
/*
 * Format probe: scores a buffer holding the start of an image file.
 *
 * Returns 100 when at least 8 bytes are available and they are the
 * "conectix" magic, 0 otherwise. @filename is not used.
 */
static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    if (buf_size < 8) {
        return 0;
    }

    return memcmp(buf, "conectix", 8) == 0 ? 100 : 0;
}
188
c540d53a
JC
189static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
190 Error **errp)
191{
192 BDRVVPCState *s = bs->opaque;
193 const char *size_calc;
194
195 size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
196
197 if (!size_calc) {
198 /* no override, use autodetect only */
199 } else if (!strcmp(size_calc, "current_size")) {
200 s->force_use_sz = true;
201 } else if (!strcmp(size_calc, "chs")) {
202 s->force_use_chs = true;
203 } else {
204 error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
205 }
206}
207
015a1036
HR
208static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
209 Error **errp)
6a0f9e82
FB
210{
211 BDRVVPCState *s = bs->opaque;
66f82cee 212 int i;
e54835c0
JC
213 VHDFooter *footer;
214 VHDDynDiskHeader *dyndisk_header;
c540d53a
JC
215 QemuOpts *opts = NULL;
216 Error *local_err = NULL;
217 bool use_chs;
b9fa33a6 218 uint8_t buf[HEADER_SIZE];
57c7d9e5 219 uint32_t checksum;
97f1c45c 220 uint64_t computed_size;
b15deac7 221 uint64_t pagetable_size;
24da78db 222 int disk_type = VHD_DYNAMIC;
59294e46 223 int ret;
6a0f9e82 224
c540d53a
JC
225 opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
226 qemu_opts_absorb_qdict(opts, options, &local_err);
227 if (local_err) {
228 error_propagate(errp, local_err);
229 ret = -EINVAL;
230 goto fail;
231 }
232
233 vpc_parse_options(bs, opts, &local_err);
234 if (local_err) {
235 error_propagate(errp, local_err);
236 ret = -EINVAL;
237 goto fail;
238 }
239
cf2ab8fc 240 ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
59294e46 241 if (ret < 0) {
32f6439c 242 error_setg(errp, "Unable to read VHD header");
6a0f9e82 243 goto fail;
59294e46 244 }
6a0f9e82 245
e54835c0 246 footer = (VHDFooter *) s->footer_buf;
24da78db 247 if (strncmp(footer->creator, "conectix", 8)) {
9a4f4c31 248 int64_t offset = bdrv_getlength(bs->file->bs);
59294e46
KW
249 if (offset < 0) {
250 ret = offset;
32f6439c 251 error_setg(errp, "Invalid file size");
59294e46
KW
252 goto fail;
253 } else if (offset < HEADER_SIZE) {
254 ret = -EINVAL;
32f6439c 255 error_setg(errp, "File too small for a VHD header");
24da78db
CA
256 goto fail;
257 }
59294e46 258
24da78db 259 /* If a fixed disk, the footer is found only at the end of the file */
cf2ab8fc 260 ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
59294e46
KW
261 HEADER_SIZE);
262 if (ret < 0) {
24da78db
CA
263 goto fail;
264 }
265 if (strncmp(footer->creator, "conectix", 8)) {
76abe407
PB
266 error_setg(errp, "invalid VPC image");
267 ret = -EINVAL;
24da78db
CA
268 goto fail;
269 }
270 disk_type = VHD_FIXED;
271 }
6a0f9e82 272
57c7d9e5
AL
273 checksum = be32_to_cpu(footer->checksum);
274 footer->checksum = 0;
275 if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
276 fprintf(stderr, "block-vpc: The header checksum of '%s' is "
66f82cee 277 "incorrect.\n", bs->filename);
57c7d9e5 278
c088b691 279 /* Write 'checksum' back to footer, or else will leave it with zero. */
a4127c42 280 footer->checksum = cpu_to_be32(checksum);
c088b691 281
9c057d0b
JC
282 /* The visible size of a image in Virtual PC depends on the geometry
283 rather than on the size stored in the footer (the size in the footer
284 is too large usually) */
33ccf667
SH
285 bs->total_sectors = (int64_t)
286 be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
1fa79228 287
c540d53a
JC
288 /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
289 * VHD image sizes differently. VPC will rely on CHS geometry,
290 * while Hyper-V and disk2vhd use the size specified in the footer.
291 *
292 * We use a couple of approaches to try and determine the correct method:
293 * look at the Creator App field, and look for images that have CHS
294 * geometry that is the maximum value.
295 *
296 * If the CHS geometry is the maximum CHS geometry, then we assume that
297 * the size is the footer->current_size to avoid truncation. Otherwise,
298 * we follow the table based on footer->creator_app:
299 *
300 * Known creator apps:
301 * 'vpc ' : CHS Virtual PC (uses disk geometry)
302 * 'qemu' : CHS QEMU (uses disk geometry)
fb9245c2 303 * 'qem2' : current_size QEMU (uses current_size)
c540d53a
JC
304 * 'win ' : current_size Hyper-V
305 * 'd2v ' : current_size Disk2vhd
9bdfb9e8 306 * 'tap\0' : current_size XenServer
bab246db 307 * 'CTXS' : current_size XenConverter
c540d53a
JC
308 *
309 * The user can override the table values via drive options, however
310 * even with an override we will still use current_size for images
311 * that have CHS geometry of the maximum size.
312 */
313 use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
fb9245c2 314 !!strncmp(footer->creator_app, "qem2", 4) &&
9bdfb9e8 315 !!strncmp(footer->creator_app, "d2v ", 4) &&
bab246db 316 !!strncmp(footer->creator_app, "CTXS", 4) &&
9bdfb9e8 317 !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
c540d53a
JC
318
319 if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
03671ded 320 bs->total_sectors = be64_to_cpu(footer->current_size) /
c540d53a 321 BDRV_SECTOR_SIZE;
0173e7bb
PL
322 }
323
c23fb11b
JC
324 /* Allow a maximum disk size of 2040 GiB */
325 if (bs->total_sectors > VHD_MAX_SECTORS) {
59294e46 326 ret = -EFBIG;
efc8243d
SH
327 goto fail;
328 }
329
24da78db 330 if (disk_type == VHD_DYNAMIC) {
cf2ab8fc 331 ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
59294e46
KW
332 HEADER_SIZE);
333 if (ret < 0) {
32f6439c 334 error_setg(errp, "Error reading dynamic VHD header");
24da78db
CA
335 goto fail;
336 }
b9fa33a6 337
e54835c0 338 dyndisk_header = (VHDDynDiskHeader *) buf;
6a0f9e82 339
24da78db 340 if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
32f6439c 341 error_setg(errp, "Invalid header magic");
59294e46 342 ret = -EINVAL;
24da78db
CA
343 goto fail;
344 }
6a0f9e82 345
24da78db 346 s->block_size = be32_to_cpu(dyndisk_header->block_size);
5e71dfad
KW
347 if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
348 error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
349 ret = -EINVAL;
350 goto fail;
351 }
24da78db 352 s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
15d35bc5 353
24da78db 354 s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
97f1c45c
JC
355
356 if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
32f6439c 357 error_setg(errp, "Too many blocks");
97f1c45c
JC
358 ret = -EINVAL;
359 goto fail;
360 }
97f1c45c
JC
361
362 computed_size = (uint64_t) s->max_table_entries * s->block_size;
363 if (computed_size < bs->total_sectors * 512) {
32f6439c 364 error_setg(errp, "Page table too small");
97f1c45c
JC
365 ret = -EINVAL;
366 goto fail;
367 }
368
b15deac7
JC
369 if (s->max_table_entries > SIZE_MAX / 4 ||
370 s->max_table_entries > (int) INT_MAX / 4) {
371 error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
372 s->max_table_entries);
373 ret = -EINVAL;
374 goto fail;
375 }
376
377 pagetable_size = (uint64_t) s->max_table_entries * 4;
378
9a4f4c31 379 s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
5fb09cd5 380 if (s->pagetable == NULL) {
32f6439c 381 error_setg(errp, "Unable to allocate memory for page table");
5fb09cd5
KW
382 ret = -ENOMEM;
383 goto fail;
384 }
b71d1c2e 385
24da78db 386 s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
59294e46 387
cf2ab8fc 388 ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
9a4f4c31 389 pagetable_size);
59294e46 390 if (ret < 0) {
32f6439c 391 error_setg(errp, "Error reading pagetable");
24da78db
CA
392 goto fail;
393 }
b71d1c2e 394
24da78db 395 s->free_data_block_offset =
b15deac7 396 ROUND_UP(s->bat_offset + pagetable_size, 512);
15d35bc5 397
24da78db
CA
398 for (i = 0; i < s->max_table_entries; i++) {
399 be32_to_cpus(&s->pagetable[i]);
400 if (s->pagetable[i] != 0xFFFFFFFF) {
401 int64_t next = (512 * (int64_t) s->pagetable[i]) +
402 s->bitmap_size + s->block_size;
15d35bc5 403
24da78db
CA
404 if (next > s->free_data_block_offset) {
405 s->free_data_block_offset = next;
406 }
407 }
15d35bc5 408 }
15d35bc5 409
9a4f4c31 410 if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
fb8fe35f
PL
411 error_setg(errp, "block-vpc: free_data_block_offset points after "
412 "the end of file. The image has been truncated.");
413 ret = -EINVAL;
414 goto fail;
415 }
416
24da78db 417 s->last_bitmap_offset = (int64_t) -1;
6a0f9e82 418
6a0f9e82 419#ifdef CACHE
24da78db
CA
420 s->pageentry_u8 = g_malloc(512);
421 s->pageentry_u32 = s->pageentry_u8;
422 s->pageentry_u16 = s->pageentry_u8;
423 s->last_pagetable = -1;
6a0f9e82 424#endif
24da78db 425 }
6a0f9e82 426
848c66e8 427 qemu_co_mutex_init(&s->lock);
612ff3d8
KW
428
429 /* Disable migration when VHD images are used */
81e5f78a
AG
430 error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
431 "does not support live migration",
432 bdrv_get_device_or_node_name(bs));
612ff3d8
KW
433 migrate_add_blocker(s->migration_blocker);
434
6a0f9e82 435 return 0;
59294e46
KW
436
437fail:
97f1c45c 438 qemu_vfree(s->pagetable);
59294e46
KW
439#ifdef CACHE
440 g_free(s->pageentry_u8);
441#endif
442 return ret;
6a0f9e82
FB
443}
444
3fe4b700
JC
445static int vpc_reopen_prepare(BDRVReopenState *state,
446 BlockReopenQueue *queue, Error **errp)
447{
448 return 0;
449}
450
b71d1c2e
AL
451/*
452 * Returns the absolute byte offset of the given sector in the image file.
453 * If the sector is not allocated, -1 is returned instead.
15d35bc5
AL
454 *
455 * The parameter write must be 1 if the offset will be used for a write
456 * operation (the block bitmaps is updated then), 0 otherwise.
b71d1c2e 457 */
d46b7cc6
KW
458static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
459 bool write)
6a0f9e82
FB
460{
461 BDRVVPCState *s = bs->opaque;
6a0f9e82 462 uint64_t bitmap_offset, block_offset;
d46b7cc6 463 uint32_t pagetable_index, offset_in_block;
6a0f9e82 464
2cfacb62 465 pagetable_index = offset / s->block_size;
d46b7cc6 466 offset_in_block = offset % s->block_size;
3b46e624 467
15d35bc5 468 if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
9c057d0b 469 return -1; /* not allocated */
6a0f9e82 470
378e2aea 471 bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
d46b7cc6 472 block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
15d35bc5 473
9c057d0b
JC
474 /* We must ensure that we don't write to any sectors which are marked as
475 unused in the bitmap. We get away with setting all bits in the block
476 bitmap each time we write to a new block. This might cause Virtual PC to
477 miss sparse read optimization, but it's not a problem in terms of
478 correctness. */
15d35bc5
AL
479 if (write && (s->last_bitmap_offset != bitmap_offset)) {
480 uint8_t bitmap[s->bitmap_size];
481
482 s->last_bitmap_offset = bitmap_offset;
483 memset(bitmap, 0xff, s->bitmap_size);
9a4f4c31 484 bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size);
15d35bc5 485 }
3b46e624 486
b71d1c2e 487 return block_offset;
6a0f9e82
FB
488}
489
d46b7cc6
KW
490static inline int64_t get_sector_offset(BlockDriverState *bs,
491 int64_t sector_num, bool write)
492{
493 return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
494}
495
15d35bc5
AL
496/*
497 * Writes the footer to the end of the image file. This is needed when the
498 * file grows as it overwrites the old footer
499 *
500 * Returns 0 on success and < 0 on error
501 */
502static int rewrite_footer(BlockDriverState* bs)
503{
504 int ret;
505 BDRVVPCState *s = bs->opaque;
506 int64_t offset = s->free_data_block_offset;
507
9a4f4c31 508 ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE);
15d35bc5
AL
509 if (ret < 0)
510 return ret;
511
512 return 0;
513}
514
515/*
516 * Allocates a new block. This involves writing a new footer and updating
517 * the Block Allocation Table to use the space at the old end of the image
518 * file (overwriting the old footer)
519 *
520 * Returns the sectors' offset in the image file on success and < 0 on error
521 */
513b0f02 522static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
15d35bc5
AL
523{
524 BDRVVPCState *s = bs->opaque;
525 int64_t bat_offset;
526 uint32_t index, bat_value;
527 int ret;
528 uint8_t bitmap[s->bitmap_size];
529
9c057d0b 530 /* Check if sector_num is valid */
513b0f02
KW
531 if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
532 return -EINVAL;
533 }
15d35bc5 534
9c057d0b 535 /* Write entry into in-memory BAT */
513b0f02
KW
536 index = offset / s->block_size;
537 assert(s->pagetable[index] == 0xFFFFFFFF);
15d35bc5
AL
538 s->pagetable[index] = s->free_data_block_offset / 512;
539
9c057d0b 540 /* Initialize the block's bitmap */
15d35bc5 541 memset(bitmap, 0xff, s->bitmap_size);
9a4f4c31 542 ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
078a458e 543 s->bitmap_size);
5bb1cbac
KW
544 if (ret < 0) {
545 return ret;
546 }
15d35bc5 547
9c057d0b 548 /* Write new footer (the old one will be overwritten) */
15d35bc5
AL
549 s->free_data_block_offset += s->block_size + s->bitmap_size;
550 ret = rewrite_footer(bs);
551 if (ret < 0)
552 goto fail;
553
9c057d0b 554 /* Write BAT entry to disk */
15d35bc5 555 bat_offset = s->bat_offset + (4 * index);
a4127c42 556 bat_value = cpu_to_be32(s->pagetable[index]);
9a4f4c31 557 ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
15d35bc5
AL
558 if (ret < 0)
559 goto fail;
560
513b0f02 561 return get_image_offset(bs, offset, false);
15d35bc5
AL
562
563fail:
564 s->free_data_block_offset -= (s->block_size + s->bitmap_size);
513b0f02 565 return ret;
15d35bc5
AL
566}
567
97b00e28
PB
568static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
569{
570 BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
571 VHDFooter *footer = (VHDFooter *) s->footer_buf;
572
0d4cc3e7 573 if (be32_to_cpu(footer->type) != VHD_FIXED) {
97b00e28
PB
574 bdi->cluster_size = s->block_size;
575 }
576
95de6d70 577 bdi->unallocated_blocks_are_zero = true;
97b00e28
PB
578 return 0;
579}
580
d46b7cc6
KW
581static int coroutine_fn
582vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
583 QEMUIOVector *qiov, int flags)
6a0f9e82 584{
6c6ea921 585 BDRVVPCState *s = bs->opaque;
6a0f9e82 586 int ret;
d46b7cc6
KW
587 int64_t image_offset;
588 int64_t n_bytes;
589 int64_t bytes_done = 0;
e54835c0 590 VHDFooter *footer = (VHDFooter *) s->footer_buf;
d46b7cc6 591 QEMUIOVector local_qiov;
6a0f9e82 592
0d4cc3e7 593 if (be32_to_cpu(footer->type) == VHD_FIXED) {
d46b7cc6 594 return bdrv_co_preadv(bs->file->bs, offset, bytes, qiov, 0);
24da78db 595 }
b71d1c2e 596
d46b7cc6
KW
597 qemu_co_mutex_lock(&s->lock);
598 qemu_iovec_init(&local_qiov, qiov->niov);
6c6ea921 599
d46b7cc6
KW
600 while (bytes > 0) {
601 image_offset = get_image_offset(bs, offset, false);
602 n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
603
604 if (image_offset == -1) {
605 qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
b71d1c2e 606 } else {
d46b7cc6
KW
607 qemu_iovec_reset(&local_qiov);
608 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
609
610 ret = bdrv_co_preadv(bs->file->bs, image_offset, n_bytes,
611 &local_qiov, 0);
612 if (ret < 0) {
613 goto fail;
6c6ea921 614 }
b71d1c2e
AL
615 }
616
d46b7cc6
KW
617 bytes -= n_bytes;
618 offset += n_bytes;
619 bytes_done += n_bytes;
6a0f9e82 620 }
6a0f9e82 621
d46b7cc6
KW
622 ret = 0;
623fail:
624 qemu_iovec_destroy(&local_qiov);
2914caa0 625 qemu_co_mutex_unlock(&s->lock);
d46b7cc6 626
2914caa0
PB
627 return ret;
628}
629
513b0f02
KW
630static int coroutine_fn
631vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
632 QEMUIOVector *qiov, int flags)
15d35bc5 633{
6c6ea921 634 BDRVVPCState *s = bs->opaque;
513b0f02
KW
635 int64_t image_offset;
636 int64_t n_bytes;
637 int64_t bytes_done = 0;
15d35bc5 638 int ret;
e54835c0 639 VHDFooter *footer = (VHDFooter *) s->footer_buf;
513b0f02 640 QEMUIOVector local_qiov;
15d35bc5 641
0d4cc3e7 642 if (be32_to_cpu(footer->type) == VHD_FIXED) {
513b0f02 643 return bdrv_co_pwritev(bs->file->bs, offset, bytes, qiov, 0);
24da78db 644 }
15d35bc5 645
513b0f02
KW
646 qemu_co_mutex_lock(&s->lock);
647 qemu_iovec_init(&local_qiov, qiov->niov);
648
649 while (bytes > 0) {
650 image_offset = get_image_offset(bs, offset, true);
651 n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
6c6ea921 652
513b0f02
KW
653 if (image_offset == -1) {
654 image_offset = alloc_block(bs, offset);
655 if (image_offset < 0) {
656 ret = image_offset;
657 goto fail;
658 }
15d35bc5
AL
659 }
660
513b0f02
KW
661 qemu_iovec_reset(&local_qiov);
662 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
663
664 ret = bdrv_co_pwritev(bs->file->bs, image_offset, n_bytes,
665 &local_qiov, 0);
666 if (ret < 0) {
667 goto fail;
6c6ea921 668 }
15d35bc5 669
513b0f02
KW
670 bytes -= n_bytes;
671 offset += n_bytes;
672 bytes_done += n_bytes;
15d35bc5
AL
673 }
674
513b0f02
KW
675 ret = 0;
676fail:
677 qemu_iovec_destroy(&local_qiov);
e183ef75 678 qemu_co_mutex_unlock(&s->lock);
513b0f02 679
e183ef75
PB
680 return ret;
681}
682
0cc84887 683static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
67a0fd2a 684 int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
0cc84887
KW
685{
686 BDRVVPCState *s = bs->opaque;
687 VHDFooter *footer = (VHDFooter*) s->footer_buf;
2ec711dc 688 int64_t start, offset;
0cc84887
KW
689 bool allocated;
690 int n;
691
692 if (be32_to_cpu(footer->type) == VHD_FIXED) {
693 *pnum = nb_sectors;
7429e207 694 *file = bs->file->bs;
0cc84887
KW
695 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
696 (sector_num << BDRV_SECTOR_BITS);
697 }
698
699 offset = get_sector_offset(bs, sector_num, 0);
700 start = offset;
701 allocated = (offset != -1);
702 *pnum = 0;
703
704 do {
705 /* All sectors in a block are contiguous (without using the bitmap) */
706 n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
707 - sector_num;
708 n = MIN(n, nb_sectors);
709
710 *pnum += n;
711 sector_num += n;
712 nb_sectors -= n;
2ec711dc
PL
713 /* *pnum can't be greater than one block for allocated
714 * sectors since there is always a bitmap in between. */
715 if (allocated) {
7429e207 716 *file = bs->file->bs;
2ec711dc
PL
717 return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
718 }
0cc84887
KW
719 if (nb_sectors == 0) {
720 break;
721 }
0cc84887 722 offset = get_sector_offset(bs, sector_num, 0);
2ec711dc 723 } while (offset == -1);
0cc84887 724
2ec711dc 725 return 0;
0cc84887
KW
726}
727
57c7d9e5
AL
728/*
729 * Calculates the number of cylinders, heads and sectors per cylinder
730 * based on a given number of sectors. This is the algorithm described
731 * in the VHD specification.
732 *
733 * Note that the geometry doesn't always exactly match total_sectors but
734 * may round it down.
6e9ea0c0 735 *
c23fb11b 736 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
258d2edb
CA
737 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
738 * and instead allow up to 255 heads.
57c7d9e5 739 */
6e9ea0c0 740static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
57c7d9e5
AL
741 uint8_t* heads, uint8_t* secs_per_cyl)
742{
743 uint32_t cyls_times_heads;
744
690cbb09 745 total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
57c7d9e5 746
690cbb09 747 if (total_sectors >= 65535LL * 16 * 63) {
57c7d9e5 748 *secs_per_cyl = 255;
690cbb09 749 *heads = 16;
57c7d9e5
AL
750 cyls_times_heads = total_sectors / *secs_per_cyl;
751 } else {
752 *secs_per_cyl = 17;
753 cyls_times_heads = total_sectors / *secs_per_cyl;
754 *heads = (cyls_times_heads + 1023) / 1024;
755
690cbb09 756 if (*heads < 4) {
57c7d9e5 757 *heads = 4;
690cbb09 758 }
57c7d9e5
AL
759
760 if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
761 *secs_per_cyl = 31;
762 *heads = 16;
763 cyls_times_heads = total_sectors / *secs_per_cyl;
764 }
765
766 if (cyls_times_heads >= (*heads * 1024)) {
767 *secs_per_cyl = 63;
768 *heads = 16;
769 cyls_times_heads = total_sectors / *secs_per_cyl;
770 }
771 }
772
dede4188 773 *cyls = cyls_times_heads / *heads;
6e9ea0c0
AJ
774
775 return 0;
57c7d9e5
AL
776}
777
b8f45cdf 778static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
fef6070e 779 int64_t total_sectors)
57c7d9e5 780{
e54835c0
JC
781 VHDDynDiskHeader *dyndisk_header =
782 (VHDDynDiskHeader *) buf;
57c7d9e5 783 size_t block_size, num_bat_entries;
24da78db 784 int i;
fef6070e
JC
785 int ret;
786 int64_t offset = 0;
57c7d9e5 787
9c057d0b 788 /* Write the footer (twice: at the beginning and at the end) */
57c7d9e5
AL
789 block_size = 0x200000;
790 num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
791
8341f00d 792 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
40a99aac 793 if (ret < 0) {
f0ff243a
BS
794 goto fail;
795 }
57c7d9e5 796
fef6070e 797 offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
8341f00d 798 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
fef6070e 799 if (ret < 0) {
f0ff243a
BS
800 goto fail;
801 }
57c7d9e5 802
9c057d0b 803 /* Write the initial BAT */
fef6070e 804 offset = 3 * 512;
57c7d9e5
AL
805
806 memset(buf, 0xFF, 512);
f0ff243a 807 for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
8341f00d 808 ret = blk_pwrite(blk, offset, buf, 512, 0);
fef6070e 809 if (ret < 0) {
f0ff243a
BS
810 goto fail;
811 }
fef6070e 812 offset += 512;
f0ff243a 813 }
57c7d9e5 814
9c057d0b 815 /* Prepare the Dynamic Disk Header */
57c7d9e5
AL
816 memset(buf, 0, 1024);
817
5ec4d682 818 memcpy(dyndisk_header->magic, "cxsparse", 8);
57c7d9e5 819
78439f6a
CA
820 /*
821 * Note: The spec is actually wrong here for data_offset, it says
822 * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
823 */
a4127c42
SH
824 dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
825 dyndisk_header->table_offset = cpu_to_be64(3 * 512);
826 dyndisk_header->version = cpu_to_be32(0x00010000);
827 dyndisk_header->block_size = cpu_to_be32(block_size);
828 dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
57c7d9e5 829
a4127c42 830 dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
57c7d9e5 831
9c057d0b 832 /* Write the header */
fef6070e 833 offset = 512;
57c7d9e5 834
8341f00d 835 ret = blk_pwrite(blk, offset, buf, 1024, 0);
fef6070e 836 if (ret < 0) {
f0ff243a
BS
837 goto fail;
838 }
f0ff243a 839
24da78db
CA
840 fail:
841 return ret;
842}
843
b8f45cdf 844static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
fef6070e 845 int64_t total_size)
24da78db 846{
fef6070e 847 int ret;
24da78db
CA
848
849 /* Add footer to total size */
fef6070e
JC
850 total_size += HEADER_SIZE;
851
b8f45cdf 852 ret = blk_truncate(blk, total_size);
fef6070e
JC
853 if (ret < 0) {
854 return ret;
24da78db
CA
855 }
856
8341f00d 857 ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
fef6070e
JC
858 if (ret < 0) {
859 return ret;
860 }
24da78db 861
24da78db
CA
862 return ret;
863}
864
fec9921f 865static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
24da78db
CA
866{
867 uint8_t buf[1024];
e54835c0 868 VHDFooter *footer = (VHDFooter *) buf;
fec9921f 869 char *disk_type_param;
fef6070e 870 int i;
24da78db
CA
871 uint16_t cyls = 0;
872 uint8_t heads = 0;
873 uint8_t secs_per_cyl = 0;
874 int64_t total_sectors;
875 int64_t total_size;
876 int disk_type;
877 int ret = -EIO;
fb9245c2 878 bool force_size;
fef6070e 879 Error *local_err = NULL;
b8f45cdf 880 BlockBackend *blk = NULL;
24da78db
CA
881
882 /* Read out options */
c2eb918e
HT
883 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
884 BDRV_SECTOR_SIZE);
fec9921f
CL
885 disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
886 if (disk_type_param) {
887 if (!strcmp(disk_type_param, "dynamic")) {
24da78db 888 disk_type = VHD_DYNAMIC;
fec9921f 889 } else if (!strcmp(disk_type_param, "fixed")) {
24da78db
CA
890 disk_type = VHD_FIXED;
891 } else {
0211b9be 892 error_setg(errp, "Invalid disk type, %s", disk_type_param);
fec9921f
CL
893 ret = -EINVAL;
894 goto out;
24da78db
CA
895 }
896 } else {
897 disk_type = VHD_DYNAMIC;
898 }
899
fb9245c2
JC
900 force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
901
fef6070e
JC
902 ret = bdrv_create_file(filename, opts, &local_err);
903 if (ret < 0) {
904 error_propagate(errp, local_err);
fec9921f 905 goto out;
24da78db 906 }
b8f45cdf 907
efaa7c4e 908 blk = blk_new_open(filename, NULL, NULL,
72e775c7 909 BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
b8f45cdf 910 if (blk == NULL) {
fef6070e 911 error_propagate(errp, local_err);
b8f45cdf 912 ret = -EIO;
fef6070e 913 goto out;
4ab15590
CL
914 }
915
b8f45cdf
KW
916 blk_set_allow_write_beyond_eof(blk, true);
917
ecd880d9
KW
918 /*
919 * Calculate matching total_size and geometry. Increase the number of
920 * sectors requested until we get enough (or fail). This ensures that
921 * qemu-img convert doesn't truncate images, but rather rounds up.
690cbb09 922 *
fb9245c2 923 * If the image size can't be represented by a spec conformant CHS geometry,
690cbb09
PL
924 * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
925 * the image size from the VHD footer to calculate total_sectors.
ecd880d9 926 */
fb9245c2
JC
927 if (force_size) {
928 /* This will force the use of total_size for sector count, below */
929 cyls = VHD_CHS_MAX_C;
930 heads = VHD_CHS_MAX_H;
931 secs_per_cyl = VHD_CHS_MAX_S;
932 } else {
933 total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
934 for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
935 calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
936 }
690cbb09
PL
937 }
938
939 if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
940 total_sectors = total_size / BDRV_SECTOR_SIZE;
c23fb11b 941 /* Allow a maximum disk size of 2040 GiB */
690cbb09 942 if (total_sectors > VHD_MAX_SECTORS) {
0211b9be 943 error_setg(errp, "Disk size is too large, max size is 2040 GiB");
24da78db 944 ret = -EFBIG;
fef6070e 945 goto out;
24da78db 946 }
690cbb09
PL
947 } else {
948 total_sectors = (int64_t)cyls * heads * secs_per_cyl;
949 total_size = total_sectors * BDRV_SECTOR_SIZE;
24da78db 950 }
ecd880d9 951
24da78db
CA
952 /* Prepare the Hard Disk Footer */
953 memset(buf, 0, 1024);
954
955 memcpy(footer->creator, "conectix", 8);
fb9245c2
JC
956 if (force_size) {
957 memcpy(footer->creator_app, "qem2", 4);
958 } else {
959 memcpy(footer->creator_app, "qemu", 4);
960 }
24da78db
CA
961 memcpy(footer->creator_os, "Wi2k", 4);
962
a4127c42
SH
963 footer->features = cpu_to_be32(0x02);
964 footer->version = cpu_to_be32(0x00010000);
24da78db 965 if (disk_type == VHD_DYNAMIC) {
a4127c42 966 footer->data_offset = cpu_to_be64(HEADER_SIZE);
24da78db 967 } else {
a4127c42 968 footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
24da78db 969 }
a4127c42 970 footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
24da78db
CA
971
972 /* Version of Virtual PC 2007 */
a4127c42
SH
973 footer->major = cpu_to_be16(0x0005);
974 footer->minor = cpu_to_be16(0x0003);
3f3f20dc 975 footer->orig_size = cpu_to_be64(total_size);
03671ded 976 footer->current_size = cpu_to_be64(total_size);
a4127c42 977 footer->cyls = cpu_to_be16(cyls);
24da78db
CA
978 footer->heads = heads;
979 footer->secs_per_cyl = secs_per_cyl;
980
a4127c42 981 footer->type = cpu_to_be32(disk_type);
24da78db 982
1fe1fa51
CA
983#if defined(CONFIG_UUID)
984 uuid_generate(footer->uuid);
985#endif
24da78db 986
a4127c42 987 footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
24da78db
CA
988
989 if (disk_type == VHD_DYNAMIC) {
b8f45cdf 990 ret = create_dynamic_disk(blk, buf, total_sectors);
24da78db 991 } else {
b8f45cdf 992 ret = create_fixed_disk(blk, buf, total_size);
24da78db 993 }
0211b9be
JC
994 if (ret < 0) {
995 error_setg(errp, "Unable to create or write VHD header");
996 }
24da78db 997
fec9921f 998out:
b8f45cdf 999 blk_unref(blk);
fec9921f 1000 g_free(disk_type_param);
f0ff243a 1001 return ret;
57c7d9e5
AL
1002}
1003
72c6cc94
KW
1004static int vpc_has_zero_init(BlockDriverState *bs)
1005{
1006 BDRVVPCState *s = bs->opaque;
e54835c0 1007 VHDFooter *footer = (VHDFooter *) s->footer_buf;
72c6cc94 1008
0d4cc3e7 1009 if (be32_to_cpu(footer->type) == VHD_FIXED) {
9a4f4c31 1010 return bdrv_has_zero_init(bs->file->bs);
72c6cc94
KW
1011 } else {
1012 return 1;
1013 }
1014}
1015
6a0f9e82
FB
1016static void vpc_close(BlockDriverState *bs)
1017{
1018 BDRVVPCState *s = bs->opaque;
97f1c45c 1019 qemu_vfree(s->pagetable);
6a0f9e82 1020#ifdef CACHE
7267c094 1021 g_free(s->pageentry_u8);
6a0f9e82 1022#endif
612ff3d8
KW
1023
1024 migrate_del_blocker(s->migration_blocker);
1025 error_free(s->migration_blocker);
6a0f9e82
FB
1026}
1027
fec9921f
CL
1028static QemuOptsList vpc_create_opts = {
1029 .name = "vpc-create-opts",
1030 .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
1031 .desc = {
1032 {
1033 .name = BLOCK_OPT_SIZE,
1034 .type = QEMU_OPT_SIZE,
1035 .help = "Virtual disk size"
1036 },
1037 {
1038 .name = BLOCK_OPT_SUBFMT,
1039 .type = QEMU_OPT_STRING,
1040 .help =
1041 "Type of virtual hard disk format. Supported formats are "
1042 "{dynamic (default) | fixed} "
1043 },
fb9245c2
JC
1044 {
1045 .name = VPC_OPT_FORCE_SIZE,
1046 .type = QEMU_OPT_BOOL,
1047 .help = "Force disk size calculation to use the actual size "
1048 "specified, rather than using the nearest CHS-based "
1049 "calculation"
1050 },
fec9921f
CL
1051 { /* end of list */ }
1052 }
0e7e1989
KW
1053};
1054
5efa9d5a 1055static BlockDriver bdrv_vpc = {
4a411185
KW
1056 .format_name = "vpc",
1057 .instance_size = sizeof(BDRVVPCState),
c68b89ac 1058
72c6cc94
KW
1059 .bdrv_probe = vpc_probe,
1060 .bdrv_open = vpc_open,
1061 .bdrv_close = vpc_close,
1062 .bdrv_reopen_prepare = vpc_reopen_prepare,
c282e1fd 1063 .bdrv_create = vpc_create,
0e7e1989 1064
d46b7cc6 1065 .bdrv_co_preadv = vpc_co_preadv,
513b0f02 1066 .bdrv_co_pwritev = vpc_co_pwritev,
0cc84887 1067 .bdrv_co_get_block_status = vpc_co_get_block_status,
c68b89ac 1068
97b00e28
PB
1069 .bdrv_get_info = vpc_get_info,
1070
fec9921f 1071 .create_opts = &vpc_create_opts,
72c6cc94 1072 .bdrv_has_zero_init = vpc_has_zero_init,
6a0f9e82 1073};
5efa9d5a
AL
1074
1075static void bdrv_vpc_init(void)
1076{
1077 bdrv_register(&bdrv_vpc);
1078}
1079
1080block_init(bdrv_vpc_init);