]> git.proxmox.com Git - mirror_qemu.git/blame - block/vpc.c
vmdk: Implement .bdrv_co_pwritev() interface
[mirror_qemu.git] / block / vpc.c
CommitLineData
6a0f9e82 1/*
cc2040f8 2 * Block driver for Connectix / Microsoft Virtual PC images
5fafdf24 3 *
6a0f9e82 4 * Copyright (c) 2005 Alex Beregszaszi
15d35bc5 5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
5fafdf24 6 *
6a0f9e82
FB
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
80c71a24 25#include "qemu/osdep.h"
da34e65c 26#include "qapi/error.h"
faf07963 27#include "qemu-common.h"
737e150e 28#include "block/block_int.h"
b8f45cdf 29#include "sysemu/block-backend.h"
1de7afc9 30#include "qemu/module.h"
caf71f86 31#include "migration/migration.h"
1fe1fa51
CA
32#if defined(CONFIG_UUID)
33#include <uuid/uuid.h>
34#endif
6a0f9e82
FB
35
36/**************************************************************/
37
38#define HEADER_SIZE 512
39
40//#define CACHE
41
2cfacb62
AL
/* Disk type values stored (big-endian) in the "type" field of the footer */
enum vhd_type {
    VHD_FIXED           = 2,
    VHD_DYNAMIC         = 3,
    VHD_DIFFERENCING    = 4,
};
47
/*
 * Unix time of Jan 1, 2000 0:00:00 (UTC). VHD timestamps are expressed in
 * seconds relative to this point in time.
 */
#define VHD_TIMESTAMP_BASE 946684800

/* Largest cylinder / head / sector values used for the footer geometry */
#define VHD_CHS_MAX_C   65535LL
#define VHD_CHS_MAX_H   16
#define VHD_CHS_MAX_S   255

/* 2040 GiB max image size */
#define VHD_MAX_SECTORS       0xff000000
/* Number of sectors described by the maximum CHS geometry */
#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)

/* Name of the image creation option that forces use of the exact size */
#define VPC_OPT_FORCE_SIZE    "force_size"
97f1c45c 59
9c057d0b 60/* always big-endian */
e54835c0 61typedef struct vhd_footer {
9c057d0b 62 char creator[8]; /* "conectix" */
2cfacb62
AL
63 uint32_t features;
64 uint32_t version;
65
9c057d0b 66 /* Offset of next header structure, 0xFFFFFFFF if none */
2cfacb62
AL
67 uint64_t data_offset;
68
9c057d0b 69 /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
2cfacb62
AL
70 uint32_t timestamp;
71
9c057d0b 72 char creator_app[4]; /* e.g., "vpc " */
2cfacb62
AL
73 uint16_t major;
74 uint16_t minor;
9c057d0b 75 char creator_os[4]; /* "Wi2k" */
2cfacb62
AL
76
77 uint64_t orig_size;
03671ded 78 uint64_t current_size;
2cfacb62
AL
79
80 uint16_t cyls;
81 uint8_t heads;
82 uint8_t secs_per_cyl;
83
84 uint32_t type;
85
9c057d0b
JC
86 /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
87 the bytes in the footer without the checksum field") */
2cfacb62
AL
88 uint32_t checksum;
89
9c057d0b 90 /* UUID used to identify a parent hard disk (backing file) */
2cfacb62
AL
91 uint8_t uuid[16];
92
93 uint8_t in_saved_state;
e54835c0 94} QEMU_PACKED VHDFooter;
b9fa33a6 95
e54835c0 96typedef struct vhd_dyndisk_header {
9c057d0b 97 char magic[8]; /* "cxsparse" */
2cfacb62 98
9c057d0b 99 /* Offset of next header structure, 0xFFFFFFFF if none */
2cfacb62
AL
100 uint64_t data_offset;
101
9c057d0b 102 /* Offset of the Block Allocation Table (BAT) */
2cfacb62
AL
103 uint64_t table_offset;
104
105 uint32_t version;
9c057d0b 106 uint32_t max_table_entries; /* 32bit/entry */
2cfacb62 107
9c057d0b 108 /* 2 MB by default, must be a power of two */
2cfacb62
AL
109 uint32_t block_size;
110
111 uint32_t checksum;
112 uint8_t parent_uuid[16];
113 uint32_t parent_timestamp;
114 uint32_t reserved;
115
9c057d0b 116 /* Backing file name (in UTF-16) */
2cfacb62
AL
117 uint8_t parent_name[512];
118
119 struct {
120 uint32_t platform;
121 uint32_t data_space;
122 uint32_t data_length;
123 uint32_t reserved;
124 uint64_t data_offset;
125 } parent_locator[8];
e54835c0 126} QEMU_PACKED VHDDynDiskHeader;
6a0f9e82
FB
127
128typedef struct BDRVVPCState {
848c66e8 129 CoMutex lock;
15d35bc5
AL
130 uint8_t footer_buf[HEADER_SIZE];
131 uint64_t free_data_block_offset;
2cfacb62 132 int max_table_entries;
6a0f9e82 133 uint32_t *pagetable;
15d35bc5
AL
134 uint64_t bat_offset;
135 uint64_t last_bitmap_offset;
6a0f9e82 136
2cfacb62 137 uint32_t block_size;
15d35bc5 138 uint32_t bitmap_size;
c540d53a
JC
139 bool force_use_chs;
140 bool force_use_sz;
15d35bc5 141
6a0f9e82
FB
142#ifdef CACHE
143 uint8_t *pageentry_u8;
144 uint32_t *pageentry_u32;
145 uint16_t *pageentry_u16;
3b46e624 146
6a0f9e82
FB
147 uint64_t last_bitmap;
148#endif
612ff3d8
KW
149
150 Error *migration_blocker;
6a0f9e82
FB
151} BDRVVPCState;
152
c540d53a
JC
153#define VPC_OPT_SIZE_CALC "force_size_calc"
154static QemuOptsList vpc_runtime_opts = {
155 .name = "vpc-runtime-opts",
156 .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
157 .desc = {
158 {
159 .name = VPC_OPT_SIZE_CALC,
160 .type = QEMU_OPT_STRING,
161 .help = "Force disk size calculation to use either CHS geometry, "
162 "or use the disk current_size specified in the VHD footer. "
163 "{chs, current_size}"
164 },
165 { /* end of list */ }
166 }
167};
168
57c7d9e5
AL
/*
 * Computes the checksum used by the VHD structures: the one's complement of
 * the 32-bit sum of all bytes in the buffer.
 *
 * @buf:  bytes to sum up (not modified)
 * @size: number of bytes in @buf
 *
 * The caller is responsible for zeroing the checksum field inside @buf
 * before calling, as the field itself must not contribute to the sum.
 */
static uint32_t vpc_checksum(const uint8_t *buf, size_t size)
{
    uint32_t res = 0;
    size_t i;   /* same type as size: no signed/unsigned comparison */

    for (i = 0; i < size; i++) {
        res += buf[i];
    }

    return ~res;
}
179
180
6a0f9e82
FB
/*
 * Format probe: scores 100 when the buffer starts with the "conectix"
 * footer cookie, 0 otherwise. The filename is not looked at.
 */
static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    if (buf_size < 8) {
        return 0;
    }

    return strncmp((char *)buf, "conectix", 8) == 0 ? 100 : 0;
}
187
c540d53a
JC
188static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
189 Error **errp)
190{
191 BDRVVPCState *s = bs->opaque;
192 const char *size_calc;
193
194 size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
195
196 if (!size_calc) {
197 /* no override, use autodetect only */
198 } else if (!strcmp(size_calc, "current_size")) {
199 s->force_use_sz = true;
200 } else if (!strcmp(size_calc, "chs")) {
201 s->force_use_chs = true;
202 } else {
203 error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
204 }
205}
206
015a1036
HR
207static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
208 Error **errp)
6a0f9e82
FB
209{
210 BDRVVPCState *s = bs->opaque;
66f82cee 211 int i;
e54835c0
JC
212 VHDFooter *footer;
213 VHDDynDiskHeader *dyndisk_header;
c540d53a
JC
214 QemuOpts *opts = NULL;
215 Error *local_err = NULL;
216 bool use_chs;
b9fa33a6 217 uint8_t buf[HEADER_SIZE];
57c7d9e5 218 uint32_t checksum;
97f1c45c 219 uint64_t computed_size;
b15deac7 220 uint64_t pagetable_size;
24da78db 221 int disk_type = VHD_DYNAMIC;
59294e46 222 int ret;
6a0f9e82 223
c540d53a
JC
224 opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
225 qemu_opts_absorb_qdict(opts, options, &local_err);
226 if (local_err) {
227 error_propagate(errp, local_err);
228 ret = -EINVAL;
229 goto fail;
230 }
231
232 vpc_parse_options(bs, opts, &local_err);
233 if (local_err) {
234 error_propagate(errp, local_err);
235 ret = -EINVAL;
236 goto fail;
237 }
238
9a4f4c31 239 ret = bdrv_pread(bs->file->bs, 0, s->footer_buf, HEADER_SIZE);
59294e46 240 if (ret < 0) {
32f6439c 241 error_setg(errp, "Unable to read VHD header");
6a0f9e82 242 goto fail;
59294e46 243 }
6a0f9e82 244
e54835c0 245 footer = (VHDFooter *) s->footer_buf;
24da78db 246 if (strncmp(footer->creator, "conectix", 8)) {
9a4f4c31 247 int64_t offset = bdrv_getlength(bs->file->bs);
59294e46
KW
248 if (offset < 0) {
249 ret = offset;
32f6439c 250 error_setg(errp, "Invalid file size");
59294e46
KW
251 goto fail;
252 } else if (offset < HEADER_SIZE) {
253 ret = -EINVAL;
32f6439c 254 error_setg(errp, "File too small for a VHD header");
24da78db
CA
255 goto fail;
256 }
59294e46 257
24da78db 258 /* If a fixed disk, the footer is found only at the end of the file */
9a4f4c31 259 ret = bdrv_pread(bs->file->bs, offset-HEADER_SIZE, s->footer_buf,
59294e46
KW
260 HEADER_SIZE);
261 if (ret < 0) {
24da78db
CA
262 goto fail;
263 }
264 if (strncmp(footer->creator, "conectix", 8)) {
76abe407
PB
265 error_setg(errp, "invalid VPC image");
266 ret = -EINVAL;
24da78db
CA
267 goto fail;
268 }
269 disk_type = VHD_FIXED;
270 }
6a0f9e82 271
57c7d9e5
AL
272 checksum = be32_to_cpu(footer->checksum);
273 footer->checksum = 0;
274 if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
275 fprintf(stderr, "block-vpc: The header checksum of '%s' is "
66f82cee 276 "incorrect.\n", bs->filename);
57c7d9e5 277
c088b691 278 /* Write 'checksum' back to footer, or else will leave it with zero. */
a4127c42 279 footer->checksum = cpu_to_be32(checksum);
c088b691 280
9c057d0b
JC
281 /* The visible size of a image in Virtual PC depends on the geometry
282 rather than on the size stored in the footer (the size in the footer
283 is too large usually) */
33ccf667
SH
284 bs->total_sectors = (int64_t)
285 be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
1fa79228 286
c540d53a
JC
287 /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
288 * VHD image sizes differently. VPC will rely on CHS geometry,
289 * while Hyper-V and disk2vhd use the size specified in the footer.
290 *
291 * We use a couple of approaches to try and determine the correct method:
292 * look at the Creator App field, and look for images that have CHS
293 * geometry that is the maximum value.
294 *
295 * If the CHS geometry is the maximum CHS geometry, then we assume that
296 * the size is the footer->current_size to avoid truncation. Otherwise,
297 * we follow the table based on footer->creator_app:
298 *
299 * Known creator apps:
300 * 'vpc ' : CHS Virtual PC (uses disk geometry)
301 * 'qemu' : CHS QEMU (uses disk geometry)
fb9245c2 302 * 'qem2' : current_size QEMU (uses current_size)
c540d53a
JC
303 * 'win ' : current_size Hyper-V
304 * 'd2v ' : current_size Disk2vhd
9bdfb9e8 305 * 'tap\0' : current_size XenServer
bab246db 306 * 'CTXS' : current_size XenConverter
c540d53a
JC
307 *
308 * The user can override the table values via drive options, however
309 * even with an override we will still use current_size for images
310 * that have CHS geometry of the maximum size.
311 */
312 use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
fb9245c2 313 !!strncmp(footer->creator_app, "qem2", 4) &&
9bdfb9e8 314 !!strncmp(footer->creator_app, "d2v ", 4) &&
bab246db 315 !!strncmp(footer->creator_app, "CTXS", 4) &&
9bdfb9e8 316 !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
c540d53a
JC
317
318 if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
03671ded 319 bs->total_sectors = be64_to_cpu(footer->current_size) /
c540d53a 320 BDRV_SECTOR_SIZE;
0173e7bb
PL
321 }
322
c23fb11b
JC
323 /* Allow a maximum disk size of 2040 GiB */
324 if (bs->total_sectors > VHD_MAX_SECTORS) {
59294e46 325 ret = -EFBIG;
efc8243d
SH
326 goto fail;
327 }
328
24da78db 329 if (disk_type == VHD_DYNAMIC) {
9a4f4c31 330 ret = bdrv_pread(bs->file->bs, be64_to_cpu(footer->data_offset), buf,
59294e46
KW
331 HEADER_SIZE);
332 if (ret < 0) {
32f6439c 333 error_setg(errp, "Error reading dynamic VHD header");
24da78db
CA
334 goto fail;
335 }
b9fa33a6 336
e54835c0 337 dyndisk_header = (VHDDynDiskHeader *) buf;
6a0f9e82 338
24da78db 339 if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
32f6439c 340 error_setg(errp, "Invalid header magic");
59294e46 341 ret = -EINVAL;
24da78db
CA
342 goto fail;
343 }
6a0f9e82 344
24da78db 345 s->block_size = be32_to_cpu(dyndisk_header->block_size);
5e71dfad
KW
346 if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
347 error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
348 ret = -EINVAL;
349 goto fail;
350 }
24da78db 351 s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
15d35bc5 352
24da78db 353 s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
97f1c45c
JC
354
355 if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
32f6439c 356 error_setg(errp, "Too many blocks");
97f1c45c
JC
357 ret = -EINVAL;
358 goto fail;
359 }
97f1c45c
JC
360
361 computed_size = (uint64_t) s->max_table_entries * s->block_size;
362 if (computed_size < bs->total_sectors * 512) {
32f6439c 363 error_setg(errp, "Page table too small");
97f1c45c
JC
364 ret = -EINVAL;
365 goto fail;
366 }
367
b15deac7
JC
368 if (s->max_table_entries > SIZE_MAX / 4 ||
369 s->max_table_entries > (int) INT_MAX / 4) {
370 error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
371 s->max_table_entries);
372 ret = -EINVAL;
373 goto fail;
374 }
375
376 pagetable_size = (uint64_t) s->max_table_entries * 4;
377
9a4f4c31 378 s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
5fb09cd5 379 if (s->pagetable == NULL) {
32f6439c 380 error_setg(errp, "Unable to allocate memory for page table");
5fb09cd5
KW
381 ret = -ENOMEM;
382 goto fail;
383 }
b71d1c2e 384
24da78db 385 s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
59294e46 386
9a4f4c31
KW
387 ret = bdrv_pread(bs->file->bs, s->bat_offset, s->pagetable,
388 pagetable_size);
59294e46 389 if (ret < 0) {
32f6439c 390 error_setg(errp, "Error reading pagetable");
24da78db
CA
391 goto fail;
392 }
b71d1c2e 393
24da78db 394 s->free_data_block_offset =
b15deac7 395 ROUND_UP(s->bat_offset + pagetable_size, 512);
15d35bc5 396
24da78db
CA
397 for (i = 0; i < s->max_table_entries; i++) {
398 be32_to_cpus(&s->pagetable[i]);
399 if (s->pagetable[i] != 0xFFFFFFFF) {
400 int64_t next = (512 * (int64_t) s->pagetable[i]) +
401 s->bitmap_size + s->block_size;
15d35bc5 402
24da78db
CA
403 if (next > s->free_data_block_offset) {
404 s->free_data_block_offset = next;
405 }
406 }
15d35bc5 407 }
15d35bc5 408
9a4f4c31 409 if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
fb8fe35f
PL
410 error_setg(errp, "block-vpc: free_data_block_offset points after "
411 "the end of file. The image has been truncated.");
412 ret = -EINVAL;
413 goto fail;
414 }
415
24da78db 416 s->last_bitmap_offset = (int64_t) -1;
6a0f9e82 417
6a0f9e82 418#ifdef CACHE
24da78db
CA
419 s->pageentry_u8 = g_malloc(512);
420 s->pageentry_u32 = s->pageentry_u8;
421 s->pageentry_u16 = s->pageentry_u8;
422 s->last_pagetable = -1;
6a0f9e82 423#endif
24da78db 424 }
6a0f9e82 425
848c66e8 426 qemu_co_mutex_init(&s->lock);
612ff3d8
KW
427
428 /* Disable migration when VHD images are used */
81e5f78a
AG
429 error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
430 "does not support live migration",
431 bdrv_get_device_or_node_name(bs));
612ff3d8
KW
432 migrate_add_blocker(s->migration_blocker);
433
6a0f9e82 434 return 0;
59294e46
KW
435
436fail:
97f1c45c 437 qemu_vfree(s->pagetable);
59294e46
KW
438#ifdef CACHE
439 g_free(s->pageentry_u8);
440#endif
441 return ret;
6a0f9e82
FB
442}
443
3fe4b700
JC
444static int vpc_reopen_prepare(BDRVReopenState *state,
445 BlockReopenQueue *queue, Error **errp)
446{
447 return 0;
448}
449
b71d1c2e
AL
450/*
451 * Returns the absolute byte offset of the given sector in the image file.
452 * If the sector is not allocated, -1 is returned instead.
15d35bc5
AL
453 *
454 * The parameter write must be 1 if the offset will be used for a write
455 * operation (the block bitmaps is updated then), 0 otherwise.
b71d1c2e 456 */
15d35bc5
AL
457static inline int64_t get_sector_offset(BlockDriverState *bs,
458 int64_t sector_num, int write)
6a0f9e82
FB
459{
460 BDRVVPCState *s = bs->opaque;
461 uint64_t offset = sector_num * 512;
462 uint64_t bitmap_offset, block_offset;
463 uint32_t pagetable_index, pageentry_index;
464
2cfacb62
AL
465 pagetable_index = offset / s->block_size;
466 pageentry_index = (offset % s->block_size) / 512;
3b46e624 467
15d35bc5 468 if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
9c057d0b 469 return -1; /* not allocated */
6a0f9e82 470
378e2aea 471 bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
15d35bc5
AL
472 block_offset = bitmap_offset + s->bitmap_size + (512 * pageentry_index);
473
9c057d0b
JC
474 /* We must ensure that we don't write to any sectors which are marked as
475 unused in the bitmap. We get away with setting all bits in the block
476 bitmap each time we write to a new block. This might cause Virtual PC to
477 miss sparse read optimization, but it's not a problem in terms of
478 correctness. */
15d35bc5
AL
479 if (write && (s->last_bitmap_offset != bitmap_offset)) {
480 uint8_t bitmap[s->bitmap_size];
481
482 s->last_bitmap_offset = bitmap_offset;
483 memset(bitmap, 0xff, s->bitmap_size);
9a4f4c31 484 bdrv_pwrite_sync(bs->file->bs, bitmap_offset, bitmap, s->bitmap_size);
15d35bc5 485 }
3b46e624 486
b71d1c2e 487 return block_offset;
6a0f9e82
FB
488}
489
15d35bc5
AL
490/*
491 * Writes the footer to the end of the image file. This is needed when the
492 * file grows as it overwrites the old footer
493 *
494 * Returns 0 on success and < 0 on error
495 */
496static int rewrite_footer(BlockDriverState* bs)
497{
498 int ret;
499 BDRVVPCState *s = bs->opaque;
500 int64_t offset = s->free_data_block_offset;
501
9a4f4c31 502 ret = bdrv_pwrite_sync(bs->file->bs, offset, s->footer_buf, HEADER_SIZE);
15d35bc5
AL
503 if (ret < 0)
504 return ret;
505
506 return 0;
507}
508
509/*
510 * Allocates a new block. This involves writing a new footer and updating
511 * the Block Allocation Table to use the space at the old end of the image
512 * file (overwriting the old footer)
513 *
514 * Returns the sectors' offset in the image file on success and < 0 on error
515 */
516static int64_t alloc_block(BlockDriverState* bs, int64_t sector_num)
517{
518 BDRVVPCState *s = bs->opaque;
519 int64_t bat_offset;
520 uint32_t index, bat_value;
521 int ret;
522 uint8_t bitmap[s->bitmap_size];
523
9c057d0b 524 /* Check if sector_num is valid */
15d35bc5
AL
525 if ((sector_num < 0) || (sector_num > bs->total_sectors))
526 return -1;
527
9c057d0b 528 /* Write entry into in-memory BAT */
15d35bc5
AL
529 index = (sector_num * 512) / s->block_size;
530 if (s->pagetable[index] != 0xFFFFFFFF)
531 return -1;
532
533 s->pagetable[index] = s->free_data_block_offset / 512;
534
9c057d0b 535 /* Initialize the block's bitmap */
15d35bc5 536 memset(bitmap, 0xff, s->bitmap_size);
9a4f4c31 537 ret = bdrv_pwrite_sync(bs->file->bs, s->free_data_block_offset, bitmap,
078a458e 538 s->bitmap_size);
5bb1cbac
KW
539 if (ret < 0) {
540 return ret;
541 }
15d35bc5 542
9c057d0b 543 /* Write new footer (the old one will be overwritten) */
15d35bc5
AL
544 s->free_data_block_offset += s->block_size + s->bitmap_size;
545 ret = rewrite_footer(bs);
546 if (ret < 0)
547 goto fail;
548
9c057d0b 549 /* Write BAT entry to disk */
15d35bc5 550 bat_offset = s->bat_offset + (4 * index);
a4127c42 551 bat_value = cpu_to_be32(s->pagetable[index]);
9a4f4c31 552 ret = bdrv_pwrite_sync(bs->file->bs, bat_offset, &bat_value, 4);
15d35bc5
AL
553 if (ret < 0)
554 goto fail;
555
556 return get_sector_offset(bs, sector_num, 0);
557
558fail:
559 s->free_data_block_offset -= (s->block_size + s->bitmap_size);
560 return -1;
561}
562
97b00e28
PB
563static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
564{
565 BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
566 VHDFooter *footer = (VHDFooter *) s->footer_buf;
567
0d4cc3e7 568 if (be32_to_cpu(footer->type) != VHD_FIXED) {
97b00e28
PB
569 bdi->cluster_size = s->block_size;
570 }
571
95de6d70 572 bdi->unallocated_blocks_are_zero = true;
97b00e28
PB
573 return 0;
574}
575
5fafdf24 576static int vpc_read(BlockDriverState *bs, int64_t sector_num,
6a0f9e82
FB
577 uint8_t *buf, int nb_sectors)
578{
6c6ea921 579 BDRVVPCState *s = bs->opaque;
6a0f9e82 580 int ret;
b71d1c2e 581 int64_t offset;
6c6ea921 582 int64_t sectors, sectors_per_block;
e54835c0 583 VHDFooter *footer = (VHDFooter *) s->footer_buf;
6a0f9e82 584
0d4cc3e7 585 if (be32_to_cpu(footer->type) == VHD_FIXED) {
9a4f4c31 586 return bdrv_read(bs->file->bs, sector_num, buf, nb_sectors);
24da78db 587 }
6a0f9e82 588 while (nb_sectors > 0) {
15d35bc5 589 offset = get_sector_offset(bs, sector_num, 0);
b71d1c2e 590
6c6ea921
KW
591 sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
592 sectors = sectors_per_block - (sector_num % sectors_per_block);
593 if (sectors > nb_sectors) {
594 sectors = nb_sectors;
595 }
596
b71d1c2e 597 if (offset == -1) {
6c6ea921 598 memset(buf, 0, sectors * BDRV_SECTOR_SIZE);
b71d1c2e 599 } else {
9a4f4c31 600 ret = bdrv_pread(bs->file->bs, offset, buf,
6c6ea921
KW
601 sectors * BDRV_SECTOR_SIZE);
602 if (ret != sectors * BDRV_SECTOR_SIZE) {
b71d1c2e 603 return -1;
6c6ea921 604 }
b71d1c2e
AL
605 }
606
6c6ea921
KW
607 nb_sectors -= sectors;
608 sector_num += sectors;
609 buf += sectors * BDRV_SECTOR_SIZE;
6a0f9e82
FB
610 }
611 return 0;
612}
613
2914caa0
PB
614static coroutine_fn int vpc_co_read(BlockDriverState *bs, int64_t sector_num,
615 uint8_t *buf, int nb_sectors)
616{
617 int ret;
618 BDRVVPCState *s = bs->opaque;
619 qemu_co_mutex_lock(&s->lock);
620 ret = vpc_read(bs, sector_num, buf, nb_sectors);
621 qemu_co_mutex_unlock(&s->lock);
622 return ret;
623}
624
15d35bc5
AL
625static int vpc_write(BlockDriverState *bs, int64_t sector_num,
626 const uint8_t *buf, int nb_sectors)
627{
6c6ea921 628 BDRVVPCState *s = bs->opaque;
15d35bc5 629 int64_t offset;
6c6ea921 630 int64_t sectors, sectors_per_block;
15d35bc5 631 int ret;
e54835c0 632 VHDFooter *footer = (VHDFooter *) s->footer_buf;
15d35bc5 633
0d4cc3e7 634 if (be32_to_cpu(footer->type) == VHD_FIXED) {
9a4f4c31 635 return bdrv_write(bs->file->bs, sector_num, buf, nb_sectors);
24da78db 636 }
15d35bc5
AL
637 while (nb_sectors > 0) {
638 offset = get_sector_offset(bs, sector_num, 1);
639
6c6ea921
KW
640 sectors_per_block = s->block_size >> BDRV_SECTOR_BITS;
641 sectors = sectors_per_block - (sector_num % sectors_per_block);
642 if (sectors > nb_sectors) {
643 sectors = nb_sectors;
644 }
645
15d35bc5
AL
646 if (offset == -1) {
647 offset = alloc_block(bs, sector_num);
648 if (offset < 0)
649 return -1;
650 }
651
9a4f4c31
KW
652 ret = bdrv_pwrite(bs->file->bs, offset, buf,
653 sectors * BDRV_SECTOR_SIZE);
6c6ea921 654 if (ret != sectors * BDRV_SECTOR_SIZE) {
15d35bc5 655 return -1;
6c6ea921 656 }
15d35bc5 657
6c6ea921
KW
658 nb_sectors -= sectors;
659 sector_num += sectors;
660 buf += sectors * BDRV_SECTOR_SIZE;
15d35bc5
AL
661 }
662
663 return 0;
664}
665
e183ef75
PB
666static coroutine_fn int vpc_co_write(BlockDriverState *bs, int64_t sector_num,
667 const uint8_t *buf, int nb_sectors)
668{
669 int ret;
670 BDRVVPCState *s = bs->opaque;
671 qemu_co_mutex_lock(&s->lock);
672 ret = vpc_write(bs, sector_num, buf, nb_sectors);
673 qemu_co_mutex_unlock(&s->lock);
674 return ret;
675}
676
0cc84887 677static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
67a0fd2a 678 int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
0cc84887
KW
679{
680 BDRVVPCState *s = bs->opaque;
681 VHDFooter *footer = (VHDFooter*) s->footer_buf;
2ec711dc 682 int64_t start, offset;
0cc84887
KW
683 bool allocated;
684 int n;
685
686 if (be32_to_cpu(footer->type) == VHD_FIXED) {
687 *pnum = nb_sectors;
7429e207 688 *file = bs->file->bs;
0cc84887
KW
689 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
690 (sector_num << BDRV_SECTOR_BITS);
691 }
692
693 offset = get_sector_offset(bs, sector_num, 0);
694 start = offset;
695 allocated = (offset != -1);
696 *pnum = 0;
697
698 do {
699 /* All sectors in a block are contiguous (without using the bitmap) */
700 n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
701 - sector_num;
702 n = MIN(n, nb_sectors);
703
704 *pnum += n;
705 sector_num += n;
706 nb_sectors -= n;
2ec711dc
PL
707 /* *pnum can't be greater than one block for allocated
708 * sectors since there is always a bitmap in between. */
709 if (allocated) {
7429e207 710 *file = bs->file->bs;
2ec711dc
PL
711 return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
712 }
0cc84887
KW
713 if (nb_sectors == 0) {
714 break;
715 }
0cc84887 716 offset = get_sector_offset(bs, sector_num, 0);
2ec711dc 717 } while (offset == -1);
0cc84887 718
2ec711dc 719 return 0;
0cc84887
KW
720}
721
57c7d9e5
AL
722/*
723 * Calculates the number of cylinders, heads and sectors per cylinder
724 * based on a given number of sectors. This is the algorithm described
725 * in the VHD specification.
726 *
727 * Note that the geometry doesn't always exactly match total_sectors but
728 * may round it down.
6e9ea0c0 729 *
c23fb11b 730 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
258d2edb
CA
731 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
732 * and instead allow up to 255 heads.
57c7d9e5 733 */
6e9ea0c0 734static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
57c7d9e5
AL
735 uint8_t* heads, uint8_t* secs_per_cyl)
736{
737 uint32_t cyls_times_heads;
738
690cbb09 739 total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
57c7d9e5 740
690cbb09 741 if (total_sectors >= 65535LL * 16 * 63) {
57c7d9e5 742 *secs_per_cyl = 255;
690cbb09 743 *heads = 16;
57c7d9e5
AL
744 cyls_times_heads = total_sectors / *secs_per_cyl;
745 } else {
746 *secs_per_cyl = 17;
747 cyls_times_heads = total_sectors / *secs_per_cyl;
748 *heads = (cyls_times_heads + 1023) / 1024;
749
690cbb09 750 if (*heads < 4) {
57c7d9e5 751 *heads = 4;
690cbb09 752 }
57c7d9e5
AL
753
754 if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
755 *secs_per_cyl = 31;
756 *heads = 16;
757 cyls_times_heads = total_sectors / *secs_per_cyl;
758 }
759
760 if (cyls_times_heads >= (*heads * 1024)) {
761 *secs_per_cyl = 63;
762 *heads = 16;
763 cyls_times_heads = total_sectors / *secs_per_cyl;
764 }
765 }
766
dede4188 767 *cyls = cyls_times_heads / *heads;
6e9ea0c0
AJ
768
769 return 0;
57c7d9e5
AL
770}
771
b8f45cdf 772static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
fef6070e 773 int64_t total_sectors)
57c7d9e5 774{
e54835c0
JC
775 VHDDynDiskHeader *dyndisk_header =
776 (VHDDynDiskHeader *) buf;
57c7d9e5 777 size_t block_size, num_bat_entries;
24da78db 778 int i;
fef6070e
JC
779 int ret;
780 int64_t offset = 0;
57c7d9e5 781
9c057d0b 782 /* Write the footer (twice: at the beginning and at the end) */
57c7d9e5
AL
783 block_size = 0x200000;
784 num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
785
b8f45cdf 786 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
40a99aac 787 if (ret < 0) {
f0ff243a
BS
788 goto fail;
789 }
57c7d9e5 790
fef6070e 791 offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
b8f45cdf 792 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE);
fef6070e 793 if (ret < 0) {
f0ff243a
BS
794 goto fail;
795 }
57c7d9e5 796
9c057d0b 797 /* Write the initial BAT */
fef6070e 798 offset = 3 * 512;
57c7d9e5
AL
799
800 memset(buf, 0xFF, 512);
f0ff243a 801 for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
b8f45cdf 802 ret = blk_pwrite(blk, offset, buf, 512);
fef6070e 803 if (ret < 0) {
f0ff243a
BS
804 goto fail;
805 }
fef6070e 806 offset += 512;
f0ff243a 807 }
57c7d9e5 808
9c057d0b 809 /* Prepare the Dynamic Disk Header */
57c7d9e5
AL
810 memset(buf, 0, 1024);
811
5ec4d682 812 memcpy(dyndisk_header->magic, "cxsparse", 8);
57c7d9e5 813
78439f6a
CA
814 /*
815 * Note: The spec is actually wrong here for data_offset, it says
816 * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
817 */
a4127c42
SH
818 dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
819 dyndisk_header->table_offset = cpu_to_be64(3 * 512);
820 dyndisk_header->version = cpu_to_be32(0x00010000);
821 dyndisk_header->block_size = cpu_to_be32(block_size);
822 dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
57c7d9e5 823
a4127c42 824 dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
57c7d9e5 825
9c057d0b 826 /* Write the header */
fef6070e 827 offset = 512;
57c7d9e5 828
b8f45cdf 829 ret = blk_pwrite(blk, offset, buf, 1024);
fef6070e 830 if (ret < 0) {
f0ff243a
BS
831 goto fail;
832 }
f0ff243a 833
24da78db
CA
834 fail:
835 return ret;
836}
837
b8f45cdf 838static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
fef6070e 839 int64_t total_size)
24da78db 840{
fef6070e 841 int ret;
24da78db
CA
842
843 /* Add footer to total size */
fef6070e
JC
844 total_size += HEADER_SIZE;
845
b8f45cdf 846 ret = blk_truncate(blk, total_size);
fef6070e
JC
847 if (ret < 0) {
848 return ret;
24da78db
CA
849 }
850
b8f45cdf 851 ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE);
fef6070e
JC
852 if (ret < 0) {
853 return ret;
854 }
24da78db 855
24da78db
CA
856 return ret;
857}
858
fec9921f 859static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
24da78db
CA
860{
861 uint8_t buf[1024];
e54835c0 862 VHDFooter *footer = (VHDFooter *) buf;
fec9921f 863 char *disk_type_param;
fef6070e 864 int i;
24da78db
CA
865 uint16_t cyls = 0;
866 uint8_t heads = 0;
867 uint8_t secs_per_cyl = 0;
868 int64_t total_sectors;
869 int64_t total_size;
870 int disk_type;
871 int ret = -EIO;
fb9245c2 872 bool force_size;
fef6070e 873 Error *local_err = NULL;
b8f45cdf 874 BlockBackend *blk = NULL;
24da78db
CA
875
876 /* Read out options */
c2eb918e
HT
877 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
878 BDRV_SECTOR_SIZE);
fec9921f
CL
879 disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
880 if (disk_type_param) {
881 if (!strcmp(disk_type_param, "dynamic")) {
24da78db 882 disk_type = VHD_DYNAMIC;
fec9921f 883 } else if (!strcmp(disk_type_param, "fixed")) {
24da78db
CA
884 disk_type = VHD_FIXED;
885 } else {
0211b9be 886 error_setg(errp, "Invalid disk type, %s", disk_type_param);
fec9921f
CL
887 ret = -EINVAL;
888 goto out;
24da78db
CA
889 }
890 } else {
891 disk_type = VHD_DYNAMIC;
892 }
893
fb9245c2
JC
894 force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
895
fef6070e
JC
896 ret = bdrv_create_file(filename, opts, &local_err);
897 if (ret < 0) {
898 error_propagate(errp, local_err);
fec9921f 899 goto out;
24da78db 900 }
b8f45cdf 901
efaa7c4e 902 blk = blk_new_open(filename, NULL, NULL,
72e775c7 903 BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
b8f45cdf 904 if (blk == NULL) {
fef6070e 905 error_propagate(errp, local_err);
b8f45cdf 906 ret = -EIO;
fef6070e 907 goto out;
4ab15590
CL
908 }
909
b8f45cdf
KW
910 blk_set_allow_write_beyond_eof(blk, true);
911
ecd880d9
KW
912 /*
913 * Calculate matching total_size and geometry. Increase the number of
914 * sectors requested until we get enough (or fail). This ensures that
915 * qemu-img convert doesn't truncate images, but rather rounds up.
690cbb09 916 *
fb9245c2 917 * If the image size can't be represented by a spec conformant CHS geometry,
690cbb09
PL
918 * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
919 * the image size from the VHD footer to calculate total_sectors.
ecd880d9 920 */
fb9245c2
JC
921 if (force_size) {
922 /* This will force the use of total_size for sector count, below */
923 cyls = VHD_CHS_MAX_C;
924 heads = VHD_CHS_MAX_H;
925 secs_per_cyl = VHD_CHS_MAX_S;
926 } else {
927 total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
928 for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
929 calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
930 }
690cbb09
PL
931 }
932
933 if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
934 total_sectors = total_size / BDRV_SECTOR_SIZE;
c23fb11b 935 /* Allow a maximum disk size of 2040 GiB */
690cbb09 936 if (total_sectors > VHD_MAX_SECTORS) {
0211b9be 937 error_setg(errp, "Disk size is too large, max size is 2040 GiB");
24da78db 938 ret = -EFBIG;
fef6070e 939 goto out;
24da78db 940 }
690cbb09
PL
941 } else {
942 total_sectors = (int64_t)cyls * heads * secs_per_cyl;
943 total_size = total_sectors * BDRV_SECTOR_SIZE;
24da78db 944 }
ecd880d9 945
24da78db
CA
946 /* Prepare the Hard Disk Footer */
947 memset(buf, 0, 1024);
948
949 memcpy(footer->creator, "conectix", 8);
fb9245c2
JC
950 if (force_size) {
951 memcpy(footer->creator_app, "qem2", 4);
952 } else {
953 memcpy(footer->creator_app, "qemu", 4);
954 }
24da78db
CA
955 memcpy(footer->creator_os, "Wi2k", 4);
956
a4127c42
SH
957 footer->features = cpu_to_be32(0x02);
958 footer->version = cpu_to_be32(0x00010000);
24da78db 959 if (disk_type == VHD_DYNAMIC) {
a4127c42 960 footer->data_offset = cpu_to_be64(HEADER_SIZE);
24da78db 961 } else {
a4127c42 962 footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
24da78db 963 }
a4127c42 964 footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
24da78db
CA
965
966 /* Version of Virtual PC 2007 */
a4127c42
SH
967 footer->major = cpu_to_be16(0x0005);
968 footer->minor = cpu_to_be16(0x0003);
3f3f20dc 969 footer->orig_size = cpu_to_be64(total_size);
03671ded 970 footer->current_size = cpu_to_be64(total_size);
a4127c42 971 footer->cyls = cpu_to_be16(cyls);
24da78db
CA
972 footer->heads = heads;
973 footer->secs_per_cyl = secs_per_cyl;
974
a4127c42 975 footer->type = cpu_to_be32(disk_type);
24da78db 976
1fe1fa51
CA
977#if defined(CONFIG_UUID)
978 uuid_generate(footer->uuid);
979#endif
24da78db 980
a4127c42 981 footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
24da78db
CA
982
983 if (disk_type == VHD_DYNAMIC) {
b8f45cdf 984 ret = create_dynamic_disk(blk, buf, total_sectors);
24da78db 985 } else {
b8f45cdf 986 ret = create_fixed_disk(blk, buf, total_size);
24da78db 987 }
0211b9be
JC
988 if (ret < 0) {
989 error_setg(errp, "Unable to create or write VHD header");
990 }
24da78db 991
fec9921f 992out:
b8f45cdf 993 blk_unref(blk);
fec9921f 994 g_free(disk_type_param);
f0ff243a 995 return ret;
57c7d9e5
AL
996}
997
72c6cc94
KW
998static int vpc_has_zero_init(BlockDriverState *bs)
999{
1000 BDRVVPCState *s = bs->opaque;
e54835c0 1001 VHDFooter *footer = (VHDFooter *) s->footer_buf;
72c6cc94 1002
0d4cc3e7 1003 if (be32_to_cpu(footer->type) == VHD_FIXED) {
9a4f4c31 1004 return bdrv_has_zero_init(bs->file->bs);
72c6cc94
KW
1005 } else {
1006 return 1;
1007 }
1008}
1009
6a0f9e82
FB
1010static void vpc_close(BlockDriverState *bs)
1011{
1012 BDRVVPCState *s = bs->opaque;
97f1c45c 1013 qemu_vfree(s->pagetable);
6a0f9e82 1014#ifdef CACHE
7267c094 1015 g_free(s->pageentry_u8);
6a0f9e82 1016#endif
612ff3d8
KW
1017
1018 migrate_del_blocker(s->migration_blocker);
1019 error_free(s->migration_blocker);
6a0f9e82
FB
1020}
1021
fec9921f
CL
1022static QemuOptsList vpc_create_opts = {
1023 .name = "vpc-create-opts",
1024 .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
1025 .desc = {
1026 {
1027 .name = BLOCK_OPT_SIZE,
1028 .type = QEMU_OPT_SIZE,
1029 .help = "Virtual disk size"
1030 },
1031 {
1032 .name = BLOCK_OPT_SUBFMT,
1033 .type = QEMU_OPT_STRING,
1034 .help =
1035 "Type of virtual hard disk format. Supported formats are "
1036 "{dynamic (default) | fixed} "
1037 },
fb9245c2
JC
1038 {
1039 .name = VPC_OPT_FORCE_SIZE,
1040 .type = QEMU_OPT_BOOL,
1041 .help = "Force disk size calculation to use the actual size "
1042 "specified, rather than using the nearest CHS-based "
1043 "calculation"
1044 },
fec9921f
CL
1045 { /* end of list */ }
1046 }
0e7e1989
KW
1047};
1048
5efa9d5a 1049static BlockDriver bdrv_vpc = {
4a411185
KW
1050 .format_name = "vpc",
1051 .instance_size = sizeof(BDRVVPCState),
c68b89ac 1052
72c6cc94
KW
1053 .bdrv_probe = vpc_probe,
1054 .bdrv_open = vpc_open,
1055 .bdrv_close = vpc_close,
1056 .bdrv_reopen_prepare = vpc_reopen_prepare,
c282e1fd 1057 .bdrv_create = vpc_create,
0e7e1989 1058
0cc84887
KW
1059 .bdrv_read = vpc_co_read,
1060 .bdrv_write = vpc_co_write,
1061 .bdrv_co_get_block_status = vpc_co_get_block_status,
c68b89ac 1062
97b00e28
PB
1063 .bdrv_get_info = vpc_get_info,
1064
fec9921f 1065 .create_opts = &vpc_create_opts,
72c6cc94 1066 .bdrv_has_zero_init = vpc_has_zero_init,
6a0f9e82 1067};
5efa9d5a
AL
1068
1069static void bdrv_vpc_init(void)
1070{
1071 bdrv_register(&bdrv_vpc);
1072}
1073
1074block_init(bdrv_vpc_init);