]> git.proxmox.com Git - mirror_qemu.git/blame - block/vpc.c
Merge remote-tracking branch 'sstabellini/tags/xen-20161122-tag' into staging
[mirror_qemu.git] / block / vpc.c
CommitLineData
6a0f9e82 1/*
cc2040f8 2 * Block driver for Connectix / Microsoft Virtual PC images
5fafdf24 3 *
6a0f9e82 4 * Copyright (c) 2005 Alex Beregszaszi
15d35bc5 5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
5fafdf24 6 *
6a0f9e82
FB
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
80c71a24 25#include "qemu/osdep.h"
da34e65c 26#include "qapi/error.h"
faf07963 27#include "qemu-common.h"
737e150e 28#include "block/block_int.h"
b8f45cdf 29#include "sysemu/block-backend.h"
1de7afc9 30#include "qemu/module.h"
caf71f86 31#include "migration/migration.h"
58369e22 32#include "qemu/bswap.h"
38440a21 33#include "qemu/uuid.h"
6a0f9e82
FB
34
35/**************************************************************/
36
/* Size in bytes of the VHD footer and of one dynamic-disk header read. */
#define HEADER_SIZE 512

/* Optional page-entry cache; disabled, the guarded code is not built. */
//#define CACHE

/* Disk type values stored (big-endian) in the footer's 'type' field. */
enum vhd_type {
    VHD_FIXED           = 2,
    VHD_DYNAMIC         = 3,
    VHD_DIFFERENCING    = 4,
};

/*
 * Unix time of Jan 1, 2000 0:00:00 (UTC). VHD timestamps count seconds
 * from this instant, so it is subtracted from time() when a footer is
 * written.
 */
#define VHD_TIMESTAMP_BASE 946684800

/* Largest cylinder / head / sector values used for a VHD CHS geometry. */
#define VHD_CHS_MAX_C   65535LL
#define VHD_CHS_MAX_H   16
#define VHD_CHS_MAX_S   255

#define VHD_MAX_SECTORS       0xff000000    /* 2040 GiB max image size */
/* Sector count described by the maximum CHS geometry. */
#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)

/* Name of the image-creation option that forces the use of current_size. */
#define VPC_OPT_FORCE_SIZE "force_size"
97f1c45c 58
/*
 * On-disk hard disk footer. All multi-byte fields are always big-endian
 * and must be converted with be*_to_cpu()/cpu_to_be*() on access. The
 * structure is packed, so field order and sizes define the file layout.
 */
typedef struct vhd_footer {
    char        creator[8]; /* "conectix" */
    uint32_t    features;
    uint32_t    version;

    /* Offset of next header structure, 0xFFFFFFFF if none */
    uint64_t    data_offset;

    /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
    uint32_t    timestamp;

    char        creator_app[4]; /* e.g., "vpc " */
    uint16_t    major;
    uint16_t    minor;
    char        creator_os[4]; /* "Wi2k" */

    uint64_t    orig_size;
    uint64_t    current_size;   /* image size in bytes */

    /* CHS geometry; cyls * heads * secs_per_cyl gives a sector count */
    uint16_t    cyls;
    uint8_t     heads;
    uint8_t     secs_per_cyl;

    uint32_t    type;           /* one of enum vhd_type */

    /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
       the bytes in the footer without the checksum field") */
    uint32_t    checksum;

    /* UUID used to identify a parent hard disk (backing file) */
    QemuUUID    uuid;

    uint8_t     in_saved_state;
} QEMU_PACKED VHDFooter;
b9fa33a6 94
/*
 * On-disk header of a dynamic disk, located at the footer's data_offset.
 * Multi-byte fields are converted with be*_to_cpu()/cpu_to_be*() wherever
 * this file accesses them. The structure is packed: field order and sizes
 * define the file layout.
 */
typedef struct vhd_dyndisk_header {
    char        magic[8]; /* "cxsparse" */

    /* Offset of next header structure, 0xFFFFFFFF if none */
    uint64_t    data_offset;

    /* Offset of the Block Allocation Table (BAT) */
    uint64_t    table_offset;

    uint32_t    version;
    uint32_t    max_table_entries; /* 32bit/entry */

    /* 2 MB by default, must be a power of two */
    uint32_t    block_size;

    uint32_t    checksum;
    uint8_t     parent_uuid[16];
    uint32_t    parent_timestamp;
    uint32_t    reserved;

    /* Backing file name (in UTF-16) */
    uint8_t     parent_name[512];

    /* Parent locator entries; not read or written by the code shown here */
    struct {
        uint32_t    platform;
        uint32_t    data_space;
        uint32_t    data_length;
        uint32_t    reserved;
        uint64_t    data_offset;
    } parent_locator[8];
} QEMU_PACKED VHDDynDiskHeader;
6a0f9e82
FB
126
/* Per-image driver state, stored in BlockDriverState.opaque. */
typedef struct BDRVVPCState {
    /* Serializes dynamic-disk reads and writes */
    CoMutex lock;
    /* Raw footer as read from the image (fields stay big-endian) */
    uint8_t footer_buf[HEADER_SIZE];
    /* Byte offset at which the next block is allocated and the footer
     * is rewritten */
    uint64_t free_data_block_offset;
    /* Number of entries in pagetable */
    int max_table_entries;
    /* In-memory BAT in host byte order; entries are 512-byte sector
     * numbers, 0xFFFFFFFF marks an unallocated block */
    uint32_t *pagetable;
    /* Byte offset of the BAT in the image file */
    uint64_t bat_offset;
    /* Offset of the block bitmap most recently filled on a write */
    uint64_t last_bitmap_offset;

    /* Data bytes per block (dynamic disks only) */
    uint32_t block_size;
    /* Size in bytes of the bitmap preceding each block's data */
    uint32_t bitmap_size;
    /* Size-calculation overrides set from the runtime options */
    bool force_use_chs;
    bool force_use_sz;

#ifdef CACHE
    /* NOTE(review): vpc_open's CACHE section assigns s->last_pagetable,
     * which is not declared here - confirm before enabling CACHE. */
    uint8_t *pageentry_u8;
    uint32_t *pageentry_u32;
    uint16_t *pageentry_u16;

    uint64_t last_bitmap;
#endif

    /* Blocker registered with migrate_add_blocker() at open time */
    Error *migration_blocker;
} BDRVVPCState;
151
c540d53a
JC
/* Runtime option selecting how the virtual disk size is determined. */
#define VPC_OPT_SIZE_CALC "force_size_calc"
/*
 * Options accepted when opening an image. The single string option is
 * interpreted by vpc_parse_options(), which accepts "chs" or
 * "current_size".
 */
static QemuOptsList vpc_runtime_opts = {
    .name = "vpc-runtime-opts",
    .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
    .desc = {
        {
            .name = VPC_OPT_SIZE_CALC,
            .type = QEMU_OPT_STRING,
            .help = "Force disk size calculation to use either CHS geometry, "
                    "or use the disk current_size specified in the VHD footer. "
                    "{chs, current_size}"
        },
        { /* end of list */ }
    }
};
167
57c7d9e5
AL
/*
 * Computes the VHD checksum of a buffer: the one's complement of the
 * 32-bit sum of all its bytes.
 *
 * @buf:  bytes to sum (not modified)
 * @size: number of bytes in @buf
 *
 * Returns the checksum in host byte order. The caller is responsible for
 * zeroing any embedded checksum field before calling.
 */
static uint32_t vpc_checksum(const uint8_t *buf, size_t size)
{
    uint32_t res = 0;
    size_t i;

    /* size_t index: matches the type of @size, no signed/unsigned compare */
    for (i = 0; i < size; i++) {
        res += buf[i];
    }

    return ~res;
}
178
179
6a0f9e82
FB
/*
 * Format probe: reports a score of 100 when the buffer begins with the
 * 8-byte "conectix" footer cookie, and 0 otherwise. @filename is unused.
 */
static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    static const char cookie[] = "conectix";
    const int cookie_len = 8;

    if (buf_size < cookie_len) {
        return 0;
    }

    return memcmp(buf, cookie, cookie_len) == 0 ? 100 : 0;
}
186
c540d53a
JC
187static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
188 Error **errp)
189{
190 BDRVVPCState *s = bs->opaque;
191 const char *size_calc;
192
193 size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
194
195 if (!size_calc) {
196 /* no override, use autodetect only */
197 } else if (!strcmp(size_calc, "current_size")) {
198 s->force_use_sz = true;
199 } else if (!strcmp(size_calc, "chs")) {
200 s->force_use_chs = true;
201 } else {
202 error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
203 }
204}
205
015a1036
HR
206static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
207 Error **errp)
6a0f9e82
FB
208{
209 BDRVVPCState *s = bs->opaque;
66f82cee 210 int i;
e54835c0
JC
211 VHDFooter *footer;
212 VHDDynDiskHeader *dyndisk_header;
c540d53a
JC
213 QemuOpts *opts = NULL;
214 Error *local_err = NULL;
215 bool use_chs;
b9fa33a6 216 uint8_t buf[HEADER_SIZE];
57c7d9e5 217 uint32_t checksum;
97f1c45c 218 uint64_t computed_size;
b15deac7 219 uint64_t pagetable_size;
24da78db 220 int disk_type = VHD_DYNAMIC;
59294e46 221 int ret;
6a0f9e82 222
c540d53a
JC
223 opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
224 qemu_opts_absorb_qdict(opts, options, &local_err);
225 if (local_err) {
226 error_propagate(errp, local_err);
227 ret = -EINVAL;
228 goto fail;
229 }
230
231 vpc_parse_options(bs, opts, &local_err);
232 if (local_err) {
233 error_propagate(errp, local_err);
234 ret = -EINVAL;
235 goto fail;
236 }
237
cf2ab8fc 238 ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
59294e46 239 if (ret < 0) {
32f6439c 240 error_setg(errp, "Unable to read VHD header");
6a0f9e82 241 goto fail;
59294e46 242 }
6a0f9e82 243
e54835c0 244 footer = (VHDFooter *) s->footer_buf;
24da78db 245 if (strncmp(footer->creator, "conectix", 8)) {
9a4f4c31 246 int64_t offset = bdrv_getlength(bs->file->bs);
59294e46
KW
247 if (offset < 0) {
248 ret = offset;
32f6439c 249 error_setg(errp, "Invalid file size");
59294e46
KW
250 goto fail;
251 } else if (offset < HEADER_SIZE) {
252 ret = -EINVAL;
32f6439c 253 error_setg(errp, "File too small for a VHD header");
24da78db
CA
254 goto fail;
255 }
59294e46 256
24da78db 257 /* If a fixed disk, the footer is found only at the end of the file */
cf2ab8fc 258 ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
59294e46
KW
259 HEADER_SIZE);
260 if (ret < 0) {
24da78db
CA
261 goto fail;
262 }
263 if (strncmp(footer->creator, "conectix", 8)) {
76abe407
PB
264 error_setg(errp, "invalid VPC image");
265 ret = -EINVAL;
24da78db
CA
266 goto fail;
267 }
268 disk_type = VHD_FIXED;
269 }
6a0f9e82 270
57c7d9e5
AL
271 checksum = be32_to_cpu(footer->checksum);
272 footer->checksum = 0;
273 if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
274 fprintf(stderr, "block-vpc: The header checksum of '%s' is "
66f82cee 275 "incorrect.\n", bs->filename);
57c7d9e5 276
c088b691 277 /* Write 'checksum' back to footer, or else will leave it with zero. */
a4127c42 278 footer->checksum = cpu_to_be32(checksum);
c088b691 279
9c057d0b
JC
280 /* The visible size of a image in Virtual PC depends on the geometry
281 rather than on the size stored in the footer (the size in the footer
282 is too large usually) */
33ccf667
SH
283 bs->total_sectors = (int64_t)
284 be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
1fa79228 285
c540d53a
JC
286 /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
287 * VHD image sizes differently. VPC will rely on CHS geometry,
288 * while Hyper-V and disk2vhd use the size specified in the footer.
289 *
290 * We use a couple of approaches to try and determine the correct method:
291 * look at the Creator App field, and look for images that have CHS
292 * geometry that is the maximum value.
293 *
294 * If the CHS geometry is the maximum CHS geometry, then we assume that
295 * the size is the footer->current_size to avoid truncation. Otherwise,
296 * we follow the table based on footer->creator_app:
297 *
298 * Known creator apps:
299 * 'vpc ' : CHS Virtual PC (uses disk geometry)
300 * 'qemu' : CHS QEMU (uses disk geometry)
fb9245c2 301 * 'qem2' : current_size QEMU (uses current_size)
c540d53a
JC
302 * 'win ' : current_size Hyper-V
303 * 'd2v ' : current_size Disk2vhd
9bdfb9e8 304 * 'tap\0' : current_size XenServer
bab246db 305 * 'CTXS' : current_size XenConverter
c540d53a
JC
306 *
307 * The user can override the table values via drive options, however
308 * even with an override we will still use current_size for images
309 * that have CHS geometry of the maximum size.
310 */
311 use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
fb9245c2 312 !!strncmp(footer->creator_app, "qem2", 4) &&
9bdfb9e8 313 !!strncmp(footer->creator_app, "d2v ", 4) &&
bab246db 314 !!strncmp(footer->creator_app, "CTXS", 4) &&
9bdfb9e8 315 !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
c540d53a
JC
316
317 if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
03671ded 318 bs->total_sectors = be64_to_cpu(footer->current_size) /
c540d53a 319 BDRV_SECTOR_SIZE;
0173e7bb
PL
320 }
321
c23fb11b
JC
322 /* Allow a maximum disk size of 2040 GiB */
323 if (bs->total_sectors > VHD_MAX_SECTORS) {
59294e46 324 ret = -EFBIG;
efc8243d
SH
325 goto fail;
326 }
327
24da78db 328 if (disk_type == VHD_DYNAMIC) {
cf2ab8fc 329 ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
59294e46
KW
330 HEADER_SIZE);
331 if (ret < 0) {
32f6439c 332 error_setg(errp, "Error reading dynamic VHD header");
24da78db
CA
333 goto fail;
334 }
b9fa33a6 335
e54835c0 336 dyndisk_header = (VHDDynDiskHeader *) buf;
6a0f9e82 337
24da78db 338 if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
32f6439c 339 error_setg(errp, "Invalid header magic");
59294e46 340 ret = -EINVAL;
24da78db
CA
341 goto fail;
342 }
6a0f9e82 343
24da78db 344 s->block_size = be32_to_cpu(dyndisk_header->block_size);
5e71dfad
KW
345 if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
346 error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
347 ret = -EINVAL;
348 goto fail;
349 }
24da78db 350 s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
15d35bc5 351
24da78db 352 s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
97f1c45c
JC
353
354 if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
32f6439c 355 error_setg(errp, "Too many blocks");
97f1c45c
JC
356 ret = -EINVAL;
357 goto fail;
358 }
97f1c45c
JC
359
360 computed_size = (uint64_t) s->max_table_entries * s->block_size;
361 if (computed_size < bs->total_sectors * 512) {
32f6439c 362 error_setg(errp, "Page table too small");
97f1c45c
JC
363 ret = -EINVAL;
364 goto fail;
365 }
366
b15deac7
JC
367 if (s->max_table_entries > SIZE_MAX / 4 ||
368 s->max_table_entries > (int) INT_MAX / 4) {
369 error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
370 s->max_table_entries);
371 ret = -EINVAL;
372 goto fail;
373 }
374
375 pagetable_size = (uint64_t) s->max_table_entries * 4;
376
9a4f4c31 377 s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
5fb09cd5 378 if (s->pagetable == NULL) {
32f6439c 379 error_setg(errp, "Unable to allocate memory for page table");
5fb09cd5
KW
380 ret = -ENOMEM;
381 goto fail;
382 }
b71d1c2e 383
24da78db 384 s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
59294e46 385
cf2ab8fc 386 ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
9a4f4c31 387 pagetable_size);
59294e46 388 if (ret < 0) {
32f6439c 389 error_setg(errp, "Error reading pagetable");
24da78db
CA
390 goto fail;
391 }
b71d1c2e 392
24da78db 393 s->free_data_block_offset =
b15deac7 394 ROUND_UP(s->bat_offset + pagetable_size, 512);
15d35bc5 395
24da78db
CA
396 for (i = 0; i < s->max_table_entries; i++) {
397 be32_to_cpus(&s->pagetable[i]);
398 if (s->pagetable[i] != 0xFFFFFFFF) {
399 int64_t next = (512 * (int64_t) s->pagetable[i]) +
400 s->bitmap_size + s->block_size;
15d35bc5 401
24da78db
CA
402 if (next > s->free_data_block_offset) {
403 s->free_data_block_offset = next;
404 }
405 }
15d35bc5 406 }
15d35bc5 407
9a4f4c31 408 if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
fb8fe35f
PL
409 error_setg(errp, "block-vpc: free_data_block_offset points after "
410 "the end of file. The image has been truncated.");
411 ret = -EINVAL;
412 goto fail;
413 }
414
24da78db 415 s->last_bitmap_offset = (int64_t) -1;
6a0f9e82 416
6a0f9e82 417#ifdef CACHE
24da78db
CA
418 s->pageentry_u8 = g_malloc(512);
419 s->pageentry_u32 = s->pageentry_u8;
420 s->pageentry_u16 = s->pageentry_u8;
421 s->last_pagetable = -1;
6a0f9e82 422#endif
24da78db 423 }
6a0f9e82 424
848c66e8 425 qemu_co_mutex_init(&s->lock);
612ff3d8
KW
426
427 /* Disable migration when VHD images are used */
81e5f78a
AG
428 error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
429 "does not support live migration",
430 bdrv_get_device_or_node_name(bs));
612ff3d8
KW
431 migrate_add_blocker(s->migration_blocker);
432
6a0f9e82 433 return 0;
59294e46
KW
434
435fail:
97f1c45c 436 qemu_vfree(s->pagetable);
59294e46
KW
437#ifdef CACHE
438 g_free(s->pageentry_u8);
439#endif
440 return ret;
6a0f9e82
FB
441}
442
3fe4b700
JC
443static int vpc_reopen_prepare(BDRVReopenState *state,
444 BlockReopenQueue *queue, Error **errp)
445{
446 return 0;
447}
448
b71d1c2e
AL
449/*
450 * Returns the absolute byte offset of the given sector in the image file.
451 * If the sector is not allocated, -1 is returned instead.
15d35bc5
AL
452 *
453 * The parameter write must be 1 if the offset will be used for a write
454 * operation (the block bitmaps is updated then), 0 otherwise.
b71d1c2e 455 */
d46b7cc6
KW
456static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
457 bool write)
6a0f9e82
FB
458{
459 BDRVVPCState *s = bs->opaque;
6a0f9e82 460 uint64_t bitmap_offset, block_offset;
d46b7cc6 461 uint32_t pagetable_index, offset_in_block;
6a0f9e82 462
2cfacb62 463 pagetable_index = offset / s->block_size;
d46b7cc6 464 offset_in_block = offset % s->block_size;
3b46e624 465
15d35bc5 466 if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
9c057d0b 467 return -1; /* not allocated */
6a0f9e82 468
378e2aea 469 bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
d46b7cc6 470 block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
15d35bc5 471
9c057d0b
JC
472 /* We must ensure that we don't write to any sectors which are marked as
473 unused in the bitmap. We get away with setting all bits in the block
474 bitmap each time we write to a new block. This might cause Virtual PC to
475 miss sparse read optimization, but it's not a problem in terms of
476 correctness. */
15d35bc5
AL
477 if (write && (s->last_bitmap_offset != bitmap_offset)) {
478 uint8_t bitmap[s->bitmap_size];
479
480 s->last_bitmap_offset = bitmap_offset;
481 memset(bitmap, 0xff, s->bitmap_size);
d9ca2ea2 482 bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
15d35bc5 483 }
3b46e624 484
b71d1c2e 485 return block_offset;
6a0f9e82
FB
486}
487
d46b7cc6
KW
488static inline int64_t get_sector_offset(BlockDriverState *bs,
489 int64_t sector_num, bool write)
490{
491 return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
492}
493
15d35bc5
AL
494/*
495 * Writes the footer to the end of the image file. This is needed when the
496 * file grows as it overwrites the old footer
497 *
498 * Returns 0 on success and < 0 on error
499 */
500static int rewrite_footer(BlockDriverState* bs)
501{
502 int ret;
503 BDRVVPCState *s = bs->opaque;
504 int64_t offset = s->free_data_block_offset;
505
d9ca2ea2 506 ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
15d35bc5
AL
507 if (ret < 0)
508 return ret;
509
510 return 0;
511}
512
513/*
514 * Allocates a new block. This involves writing a new footer and updating
515 * the Block Allocation Table to use the space at the old end of the image
516 * file (overwriting the old footer)
517 *
518 * Returns the sectors' offset in the image file on success and < 0 on error
519 */
513b0f02 520static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
15d35bc5
AL
521{
522 BDRVVPCState *s = bs->opaque;
523 int64_t bat_offset;
524 uint32_t index, bat_value;
525 int ret;
526 uint8_t bitmap[s->bitmap_size];
527
9c057d0b 528 /* Check if sector_num is valid */
513b0f02
KW
529 if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
530 return -EINVAL;
531 }
15d35bc5 532
9c057d0b 533 /* Write entry into in-memory BAT */
513b0f02
KW
534 index = offset / s->block_size;
535 assert(s->pagetable[index] == 0xFFFFFFFF);
15d35bc5
AL
536 s->pagetable[index] = s->free_data_block_offset / 512;
537
9c057d0b 538 /* Initialize the block's bitmap */
15d35bc5 539 memset(bitmap, 0xff, s->bitmap_size);
d9ca2ea2 540 ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
078a458e 541 s->bitmap_size);
5bb1cbac
KW
542 if (ret < 0) {
543 return ret;
544 }
15d35bc5 545
9c057d0b 546 /* Write new footer (the old one will be overwritten) */
15d35bc5
AL
547 s->free_data_block_offset += s->block_size + s->bitmap_size;
548 ret = rewrite_footer(bs);
549 if (ret < 0)
550 goto fail;
551
9c057d0b 552 /* Write BAT entry to disk */
15d35bc5 553 bat_offset = s->bat_offset + (4 * index);
a4127c42 554 bat_value = cpu_to_be32(s->pagetable[index]);
d9ca2ea2 555 ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
15d35bc5
AL
556 if (ret < 0)
557 goto fail;
558
513b0f02 559 return get_image_offset(bs, offset, false);
15d35bc5
AL
560
561fail:
562 s->free_data_block_offset -= (s->block_size + s->bitmap_size);
513b0f02 563 return ret;
15d35bc5
AL
564}
565
97b00e28
PB
566static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
567{
568 BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
569 VHDFooter *footer = (VHDFooter *) s->footer_buf;
570
0d4cc3e7 571 if (be32_to_cpu(footer->type) != VHD_FIXED) {
97b00e28
PB
572 bdi->cluster_size = s->block_size;
573 }
574
95de6d70 575 bdi->unallocated_blocks_are_zero = true;
97b00e28
PB
576 return 0;
577}
578
d46b7cc6
KW
579static int coroutine_fn
580vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
581 QEMUIOVector *qiov, int flags)
6a0f9e82 582{
6c6ea921 583 BDRVVPCState *s = bs->opaque;
6a0f9e82 584 int ret;
d46b7cc6
KW
585 int64_t image_offset;
586 int64_t n_bytes;
587 int64_t bytes_done = 0;
e54835c0 588 VHDFooter *footer = (VHDFooter *) s->footer_buf;
d46b7cc6 589 QEMUIOVector local_qiov;
6a0f9e82 590
0d4cc3e7 591 if (be32_to_cpu(footer->type) == VHD_FIXED) {
a03ef88f 592 return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0);
24da78db 593 }
b71d1c2e 594
d46b7cc6
KW
595 qemu_co_mutex_lock(&s->lock);
596 qemu_iovec_init(&local_qiov, qiov->niov);
6c6ea921 597
d46b7cc6
KW
598 while (bytes > 0) {
599 image_offset = get_image_offset(bs, offset, false);
600 n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
601
602 if (image_offset == -1) {
603 qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
b71d1c2e 604 } else {
d46b7cc6
KW
605 qemu_iovec_reset(&local_qiov);
606 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
607
a03ef88f 608 ret = bdrv_co_preadv(bs->file, image_offset, n_bytes,
d46b7cc6
KW
609 &local_qiov, 0);
610 if (ret < 0) {
611 goto fail;
6c6ea921 612 }
b71d1c2e
AL
613 }
614
d46b7cc6
KW
615 bytes -= n_bytes;
616 offset += n_bytes;
617 bytes_done += n_bytes;
6a0f9e82 618 }
6a0f9e82 619
d46b7cc6
KW
620 ret = 0;
621fail:
622 qemu_iovec_destroy(&local_qiov);
2914caa0 623 qemu_co_mutex_unlock(&s->lock);
d46b7cc6 624
2914caa0
PB
625 return ret;
626}
627
513b0f02
KW
628static int coroutine_fn
629vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
630 QEMUIOVector *qiov, int flags)
15d35bc5 631{
6c6ea921 632 BDRVVPCState *s = bs->opaque;
513b0f02
KW
633 int64_t image_offset;
634 int64_t n_bytes;
635 int64_t bytes_done = 0;
15d35bc5 636 int ret;
e54835c0 637 VHDFooter *footer = (VHDFooter *) s->footer_buf;
513b0f02 638 QEMUIOVector local_qiov;
15d35bc5 639
0d4cc3e7 640 if (be32_to_cpu(footer->type) == VHD_FIXED) {
a03ef88f 641 return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0);
24da78db 642 }
15d35bc5 643
513b0f02
KW
644 qemu_co_mutex_lock(&s->lock);
645 qemu_iovec_init(&local_qiov, qiov->niov);
646
647 while (bytes > 0) {
648 image_offset = get_image_offset(bs, offset, true);
649 n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
6c6ea921 650
513b0f02
KW
651 if (image_offset == -1) {
652 image_offset = alloc_block(bs, offset);
653 if (image_offset < 0) {
654 ret = image_offset;
655 goto fail;
656 }
15d35bc5
AL
657 }
658
513b0f02
KW
659 qemu_iovec_reset(&local_qiov);
660 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
661
a03ef88f 662 ret = bdrv_co_pwritev(bs->file, image_offset, n_bytes,
513b0f02
KW
663 &local_qiov, 0);
664 if (ret < 0) {
665 goto fail;
6c6ea921 666 }
15d35bc5 667
513b0f02
KW
668 bytes -= n_bytes;
669 offset += n_bytes;
670 bytes_done += n_bytes;
15d35bc5
AL
671 }
672
513b0f02
KW
673 ret = 0;
674fail:
675 qemu_iovec_destroy(&local_qiov);
e183ef75 676 qemu_co_mutex_unlock(&s->lock);
513b0f02 677
e183ef75
PB
678 return ret;
679}
680
0cc84887 681static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
67a0fd2a 682 int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
0cc84887
KW
683{
684 BDRVVPCState *s = bs->opaque;
685 VHDFooter *footer = (VHDFooter*) s->footer_buf;
2ec711dc 686 int64_t start, offset;
0cc84887
KW
687 bool allocated;
688 int n;
689
690 if (be32_to_cpu(footer->type) == VHD_FIXED) {
691 *pnum = nb_sectors;
7429e207 692 *file = bs->file->bs;
0cc84887
KW
693 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
694 (sector_num << BDRV_SECTOR_BITS);
695 }
696
697 offset = get_sector_offset(bs, sector_num, 0);
698 start = offset;
699 allocated = (offset != -1);
700 *pnum = 0;
701
702 do {
703 /* All sectors in a block are contiguous (without using the bitmap) */
704 n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
705 - sector_num;
706 n = MIN(n, nb_sectors);
707
708 *pnum += n;
709 sector_num += n;
710 nb_sectors -= n;
2ec711dc
PL
711 /* *pnum can't be greater than one block for allocated
712 * sectors since there is always a bitmap in between. */
713 if (allocated) {
7429e207 714 *file = bs->file->bs;
2ec711dc
PL
715 return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
716 }
0cc84887
KW
717 if (nb_sectors == 0) {
718 break;
719 }
0cc84887 720 offset = get_sector_offset(bs, sector_num, 0);
2ec711dc 721 } while (offset == -1);
0cc84887 722
2ec711dc 723 return 0;
0cc84887
KW
724}
725
57c7d9e5
AL
726/*
727 * Calculates the number of cylinders, heads and sectors per cylinder
728 * based on a given number of sectors. This is the algorithm described
729 * in the VHD specification.
730 *
731 * Note that the geometry doesn't always exactly match total_sectors but
732 * may round it down.
6e9ea0c0 733 *
c23fb11b 734 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
258d2edb
CA
735 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
736 * and instead allow up to 255 heads.
57c7d9e5 737 */
6e9ea0c0 738static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
57c7d9e5
AL
739 uint8_t* heads, uint8_t* secs_per_cyl)
740{
741 uint32_t cyls_times_heads;
742
690cbb09 743 total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
57c7d9e5 744
690cbb09 745 if (total_sectors >= 65535LL * 16 * 63) {
57c7d9e5 746 *secs_per_cyl = 255;
690cbb09 747 *heads = 16;
57c7d9e5
AL
748 cyls_times_heads = total_sectors / *secs_per_cyl;
749 } else {
750 *secs_per_cyl = 17;
751 cyls_times_heads = total_sectors / *secs_per_cyl;
752 *heads = (cyls_times_heads + 1023) / 1024;
753
690cbb09 754 if (*heads < 4) {
57c7d9e5 755 *heads = 4;
690cbb09 756 }
57c7d9e5
AL
757
758 if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
759 *secs_per_cyl = 31;
760 *heads = 16;
761 cyls_times_heads = total_sectors / *secs_per_cyl;
762 }
763
764 if (cyls_times_heads >= (*heads * 1024)) {
765 *secs_per_cyl = 63;
766 *heads = 16;
767 cyls_times_heads = total_sectors / *secs_per_cyl;
768 }
769 }
770
dede4188 771 *cyls = cyls_times_heads / *heads;
6e9ea0c0
AJ
772
773 return 0;
57c7d9e5
AL
774}
775
b8f45cdf 776static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
fef6070e 777 int64_t total_sectors)
57c7d9e5 778{
e54835c0
JC
779 VHDDynDiskHeader *dyndisk_header =
780 (VHDDynDiskHeader *) buf;
57c7d9e5 781 size_t block_size, num_bat_entries;
24da78db 782 int i;
fef6070e
JC
783 int ret;
784 int64_t offset = 0;
57c7d9e5 785
9c057d0b 786 /* Write the footer (twice: at the beginning and at the end) */
57c7d9e5
AL
787 block_size = 0x200000;
788 num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
789
8341f00d 790 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
40a99aac 791 if (ret < 0) {
f0ff243a
BS
792 goto fail;
793 }
57c7d9e5 794
fef6070e 795 offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
8341f00d 796 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
fef6070e 797 if (ret < 0) {
f0ff243a
BS
798 goto fail;
799 }
57c7d9e5 800
9c057d0b 801 /* Write the initial BAT */
fef6070e 802 offset = 3 * 512;
57c7d9e5
AL
803
804 memset(buf, 0xFF, 512);
f0ff243a 805 for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
8341f00d 806 ret = blk_pwrite(blk, offset, buf, 512, 0);
fef6070e 807 if (ret < 0) {
f0ff243a
BS
808 goto fail;
809 }
fef6070e 810 offset += 512;
f0ff243a 811 }
57c7d9e5 812
9c057d0b 813 /* Prepare the Dynamic Disk Header */
57c7d9e5
AL
814 memset(buf, 0, 1024);
815
5ec4d682 816 memcpy(dyndisk_header->magic, "cxsparse", 8);
57c7d9e5 817
78439f6a
CA
818 /*
819 * Note: The spec is actually wrong here for data_offset, it says
820 * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
821 */
a4127c42
SH
822 dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
823 dyndisk_header->table_offset = cpu_to_be64(3 * 512);
824 dyndisk_header->version = cpu_to_be32(0x00010000);
825 dyndisk_header->block_size = cpu_to_be32(block_size);
826 dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
57c7d9e5 827
a4127c42 828 dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
57c7d9e5 829
9c057d0b 830 /* Write the header */
fef6070e 831 offset = 512;
57c7d9e5 832
8341f00d 833 ret = blk_pwrite(blk, offset, buf, 1024, 0);
fef6070e 834 if (ret < 0) {
f0ff243a
BS
835 goto fail;
836 }
f0ff243a 837
24da78db
CA
838 fail:
839 return ret;
840}
841
b8f45cdf 842static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
fef6070e 843 int64_t total_size)
24da78db 844{
fef6070e 845 int ret;
24da78db
CA
846
847 /* Add footer to total size */
fef6070e
JC
848 total_size += HEADER_SIZE;
849
b8f45cdf 850 ret = blk_truncate(blk, total_size);
fef6070e
JC
851 if (ret < 0) {
852 return ret;
24da78db
CA
853 }
854
8341f00d 855 ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
fef6070e
JC
856 if (ret < 0) {
857 return ret;
858 }
24da78db 859
24da78db
CA
860 return ret;
861}
862
fec9921f 863static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
24da78db
CA
864{
865 uint8_t buf[1024];
e54835c0 866 VHDFooter *footer = (VHDFooter *) buf;
fec9921f 867 char *disk_type_param;
fef6070e 868 int i;
24da78db
CA
869 uint16_t cyls = 0;
870 uint8_t heads = 0;
871 uint8_t secs_per_cyl = 0;
872 int64_t total_sectors;
873 int64_t total_size;
874 int disk_type;
875 int ret = -EIO;
fb9245c2 876 bool force_size;
fef6070e 877 Error *local_err = NULL;
b8f45cdf 878 BlockBackend *blk = NULL;
24da78db
CA
879
880 /* Read out options */
c2eb918e
HT
881 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
882 BDRV_SECTOR_SIZE);
fec9921f
CL
883 disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
884 if (disk_type_param) {
885 if (!strcmp(disk_type_param, "dynamic")) {
24da78db 886 disk_type = VHD_DYNAMIC;
fec9921f 887 } else if (!strcmp(disk_type_param, "fixed")) {
24da78db
CA
888 disk_type = VHD_FIXED;
889 } else {
0211b9be 890 error_setg(errp, "Invalid disk type, %s", disk_type_param);
fec9921f
CL
891 ret = -EINVAL;
892 goto out;
24da78db
CA
893 }
894 } else {
895 disk_type = VHD_DYNAMIC;
896 }
897
fb9245c2
JC
898 force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
899
fef6070e
JC
900 ret = bdrv_create_file(filename, opts, &local_err);
901 if (ret < 0) {
902 error_propagate(errp, local_err);
fec9921f 903 goto out;
24da78db 904 }
b8f45cdf 905
efaa7c4e 906 blk = blk_new_open(filename, NULL, NULL,
72e775c7 907 BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
b8f45cdf 908 if (blk == NULL) {
fef6070e 909 error_propagate(errp, local_err);
b8f45cdf 910 ret = -EIO;
fef6070e 911 goto out;
4ab15590
CL
912 }
913
b8f45cdf
KW
914 blk_set_allow_write_beyond_eof(blk, true);
915
ecd880d9
KW
916 /*
917 * Calculate matching total_size and geometry. Increase the number of
918 * sectors requested until we get enough (or fail). This ensures that
919 * qemu-img convert doesn't truncate images, but rather rounds up.
690cbb09 920 *
fb9245c2 921 * If the image size can't be represented by a spec conformant CHS geometry,
690cbb09
PL
922 * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
923 * the image size from the VHD footer to calculate total_sectors.
ecd880d9 924 */
fb9245c2
JC
925 if (force_size) {
926 /* This will force the use of total_size for sector count, below */
927 cyls = VHD_CHS_MAX_C;
928 heads = VHD_CHS_MAX_H;
929 secs_per_cyl = VHD_CHS_MAX_S;
930 } else {
931 total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
932 for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
933 calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
934 }
690cbb09
PL
935 }
936
937 if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
938 total_sectors = total_size / BDRV_SECTOR_SIZE;
c23fb11b 939 /* Allow a maximum disk size of 2040 GiB */
690cbb09 940 if (total_sectors > VHD_MAX_SECTORS) {
0211b9be 941 error_setg(errp, "Disk size is too large, max size is 2040 GiB");
24da78db 942 ret = -EFBIG;
fef6070e 943 goto out;
24da78db 944 }
690cbb09
PL
945 } else {
946 total_sectors = (int64_t)cyls * heads * secs_per_cyl;
947 total_size = total_sectors * BDRV_SECTOR_SIZE;
24da78db 948 }
ecd880d9 949
24da78db
CA
950 /* Prepare the Hard Disk Footer */
951 memset(buf, 0, 1024);
952
953 memcpy(footer->creator, "conectix", 8);
fb9245c2
JC
954 if (force_size) {
955 memcpy(footer->creator_app, "qem2", 4);
956 } else {
957 memcpy(footer->creator_app, "qemu", 4);
958 }
24da78db
CA
959 memcpy(footer->creator_os, "Wi2k", 4);
960
a4127c42
SH
961 footer->features = cpu_to_be32(0x02);
962 footer->version = cpu_to_be32(0x00010000);
24da78db 963 if (disk_type == VHD_DYNAMIC) {
a4127c42 964 footer->data_offset = cpu_to_be64(HEADER_SIZE);
24da78db 965 } else {
a4127c42 966 footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
24da78db 967 }
a4127c42 968 footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
24da78db
CA
969
970 /* Version of Virtual PC 2007 */
a4127c42
SH
971 footer->major = cpu_to_be16(0x0005);
972 footer->minor = cpu_to_be16(0x0003);
3f3f20dc 973 footer->orig_size = cpu_to_be64(total_size);
03671ded 974 footer->current_size = cpu_to_be64(total_size);
a4127c42 975 footer->cyls = cpu_to_be16(cyls);
24da78db
CA
976 footer->heads = heads;
977 footer->secs_per_cyl = secs_per_cyl;
978
a4127c42 979 footer->type = cpu_to_be32(disk_type);
24da78db 980
38440a21 981 qemu_uuid_generate(&footer->uuid);
24da78db 982
a4127c42 983 footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
24da78db
CA
984
985 if (disk_type == VHD_DYNAMIC) {
b8f45cdf 986 ret = create_dynamic_disk(blk, buf, total_sectors);
24da78db 987 } else {
b8f45cdf 988 ret = create_fixed_disk(blk, buf, total_size);
24da78db 989 }
0211b9be
JC
990 if (ret < 0) {
991 error_setg(errp, "Unable to create or write VHD header");
992 }
24da78db 993
fec9921f 994out:
b8f45cdf 995 blk_unref(blk);
fec9921f 996 g_free(disk_type_param);
f0ff243a 997 return ret;
57c7d9e5
AL
998}
999
72c6cc94
KW
1000static int vpc_has_zero_init(BlockDriverState *bs)
1001{
1002 BDRVVPCState *s = bs->opaque;
e54835c0 1003 VHDFooter *footer = (VHDFooter *) s->footer_buf;
72c6cc94 1004
0d4cc3e7 1005 if (be32_to_cpu(footer->type) == VHD_FIXED) {
9a4f4c31 1006 return bdrv_has_zero_init(bs->file->bs);
72c6cc94
KW
1007 } else {
1008 return 1;
1009 }
1010}
1011
6a0f9e82
FB
1012static void vpc_close(BlockDriverState *bs)
1013{
1014 BDRVVPCState *s = bs->opaque;
97f1c45c 1015 qemu_vfree(s->pagetable);
6a0f9e82 1016#ifdef CACHE
7267c094 1017 g_free(s->pageentry_u8);
6a0f9e82 1018#endif
612ff3d8
KW
1019
1020 migrate_del_blocker(s->migration_blocker);
1021 error_free(s->migration_blocker);
6a0f9e82
FB
1022}
1023
fec9921f
CL
1024static QemuOptsList vpc_create_opts = {
1025 .name = "vpc-create-opts",
1026 .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
1027 .desc = {
1028 {
1029 .name = BLOCK_OPT_SIZE,
1030 .type = QEMU_OPT_SIZE,
1031 .help = "Virtual disk size"
1032 },
1033 {
1034 .name = BLOCK_OPT_SUBFMT,
1035 .type = QEMU_OPT_STRING,
1036 .help =
1037 "Type of virtual hard disk format. Supported formats are "
1038 "{dynamic (default) | fixed} "
1039 },
fb9245c2
JC
1040 {
1041 .name = VPC_OPT_FORCE_SIZE,
1042 .type = QEMU_OPT_BOOL,
1043 .help = "Force disk size calculation to use the actual size "
1044 "specified, rather than using the nearest CHS-based "
1045 "calculation"
1046 },
fec9921f
CL
1047 { /* end of list */ }
1048 }
0e7e1989
KW
1049};
1050
5efa9d5a 1051static BlockDriver bdrv_vpc = {
4a411185
KW
1052 .format_name = "vpc",
1053 .instance_size = sizeof(BDRVVPCState),
c68b89ac 1054
72c6cc94
KW
1055 .bdrv_probe = vpc_probe,
1056 .bdrv_open = vpc_open,
1057 .bdrv_close = vpc_close,
1058 .bdrv_reopen_prepare = vpc_reopen_prepare,
c282e1fd 1059 .bdrv_create = vpc_create,
0e7e1989 1060
d46b7cc6 1061 .bdrv_co_preadv = vpc_co_preadv,
513b0f02 1062 .bdrv_co_pwritev = vpc_co_pwritev,
0cc84887 1063 .bdrv_co_get_block_status = vpc_co_get_block_status,
c68b89ac 1064
97b00e28
PB
1065 .bdrv_get_info = vpc_get_info,
1066
fec9921f 1067 .create_opts = &vpc_create_opts,
72c6cc94 1068 .bdrv_has_zero_init = vpc_has_zero_init,
6a0f9e82 1069};
5efa9d5a
AL
1070
1071static void bdrv_vpc_init(void)
1072{
1073 bdrv_register(&bdrv_vpc);
1074}
1075
1076block_init(bdrv_vpc_init);