]> git.proxmox.com Git - mirror_qemu.git/blame - block/vpc.c
block: Pass BdrvChild to bdrv_truncate()
[mirror_qemu.git] / block / vpc.c
CommitLineData
6a0f9e82 1/*
cc2040f8 2 * Block driver for Connectix / Microsoft Virtual PC images
5fafdf24 3 *
6a0f9e82 4 * Copyright (c) 2005 Alex Beregszaszi
15d35bc5 5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
5fafdf24 6 *
6a0f9e82
FB
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
80c71a24 25#include "qemu/osdep.h"
da34e65c 26#include "qapi/error.h"
faf07963 27#include "qemu-common.h"
737e150e 28#include "block/block_int.h"
b8f45cdf 29#include "sysemu/block-backend.h"
1de7afc9 30#include "qemu/module.h"
caf71f86 31#include "migration/migration.h"
58369e22 32#include "qemu/bswap.h"
38440a21 33#include "qemu/uuid.h"
6a0f9e82
FB
34
35/**************************************************************/
36
/* Size in bytes of the VHD footer/header buffers handled by this driver */
#define HEADER_SIZE 512

/*
 * Uncomment to compile in the sections guarded by "#ifdef CACHE" below.
 * #define CACHE
 */
40
2cfacb62
AL
/* Values of the big-endian 'type' field in the VHD footer */
enum vhd_type {
    VHD_FIXED           = 2,
    VHD_DYNAMIC         = 3,
    VHD_DIFFERENCING    = 4,
};
46
/*
 * VHD timestamps count seconds since Jan 1, 2000 0:00:00 (UTC); this is
 * that instant expressed in seconds since the Unix epoch.
 */
#define VHD_TIMESTAMP_BASE 946684800

/* Largest cylinder/head/sector values used for the footer geometry */
#define VHD_CHS_MAX_C   65535LL
#define VHD_CHS_MAX_H   16
#define VHD_CHS_MAX_S   255

/* Upper bound on the image size, in sectors (2040 GiB) */
#define VHD_MAX_SECTORS       0xff000000    /* 2040 GiB max image size */
/* Sector count described by the maximum CHS geometry */
#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)

/* Name of the image-creation option that forces use of the byte size */
#define VPC_OPT_FORCE_SIZE "force_size"
97f1c45c 58
9c057d0b 59/* always big-endian */
e54835c0 60typedef struct vhd_footer {
9c057d0b 61 char creator[8]; /* "conectix" */
2cfacb62
AL
62 uint32_t features;
63 uint32_t version;
64
9c057d0b 65 /* Offset of next header structure, 0xFFFFFFFF if none */
2cfacb62
AL
66 uint64_t data_offset;
67
9c057d0b 68 /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
2cfacb62
AL
69 uint32_t timestamp;
70
9c057d0b 71 char creator_app[4]; /* e.g., "vpc " */
2cfacb62
AL
72 uint16_t major;
73 uint16_t minor;
9c057d0b 74 char creator_os[4]; /* "Wi2k" */
2cfacb62
AL
75
76 uint64_t orig_size;
03671ded 77 uint64_t current_size;
2cfacb62
AL
78
79 uint16_t cyls;
80 uint8_t heads;
81 uint8_t secs_per_cyl;
82
83 uint32_t type;
84
9c057d0b
JC
85 /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
86 the bytes in the footer without the checksum field") */
2cfacb62
AL
87 uint32_t checksum;
88
9c057d0b 89 /* UUID used to identify a parent hard disk (backing file) */
38440a21 90 QemuUUID uuid;
2cfacb62
AL
91
92 uint8_t in_saved_state;
e54835c0 93} QEMU_PACKED VHDFooter;
b9fa33a6 94
e54835c0 95typedef struct vhd_dyndisk_header {
9c057d0b 96 char magic[8]; /* "cxsparse" */
2cfacb62 97
9c057d0b 98 /* Offset of next header structure, 0xFFFFFFFF if none */
2cfacb62
AL
99 uint64_t data_offset;
100
9c057d0b 101 /* Offset of the Block Allocation Table (BAT) */
2cfacb62
AL
102 uint64_t table_offset;
103
104 uint32_t version;
9c057d0b 105 uint32_t max_table_entries; /* 32bit/entry */
2cfacb62 106
9c057d0b 107 /* 2 MB by default, must be a power of two */
2cfacb62
AL
108 uint32_t block_size;
109
110 uint32_t checksum;
111 uint8_t parent_uuid[16];
112 uint32_t parent_timestamp;
113 uint32_t reserved;
114
9c057d0b 115 /* Backing file name (in UTF-16) */
2cfacb62
AL
116 uint8_t parent_name[512];
117
118 struct {
119 uint32_t platform;
120 uint32_t data_space;
121 uint32_t data_length;
122 uint32_t reserved;
123 uint64_t data_offset;
124 } parent_locator[8];
e54835c0 125} QEMU_PACKED VHDDynDiskHeader;
6a0f9e82
FB
126
127typedef struct BDRVVPCState {
848c66e8 128 CoMutex lock;
15d35bc5
AL
129 uint8_t footer_buf[HEADER_SIZE];
130 uint64_t free_data_block_offset;
2cfacb62 131 int max_table_entries;
6a0f9e82 132 uint32_t *pagetable;
15d35bc5
AL
133 uint64_t bat_offset;
134 uint64_t last_bitmap_offset;
6a0f9e82 135
2cfacb62 136 uint32_t block_size;
15d35bc5 137 uint32_t bitmap_size;
c540d53a
JC
138 bool force_use_chs;
139 bool force_use_sz;
15d35bc5 140
6a0f9e82
FB
141#ifdef CACHE
142 uint8_t *pageentry_u8;
143 uint32_t *pageentry_u32;
144 uint16_t *pageentry_u16;
3b46e624 145
6a0f9e82
FB
146 uint64_t last_bitmap;
147#endif
612ff3d8
KW
148
149 Error *migration_blocker;
6a0f9e82
FB
150} BDRVVPCState;
151
c540d53a
JC
152#define VPC_OPT_SIZE_CALC "force_size_calc"
153static QemuOptsList vpc_runtime_opts = {
154 .name = "vpc-runtime-opts",
155 .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
156 .desc = {
157 {
158 .name = VPC_OPT_SIZE_CALC,
159 .type = QEMU_OPT_STRING,
160 .help = "Force disk size calculation to use either CHS geometry, "
161 "or use the disk current_size specified in the VHD footer. "
162 "{chs, current_size}"
163 },
164 { /* end of list */ }
165 }
166};
167
57c7d9e5
AL
/*
 * Computes the VHD checksum of a buffer: the one's complement of the
 * 32-bit sum of all its bytes.
 *
 * @buf:  bytes to sum (not modified)
 * @size: number of bytes in @buf; 0 yields 0xFFFFFFFF
 *
 * The caller is expected to have zeroed the checksum field inside @buf
 * before calling, as the users in this file do.
 */
static uint32_t vpc_checksum(const uint8_t *buf, size_t size)
{
    uint32_t res = 0;
    size_t i;

    /* size_t index: no signed/unsigned comparison against size */
    for (i = 0; i < size; i++) {
        res += buf[i];
    }

    return ~res;
}
178
179
6a0f9e82
FB
/*
 * Format probe: scores 100 when the buffer starts with the VHD footer
 * magic "conectix", 0 otherwise.  @filename is not consulted.
 */
static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    static const char magic[] = "conectix";

    if (buf_size < 8) {
        return 0;
    }

    /* The magic contains no NUL byte, so a plain 8-byte compare suffices */
    return memcmp(buf, magic, 8) == 0 ? 100 : 0;
}
186
c540d53a
JC
187static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
188 Error **errp)
189{
190 BDRVVPCState *s = bs->opaque;
191 const char *size_calc;
192
193 size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
194
195 if (!size_calc) {
196 /* no override, use autodetect only */
197 } else if (!strcmp(size_calc, "current_size")) {
198 s->force_use_sz = true;
199 } else if (!strcmp(size_calc, "chs")) {
200 s->force_use_chs = true;
201 } else {
202 error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
203 }
204}
205
015a1036
HR
206static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
207 Error **errp)
6a0f9e82
FB
208{
209 BDRVVPCState *s = bs->opaque;
66f82cee 210 int i;
e54835c0
JC
211 VHDFooter *footer;
212 VHDDynDiskHeader *dyndisk_header;
c540d53a
JC
213 QemuOpts *opts = NULL;
214 Error *local_err = NULL;
215 bool use_chs;
b9fa33a6 216 uint8_t buf[HEADER_SIZE];
57c7d9e5 217 uint32_t checksum;
97f1c45c 218 uint64_t computed_size;
b15deac7 219 uint64_t pagetable_size;
24da78db 220 int disk_type = VHD_DYNAMIC;
59294e46 221 int ret;
6a0f9e82 222
c540d53a
JC
223 opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
224 qemu_opts_absorb_qdict(opts, options, &local_err);
225 if (local_err) {
226 error_propagate(errp, local_err);
227 ret = -EINVAL;
228 goto fail;
229 }
230
231 vpc_parse_options(bs, opts, &local_err);
232 if (local_err) {
233 error_propagate(errp, local_err);
234 ret = -EINVAL;
235 goto fail;
236 }
237
cf2ab8fc 238 ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
59294e46 239 if (ret < 0) {
32f6439c 240 error_setg(errp, "Unable to read VHD header");
6a0f9e82 241 goto fail;
59294e46 242 }
6a0f9e82 243
e54835c0 244 footer = (VHDFooter *) s->footer_buf;
24da78db 245 if (strncmp(footer->creator, "conectix", 8)) {
9a4f4c31 246 int64_t offset = bdrv_getlength(bs->file->bs);
59294e46
KW
247 if (offset < 0) {
248 ret = offset;
32f6439c 249 error_setg(errp, "Invalid file size");
59294e46
KW
250 goto fail;
251 } else if (offset < HEADER_SIZE) {
252 ret = -EINVAL;
32f6439c 253 error_setg(errp, "File too small for a VHD header");
24da78db
CA
254 goto fail;
255 }
59294e46 256
24da78db 257 /* If a fixed disk, the footer is found only at the end of the file */
cf2ab8fc 258 ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
59294e46
KW
259 HEADER_SIZE);
260 if (ret < 0) {
24da78db
CA
261 goto fail;
262 }
263 if (strncmp(footer->creator, "conectix", 8)) {
76abe407
PB
264 error_setg(errp, "invalid VPC image");
265 ret = -EINVAL;
24da78db
CA
266 goto fail;
267 }
268 disk_type = VHD_FIXED;
269 }
6a0f9e82 270
57c7d9e5
AL
271 checksum = be32_to_cpu(footer->checksum);
272 footer->checksum = 0;
273 if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
274 fprintf(stderr, "block-vpc: The header checksum of '%s' is "
66f82cee 275 "incorrect.\n", bs->filename);
57c7d9e5 276
c088b691 277 /* Write 'checksum' back to footer, or else will leave it with zero. */
a4127c42 278 footer->checksum = cpu_to_be32(checksum);
c088b691 279
9c057d0b
JC
280 /* The visible size of a image in Virtual PC depends on the geometry
281 rather than on the size stored in the footer (the size in the footer
282 is too large usually) */
33ccf667
SH
283 bs->total_sectors = (int64_t)
284 be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
1fa79228 285
c540d53a
JC
286 /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
287 * VHD image sizes differently. VPC will rely on CHS geometry,
288 * while Hyper-V and disk2vhd use the size specified in the footer.
289 *
290 * We use a couple of approaches to try and determine the correct method:
291 * look at the Creator App field, and look for images that have CHS
292 * geometry that is the maximum value.
293 *
294 * If the CHS geometry is the maximum CHS geometry, then we assume that
295 * the size is the footer->current_size to avoid truncation. Otherwise,
296 * we follow the table based on footer->creator_app:
297 *
298 * Known creator apps:
299 * 'vpc ' : CHS Virtual PC (uses disk geometry)
300 * 'qemu' : CHS QEMU (uses disk geometry)
fb9245c2 301 * 'qem2' : current_size QEMU (uses current_size)
c540d53a
JC
302 * 'win ' : current_size Hyper-V
303 * 'd2v ' : current_size Disk2vhd
9bdfb9e8 304 * 'tap\0' : current_size XenServer
bab246db 305 * 'CTXS' : current_size XenConverter
c540d53a
JC
306 *
307 * The user can override the table values via drive options, however
308 * even with an override we will still use current_size for images
309 * that have CHS geometry of the maximum size.
310 */
311 use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
fb9245c2 312 !!strncmp(footer->creator_app, "qem2", 4) &&
9bdfb9e8 313 !!strncmp(footer->creator_app, "d2v ", 4) &&
bab246db 314 !!strncmp(footer->creator_app, "CTXS", 4) &&
9bdfb9e8 315 !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
c540d53a
JC
316
317 if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
03671ded 318 bs->total_sectors = be64_to_cpu(footer->current_size) /
c540d53a 319 BDRV_SECTOR_SIZE;
0173e7bb
PL
320 }
321
c23fb11b
JC
322 /* Allow a maximum disk size of 2040 GiB */
323 if (bs->total_sectors > VHD_MAX_SECTORS) {
59294e46 324 ret = -EFBIG;
efc8243d
SH
325 goto fail;
326 }
327
24da78db 328 if (disk_type == VHD_DYNAMIC) {
cf2ab8fc 329 ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
59294e46
KW
330 HEADER_SIZE);
331 if (ret < 0) {
32f6439c 332 error_setg(errp, "Error reading dynamic VHD header");
24da78db
CA
333 goto fail;
334 }
b9fa33a6 335
e54835c0 336 dyndisk_header = (VHDDynDiskHeader *) buf;
6a0f9e82 337
24da78db 338 if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
32f6439c 339 error_setg(errp, "Invalid header magic");
59294e46 340 ret = -EINVAL;
24da78db
CA
341 goto fail;
342 }
6a0f9e82 343
24da78db 344 s->block_size = be32_to_cpu(dyndisk_header->block_size);
5e71dfad
KW
345 if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
346 error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
347 ret = -EINVAL;
348 goto fail;
349 }
24da78db 350 s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
15d35bc5 351
24da78db 352 s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
97f1c45c
JC
353
354 if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
32f6439c 355 error_setg(errp, "Too many blocks");
97f1c45c
JC
356 ret = -EINVAL;
357 goto fail;
358 }
97f1c45c
JC
359
360 computed_size = (uint64_t) s->max_table_entries * s->block_size;
361 if (computed_size < bs->total_sectors * 512) {
32f6439c 362 error_setg(errp, "Page table too small");
97f1c45c
JC
363 ret = -EINVAL;
364 goto fail;
365 }
366
b15deac7
JC
367 if (s->max_table_entries > SIZE_MAX / 4 ||
368 s->max_table_entries > (int) INT_MAX / 4) {
369 error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
370 s->max_table_entries);
371 ret = -EINVAL;
372 goto fail;
373 }
374
375 pagetable_size = (uint64_t) s->max_table_entries * 4;
376
9a4f4c31 377 s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
5fb09cd5 378 if (s->pagetable == NULL) {
32f6439c 379 error_setg(errp, "Unable to allocate memory for page table");
5fb09cd5
KW
380 ret = -ENOMEM;
381 goto fail;
382 }
b71d1c2e 383
24da78db 384 s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
59294e46 385
cf2ab8fc 386 ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
9a4f4c31 387 pagetable_size);
59294e46 388 if (ret < 0) {
32f6439c 389 error_setg(errp, "Error reading pagetable");
24da78db
CA
390 goto fail;
391 }
b71d1c2e 392
24da78db 393 s->free_data_block_offset =
b15deac7 394 ROUND_UP(s->bat_offset + pagetable_size, 512);
15d35bc5 395
24da78db
CA
396 for (i = 0; i < s->max_table_entries; i++) {
397 be32_to_cpus(&s->pagetable[i]);
398 if (s->pagetable[i] != 0xFFFFFFFF) {
399 int64_t next = (512 * (int64_t) s->pagetable[i]) +
400 s->bitmap_size + s->block_size;
15d35bc5 401
24da78db
CA
402 if (next > s->free_data_block_offset) {
403 s->free_data_block_offset = next;
404 }
405 }
15d35bc5 406 }
15d35bc5 407
9a4f4c31 408 if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
fb8fe35f
PL
409 error_setg(errp, "block-vpc: free_data_block_offset points after "
410 "the end of file. The image has been truncated.");
411 ret = -EINVAL;
412 goto fail;
413 }
414
24da78db 415 s->last_bitmap_offset = (int64_t) -1;
6a0f9e82 416
6a0f9e82 417#ifdef CACHE
24da78db
CA
418 s->pageentry_u8 = g_malloc(512);
419 s->pageentry_u32 = s->pageentry_u8;
420 s->pageentry_u16 = s->pageentry_u8;
421 s->last_pagetable = -1;
6a0f9e82 422#endif
24da78db 423 }
6a0f9e82 424
612ff3d8 425 /* Disable migration when VHD images are used */
81e5f78a
AG
426 error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
427 "does not support live migration",
428 bdrv_get_device_or_node_name(bs));
fe44dc91
AA
429 ret = migrate_add_blocker(s->migration_blocker, &local_err);
430 if (local_err) {
431 error_propagate(errp, local_err);
432 error_free(s->migration_blocker);
433 goto fail;
434 }
435
436 qemu_co_mutex_init(&s->lock);
612ff3d8 437
6a0f9e82 438 return 0;
59294e46
KW
439
440fail:
97f1c45c 441 qemu_vfree(s->pagetable);
59294e46
KW
442#ifdef CACHE
443 g_free(s->pageentry_u8);
444#endif
445 return ret;
6a0f9e82
FB
446}
447
3fe4b700
JC
448static int vpc_reopen_prepare(BDRVReopenState *state,
449 BlockReopenQueue *queue, Error **errp)
450{
451 return 0;
452}
453
b71d1c2e
AL
454/*
455 * Returns the absolute byte offset of the given sector in the image file.
456 * If the sector is not allocated, -1 is returned instead.
15d35bc5
AL
457 *
458 * The parameter write must be 1 if the offset will be used for a write
459 * operation (the block bitmaps is updated then), 0 otherwise.
b71d1c2e 460 */
d46b7cc6
KW
461static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
462 bool write)
6a0f9e82
FB
463{
464 BDRVVPCState *s = bs->opaque;
6a0f9e82 465 uint64_t bitmap_offset, block_offset;
d46b7cc6 466 uint32_t pagetable_index, offset_in_block;
6a0f9e82 467
2cfacb62 468 pagetable_index = offset / s->block_size;
d46b7cc6 469 offset_in_block = offset % s->block_size;
3b46e624 470
15d35bc5 471 if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
9c057d0b 472 return -1; /* not allocated */
6a0f9e82 473
378e2aea 474 bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
d46b7cc6 475 block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
15d35bc5 476
9c057d0b
JC
477 /* We must ensure that we don't write to any sectors which are marked as
478 unused in the bitmap. We get away with setting all bits in the block
479 bitmap each time we write to a new block. This might cause Virtual PC to
480 miss sparse read optimization, but it's not a problem in terms of
481 correctness. */
15d35bc5
AL
482 if (write && (s->last_bitmap_offset != bitmap_offset)) {
483 uint8_t bitmap[s->bitmap_size];
484
485 s->last_bitmap_offset = bitmap_offset;
486 memset(bitmap, 0xff, s->bitmap_size);
d9ca2ea2 487 bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
15d35bc5 488 }
3b46e624 489
b71d1c2e 490 return block_offset;
6a0f9e82
FB
491}
492
d46b7cc6
KW
493static inline int64_t get_sector_offset(BlockDriverState *bs,
494 int64_t sector_num, bool write)
495{
496 return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
497}
498
15d35bc5
AL
499/*
500 * Writes the footer to the end of the image file. This is needed when the
501 * file grows as it overwrites the old footer
502 *
503 * Returns 0 on success and < 0 on error
504 */
505static int rewrite_footer(BlockDriverState* bs)
506{
507 int ret;
508 BDRVVPCState *s = bs->opaque;
509 int64_t offset = s->free_data_block_offset;
510
d9ca2ea2 511 ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
15d35bc5
AL
512 if (ret < 0)
513 return ret;
514
515 return 0;
516}
517
518/*
519 * Allocates a new block. This involves writing a new footer and updating
520 * the Block Allocation Table to use the space at the old end of the image
521 * file (overwriting the old footer)
522 *
523 * Returns the sectors' offset in the image file on success and < 0 on error
524 */
513b0f02 525static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
15d35bc5
AL
526{
527 BDRVVPCState *s = bs->opaque;
528 int64_t bat_offset;
529 uint32_t index, bat_value;
530 int ret;
531 uint8_t bitmap[s->bitmap_size];
532
9c057d0b 533 /* Check if sector_num is valid */
513b0f02
KW
534 if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
535 return -EINVAL;
536 }
15d35bc5 537
9c057d0b 538 /* Write entry into in-memory BAT */
513b0f02
KW
539 index = offset / s->block_size;
540 assert(s->pagetable[index] == 0xFFFFFFFF);
15d35bc5
AL
541 s->pagetable[index] = s->free_data_block_offset / 512;
542
9c057d0b 543 /* Initialize the block's bitmap */
15d35bc5 544 memset(bitmap, 0xff, s->bitmap_size);
d9ca2ea2 545 ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
078a458e 546 s->bitmap_size);
5bb1cbac
KW
547 if (ret < 0) {
548 return ret;
549 }
15d35bc5 550
9c057d0b 551 /* Write new footer (the old one will be overwritten) */
15d35bc5
AL
552 s->free_data_block_offset += s->block_size + s->bitmap_size;
553 ret = rewrite_footer(bs);
554 if (ret < 0)
555 goto fail;
556
9c057d0b 557 /* Write BAT entry to disk */
15d35bc5 558 bat_offset = s->bat_offset + (4 * index);
a4127c42 559 bat_value = cpu_to_be32(s->pagetable[index]);
d9ca2ea2 560 ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
15d35bc5
AL
561 if (ret < 0)
562 goto fail;
563
513b0f02 564 return get_image_offset(bs, offset, false);
15d35bc5
AL
565
566fail:
567 s->free_data_block_offset -= (s->block_size + s->bitmap_size);
513b0f02 568 return ret;
15d35bc5
AL
569}
570
97b00e28
PB
571static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
572{
573 BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
574 VHDFooter *footer = (VHDFooter *) s->footer_buf;
575
0d4cc3e7 576 if (be32_to_cpu(footer->type) != VHD_FIXED) {
97b00e28
PB
577 bdi->cluster_size = s->block_size;
578 }
579
95de6d70 580 bdi->unallocated_blocks_are_zero = true;
97b00e28
PB
581 return 0;
582}
583
d46b7cc6
KW
584static int coroutine_fn
585vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
586 QEMUIOVector *qiov, int flags)
6a0f9e82 587{
6c6ea921 588 BDRVVPCState *s = bs->opaque;
6a0f9e82 589 int ret;
d46b7cc6
KW
590 int64_t image_offset;
591 int64_t n_bytes;
592 int64_t bytes_done = 0;
e54835c0 593 VHDFooter *footer = (VHDFooter *) s->footer_buf;
d46b7cc6 594 QEMUIOVector local_qiov;
6a0f9e82 595
0d4cc3e7 596 if (be32_to_cpu(footer->type) == VHD_FIXED) {
a03ef88f 597 return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0);
24da78db 598 }
b71d1c2e 599
d46b7cc6
KW
600 qemu_co_mutex_lock(&s->lock);
601 qemu_iovec_init(&local_qiov, qiov->niov);
6c6ea921 602
d46b7cc6
KW
603 while (bytes > 0) {
604 image_offset = get_image_offset(bs, offset, false);
605 n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
606
607 if (image_offset == -1) {
608 qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
b71d1c2e 609 } else {
d46b7cc6
KW
610 qemu_iovec_reset(&local_qiov);
611 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
612
a03ef88f 613 ret = bdrv_co_preadv(bs->file, image_offset, n_bytes,
d46b7cc6
KW
614 &local_qiov, 0);
615 if (ret < 0) {
616 goto fail;
6c6ea921 617 }
b71d1c2e
AL
618 }
619
d46b7cc6
KW
620 bytes -= n_bytes;
621 offset += n_bytes;
622 bytes_done += n_bytes;
6a0f9e82 623 }
6a0f9e82 624
d46b7cc6
KW
625 ret = 0;
626fail:
627 qemu_iovec_destroy(&local_qiov);
2914caa0 628 qemu_co_mutex_unlock(&s->lock);
d46b7cc6 629
2914caa0
PB
630 return ret;
631}
632
513b0f02
KW
633static int coroutine_fn
634vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
635 QEMUIOVector *qiov, int flags)
15d35bc5 636{
6c6ea921 637 BDRVVPCState *s = bs->opaque;
513b0f02
KW
638 int64_t image_offset;
639 int64_t n_bytes;
640 int64_t bytes_done = 0;
15d35bc5 641 int ret;
e54835c0 642 VHDFooter *footer = (VHDFooter *) s->footer_buf;
513b0f02 643 QEMUIOVector local_qiov;
15d35bc5 644
0d4cc3e7 645 if (be32_to_cpu(footer->type) == VHD_FIXED) {
a03ef88f 646 return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0);
24da78db 647 }
15d35bc5 648
513b0f02
KW
649 qemu_co_mutex_lock(&s->lock);
650 qemu_iovec_init(&local_qiov, qiov->niov);
651
652 while (bytes > 0) {
653 image_offset = get_image_offset(bs, offset, true);
654 n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
6c6ea921 655
513b0f02
KW
656 if (image_offset == -1) {
657 image_offset = alloc_block(bs, offset);
658 if (image_offset < 0) {
659 ret = image_offset;
660 goto fail;
661 }
15d35bc5
AL
662 }
663
513b0f02
KW
664 qemu_iovec_reset(&local_qiov);
665 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
666
a03ef88f 667 ret = bdrv_co_pwritev(bs->file, image_offset, n_bytes,
513b0f02
KW
668 &local_qiov, 0);
669 if (ret < 0) {
670 goto fail;
6c6ea921 671 }
15d35bc5 672
513b0f02
KW
673 bytes -= n_bytes;
674 offset += n_bytes;
675 bytes_done += n_bytes;
15d35bc5
AL
676 }
677
513b0f02
KW
678 ret = 0;
679fail:
680 qemu_iovec_destroy(&local_qiov);
e183ef75 681 qemu_co_mutex_unlock(&s->lock);
513b0f02 682
e183ef75
PB
683 return ret;
684}
685
0cc84887 686static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
67a0fd2a 687 int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
0cc84887
KW
688{
689 BDRVVPCState *s = bs->opaque;
690 VHDFooter *footer = (VHDFooter*) s->footer_buf;
2ec711dc 691 int64_t start, offset;
0cc84887
KW
692 bool allocated;
693 int n;
694
695 if (be32_to_cpu(footer->type) == VHD_FIXED) {
696 *pnum = nb_sectors;
7429e207 697 *file = bs->file->bs;
0cc84887
KW
698 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
699 (sector_num << BDRV_SECTOR_BITS);
700 }
701
702 offset = get_sector_offset(bs, sector_num, 0);
703 start = offset;
704 allocated = (offset != -1);
705 *pnum = 0;
706
707 do {
708 /* All sectors in a block are contiguous (without using the bitmap) */
709 n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
710 - sector_num;
711 n = MIN(n, nb_sectors);
712
713 *pnum += n;
714 sector_num += n;
715 nb_sectors -= n;
2ec711dc
PL
716 /* *pnum can't be greater than one block for allocated
717 * sectors since there is always a bitmap in between. */
718 if (allocated) {
7429e207 719 *file = bs->file->bs;
2ec711dc
PL
720 return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
721 }
0cc84887
KW
722 if (nb_sectors == 0) {
723 break;
724 }
0cc84887 725 offset = get_sector_offset(bs, sector_num, 0);
2ec711dc 726 } while (offset == -1);
0cc84887 727
2ec711dc 728 return 0;
0cc84887
KW
729}
730
57c7d9e5
AL
731/*
732 * Calculates the number of cylinders, heads and sectors per cylinder
733 * based on a given number of sectors. This is the algorithm described
734 * in the VHD specification.
735 *
736 * Note that the geometry doesn't always exactly match total_sectors but
737 * may round it down.
6e9ea0c0 738 *
c23fb11b 739 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
258d2edb
CA
740 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
741 * and instead allow up to 255 heads.
57c7d9e5 742 */
6e9ea0c0 743static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
57c7d9e5
AL
744 uint8_t* heads, uint8_t* secs_per_cyl)
745{
746 uint32_t cyls_times_heads;
747
690cbb09 748 total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
57c7d9e5 749
690cbb09 750 if (total_sectors >= 65535LL * 16 * 63) {
57c7d9e5 751 *secs_per_cyl = 255;
690cbb09 752 *heads = 16;
57c7d9e5
AL
753 cyls_times_heads = total_sectors / *secs_per_cyl;
754 } else {
755 *secs_per_cyl = 17;
756 cyls_times_heads = total_sectors / *secs_per_cyl;
757 *heads = (cyls_times_heads + 1023) / 1024;
758
690cbb09 759 if (*heads < 4) {
57c7d9e5 760 *heads = 4;
690cbb09 761 }
57c7d9e5
AL
762
763 if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
764 *secs_per_cyl = 31;
765 *heads = 16;
766 cyls_times_heads = total_sectors / *secs_per_cyl;
767 }
768
769 if (cyls_times_heads >= (*heads * 1024)) {
770 *secs_per_cyl = 63;
771 *heads = 16;
772 cyls_times_heads = total_sectors / *secs_per_cyl;
773 }
774 }
775
dede4188 776 *cyls = cyls_times_heads / *heads;
6e9ea0c0
AJ
777
778 return 0;
57c7d9e5
AL
779}
780
b8f45cdf 781static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
fef6070e 782 int64_t total_sectors)
57c7d9e5 783{
e54835c0
JC
784 VHDDynDiskHeader *dyndisk_header =
785 (VHDDynDiskHeader *) buf;
57c7d9e5 786 size_t block_size, num_bat_entries;
24da78db 787 int i;
fef6070e
JC
788 int ret;
789 int64_t offset = 0;
57c7d9e5 790
9c057d0b 791 /* Write the footer (twice: at the beginning and at the end) */
57c7d9e5
AL
792 block_size = 0x200000;
793 num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
794
8341f00d 795 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
40a99aac 796 if (ret < 0) {
f0ff243a
BS
797 goto fail;
798 }
57c7d9e5 799
fef6070e 800 offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
8341f00d 801 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
fef6070e 802 if (ret < 0) {
f0ff243a
BS
803 goto fail;
804 }
57c7d9e5 805
9c057d0b 806 /* Write the initial BAT */
fef6070e 807 offset = 3 * 512;
57c7d9e5
AL
808
809 memset(buf, 0xFF, 512);
f0ff243a 810 for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
8341f00d 811 ret = blk_pwrite(blk, offset, buf, 512, 0);
fef6070e 812 if (ret < 0) {
f0ff243a
BS
813 goto fail;
814 }
fef6070e 815 offset += 512;
f0ff243a 816 }
57c7d9e5 817
9c057d0b 818 /* Prepare the Dynamic Disk Header */
57c7d9e5
AL
819 memset(buf, 0, 1024);
820
5ec4d682 821 memcpy(dyndisk_header->magic, "cxsparse", 8);
57c7d9e5 822
78439f6a
CA
823 /*
824 * Note: The spec is actually wrong here for data_offset, it says
825 * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
826 */
a4127c42
SH
827 dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
828 dyndisk_header->table_offset = cpu_to_be64(3 * 512);
829 dyndisk_header->version = cpu_to_be32(0x00010000);
830 dyndisk_header->block_size = cpu_to_be32(block_size);
831 dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
57c7d9e5 832
a4127c42 833 dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
57c7d9e5 834
9c057d0b 835 /* Write the header */
fef6070e 836 offset = 512;
57c7d9e5 837
8341f00d 838 ret = blk_pwrite(blk, offset, buf, 1024, 0);
fef6070e 839 if (ret < 0) {
f0ff243a
BS
840 goto fail;
841 }
f0ff243a 842
24da78db
CA
843 fail:
844 return ret;
845}
846
b8f45cdf 847static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
fef6070e 848 int64_t total_size)
24da78db 849{
fef6070e 850 int ret;
24da78db
CA
851
852 /* Add footer to total size */
fef6070e
JC
853 total_size += HEADER_SIZE;
854
b8f45cdf 855 ret = blk_truncate(blk, total_size);
fef6070e
JC
856 if (ret < 0) {
857 return ret;
24da78db
CA
858 }
859
8341f00d 860 ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
fef6070e
JC
861 if (ret < 0) {
862 return ret;
863 }
24da78db 864
24da78db
CA
865 return ret;
866}
867
fec9921f 868static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
24da78db
CA
869{
870 uint8_t buf[1024];
e54835c0 871 VHDFooter *footer = (VHDFooter *) buf;
fec9921f 872 char *disk_type_param;
fef6070e 873 int i;
24da78db
CA
874 uint16_t cyls = 0;
875 uint8_t heads = 0;
876 uint8_t secs_per_cyl = 0;
877 int64_t total_sectors;
878 int64_t total_size;
879 int disk_type;
880 int ret = -EIO;
fb9245c2 881 bool force_size;
fef6070e 882 Error *local_err = NULL;
b8f45cdf 883 BlockBackend *blk = NULL;
24da78db
CA
884
885 /* Read out options */
c2eb918e
HT
886 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
887 BDRV_SECTOR_SIZE);
fec9921f
CL
888 disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
889 if (disk_type_param) {
890 if (!strcmp(disk_type_param, "dynamic")) {
24da78db 891 disk_type = VHD_DYNAMIC;
fec9921f 892 } else if (!strcmp(disk_type_param, "fixed")) {
24da78db
CA
893 disk_type = VHD_FIXED;
894 } else {
0211b9be 895 error_setg(errp, "Invalid disk type, %s", disk_type_param);
fec9921f
CL
896 ret = -EINVAL;
897 goto out;
24da78db
CA
898 }
899 } else {
900 disk_type = VHD_DYNAMIC;
901 }
902
fb9245c2
JC
903 force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
904
fef6070e
JC
905 ret = bdrv_create_file(filename, opts, &local_err);
906 if (ret < 0) {
907 error_propagate(errp, local_err);
fec9921f 908 goto out;
24da78db 909 }
b8f45cdf 910
efaa7c4e 911 blk = blk_new_open(filename, NULL, NULL,
72e775c7 912 BDRV_O_RDWR | BDRV_O_PROTOCOL, &local_err);
b8f45cdf 913 if (blk == NULL) {
fef6070e 914 error_propagate(errp, local_err);
b8f45cdf 915 ret = -EIO;
fef6070e 916 goto out;
4ab15590
CL
917 }
918
b8f45cdf
KW
919 blk_set_allow_write_beyond_eof(blk, true);
920
ecd880d9
KW
921 /*
922 * Calculate matching total_size and geometry. Increase the number of
923 * sectors requested until we get enough (or fail). This ensures that
924 * qemu-img convert doesn't truncate images, but rather rounds up.
690cbb09 925 *
fb9245c2 926 * If the image size can't be represented by a spec conformant CHS geometry,
690cbb09
PL
927 * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
928 * the image size from the VHD footer to calculate total_sectors.
ecd880d9 929 */
fb9245c2
JC
930 if (force_size) {
931 /* This will force the use of total_size for sector count, below */
932 cyls = VHD_CHS_MAX_C;
933 heads = VHD_CHS_MAX_H;
934 secs_per_cyl = VHD_CHS_MAX_S;
935 } else {
936 total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
937 for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
938 calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
939 }
690cbb09
PL
940 }
941
942 if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
943 total_sectors = total_size / BDRV_SECTOR_SIZE;
c23fb11b 944 /* Allow a maximum disk size of 2040 GiB */
690cbb09 945 if (total_sectors > VHD_MAX_SECTORS) {
0211b9be 946 error_setg(errp, "Disk size is too large, max size is 2040 GiB");
24da78db 947 ret = -EFBIG;
fef6070e 948 goto out;
24da78db 949 }
690cbb09
PL
950 } else {
951 total_sectors = (int64_t)cyls * heads * secs_per_cyl;
952 total_size = total_sectors * BDRV_SECTOR_SIZE;
24da78db 953 }
ecd880d9 954
24da78db
CA
955 /* Prepare the Hard Disk Footer */
956 memset(buf, 0, 1024);
957
958 memcpy(footer->creator, "conectix", 8);
fb9245c2
JC
959 if (force_size) {
960 memcpy(footer->creator_app, "qem2", 4);
961 } else {
962 memcpy(footer->creator_app, "qemu", 4);
963 }
24da78db
CA
964 memcpy(footer->creator_os, "Wi2k", 4);
965
a4127c42
SH
966 footer->features = cpu_to_be32(0x02);
967 footer->version = cpu_to_be32(0x00010000);
24da78db 968 if (disk_type == VHD_DYNAMIC) {
a4127c42 969 footer->data_offset = cpu_to_be64(HEADER_SIZE);
24da78db 970 } else {
a4127c42 971 footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
24da78db 972 }
a4127c42 973 footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
24da78db
CA
974
975 /* Version of Virtual PC 2007 */
a4127c42
SH
976 footer->major = cpu_to_be16(0x0005);
977 footer->minor = cpu_to_be16(0x0003);
3f3f20dc 978 footer->orig_size = cpu_to_be64(total_size);
03671ded 979 footer->current_size = cpu_to_be64(total_size);
a4127c42 980 footer->cyls = cpu_to_be16(cyls);
24da78db
CA
981 footer->heads = heads;
982 footer->secs_per_cyl = secs_per_cyl;
983
a4127c42 984 footer->type = cpu_to_be32(disk_type);
24da78db 985
38440a21 986 qemu_uuid_generate(&footer->uuid);
24da78db 987
a4127c42 988 footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
24da78db
CA
989
990 if (disk_type == VHD_DYNAMIC) {
b8f45cdf 991 ret = create_dynamic_disk(blk, buf, total_sectors);
24da78db 992 } else {
b8f45cdf 993 ret = create_fixed_disk(blk, buf, total_size);
24da78db 994 }
0211b9be
JC
995 if (ret < 0) {
996 error_setg(errp, "Unable to create or write VHD header");
997 }
24da78db 998
fec9921f 999out:
b8f45cdf 1000 blk_unref(blk);
fec9921f 1001 g_free(disk_type_param);
f0ff243a 1002 return ret;
57c7d9e5
AL
1003}
1004
72c6cc94
KW
1005static int vpc_has_zero_init(BlockDriverState *bs)
1006{
1007 BDRVVPCState *s = bs->opaque;
e54835c0 1008 VHDFooter *footer = (VHDFooter *) s->footer_buf;
72c6cc94 1009
0d4cc3e7 1010 if (be32_to_cpu(footer->type) == VHD_FIXED) {
9a4f4c31 1011 return bdrv_has_zero_init(bs->file->bs);
72c6cc94
KW
1012 } else {
1013 return 1;
1014 }
1015}
1016
6a0f9e82
FB
1017static void vpc_close(BlockDriverState *bs)
1018{
1019 BDRVVPCState *s = bs->opaque;
97f1c45c 1020 qemu_vfree(s->pagetable);
6a0f9e82 1021#ifdef CACHE
7267c094 1022 g_free(s->pageentry_u8);
6a0f9e82 1023#endif
612ff3d8
KW
1024
1025 migrate_del_blocker(s->migration_blocker);
1026 error_free(s->migration_blocker);
6a0f9e82
FB
1027}
1028
fec9921f
CL
1029static QemuOptsList vpc_create_opts = {
1030 .name = "vpc-create-opts",
1031 .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
1032 .desc = {
1033 {
1034 .name = BLOCK_OPT_SIZE,
1035 .type = QEMU_OPT_SIZE,
1036 .help = "Virtual disk size"
1037 },
1038 {
1039 .name = BLOCK_OPT_SUBFMT,
1040 .type = QEMU_OPT_STRING,
1041 .help =
1042 "Type of virtual hard disk format. Supported formats are "
1043 "{dynamic (default) | fixed} "
1044 },
fb9245c2
JC
1045 {
1046 .name = VPC_OPT_FORCE_SIZE,
1047 .type = QEMU_OPT_BOOL,
1048 .help = "Force disk size calculation to use the actual size "
1049 "specified, rather than using the nearest CHS-based "
1050 "calculation"
1051 },
fec9921f
CL
1052 { /* end of list */ }
1053 }
0e7e1989
KW
1054};
1055
5efa9d5a 1056static BlockDriver bdrv_vpc = {
4a411185
KW
1057 .format_name = "vpc",
1058 .instance_size = sizeof(BDRVVPCState),
c68b89ac 1059
72c6cc94
KW
1060 .bdrv_probe = vpc_probe,
1061 .bdrv_open = vpc_open,
1062 .bdrv_close = vpc_close,
1063 .bdrv_reopen_prepare = vpc_reopen_prepare,
c282e1fd 1064 .bdrv_create = vpc_create,
0e7e1989 1065
d46b7cc6 1066 .bdrv_co_preadv = vpc_co_preadv,
513b0f02 1067 .bdrv_co_pwritev = vpc_co_pwritev,
0cc84887 1068 .bdrv_co_get_block_status = vpc_co_get_block_status,
c68b89ac 1069
97b00e28
PB
1070 .bdrv_get_info = vpc_get_info,
1071
fec9921f 1072 .create_opts = &vpc_create_opts,
72c6cc94 1073 .bdrv_has_zero_init = vpc_has_zero_init,
6a0f9e82 1074};
5efa9d5a
AL
1075
1076static void bdrv_vpc_init(void)
1077{
1078 bdrv_register(&bdrv_vpc);
1079}
1080
1081block_init(bdrv_vpc_init);