]> git.proxmox.com Git - mirror_qemu.git/blame - block/vpc.c
rbd: Fix to cleanly reject -drive without pool or image
[mirror_qemu.git] / block / vpc.c
CommitLineData
6a0f9e82 1/*
cc2040f8 2 * Block driver for Connectix / Microsoft Virtual PC images
5fafdf24 3 *
6a0f9e82 4 * Copyright (c) 2005 Alex Beregszaszi
15d35bc5 5 * Copyright (c) 2009 Kevin Wolf <kwolf@suse.de>
5fafdf24 6 *
6a0f9e82
FB
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
80c71a24 25#include "qemu/osdep.h"
da34e65c 26#include "qapi/error.h"
faf07963 27#include "qemu-common.h"
737e150e 28#include "block/block_int.h"
b8f45cdf 29#include "sysemu/block-backend.h"
1de7afc9 30#include "qemu/module.h"
caf71f86 31#include "migration/migration.h"
58369e22 32#include "qemu/bswap.h"
38440a21 33#include "qemu/uuid.h"
6a0f9e82
FB
34
35/**************************************************************/
36
/* Size in bytes of the buffers used for the VHD footer and for the first
 * read of the dynamic disk header. */
#define HEADER_SIZE 512

/* Uncomment to build the fields and code guarded by #ifdef CACHE. */
//#define CACHE
40
2cfacb62
AL
/* Disk type codes; the footer's 'type' field is compared against these
 * after conversion from big-endian. */
enum vhd_type {
    VHD_FIXED        = 2,
    VHD_DYNAMIC      = 3,
    VHD_DIFFERENCING = 4,
};
46
/* Unix time of Jan 1, 2000 0:00:00 (UTC). Footer timestamps count seconds
 * from that instant, so this value is subtracted from time() when a footer
 * is created. */
#define VHD_TIMESTAMP_BASE 946684800

/* Largest cylinder / head / sectors-per-track values used for the geometry
 * stored in the footer */
#define VHD_CHS_MAX_C   65535LL
#define VHD_CHS_MAX_H   16
#define VHD_CHS_MAX_S   255

#define VHD_MAX_SECTORS       0xff000000    /* 2040 GiB max image size */
#define VHD_MAX_GEOMETRY      (VHD_CHS_MAX_C * VHD_CHS_MAX_H * VHD_CHS_MAX_S)

/* Name of the boolean image creation option read by vpc_create() */
#define VPC_OPT_FORCE_SIZE    "force_size"
97f1c45c 58
9c057d0b 59/* always big-endian */
e54835c0 60typedef struct vhd_footer {
9c057d0b 61 char creator[8]; /* "conectix" */
2cfacb62
AL
62 uint32_t features;
63 uint32_t version;
64
9c057d0b 65 /* Offset of next header structure, 0xFFFFFFFF if none */
2cfacb62
AL
66 uint64_t data_offset;
67
9c057d0b 68 /* Seconds since Jan 1, 2000 0:00:00 (UTC) */
2cfacb62
AL
69 uint32_t timestamp;
70
9c057d0b 71 char creator_app[4]; /* e.g., "vpc " */
2cfacb62
AL
72 uint16_t major;
73 uint16_t minor;
9c057d0b 74 char creator_os[4]; /* "Wi2k" */
2cfacb62
AL
75
76 uint64_t orig_size;
03671ded 77 uint64_t current_size;
2cfacb62
AL
78
79 uint16_t cyls;
80 uint8_t heads;
81 uint8_t secs_per_cyl;
82
83 uint32_t type;
84
9c057d0b
JC
85 /* Checksum of the Hard Disk Footer ("one's complement of the sum of all
86 the bytes in the footer without the checksum field") */
2cfacb62
AL
87 uint32_t checksum;
88
9c057d0b 89 /* UUID used to identify a parent hard disk (backing file) */
38440a21 90 QemuUUID uuid;
2cfacb62
AL
91
92 uint8_t in_saved_state;
e54835c0 93} QEMU_PACKED VHDFooter;
b9fa33a6 94
e54835c0 95typedef struct vhd_dyndisk_header {
9c057d0b 96 char magic[8]; /* "cxsparse" */
2cfacb62 97
9c057d0b 98 /* Offset of next header structure, 0xFFFFFFFF if none */
2cfacb62
AL
99 uint64_t data_offset;
100
9c057d0b 101 /* Offset of the Block Allocation Table (BAT) */
2cfacb62
AL
102 uint64_t table_offset;
103
104 uint32_t version;
9c057d0b 105 uint32_t max_table_entries; /* 32bit/entry */
2cfacb62 106
9c057d0b 107 /* 2 MB by default, must be a power of two */
2cfacb62
AL
108 uint32_t block_size;
109
110 uint32_t checksum;
111 uint8_t parent_uuid[16];
112 uint32_t parent_timestamp;
113 uint32_t reserved;
114
9c057d0b 115 /* Backing file name (in UTF-16) */
2cfacb62
AL
116 uint8_t parent_name[512];
117
118 struct {
119 uint32_t platform;
120 uint32_t data_space;
121 uint32_t data_length;
122 uint32_t reserved;
123 uint64_t data_offset;
124 } parent_locator[8];
e54835c0 125} QEMU_PACKED VHDDynDiskHeader;
6a0f9e82
FB
126
127typedef struct BDRVVPCState {
848c66e8 128 CoMutex lock;
15d35bc5
AL
129 uint8_t footer_buf[HEADER_SIZE];
130 uint64_t free_data_block_offset;
2cfacb62 131 int max_table_entries;
6a0f9e82 132 uint32_t *pagetable;
15d35bc5
AL
133 uint64_t bat_offset;
134 uint64_t last_bitmap_offset;
6a0f9e82 135
2cfacb62 136 uint32_t block_size;
15d35bc5 137 uint32_t bitmap_size;
c540d53a
JC
138 bool force_use_chs;
139 bool force_use_sz;
15d35bc5 140
6a0f9e82
FB
141#ifdef CACHE
142 uint8_t *pageentry_u8;
143 uint32_t *pageentry_u32;
144 uint16_t *pageentry_u16;
3b46e624 145
6a0f9e82
FB
146 uint64_t last_bitmap;
147#endif
612ff3d8
KW
148
149 Error *migration_blocker;
6a0f9e82
FB
150} BDRVVPCState;
151
c540d53a
JC
152#define VPC_OPT_SIZE_CALC "force_size_calc"
153static QemuOptsList vpc_runtime_opts = {
154 .name = "vpc-runtime-opts",
155 .head = QTAILQ_HEAD_INITIALIZER(vpc_runtime_opts.head),
156 .desc = {
157 {
158 .name = VPC_OPT_SIZE_CALC,
159 .type = QEMU_OPT_STRING,
160 .help = "Force disk size calculation to use either CHS geometry, "
161 "or use the disk current_size specified in the VHD footer. "
162 "{chs, current_size}"
163 },
164 { /* end of list */ }
165 }
166};
167
57c7d9e5
AL
/*
 * Computes the VHD checksum of a buffer: the one's complement of the sum of
 * all of its bytes, accumulated in a 32-bit unsigned integer.
 *
 * @buf:  bytes to sum; callers in this file pass the structure with its
 *        checksum field zeroed
 * @size: number of bytes in @buf (0 yields 0xFFFFFFFF)
 *
 * Returns the checksum in host byte order.
 */
static uint32_t vpc_checksum(const uint8_t *buf, size_t size)
{
    uint32_t res = 0;
    size_t i;   /* same type as @size: no signed/unsigned comparison */

    for (i = 0; i < size; i++) {
        res += buf[i];
    }

    return ~res;
}
178
179
6a0f9e82
FB
/*
 * Format probe: scores how likely @buf is the start of a VHD image.
 *
 * @buf:      first bytes of the image
 * @buf_size: number of valid bytes in @buf
 * @filename: unused
 *
 * Returns 100 if the buffer starts with the 8-byte "conectix" signature and
 * 0 otherwise.
 */
static int vpc_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    /* Compare raw bytes: the signature is binary data, not a C string, and
     * memcmp() does not require casting away const. */
    if (buf_size >= 8 && memcmp(buf, "conectix", 8) == 0) {
        return 100;
    }
    return 0;
}
186
c540d53a
JC
187static void vpc_parse_options(BlockDriverState *bs, QemuOpts *opts,
188 Error **errp)
189{
190 BDRVVPCState *s = bs->opaque;
191 const char *size_calc;
192
193 size_calc = qemu_opt_get(opts, VPC_OPT_SIZE_CALC);
194
195 if (!size_calc) {
196 /* no override, use autodetect only */
197 } else if (!strcmp(size_calc, "current_size")) {
198 s->force_use_sz = true;
199 } else if (!strcmp(size_calc, "chs")) {
200 s->force_use_chs = true;
201 } else {
202 error_setg(errp, "Invalid size calculation mode: '%s'", size_calc);
203 }
204}
205
015a1036
HR
206static int vpc_open(BlockDriverState *bs, QDict *options, int flags,
207 Error **errp)
6a0f9e82
FB
208{
209 BDRVVPCState *s = bs->opaque;
66f82cee 210 int i;
e54835c0
JC
211 VHDFooter *footer;
212 VHDDynDiskHeader *dyndisk_header;
c540d53a
JC
213 QemuOpts *opts = NULL;
214 Error *local_err = NULL;
215 bool use_chs;
b9fa33a6 216 uint8_t buf[HEADER_SIZE];
57c7d9e5 217 uint32_t checksum;
97f1c45c 218 uint64_t computed_size;
b15deac7 219 uint64_t pagetable_size;
24da78db 220 int disk_type = VHD_DYNAMIC;
59294e46 221 int ret;
6a0f9e82 222
4e4bf5c4
KW
223 bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file,
224 false, errp);
225 if (!bs->file) {
226 return -EINVAL;
227 }
228
c540d53a
JC
229 opts = qemu_opts_create(&vpc_runtime_opts, NULL, 0, &error_abort);
230 qemu_opts_absorb_qdict(opts, options, &local_err);
231 if (local_err) {
232 error_propagate(errp, local_err);
233 ret = -EINVAL;
234 goto fail;
235 }
236
237 vpc_parse_options(bs, opts, &local_err);
238 if (local_err) {
239 error_propagate(errp, local_err);
240 ret = -EINVAL;
241 goto fail;
242 }
243
cf2ab8fc 244 ret = bdrv_pread(bs->file, 0, s->footer_buf, HEADER_SIZE);
59294e46 245 if (ret < 0) {
32f6439c 246 error_setg(errp, "Unable to read VHD header");
6a0f9e82 247 goto fail;
59294e46 248 }
6a0f9e82 249
e54835c0 250 footer = (VHDFooter *) s->footer_buf;
24da78db 251 if (strncmp(footer->creator, "conectix", 8)) {
9a4f4c31 252 int64_t offset = bdrv_getlength(bs->file->bs);
59294e46
KW
253 if (offset < 0) {
254 ret = offset;
32f6439c 255 error_setg(errp, "Invalid file size");
59294e46
KW
256 goto fail;
257 } else if (offset < HEADER_SIZE) {
258 ret = -EINVAL;
32f6439c 259 error_setg(errp, "File too small for a VHD header");
24da78db
CA
260 goto fail;
261 }
59294e46 262
24da78db 263 /* If a fixed disk, the footer is found only at the end of the file */
cf2ab8fc 264 ret = bdrv_pread(bs->file, offset-HEADER_SIZE, s->footer_buf,
59294e46
KW
265 HEADER_SIZE);
266 if (ret < 0) {
24da78db
CA
267 goto fail;
268 }
269 if (strncmp(footer->creator, "conectix", 8)) {
76abe407
PB
270 error_setg(errp, "invalid VPC image");
271 ret = -EINVAL;
24da78db
CA
272 goto fail;
273 }
274 disk_type = VHD_FIXED;
275 }
6a0f9e82 276
57c7d9e5
AL
277 checksum = be32_to_cpu(footer->checksum);
278 footer->checksum = 0;
279 if (vpc_checksum(s->footer_buf, HEADER_SIZE) != checksum)
280 fprintf(stderr, "block-vpc: The header checksum of '%s' is "
66f82cee 281 "incorrect.\n", bs->filename);
57c7d9e5 282
c088b691 283 /* Write 'checksum' back to footer, or else will leave it with zero. */
a4127c42 284 footer->checksum = cpu_to_be32(checksum);
c088b691 285
9c057d0b
JC
286 /* The visible size of a image in Virtual PC depends on the geometry
287 rather than on the size stored in the footer (the size in the footer
288 is too large usually) */
33ccf667
SH
289 bs->total_sectors = (int64_t)
290 be16_to_cpu(footer->cyls) * footer->heads * footer->secs_per_cyl;
1fa79228 291
c540d53a
JC
292 /* Microsoft Virtual PC and Microsoft Hyper-V produce and read
293 * VHD image sizes differently. VPC will rely on CHS geometry,
294 * while Hyper-V and disk2vhd use the size specified in the footer.
295 *
296 * We use a couple of approaches to try and determine the correct method:
297 * look at the Creator App field, and look for images that have CHS
298 * geometry that is the maximum value.
299 *
300 * If the CHS geometry is the maximum CHS geometry, then we assume that
301 * the size is the footer->current_size to avoid truncation. Otherwise,
302 * we follow the table based on footer->creator_app:
303 *
304 * Known creator apps:
305 * 'vpc ' : CHS Virtual PC (uses disk geometry)
306 * 'qemu' : CHS QEMU (uses disk geometry)
fb9245c2 307 * 'qem2' : current_size QEMU (uses current_size)
c540d53a
JC
308 * 'win ' : current_size Hyper-V
309 * 'd2v ' : current_size Disk2vhd
9bdfb9e8 310 * 'tap\0' : current_size XenServer
bab246db 311 * 'CTXS' : current_size XenConverter
c540d53a
JC
312 *
313 * The user can override the table values via drive options, however
314 * even with an override we will still use current_size for images
315 * that have CHS geometry of the maximum size.
316 */
317 use_chs = (!!strncmp(footer->creator_app, "win ", 4) &&
fb9245c2 318 !!strncmp(footer->creator_app, "qem2", 4) &&
9bdfb9e8 319 !!strncmp(footer->creator_app, "d2v ", 4) &&
bab246db 320 !!strncmp(footer->creator_app, "CTXS", 4) &&
9bdfb9e8 321 !!memcmp(footer->creator_app, "tap", 4)) || s->force_use_chs;
c540d53a
JC
322
323 if (!use_chs || bs->total_sectors == VHD_MAX_GEOMETRY || s->force_use_sz) {
03671ded 324 bs->total_sectors = be64_to_cpu(footer->current_size) /
c540d53a 325 BDRV_SECTOR_SIZE;
0173e7bb
PL
326 }
327
c23fb11b
JC
328 /* Allow a maximum disk size of 2040 GiB */
329 if (bs->total_sectors > VHD_MAX_SECTORS) {
59294e46 330 ret = -EFBIG;
efc8243d
SH
331 goto fail;
332 }
333
24da78db 334 if (disk_type == VHD_DYNAMIC) {
cf2ab8fc 335 ret = bdrv_pread(bs->file, be64_to_cpu(footer->data_offset), buf,
59294e46
KW
336 HEADER_SIZE);
337 if (ret < 0) {
32f6439c 338 error_setg(errp, "Error reading dynamic VHD header");
24da78db
CA
339 goto fail;
340 }
b9fa33a6 341
e54835c0 342 dyndisk_header = (VHDDynDiskHeader *) buf;
6a0f9e82 343
24da78db 344 if (strncmp(dyndisk_header->magic, "cxsparse", 8)) {
32f6439c 345 error_setg(errp, "Invalid header magic");
59294e46 346 ret = -EINVAL;
24da78db
CA
347 goto fail;
348 }
6a0f9e82 349
24da78db 350 s->block_size = be32_to_cpu(dyndisk_header->block_size);
5e71dfad
KW
351 if (!is_power_of_2(s->block_size) || s->block_size < BDRV_SECTOR_SIZE) {
352 error_setg(errp, "Invalid block size %" PRIu32, s->block_size);
353 ret = -EINVAL;
354 goto fail;
355 }
24da78db 356 s->bitmap_size = ((s->block_size / (8 * 512)) + 511) & ~511;
15d35bc5 357
24da78db 358 s->max_table_entries = be32_to_cpu(dyndisk_header->max_table_entries);
97f1c45c
JC
359
360 if ((bs->total_sectors * 512) / s->block_size > 0xffffffffU) {
32f6439c 361 error_setg(errp, "Too many blocks");
97f1c45c
JC
362 ret = -EINVAL;
363 goto fail;
364 }
97f1c45c
JC
365
366 computed_size = (uint64_t) s->max_table_entries * s->block_size;
367 if (computed_size < bs->total_sectors * 512) {
32f6439c 368 error_setg(errp, "Page table too small");
97f1c45c
JC
369 ret = -EINVAL;
370 goto fail;
371 }
372
b15deac7
JC
373 if (s->max_table_entries > SIZE_MAX / 4 ||
374 s->max_table_entries > (int) INT_MAX / 4) {
375 error_setg(errp, "Max Table Entries too large (%" PRId32 ")",
376 s->max_table_entries);
377 ret = -EINVAL;
378 goto fail;
379 }
380
381 pagetable_size = (uint64_t) s->max_table_entries * 4;
382
9a4f4c31 383 s->pagetable = qemu_try_blockalign(bs->file->bs, pagetable_size);
5fb09cd5 384 if (s->pagetable == NULL) {
32f6439c 385 error_setg(errp, "Unable to allocate memory for page table");
5fb09cd5
KW
386 ret = -ENOMEM;
387 goto fail;
388 }
b71d1c2e 389
24da78db 390 s->bat_offset = be64_to_cpu(dyndisk_header->table_offset);
59294e46 391
cf2ab8fc 392 ret = bdrv_pread(bs->file, s->bat_offset, s->pagetable,
9a4f4c31 393 pagetable_size);
59294e46 394 if (ret < 0) {
32f6439c 395 error_setg(errp, "Error reading pagetable");
24da78db
CA
396 goto fail;
397 }
b71d1c2e 398
24da78db 399 s->free_data_block_offset =
b15deac7 400 ROUND_UP(s->bat_offset + pagetable_size, 512);
15d35bc5 401
24da78db
CA
402 for (i = 0; i < s->max_table_entries; i++) {
403 be32_to_cpus(&s->pagetable[i]);
404 if (s->pagetable[i] != 0xFFFFFFFF) {
405 int64_t next = (512 * (int64_t) s->pagetable[i]) +
406 s->bitmap_size + s->block_size;
15d35bc5 407
24da78db
CA
408 if (next > s->free_data_block_offset) {
409 s->free_data_block_offset = next;
410 }
411 }
15d35bc5 412 }
15d35bc5 413
9a4f4c31 414 if (s->free_data_block_offset > bdrv_getlength(bs->file->bs)) {
fb8fe35f
PL
415 error_setg(errp, "block-vpc: free_data_block_offset points after "
416 "the end of file. The image has been truncated.");
417 ret = -EINVAL;
418 goto fail;
419 }
420
24da78db 421 s->last_bitmap_offset = (int64_t) -1;
6a0f9e82 422
6a0f9e82 423#ifdef CACHE
24da78db
CA
424 s->pageentry_u8 = g_malloc(512);
425 s->pageentry_u32 = s->pageentry_u8;
426 s->pageentry_u16 = s->pageentry_u8;
427 s->last_pagetable = -1;
6a0f9e82 428#endif
24da78db 429 }
6a0f9e82 430
612ff3d8 431 /* Disable migration when VHD images are used */
81e5f78a
AG
432 error_setg(&s->migration_blocker, "The vpc format used by node '%s' "
433 "does not support live migration",
434 bdrv_get_device_or_node_name(bs));
fe44dc91
AA
435 ret = migrate_add_blocker(s->migration_blocker, &local_err);
436 if (local_err) {
437 error_propagate(errp, local_err);
438 error_free(s->migration_blocker);
439 goto fail;
440 }
441
442 qemu_co_mutex_init(&s->lock);
612ff3d8 443
6a0f9e82 444 return 0;
59294e46
KW
445
446fail:
97f1c45c 447 qemu_vfree(s->pagetable);
59294e46
KW
448#ifdef CACHE
449 g_free(s->pageentry_u8);
450#endif
451 return ret;
6a0f9e82
FB
452}
453
3fe4b700
JC
454static int vpc_reopen_prepare(BDRVReopenState *state,
455 BlockReopenQueue *queue, Error **errp)
456{
457 return 0;
458}
459
b71d1c2e
AL
460/*
461 * Returns the absolute byte offset of the given sector in the image file.
462 * If the sector is not allocated, -1 is returned instead.
15d35bc5
AL
463 *
464 * The parameter write must be 1 if the offset will be used for a write
465 * operation (the block bitmaps is updated then), 0 otherwise.
b71d1c2e 466 */
d46b7cc6
KW
467static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
468 bool write)
6a0f9e82
FB
469{
470 BDRVVPCState *s = bs->opaque;
6a0f9e82 471 uint64_t bitmap_offset, block_offset;
d46b7cc6 472 uint32_t pagetable_index, offset_in_block;
6a0f9e82 473
2cfacb62 474 pagetable_index = offset / s->block_size;
d46b7cc6 475 offset_in_block = offset % s->block_size;
3b46e624 476
15d35bc5 477 if (pagetable_index >= s->max_table_entries || s->pagetable[pagetable_index] == 0xffffffff)
9c057d0b 478 return -1; /* not allocated */
6a0f9e82 479
378e2aea 480 bitmap_offset = 512 * (uint64_t) s->pagetable[pagetable_index];
d46b7cc6 481 block_offset = bitmap_offset + s->bitmap_size + offset_in_block;
15d35bc5 482
9c057d0b
JC
483 /* We must ensure that we don't write to any sectors which are marked as
484 unused in the bitmap. We get away with setting all bits in the block
485 bitmap each time we write to a new block. This might cause Virtual PC to
486 miss sparse read optimization, but it's not a problem in terms of
487 correctness. */
15d35bc5
AL
488 if (write && (s->last_bitmap_offset != bitmap_offset)) {
489 uint8_t bitmap[s->bitmap_size];
490
491 s->last_bitmap_offset = bitmap_offset;
492 memset(bitmap, 0xff, s->bitmap_size);
d9ca2ea2 493 bdrv_pwrite_sync(bs->file, bitmap_offset, bitmap, s->bitmap_size);
15d35bc5 494 }
3b46e624 495
b71d1c2e 496 return block_offset;
6a0f9e82
FB
497}
498
d46b7cc6
KW
499static inline int64_t get_sector_offset(BlockDriverState *bs,
500 int64_t sector_num, bool write)
501{
502 return get_image_offset(bs, sector_num * BDRV_SECTOR_SIZE, write);
503}
504
15d35bc5
AL
505/*
506 * Writes the footer to the end of the image file. This is needed when the
507 * file grows as it overwrites the old footer
508 *
509 * Returns 0 on success and < 0 on error
510 */
511static int rewrite_footer(BlockDriverState* bs)
512{
513 int ret;
514 BDRVVPCState *s = bs->opaque;
515 int64_t offset = s->free_data_block_offset;
516
d9ca2ea2 517 ret = bdrv_pwrite_sync(bs->file, offset, s->footer_buf, HEADER_SIZE);
15d35bc5
AL
518 if (ret < 0)
519 return ret;
520
521 return 0;
522}
523
524/*
525 * Allocates a new block. This involves writing a new footer and updating
526 * the Block Allocation Table to use the space at the old end of the image
527 * file (overwriting the old footer)
528 *
529 * Returns the sectors' offset in the image file on success and < 0 on error
530 */
513b0f02 531static int64_t alloc_block(BlockDriverState* bs, int64_t offset)
15d35bc5
AL
532{
533 BDRVVPCState *s = bs->opaque;
534 int64_t bat_offset;
535 uint32_t index, bat_value;
536 int ret;
537 uint8_t bitmap[s->bitmap_size];
538
9c057d0b 539 /* Check if sector_num is valid */
513b0f02
KW
540 if ((offset < 0) || (offset > bs->total_sectors * BDRV_SECTOR_SIZE)) {
541 return -EINVAL;
542 }
15d35bc5 543
9c057d0b 544 /* Write entry into in-memory BAT */
513b0f02
KW
545 index = offset / s->block_size;
546 assert(s->pagetable[index] == 0xFFFFFFFF);
15d35bc5
AL
547 s->pagetable[index] = s->free_data_block_offset / 512;
548
9c057d0b 549 /* Initialize the block's bitmap */
15d35bc5 550 memset(bitmap, 0xff, s->bitmap_size);
d9ca2ea2 551 ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset, bitmap,
078a458e 552 s->bitmap_size);
5bb1cbac
KW
553 if (ret < 0) {
554 return ret;
555 }
15d35bc5 556
9c057d0b 557 /* Write new footer (the old one will be overwritten) */
15d35bc5
AL
558 s->free_data_block_offset += s->block_size + s->bitmap_size;
559 ret = rewrite_footer(bs);
560 if (ret < 0)
561 goto fail;
562
9c057d0b 563 /* Write BAT entry to disk */
15d35bc5 564 bat_offset = s->bat_offset + (4 * index);
a4127c42 565 bat_value = cpu_to_be32(s->pagetable[index]);
d9ca2ea2 566 ret = bdrv_pwrite_sync(bs->file, bat_offset, &bat_value, 4);
15d35bc5
AL
567 if (ret < 0)
568 goto fail;
569
513b0f02 570 return get_image_offset(bs, offset, false);
15d35bc5
AL
571
572fail:
573 s->free_data_block_offset -= (s->block_size + s->bitmap_size);
513b0f02 574 return ret;
15d35bc5
AL
575}
576
97b00e28
PB
577static int vpc_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
578{
579 BDRVVPCState *s = (BDRVVPCState *)bs->opaque;
580 VHDFooter *footer = (VHDFooter *) s->footer_buf;
581
0d4cc3e7 582 if (be32_to_cpu(footer->type) != VHD_FIXED) {
97b00e28
PB
583 bdi->cluster_size = s->block_size;
584 }
585
95de6d70 586 bdi->unallocated_blocks_are_zero = true;
97b00e28
PB
587 return 0;
588}
589
d46b7cc6
KW
590static int coroutine_fn
591vpc_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
592 QEMUIOVector *qiov, int flags)
6a0f9e82 593{
6c6ea921 594 BDRVVPCState *s = bs->opaque;
6a0f9e82 595 int ret;
d46b7cc6
KW
596 int64_t image_offset;
597 int64_t n_bytes;
598 int64_t bytes_done = 0;
e54835c0 599 VHDFooter *footer = (VHDFooter *) s->footer_buf;
d46b7cc6 600 QEMUIOVector local_qiov;
6a0f9e82 601
0d4cc3e7 602 if (be32_to_cpu(footer->type) == VHD_FIXED) {
a03ef88f 603 return bdrv_co_preadv(bs->file, offset, bytes, qiov, 0);
24da78db 604 }
b71d1c2e 605
d46b7cc6
KW
606 qemu_co_mutex_lock(&s->lock);
607 qemu_iovec_init(&local_qiov, qiov->niov);
6c6ea921 608
d46b7cc6
KW
609 while (bytes > 0) {
610 image_offset = get_image_offset(bs, offset, false);
611 n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
612
613 if (image_offset == -1) {
614 qemu_iovec_memset(qiov, bytes_done, 0, n_bytes);
b71d1c2e 615 } else {
d46b7cc6
KW
616 qemu_iovec_reset(&local_qiov);
617 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
618
a03ef88f 619 ret = bdrv_co_preadv(bs->file, image_offset, n_bytes,
d46b7cc6
KW
620 &local_qiov, 0);
621 if (ret < 0) {
622 goto fail;
6c6ea921 623 }
b71d1c2e
AL
624 }
625
d46b7cc6
KW
626 bytes -= n_bytes;
627 offset += n_bytes;
628 bytes_done += n_bytes;
6a0f9e82 629 }
6a0f9e82 630
d46b7cc6
KW
631 ret = 0;
632fail:
633 qemu_iovec_destroy(&local_qiov);
2914caa0 634 qemu_co_mutex_unlock(&s->lock);
d46b7cc6 635
2914caa0
PB
636 return ret;
637}
638
513b0f02
KW
639static int coroutine_fn
640vpc_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
641 QEMUIOVector *qiov, int flags)
15d35bc5 642{
6c6ea921 643 BDRVVPCState *s = bs->opaque;
513b0f02
KW
644 int64_t image_offset;
645 int64_t n_bytes;
646 int64_t bytes_done = 0;
15d35bc5 647 int ret;
e54835c0 648 VHDFooter *footer = (VHDFooter *) s->footer_buf;
513b0f02 649 QEMUIOVector local_qiov;
15d35bc5 650
0d4cc3e7 651 if (be32_to_cpu(footer->type) == VHD_FIXED) {
a03ef88f 652 return bdrv_co_pwritev(bs->file, offset, bytes, qiov, 0);
24da78db 653 }
15d35bc5 654
513b0f02
KW
655 qemu_co_mutex_lock(&s->lock);
656 qemu_iovec_init(&local_qiov, qiov->niov);
657
658 while (bytes > 0) {
659 image_offset = get_image_offset(bs, offset, true);
660 n_bytes = MIN(bytes, s->block_size - (offset % s->block_size));
6c6ea921 661
513b0f02
KW
662 if (image_offset == -1) {
663 image_offset = alloc_block(bs, offset);
664 if (image_offset < 0) {
665 ret = image_offset;
666 goto fail;
667 }
15d35bc5
AL
668 }
669
513b0f02
KW
670 qemu_iovec_reset(&local_qiov);
671 qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
672
a03ef88f 673 ret = bdrv_co_pwritev(bs->file, image_offset, n_bytes,
513b0f02
KW
674 &local_qiov, 0);
675 if (ret < 0) {
676 goto fail;
6c6ea921 677 }
15d35bc5 678
513b0f02
KW
679 bytes -= n_bytes;
680 offset += n_bytes;
681 bytes_done += n_bytes;
15d35bc5
AL
682 }
683
513b0f02
KW
684 ret = 0;
685fail:
686 qemu_iovec_destroy(&local_qiov);
e183ef75 687 qemu_co_mutex_unlock(&s->lock);
513b0f02 688
e183ef75
PB
689 return ret;
690}
691
0cc84887 692static int64_t coroutine_fn vpc_co_get_block_status(BlockDriverState *bs,
67a0fd2a 693 int64_t sector_num, int nb_sectors, int *pnum, BlockDriverState **file)
0cc84887
KW
694{
695 BDRVVPCState *s = bs->opaque;
696 VHDFooter *footer = (VHDFooter*) s->footer_buf;
2ec711dc 697 int64_t start, offset;
0cc84887
KW
698 bool allocated;
699 int n;
700
701 if (be32_to_cpu(footer->type) == VHD_FIXED) {
702 *pnum = nb_sectors;
7429e207 703 *file = bs->file->bs;
0cc84887
KW
704 return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID | BDRV_BLOCK_DATA |
705 (sector_num << BDRV_SECTOR_BITS);
706 }
707
708 offset = get_sector_offset(bs, sector_num, 0);
709 start = offset;
710 allocated = (offset != -1);
711 *pnum = 0;
712
713 do {
714 /* All sectors in a block are contiguous (without using the bitmap) */
715 n = ROUND_UP(sector_num + 1, s->block_size / BDRV_SECTOR_SIZE)
716 - sector_num;
717 n = MIN(n, nb_sectors);
718
719 *pnum += n;
720 sector_num += n;
721 nb_sectors -= n;
2ec711dc
PL
722 /* *pnum can't be greater than one block for allocated
723 * sectors since there is always a bitmap in between. */
724 if (allocated) {
7429e207 725 *file = bs->file->bs;
2ec711dc
PL
726 return BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | start;
727 }
0cc84887
KW
728 if (nb_sectors == 0) {
729 break;
730 }
0cc84887 731 offset = get_sector_offset(bs, sector_num, 0);
2ec711dc 732 } while (offset == -1);
0cc84887 733
2ec711dc 734 return 0;
0cc84887
KW
735}
736
57c7d9e5
AL
737/*
738 * Calculates the number of cylinders, heads and sectors per cylinder
739 * based on a given number of sectors. This is the algorithm described
740 * in the VHD specification.
741 *
742 * Note that the geometry doesn't always exactly match total_sectors but
743 * may round it down.
6e9ea0c0 744 *
c23fb11b 745 * Returns 0 on success, -EFBIG if the size is larger than 2040 GiB. Override
258d2edb
CA
746 * the hardware EIDE and ATA-2 limit of 16 heads (max disk size of 127 GB)
747 * and instead allow up to 255 heads.
57c7d9e5 748 */
6e9ea0c0 749static int calculate_geometry(int64_t total_sectors, uint16_t* cyls,
57c7d9e5
AL
750 uint8_t* heads, uint8_t* secs_per_cyl)
751{
752 uint32_t cyls_times_heads;
753
690cbb09 754 total_sectors = MIN(total_sectors, VHD_MAX_GEOMETRY);
57c7d9e5 755
690cbb09 756 if (total_sectors >= 65535LL * 16 * 63) {
57c7d9e5 757 *secs_per_cyl = 255;
690cbb09 758 *heads = 16;
57c7d9e5
AL
759 cyls_times_heads = total_sectors / *secs_per_cyl;
760 } else {
761 *secs_per_cyl = 17;
762 cyls_times_heads = total_sectors / *secs_per_cyl;
763 *heads = (cyls_times_heads + 1023) / 1024;
764
690cbb09 765 if (*heads < 4) {
57c7d9e5 766 *heads = 4;
690cbb09 767 }
57c7d9e5
AL
768
769 if (cyls_times_heads >= (*heads * 1024) || *heads > 16) {
770 *secs_per_cyl = 31;
771 *heads = 16;
772 cyls_times_heads = total_sectors / *secs_per_cyl;
773 }
774
775 if (cyls_times_heads >= (*heads * 1024)) {
776 *secs_per_cyl = 63;
777 *heads = 16;
778 cyls_times_heads = total_sectors / *secs_per_cyl;
779 }
780 }
781
dede4188 782 *cyls = cyls_times_heads / *heads;
6e9ea0c0
AJ
783
784 return 0;
57c7d9e5
AL
785}
786
b8f45cdf 787static int create_dynamic_disk(BlockBackend *blk, uint8_t *buf,
fef6070e 788 int64_t total_sectors)
57c7d9e5 789{
e54835c0
JC
790 VHDDynDiskHeader *dyndisk_header =
791 (VHDDynDiskHeader *) buf;
57c7d9e5 792 size_t block_size, num_bat_entries;
24da78db 793 int i;
fef6070e
JC
794 int ret;
795 int64_t offset = 0;
57c7d9e5 796
9c057d0b 797 /* Write the footer (twice: at the beginning and at the end) */
57c7d9e5
AL
798 block_size = 0x200000;
799 num_bat_entries = (total_sectors + block_size / 512) / (block_size / 512);
800
8341f00d 801 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
40a99aac 802 if (ret < 0) {
f0ff243a
BS
803 goto fail;
804 }
57c7d9e5 805
fef6070e 806 offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
8341f00d 807 ret = blk_pwrite(blk, offset, buf, HEADER_SIZE, 0);
fef6070e 808 if (ret < 0) {
f0ff243a
BS
809 goto fail;
810 }
57c7d9e5 811
9c057d0b 812 /* Write the initial BAT */
fef6070e 813 offset = 3 * 512;
57c7d9e5
AL
814
815 memset(buf, 0xFF, 512);
f0ff243a 816 for (i = 0; i < (num_bat_entries * 4 + 511) / 512; i++) {
8341f00d 817 ret = blk_pwrite(blk, offset, buf, 512, 0);
fef6070e 818 if (ret < 0) {
f0ff243a
BS
819 goto fail;
820 }
fef6070e 821 offset += 512;
f0ff243a 822 }
57c7d9e5 823
9c057d0b 824 /* Prepare the Dynamic Disk Header */
57c7d9e5
AL
825 memset(buf, 0, 1024);
826
5ec4d682 827 memcpy(dyndisk_header->magic, "cxsparse", 8);
57c7d9e5 828
78439f6a
CA
829 /*
830 * Note: The spec is actually wrong here for data_offset, it says
831 * 0xFFFFFFFF, but MS tools expect all 64 bits to be set.
832 */
a4127c42
SH
833 dyndisk_header->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
834 dyndisk_header->table_offset = cpu_to_be64(3 * 512);
835 dyndisk_header->version = cpu_to_be32(0x00010000);
836 dyndisk_header->block_size = cpu_to_be32(block_size);
837 dyndisk_header->max_table_entries = cpu_to_be32(num_bat_entries);
57c7d9e5 838
a4127c42 839 dyndisk_header->checksum = cpu_to_be32(vpc_checksum(buf, 1024));
57c7d9e5 840
9c057d0b 841 /* Write the header */
fef6070e 842 offset = 512;
57c7d9e5 843
8341f00d 844 ret = blk_pwrite(blk, offset, buf, 1024, 0);
fef6070e 845 if (ret < 0) {
f0ff243a
BS
846 goto fail;
847 }
f0ff243a 848
24da78db
CA
849 fail:
850 return ret;
851}
852
b8f45cdf 853static int create_fixed_disk(BlockBackend *blk, uint8_t *buf,
fef6070e 854 int64_t total_size)
24da78db 855{
fef6070e 856 int ret;
24da78db
CA
857
858 /* Add footer to total size */
fef6070e
JC
859 total_size += HEADER_SIZE;
860
b8f45cdf 861 ret = blk_truncate(blk, total_size);
fef6070e
JC
862 if (ret < 0) {
863 return ret;
24da78db
CA
864 }
865
8341f00d 866 ret = blk_pwrite(blk, total_size - HEADER_SIZE, buf, HEADER_SIZE, 0);
fef6070e
JC
867 if (ret < 0) {
868 return ret;
869 }
24da78db 870
24da78db
CA
871 return ret;
872}
873
fec9921f 874static int vpc_create(const char *filename, QemuOpts *opts, Error **errp)
24da78db
CA
875{
876 uint8_t buf[1024];
e54835c0 877 VHDFooter *footer = (VHDFooter *) buf;
fec9921f 878 char *disk_type_param;
fef6070e 879 int i;
24da78db
CA
880 uint16_t cyls = 0;
881 uint8_t heads = 0;
882 uint8_t secs_per_cyl = 0;
883 int64_t total_sectors;
884 int64_t total_size;
885 int disk_type;
886 int ret = -EIO;
fb9245c2 887 bool force_size;
fef6070e 888 Error *local_err = NULL;
b8f45cdf 889 BlockBackend *blk = NULL;
24da78db
CA
890
891 /* Read out options */
c2eb918e
HT
892 total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
893 BDRV_SECTOR_SIZE);
fec9921f
CL
894 disk_type_param = qemu_opt_get_del(opts, BLOCK_OPT_SUBFMT);
895 if (disk_type_param) {
896 if (!strcmp(disk_type_param, "dynamic")) {
24da78db 897 disk_type = VHD_DYNAMIC;
fec9921f 898 } else if (!strcmp(disk_type_param, "fixed")) {
24da78db
CA
899 disk_type = VHD_FIXED;
900 } else {
0211b9be 901 error_setg(errp, "Invalid disk type, %s", disk_type_param);
fec9921f
CL
902 ret = -EINVAL;
903 goto out;
24da78db
CA
904 }
905 } else {
906 disk_type = VHD_DYNAMIC;
907 }
908
fb9245c2
JC
909 force_size = qemu_opt_get_bool_del(opts, VPC_OPT_FORCE_SIZE, false);
910
fef6070e
JC
911 ret = bdrv_create_file(filename, opts, &local_err);
912 if (ret < 0) {
913 error_propagate(errp, local_err);
fec9921f 914 goto out;
24da78db 915 }
b8f45cdf 916
efaa7c4e 917 blk = blk_new_open(filename, NULL, NULL,
55880601
KW
918 BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL,
919 &local_err);
b8f45cdf 920 if (blk == NULL) {
fef6070e 921 error_propagate(errp, local_err);
b8f45cdf 922 ret = -EIO;
fef6070e 923 goto out;
4ab15590
CL
924 }
925
b8f45cdf
KW
926 blk_set_allow_write_beyond_eof(blk, true);
927
ecd880d9
KW
928 /*
929 * Calculate matching total_size and geometry. Increase the number of
930 * sectors requested until we get enough (or fail). This ensures that
931 * qemu-img convert doesn't truncate images, but rather rounds up.
690cbb09 932 *
fb9245c2 933 * If the image size can't be represented by a spec conformant CHS geometry,
690cbb09
PL
934 * we set the geometry to 65535 x 16 x 255 (CxHxS) sectors and use
935 * the image size from the VHD footer to calculate total_sectors.
ecd880d9 936 */
fb9245c2
JC
937 if (force_size) {
938 /* This will force the use of total_size for sector count, below */
939 cyls = VHD_CHS_MAX_C;
940 heads = VHD_CHS_MAX_H;
941 secs_per_cyl = VHD_CHS_MAX_S;
942 } else {
943 total_sectors = MIN(VHD_MAX_GEOMETRY, total_size / BDRV_SECTOR_SIZE);
944 for (i = 0; total_sectors > (int64_t)cyls * heads * secs_per_cyl; i++) {
945 calculate_geometry(total_sectors + i, &cyls, &heads, &secs_per_cyl);
946 }
690cbb09
PL
947 }
948
949 if ((int64_t)cyls * heads * secs_per_cyl == VHD_MAX_GEOMETRY) {
950 total_sectors = total_size / BDRV_SECTOR_SIZE;
c23fb11b 951 /* Allow a maximum disk size of 2040 GiB */
690cbb09 952 if (total_sectors > VHD_MAX_SECTORS) {
0211b9be 953 error_setg(errp, "Disk size is too large, max size is 2040 GiB");
24da78db 954 ret = -EFBIG;
fef6070e 955 goto out;
24da78db 956 }
690cbb09
PL
957 } else {
958 total_sectors = (int64_t)cyls * heads * secs_per_cyl;
959 total_size = total_sectors * BDRV_SECTOR_SIZE;
24da78db 960 }
ecd880d9 961
24da78db
CA
962 /* Prepare the Hard Disk Footer */
963 memset(buf, 0, 1024);
964
965 memcpy(footer->creator, "conectix", 8);
fb9245c2
JC
966 if (force_size) {
967 memcpy(footer->creator_app, "qem2", 4);
968 } else {
969 memcpy(footer->creator_app, "qemu", 4);
970 }
24da78db
CA
971 memcpy(footer->creator_os, "Wi2k", 4);
972
a4127c42
SH
973 footer->features = cpu_to_be32(0x02);
974 footer->version = cpu_to_be32(0x00010000);
24da78db 975 if (disk_type == VHD_DYNAMIC) {
a4127c42 976 footer->data_offset = cpu_to_be64(HEADER_SIZE);
24da78db 977 } else {
a4127c42 978 footer->data_offset = cpu_to_be64(0xFFFFFFFFFFFFFFFFULL);
24da78db 979 }
a4127c42 980 footer->timestamp = cpu_to_be32(time(NULL) - VHD_TIMESTAMP_BASE);
24da78db
CA
981
982 /* Version of Virtual PC 2007 */
a4127c42
SH
983 footer->major = cpu_to_be16(0x0005);
984 footer->minor = cpu_to_be16(0x0003);
3f3f20dc 985 footer->orig_size = cpu_to_be64(total_size);
03671ded 986 footer->current_size = cpu_to_be64(total_size);
a4127c42 987 footer->cyls = cpu_to_be16(cyls);
24da78db
CA
988 footer->heads = heads;
989 footer->secs_per_cyl = secs_per_cyl;
990
a4127c42 991 footer->type = cpu_to_be32(disk_type);
24da78db 992
38440a21 993 qemu_uuid_generate(&footer->uuid);
24da78db 994
a4127c42 995 footer->checksum = cpu_to_be32(vpc_checksum(buf, HEADER_SIZE));
24da78db
CA
996
997 if (disk_type == VHD_DYNAMIC) {
b8f45cdf 998 ret = create_dynamic_disk(blk, buf, total_sectors);
24da78db 999 } else {
b8f45cdf 1000 ret = create_fixed_disk(blk, buf, total_size);
24da78db 1001 }
0211b9be
JC
1002 if (ret < 0) {
1003 error_setg(errp, "Unable to create or write VHD header");
1004 }
24da78db 1005
fec9921f 1006out:
b8f45cdf 1007 blk_unref(blk);
fec9921f 1008 g_free(disk_type_param);
f0ff243a 1009 return ret;
57c7d9e5
AL
1010}
1011
72c6cc94
KW
1012static int vpc_has_zero_init(BlockDriverState *bs)
1013{
1014 BDRVVPCState *s = bs->opaque;
e54835c0 1015 VHDFooter *footer = (VHDFooter *) s->footer_buf;
72c6cc94 1016
0d4cc3e7 1017 if (be32_to_cpu(footer->type) == VHD_FIXED) {
9a4f4c31 1018 return bdrv_has_zero_init(bs->file->bs);
72c6cc94
KW
1019 } else {
1020 return 1;
1021 }
1022}
1023
6a0f9e82
FB
1024static void vpc_close(BlockDriverState *bs)
1025{
1026 BDRVVPCState *s = bs->opaque;
97f1c45c 1027 qemu_vfree(s->pagetable);
6a0f9e82 1028#ifdef CACHE
7267c094 1029 g_free(s->pageentry_u8);
6a0f9e82 1030#endif
612ff3d8
KW
1031
1032 migrate_del_blocker(s->migration_blocker);
1033 error_free(s->migration_blocker);
6a0f9e82
FB
1034}
1035
fec9921f
CL
1036static QemuOptsList vpc_create_opts = {
1037 .name = "vpc-create-opts",
1038 .head = QTAILQ_HEAD_INITIALIZER(vpc_create_opts.head),
1039 .desc = {
1040 {
1041 .name = BLOCK_OPT_SIZE,
1042 .type = QEMU_OPT_SIZE,
1043 .help = "Virtual disk size"
1044 },
1045 {
1046 .name = BLOCK_OPT_SUBFMT,
1047 .type = QEMU_OPT_STRING,
1048 .help =
1049 "Type of virtual hard disk format. Supported formats are "
1050 "{dynamic (default) | fixed} "
1051 },
fb9245c2
JC
1052 {
1053 .name = VPC_OPT_FORCE_SIZE,
1054 .type = QEMU_OPT_BOOL,
1055 .help = "Force disk size calculation to use the actual size "
1056 "specified, rather than using the nearest CHS-based "
1057 "calculation"
1058 },
fec9921f
CL
1059 { /* end of list */ }
1060 }
0e7e1989
KW
1061};
1062
5efa9d5a 1063static BlockDriver bdrv_vpc = {
4a411185
KW
1064 .format_name = "vpc",
1065 .instance_size = sizeof(BDRVVPCState),
c68b89ac 1066
72c6cc94
KW
1067 .bdrv_probe = vpc_probe,
1068 .bdrv_open = vpc_open,
1069 .bdrv_close = vpc_close,
1070 .bdrv_reopen_prepare = vpc_reopen_prepare,
862f215f 1071 .bdrv_child_perm = bdrv_format_default_perms,
c282e1fd 1072 .bdrv_create = vpc_create,
0e7e1989 1073
d46b7cc6 1074 .bdrv_co_preadv = vpc_co_preadv,
513b0f02 1075 .bdrv_co_pwritev = vpc_co_pwritev,
0cc84887 1076 .bdrv_co_get_block_status = vpc_co_get_block_status,
c68b89ac 1077
97b00e28
PB
1078 .bdrv_get_info = vpc_get_info,
1079
fec9921f 1080 .create_opts = &vpc_create_opts,
72c6cc94 1081 .bdrv_has_zero_init = vpc_has_zero_init,
6a0f9e82 1082};
5efa9d5a
AL
1083
1084static void bdrv_vpc_init(void)
1085{
1086 bdrv_register(&bdrv_vpc);
1087}
1088
1089block_init(bdrv_vpc_init);