]> git.proxmox.com Git - qemu.git/blob - block/vhdx.c
0ee10a76bad206dd706c878418dc6c42119d9cd0
[qemu.git] / block / vhdx.c
1 /*
2 * Block driver for Hyper-V VHDX Images
3 *
4 * Copyright (c) 2013 Red Hat, Inc.,
5 *
6 * Authors:
7 * Jeff Cody <jcody@redhat.com>
8 *
9 * This is based on the "VHDX Format Specification v0.95", published 4/12/2012
10 * by Microsoft:
11 * https://www.microsoft.com/en-us/download/details.aspx?id=29681
12 *
13 * This work is licensed under the terms of the GNU LGPL, version 2 or later.
14 * See the COPYING.LIB file in the top-level directory.
15 *
16 */
17
18 #include "qemu-common.h"
19 #include "block/block_int.h"
20 #include "qemu/module.h"
21 #include "qemu/crc32c.h"
22 #include "block/vhdx.h"
23
24
25 /* Several metadata and region table data entries are identified by
26 * guids in a MS-specific GUID format. */
27
28
29 /* ------- Known Region Table GUIDs ---------------------- */
30 static const MSGUID bat_guid = { .data1 = 0x2dc27766,
31 .data2 = 0xf623,
32 .data3 = 0x4200,
33 .data4 = { 0x9d, 0x64, 0x11, 0x5e,
34 0x9b, 0xfd, 0x4a, 0x08} };
35
36 static const MSGUID metadata_guid = { .data1 = 0x8b7ca206,
37 .data2 = 0x4790,
38 .data3 = 0x4b9a,
39 .data4 = { 0xb8, 0xfe, 0x57, 0x5f,
40 0x05, 0x0f, 0x88, 0x6e} };
41
42
43
44 /* ------- Known Metadata Entry GUIDs ---------------------- */
45 static const MSGUID file_param_guid = { .data1 = 0xcaa16737,
46 .data2 = 0xfa36,
47 .data3 = 0x4d43,
48 .data4 = { 0xb3, 0xb6, 0x33, 0xf0,
49 0xaa, 0x44, 0xe7, 0x6b} };
50
51 static const MSGUID virtual_size_guid = { .data1 = 0x2FA54224,
52 .data2 = 0xcd1b,
53 .data3 = 0x4876,
54 .data4 = { 0xb2, 0x11, 0x5d, 0xbe,
55 0xd8, 0x3b, 0xf4, 0xb8} };
56
57 static const MSGUID page83_guid = { .data1 = 0xbeca12ab,
58 .data2 = 0xb2e6,
59 .data3 = 0x4523,
60 .data4 = { 0x93, 0xef, 0xc3, 0x09,
61 0xe0, 0x00, 0xc7, 0x46} };
62
63
64 static const MSGUID phys_sector_guid = { .data1 = 0xcda348c7,
65 .data2 = 0x445d,
66 .data3 = 0x4471,
67 .data4 = { 0x9c, 0xc9, 0xe9, 0x88,
68 0x52, 0x51, 0xc5, 0x56} };
69
70 static const MSGUID parent_locator_guid = { .data1 = 0xa8d35f2d,
71 .data2 = 0xb30b,
72 .data3 = 0x454d,
73 .data4 = { 0xab, 0xf7, 0xd3,
74 0xd8, 0x48, 0x34,
75 0xab, 0x0c} };
76
77 static const MSGUID logical_sector_guid = { .data1 = 0x8141bf1d,
78 .data2 = 0xa96f,
79 .data3 = 0x4709,
80 .data4 = { 0xba, 0x47, 0xf2,
81 0x33, 0xa8, 0xfa,
82 0xab, 0x5f} };
83
84 /* Each parent type must have a valid GUID; this is for parent images
85 * of type 'VHDX'. If we were to allow e.g. a QCOW2 parent, we would
86 * need to make up our own QCOW2 GUID type */
87 static const MSGUID parent_vhdx_guid = { .data1 = 0xb04aefb7,
88 .data2 = 0xd19e,
89 .data3 = 0x4a81,
90 .data4 = { 0xb7, 0x89, 0x25, 0xb8,
91 0xe9, 0x44, 0x59, 0x13} };
92
93
/* One bit per known metadata item; set in VHDXMetadataEntries.present as
 * each item is found in the metadata table. */
#define META_FILE_PARAMETER_PRESENT      0x01
#define META_VIRTUAL_DISK_SIZE_PRESENT   0x02
#define META_PAGE_83_PRESENT             0x04
#define META_LOGICAL_SECTOR_SIZE_PRESENT 0x08
#define META_PHYS_SECTOR_SIZE_PRESENT    0x10
#define META_PARENT_LOCATOR_PRESENT      0x20

/* The five always-required items; the parent locator bit is deliberately
 * not part of this mask. */
#define META_ALL_PRESENT                    \
    (META_FILE_PARAMETER_PRESENT      |     \
     META_VIRTUAL_DISK_SIZE_PRESENT   |     \
     META_PAGE_83_PRESENT             |     \
     META_LOGICAL_SECTOR_SIZE_PRESENT |     \
     META_PHYS_SECTOR_SIZE_PRESENT)
105
106 typedef struct VHDXMetadataEntries {
107 VHDXMetadataTableEntry file_parameters_entry;
108 VHDXMetadataTableEntry virtual_disk_size_entry;
109 VHDXMetadataTableEntry page83_data_entry;
110 VHDXMetadataTableEntry logical_sector_size_entry;
111 VHDXMetadataTableEntry phys_sector_size_entry;
112 VHDXMetadataTableEntry parent_locator_entry;
113 uint16_t present;
114 } VHDXMetadataEntries;
115
116
117 typedef struct BDRVVHDXState {
118 CoMutex lock;
119
120 int curr_header;
121 VHDXHeader *headers[2];
122
123 VHDXRegionTableHeader rt;
124 VHDXRegionTableEntry bat_rt; /* region table for the BAT */
125 VHDXRegionTableEntry metadata_rt; /* region table for the metadata */
126
127 VHDXMetadataTableHeader metadata_hdr;
128 VHDXMetadataEntries metadata_entries;
129
130 VHDXFileParameters params;
131 uint32_t block_size;
132 uint32_t block_size_bits;
133 uint32_t sectors_per_block;
134 uint32_t sectors_per_block_bits;
135
136 uint64_t virtual_disk_size;
137 uint32_t logical_sector_size;
138 uint32_t physical_sector_size;
139
140 uint64_t chunk_ratio;
141 uint32_t chunk_ratio_bits;
142 uint32_t logical_sector_size_bits;
143
144 uint32_t bat_entries;
145 VHDXBatEntry *bat;
146 uint64_t bat_offset;
147
148 VHDXParentLocatorHeader parent_header;
149 VHDXParentLocatorEntry *parent_entries;
150
151 } BDRVVHDXState;
152
/* Runs crc32c() over a buffer, optionally treating an embedded CRC field
 * as zero.
 *
 * crc:        value passed to crc32c() as its starting crc
 * buf:        buffer pointer (must not be NULL)
 * size:       number of bytes of buf to checksum
 * crc_offset: byte offset in buf of a 4-byte CRC field that must read as
 *             zero while the checksum is computed, or a negative value if
 *             the buffer has no such field.  Offset 0 is a valid field
 *             position.
 *
 * The CRC field is temporarily overwritten with zeros and restored before
 * returning, so the buffer is modified for the duration of the call.
 *
 * returns the value produced by crc32c()
 */
uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size,
                            int crc_offset)
{
    uint32_t crc_new;
    uint32_t crc_orig;
    /* ">= 0" rather than "> 0": a CRC field located at the very start of
     * the buffer must be zeroed as well */
    bool has_crc_field = crc_offset >= 0;

    assert(buf != NULL);

    if (has_crc_field) {
        memcpy(&crc_orig, buf + crc_offset, sizeof(crc_orig));
        memset(buf + crc_offset, 0, sizeof(crc_orig));
    }

    crc_new = crc32c(crc, buf, size);

    if (has_crc_field) {
        memcpy(buf + crc_offset, &crc_orig, sizeof(crc_orig));
    }

    return crc_new;
}
172
/* Validates the checksum of a buffer that carries its own CRC in-place.
 *
 * While the CRC is being calculated the CRC field is replaced with zero,
 * and it is restored afterwards.  Because the buffer is modified during
 * the calculation, this may not be suitable for multi-threaded use.
 *
 * buf:        buffer pointer
 * size:       size of buffer (must be > crc_offset+4)
 * crc_offset: byte offset in buf of the buffer crc
 *
 * returns true if checksum is valid, false otherwise
 */
bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset)
{
    uint32_t stored;
    uint32_t computed;

    assert(buf != NULL);
    assert(size > (crc_offset + 4));

    /* the stored CRC is little endian on disk */
    memcpy(&stored, buf + crc_offset, sizeof(stored));
    stored = le32_to_cpu(stored);

    computed = vhdx_checksum_calc(0xffffffff, buf, size, crc_offset);

    return computed == stored;
}
201
202
/*
 * Per the MS VHDX Specification, for every VHDX file:
 *   - The header section is fixed size - 1 MB
 *   - The header section is always the first "object"
 *   - The first 64KB of the header is the File Identifier
 *   - The first uint64 (8 bytes) is the VHDX Signature ("vhdxfile")
 *   - The following 512 bytes constitute a UTF-16 string identifying the
 *     software that created the file, and is optional and diagnostic only.
 *
 * Therefore, we probe by looking for the signature "vhdxfile" at the very
 * start of the buffer.
 *
 * buf:      first bytes of the image
 * buf_size: number of valid bytes in buf
 * filename: unused
 *
 * returns 100 if the signature matches, 0 otherwise
 */
static int vhdx_probe(const uint8_t *buf, int buf_size, const char *filename)
{
    /* too short to even hold the signature */
    if (buf_size < 8) {
        return 0;
    }

    if (memcmp(buf, "vhdxfile", 8) != 0) {
        return 0;
    }

    return 100;
}
221
222 /* All VHDX structures on disk are little endian */
223 static void vhdx_header_le_import(VHDXHeader *h)
224 {
225 assert(h != NULL);
226
227 le32_to_cpus(&h->signature);
228 le32_to_cpus(&h->checksum);
229 le64_to_cpus(&h->sequence_number);
230
231 leguid_to_cpus(&h->file_write_guid);
232 leguid_to_cpus(&h->data_write_guid);
233 leguid_to_cpus(&h->log_guid);
234
235 le16_to_cpus(&h->log_version);
236 le16_to_cpus(&h->version);
237 le32_to_cpus(&h->log_length);
238 le64_to_cpus(&h->log_offset);
239 }
240
241
242 /* opens the specified header block from the VHDX file header section */
243 static int vhdx_parse_header(BlockDriverState *bs, BDRVVHDXState *s)
244 {
245 int ret = 0;
246 VHDXHeader *header1;
247 VHDXHeader *header2;
248 bool h1_valid = false;
249 bool h2_valid = false;
250 uint64_t h1_seq = 0;
251 uint64_t h2_seq = 0;
252 uint8_t *buffer;
253
254 header1 = qemu_blockalign(bs, sizeof(VHDXHeader));
255 header2 = qemu_blockalign(bs, sizeof(VHDXHeader));
256
257 buffer = qemu_blockalign(bs, VHDX_HEADER_SIZE);
258
259 s->headers[0] = header1;
260 s->headers[1] = header2;
261
262 /* We have to read the whole VHDX_HEADER_SIZE instead of
263 * sizeof(VHDXHeader), because the checksum is over the whole
264 * region */
265 ret = bdrv_pread(bs->file, VHDX_HEADER1_OFFSET, buffer, VHDX_HEADER_SIZE);
266 if (ret < 0) {
267 goto fail;
268 }
269 /* copy over just the relevant portion that we need */
270 memcpy(header1, buffer, sizeof(VHDXHeader));
271 vhdx_header_le_import(header1);
272
273 if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
274 !memcmp(&header1->signature, "head", 4) &&
275 header1->version == 1) {
276 h1_seq = header1->sequence_number;
277 h1_valid = true;
278 }
279
280 ret = bdrv_pread(bs->file, VHDX_HEADER2_OFFSET, buffer, VHDX_HEADER_SIZE);
281 if (ret < 0) {
282 goto fail;
283 }
284 /* copy over just the relevant portion that we need */
285 memcpy(header2, buffer, sizeof(VHDXHeader));
286 vhdx_header_le_import(header2);
287
288 if (vhdx_checksum_is_valid(buffer, VHDX_HEADER_SIZE, 4) &&
289 !memcmp(&header2->signature, "head", 4) &&
290 header2->version == 1) {
291 h2_seq = header2->sequence_number;
292 h2_valid = true;
293 }
294
295 /* If there is only 1 valid header (or no valid headers), we
296 * don't care what the sequence numbers are */
297 if (h1_valid && !h2_valid) {
298 s->curr_header = 0;
299 } else if (!h1_valid && h2_valid) {
300 s->curr_header = 1;
301 } else if (!h1_valid && !h2_valid) {
302 ret = -EINVAL;
303 goto fail;
304 } else {
305 /* If both headers are valid, then we choose the active one by the
306 * highest sequence number. If the sequence numbers are equal, that is
307 * invalid */
308 if (h1_seq > h2_seq) {
309 s->curr_header = 0;
310 } else if (h2_seq > h1_seq) {
311 s->curr_header = 1;
312 } else {
313 ret = -EINVAL;
314 goto fail;
315 }
316 }
317
318 ret = 0;
319
320 goto exit;
321
322 fail:
323 qerror_report(ERROR_CLASS_GENERIC_ERROR, "No valid VHDX header found");
324 qemu_vfree(header1);
325 qemu_vfree(header2);
326 s->headers[0] = NULL;
327 s->headers[1] = NULL;
328 exit:
329 qemu_vfree(buffer);
330 return ret;
331 }
332
333
334 static int vhdx_open_region_tables(BlockDriverState *bs, BDRVVHDXState *s)
335 {
336 int ret = 0;
337 uint8_t *buffer;
338 int offset = 0;
339 VHDXRegionTableEntry rt_entry;
340 uint32_t i;
341 bool bat_rt_found = false;
342 bool metadata_rt_found = false;
343
344 /* We have to read the whole 64KB block, because the crc32 is over the
345 * whole block */
346 buffer = qemu_blockalign(bs, VHDX_HEADER_BLOCK_SIZE);
347
348 ret = bdrv_pread(bs->file, VHDX_REGION_TABLE_OFFSET, buffer,
349 VHDX_HEADER_BLOCK_SIZE);
350 if (ret < 0) {
351 goto fail;
352 }
353 memcpy(&s->rt, buffer, sizeof(s->rt));
354 le32_to_cpus(&s->rt.signature);
355 le32_to_cpus(&s->rt.checksum);
356 le32_to_cpus(&s->rt.entry_count);
357 le32_to_cpus(&s->rt.reserved);
358 offset += sizeof(s->rt);
359
360 if (!vhdx_checksum_is_valid(buffer, VHDX_HEADER_BLOCK_SIZE, 4) ||
361 memcmp(&s->rt.signature, "regi", 4)) {
362 ret = -EINVAL;
363 goto fail;
364 }
365
366 /* Per spec, maximum region table entry count is 2047 */
367 if (s->rt.entry_count > 2047) {
368 ret = -EINVAL;
369 goto fail;
370 }
371
372 for (i = 0; i < s->rt.entry_count; i++) {
373 memcpy(&rt_entry, buffer + offset, sizeof(rt_entry));
374 offset += sizeof(rt_entry);
375
376 leguid_to_cpus(&rt_entry.guid);
377 le64_to_cpus(&rt_entry.file_offset);
378 le32_to_cpus(&rt_entry.length);
379 le32_to_cpus(&rt_entry.data_bits);
380
381 /* see if we recognize the entry */
382 if (guid_eq(rt_entry.guid, bat_guid)) {
383 /* must be unique; if we have already found it this is invalid */
384 if (bat_rt_found) {
385 ret = -EINVAL;
386 goto fail;
387 }
388 bat_rt_found = true;
389 s->bat_rt = rt_entry;
390 continue;
391 }
392
393 if (guid_eq(rt_entry.guid, metadata_guid)) {
394 /* must be unique; if we have already found it this is invalid */
395 if (metadata_rt_found) {
396 ret = -EINVAL;
397 goto fail;
398 }
399 metadata_rt_found = true;
400 s->metadata_rt = rt_entry;
401 continue;
402 }
403
404 if (rt_entry.data_bits & VHDX_REGION_ENTRY_REQUIRED) {
405 /* cannot read vhdx file - required region table entry that
406 * we do not understand. per spec, we must fail to open */
407 ret = -ENOTSUP;
408 goto fail;
409 }
410 }
411 ret = 0;
412
413 fail:
414 qemu_vfree(buffer);
415 return ret;
416 }
417
418
419
420 /* Metadata initial parser
421 *
422 * This loads all the metadata entry fields. This may cause additional
423 * fields to be processed (e.g. parent locator, etc..).
424 *
425 * There are 5 Metadata items that are always required:
426 * - File Parameters (block size, has a parent)
427 * - Virtual Disk Size (size, in bytes, of the virtual drive)
428 * - Page 83 Data (scsi page 83 guid)
429 * - Logical Sector Size (logical sector size in bytes, either 512 or
430 * 4096. We only support 512 currently)
431 * - Physical Sector Size (512 or 4096)
432 *
433 * Also, if the File Parameters indicate this is a differencing file,
434 * we must also look for the Parent Locator metadata item.
435 */
436 static int vhdx_parse_metadata(BlockDriverState *bs, BDRVVHDXState *s)
437 {
438 int ret = 0;
439 uint8_t *buffer;
440 int offset = 0;
441 uint32_t i = 0;
442 VHDXMetadataTableEntry md_entry;
443
444 buffer = qemu_blockalign(bs, VHDX_METADATA_TABLE_MAX_SIZE);
445
446 ret = bdrv_pread(bs->file, s->metadata_rt.file_offset, buffer,
447 VHDX_METADATA_TABLE_MAX_SIZE);
448 if (ret < 0) {
449 goto exit;
450 }
451 memcpy(&s->metadata_hdr, buffer, sizeof(s->metadata_hdr));
452 offset += sizeof(s->metadata_hdr);
453
454 le64_to_cpus(&s->metadata_hdr.signature);
455 le16_to_cpus(&s->metadata_hdr.reserved);
456 le16_to_cpus(&s->metadata_hdr.entry_count);
457
458 if (memcmp(&s->metadata_hdr.signature, "metadata", 8)) {
459 ret = -EINVAL;
460 goto exit;
461 }
462
463 s->metadata_entries.present = 0;
464
465 if ((s->metadata_hdr.entry_count * sizeof(md_entry)) >
466 (VHDX_METADATA_TABLE_MAX_SIZE - offset)) {
467 ret = -EINVAL;
468 goto exit;
469 }
470
471 for (i = 0; i < s->metadata_hdr.entry_count; i++) {
472 memcpy(&md_entry, buffer + offset, sizeof(md_entry));
473 offset += sizeof(md_entry);
474
475 leguid_to_cpus(&md_entry.item_id);
476 le32_to_cpus(&md_entry.offset);
477 le32_to_cpus(&md_entry.length);
478 le32_to_cpus(&md_entry.data_bits);
479 le32_to_cpus(&md_entry.reserved2);
480
481 if (guid_eq(md_entry.item_id, file_param_guid)) {
482 if (s->metadata_entries.present & META_FILE_PARAMETER_PRESENT) {
483 ret = -EINVAL;
484 goto exit;
485 }
486 s->metadata_entries.file_parameters_entry = md_entry;
487 s->metadata_entries.present |= META_FILE_PARAMETER_PRESENT;
488 continue;
489 }
490
491 if (guid_eq(md_entry.item_id, virtual_size_guid)) {
492 if (s->metadata_entries.present & META_VIRTUAL_DISK_SIZE_PRESENT) {
493 ret = -EINVAL;
494 goto exit;
495 }
496 s->metadata_entries.virtual_disk_size_entry = md_entry;
497 s->metadata_entries.present |= META_VIRTUAL_DISK_SIZE_PRESENT;
498 continue;
499 }
500
501 if (guid_eq(md_entry.item_id, page83_guid)) {
502 if (s->metadata_entries.present & META_PAGE_83_PRESENT) {
503 ret = -EINVAL;
504 goto exit;
505 }
506 s->metadata_entries.page83_data_entry = md_entry;
507 s->metadata_entries.present |= META_PAGE_83_PRESENT;
508 continue;
509 }
510
511 if (guid_eq(md_entry.item_id, logical_sector_guid)) {
512 if (s->metadata_entries.present &
513 META_LOGICAL_SECTOR_SIZE_PRESENT) {
514 ret = -EINVAL;
515 goto exit;
516 }
517 s->metadata_entries.logical_sector_size_entry = md_entry;
518 s->metadata_entries.present |= META_LOGICAL_SECTOR_SIZE_PRESENT;
519 continue;
520 }
521
522 if (guid_eq(md_entry.item_id, phys_sector_guid)) {
523 if (s->metadata_entries.present & META_PHYS_SECTOR_SIZE_PRESENT) {
524 ret = -EINVAL;
525 goto exit;
526 }
527 s->metadata_entries.phys_sector_size_entry = md_entry;
528 s->metadata_entries.present |= META_PHYS_SECTOR_SIZE_PRESENT;
529 continue;
530 }
531
532 if (guid_eq(md_entry.item_id, parent_locator_guid)) {
533 if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) {
534 ret = -EINVAL;
535 goto exit;
536 }
537 s->metadata_entries.parent_locator_entry = md_entry;
538 s->metadata_entries.present |= META_PARENT_LOCATOR_PRESENT;
539 continue;
540 }
541
542 if (md_entry.data_bits & VHDX_META_FLAGS_IS_REQUIRED) {
543 /* cannot read vhdx file - required region table entry that
544 * we do not understand. per spec, we must fail to open */
545 ret = -ENOTSUP;
546 goto exit;
547 }
548 }
549
550 if (s->metadata_entries.present != META_ALL_PRESENT) {
551 ret = -ENOTSUP;
552 goto exit;
553 }
554
555 ret = bdrv_pread(bs->file,
556 s->metadata_entries.file_parameters_entry.offset
557 + s->metadata_rt.file_offset,
558 &s->params,
559 sizeof(s->params));
560
561 if (ret < 0) {
562 goto exit;
563 }
564
565 le32_to_cpus(&s->params.block_size);
566 le32_to_cpus(&s->params.data_bits);
567
568
569 /* We now have the file parameters, so we can tell if this is a
570 * differencing file (i.e.. has_parent), is dynamic or fixed
571 * sized (leave_blocks_allocated), and the block size */
572
573 /* The parent locator required iff the file parameters has_parent set */
574 if (s->params.data_bits & VHDX_PARAMS_HAS_PARENT) {
575 if (s->metadata_entries.present & META_PARENT_LOCATOR_PRESENT) {
576 /* TODO: parse parent locator fields */
577 ret = -ENOTSUP; /* temp, until differencing files are supported */
578 goto exit;
579 } else {
580 /* if has_parent is set, but there is not parent locator present,
581 * then that is an invalid combination */
582 ret = -EINVAL;
583 goto exit;
584 }
585 }
586
587 /* determine virtual disk size, logical sector size,
588 * and phys sector size */
589
590 ret = bdrv_pread(bs->file,
591 s->metadata_entries.virtual_disk_size_entry.offset
592 + s->metadata_rt.file_offset,
593 &s->virtual_disk_size,
594 sizeof(uint64_t));
595 if (ret < 0) {
596 goto exit;
597 }
598 ret = bdrv_pread(bs->file,
599 s->metadata_entries.logical_sector_size_entry.offset
600 + s->metadata_rt.file_offset,
601 &s->logical_sector_size,
602 sizeof(uint32_t));
603 if (ret < 0) {
604 goto exit;
605 }
606 ret = bdrv_pread(bs->file,
607 s->metadata_entries.phys_sector_size_entry.offset
608 + s->metadata_rt.file_offset,
609 &s->physical_sector_size,
610 sizeof(uint32_t));
611 if (ret < 0) {
612 goto exit;
613 }
614
615 le64_to_cpus(&s->virtual_disk_size);
616 le32_to_cpus(&s->logical_sector_size);
617 le32_to_cpus(&s->physical_sector_size);
618
619 if (s->logical_sector_size == 0 || s->params.block_size == 0) {
620 ret = -EINVAL;
621 goto exit;
622 }
623
624 /* both block_size and sector_size are guaranteed powers of 2 */
625 s->sectors_per_block = s->params.block_size / s->logical_sector_size;
626 s->chunk_ratio = (VHDX_MAX_SECTORS_PER_BLOCK) *
627 (uint64_t)s->logical_sector_size /
628 (uint64_t)s->params.block_size;
629
630 /* These values are ones we will want to use for division / multiplication
631 * later on, and they are all guaranteed (per the spec) to be powers of 2,
632 * so we can take advantage of that for shift operations during
633 * reads/writes */
634 if (s->logical_sector_size & (s->logical_sector_size - 1)) {
635 ret = -EINVAL;
636 goto exit;
637 }
638 if (s->sectors_per_block & (s->sectors_per_block - 1)) {
639 ret = -EINVAL;
640 goto exit;
641 }
642 if (s->chunk_ratio & (s->chunk_ratio - 1)) {
643 ret = -EINVAL;
644 goto exit;
645 }
646 s->block_size = s->params.block_size;
647 if (s->block_size & (s->block_size - 1)) {
648 ret = -EINVAL;
649 goto exit;
650 }
651
652 s->logical_sector_size_bits = 31 - clz32(s->logical_sector_size);
653 s->sectors_per_block_bits = 31 - clz32(s->sectors_per_block);
654 s->chunk_ratio_bits = 63 - clz64(s->chunk_ratio);
655 s->block_size_bits = 31 - clz32(s->block_size);
656
657 ret = 0;
658
659 exit:
660 qemu_vfree(buffer);
661 return ret;
662 }
663
664 /* Parse the replay log. Per the VHDX spec, if the log is present
665 * it must be replayed prior to opening the file, even read-only.
666 *
667 * If read-only, we must replay the log in RAM (or refuse to open
668 * a dirty VHDX file read-only */
669 static int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s)
670 {
671 int ret = 0;
672 int i;
673 VHDXHeader *hdr;
674
675 hdr = s->headers[s->curr_header];
676
677 /* either the log guid, or log length is zero,
678 * then a replay log is present */
679 for (i = 0; i < sizeof(hdr->log_guid.data4); i++) {
680 ret |= hdr->log_guid.data4[i];
681 }
682 if (hdr->log_guid.data1 == 0 &&
683 hdr->log_guid.data2 == 0 &&
684 hdr->log_guid.data3 == 0 &&
685 ret == 0) {
686 goto exit;
687 }
688
689 /* per spec, only log version of 0 is supported */
690 if (hdr->log_version != 0) {
691 ret = -EINVAL;
692 goto exit;
693 }
694
695 if (hdr->log_length == 0) {
696 goto exit;
697 }
698
699 /* We currently do not support images with logs to replay */
700 ret = -ENOTSUP;
701
702 exit:
703 return ret;
704 }
705
706
707 static int vhdx_open(BlockDriverState *bs, QDict *options, int flags)
708 {
709 BDRVVHDXState *s = bs->opaque;
710 int ret = 0;
711 uint32_t i;
712 uint64_t signature;
713 uint32_t data_blocks_cnt, bitmap_blocks_cnt;
714
715
716 s->bat = NULL;
717
718 qemu_co_mutex_init(&s->lock);
719
720 /* validate the file signature */
721 ret = bdrv_pread(bs->file, 0, &signature, sizeof(uint64_t));
722 if (ret < 0) {
723 goto fail;
724 }
725 if (memcmp(&signature, "vhdxfile", 8)) {
726 ret = -EINVAL;
727 goto fail;
728 }
729
730 ret = vhdx_parse_header(bs, s);
731 if (ret) {
732 goto fail;
733 }
734
735 ret = vhdx_parse_log(bs, s);
736 if (ret) {
737 goto fail;
738 }
739
740 ret = vhdx_open_region_tables(bs, s);
741 if (ret) {
742 goto fail;
743 }
744
745 ret = vhdx_parse_metadata(bs, s);
746 if (ret) {
747 goto fail;
748 }
749 s->block_size = s->params.block_size;
750
751 /* the VHDX spec dictates that virtual_disk_size is always a multiple of
752 * logical_sector_size */
753 bs->total_sectors = s->virtual_disk_size >> s->logical_sector_size_bits;
754
755 data_blocks_cnt = s->virtual_disk_size >> s->block_size_bits;
756 if (s->virtual_disk_size - (data_blocks_cnt << s->block_size_bits)) {
757 data_blocks_cnt++;
758 }
759 bitmap_blocks_cnt = data_blocks_cnt >> s->chunk_ratio_bits;
760 if (data_blocks_cnt - (bitmap_blocks_cnt << s->chunk_ratio_bits)) {
761 bitmap_blocks_cnt++;
762 }
763
764 if (s->parent_entries) {
765 s->bat_entries = bitmap_blocks_cnt * (s->chunk_ratio + 1);
766 } else {
767 s->bat_entries = data_blocks_cnt +
768 ((data_blocks_cnt - 1) >> s->chunk_ratio_bits);
769 }
770
771 s->bat_offset = s->bat_rt.file_offset;
772
773 if (s->bat_entries > s->bat_rt.length / sizeof(VHDXBatEntry)) {
774 /* BAT allocation is not large enough for all entries */
775 ret = -EINVAL;
776 goto fail;
777 }
778
779 s->bat = qemu_blockalign(bs, s->bat_rt.length);
780
781 ret = bdrv_pread(bs->file, s->bat_offset, s->bat, s->bat_rt.length);
782 if (ret < 0) {
783 goto fail;
784 }
785
786 for (i = 0; i < s->bat_entries; i++) {
787 le64_to_cpus(&s->bat[i]);
788 }
789
790 if (flags & BDRV_O_RDWR) {
791 ret = -ENOTSUP;
792 goto fail;
793 }
794
795 /* TODO: differencing files, read, write */
796
797 return 0;
798 fail:
799 qemu_vfree(s->headers[0]);
800 qemu_vfree(s->headers[1]);
801 qemu_vfree(s->bat);
802 qemu_vfree(s->parent_entries);
803 return ret;
804 }
805
806 static int vhdx_reopen_prepare(BDRVReopenState *state,
807 BlockReopenQueue *queue, Error **errp)
808 {
809 return 0;
810 }
811
812
813 static coroutine_fn int vhdx_co_readv(BlockDriverState *bs, int64_t sector_num,
814 int nb_sectors, QEMUIOVector *qiov)
815 {
816 return -ENOTSUP;
817 }
818
819
820
821 static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
822 int nb_sectors, QEMUIOVector *qiov)
823 {
824 return -ENOTSUP;
825 }
826
827
828 static void vhdx_close(BlockDriverState *bs)
829 {
830 BDRVVHDXState *s = bs->opaque;
831 qemu_vfree(s->headers[0]);
832 qemu_vfree(s->headers[1]);
833 qemu_vfree(s->bat);
834 qemu_vfree(s->parent_entries);
835 }
836
837 static BlockDriver bdrv_vhdx = {
838 .format_name = "vhdx",
839 .instance_size = sizeof(BDRVVHDXState),
840 .bdrv_probe = vhdx_probe,
841 .bdrv_open = vhdx_open,
842 .bdrv_close = vhdx_close,
843 .bdrv_reopen_prepare = vhdx_reopen_prepare,
844 .bdrv_co_readv = vhdx_co_readv,
845 .bdrv_co_writev = vhdx_co_writev,
846 };
847
848 static void bdrv_vhdx_init(void)
849 {
850 bdrv_register(&bdrv_vhdx);
851 }
852
853 block_init(bdrv_vhdx_init);