]>
Commit | Line | Data |
---|---|---|
203cdba3 JC |
1 | /* |
2 | * Block driver for Hyper-V VHDX Images | |
3 | * | |
4 | * Copyright (c) 2013 Red Hat, Inc., | |
5 | * | |
6 | * Authors: | |
7 | * Jeff Cody <jcody@redhat.com> | |
8 | * | |
6e9d290b | 9 | * This is based on the "VHDX Format Specification v1.00", published 8/25/2012 |
203cdba3 | 10 | * by Microsoft: |
6e9d290b | 11 | * https://www.microsoft.com/en-us/download/details.aspx?id=34750 |
203cdba3 JC |
12 | * |
13 | * This work is licensed under the terms of the GNU LGPL, version 2 or later. | |
14 | * See the COPYING.LIB file in the top-level directory. | |
15 | * | |
16 | */ | |
17 | ||
18 | #ifndef BLOCK_VHDX_H | |
19 | #define BLOCK_VHDX_H | |
20 | ||
21 | /* Structures and fields present in the VHDX file */ | |
22 | ||
23 | /* The header section has the following blocks, | |
24 | * each block is 64KB: | |
25 | * | |
26 | * _____________________________________________________________________________ | |
27 | * | File Id. | Header 1 | Header 2 | Region Table | Reserved (768KB) | | |
28 | * |----------|---------------|------------|--------------|--------------------| | |
29 | * | | | | | | | |
30 | * 0.........64KB...........128KB........192KB..........256KB................1MB | |
31 | */ | |
32 | ||
625565d2 | 33 | #define VHDX_HEADER_BLOCK_SIZE (64 * 1024) /* size of one block of the header section (64KB) */ |
203cdba3 JC |
34 | |
35 | #define VHDX_FILE_ID_OFFSET 0 /* byte offset of the "File Id." block */ | |
625565d2 JC |
36 | #define VHDX_HEADER1_OFFSET (VHDX_HEADER_BLOCK_SIZE * 1) /* 64KB: "Header 1" block */ |
37 | #define VHDX_HEADER2_OFFSET (VHDX_HEADER_BLOCK_SIZE * 2) /* 128KB: "Header 2" block */ | |
38 | #define VHDX_REGION_TABLE_OFFSET (VHDX_HEADER_BLOCK_SIZE * 3) /* 192KB: "Region Table" block */ | |
203cdba3 JC |
39 | |
40 | ||
41 | /* | |
42 | * A note on the use of MS-GUID fields. For more details on the GUID, | |
43 | * please see: https://en.wikipedia.org/wiki/Globally_unique_identifier. | |
44 | * | |
45 | * The VHDX specification only states that these are MS GUIDs, and which | |
46 | * bytes are data1-data4. It makes no mention of what algorithm should be used | |
47 | * to generate the GUID, nor what standard. However, looking at the specified | |
48 | * known GUID fields, it appears the GUIDs are: | |
49 | * Standard/DCE GUID type (noted by 10b in the MSB of byte 0 of .data4) | |
50 | * Random algorithm (noted by 0x4XXX for .data3) | |
51 | */ | |
52 | ||
53 | /* ---- HEADER SECTION STRUCTURES ---- */ | |
54 | ||
55 | /* These structures are ones that are defined in the VHDX specification | |
56 | * document */ | |
57 | ||
58 | typedef struct VHDXFileIdentifier { /* presumably the layout found at VHDX_FILE_ID_OFFSET - confirm against the reader */ | |
59 | uint64_t signature; /* "vhdxfile" in ASCII */ | |
60 | uint16_t creator[256]; /* optional; utf-16 string to identify | |
61 | the vhdx file creator. Diagnostic | |
62 | only */ | |
63 | } VHDXFileIdentifier; | |
64 | ||
65 | ||
66 | /* the guid is a 16 byte unique ID - the definition for this used by | |
67 | * Microsoft is not just 16 bytes though - it is a structure that is defined, | |
68 | * so we need to follow it here so that endianness does not trip us up */ | |
69 | ||
4f18b782 | 70 | typedef struct QEMU_PACKED MSGUID { /* GUID kept as separate fields (4 + 2 + 2 + 8 = 16 bytes) so each multi-byte field can be byte-swapped on its own */ |
203cdba3 JC |
71 | uint32_t data1; /* converted by leguid_to_cpus() / cpu_to_leguids() */ |
72 | uint16_t data2; /* converted by leguid_to_cpus() / cpu_to_leguids() */ | |
73 | uint16_t data3; /* converted by leguid_to_cpus() / cpu_to_leguids() */ | |
74 | uint8_t data4[8]; /* plain byte array; the conversion helpers leave it untouched */ | |
75 | } MSGUID; | |
76 | ||
77 | #define guid_eq(a, b) \ | |
78 | (memcmp(&(a), &(b), sizeof(MSGUID)) == 0) /* true when a and b are bytewise identical; pass MSGUID objects (lvalues), not pointers, since the macro takes their address */ | |
79 | ||
625565d2 JC |
80 | #define VHDX_HEADER_SIZE (4 * 1024) /* although the vhdx_header struct on disk |
81 | is only 582 bytes, for purposes of crc | |
82 | the header is the first 4KB of the 64KB | |
83 | block. NOTE(review): the fields declared in VHDXHeader in this file add up to 80 bytes, not 582 - confirm which figure is intended */ | |
203cdba3 JC |
84 | |
85 | /* The full header is 4KB, although the actual header data is much smaller. | |
86 | * But for the checksum calculation, it is over the entire 4KB structure, | |
87 | * not just the defined portion of it */ | |
88 | typedef struct QEMU_PACKED VHDXHeader { | |
89 | uint32_t signature; /* "head" in ASCII */ | |
90 | uint32_t checksum; /* CRC-32C hash of the whole 4KB header (VHDX_HEADER_SIZE), not just these fields */ | |
91 | uint64_t sequence_number; /* Seq number of this header. Each | |
92 | VHDX file has 2 of these headers, | |
93 | and only the header with the highest | |
94 | sequence number is valid */ | |
625565d2 | 95 | MSGUID file_write_guid; /* 128 bit unique identifier. Must be |
203cdba3 JC |
96 | updated to new, unique value before |
97 | the first modification is made to | |
98 | the file */ | |
99 | MSGUID data_write_guid; /* 128 bit unique identifier. Must be | |
100 | updated to new, unique value before | |
101 | the first modification is made to | |
102 | visible data. Visible data is | |
103 | defined as: | |
104 | - system & user metadata | |
105 | - raw block data | |
106 | - disk size | |
107 | - any change that will | |
108 | cause the virtual disk | |
109 | sector read to differ | |
110 | ||
111 | This does not need to change if | |
112 | blocks are re-arranged */ | |
113 | MSGUID log_guid; /* 128 bit unique identifier. If zero, | |
114 | there is no valid log. If non-zero, | |
115 | log entries with this guid are | |
116 | valid. */ | |
117 | uint16_t log_version; /* version of the log format. Mustn't be | |
118 | zero, unless log_guid is also zero */ | |
6e9d290b | 119 | uint16_t version; /* version of the vhdx file. Currently, |
203cdba3 JC |
120 | the only supported version is "1" */ |
121 | uint32_t log_length; /* length of the log. Must be multiple | |
122 | of 1MB */ | |
123 | uint64_t log_offset; /* byte offset in the file of the log. | |
124 | Must also be a multiple of 1MB */ | |
125 | } VHDXHeader; | |
126 | ||
127 | /* Header for the region table block (the 64KB block at VHDX_REGION_TABLE_OFFSET) */ | |
128 | typedef struct QEMU_PACKED VHDXRegionTableHeader { | |
129 | uint32_t signature; /* "regi" in ASCII */ | |
130 | uint32_t checksum; /* CRC-32C hash of the 64KB table */ | |
131 | uint32_t entry_count; /* number of valid entries; presumably counts the VHDXRegionTableEntry records that follow - confirm */ | |
132 | uint32_t reserved; | |
133 | } VHDXRegionTableHeader; | |
134 | ||
135 | /* Individual region table entry. There may be a maximum of 2047 of these | |
136 | * | |
137 | * There are two known region table properties. Both are required. | |
138 | * BAT (block allocation table): 2DC27766F62342009D64115E9BFD4A08 | |
139 | * Metadata: 8B7CA20647904B9AB8FE575F050F886E | |
140 | */ | |
141 | #define VHDX_REGION_ENTRY_REQUIRED 0x01 /* if set, parser must understand | |
142 | this entry in order to open | |
143 | the file */ | |
144 | typedef struct QEMU_PACKED VHDXRegionTableEntry { | |
145 | MSGUID guid; /* 128-bit unique identifier of the region (see the known GUIDs listed above) */ | |
146 | uint64_t file_offset; /* offset of the object in the file. | |
147 | Must be multiple of 1MB */ | |
148 | uint32_t length; /* length, in bytes, of the object */ | |
149 | uint32_t data_bits; /* presumably the flag word that VHDX_REGION_ENTRY_REQUIRED is tested against - confirm */ | |
150 | } VHDXRegionTableEntry; | |
151 | ||
152 | ||
153 | /* ---- LOG ENTRY STRUCTURES ---- */ | |
625565d2 JC |
154 | #define VHDX_LOG_MIN_SIZE (1024 * 1024) /* 1MB */ |
155 | #define VHDX_LOG_SECTOR_SIZE 4096 /* 4KB; matches the summed field sizes of VHDXLogDataSector */ | |
203cdba3 | 156 | #define VHDX_LOG_HDR_SIZE 64 /* matches the summed field sizes of VHDXLogEntryHeader */ |
625565d2 | 157 | #define VHDX_LOG_SIGNATURE 0x65676f6c /* the bytes 'l','o','g','e' when stored little-endian */ |
203cdba3 JC |
158 | typedef struct QEMU_PACKED VHDXLogEntryHeader { |
159 | uint32_t signature; /* "loge" in ASCII */ | |
160 | uint32_t checksum; /* CRC-32C hash of the 64KB table. NOTE(review): this wording is identical to the region table header comment; presumably the hash covers the log entry instead - confirm against the spec */ | |
161 | uint32_t entry_length; /* length in bytes, multiple of 1MB. NOTE(review): log sectors here are 4KB (VHDX_LOG_SECTOR_SIZE); confirm whether the required multiple is 4KB rather than 1MB */ | |
162 | uint32_t tail; /* byte offset of first log entry of a | |
163 | seq, where this entry is the last | |
164 | entry */ | |
165 | uint64_t sequence_number; /* incremented with each log entry. | |
166 | May not be zero. */ | |
167 | uint32_t descriptor_count; /* number of descriptors in this log | |
168 | entry, must be >= 0 (always true for an unsigned field, i.e. zero descriptors is allowed) */ | |
169 | uint32_t reserved; | |
170 | MSGUID log_guid; /* value of the log_guid from | |
171 | vhdx_header. If not found in | |
172 | vhdx_header, it is invalid */ | |
173 | uint64_t flushed_file_offset; /* see spec for full details - this | |
52f35022 | 174 | should be vhdx file size in bytes */ |
203cdba3 JC |
175 | uint64_t last_file_offset; /* size in bytes that all allocated |
176 | file structures fit into */ | |
177 | } VHDXLogEntryHeader; | |
178 | ||
179 | #define VHDX_LOG_DESC_SIZE 32 /* matches the summed field sizes of VHDXLogDescriptor */ | |
625565d2 JC |
180 | #define VHDX_LOG_DESC_SIGNATURE 0x63736564 /* the bytes 'd','e','s','c' when stored little-endian */ |
181 | #define VHDX_LOG_ZERO_SIGNATURE 0x6f72657a /* the bytes 'z','e','r','o' when stored little-endian */ | |
203cdba3 JC |
182 | typedef struct QEMU_PACKED VHDXLogDescriptor { |
183 | uint32_t signature; /* "zero" or "desc" in ASCII */ | |
184 | union { | |
185 | uint32_t reserved; /* zero desc */ | |
186 | uint32_t trailing_bytes; /* data desc: bytes 4092-4095 of the | |
187 | data sector (the 4 bytes after VHDXLogDataSector.data) */ | |
188 | }; | |
189 | union { | |
190 | uint64_t zero_length; /* zero desc: length of the section to | |
191 | zero */ | |
192 | uint64_t leading_bytes; /* data desc: bytes 0-7 of the data | |
193 | sector */ | |
194 | }; | |
195 | uint64_t file_offset; /* file offset to write zeros - multiple | |
196 | of 4kB */ | |
197 | uint64_t sequence_number; /* must match same field in | |
198 | vhdx_log_entry_header */ | |
199 | } VHDXLogDescriptor; | |
200 | ||
625565d2 | 201 | #define VHDX_LOG_DATA_SIGNATURE 0x61746164 /* the bytes 'd','a','t','a' when stored little-endian */ |
203cdba3 JC |
202 | typedef struct QEMU_PACKED VHDXLogDataSector { |
203 | uint32_t data_signature; /* "data" in ASCII */ | |
204 | uint32_t sequence_high; /* 4 most-significant bytes of the 8 byte sequence_number */ | |
205 | uint8_t data[4084]; /* raw data, bytes 8-4091 (inclusive). | |
206 | see the data descriptor field for the | |
207 | other missing bytes */ | |
208 | uint32_t sequence_low; /* 4 least-significant bytes of the 8 byte sequence_number */ | |
209 | } VHDXLogDataSector; | |
210 | ||
211 | ||
212 | ||
213 | /* block states - different state values depending on whether it is a | |
214 | * payload block, or a sector block. All values fit in the 3 bits selected | |
 * by VHDX_BAT_STATE_BIT_MASK; values 3 and 4 are not defined here. */ | |
215 | ||
216 | #define PAYLOAD_BLOCK_NOT_PRESENT 0 | |
217 | #define PAYLOAD_BLOCK_UNDEFINED 1 | |
218 | #define PAYLOAD_BLOCK_ZERO 2 | |
219 | #define PAYLOAD_BLOCK_UNMAPPED 5 | |
220 | #define PAYLOAD_BLOCK_FULL_PRESENT 6 | |
221 | #define PAYLOAD_BLOCK_PARTIALLY_PRESENT 7 | |
222 | ||
223 | #define SB_BLOCK_NOT_PRESENT 0 /* "SB" presumably stands for sector bitmap block - confirm */ | |
224 | #define SB_BLOCK_PRESENT 6 | |
225 | ||
226 | /* per the spec */ | |
625565d2 | 227 | #define VHDX_MAX_SECTORS_PER_BLOCK (1 << 23) |
203cdba3 JC |
228 | |
229 | /* A BAT entry is a 64-bit word: the upper 44 bits are the file offset in 1MB units, the lower 3 bits are the state, | |
230 | and the other bits are reserved */ | |
231 | #define VHDX_BAT_STATE_BIT_MASK 0x07 /* selects the 3 state bits */ | |
625565d2 | 232 | #define VHDX_BAT_FILE_OFF_BITS (64 - 44) /* 20: number of bits below the offset field */ |
1a848fd4 | 233 | #define VHDX_BAT_FILE_OFF_MASK 0xFFFFFFFFFFF00000 /* upper 44 bits; since 1MB is 2^20, the masked value is already a byte offset */ |
203cdba3 JC |
234 | typedef uint64_t VHDXBatEntry; |
235 | ||
236 | /* ---- METADATA REGION STRUCTURES ---- */ | |
237 | ||
238 | #define VHDX_METADATA_ENTRY_SIZE 32 /* bytes; matches the summed field sizes of VHDXMetadataTableEntry and of the table header */ | |
239 | #define VHDX_METADATA_MAX_ENTRIES 2047 /* not including the header */ | |
240 | #define VHDX_METADATA_TABLE_MAX_SIZE \ | |
241 | (VHDX_METADATA_ENTRY_SIZE * (VHDX_METADATA_MAX_ENTRIES+1)) /* 32 * 2048 = 64KB; the +1 accounts for the header slot */ | |
242 | typedef struct QEMU_PACKED VHDXMetadataTableHeader { | |
243 | uint64_t signature; /* "metadata" in ASCII */ | |
244 | uint16_t reserved; | |
245 | uint16_t entry_count; /* number of table entries. <= 2047 */ | |
246 | uint32_t reserved2[5]; | |
247 | } VHDXMetadataTableHeader; | |
248 | ||
249 | #define VHDX_META_FLAGS_IS_USER 0x01 /* presumably marks a user (rather than system) metadata entry - confirm; max 1024 entries */ | |
250 | #define VHDX_META_FLAGS_IS_VIRTUAL_DISK 0x02 /* virtual disk metadata if set, | |
251 | otherwise file metadata */ | |
252 | #define VHDX_META_FLAGS_IS_REQUIRED 0x04 /* parser must understand this | |
253 | entry to open the file */ | |
254 | typedef struct QEMU_PACKED VHDXMetadataTableEntry { | |
255 | MSGUID item_id; /* 128-bit identifier for metadata */ | |
256 | uint32_t offset; /* byte offset of the metadata. At | |
257 | least 64kB. Relative to start of | |
258 | metadata region */ | |
259 | /* note: if length = 0, so is offset */ | |
260 | uint32_t length; /* length of metadata. <= 1MB. */ | |
625565d2 JC |
261 | uint32_t data_bits; /* least-significant 3 bits are flags |
262 | (VHDX_META_FLAGS_* above), the rest are reserved */ | |
203cdba3 JC |
263 | uint32_t reserved2; |
264 | } VHDXMetadataTableEntry; | |
265 | ||
266 | #define VHDX_PARAMS_LEAVE_BLOCKS_ALLOCED 0x01 /* Do not change any blocks to | |
267 | be BLOCK_NOT_PRESENT (presumably the *_BLOCK_NOT_PRESENT states defined in this file). | |
268 | If set, indicates a fixed | |
269 | size VHDX file */ | |
270 | #define VHDX_PARAMS_HAS_PARENT 0x02 /* has parent / backing file */ | |
271 | typedef struct QEMU_PACKED VHDXFileParameters { | |
272 | uint32_t block_size; /* size of each payload block, always | |
273 | power of 2, <= 256MB and >= 1MB. */ | |
625565d2 JC |
274 | uint32_t data_bits; /* least-significant 2 bits are flags |
275 | (VHDX_PARAMS_* above), the rest are reserved */ | |
203cdba3 JC |
276 | } VHDXFileParameters; |
277 | ||
278 | typedef struct QEMU_PACKED VHDXVirtualDiskSize { /* single-field metadata item */ | |
279 | uint64_t virtual_disk_size; /* Size of the virtual disk, in bytes. | |
280 | Must be a multiple of the sector size, | |
281 | max of 64TB */ | |
282 | } VHDXVirtualDiskSize; | |
283 | ||
284 | typedef struct QEMU_PACKED VHDXPage83Data { | |
285 | MSGUID page_83_data[16]; /* unique id for scsi devices that | |
286 | support page 0x83. NOTE(review): MSGUID is already 16 bytes, so this array of 16 occupies 256 bytes; if the on-disk item is a single 128-bit id, the [16] is a sizing bug - confirm against the spec and callers before changing */ | |
287 | } VHDXPage83Data; | |
288 | ||
289 | typedef struct QEMU_PACKED VHDXVirtualDiskLogicalSectorSize { /* single-field metadata item */ | |
290 | uint32_t logical_sector_size; /* virtual disk sector size (in bytes). | |
291 | Can only be 512 or 4096 bytes */ | |
292 | } VHDXVirtualDiskLogicalSectorSize; | |
293 | ||
294 | typedef struct QEMU_PACKED VHDXVirtualDiskPhysicalSectorSize { /* single-field metadata item */ | |
295 | uint32_t physical_sector_size; /* physical sector size (in bytes). | |
296 | Can only be 512 or 4096 bytes */ | |
297 | } VHDXVirtualDiskPhysicalSectorSize; | |
298 | ||
299 | typedef struct QEMU_PACKED VHDXParentLocatorHeader { | |
300 | MSGUID locator_type[16]; /* type of the parent virtual disk. NOTE(review): MSGUID is already 16 bytes, so this array of 16 occupies 256 bytes and pushes the fields below to offset 256; if the on-disk type is a single GUID, the [16] is a sizing bug - confirm against the spec and callers before changing */ | |
301 | uint16_t reserved; | |
302 | uint16_t key_value_count; /* number of key/value pairs for this | |
303 | locator */ | |
304 | } VHDXParentLocatorHeader; | |
305 | ||
306 | /* One key/value pair of a parent locator. Key and value strings are UNICODE strings, UTF-16 LE encoding, no NULs */ | |
307 | typedef struct QEMU_PACKED VHDXParentLocatorEntry { | |
308 | uint32_t key_offset; /* offset in metadata for key, > 0 */ | |
309 | uint32_t value_offset; /* offset in metadata for value, > 0 */ | |
310 | uint16_t key_length; /* length of entry key, > 0 (unit, bytes vs. UTF-16 code units, is not stated here - confirm) */ | |
311 | uint16_t value_length; /* length of entry value, > 0 (same unit as key_length) */ | |
312 | } VHDXParentLocatorEntry; | |
313 | ||
314 | ||
315 | /* ----- END VHDX SPECIFICATION STRUCTURES ---- */ | |
316 | ||
28541d46 JC |
317 | typedef struct VHDXMetadataEntries { /* in-memory (not QEMU_PACKED) collection: one table entry slot per known metadata item */ |
318 | VHDXMetadataTableEntry file_parameters_entry; | |
319 | VHDXMetadataTableEntry virtual_disk_size_entry; | |
320 | VHDXMetadataTableEntry page83_data_entry; | |
321 | VHDXMetadataTableEntry logical_sector_size_entry; | |
322 | VHDXMetadataTableEntry phys_sector_size_entry; | |
323 | VHDXMetadataTableEntry parent_locator_entry; | |
324 | uint16_t present; /* presumably a bitmask recording which of the entries above were found - confirm against the parser */ | |
325 | } VHDXMetadataEntries; | |
326 | ||
625565d2 JC |
327 | typedef struct VHDXLogEntries { /* in-memory (not QEMU_PACKED) log bookkeeping; embedded in BDRVVHDXState as .log */ |
328 | uint64_t offset; /* presumably mirrors VHDXHeader.log_offset - confirm */ | |
329 | uint64_t length; /* presumably mirrors VHDXHeader.log_length - confirm */ | |
0a43a1b5 JC |
330 | uint32_t write; /* presumably the current write position within the log - confirm */ |
331 | uint32_t read; /* presumably the current read position within the log - confirm */ | |
332 | VHDXLogEntryHeader *hdr; /* pointer to a log entry header; ownership is not visible here */ | |
333 | void *desc_buffer; /* untyped buffer, by its name for descriptors; ownership is not visible here */ | |
334 | uint64_t sequence; | |
625565d2 JC |
335 | uint32_t tail; |
336 | } VHDXLogEntries; | |
337 | ||
1a848fd4 JC |
338 | typedef struct VHDXRegionEntry { /* node of the list whose head is BDRVVHDXState.regions */ |
339 | uint64_t start; /* presumably the first byte offset of the region - confirm */ | |
340 | uint64_t end; /* presumably the end offset of the region; inclusive vs. exclusive is not visible here - confirm */ | |
341 | QLIST_ENTRY(VHDXRegionEntry) entries; /* list linkage */ | |
342 | } VHDXRegionEntry; | |
343 | ||
28541d46 JC |
344 | typedef struct BDRVVHDXState { /* per-image driver state; passed as `s` to the vhdx_* functions declared below */ | |
345 | CoMutex lock; | |
346 | ||
347 | int curr_header; /* presumably the index (0 or 1) into headers[] of the header in use - confirm */ | |
348 | VHDXHeader *headers[2]; /* one pointer per on-disk header (the file holds two) */ | |
349 | ||
350 | VHDXRegionTableHeader rt; | |
351 | VHDXRegionTableEntry bat_rt; /* region table for the BAT */ | |
352 | VHDXRegionTableEntry metadata_rt; /* region table for the metadata */ | |
353 | ||
354 | VHDXMetadataTableHeader metadata_hdr; | |
355 | VHDXMetadataEntries metadata_entries; | |
356 | ||
357 | VHDXFileParameters params; | |
358 | uint32_t block_size; | |
359 | uint32_t block_size_bits; /* the *_bits fields are presumably log2 of their counterparts - confirm */ | |
360 | uint32_t sectors_per_block; | |
361 | uint32_t sectors_per_block_bits; | |
362 | ||
363 | uint64_t virtual_disk_size; | |
364 | uint32_t logical_sector_size; | |
365 | uint32_t physical_sector_size; | |
366 | ||
367 | uint64_t chunk_ratio; | |
368 | uint32_t chunk_ratio_bits; | |
369 | uint32_t logical_sector_size_bits; | |
370 | ||
371 | uint32_t bat_entries; /* presumably the number of elements in bat[] - confirm */ | |
372 | VHDXBatEntry *bat; | |
373 | uint64_t bat_offset; | |
374 | ||
c3906c5e | 375 | bool first_visible_write; |
28541d46 JC |
376 | MSGUID session_guid; |
377 | ||
625565d2 JC |
378 | VHDXLogEntries log; |
379 | ||
28541d46 JC |
380 | VHDXParentLocatorHeader parent_header; |
381 | VHDXParentLocatorEntry *parent_entries; | |
382 | ||
383 | Error *migration_blocker; | |
1a848fd4 JC |
384 | |
385 | QLIST_HEAD(VHDXRegionHead, VHDXRegionEntry) regions; /* head of the VHDXRegionEntry list */ | |
28541d46 | 386 | } BDRVVHDXState; |
e8d4e5ff | 387 | |
4f18b782 JC |
388 | void vhdx_guid_generate(MSGUID *guid); /* NOTE(review): defined elsewhere; by its name, fills *guid with a newly generated GUID - confirm */ |
389 | ||
c3906c5e JC |
390 | int vhdx_update_headers(BlockDriverState *bs, BDRVVHDXState *s, bool rw, |
391 | MSGUID *log_guid); /* NOTE(review): defined elsewhere; return convention and the meaning of rw / log_guid are not visible here */ | |
392 | ||
4f18b782 | 393 | uint32_t vhdx_update_checksum(uint8_t *buf, size_t size, int crc_offset); /* crc_offset is presumably the byte offset of the checksum field inside buf - confirm */ |
e8d4e5ff JC |
394 | uint32_t vhdx_checksum_calc(uint32_t crc, uint8_t *buf, size_t size, |
395 | int crc_offset); /* crc is presumably the starting CRC value - confirm */ | |
396 | ||
397 | bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset); /* same (buf, size, crc_offset) parameters as the two checksum functions above */ | |
398 | ||
399 | int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed); /* *flushed is an out-parameter; its exact meaning is not visible here */ | |
e8d4e5ff | 400 | |
4f18b782 | 401 | static inline void leguid_to_cpus(MSGUID *guid) |
e8d4e5ff JC |
402 | { /* converts data1/data2/data3 in place; le*_to_cpus are presumably little-endian-to-host swaps - confirm. data4 is a byte array and is left alone */ |
403 | le32_to_cpus(&guid->data1); | |
404 | le16_to_cpus(&guid->data2); | |
405 | le16_to_cpus(&guid->data3); | |
406 | } | |
407 | ||
4f18b782 JC |
408 | static inline void cpu_to_leguids(MSGUID *guid) |
409 | { /* converts data1/data2/data3 in place; cpu_to_le*s are presumably host-to-little-endian swaps - confirm. data4 is a byte array and is left alone */ | |
410 | cpu_to_le32s(&guid->data1); | |
411 | cpu_to_le16s(&guid->data2); | |
412 | cpu_to_le16s(&guid->data3); | |
413 | } | |
414 | ||
0f48e8f0 JC |
415 | void vhdx_header_le_import(VHDXHeader *h); /* NOTE(review): the *_le_import / *_le_export functions are defined elsewhere; by name they convert between little-endian on-disk and host byte order - confirm */ |
416 | void vhdx_header_le_export(VHDXHeader *orig_h, VHDXHeader *new_h); /* unlike the others, takes separate source and destination structs */ | |
417 | void vhdx_log_desc_le_import(VHDXLogDescriptor *d); | |
418 | void vhdx_log_desc_le_export(VHDXLogDescriptor *d); | |
419 | void vhdx_log_data_le_export(VHDXLogDataSector *d); /* no matching import function is declared in this header */ | |
420 | void vhdx_log_entry_hdr_le_import(VHDXLogEntryHeader *hdr); | |
421 | void vhdx_log_entry_hdr_le_export(VHDXLogEntryHeader *hdr); | |
422 | ||
c3906c5e JC |
423 | int vhdx_user_visible_write(BlockDriverState *bs, BDRVVHDXState *s); /* NOTE(review): defined elsewhere; return convention not visible here */ |
424 | ||
203cdba3 | 425 | #endif |