]> git.proxmox.com Git - qemu.git/blame - block/vmdk.c
VMDK: separate vmdk_open by format version
[qemu.git] / block / vmdk.c
CommitLineData
ea2384d3
FB
1/*
2 * Block driver for the VMDK format
5fafdf24 3 *
ea2384d3 4 * Copyright (c) 2004 Fabrice Bellard
ff1afc72 5 * Copyright (c) 2005 Filip Navara
5fafdf24 6 *
ea2384d3
FB
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
5f4da8c0 25
faf07963 26#include "qemu-common.h"
ea2384d3 27#include "block_int.h"
5efa9d5a 28#include "module.h"
ea2384d3 29
ea2384d3
FB
/* Image signatures, compared against the first four bytes of the file read
 * as a big-endian 32-bit value: ASCII "COWD" (VMDK3) and "KDMV" (VMDK4). */
#define VMDK3_MAGIC 0x434f5744 /* 'C' 'O' 'W' 'D' */
#define VMDK4_MAGIC 0x4b444d56 /* 'K' 'D' 'M' 'V' */
32
/*
 * Header of a VMDK3 ("COWD") image as read from the file immediately after
 * the 4-byte magic.  Fields are little-endian on disk; this driver only
 * consumes disk_sectors, l1dir_offset and granularity.
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;      /* used as the extent's sector count */
    uint32_t granularity;       /* used as the extent's sectors per cluster */
    uint32_t l1dir_offset;      /* L1 table position, shifted left by 9 to get bytes */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
45
/*
 * Header of a VMDK4 ("KDMV") image as read from the file immediately after
 * the 4-byte magic.  The struct is packed so that it can be copied to and
 * from the on-disk bytes directly; multi-byte fields are little-endian on
 * disk.  Offsets and sizes are expressed in 512-byte sectors (the driver
 * shifts them left by 9 to obtain byte offsets).
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    int64_t capacity;           /* extent size in sectors */
    int64_t granularity;        /* sectors per grain (cluster) */
    int64_t desc_offset;        /* embedded descriptor position */
    int64_t desc_size;          /* embedded descriptor length */
    int32_t num_gtes_per_gte;   /* entries per L2 (grain) table */
    int64_t rgd_offset;         /* used as the backup L1 table position */
    int64_t gd_offset;          /* used as the L1 table position */
    int64_t grain_offset;       /* file is truncated to this at creation */
    char filler[1];
    char check_bytes[4];        /* set to 0x0a 0x20 0x0d 0x0a at creation */
} __attribute__((packed)) VMDK4Header;
ea2384d3
FB
60
61#define L2_CACHE_SIZE 16
62
b3976d3c
FZ
63typedef struct VmdkExtent {
64 BlockDriverState *file;
65 bool flat;
66 int64_t sectors;
67 int64_t end_sector;
ea2384d3 68 int64_t l1_table_offset;
ff1afc72 69 int64_t l1_backup_table_offset;
ea2384d3 70 uint32_t *l1_table;
ff1afc72 71 uint32_t *l1_backup_table;
ea2384d3
FB
72 unsigned int l1_size;
73 uint32_t l1_entry_sectors;
74
75 unsigned int l2_size;
76 uint32_t *l2_cache;
77 uint32_t l2_cache_offsets[L2_CACHE_SIZE];
78 uint32_t l2_cache_counts[L2_CACHE_SIZE];
79
80 unsigned int cluster_sectors;
b3976d3c
FZ
81} VmdkExtent;
82
83typedef struct BDRVVmdkState {
5f4da8c0 84 uint32_t parent_cid;
b3976d3c
FZ
85 int num_extents;
86 /* Extent array with num_extents entries, ascend ordered by address */
87 VmdkExtent *extents;
ea2384d3
FB
88} BDRVVmdkState;
89
630530a6
TS
/*
 * Describes an L2 entry that was allocated in memory by get_cluster_offset()
 * and still has to be written to disk by vmdk_L2update().
 */
typedef struct VmdkMetaData {
    uint32_t offset;            /* new L2 entry value, already little-endian */
    unsigned int l1_index;      /* index of the owning L1 entry */
    unsigned int l2_index;      /* index of the entry inside its L2 table */
    unsigned int l2_offset;     /* sector offset of the L2 table in the file */
    int valid;                  /* non-zero when the fields above are set */
} VmdkMetaData;
97
ea2384d3
FB
98static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
99{
100 uint32_t magic;
101
102 if (buf_size < 4)
103 return 0;
104 magic = be32_to_cpu(*(uint32_t *)buf);
105 if (magic == VMDK3_MAGIC ||
01fc99d6 106 magic == VMDK4_MAGIC) {
ea2384d3 107 return 100;
01fc99d6
FZ
108 } else {
109 const char *p = (const char *)buf;
110 const char *end = p + buf_size;
111 while (p < end) {
112 if (*p == '#') {
113 /* skip comment line */
114 while (p < end && *p != '\n') {
115 p++;
116 }
117 p++;
118 continue;
119 }
120 if (*p == ' ') {
121 while (p < end && *p == ' ') {
122 p++;
123 }
124 /* skip '\r' if windows line endings used. */
125 if (p < end && *p == '\r') {
126 p++;
127 }
128 /* only accept blank lines before 'version=' line */
129 if (p == end || *p != '\n') {
130 return 0;
131 }
132 p++;
133 continue;
134 }
135 if (end - p >= strlen("version=X\n")) {
136 if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
137 strncmp("version=2\n", p, strlen("version=2\n")) == 0) {
138 return 100;
139 }
140 }
141 if (end - p >= strlen("version=X\r\n")) {
142 if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
143 strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) {
144 return 100;
145 }
146 }
147 return 0;
148 }
ea2384d3 149 return 0;
01fc99d6 150 }
ea2384d3
FB
151}
152
5f4da8c0
TS
/* When defined, vmdk_is_cid_valid() compares the stored parent CID with the
 * backing image's current CID. */
#define CHECK_CID 1

#define SECTOR_SIZE 512
/* Parenthesized so the macro stays a single operand inside larger
 * expressions such as "x / DESC_SIZE" or "x % DESC_SIZE". */
#define DESC_SIZE (20 * SECTOR_SIZE)    /* 20 sectors of 512 bytes each */
#define HEADER_SIZE 512                 /* first sector of 512 bytes */
5f4da8c0 158
b3976d3c
FZ
159static void vmdk_free_extents(BlockDriverState *bs)
160{
161 int i;
162 BDRVVmdkState *s = bs->opaque;
163
164 for (i = 0; i < s->num_extents; i++) {
165 qemu_free(s->extents[i].l1_table);
166 qemu_free(s->extents[i].l2_cache);
167 qemu_free(s->extents[i].l1_backup_table);
168 }
169 qemu_free(s->extents);
170}
171
5f4da8c0 172static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
ea2384d3 173{
5f4da8c0
TS
174 char desc[DESC_SIZE];
175 uint32_t cid;
7ccfb2eb 176 const char *p_name, *cid_str;
5f4da8c0
TS
177 size_t cid_str_size;
178
179 /* the descriptor offset = 0x200 */
6511ef77 180 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
5f4da8c0
TS
181 return 0;
182
183 if (parent) {
184 cid_str = "parentCID";
185 cid_str_size = sizeof("parentCID");
186 } else {
187 cid_str = "CID";
188 cid_str_size = sizeof("CID");
189 }
190
511d2b14 191 if ((p_name = strstr(desc,cid_str)) != NULL) {
5f4da8c0
TS
192 p_name += cid_str_size;
193 sscanf(p_name,"%x",&cid);
194 }
195
196 return cid;
197}
198
199static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
200{
5f4da8c0
TS
201 char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
202 char *p_name, *tmp_str;
203
204 /* the descriptor offset = 0x200 */
6511ef77 205 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
5f4da8c0
TS
206 return -1;
207
208 tmp_str = strstr(desc,"parentCID");
363a37d5 209 pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
511d2b14 210 if ((p_name = strstr(desc,"CID")) != NULL) {
5f4da8c0 211 p_name += sizeof("CID");
363a37d5
BS
212 snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
213 pstrcat(desc, sizeof(desc), tmp_desc);
5f4da8c0
TS
214 }
215
b8852e87 216 if (bdrv_pwrite_sync(bs->file, 0x200, desc, DESC_SIZE) < 0)
5f4da8c0
TS
217 return -1;
218 return 0;
219}
220
221static int vmdk_is_cid_valid(BlockDriverState *bs)
222{
223#ifdef CHECK_CID
224 BDRVVmdkState *s = bs->opaque;
b171271a 225 BlockDriverState *p_bs = bs->backing_hd;
5f4da8c0
TS
226 uint32_t cur_pcid;
227
228 if (p_bs) {
229 cur_pcid = vmdk_read_cid(p_bs,0);
230 if (s->parent_cid != cur_pcid)
231 // CID not valid
232 return 0;
233 }
234#endif
235 // CID valid
236 return 1;
237}
238
239static int vmdk_snapshot_create(const char *filename, const char *backing_file)
240{
241 int snp_fd, p_fd;
53c2e716 242 int ret;
5f4da8c0 243 uint32_t p_cid;
5fafdf24 244 char *p_name, *gd_buf, *rgd_buf;
5f4da8c0
TS
245 const char *real_filename, *temp_str;
246 VMDK4Header header;
247 uint32_t gde_entries, gd_size;
248 int64_t gd_offset, rgd_offset, capacity, gt_size;
249 char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
7ccfb2eb 250 static const char desc_template[] =
5f4da8c0
TS
251 "# Disk DescriptorFile\n"
252 "version=1\n"
253 "CID=%x\n"
254 "parentCID=%x\n"
255 "createType=\"monolithicSparse\"\n"
256 "parentFileNameHint=\"%s\"\n"
257 "\n"
258 "# Extent description\n"
7ccfb2eb 259 "RW %u SPARSE \"%s\"\n"
5f4da8c0
TS
260 "\n"
261 "# The Disk Data Base \n"
262 "#DDB\n"
263 "\n";
264
265 snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
266 if (snp_fd < 0)
53c2e716 267 return -errno;
5f4da8c0
TS
268 p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
269 if (p_fd < 0) {
270 close(snp_fd);
53c2e716 271 return -errno;
5f4da8c0
TS
272 }
273
274 /* read the header */
53c2e716
JQ
275 if (lseek(p_fd, 0x0, SEEK_SET) == -1) {
276 ret = -errno;
5f4da8c0 277 goto fail;
53c2e716
JQ
278 }
279 if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) {
280 ret = -errno;
5f4da8c0 281 goto fail;
53c2e716 282 }
5f4da8c0
TS
283
284 /* write the header */
53c2e716
JQ
285 if (lseek(snp_fd, 0x0, SEEK_SET) == -1) {
286 ret = -errno;
5f4da8c0 287 goto fail;
53c2e716
JQ
288 }
289 if (write(snp_fd, hdr, HEADER_SIZE) == -1) {
290 ret = -errno;
5f4da8c0 291 goto fail;
53c2e716 292 }
5f4da8c0
TS
293
294 memset(&header, 0, sizeof(header));
295 memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
296
53c2e716
JQ
297 if (ftruncate(snp_fd, header.grain_offset << 9)) {
298 ret = -errno;
1640366c 299 goto fail;
53c2e716 300 }
5f4da8c0 301 /* the descriptor offset = 0x200 */
53c2e716
JQ
302 if (lseek(p_fd, 0x200, SEEK_SET) == -1) {
303 ret = -errno;
5f4da8c0 304 goto fail;
53c2e716
JQ
305 }
306 if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) {
307 ret = -errno;
5f4da8c0 308 goto fail;
53c2e716 309 }
5f4da8c0 310
511d2b14 311 if ((p_name = strstr(p_desc,"CID")) != NULL) {
5f4da8c0
TS
312 p_name += sizeof("CID");
313 sscanf(p_name,"%x",&p_cid);
314 }
315
316 real_filename = filename;
317 if ((temp_str = strrchr(real_filename, '\\')) != NULL)
318 real_filename = temp_str + 1;
319 if ((temp_str = strrchr(real_filename, '/')) != NULL)
320 real_filename = temp_str + 1;
321 if ((temp_str = strrchr(real_filename, ':')) != NULL)
322 real_filename = temp_str + 1;
323
363a37d5
BS
324 snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
325 (uint32_t)header.capacity, real_filename);
5f4da8c0
TS
326
327 /* write the descriptor */
53c2e716
JQ
328 if (lseek(snp_fd, 0x200, SEEK_SET) == -1) {
329 ret = -errno;
5f4da8c0 330 goto fail;
53c2e716
JQ
331 }
332 if (write(snp_fd, s_desc, strlen(s_desc)) == -1) {
333 ret = -errno;
5f4da8c0 334 goto fail;
53c2e716 335 }
ea2384d3 336
5f4da8c0
TS
337 gd_offset = header.gd_offset * SECTOR_SIZE; // offset of GD table
338 rgd_offset = header.rgd_offset * SECTOR_SIZE; // offset of RGD table
339 capacity = header.capacity * SECTOR_SIZE; // Extent size
340 /*
341 * Each GDE span 32M disk, means:
342 * 512 GTE per GT, each GTE points to grain
343 */
344 gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
53c2e716
JQ
345 if (!gt_size) {
346 ret = -EINVAL;
5f4da8c0 347 goto fail;
53c2e716 348 }
5fafdf24 349 gde_entries = (uint32_t)(capacity / gt_size); // number of gde/rgde
5f4da8c0
TS
350 gd_size = gde_entries * sizeof(uint32_t);
351
352 /* write RGD */
353 rgd_buf = qemu_malloc(gd_size);
53c2e716
JQ
354 if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) {
355 ret = -errno;
5f4da8c0 356 goto fail_rgd;
53c2e716
JQ
357 }
358 if (read(p_fd, rgd_buf, gd_size) != gd_size) {
359 ret = -errno;
5f4da8c0 360 goto fail_rgd;
53c2e716
JQ
361 }
362 if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) {
363 ret = -errno;
5f4da8c0 364 goto fail_rgd;
53c2e716
JQ
365 }
366 if (write(snp_fd, rgd_buf, gd_size) == -1) {
367 ret = -errno;
5f4da8c0 368 goto fail_rgd;
53c2e716 369 }
5f4da8c0
TS
370
371 /* write GD */
372 gd_buf = qemu_malloc(gd_size);
53c2e716
JQ
373 if (lseek(p_fd, gd_offset, SEEK_SET) == -1) {
374 ret = -errno;
5f4da8c0 375 goto fail_gd;
53c2e716
JQ
376 }
377 if (read(p_fd, gd_buf, gd_size) != gd_size) {
378 ret = -errno;
5f4da8c0 379 goto fail_gd;
53c2e716
JQ
380 }
381 if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) {
382 ret = -errno;
5f4da8c0 383 goto fail_gd;
53c2e716
JQ
384 }
385 if (write(snp_fd, gd_buf, gd_size) == -1) {
386 ret = -errno;
5f4da8c0 387 goto fail_gd;
53c2e716 388 }
3829cb46 389 ret = 0;
5f4da8c0 390
3829cb46 391fail_gd:
5f4da8c0 392 qemu_free(gd_buf);
3829cb46 393fail_rgd:
5f4da8c0 394 qemu_free(rgd_buf);
3829cb46 395fail:
5f4da8c0
TS
396 close(p_fd);
397 close(snp_fd);
53c2e716 398 return ret;
5f4da8c0
TS
399}
400
9949f97e 401static int vmdk_parent_open(BlockDriverState *bs)
5f4da8c0 402{
5fafdf24 403 char *p_name;
5f4da8c0 404 char desc[DESC_SIZE];
5f4da8c0
TS
405
406 /* the descriptor offset = 0x200 */
6511ef77 407 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
5f4da8c0
TS
408 return -1;
409
511d2b14 410 if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
5f4da8c0 411 char *end_name;
5f4da8c0
TS
412
413 p_name += sizeof("parentFileNameHint") + 1;
511d2b14 414 if ((end_name = strchr(p_name,'\"')) == NULL)
5f4da8c0 415 return -1;
b171271a 416 if ((end_name - p_name) > sizeof (bs->backing_file) - 1)
b34d259a 417 return -1;
3b46e624 418
b171271a 419 pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
ff1afc72 420 }
5f4da8c0
TS
421
422 return 0;
423}
424
b3976d3c
FZ
425/* Create and append extent to the extent array. Return the added VmdkExtent
426 * address. return NULL if allocation failed. */
427static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
428 BlockDriverState *file, bool flat, int64_t sectors,
429 int64_t l1_offset, int64_t l1_backup_offset,
430 uint32_t l1_size,
431 int l2_size, unsigned int cluster_sectors)
432{
433 VmdkExtent *extent;
434 BDRVVmdkState *s = bs->opaque;
435
436 s->extents = qemu_realloc(s->extents,
437 (s->num_extents + 1) * sizeof(VmdkExtent));
438 extent = &s->extents[s->num_extents];
439 s->num_extents++;
440
441 memset(extent, 0, sizeof(VmdkExtent));
442 extent->file = file;
443 extent->flat = flat;
444 extent->sectors = sectors;
445 extent->l1_table_offset = l1_offset;
446 extent->l1_backup_table_offset = l1_backup_offset;
447 extent->l1_size = l1_size;
448 extent->l1_entry_sectors = l2_size * cluster_sectors;
449 extent->l2_size = l2_size;
450 extent->cluster_sectors = cluster_sectors;
451
452 if (s->num_extents > 1) {
453 extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
454 } else {
455 extent->end_sector = extent->sectors;
456 }
457 bs->total_sectors = extent->end_sector;
458 return extent;
459}
460
b4b3ab14 461static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
5f4da8c0 462{
b4b3ab14
FZ
463 int ret;
464 int l1_size, i;
5f4da8c0 465
ea2384d3 466 /* read the L1 table */
b3976d3c
FZ
467 l1_size = extent->l1_size * sizeof(uint32_t);
468 extent->l1_table = qemu_malloc(l1_size);
b4b3ab14
FZ
469 ret = bdrv_pread(extent->file,
470 extent->l1_table_offset,
471 extent->l1_table,
472 l1_size);
473 if (ret < 0) {
474 goto fail_l1;
b3976d3c
FZ
475 }
476 for (i = 0; i < extent->l1_size; i++) {
477 le32_to_cpus(&extent->l1_table[i]);
ea2384d3
FB
478 }
479
b3976d3c
FZ
480 if (extent->l1_backup_table_offset) {
481 extent->l1_backup_table = qemu_malloc(l1_size);
b4b3ab14
FZ
482 ret = bdrv_pread(extent->file,
483 extent->l1_backup_table_offset,
484 extent->l1_backup_table,
485 l1_size);
486 if (ret < 0) {
487 goto fail_l1b;
b3976d3c
FZ
488 }
489 for (i = 0; i < extent->l1_size; i++) {
490 le32_to_cpus(&extent->l1_backup_table[i]);
ff1afc72
FB
491 }
492 }
493
b3976d3c
FZ
494 extent->l2_cache =
495 qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
ea2384d3 496 return 0;
b4b3ab14
FZ
497 fail_l1b:
498 qemu_free(extent->l1_backup_table);
499 fail_l1:
500 qemu_free(extent->l1_table);
501 return ret;
502}
503
504static int vmdk_open_vmdk3(BlockDriverState *bs, int flags)
505{
506 int ret;
507 uint32_t magic;
508 VMDK3Header header;
509 VmdkExtent *extent;
510
511 ret = bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header));
512 if (ret < 0) {
513 goto fail;
514 }
515 extent = vmdk_add_extent(bs,
516 bs->file, false,
517 le32_to_cpu(header.disk_sectors),
518 le32_to_cpu(header.l1dir_offset) << 9,
519 0, 1 << 6, 1 << 9,
520 le32_to_cpu(header.granularity));
521 ret = vmdk_init_tables(bs, extent);
522 if (ret) {
523 /* vmdk_init_tables cleans up on fail, so only free allocation of
524 * vmdk_add_extent here. */
525 goto fail;
526 }
527 return 0;
ea2384d3 528 fail:
b3976d3c 529 vmdk_free_extents(bs);
b4b3ab14
FZ
530 return ret;
531}
532
533static int vmdk_open_vmdk4(BlockDriverState *bs, int flags)
534{
535 int ret;
536 uint32_t magic;
537 uint32_t l1_size, l1_entry_sectors;
538 VMDK4Header header;
539 BDRVVmdkState *s = bs->opaque;
540 VmdkExtent *extent;
541
542 ret = bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header));
543 if (ret < 0) {
544 goto fail;
545 }
546 l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
547 * le64_to_cpu(header.granularity);
548 l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
549 / l1_entry_sectors;
550 extent = vmdk_add_extent(bs, bs->file, false,
551 le64_to_cpu(header.capacity),
552 le64_to_cpu(header.gd_offset) << 9,
553 le64_to_cpu(header.rgd_offset) << 9,
554 l1_size,
555 le32_to_cpu(header.num_gtes_per_gte),
556 le64_to_cpu(header.granularity));
557 if (extent->l1_entry_sectors <= 0) {
558 ret = -EINVAL;
559 goto fail;
560 }
561 /* try to open parent images, if exist */
562 ret = vmdk_parent_open(bs);
563 if (ret) {
564 goto fail;
565 }
566 s->parent_cid = vmdk_read_cid(bs, 1);
567 ret = vmdk_init_tables(bs, extent);
568 if (ret) {
569 goto fail;
570 }
571 return 0;
572 fail:
573 vmdk_free_extents(bs);
574 return ret;
575}
576
577static int vmdk_open(BlockDriverState *bs, int flags)
578{
579 uint32_t magic;
580
581 if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic)) {
582 return -EIO;
583 }
584
585 magic = be32_to_cpu(magic);
586 if (magic == VMDK3_MAGIC) {
587 return vmdk_open_vmdk3(bs, flags);
588 } else if (magic == VMDK4_MAGIC) {
589 return vmdk_open_vmdk4(bs, flags);
590 } else {
591 return -EINVAL;
592 }
ea2384d3
FB
593}
594
b3976d3c
FZ
595static int get_whole_cluster(BlockDriverState *bs,
596 VmdkExtent *extent,
597 uint64_t cluster_offset,
598 uint64_t offset,
599 bool allocate)
5f4da8c0 600{
b3976d3c
FZ
601 /* 128 sectors * 512 bytes each = grain size 64KB */
602 uint8_t whole_grain[extent->cluster_sectors * 512];
5f4da8c0 603
0e69c543
FZ
604 /* we will be here if it's first write on non-exist grain(cluster).
605 * try to read from parent image, if exist */
b171271a 606 if (bs->backing_hd) {
c336500d 607 int ret;
5f4da8c0
TS
608
609 if (!vmdk_is_cid_valid(bs))
610 return -1;
5f4da8c0 611
0e69c543
FZ
612 /* floor offset to cluster */
613 offset -= offset % (extent->cluster_sectors * 512);
c336500d 614 ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
b3976d3c 615 extent->cluster_sectors);
c336500d
KW
616 if (ret < 0) {
617 return -1;
618 }
630530a6 619
0e69c543 620 /* Write grain only into the active image */
b3976d3c
FZ
621 ret = bdrv_write(extent->file, cluster_offset, whole_grain,
622 extent->cluster_sectors);
c336500d
KW
623 if (ret < 0) {
624 return -1;
630530a6
TS
625 }
626 }
627 return 0;
628}
629
b3976d3c 630static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
630530a6 631{
630530a6 632 /* update L2 table */
b3976d3c
FZ
633 if (bdrv_pwrite_sync(
634 extent->file,
635 ((int64_t)m_data->l2_offset * 512)
636 + (m_data->l2_index * sizeof(m_data->offset)),
637 &(m_data->offset),
638 sizeof(m_data->offset)
639 ) < 0) {
630530a6 640 return -1;
b3976d3c 641 }
630530a6 642 /* update backup L2 table */
b3976d3c
FZ
643 if (extent->l1_backup_table_offset != 0) {
644 m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
645 if (bdrv_pwrite_sync(
646 extent->file,
647 ((int64_t)m_data->l2_offset * 512)
648 + (m_data->l2_index * sizeof(m_data->offset)),
649 &(m_data->offset), sizeof(m_data->offset)
650 ) < 0) {
5f4da8c0 651 return -1;
b3976d3c 652 }
5f4da8c0 653 }
630530a6 654
5f4da8c0
TS
655 return 0;
656}
657
b3976d3c
FZ
658static uint64_t get_cluster_offset(BlockDriverState *bs,
659 VmdkExtent *extent,
660 VmdkMetaData *m_data,
661 uint64_t offset, int allocate)
ea2384d3 662{
ea2384d3
FB
663 unsigned int l1_index, l2_offset, l2_index;
664 int min_index, i, j;
630530a6 665 uint32_t min_count, *l2_table, tmp = 0;
ea2384d3 666 uint64_t cluster_offset;
630530a6
TS
667
668 if (m_data)
669 m_data->valid = 0;
670
b3976d3c
FZ
671 l1_index = (offset >> 9) / extent->l1_entry_sectors;
672 if (l1_index >= extent->l1_size) {
ea2384d3 673 return 0;
b3976d3c
FZ
674 }
675 l2_offset = extent->l1_table[l1_index];
676 if (!l2_offset) {
ea2384d3 677 return 0;
b3976d3c 678 }
b4b3ab14 679 for (i = 0; i < L2_CACHE_SIZE; i++) {
b3976d3c 680 if (l2_offset == extent->l2_cache_offsets[i]) {
ea2384d3 681 /* increment the hit count */
b3976d3c 682 if (++extent->l2_cache_counts[i] == 0xffffffff) {
b4b3ab14 683 for (j = 0; j < L2_CACHE_SIZE; j++) {
b3976d3c 684 extent->l2_cache_counts[j] >>= 1;
ea2384d3
FB
685 }
686 }
b3976d3c 687 l2_table = extent->l2_cache + (i * extent->l2_size);
ea2384d3
FB
688 goto found;
689 }
690 }
691 /* not found: load a new entry in the least used one */
692 min_index = 0;
693 min_count = 0xffffffff;
b4b3ab14 694 for (i = 0; i < L2_CACHE_SIZE; i++) {
b3976d3c
FZ
695 if (extent->l2_cache_counts[i] < min_count) {
696 min_count = extent->l2_cache_counts[i];
ea2384d3
FB
697 min_index = i;
698 }
699 }
b3976d3c
FZ
700 l2_table = extent->l2_cache + (min_index * extent->l2_size);
701 if (bdrv_pread(
702 extent->file,
703 (int64_t)l2_offset * 512,
704 l2_table,
705 extent->l2_size * sizeof(uint32_t)
706 ) != extent->l2_size * sizeof(uint32_t)) {
ea2384d3 707 return 0;
b3976d3c 708 }
5f4da8c0 709
b3976d3c
FZ
710 extent->l2_cache_offsets[min_index] = l2_offset;
711 extent->l2_cache_counts[min_index] = 1;
ea2384d3 712 found:
b3976d3c 713 l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
ea2384d3 714 cluster_offset = le32_to_cpu(l2_table[l2_index]);
630530a6 715
ff1afc72
FB
716 if (!cluster_offset) {
717 if (!allocate)
718 return 0;
9949f97e 719
630530a6 720 // Avoid the L2 tables update for the images that have snapshots.
b3976d3c
FZ
721 cluster_offset = bdrv_getlength(extent->file);
722 bdrv_truncate(
723 extent->file,
724 cluster_offset + (extent->cluster_sectors << 9)
725 );
9949f97e
KW
726
727 cluster_offset >>= 9;
728 tmp = cpu_to_le32(cluster_offset);
729 l2_table[l2_index] = tmp;
630530a6 730
630530a6
TS
731 /* First of all we write grain itself, to avoid race condition
732 * that may to corrupt the image.
733 * This problem may occur because of insufficient space on host disk
734 * or inappropriate VM shutdown.
735 */
b3976d3c
FZ
736 if (get_whole_cluster(
737 bs, extent, cluster_offset, offset, allocate) == -1)
5f4da8c0 738 return 0;
630530a6
TS
739
740 if (m_data) {
741 m_data->offset = tmp;
742 m_data->l1_index = l1_index;
743 m_data->l2_index = l2_index;
744 m_data->l2_offset = l2_offset;
745 m_data->valid = 1;
746 }
ff1afc72 747 }
ea2384d3
FB
748 cluster_offset <<= 9;
749 return cluster_offset;
750}
751
b3976d3c
FZ
752static VmdkExtent *find_extent(BDRVVmdkState *s,
753 int64_t sector_num, VmdkExtent *start_hint)
754{
755 VmdkExtent *extent = start_hint;
756
757 if (!extent) {
758 extent = &s->extents[0];
759 }
760 while (extent < &s->extents[s->num_extents]) {
761 if (sector_num < extent->end_sector) {
762 return extent;
763 }
764 extent++;
765 }
766 return NULL;
767}
768
5fafdf24 769static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
ea2384d3
FB
770 int nb_sectors, int *pnum)
771{
772 BDRVVmdkState *s = bs->opaque;
ea2384d3 773
b3976d3c
FZ
774 int64_t index_in_cluster, n, ret;
775 uint64_t offset;
776 VmdkExtent *extent;
777
778 extent = find_extent(s, sector_num, NULL);
779 if (!extent) {
780 return 0;
781 }
782 if (extent->flat) {
783 n = extent->end_sector - sector_num;
784 ret = 1;
785 } else {
786 offset = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0);
787 index_in_cluster = sector_num % extent->cluster_sectors;
788 n = extent->cluster_sectors - index_in_cluster;
789 ret = offset ? 1 : 0;
790 }
ea2384d3
FB
791 if (n > nb_sectors)
792 n = nb_sectors;
793 *pnum = n;
b3976d3c 794 return ret;
ea2384d3
FB
795}
796
5fafdf24 797static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
ea2384d3
FB
798 uint8_t *buf, int nb_sectors)
799{
800 BDRVVmdkState *s = bs->opaque;
b3976d3c
FZ
801 int ret;
802 uint64_t n, index_in_cluster;
803 VmdkExtent *extent = NULL;
ea2384d3 804 uint64_t cluster_offset;
5f4da8c0 805
ea2384d3 806 while (nb_sectors > 0) {
b3976d3c
FZ
807 extent = find_extent(s, sector_num, extent);
808 if (!extent) {
809 return -EIO;
810 }
811 cluster_offset = get_cluster_offset(
812 bs, extent, NULL, sector_num << 9, 0);
813 index_in_cluster = sector_num % extent->cluster_sectors;
814 n = extent->cluster_sectors - index_in_cluster;
ea2384d3
FB
815 if (n > nb_sectors)
816 n = nb_sectors;
817 if (!cluster_offset) {
5f4da8c0 818 // try to read from parent image, if exist
b171271a 819 if (bs->backing_hd) {
5f4da8c0
TS
820 if (!vmdk_is_cid_valid(bs))
821 return -1;
b171271a 822 ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
5f4da8c0
TS
823 if (ret < 0)
824 return -1;
825 } else {
826 memset(buf, 0, 512 * n);
827 }
ea2384d3 828 } else {
6511ef77 829 if(bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
ea2384d3
FB
830 return -1;
831 }
832 nb_sectors -= n;
833 sector_num += n;
834 buf += n * 512;
835 }
836 return 0;
837}
838
5fafdf24 839static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
ea2384d3
FB
840 const uint8_t *buf, int nb_sectors)
841{
ff1afc72 842 BDRVVmdkState *s = bs->opaque;
b3976d3c
FZ
843 VmdkExtent *extent = NULL;
844 int n;
845 int64_t index_in_cluster;
ff1afc72 846 uint64_t cluster_offset;
5f4da8c0 847 static int cid_update = 0;
b3976d3c 848 VmdkMetaData m_data;
ff1afc72 849
630530a6
TS
850 if (sector_num > bs->total_sectors) {
851 fprintf(stderr,
92868412
JM
852 "(VMDK) Wrong offset: sector_num=0x%" PRIx64
853 " total_sectors=0x%" PRIx64 "\n",
630530a6
TS
854 sector_num, bs->total_sectors);
855 return -1;
856 }
857
ff1afc72 858 while (nb_sectors > 0) {
b3976d3c
FZ
859 extent = find_extent(s, sector_num, extent);
860 if (!extent) {
861 return -EIO;
862 }
863 cluster_offset = get_cluster_offset(
864 bs,
865 extent,
866 &m_data,
867 sector_num << 9, 1);
868 if (!cluster_offset) {
ff1afc72 869 return -1;
b3976d3c
FZ
870 }
871 index_in_cluster = sector_num % extent->cluster_sectors;
872 n = extent->cluster_sectors - index_in_cluster;
873 if (n > nb_sectors) {
874 n = nb_sectors;
875 }
630530a6 876
b3976d3c
FZ
877 if (bdrv_pwrite(bs->file,
878 cluster_offset + index_in_cluster * 512,
879 buf, n * 512)
880 != n * 512) {
ff1afc72 881 return -1;
b3976d3c 882 }
630530a6
TS
883 if (m_data.valid) {
884 /* update L2 tables */
b3976d3c 885 if (vmdk_L2update(extent, &m_data) == -1) {
630530a6 886 return -1;
b3976d3c 887 }
630530a6 888 }
ff1afc72
FB
889 nb_sectors -= n;
890 sector_num += n;
891 buf += n * 512;
5f4da8c0
TS
892
893 // update CID on the first write every time the virtual disk is opened
894 if (!cid_update) {
895 vmdk_write_cid(bs, time(NULL));
896 cid_update++;
897 }
ff1afc72
FB
898 }
899 return 0;
ea2384d3
FB
900}
901
0e7e1989 902static int vmdk_create(const char *filename, QEMUOptionParameter *options)
8979b227
FB
903{
904 int fd, i;
905 VMDK4Header header;
906 uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
7ccfb2eb 907 static const char desc_template[] =
8979b227
FB
908 "# Disk DescriptorFile\n"
909 "version=1\n"
910 "CID=%x\n"
911 "parentCID=ffffffff\n"
912 "createType=\"monolithicSparse\"\n"
913 "\n"
914 "# Extent description\n"
7fd6d9fc 915 "RW %" PRId64 " SPARSE \"%s\"\n"
8979b227
FB
916 "\n"
917 "# The Disk Data Base \n"
918 "#DDB\n"
919 "\n"
ec36ba14 920 "ddb.virtualHWVersion = \"%d\"\n"
7fd6d9fc 921 "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
8979b227
FB
922 "ddb.geometry.heads = \"16\"\n"
923 "ddb.geometry.sectors = \"63\"\n"
924 "ddb.adapterType = \"ide\"\n";
925 char desc[1024];
926 const char *real_filename, *temp_str;
0e7e1989
KW
927 int64_t total_size = 0;
928 const char *backing_file = NULL;
929 int flags = 0;
1640366c 930 int ret;
0e7e1989
KW
931
932 // Read out options
933 while (options && options->name) {
934 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
935 total_size = options->value.n / 512;
936 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
937 backing_file = options->value.s;
938 } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
939 flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0;
940 }
941 options++;
942 }
8979b227
FB
943
944 /* XXX: add support for backing file */
5f4da8c0
TS
945 if (backing_file) {
946 return vmdk_snapshot_create(filename, backing_file);
947 }
8979b227
FB
948
949 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
950 0644);
951 if (fd < 0)
b781cce5 952 return -errno;
8979b227
FB
953 magic = cpu_to_be32(VMDK4_MAGIC);
954 memset(&header, 0, sizeof(header));
16372ff0
AG
955 header.version = 1;
956 header.flags = 3; /* ?? */
957 header.capacity = total_size;
958 header.granularity = 128;
959 header.num_gtes_per_gte = 512;
8979b227
FB
960
961 grains = (total_size + header.granularity - 1) / header.granularity;
962 gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
963 gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
964 gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
965
966 header.desc_offset = 1;
967 header.desc_size = 20;
968 header.rgd_offset = header.desc_offset + header.desc_size;
969 header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
970 header.grain_offset =
971 ((header.gd_offset + gd_size + (gt_size * gt_count) +
972 header.granularity - 1) / header.granularity) *
973 header.granularity;
974
16372ff0
AG
975 /* swap endianness for all header fields */
976 header.version = cpu_to_le32(header.version);
977 header.flags = cpu_to_le32(header.flags);
978 header.capacity = cpu_to_le64(header.capacity);
979 header.granularity = cpu_to_le64(header.granularity);
980 header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
8979b227
FB
981 header.desc_offset = cpu_to_le64(header.desc_offset);
982 header.desc_size = cpu_to_le64(header.desc_size);
983 header.rgd_offset = cpu_to_le64(header.rgd_offset);
984 header.gd_offset = cpu_to_le64(header.gd_offset);
985 header.grain_offset = cpu_to_le64(header.grain_offset);
986
987 header.check_bytes[0] = 0xa;
988 header.check_bytes[1] = 0x20;
989 header.check_bytes[2] = 0xd;
990 header.check_bytes[3] = 0xa;
3b46e624
TS
991
992 /* write all the data */
1640366c
KS
993 ret = qemu_write_full(fd, &magic, sizeof(magic));
994 if (ret != sizeof(magic)) {
b781cce5 995 ret = -errno;
1640366c
KS
996 goto exit;
997 }
998 ret = qemu_write_full(fd, &header, sizeof(header));
999 if (ret != sizeof(header)) {
b781cce5 1000 ret = -errno;
1640366c
KS
1001 goto exit;
1002 }
8979b227 1003
16372ff0 1004 ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
1640366c 1005 if (ret < 0) {
b781cce5 1006 ret = -errno;
1640366c
KS
1007 goto exit;
1008 }
8979b227
FB
1009
1010 /* write grain directory */
1011 lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
16372ff0 1012 for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size;
1640366c
KS
1013 i < gt_count; i++, tmp += gt_size) {
1014 ret = qemu_write_full(fd, &tmp, sizeof(tmp));
1015 if (ret != sizeof(tmp)) {
b781cce5 1016 ret = -errno;
1640366c
KS
1017 goto exit;
1018 }
1019 }
3b46e624 1020
8979b227
FB
1021 /* write backup grain directory */
1022 lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
16372ff0 1023 for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size;
1640366c
KS
1024 i < gt_count; i++, tmp += gt_size) {
1025 ret = qemu_write_full(fd, &tmp, sizeof(tmp));
1026 if (ret != sizeof(tmp)) {
b781cce5 1027 ret = -errno;
1640366c
KS
1028 goto exit;
1029 }
1030 }
8979b227
FB
1031
1032 /* compose the descriptor */
1033 real_filename = filename;
1034 if ((temp_str = strrchr(real_filename, '\\')) != NULL)
1035 real_filename = temp_str + 1;
1036 if ((temp_str = strrchr(real_filename, '/')) != NULL)
1037 real_filename = temp_str + 1;
1038 if ((temp_str = strrchr(real_filename, ':')) != NULL)
1039 real_filename = temp_str + 1;
7ccfb2eb 1040 snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
7fd6d9fc
BS
1041 total_size, real_filename,
1042 (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
1043 total_size / (int64_t)(63 * 16));
8979b227
FB
1044
1045 /* write the descriptor */
1046 lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
1640366c
KS
1047 ret = qemu_write_full(fd, desc, strlen(desc));
1048 if (ret != strlen(desc)) {
b781cce5 1049 ret = -errno;
1640366c
KS
1050 goto exit;
1051 }
8979b227 1052
1640366c
KS
1053 ret = 0;
1054exit:
8979b227 1055 close(fd);
1640366c 1056 return ret;
8979b227
FB
1057}
1058
e2731add 1059static void vmdk_close(BlockDriverState *bs)
ea2384d3 1060{
b3976d3c 1061 vmdk_free_extents(bs);
ea2384d3
FB
1062}
1063
205ef796 1064static int vmdk_flush(BlockDriverState *bs)
7a6cba61 1065{
205ef796 1066 return bdrv_flush(bs->file);
7a6cba61
PB
1067}
1068
0e7e1989
KW
1069
1070static QEMUOptionParameter vmdk_create_options[] = {
db08adf5
KW
1071 {
1072 .name = BLOCK_OPT_SIZE,
1073 .type = OPT_SIZE,
1074 .help = "Virtual disk size"
1075 },
1076 {
1077 .name = BLOCK_OPT_BACKING_FILE,
1078 .type = OPT_STRING,
1079 .help = "File name of a base image"
1080 },
1081 {
1082 .name = BLOCK_OPT_COMPAT6,
1083 .type = OPT_FLAG,
1084 .help = "VMDK version 6 image"
1085 },
0e7e1989
KW
1086 { NULL }
1087};
1088
5efa9d5a 1089static BlockDriver bdrv_vmdk = {
e60f469c
AJ
1090 .format_name = "vmdk",
1091 .instance_size = sizeof(BDRVVmdkState),
1092 .bdrv_probe = vmdk_probe,
6511ef77 1093 .bdrv_open = vmdk_open,
e60f469c
AJ
1094 .bdrv_read = vmdk_read,
1095 .bdrv_write = vmdk_write,
1096 .bdrv_close = vmdk_close,
1097 .bdrv_create = vmdk_create,
1098 .bdrv_flush = vmdk_flush,
1099 .bdrv_is_allocated = vmdk_is_allocated,
0e7e1989
KW
1100
1101 .create_options = vmdk_create_options,
ea2384d3 1102};
5efa9d5a
AL
1103
1104static void bdrv_vmdk_init(void)
1105{
1106 bdrv_register(&bdrv_vmdk);
1107}
1108
1109block_init(bdrv_vmdk_init);