]> git.proxmox.com Git - qemu.git/blame - block/vmdk.c
VMDK: move 'static' cid_update flag to bs field
[qemu.git] / block / vmdk.c
CommitLineData
ea2384d3
FB
1/*
2 * Block driver for the VMDK format
5fafdf24 3 *
ea2384d3 4 * Copyright (c) 2004 Fabrice Bellard
ff1afc72 5 * Copyright (c) 2005 Filip Navara
5fafdf24 6 *
ea2384d3
FB
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
5f4da8c0 25
faf07963 26#include "qemu-common.h"
ea2384d3 27#include "block_int.h"
5efa9d5a 28#include "module.h"
ea2384d3 29
ea2384d3
FB
/* Image magic numbers. vmdk_probe()/vmdk_open() compare them against the
 * first four bytes of the image interpreted as a big-endian 32-bit value. */
#define VMDK3_MAGIC 0x434f5744 /* 'C' 'O' 'W' 'D' */
#define VMDK4_MAGIC 0x4b444d56 /* 'K' 'D' 'M' 'V' */
32
/*
 * Header of a VMDK3 image as vmdk_open_vmdk3() reads it: the structure is
 * loaded from the bytes that immediately follow the 4-byte magic, and the
 * fields that are used are converted with le32_to_cpu().
 */
typedef struct VMDK3Header {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;      /* becomes the extent's sector count */
    uint32_t granularity;       /* becomes the extent's cluster_sectors */
    uint32_t l1dir_offset;      /* in sectors; shifted by 9 to get bytes */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
45
/*
 * Header of a VMDK4 image, read/written right after the 4-byte magic.
 * The structure is packed; vmdk_create() stores every multi-byte field in
 * little-endian order and expresses the *_offset fields in sectors.
 */
typedef struct VMDK4Header {
    uint32_t version;
    uint32_t flags;
    int64_t capacity;           /* virtual disk size in sectors */
    int64_t granularity;        /* sectors per grain (cluster) */
    int64_t desc_offset;        /* embedded descriptor position */
    int64_t desc_size;          /* embedded descriptor length */
    int32_t num_gtes_per_gte;   /* entries per grain table */
    int64_t rgd_offset;         /* grain directory written first by vmdk_create */
    int64_t gd_offset;          /* second grain directory */
    int64_t grain_offset;       /* the file is truncated to this size on create */
    char filler[1];
    char check_bytes[4];        /* vmdk_create stores 0x0a 0x20 0x0d 0x0a */
} __attribute__((packed)) VMDK4Header;
ea2384d3
FB
60
61#define L2_CACHE_SIZE 16
62
b3976d3c
FZ
63typedef struct VmdkExtent {
64 BlockDriverState *file;
65 bool flat;
66 int64_t sectors;
67 int64_t end_sector;
ea2384d3 68 int64_t l1_table_offset;
ff1afc72 69 int64_t l1_backup_table_offset;
ea2384d3 70 uint32_t *l1_table;
ff1afc72 71 uint32_t *l1_backup_table;
ea2384d3
FB
72 unsigned int l1_size;
73 uint32_t l1_entry_sectors;
74
75 unsigned int l2_size;
76 uint32_t *l2_cache;
77 uint32_t l2_cache_offsets[L2_CACHE_SIZE];
78 uint32_t l2_cache_counts[L2_CACHE_SIZE];
79
80 unsigned int cluster_sectors;
b3976d3c
FZ
81} VmdkExtent;
82
83typedef struct BDRVVmdkState {
e1da9b24 84 int desc_offset;
69b4d86d 85 bool cid_updated;
5f4da8c0 86 uint32_t parent_cid;
b3976d3c
FZ
87 int num_extents;
88 /* Extent array with num_extents entries, ascend ordered by address */
89 VmdkExtent *extents;
ea2384d3
FB
90} BDRVVmdkState;
91
630530a6
TS
/*
 * Pending L2 update produced by get_cluster_offset() when it allocates a
 * new cluster and consumed by vmdk_L2update().
 */
typedef struct VmdkMetaData {
    uint32_t offset;            /* new L2 entry value, already little-endian */
    unsigned int l1_index;      /* L1 slot covering the cluster */
    unsigned int l2_index;      /* entry index inside the L2 table */
    unsigned int l2_offset;     /* sector offset of the L2 table */
    int valid;                  /* non-zero when the fields above are set */
} VmdkMetaData;
99
ea2384d3
FB
100static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
101{
102 uint32_t magic;
103
104 if (buf_size < 4)
105 return 0;
106 magic = be32_to_cpu(*(uint32_t *)buf);
107 if (magic == VMDK3_MAGIC ||
01fc99d6 108 magic == VMDK4_MAGIC) {
ea2384d3 109 return 100;
01fc99d6
FZ
110 } else {
111 const char *p = (const char *)buf;
112 const char *end = p + buf_size;
113 while (p < end) {
114 if (*p == '#') {
115 /* skip comment line */
116 while (p < end && *p != '\n') {
117 p++;
118 }
119 p++;
120 continue;
121 }
122 if (*p == ' ') {
123 while (p < end && *p == ' ') {
124 p++;
125 }
126 /* skip '\r' if windows line endings used. */
127 if (p < end && *p == '\r') {
128 p++;
129 }
130 /* only accept blank lines before 'version=' line */
131 if (p == end || *p != '\n') {
132 return 0;
133 }
134 p++;
135 continue;
136 }
137 if (end - p >= strlen("version=X\n")) {
138 if (strncmp("version=1\n", p, strlen("version=1\n")) == 0 ||
139 strncmp("version=2\n", p, strlen("version=2\n")) == 0) {
140 return 100;
141 }
142 }
143 if (end - p >= strlen("version=X\r\n")) {
144 if (strncmp("version=1\r\n", p, strlen("version=1\r\n")) == 0 ||
145 strncmp("version=2\r\n", p, strlen("version=2\r\n")) == 0) {
146 return 100;
147 }
148 }
149 return 0;
150 }
ea2384d3 151 return 0;
01fc99d6 152 }
ea2384d3
FB
153}
154
5f4da8c0
TS
/* When defined, vmdk_is_cid_valid() compares the stored parent CID with the
 * CID currently found in the backing image. */
#define CHECK_CID 1

#define SECTOR_SIZE 512
/* Parenthesized so the macro stays a single operand inside any expression
 * (e.g. "x / DESC_SIZE" or "x % DESC_SIZE"). */
#define DESC_SIZE (20 * SECTOR_SIZE)    /* 20 sectors of 512 bytes each */
#define HEADER_SIZE 512                 /* first sector of 512 bytes */
5f4da8c0 160
b3976d3c
FZ
161static void vmdk_free_extents(BlockDriverState *bs)
162{
163 int i;
164 BDRVVmdkState *s = bs->opaque;
165
166 for (i = 0; i < s->num_extents; i++) {
167 qemu_free(s->extents[i].l1_table);
168 qemu_free(s->extents[i].l2_cache);
169 qemu_free(s->extents[i].l1_backup_table);
170 }
171 qemu_free(s->extents);
172}
173
5f4da8c0 174static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
ea2384d3 175{
5f4da8c0
TS
176 char desc[DESC_SIZE];
177 uint32_t cid;
7ccfb2eb 178 const char *p_name, *cid_str;
5f4da8c0 179 size_t cid_str_size;
e1da9b24 180 BDRVVmdkState *s = bs->opaque;
5f4da8c0 181
e1da9b24 182 if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
5f4da8c0 183 return 0;
e1da9b24 184 }
5f4da8c0
TS
185
186 if (parent) {
187 cid_str = "parentCID";
188 cid_str_size = sizeof("parentCID");
189 } else {
190 cid_str = "CID";
191 cid_str_size = sizeof("CID");
192 }
193
511d2b14 194 if ((p_name = strstr(desc,cid_str)) != NULL) {
5f4da8c0
TS
195 p_name += cid_str_size;
196 sscanf(p_name,"%x",&cid);
197 }
198
199 return cid;
200}
201
202static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
203{
5f4da8c0
TS
204 char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
205 char *p_name, *tmp_str;
e1da9b24 206 BDRVVmdkState *s = bs->opaque;
5f4da8c0 207
e1da9b24
FZ
208 memset(desc, 0, sizeof(desc));
209 if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
210 return -EIO;
211 }
5f4da8c0
TS
212
213 tmp_str = strstr(desc,"parentCID");
363a37d5 214 pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
511d2b14 215 if ((p_name = strstr(desc,"CID")) != NULL) {
5f4da8c0 216 p_name += sizeof("CID");
363a37d5
BS
217 snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
218 pstrcat(desc, sizeof(desc), tmp_desc);
5f4da8c0
TS
219 }
220
e1da9b24
FZ
221 if (bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE) < 0) {
222 return -EIO;
223 }
5f4da8c0
TS
224 return 0;
225}
226
227static int vmdk_is_cid_valid(BlockDriverState *bs)
228{
229#ifdef CHECK_CID
230 BDRVVmdkState *s = bs->opaque;
b171271a 231 BlockDriverState *p_bs = bs->backing_hd;
5f4da8c0
TS
232 uint32_t cur_pcid;
233
234 if (p_bs) {
235 cur_pcid = vmdk_read_cid(p_bs,0);
236 if (s->parent_cid != cur_pcid)
237 // CID not valid
238 return 0;
239 }
240#endif
241 // CID valid
242 return 1;
243}
244
245static int vmdk_snapshot_create(const char *filename, const char *backing_file)
246{
247 int snp_fd, p_fd;
53c2e716 248 int ret;
5f4da8c0 249 uint32_t p_cid;
5fafdf24 250 char *p_name, *gd_buf, *rgd_buf;
5f4da8c0
TS
251 const char *real_filename, *temp_str;
252 VMDK4Header header;
253 uint32_t gde_entries, gd_size;
254 int64_t gd_offset, rgd_offset, capacity, gt_size;
255 char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
7ccfb2eb 256 static const char desc_template[] =
5f4da8c0
TS
257 "# Disk DescriptorFile\n"
258 "version=1\n"
259 "CID=%x\n"
260 "parentCID=%x\n"
261 "createType=\"monolithicSparse\"\n"
262 "parentFileNameHint=\"%s\"\n"
263 "\n"
264 "# Extent description\n"
7ccfb2eb 265 "RW %u SPARSE \"%s\"\n"
5f4da8c0
TS
266 "\n"
267 "# The Disk Data Base \n"
268 "#DDB\n"
269 "\n";
270
271 snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
272 if (snp_fd < 0)
53c2e716 273 return -errno;
5f4da8c0
TS
274 p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
275 if (p_fd < 0) {
276 close(snp_fd);
53c2e716 277 return -errno;
5f4da8c0
TS
278 }
279
280 /* read the header */
53c2e716
JQ
281 if (lseek(p_fd, 0x0, SEEK_SET) == -1) {
282 ret = -errno;
5f4da8c0 283 goto fail;
53c2e716
JQ
284 }
285 if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) {
286 ret = -errno;
5f4da8c0 287 goto fail;
53c2e716 288 }
5f4da8c0
TS
289
290 /* write the header */
53c2e716
JQ
291 if (lseek(snp_fd, 0x0, SEEK_SET) == -1) {
292 ret = -errno;
5f4da8c0 293 goto fail;
53c2e716
JQ
294 }
295 if (write(snp_fd, hdr, HEADER_SIZE) == -1) {
296 ret = -errno;
5f4da8c0 297 goto fail;
53c2e716 298 }
5f4da8c0
TS
299
300 memset(&header, 0, sizeof(header));
301 memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
302
53c2e716
JQ
303 if (ftruncate(snp_fd, header.grain_offset << 9)) {
304 ret = -errno;
1640366c 305 goto fail;
53c2e716 306 }
5f4da8c0 307 /* the descriptor offset = 0x200 */
53c2e716
JQ
308 if (lseek(p_fd, 0x200, SEEK_SET) == -1) {
309 ret = -errno;
5f4da8c0 310 goto fail;
53c2e716
JQ
311 }
312 if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) {
313 ret = -errno;
5f4da8c0 314 goto fail;
53c2e716 315 }
5f4da8c0 316
511d2b14 317 if ((p_name = strstr(p_desc,"CID")) != NULL) {
5f4da8c0
TS
318 p_name += sizeof("CID");
319 sscanf(p_name,"%x",&p_cid);
320 }
321
322 real_filename = filename;
323 if ((temp_str = strrchr(real_filename, '\\')) != NULL)
324 real_filename = temp_str + 1;
325 if ((temp_str = strrchr(real_filename, '/')) != NULL)
326 real_filename = temp_str + 1;
327 if ((temp_str = strrchr(real_filename, ':')) != NULL)
328 real_filename = temp_str + 1;
329
363a37d5
BS
330 snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
331 (uint32_t)header.capacity, real_filename);
5f4da8c0
TS
332
333 /* write the descriptor */
53c2e716
JQ
334 if (lseek(snp_fd, 0x200, SEEK_SET) == -1) {
335 ret = -errno;
5f4da8c0 336 goto fail;
53c2e716
JQ
337 }
338 if (write(snp_fd, s_desc, strlen(s_desc)) == -1) {
339 ret = -errno;
5f4da8c0 340 goto fail;
53c2e716 341 }
ea2384d3 342
5f4da8c0
TS
343 gd_offset = header.gd_offset * SECTOR_SIZE; // offset of GD table
344 rgd_offset = header.rgd_offset * SECTOR_SIZE; // offset of RGD table
345 capacity = header.capacity * SECTOR_SIZE; // Extent size
346 /*
347 * Each GDE span 32M disk, means:
348 * 512 GTE per GT, each GTE points to grain
349 */
350 gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
53c2e716
JQ
351 if (!gt_size) {
352 ret = -EINVAL;
5f4da8c0 353 goto fail;
53c2e716 354 }
5fafdf24 355 gde_entries = (uint32_t)(capacity / gt_size); // number of gde/rgde
5f4da8c0
TS
356 gd_size = gde_entries * sizeof(uint32_t);
357
358 /* write RGD */
359 rgd_buf = qemu_malloc(gd_size);
53c2e716
JQ
360 if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) {
361 ret = -errno;
5f4da8c0 362 goto fail_rgd;
53c2e716
JQ
363 }
364 if (read(p_fd, rgd_buf, gd_size) != gd_size) {
365 ret = -errno;
5f4da8c0 366 goto fail_rgd;
53c2e716
JQ
367 }
368 if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) {
369 ret = -errno;
5f4da8c0 370 goto fail_rgd;
53c2e716
JQ
371 }
372 if (write(snp_fd, rgd_buf, gd_size) == -1) {
373 ret = -errno;
5f4da8c0 374 goto fail_rgd;
53c2e716 375 }
5f4da8c0
TS
376
377 /* write GD */
378 gd_buf = qemu_malloc(gd_size);
53c2e716
JQ
379 if (lseek(p_fd, gd_offset, SEEK_SET) == -1) {
380 ret = -errno;
5f4da8c0 381 goto fail_gd;
53c2e716
JQ
382 }
383 if (read(p_fd, gd_buf, gd_size) != gd_size) {
384 ret = -errno;
5f4da8c0 385 goto fail_gd;
53c2e716
JQ
386 }
387 if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) {
388 ret = -errno;
5f4da8c0 389 goto fail_gd;
53c2e716
JQ
390 }
391 if (write(snp_fd, gd_buf, gd_size) == -1) {
392 ret = -errno;
5f4da8c0 393 goto fail_gd;
53c2e716 394 }
3829cb46 395 ret = 0;
5f4da8c0 396
3829cb46 397fail_gd:
5f4da8c0 398 qemu_free(gd_buf);
3829cb46 399fail_rgd:
5f4da8c0 400 qemu_free(rgd_buf);
3829cb46 401fail:
5f4da8c0
TS
402 close(p_fd);
403 close(snp_fd);
53c2e716 404 return ret;
5f4da8c0
TS
405}
406
9949f97e 407static int vmdk_parent_open(BlockDriverState *bs)
5f4da8c0 408{
5fafdf24 409 char *p_name;
5f4da8c0 410 char desc[DESC_SIZE];
e1da9b24 411 BDRVVmdkState *s = bs->opaque;
5f4da8c0 412
e1da9b24 413 if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
5f4da8c0 414 return -1;
e1da9b24 415 }
5f4da8c0 416
511d2b14 417 if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
5f4da8c0 418 char *end_name;
5f4da8c0
TS
419
420 p_name += sizeof("parentFileNameHint") + 1;
511d2b14 421 if ((end_name = strchr(p_name,'\"')) == NULL)
5f4da8c0 422 return -1;
b171271a 423 if ((end_name - p_name) > sizeof (bs->backing_file) - 1)
b34d259a 424 return -1;
3b46e624 425
b171271a 426 pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
ff1afc72 427 }
5f4da8c0
TS
428
429 return 0;
430}
431
b3976d3c
FZ
432/* Create and append extent to the extent array. Return the added VmdkExtent
433 * address. return NULL if allocation failed. */
434static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
435 BlockDriverState *file, bool flat, int64_t sectors,
436 int64_t l1_offset, int64_t l1_backup_offset,
437 uint32_t l1_size,
438 int l2_size, unsigned int cluster_sectors)
439{
440 VmdkExtent *extent;
441 BDRVVmdkState *s = bs->opaque;
442
443 s->extents = qemu_realloc(s->extents,
444 (s->num_extents + 1) * sizeof(VmdkExtent));
445 extent = &s->extents[s->num_extents];
446 s->num_extents++;
447
448 memset(extent, 0, sizeof(VmdkExtent));
449 extent->file = file;
450 extent->flat = flat;
451 extent->sectors = sectors;
452 extent->l1_table_offset = l1_offset;
453 extent->l1_backup_table_offset = l1_backup_offset;
454 extent->l1_size = l1_size;
455 extent->l1_entry_sectors = l2_size * cluster_sectors;
456 extent->l2_size = l2_size;
457 extent->cluster_sectors = cluster_sectors;
458
459 if (s->num_extents > 1) {
460 extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
461 } else {
462 extent->end_sector = extent->sectors;
463 }
464 bs->total_sectors = extent->end_sector;
465 return extent;
466}
467
b4b3ab14 468static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent)
5f4da8c0 469{
b4b3ab14
FZ
470 int ret;
471 int l1_size, i;
5f4da8c0 472
ea2384d3 473 /* read the L1 table */
b3976d3c
FZ
474 l1_size = extent->l1_size * sizeof(uint32_t);
475 extent->l1_table = qemu_malloc(l1_size);
b4b3ab14
FZ
476 ret = bdrv_pread(extent->file,
477 extent->l1_table_offset,
478 extent->l1_table,
479 l1_size);
480 if (ret < 0) {
481 goto fail_l1;
b3976d3c
FZ
482 }
483 for (i = 0; i < extent->l1_size; i++) {
484 le32_to_cpus(&extent->l1_table[i]);
ea2384d3
FB
485 }
486
b3976d3c
FZ
487 if (extent->l1_backup_table_offset) {
488 extent->l1_backup_table = qemu_malloc(l1_size);
b4b3ab14
FZ
489 ret = bdrv_pread(extent->file,
490 extent->l1_backup_table_offset,
491 extent->l1_backup_table,
492 l1_size);
493 if (ret < 0) {
494 goto fail_l1b;
b3976d3c
FZ
495 }
496 for (i = 0; i < extent->l1_size; i++) {
497 le32_to_cpus(&extent->l1_backup_table[i]);
ff1afc72
FB
498 }
499 }
500
b3976d3c
FZ
501 extent->l2_cache =
502 qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
ea2384d3 503 return 0;
b4b3ab14
FZ
504 fail_l1b:
505 qemu_free(extent->l1_backup_table);
506 fail_l1:
507 qemu_free(extent->l1_table);
508 return ret;
509}
510
511static int vmdk_open_vmdk3(BlockDriverState *bs, int flags)
512{
513 int ret;
514 uint32_t magic;
515 VMDK3Header header;
e1da9b24 516 BDRVVmdkState *s = bs->opaque;
b4b3ab14
FZ
517 VmdkExtent *extent;
518
e1da9b24 519 s->desc_offset = 0x200;
b4b3ab14
FZ
520 ret = bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header));
521 if (ret < 0) {
522 goto fail;
523 }
524 extent = vmdk_add_extent(bs,
525 bs->file, false,
526 le32_to_cpu(header.disk_sectors),
527 le32_to_cpu(header.l1dir_offset) << 9,
528 0, 1 << 6, 1 << 9,
529 le32_to_cpu(header.granularity));
530 ret = vmdk_init_tables(bs, extent);
531 if (ret) {
532 /* vmdk_init_tables cleans up on fail, so only free allocation of
533 * vmdk_add_extent here. */
534 goto fail;
535 }
536 return 0;
ea2384d3 537 fail:
b3976d3c 538 vmdk_free_extents(bs);
b4b3ab14
FZ
539 return ret;
540}
541
542static int vmdk_open_vmdk4(BlockDriverState *bs, int flags)
543{
544 int ret;
545 uint32_t magic;
546 uint32_t l1_size, l1_entry_sectors;
547 VMDK4Header header;
548 BDRVVmdkState *s = bs->opaque;
549 VmdkExtent *extent;
550
e1da9b24 551 s->desc_offset = 0x200;
b4b3ab14
FZ
552 ret = bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header));
553 if (ret < 0) {
554 goto fail;
555 }
556 l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
557 * le64_to_cpu(header.granularity);
558 l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
559 / l1_entry_sectors;
560 extent = vmdk_add_extent(bs, bs->file, false,
561 le64_to_cpu(header.capacity),
562 le64_to_cpu(header.gd_offset) << 9,
563 le64_to_cpu(header.rgd_offset) << 9,
564 l1_size,
565 le32_to_cpu(header.num_gtes_per_gte),
566 le64_to_cpu(header.granularity));
567 if (extent->l1_entry_sectors <= 0) {
568 ret = -EINVAL;
569 goto fail;
570 }
571 /* try to open parent images, if exist */
572 ret = vmdk_parent_open(bs);
573 if (ret) {
574 goto fail;
575 }
576 s->parent_cid = vmdk_read_cid(bs, 1);
577 ret = vmdk_init_tables(bs, extent);
578 if (ret) {
579 goto fail;
580 }
581 return 0;
582 fail:
583 vmdk_free_extents(bs);
584 return ret;
585}
586
587static int vmdk_open(BlockDriverState *bs, int flags)
588{
589 uint32_t magic;
590
591 if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic)) {
592 return -EIO;
593 }
594
595 magic = be32_to_cpu(magic);
596 if (magic == VMDK3_MAGIC) {
597 return vmdk_open_vmdk3(bs, flags);
598 } else if (magic == VMDK4_MAGIC) {
599 return vmdk_open_vmdk4(bs, flags);
600 } else {
601 return -EINVAL;
602 }
ea2384d3
FB
603}
604
b3976d3c
FZ
605static int get_whole_cluster(BlockDriverState *bs,
606 VmdkExtent *extent,
607 uint64_t cluster_offset,
608 uint64_t offset,
609 bool allocate)
5f4da8c0 610{
b3976d3c
FZ
611 /* 128 sectors * 512 bytes each = grain size 64KB */
612 uint8_t whole_grain[extent->cluster_sectors * 512];
5f4da8c0 613
0e69c543
FZ
614 /* we will be here if it's first write on non-exist grain(cluster).
615 * try to read from parent image, if exist */
b171271a 616 if (bs->backing_hd) {
c336500d 617 int ret;
5f4da8c0
TS
618
619 if (!vmdk_is_cid_valid(bs))
620 return -1;
5f4da8c0 621
0e69c543
FZ
622 /* floor offset to cluster */
623 offset -= offset % (extent->cluster_sectors * 512);
c336500d 624 ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
b3976d3c 625 extent->cluster_sectors);
c336500d
KW
626 if (ret < 0) {
627 return -1;
628 }
630530a6 629
0e69c543 630 /* Write grain only into the active image */
b3976d3c
FZ
631 ret = bdrv_write(extent->file, cluster_offset, whole_grain,
632 extent->cluster_sectors);
c336500d
KW
633 if (ret < 0) {
634 return -1;
630530a6
TS
635 }
636 }
637 return 0;
638}
639
b3976d3c 640static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
630530a6 641{
630530a6 642 /* update L2 table */
b3976d3c
FZ
643 if (bdrv_pwrite_sync(
644 extent->file,
645 ((int64_t)m_data->l2_offset * 512)
646 + (m_data->l2_index * sizeof(m_data->offset)),
647 &(m_data->offset),
648 sizeof(m_data->offset)
649 ) < 0) {
630530a6 650 return -1;
b3976d3c 651 }
630530a6 652 /* update backup L2 table */
b3976d3c
FZ
653 if (extent->l1_backup_table_offset != 0) {
654 m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
655 if (bdrv_pwrite_sync(
656 extent->file,
657 ((int64_t)m_data->l2_offset * 512)
658 + (m_data->l2_index * sizeof(m_data->offset)),
659 &(m_data->offset), sizeof(m_data->offset)
660 ) < 0) {
5f4da8c0 661 return -1;
b3976d3c 662 }
5f4da8c0 663 }
630530a6 664
5f4da8c0
TS
665 return 0;
666}
667
b3976d3c
FZ
668static uint64_t get_cluster_offset(BlockDriverState *bs,
669 VmdkExtent *extent,
670 VmdkMetaData *m_data,
671 uint64_t offset, int allocate)
ea2384d3 672{
ea2384d3
FB
673 unsigned int l1_index, l2_offset, l2_index;
674 int min_index, i, j;
630530a6 675 uint32_t min_count, *l2_table, tmp = 0;
ea2384d3 676 uint64_t cluster_offset;
630530a6
TS
677
678 if (m_data)
679 m_data->valid = 0;
680
b3976d3c
FZ
681 l1_index = (offset >> 9) / extent->l1_entry_sectors;
682 if (l1_index >= extent->l1_size) {
ea2384d3 683 return 0;
b3976d3c
FZ
684 }
685 l2_offset = extent->l1_table[l1_index];
686 if (!l2_offset) {
ea2384d3 687 return 0;
b3976d3c 688 }
b4b3ab14 689 for (i = 0; i < L2_CACHE_SIZE; i++) {
b3976d3c 690 if (l2_offset == extent->l2_cache_offsets[i]) {
ea2384d3 691 /* increment the hit count */
b3976d3c 692 if (++extent->l2_cache_counts[i] == 0xffffffff) {
b4b3ab14 693 for (j = 0; j < L2_CACHE_SIZE; j++) {
b3976d3c 694 extent->l2_cache_counts[j] >>= 1;
ea2384d3
FB
695 }
696 }
b3976d3c 697 l2_table = extent->l2_cache + (i * extent->l2_size);
ea2384d3
FB
698 goto found;
699 }
700 }
701 /* not found: load a new entry in the least used one */
702 min_index = 0;
703 min_count = 0xffffffff;
b4b3ab14 704 for (i = 0; i < L2_CACHE_SIZE; i++) {
b3976d3c
FZ
705 if (extent->l2_cache_counts[i] < min_count) {
706 min_count = extent->l2_cache_counts[i];
ea2384d3
FB
707 min_index = i;
708 }
709 }
b3976d3c
FZ
710 l2_table = extent->l2_cache + (min_index * extent->l2_size);
711 if (bdrv_pread(
712 extent->file,
713 (int64_t)l2_offset * 512,
714 l2_table,
715 extent->l2_size * sizeof(uint32_t)
716 ) != extent->l2_size * sizeof(uint32_t)) {
ea2384d3 717 return 0;
b3976d3c 718 }
5f4da8c0 719
b3976d3c
FZ
720 extent->l2_cache_offsets[min_index] = l2_offset;
721 extent->l2_cache_counts[min_index] = 1;
ea2384d3 722 found:
b3976d3c 723 l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
ea2384d3 724 cluster_offset = le32_to_cpu(l2_table[l2_index]);
630530a6 725
ff1afc72
FB
726 if (!cluster_offset) {
727 if (!allocate)
728 return 0;
9949f97e 729
630530a6 730 // Avoid the L2 tables update for the images that have snapshots.
b3976d3c
FZ
731 cluster_offset = bdrv_getlength(extent->file);
732 bdrv_truncate(
733 extent->file,
734 cluster_offset + (extent->cluster_sectors << 9)
735 );
9949f97e
KW
736
737 cluster_offset >>= 9;
738 tmp = cpu_to_le32(cluster_offset);
739 l2_table[l2_index] = tmp;
630530a6 740
630530a6
TS
741 /* First of all we write grain itself, to avoid race condition
742 * that may to corrupt the image.
743 * This problem may occur because of insufficient space on host disk
744 * or inappropriate VM shutdown.
745 */
b3976d3c
FZ
746 if (get_whole_cluster(
747 bs, extent, cluster_offset, offset, allocate) == -1)
5f4da8c0 748 return 0;
630530a6
TS
749
750 if (m_data) {
751 m_data->offset = tmp;
752 m_data->l1_index = l1_index;
753 m_data->l2_index = l2_index;
754 m_data->l2_offset = l2_offset;
755 m_data->valid = 1;
756 }
ff1afc72 757 }
ea2384d3
FB
758 cluster_offset <<= 9;
759 return cluster_offset;
760}
761
b3976d3c
FZ
762static VmdkExtent *find_extent(BDRVVmdkState *s,
763 int64_t sector_num, VmdkExtent *start_hint)
764{
765 VmdkExtent *extent = start_hint;
766
767 if (!extent) {
768 extent = &s->extents[0];
769 }
770 while (extent < &s->extents[s->num_extents]) {
771 if (sector_num < extent->end_sector) {
772 return extent;
773 }
774 extent++;
775 }
776 return NULL;
777}
778
5fafdf24 779static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
ea2384d3
FB
780 int nb_sectors, int *pnum)
781{
782 BDRVVmdkState *s = bs->opaque;
ea2384d3 783
b3976d3c
FZ
784 int64_t index_in_cluster, n, ret;
785 uint64_t offset;
786 VmdkExtent *extent;
787
788 extent = find_extent(s, sector_num, NULL);
789 if (!extent) {
790 return 0;
791 }
792 if (extent->flat) {
793 n = extent->end_sector - sector_num;
794 ret = 1;
795 } else {
796 offset = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0);
797 index_in_cluster = sector_num % extent->cluster_sectors;
798 n = extent->cluster_sectors - index_in_cluster;
799 ret = offset ? 1 : 0;
800 }
ea2384d3
FB
801 if (n > nb_sectors)
802 n = nb_sectors;
803 *pnum = n;
b3976d3c 804 return ret;
ea2384d3
FB
805}
806
5fafdf24 807static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
ea2384d3
FB
808 uint8_t *buf, int nb_sectors)
809{
810 BDRVVmdkState *s = bs->opaque;
b3976d3c
FZ
811 int ret;
812 uint64_t n, index_in_cluster;
813 VmdkExtent *extent = NULL;
ea2384d3 814 uint64_t cluster_offset;
5f4da8c0 815
ea2384d3 816 while (nb_sectors > 0) {
b3976d3c
FZ
817 extent = find_extent(s, sector_num, extent);
818 if (!extent) {
819 return -EIO;
820 }
821 cluster_offset = get_cluster_offset(
822 bs, extent, NULL, sector_num << 9, 0);
823 index_in_cluster = sector_num % extent->cluster_sectors;
824 n = extent->cluster_sectors - index_in_cluster;
ea2384d3
FB
825 if (n > nb_sectors)
826 n = nb_sectors;
827 if (!cluster_offset) {
5f4da8c0 828 // try to read from parent image, if exist
b171271a 829 if (bs->backing_hd) {
5f4da8c0
TS
830 if (!vmdk_is_cid_valid(bs))
831 return -1;
b171271a 832 ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
5f4da8c0
TS
833 if (ret < 0)
834 return -1;
835 } else {
836 memset(buf, 0, 512 * n);
837 }
ea2384d3 838 } else {
6511ef77 839 if(bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
ea2384d3
FB
840 return -1;
841 }
842 nb_sectors -= n;
843 sector_num += n;
844 buf += n * 512;
845 }
846 return 0;
847}
848
5fafdf24 849static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
ea2384d3
FB
850 const uint8_t *buf, int nb_sectors)
851{
ff1afc72 852 BDRVVmdkState *s = bs->opaque;
b3976d3c
FZ
853 VmdkExtent *extent = NULL;
854 int n;
855 int64_t index_in_cluster;
ff1afc72 856 uint64_t cluster_offset;
b3976d3c 857 VmdkMetaData m_data;
ff1afc72 858
630530a6
TS
859 if (sector_num > bs->total_sectors) {
860 fprintf(stderr,
92868412
JM
861 "(VMDK) Wrong offset: sector_num=0x%" PRIx64
862 " total_sectors=0x%" PRIx64 "\n",
630530a6
TS
863 sector_num, bs->total_sectors);
864 return -1;
865 }
866
ff1afc72 867 while (nb_sectors > 0) {
b3976d3c
FZ
868 extent = find_extent(s, sector_num, extent);
869 if (!extent) {
870 return -EIO;
871 }
872 cluster_offset = get_cluster_offset(
873 bs,
874 extent,
875 &m_data,
876 sector_num << 9, 1);
877 if (!cluster_offset) {
ff1afc72 878 return -1;
b3976d3c
FZ
879 }
880 index_in_cluster = sector_num % extent->cluster_sectors;
881 n = extent->cluster_sectors - index_in_cluster;
882 if (n > nb_sectors) {
883 n = nb_sectors;
884 }
630530a6 885
b3976d3c
FZ
886 if (bdrv_pwrite(bs->file,
887 cluster_offset + index_in_cluster * 512,
888 buf, n * 512)
889 != n * 512) {
ff1afc72 890 return -1;
b3976d3c 891 }
630530a6
TS
892 if (m_data.valid) {
893 /* update L2 tables */
b3976d3c 894 if (vmdk_L2update(extent, &m_data) == -1) {
630530a6 895 return -1;
b3976d3c 896 }
630530a6 897 }
ff1afc72
FB
898 nb_sectors -= n;
899 sector_num += n;
900 buf += n * 512;
5f4da8c0
TS
901
902 // update CID on the first write every time the virtual disk is opened
69b4d86d 903 if (!s->cid_updated) {
5f4da8c0 904 vmdk_write_cid(bs, time(NULL));
69b4d86d 905 s->cid_updated = true;
5f4da8c0 906 }
ff1afc72
FB
907 }
908 return 0;
ea2384d3
FB
909}
910
0e7e1989 911static int vmdk_create(const char *filename, QEMUOptionParameter *options)
8979b227
FB
912{
913 int fd, i;
914 VMDK4Header header;
915 uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
7ccfb2eb 916 static const char desc_template[] =
8979b227
FB
917 "# Disk DescriptorFile\n"
918 "version=1\n"
919 "CID=%x\n"
920 "parentCID=ffffffff\n"
921 "createType=\"monolithicSparse\"\n"
922 "\n"
923 "# Extent description\n"
7fd6d9fc 924 "RW %" PRId64 " SPARSE \"%s\"\n"
8979b227
FB
925 "\n"
926 "# The Disk Data Base \n"
927 "#DDB\n"
928 "\n"
ec36ba14 929 "ddb.virtualHWVersion = \"%d\"\n"
7fd6d9fc 930 "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
8979b227
FB
931 "ddb.geometry.heads = \"16\"\n"
932 "ddb.geometry.sectors = \"63\"\n"
933 "ddb.adapterType = \"ide\"\n";
934 char desc[1024];
935 const char *real_filename, *temp_str;
0e7e1989
KW
936 int64_t total_size = 0;
937 const char *backing_file = NULL;
938 int flags = 0;
1640366c 939 int ret;
0e7e1989
KW
940
941 // Read out options
942 while (options && options->name) {
943 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
944 total_size = options->value.n / 512;
945 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
946 backing_file = options->value.s;
947 } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
948 flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0;
949 }
950 options++;
951 }
8979b227
FB
952
953 /* XXX: add support for backing file */
5f4da8c0
TS
954 if (backing_file) {
955 return vmdk_snapshot_create(filename, backing_file);
956 }
8979b227
FB
957
958 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
959 0644);
960 if (fd < 0)
b781cce5 961 return -errno;
8979b227
FB
962 magic = cpu_to_be32(VMDK4_MAGIC);
963 memset(&header, 0, sizeof(header));
16372ff0
AG
964 header.version = 1;
965 header.flags = 3; /* ?? */
966 header.capacity = total_size;
967 header.granularity = 128;
968 header.num_gtes_per_gte = 512;
8979b227
FB
969
970 grains = (total_size + header.granularity - 1) / header.granularity;
971 gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
972 gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
973 gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
974
975 header.desc_offset = 1;
976 header.desc_size = 20;
977 header.rgd_offset = header.desc_offset + header.desc_size;
978 header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
979 header.grain_offset =
980 ((header.gd_offset + gd_size + (gt_size * gt_count) +
981 header.granularity - 1) / header.granularity) *
982 header.granularity;
983
16372ff0
AG
984 /* swap endianness for all header fields */
985 header.version = cpu_to_le32(header.version);
986 header.flags = cpu_to_le32(header.flags);
987 header.capacity = cpu_to_le64(header.capacity);
988 header.granularity = cpu_to_le64(header.granularity);
989 header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
8979b227
FB
990 header.desc_offset = cpu_to_le64(header.desc_offset);
991 header.desc_size = cpu_to_le64(header.desc_size);
992 header.rgd_offset = cpu_to_le64(header.rgd_offset);
993 header.gd_offset = cpu_to_le64(header.gd_offset);
994 header.grain_offset = cpu_to_le64(header.grain_offset);
995
996 header.check_bytes[0] = 0xa;
997 header.check_bytes[1] = 0x20;
998 header.check_bytes[2] = 0xd;
999 header.check_bytes[3] = 0xa;
3b46e624
TS
1000
1001 /* write all the data */
1640366c
KS
1002 ret = qemu_write_full(fd, &magic, sizeof(magic));
1003 if (ret != sizeof(magic)) {
b781cce5 1004 ret = -errno;
1640366c
KS
1005 goto exit;
1006 }
1007 ret = qemu_write_full(fd, &header, sizeof(header));
1008 if (ret != sizeof(header)) {
b781cce5 1009 ret = -errno;
1640366c
KS
1010 goto exit;
1011 }
8979b227 1012
16372ff0 1013 ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
1640366c 1014 if (ret < 0) {
b781cce5 1015 ret = -errno;
1640366c
KS
1016 goto exit;
1017 }
8979b227
FB
1018
1019 /* write grain directory */
1020 lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
16372ff0 1021 for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size;
1640366c
KS
1022 i < gt_count; i++, tmp += gt_size) {
1023 ret = qemu_write_full(fd, &tmp, sizeof(tmp));
1024 if (ret != sizeof(tmp)) {
b781cce5 1025 ret = -errno;
1640366c
KS
1026 goto exit;
1027 }
1028 }
3b46e624 1029
8979b227
FB
1030 /* write backup grain directory */
1031 lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
16372ff0 1032 for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size;
1640366c
KS
1033 i < gt_count; i++, tmp += gt_size) {
1034 ret = qemu_write_full(fd, &tmp, sizeof(tmp));
1035 if (ret != sizeof(tmp)) {
b781cce5 1036 ret = -errno;
1640366c
KS
1037 goto exit;
1038 }
1039 }
8979b227
FB
1040
1041 /* compose the descriptor */
1042 real_filename = filename;
1043 if ((temp_str = strrchr(real_filename, '\\')) != NULL)
1044 real_filename = temp_str + 1;
1045 if ((temp_str = strrchr(real_filename, '/')) != NULL)
1046 real_filename = temp_str + 1;
1047 if ((temp_str = strrchr(real_filename, ':')) != NULL)
1048 real_filename = temp_str + 1;
7ccfb2eb 1049 snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
7fd6d9fc
BS
1050 total_size, real_filename,
1051 (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
1052 total_size / (int64_t)(63 * 16));
8979b227
FB
1053
1054 /* write the descriptor */
1055 lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
1640366c
KS
1056 ret = qemu_write_full(fd, desc, strlen(desc));
1057 if (ret != strlen(desc)) {
b781cce5 1058 ret = -errno;
1640366c
KS
1059 goto exit;
1060 }
8979b227 1061
1640366c
KS
1062 ret = 0;
1063exit:
8979b227 1064 close(fd);
1640366c 1065 return ret;
8979b227
FB
1066}
1067
e2731add 1068static void vmdk_close(BlockDriverState *bs)
ea2384d3 1069{
b3976d3c 1070 vmdk_free_extents(bs);
ea2384d3
FB
1071}
1072
205ef796 1073static int vmdk_flush(BlockDriverState *bs)
7a6cba61 1074{
333c574d
FZ
1075 int i, ret, err;
1076 BDRVVmdkState *s = bs->opaque;
1077
1078 ret = bdrv_flush(bs->file);
1079 for (i = 0; i < s->num_extents; i++) {
1080 err = bdrv_flush(s->extents[i].file);
1081 if (err < 0) {
1082 ret = err;
1083 }
1084 }
1085 return ret;
7a6cba61
PB
1086}
1087
0e7e1989
KW
1088
1089static QEMUOptionParameter vmdk_create_options[] = {
db08adf5
KW
1090 {
1091 .name = BLOCK_OPT_SIZE,
1092 .type = OPT_SIZE,
1093 .help = "Virtual disk size"
1094 },
1095 {
1096 .name = BLOCK_OPT_BACKING_FILE,
1097 .type = OPT_STRING,
1098 .help = "File name of a base image"
1099 },
1100 {
1101 .name = BLOCK_OPT_COMPAT6,
1102 .type = OPT_FLAG,
1103 .help = "VMDK version 6 image"
1104 },
0e7e1989
KW
1105 { NULL }
1106};
1107
5efa9d5a 1108static BlockDriver bdrv_vmdk = {
e60f469c
AJ
1109 .format_name = "vmdk",
1110 .instance_size = sizeof(BDRVVmdkState),
1111 .bdrv_probe = vmdk_probe,
6511ef77 1112 .bdrv_open = vmdk_open,
e60f469c
AJ
1113 .bdrv_read = vmdk_read,
1114 .bdrv_write = vmdk_write,
1115 .bdrv_close = vmdk_close,
1116 .bdrv_create = vmdk_create,
1117 .bdrv_flush = vmdk_flush,
1118 .bdrv_is_allocated = vmdk_is_allocated,
0e7e1989
KW
1119
1120 .create_options = vmdk_create_options,
ea2384d3 1121};
5efa9d5a
AL
1122
1123static void bdrv_vmdk_init(void)
1124{
1125 bdrv_register(&bdrv_vmdk);
1126}
1127
1128block_init(bdrv_vmdk_init);