/*
 * block/vmdk.c — QEMU VMDK block driver
 * (snapshot of the commit "VMDK: introduce VmdkExtent")
 */
1 /*
2 * Block driver for the VMDK format
3 *
4 * Copyright (c) 2004 Fabrice Bellard
5 * Copyright (c) 2005 Filip Navara
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26 #include "qemu-common.h"
27 #include "block_int.h"
28 #include "module.h"
29
/* On-disk magic numbers: "COWD" (VMDK version 3) and "KDMV" (VMDK version 4),
 * built as big-endian 32-bit values so they compare against be32_to_cpu(). */
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
32
/* On-disk header of a VMDK3/COWD sparse image (fields are little-endian on
 * disk; callers convert with le32_to_cpu). */
typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;      /* virtual disk size, in 512-byte sectors */
    uint32_t granularity;       /* grain (cluster) size, in sectors */
    uint32_t l1dir_offset;      /* L1 directory location, in sectors (<< 9 for bytes) */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;         /* geometry hints -- unused by this driver */
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
45
/* On-disk header of a VMDK4/KDMV sparse image. Packed: the struct mirrors
 * the exact byte layout following the 4-byte magic, so no padding may be
 * inserted. Fields are little-endian on disk. */
typedef struct {
    uint32_t version;
    uint32_t flags;
    int64_t capacity;           /* virtual disk size, in sectors */
    int64_t granularity;        /* grain size, in sectors */
    int64_t desc_offset;        /* embedded descriptor location, in sectors */
    int64_t desc_size;          /* embedded descriptor size, in sectors */
    int32_t num_gtes_per_gte;   /* entries per grain table (despite the name) */
    int64_t rgd_offset;         /* redundant grain directory, in sectors */
    int64_t gd_offset;          /* grain directory, in sectors */
    int64_t grain_offset;       /* first data grain, in sectors */
    char filler[1];
    char check_bytes[4];        /* line-ending probe bytes (\n \x20 \r \n) */
} __attribute__((packed)) VMDK4Header;
60
/* Number of L2 tables kept in the per-extent cache. */
#define L2_CACHE_SIZE 16

/* One extent (data file / address range) of a VMDK image. */
typedef struct VmdkExtent {
    BlockDriverState *file;     /* file backing this extent's data */
    bool flat;                  /* flat (preallocated) extent: no L1/L2 tables */
    int64_t sectors;            /* length of this extent, in sectors */
    int64_t end_sector;         /* cumulative end, in sectors, over all extents */
    int64_t l1_table_offset;    /* byte offset of the L1 (grain directory) table */
    int64_t l1_backup_table_offset; /* byte offset of backup L1 table, 0 if none */
    uint32_t *l1_table;         /* in-memory L1 table, host-endian */
    uint32_t *l1_backup_table;  /* in-memory backup L1 table, host-endian */
    unsigned int l1_size;       /* number of L1 entries */
    uint32_t l1_entry_sectors;  /* sectors covered by one L1 entry (l2_size * cluster_sectors) */

    unsigned int l2_size;       /* number of entries per L2 table */
    uint32_t *l2_cache;         /* cache of L2_CACHE_SIZE L2 tables */
    uint32_t l2_cache_offsets[L2_CACHE_SIZE]; /* which L2 table each slot holds */
    uint32_t l2_cache_counts[L2_CACHE_SIZE];  /* hit counters for eviction */

    unsigned int cluster_sectors; /* grain size, in sectors */
} VmdkExtent;
82
/* Per-image driver state, stored in bs->opaque. */
typedef struct BDRVVmdkState {
    uint32_t parent_cid;        /* parent's CID recorded at open, for validation */
    int num_extents;
    /* Extent array with num_extents entries, ascend ordered by address */
    VmdkExtent *extents;
} BDRVVmdkState;
89
/* Location of a newly allocated grain's L2 entry, filled in by
 * get_cluster_offset() and consumed by vmdk_L2update(). */
typedef struct VmdkMetaData {
    uint32_t offset;            /* new grain offset, little-endian, in sectors */
    unsigned int l1_index;      /* index into the L1 table */
    unsigned int l2_index;      /* index into the L2 table */
    unsigned int l2_offset;     /* L2 table location, in sectors */
    int valid;                  /* non-zero when the fields above are meaningful */
} VmdkMetaData;
97
98 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
99 {
100 uint32_t magic;
101
102 if (buf_size < 4)
103 return 0;
104 magic = be32_to_cpu(*(uint32_t *)buf);
105 if (magic == VMDK3_MAGIC ||
106 magic == VMDK4_MAGIC)
107 return 100;
108 else
109 return 0;
110 }
111
#define CHECK_CID 1

#define SECTOR_SIZE 512
/* Parenthesized so the macro expands safely inside larger expressions
 * (e.g. x / DESC_SIZE or DESC_SIZE + 1). */
#define DESC_SIZE (20 * SECTOR_SIZE)    /* 20 sectors of 512 bytes each */
#define HEADER_SIZE 512                 /* first sector of 512 bytes */
117
118 static void vmdk_free_extents(BlockDriverState *bs)
119 {
120 int i;
121 BDRVVmdkState *s = bs->opaque;
122
123 for (i = 0; i < s->num_extents; i++) {
124 qemu_free(s->extents[i].l1_table);
125 qemu_free(s->extents[i].l2_cache);
126 qemu_free(s->extents[i].l1_backup_table);
127 }
128 qemu_free(s->extents);
129 }
130
131 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
132 {
133 char desc[DESC_SIZE];
134 uint32_t cid;
135 const char *p_name, *cid_str;
136 size_t cid_str_size;
137
138 /* the descriptor offset = 0x200 */
139 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
140 return 0;
141
142 if (parent) {
143 cid_str = "parentCID";
144 cid_str_size = sizeof("parentCID");
145 } else {
146 cid_str = "CID";
147 cid_str_size = sizeof("CID");
148 }
149
150 if ((p_name = strstr(desc,cid_str)) != NULL) {
151 p_name += cid_str_size;
152 sscanf(p_name,"%x",&cid);
153 }
154
155 return cid;
156 }
157
158 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
159 {
160 char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
161 char *p_name, *tmp_str;
162
163 /* the descriptor offset = 0x200 */
164 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
165 return -1;
166
167 tmp_str = strstr(desc,"parentCID");
168 pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
169 if ((p_name = strstr(desc,"CID")) != NULL) {
170 p_name += sizeof("CID");
171 snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
172 pstrcat(desc, sizeof(desc), tmp_desc);
173 }
174
175 if (bdrv_pwrite_sync(bs->file, 0x200, desc, DESC_SIZE) < 0)
176 return -1;
177 return 0;
178 }
179
180 static int vmdk_is_cid_valid(BlockDriverState *bs)
181 {
182 #ifdef CHECK_CID
183 BDRVVmdkState *s = bs->opaque;
184 BlockDriverState *p_bs = bs->backing_hd;
185 uint32_t cur_pcid;
186
187 if (p_bs) {
188 cur_pcid = vmdk_read_cid(p_bs,0);
189 if (s->parent_cid != cur_pcid)
190 // CID not valid
191 return 0;
192 }
193 #endif
194 // CID valid
195 return 1;
196 }
197
198 static int vmdk_snapshot_create(const char *filename, const char *backing_file)
199 {
200 int snp_fd, p_fd;
201 int ret;
202 uint32_t p_cid;
203 char *p_name, *gd_buf, *rgd_buf;
204 const char *real_filename, *temp_str;
205 VMDK4Header header;
206 uint32_t gde_entries, gd_size;
207 int64_t gd_offset, rgd_offset, capacity, gt_size;
208 char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
209 static const char desc_template[] =
210 "# Disk DescriptorFile\n"
211 "version=1\n"
212 "CID=%x\n"
213 "parentCID=%x\n"
214 "createType=\"monolithicSparse\"\n"
215 "parentFileNameHint=\"%s\"\n"
216 "\n"
217 "# Extent description\n"
218 "RW %u SPARSE \"%s\"\n"
219 "\n"
220 "# The Disk Data Base \n"
221 "#DDB\n"
222 "\n";
223
224 snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
225 if (snp_fd < 0)
226 return -errno;
227 p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
228 if (p_fd < 0) {
229 close(snp_fd);
230 return -errno;
231 }
232
233 /* read the header */
234 if (lseek(p_fd, 0x0, SEEK_SET) == -1) {
235 ret = -errno;
236 goto fail;
237 }
238 if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) {
239 ret = -errno;
240 goto fail;
241 }
242
243 /* write the header */
244 if (lseek(snp_fd, 0x0, SEEK_SET) == -1) {
245 ret = -errno;
246 goto fail;
247 }
248 if (write(snp_fd, hdr, HEADER_SIZE) == -1) {
249 ret = -errno;
250 goto fail;
251 }
252
253 memset(&header, 0, sizeof(header));
254 memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
255
256 if (ftruncate(snp_fd, header.grain_offset << 9)) {
257 ret = -errno;
258 goto fail;
259 }
260 /* the descriptor offset = 0x200 */
261 if (lseek(p_fd, 0x200, SEEK_SET) == -1) {
262 ret = -errno;
263 goto fail;
264 }
265 if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) {
266 ret = -errno;
267 goto fail;
268 }
269
270 if ((p_name = strstr(p_desc,"CID")) != NULL) {
271 p_name += sizeof("CID");
272 sscanf(p_name,"%x",&p_cid);
273 }
274
275 real_filename = filename;
276 if ((temp_str = strrchr(real_filename, '\\')) != NULL)
277 real_filename = temp_str + 1;
278 if ((temp_str = strrchr(real_filename, '/')) != NULL)
279 real_filename = temp_str + 1;
280 if ((temp_str = strrchr(real_filename, ':')) != NULL)
281 real_filename = temp_str + 1;
282
283 snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
284 (uint32_t)header.capacity, real_filename);
285
286 /* write the descriptor */
287 if (lseek(snp_fd, 0x200, SEEK_SET) == -1) {
288 ret = -errno;
289 goto fail;
290 }
291 if (write(snp_fd, s_desc, strlen(s_desc)) == -1) {
292 ret = -errno;
293 goto fail;
294 }
295
296 gd_offset = header.gd_offset * SECTOR_SIZE; // offset of GD table
297 rgd_offset = header.rgd_offset * SECTOR_SIZE; // offset of RGD table
298 capacity = header.capacity * SECTOR_SIZE; // Extent size
299 /*
300 * Each GDE span 32M disk, means:
301 * 512 GTE per GT, each GTE points to grain
302 */
303 gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
304 if (!gt_size) {
305 ret = -EINVAL;
306 goto fail;
307 }
308 gde_entries = (uint32_t)(capacity / gt_size); // number of gde/rgde
309 gd_size = gde_entries * sizeof(uint32_t);
310
311 /* write RGD */
312 rgd_buf = qemu_malloc(gd_size);
313 if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) {
314 ret = -errno;
315 goto fail_rgd;
316 }
317 if (read(p_fd, rgd_buf, gd_size) != gd_size) {
318 ret = -errno;
319 goto fail_rgd;
320 }
321 if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) {
322 ret = -errno;
323 goto fail_rgd;
324 }
325 if (write(snp_fd, rgd_buf, gd_size) == -1) {
326 ret = -errno;
327 goto fail_rgd;
328 }
329
330 /* write GD */
331 gd_buf = qemu_malloc(gd_size);
332 if (lseek(p_fd, gd_offset, SEEK_SET) == -1) {
333 ret = -errno;
334 goto fail_gd;
335 }
336 if (read(p_fd, gd_buf, gd_size) != gd_size) {
337 ret = -errno;
338 goto fail_gd;
339 }
340 if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) {
341 ret = -errno;
342 goto fail_gd;
343 }
344 if (write(snp_fd, gd_buf, gd_size) == -1) {
345 ret = -errno;
346 goto fail_gd;
347 }
348 ret = 0;
349
350 fail_gd:
351 qemu_free(gd_buf);
352 fail_rgd:
353 qemu_free(rgd_buf);
354 fail:
355 close(p_fd);
356 close(snp_fd);
357 return ret;
358 }
359
360 static int vmdk_parent_open(BlockDriverState *bs)
361 {
362 char *p_name;
363 char desc[DESC_SIZE];
364
365 /* the descriptor offset = 0x200 */
366 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
367 return -1;
368
369 if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
370 char *end_name;
371
372 p_name += sizeof("parentFileNameHint") + 1;
373 if ((end_name = strchr(p_name,'\"')) == NULL)
374 return -1;
375 if ((end_name - p_name) > sizeof (bs->backing_file) - 1)
376 return -1;
377
378 pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
379 }
380
381 return 0;
382 }
383
384 /* Create and append extent to the extent array. Return the added VmdkExtent
385 * address. return NULL if allocation failed. */
386 static VmdkExtent *vmdk_add_extent(BlockDriverState *bs,
387 BlockDriverState *file, bool flat, int64_t sectors,
388 int64_t l1_offset, int64_t l1_backup_offset,
389 uint32_t l1_size,
390 int l2_size, unsigned int cluster_sectors)
391 {
392 VmdkExtent *extent;
393 BDRVVmdkState *s = bs->opaque;
394
395 s->extents = qemu_realloc(s->extents,
396 (s->num_extents + 1) * sizeof(VmdkExtent));
397 extent = &s->extents[s->num_extents];
398 s->num_extents++;
399
400 memset(extent, 0, sizeof(VmdkExtent));
401 extent->file = file;
402 extent->flat = flat;
403 extent->sectors = sectors;
404 extent->l1_table_offset = l1_offset;
405 extent->l1_backup_table_offset = l1_backup_offset;
406 extent->l1_size = l1_size;
407 extent->l1_entry_sectors = l2_size * cluster_sectors;
408 extent->l2_size = l2_size;
409 extent->cluster_sectors = cluster_sectors;
410
411 if (s->num_extents > 1) {
412 extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
413 } else {
414 extent->end_sector = extent->sectors;
415 }
416 bs->total_sectors = extent->end_sector;
417 return extent;
418 }
419
420
/* Open a VMDK image: identify VMDK3 (COWD) or VMDK4 (KDMV) from the magic,
 * register one extent for the file, then load the extent's L1 table and,
 * when present, the backup L1 table, and allocate the L2 cache.
 * Returns 0 on success, -1 on any failure (extents freed on the way out). */
static int vmdk_open(BlockDriverState *bs, int flags)
{
    BDRVVmdkState *s = bs->opaque;
    uint32_t magic;
    int i;
    uint32_t l1_size, l1_entry_sectors;
    VmdkExtent *extent = NULL;

    if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
        goto fail;

    magic = be32_to_cpu(magic);
    if (magic == VMDK3_MAGIC) {
        VMDK3Header header;
        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
            != sizeof(header)) {
            goto fail;
        }
        /* VMDK3 geometry is fixed: 1<<6 L1 entries, 1<<9 L2 entries. */
        extent = vmdk_add_extent(bs, bs->file, false,
                              le32_to_cpu(header.disk_sectors),
                              le32_to_cpu(header.l1dir_offset) << 9, 0,
                              1 << 6, 1 << 9, le32_to_cpu(header.granularity));
    } else if (magic == VMDK4_MAGIC) {
        VMDK4Header header;
        if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header))
            != sizeof(header)) {
            goto fail;
        }
        /* Each L1 entry covers one full grain table's worth of sectors. */
        l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
                            * le64_to_cpu(header.granularity);
        l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
                    / l1_entry_sectors;
        /* The redundant GD (rgd_offset) serves as the backup L1 table. */
        extent = vmdk_add_extent(bs, bs->file, false,
                              le64_to_cpu(header.capacity),
                              le64_to_cpu(header.gd_offset) << 9,
                              le64_to_cpu(header.rgd_offset) << 9,
                              l1_size,
                              le32_to_cpu(header.num_gtes_per_gte),
                              le64_to_cpu(header.granularity));
        /* Guard against a zero/overflowed grain geometry. */
        if (extent->l1_entry_sectors <= 0) {
            goto fail;
        }
        /* try to open parent images, if exist */
        if (vmdk_parent_open(bs) != 0)
            goto fail;
        /* Record the parent's CID now so later writes can detect a
         * modified parent (see vmdk_is_cid_valid). */
        s->parent_cid = vmdk_read_cid(bs, 1);
    } else {
        goto fail;
    }

    /* read the L1 table (l1_size now becomes a byte count) */
    l1_size = extent->l1_size * sizeof(uint32_t);
    extent->l1_table = qemu_malloc(l1_size);
    if (bdrv_pread(bs->file,
            extent->l1_table_offset,
            extent->l1_table,
            l1_size)
        != l1_size) {
        goto fail;
    }
    for (i = 0; i < extent->l1_size; i++) {
        le32_to_cpus(&extent->l1_table[i]);
    }

    /* read the backup L1 table, when the header advertised one */
    if (extent->l1_backup_table_offset) {
        extent->l1_backup_table = qemu_malloc(l1_size);
        if (bdrv_pread(bs->file,
                extent->l1_backup_table_offset,
                extent->l1_backup_table,
                l1_size)
            != l1_size) {
            goto fail;
        }
        for (i = 0; i < extent->l1_size; i++) {
            le32_to_cpus(&extent->l1_backup_table[i]);
        }
    }

    extent->l2_cache =
        qemu_malloc(extent->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
    return 0;
 fail:
    vmdk_free_extents(bs);
    return -1;
}
507
/* Populate a freshly allocated grain with data from the parent image, so a
 * partial write into the grain does not expose stale/zero data.
 * cluster_offset is in sectors (the caller shifted it down by 9);
 * offset is the guest offset in bytes. Returns 0 on success, -1 on error.
 * NOTE(review): 'allocate' is accepted but never used here — confirm. */
static int get_whole_cluster(BlockDriverState *bs,
                VmdkExtent *extent,
                uint64_t cluster_offset,
                uint64_t offset,
                bool allocate)
{
    /* 128 sectors * 512 bytes each = grain size 64KB.
     * NOTE(review): VLA sized by cluster_sectors — a large granularity in
     * the header means a large stack allocation; verify bounds upstream. */
    uint8_t whole_grain[extent->cluster_sectors * 512];

    /* We get here on the first write to a not-yet-allocated grain.
     * Try to read the grain's old contents from the parent image, if any. */
    if (bs->backing_hd) {
        int ret;

        if (!vmdk_is_cid_valid(bs))
            return -1;

        ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
            extent->cluster_sectors);
        if (ret < 0) {
            return -1;
        }

        /* Write grain only into the active image, never the parent. */
        ret = bdrv_write(extent->file, cluster_offset, whole_grain,
            extent->cluster_sectors);
        if (ret < 0) {
            return -1;
        }
    }
    return 0;
}
540
541 static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
542 {
543 /* update L2 table */
544 if (bdrv_pwrite_sync(
545 extent->file,
546 ((int64_t)m_data->l2_offset * 512)
547 + (m_data->l2_index * sizeof(m_data->offset)),
548 &(m_data->offset),
549 sizeof(m_data->offset)
550 ) < 0) {
551 return -1;
552 }
553 /* update backup L2 table */
554 if (extent->l1_backup_table_offset != 0) {
555 m_data->l2_offset = extent->l1_backup_table[m_data->l1_index];
556 if (bdrv_pwrite_sync(
557 extent->file,
558 ((int64_t)m_data->l2_offset * 512)
559 + (m_data->l2_index * sizeof(m_data->offset)),
560 &(m_data->offset), sizeof(m_data->offset)
561 ) < 0) {
562 return -1;
563 }
564 }
565
566 return 0;
567 }
568
/* Translate a guest byte offset into the byte offset of its grain within
 * the extent's file, going through the L1 table and a small cache of L2
 * tables. When 'allocate' is set and the grain does not exist yet, a new
 * grain is appended to the file (populated from the parent image first)
 * and m_data is filled in so the caller can update the on-disk L2 tables.
 * Returns 0 when the grain is unallocated (and allocation was not
 * requested or failed) — 0 doubles as the error/absent sentinel. */
static uint64_t get_cluster_offset(BlockDriverState *bs,
                    VmdkExtent *extent,
                    VmdkMetaData *m_data,
                    uint64_t offset, int allocate)
{
    unsigned int l1_index, l2_offset, l2_index;
    int min_index, i, j;
    uint32_t min_count, *l2_table, tmp = 0;
    uint64_t cluster_offset;

    if (m_data)
        m_data->valid = 0;

    /* L1 lookup: which grain table covers this offset? */
    l1_index = (offset >> 9) / extent->l1_entry_sectors;
    if (l1_index >= extent->l1_size) {
        return 0;
    }
    l2_offset = extent->l1_table[l1_index];
    if (!l2_offset) {
        return 0;
    }
    /* L2 cache lookup, keyed by the table's sector offset. */
    for(i = 0; i < L2_CACHE_SIZE; i++) {
        if (l2_offset == extent->l2_cache_offsets[i]) {
            /* increment the hit count; halve all counts on saturation so
             * relative ordering survives */
            if (++extent->l2_cache_counts[i] == 0xffffffff) {
                for(j = 0; j < L2_CACHE_SIZE; j++) {
                    extent->l2_cache_counts[j] >>= 1;
                }
            }
            l2_table = extent->l2_cache + (i * extent->l2_size);
            goto found;
        }
    }
    /* not found: load a new entry in the least used one */
    min_index = 0;
    min_count = 0xffffffff;
    for(i = 0; i < L2_CACHE_SIZE; i++) {
        if (extent->l2_cache_counts[i] < min_count) {
            min_count = extent->l2_cache_counts[i];
            min_index = i;
        }
    }
    l2_table = extent->l2_cache + (min_index * extent->l2_size);
    if (bdrv_pread(
            extent->file,
            (int64_t)l2_offset * 512,
            l2_table,
            extent->l2_size * sizeof(uint32_t)
        ) != extent->l2_size * sizeof(uint32_t)) {
        return 0;
    }

    extent->l2_cache_offsets[min_index] = l2_offset;
    extent->l2_cache_counts[min_index] = 1;
 found:
    /* L2 lookup: which grain within the table? */
    l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
    cluster_offset = le32_to_cpu(l2_table[l2_index]);

    if (!cluster_offset) {
        if (!allocate)
            return 0;

        /* Allocate the grain by appending it at the end of the file.
         * (Avoids rewriting L2 tables shared with snapshots.) */
        cluster_offset = bdrv_getlength(extent->file);
        bdrv_truncate(
            extent->file,
            cluster_offset + (extent->cluster_sectors << 9)
        );

        /* Cache the entry in little-endian, as stored on disk. */
        cluster_offset >>= 9;
        tmp = cpu_to_le32(cluster_offset);
        l2_table[l2_index] = tmp;

        /* First of all we write grain itself, to avoid race condition
         * that may to corrupt the image.
         * This problem may occur because of insufficient space on host disk
         * or inappropriate VM shutdown.
         */
        if (get_whole_cluster(
                bs, extent, cluster_offset, offset, allocate) == -1)
            return 0;

        /* Hand the L2 entry's location back so the caller can persist it
         * via vmdk_L2update once the data write succeeds. */
        if (m_data) {
            m_data->offset = tmp;
            m_data->l1_index = l1_index;
            m_data->l2_index = l2_index;
            m_data->l2_offset = l2_offset;
            m_data->valid = 1;
        }
    }
    /* Convert sectors back to a byte offset for the caller. */
    cluster_offset <<= 9;
    return cluster_offset;
}
662
663 static VmdkExtent *find_extent(BDRVVmdkState *s,
664 int64_t sector_num, VmdkExtent *start_hint)
665 {
666 VmdkExtent *extent = start_hint;
667
668 if (!extent) {
669 extent = &s->extents[0];
670 }
671 while (extent < &s->extents[s->num_extents]) {
672 if (sector_num < extent->end_sector) {
673 return extent;
674 }
675 extent++;
676 }
677 return NULL;
678 }
679
680 static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
681 int nb_sectors, int *pnum)
682 {
683 BDRVVmdkState *s = bs->opaque;
684
685 int64_t index_in_cluster, n, ret;
686 uint64_t offset;
687 VmdkExtent *extent;
688
689 extent = find_extent(s, sector_num, NULL);
690 if (!extent) {
691 return 0;
692 }
693 if (extent->flat) {
694 n = extent->end_sector - sector_num;
695 ret = 1;
696 } else {
697 offset = get_cluster_offset(bs, extent, NULL, sector_num * 512, 0);
698 index_in_cluster = sector_num % extent->cluster_sectors;
699 n = extent->cluster_sectors - index_in_cluster;
700 ret = offset ? 1 : 0;
701 }
702 if (n > nb_sectors)
703 n = nb_sectors;
704 *pnum = n;
705 return ret;
706 }
707
708 static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
709 uint8_t *buf, int nb_sectors)
710 {
711 BDRVVmdkState *s = bs->opaque;
712 int ret;
713 uint64_t n, index_in_cluster;
714 VmdkExtent *extent = NULL;
715 uint64_t cluster_offset;
716
717 while (nb_sectors > 0) {
718 extent = find_extent(s, sector_num, extent);
719 if (!extent) {
720 return -EIO;
721 }
722 cluster_offset = get_cluster_offset(
723 bs, extent, NULL, sector_num << 9, 0);
724 index_in_cluster = sector_num % extent->cluster_sectors;
725 n = extent->cluster_sectors - index_in_cluster;
726 if (n > nb_sectors)
727 n = nb_sectors;
728 if (!cluster_offset) {
729 // try to read from parent image, if exist
730 if (bs->backing_hd) {
731 if (!vmdk_is_cid_valid(bs))
732 return -1;
733 ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
734 if (ret < 0)
735 return -1;
736 } else {
737 memset(buf, 0, 512 * n);
738 }
739 } else {
740 if(bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
741 return -1;
742 }
743 nb_sectors -= n;
744 sector_num += n;
745 buf += n * 512;
746 }
747 return 0;
748 }
749
/* Write nb_sectors from buf starting at sector_num, allocating grains on
 * demand and persisting their L2 entries after the data lands. Refreshes
 * the image CID on the first write. Returns 0 on success, negative on
 * error. */
static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                      const uint8_t *buf, int nb_sectors)
{
    BDRVVmdkState *s = bs->opaque;
    VmdkExtent *extent = NULL;
    int n;
    int64_t index_in_cluster;
    uint64_t cluster_offset;
    /* NOTE(review): file-scope static — shared by every open VMDK image in
     * the process and never reset, so the CID refresh below happens once
     * per process rather than once per image open. Confirm intent; a
     * per-image flag in BDRVVmdkState would match the comment below. */
    static int cid_update = 0;
    VmdkMetaData m_data;

    /* NOTE(review): '>' permits sector_num == total_sectors; find_extent
     * then returns NULL and the loop exits with -EIO, so behavior is safe
     * but the message is only printed for strictly-beyond offsets. */
    if (sector_num > bs->total_sectors) {
        fprintf(stderr,
                "(VMDK) Wrong offset: sector_num=0x%" PRIx64
                " total_sectors=0x%" PRIx64 "\n",
                sector_num, bs->total_sectors);
        return -1;
    }

    while (nb_sectors > 0) {
        extent = find_extent(s, sector_num, extent);
        if (!extent) {
            return -EIO;
        }
        /* allocate=1: a missing grain is appended and m_data describes the
         * L2 entry to persist once the data write succeeds. */
        cluster_offset = get_cluster_offset(
                                bs,
                                extent,
                                &m_data,
                                sector_num << 9, 1);
        if (!cluster_offset) {
            return -1;
        }
        index_in_cluster = sector_num % extent->cluster_sectors;
        /* Clamp the write to the current grain boundary. */
        n = extent->cluster_sectors - index_in_cluster;
        if (n > nb_sectors) {
            n = nb_sectors;
        }

        if (bdrv_pwrite(bs->file,
                        cluster_offset + index_in_cluster * 512,
                        buf, n * 512)
            != n * 512) {
            return -1;
        }
        if (m_data.valid) {
            /* Data is on disk; now make the new grain reachable by
             * updating the (and, if present, backup) L2 tables. */
            if (vmdk_L2update(extent, &m_data) == -1) {
                return -1;
            }
        }
        nb_sectors -= n;
        sector_num += n;
        buf += n * 512;

        /* update CID on the first write every time the virtual disk is opened */
        if (!cid_update) {
            vmdk_write_cid(bs, time(NULL));
            cid_update++;
        }
    }
    return 0;
}
812
/* Create a new monolithic-sparse VMDK4 image. Lays out: header, embedded
 * descriptor, redundant grain directory + its grain tables, grain
 * directory + its grain tables, then the first data grain. With a backing
 * file, delegates to vmdk_snapshot_create instead. Returns 0 on success,
 * negative errno-style value on failure. */
static int vmdk_create(const char *filename, QEMUOptionParameter *options)
{
    int fd, i;
    VMDK4Header header;
    uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
    static const char desc_template[] =
        "# Disk DescriptorFile\n"
        "version=1\n"
        "CID=%x\n"
        "parentCID=ffffffff\n"
        "createType=\"monolithicSparse\"\n"
        "\n"
        "# Extent description\n"
        "RW %" PRId64 " SPARSE \"%s\"\n"
        "\n"
        "# The Disk Data Base \n"
        "#DDB\n"
        "\n"
        "ddb.virtualHWVersion = \"%d\"\n"
        "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
        "ddb.geometry.heads = \"16\"\n"
        "ddb.geometry.sectors = \"63\"\n"
        "ddb.adapterType = \"ide\"\n";
    char desc[1024];
    const char *real_filename, *temp_str;
    int64_t total_size = 0;
    const char *backing_file = NULL;
    int flags = 0;
    int ret;

    /* Read out options (size in bytes -> sectors; compat6 flag). */
    while (options && options->name) {
        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
            total_size = options->value.n / 512;
        } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
            backing_file = options->value.s;
        } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
            flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0;
        }
        options++;
    }

    /* XXX: add support for backing file */
    if (backing_file) {
        return vmdk_snapshot_create(filename, backing_file);
    }

    fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
              0644);
    if (fd < 0)
        return -errno;
    magic = cpu_to_be32(VMDK4_MAGIC);
    memset(&header, 0, sizeof(header));
    /* Header fields in host endianness first; byte-swapped below. */
    header.version = 1;
    header.flags = 3; /* ?? */
    header.capacity = total_size;
    header.granularity = 128;           /* 64 KiB grains */
    header.num_gtes_per_gte = 512;      /* entries per grain table */

    /* Layout arithmetic, all in sectors:
     * grains    - data grains needed for the whole disk
     * gt_size   - sectors per grain table
     * gt_count  - number of grain tables (= GD entries)
     * gd_size   - sectors per grain directory */
    grains = (total_size + header.granularity - 1) / header.granularity;
    gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
    gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
    gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;

    /* descriptor at sector 1, then RGD, GD, and grain-aligned data */
    header.desc_offset = 1;
    header.desc_size = 20;
    header.rgd_offset = header.desc_offset + header.desc_size;
    header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
    header.grain_offset =
       ((header.gd_offset + gd_size + (gt_size * gt_count) +
         header.granularity - 1) / header.granularity) *
        header.granularity;

    /* swap endianness for all header fields */
    header.version = cpu_to_le32(header.version);
    header.flags = cpu_to_le32(header.flags);
    header.capacity = cpu_to_le64(header.capacity);
    header.granularity = cpu_to_le64(header.granularity);
    header.num_gtes_per_gte = cpu_to_le32(header.num_gtes_per_gte);
    header.desc_offset = cpu_to_le64(header.desc_offset);
    header.desc_size = cpu_to_le64(header.desc_size);
    header.rgd_offset = cpu_to_le64(header.rgd_offset);
    header.gd_offset = cpu_to_le64(header.gd_offset);
    header.grain_offset = cpu_to_le64(header.grain_offset);

    /* the "\n \x20\r\n" sequence used to detect FTP text-mode corruption */
    header.check_bytes[0] = 0xa;
    header.check_bytes[1] = 0x20;
    header.check_bytes[2] = 0xd;
    header.check_bytes[3] = 0xa;

    /* write all the data */
    ret = qemu_write_full(fd, &magic, sizeof(magic));
    if (ret != sizeof(magic)) {
        ret = -errno;
        goto exit;
    }
    ret = qemu_write_full(fd, &header, sizeof(header));
    if (ret != sizeof(header)) {
        ret = -errno;
        goto exit;
    }

    /* reserve space up to the first data grain */
    ret = ftruncate(fd, le64_to_cpu(header.grain_offset) << 9);
    if (ret < 0) {
        ret = -errno;
        goto exit;
    }

    /* write grain directory: each entry points at its grain table, which
     * are laid out back-to-back right after the directory */
    lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
    for (i = 0, tmp = le64_to_cpu(header.rgd_offset) + gd_size;
         i < gt_count; i++, tmp += gt_size) {
        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
        if (ret != sizeof(tmp)) {
            ret = -errno;
            goto exit;
        }
    }

    /* write backup grain directory */
    lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
    for (i = 0, tmp = le64_to_cpu(header.gd_offset) + gd_size;
         i < gt_count; i++, tmp += gt_size) {
        ret = qemu_write_full(fd, &tmp, sizeof(tmp));
        if (ret != sizeof(tmp)) {
            ret = -errno;
            goto exit;
        }
    }

    /* compose the descriptor (strip any directory components first) */
    real_filename = filename;
    if ((temp_str = strrchr(real_filename, '\\')) != NULL)
        real_filename = temp_str + 1;
    if ((temp_str = strrchr(real_filename, '/')) != NULL)
        real_filename = temp_str + 1;
    if ((temp_str = strrchr(real_filename, ':')) != NULL)
        real_filename = temp_str + 1;
    snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
             total_size, real_filename,
             (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
             total_size / (int64_t)(63 * 16));

    /* write the descriptor */
    lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
    ret = qemu_write_full(fd, desc, strlen(desc));
    if (ret != strlen(desc)) {
        ret = -errno;
        goto exit;
    }

    ret = 0;
exit:
    close(fd);
    return ret;
}
969
/* Close hook: release all per-extent resources. */
static void vmdk_close(BlockDriverState *bs)
{
    vmdk_free_extents(bs);
}
974
/* Flush hook: forward to the underlying protocol file. */
static int vmdk_flush(BlockDriverState *bs)
{
    return bdrv_flush(bs->file);
}
979
980
/* Options accepted by vmdk_create (NULL-name terminated). */
static QEMUOptionParameter vmdk_create_options[] = {
    {
        .name = BLOCK_OPT_SIZE,
        .type = OPT_SIZE,
        .help = "Virtual disk size"
    },
    {
        .name = BLOCK_OPT_BACKING_FILE,
        .type = OPT_STRING,
        .help = "File name of a base image"
    },
    {
        .name = BLOCK_OPT_COMPAT6,
        .type = OPT_FLAG,
        .help = "VMDK version 6 image"
    },
    { NULL }
};
999
/* Block driver descriptor wiring the vmdk_* callbacks into the block layer. */
static BlockDriver bdrv_vmdk = {
    .format_name    = "vmdk",
    .instance_size  = sizeof(BDRVVmdkState),
    .bdrv_probe     = vmdk_probe,
    .bdrv_open      = vmdk_open,
    .bdrv_read      = vmdk_read,
    .bdrv_write     = vmdk_write,
    .bdrv_close     = vmdk_close,
    .bdrv_create    = vmdk_create,
    .bdrv_flush     = vmdk_flush,
    .bdrv_is_allocated  = vmdk_is_allocated,

    .create_options = vmdk_create_options,
};
1014
/* Register the driver with the block layer at module-init time. */
static void bdrv_vmdk_init(void)
{
    bdrv_register(&bdrv_vmdk);
}

block_init(bdrv_vmdk_init);