]> git.proxmox.com Git - qemu.git/blob - block/vmdk.c
d52904a0e1cbec93098aac304d886447efbfe967
[qemu.git] / block / vmdk.c
1 /*
2 * Block driver for the VMDK format
3 *
4 * Copyright (c) 2004 Fabrice Bellard
5 * Copyright (c) 2005 Filip Navara
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
25
26 #include "qemu-common.h"
27 #include "block_int.h"
28 #include "module.h"
29
/*
 * Image magic numbers.  They are compared against the first four bytes of
 * the file interpreted as a big-endian 32-bit value, i.e. the byte
 * sequences "COWD" (VMDK3) and "KDMV" (VMDK4).
 */
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
32
/*
 * Header of a VMDK3 image as read by vmdk_open() from the bytes that
 * follow the 4-byte magic.  vmdk_open() converts the fields it uses with
 * le32_to_cpu().
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;       /* stored into bs->total_sectors */
    uint32_t granularity;        /* stored into cluster_sectors */
    uint32_t l1dir_offset;       /* shifted left by 9 to get the L1 table byte offset */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
45
/*
 * Header of a VMDK4 image as it appears in the file right after the 4-byte
 * magic.  The struct is packed because it is read and written as raw bytes.
 * vmdk_open() converts the fields it uses with le32_to_cpu()/le64_to_cpu().
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    int64_t capacity;            /* stored into bs->total_sectors */
    int64_t granularity;         /* stored into cluster_sectors */
    int64_t desc_offset;         /* vmdk_create() sets this to 1 */
    int64_t desc_size;           /* vmdk_create() sets this to 20 */
    int32_t num_gtes_per_gte;    /* stored into l2_size (entries per L2 table) */
    int64_t rgd_offset;          /* shifted left by 9 to get l1_table_offset */
    int64_t gd_offset;           /* shifted left by 9 to get l1_backup_table_offset */
    int64_t grain_offset;        /* new images are truncated to grain_offset << 9 bytes */
    char filler[1];
    char check_bytes[4];         /* vmdk_create() writes 0x0a 0x20 0x0d 0x0a here */
} __attribute__((packed)) VMDK4Header;
60
/* Number of L2 tables kept in BDRVVmdkState.l2_cache. */
#define L2_CACHE_SIZE 16

/* Per-image driver state, reached through bs->opaque. */
typedef struct BDRVVmdkState {
    BlockDriverState *hd;               /* underlying file opened by vmdk_open() */
    int64_t l1_table_offset;            /* byte offset of the L1 table in hd */
    int64_t l1_backup_table_offset;     /* byte offset of the backup L1 table, 0 if none */
    uint32_t *l1_table;                 /* L1 table, converted to host byte order */
    uint32_t *l1_backup_table;          /* backup L1 table, converted to host byte order */
    unsigned int l1_size;               /* number of L1 entries */
    uint32_t l1_entry_sectors;          /* l2_size * cluster_sectors */

    unsigned int l2_size;               /* number of entries per L2 table */
    uint32_t *l2_cache;                 /* L2_CACHE_SIZE tables of l2_size entries each */
    uint32_t l2_cache_offsets[L2_CACHE_SIZE]; /* L1 entry value of the table held in each slot */
    uint32_t l2_cache_counts[L2_CACHE_SIZE];  /* hit counters used to pick the slot to replace */

    unsigned int cluster_sectors;       /* grain size in 512-byte sectors */
    uint32_t parent_cid;                /* value read for the "parentCID" descriptor key */
    int is_parent;                      /* set when the image was opened as a backing file */
} BDRVVmdkState;
81
/*
 * Description of a pending L2 table update, filled in by
 * get_cluster_offset() and consumed by vmdk_L2update().
 */
typedef struct VmdkMetaData {
    uint32_t offset;          /* L2 entry value, already converted with cpu_to_le32() */
    unsigned int l1_index;    /* index into the L1 table */
    unsigned int l2_index;    /* index of the entry inside the L2 table */
    unsigned int l2_offset;   /* L1 entry value; multiplied by 512 to locate the L2 table */
    int valid;                /* non-zero when the fields above describe a pending update */
} VmdkMetaData;
89
90 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
91 {
92 uint32_t magic;
93
94 if (buf_size < 4)
95 return 0;
96 magic = be32_to_cpu(*(uint32_t *)buf);
97 if (magic == VMDK3_MAGIC ||
98 magic == VMDK4_MAGIC)
99 return 100;
100 else
101 return 0;
102 }
103
/* When defined, vmdk_is_cid_valid() compares the parent CID values. */
#define CHECK_CID 1

#define SECTOR_SIZE 512
/*
 * Size in bytes of the embedded descriptor area: 20 sectors of 512 bytes.
 * The expansion is parenthesized so it stays a single operand wherever the
 * macro is used (e.g. "x % DESC_SIZE" or "x / DESC_SIZE").
 */
#define DESC_SIZE (20 * SECTOR_SIZE)
#define HEADER_SIZE 512 /* first sector of 512 bytes */
109
110 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
111 {
112 BDRVVmdkState *s = bs->opaque;
113 char desc[DESC_SIZE];
114 uint32_t cid;
115 const char *p_name, *cid_str;
116 size_t cid_str_size;
117
118 /* the descriptor offset = 0x200 */
119 if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
120 return 0;
121
122 if (parent) {
123 cid_str = "parentCID";
124 cid_str_size = sizeof("parentCID");
125 } else {
126 cid_str = "CID";
127 cid_str_size = sizeof("CID");
128 }
129
130 if ((p_name = strstr(desc,cid_str)) != NULL) {
131 p_name += cid_str_size;
132 sscanf(p_name,"%x",&cid);
133 }
134
135 return cid;
136 }
137
138 static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
139 {
140 BDRVVmdkState *s = bs->opaque;
141 char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
142 char *p_name, *tmp_str;
143
144 /* the descriptor offset = 0x200 */
145 if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
146 return -1;
147
148 tmp_str = strstr(desc,"parentCID");
149 pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
150 if ((p_name = strstr(desc,"CID")) != NULL) {
151 p_name += sizeof("CID");
152 snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
153 pstrcat(desc, sizeof(desc), tmp_desc);
154 }
155
156 if (bdrv_pwrite(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
157 return -1;
158 return 0;
159 }
160
161 static int vmdk_is_cid_valid(BlockDriverState *bs)
162 {
163 #ifdef CHECK_CID
164 BDRVVmdkState *s = bs->opaque;
165 BlockDriverState *p_bs = bs->backing_hd;
166 uint32_t cur_pcid;
167
168 if (p_bs) {
169 cur_pcid = vmdk_read_cid(p_bs,0);
170 if (s->parent_cid != cur_pcid)
171 // CID not valid
172 return 0;
173 }
174 #endif
175 // CID valid
176 return 1;
177 }
178
179 static int vmdk_snapshot_create(const char *filename, const char *backing_file)
180 {
181 int snp_fd, p_fd;
182 uint32_t p_cid;
183 char *p_name, *gd_buf, *rgd_buf;
184 const char *real_filename, *temp_str;
185 VMDK4Header header;
186 uint32_t gde_entries, gd_size;
187 int64_t gd_offset, rgd_offset, capacity, gt_size;
188 char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
189 static const char desc_template[] =
190 "# Disk DescriptorFile\n"
191 "version=1\n"
192 "CID=%x\n"
193 "parentCID=%x\n"
194 "createType=\"monolithicSparse\"\n"
195 "parentFileNameHint=\"%s\"\n"
196 "\n"
197 "# Extent description\n"
198 "RW %u SPARSE \"%s\"\n"
199 "\n"
200 "# The Disk Data Base \n"
201 "#DDB\n"
202 "\n";
203
204 snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
205 if (snp_fd < 0)
206 return -1;
207 p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
208 if (p_fd < 0) {
209 close(snp_fd);
210 return -1;
211 }
212
213 /* read the header */
214 if (lseek(p_fd, 0x0, SEEK_SET) == -1)
215 goto fail;
216 if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE)
217 goto fail;
218
219 /* write the header */
220 if (lseek(snp_fd, 0x0, SEEK_SET) == -1)
221 goto fail;
222 if (write(snp_fd, hdr, HEADER_SIZE) == -1)
223 goto fail;
224
225 memset(&header, 0, sizeof(header));
226 memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
227
228 ftruncate(snp_fd, header.grain_offset << 9);
229 /* the descriptor offset = 0x200 */
230 if (lseek(p_fd, 0x200, SEEK_SET) == -1)
231 goto fail;
232 if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE)
233 goto fail;
234
235 if ((p_name = strstr(p_desc,"CID")) != NULL) {
236 p_name += sizeof("CID");
237 sscanf(p_name,"%x",&p_cid);
238 }
239
240 real_filename = filename;
241 if ((temp_str = strrchr(real_filename, '\\')) != NULL)
242 real_filename = temp_str + 1;
243 if ((temp_str = strrchr(real_filename, '/')) != NULL)
244 real_filename = temp_str + 1;
245 if ((temp_str = strrchr(real_filename, ':')) != NULL)
246 real_filename = temp_str + 1;
247
248 snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
249 (uint32_t)header.capacity, real_filename);
250
251 /* write the descriptor */
252 if (lseek(snp_fd, 0x200, SEEK_SET) == -1)
253 goto fail;
254 if (write(snp_fd, s_desc, strlen(s_desc)) == -1)
255 goto fail;
256
257 gd_offset = header.gd_offset * SECTOR_SIZE; // offset of GD table
258 rgd_offset = header.rgd_offset * SECTOR_SIZE; // offset of RGD table
259 capacity = header.capacity * SECTOR_SIZE; // Extent size
260 /*
261 * Each GDE span 32M disk, means:
262 * 512 GTE per GT, each GTE points to grain
263 */
264 gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
265 if (!gt_size)
266 goto fail;
267 gde_entries = (uint32_t)(capacity / gt_size); // number of gde/rgde
268 gd_size = gde_entries * sizeof(uint32_t);
269
270 /* write RGD */
271 rgd_buf = qemu_malloc(gd_size);
272 if (lseek(p_fd, rgd_offset, SEEK_SET) == -1)
273 goto fail_rgd;
274 if (read(p_fd, rgd_buf, gd_size) != gd_size)
275 goto fail_rgd;
276 if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1)
277 goto fail_rgd;
278 if (write(snp_fd, rgd_buf, gd_size) == -1)
279 goto fail_rgd;
280
281 /* write GD */
282 gd_buf = qemu_malloc(gd_size);
283 if (lseek(p_fd, gd_offset, SEEK_SET) == -1)
284 goto fail_gd;
285 if (read(p_fd, gd_buf, gd_size) != gd_size)
286 goto fail_gd;
287 if (lseek(snp_fd, gd_offset, SEEK_SET) == -1)
288 goto fail_gd;
289 if (write(snp_fd, gd_buf, gd_size) == -1)
290 goto fail_gd;
291 qemu_free(gd_buf);
292 qemu_free(rgd_buf);
293
294 close(p_fd);
295 close(snp_fd);
296 return 0;
297
298 fail_gd:
299 qemu_free(gd_buf);
300 fail_rgd:
301 qemu_free(rgd_buf);
302 fail:
303 close(p_fd);
304 close(snp_fd);
305 return -1;
306 }
307
308 static void vmdk_parent_close(BlockDriverState *bs)
309 {
310 if (bs->backing_hd)
311 bdrv_close(bs->backing_hd);
312 }
313
314 static int parent_open = 0;
315 static int vmdk_parent_open(BlockDriverState *bs, const char * filename)
316 {
317 BDRVVmdkState *s = bs->opaque;
318 char *p_name;
319 char desc[DESC_SIZE];
320 char parent_img_name[1024];
321
322 /* the descriptor offset = 0x200 */
323 if (bdrv_pread(s->hd, 0x200, desc, DESC_SIZE) != DESC_SIZE)
324 return -1;
325
326 if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
327 char *end_name;
328 struct stat file_buf;
329
330 p_name += sizeof("parentFileNameHint") + 1;
331 if ((end_name = strchr(p_name,'\"')) == NULL)
332 return -1;
333 if ((end_name - p_name) > sizeof (bs->backing_file) - 1)
334 return -1;
335
336 pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
337 if (stat(bs->backing_file, &file_buf) != 0) {
338 path_combine(parent_img_name, sizeof(parent_img_name),
339 filename, bs->backing_file);
340 } else {
341 pstrcpy(parent_img_name, sizeof(parent_img_name),
342 bs->backing_file);
343 }
344
345 bs->backing_hd = bdrv_new("");
346 if (!bs->backing_hd) {
347 failure:
348 bdrv_close(s->hd);
349 return -1;
350 }
351 parent_open = 1;
352 if (bdrv_open(bs->backing_hd, parent_img_name, BDRV_O_RDONLY) < 0)
353 goto failure;
354 parent_open = 0;
355 }
356
357 return 0;
358 }
359
360 static int vmdk_open(BlockDriverState *bs, const char *filename, int flags)
361 {
362 BDRVVmdkState *s = bs->opaque;
363 uint32_t magic;
364 int l1_size, i, ret;
365
366 if (parent_open)
367 // Parent must be opened as RO.
368 flags = BDRV_O_RDONLY;
369
370 ret = bdrv_file_open(&s->hd, filename, flags);
371 if (ret < 0)
372 return ret;
373 if (bdrv_pread(s->hd, 0, &magic, sizeof(magic)) != sizeof(magic))
374 goto fail;
375
376 magic = be32_to_cpu(magic);
377 if (magic == VMDK3_MAGIC) {
378 VMDK3Header header;
379
380 if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header))
381 goto fail;
382 s->cluster_sectors = le32_to_cpu(header.granularity);
383 s->l2_size = 1 << 9;
384 s->l1_size = 1 << 6;
385 bs->total_sectors = le32_to_cpu(header.disk_sectors);
386 s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
387 s->l1_backup_table_offset = 0;
388 s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
389 } else if (magic == VMDK4_MAGIC) {
390 VMDK4Header header;
391
392 if (bdrv_pread(s->hd, sizeof(magic), &header, sizeof(header)) != sizeof(header))
393 goto fail;
394 bs->total_sectors = le64_to_cpu(header.capacity);
395 s->cluster_sectors = le64_to_cpu(header.granularity);
396 s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
397 s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
398 if (s->l1_entry_sectors <= 0)
399 goto fail;
400 s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
401 / s->l1_entry_sectors;
402 s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
403 s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
404
405 if (parent_open)
406 s->is_parent = 1;
407 else
408 s->is_parent = 0;
409
410 // try to open parent images, if exist
411 if (vmdk_parent_open(bs, filename) != 0)
412 goto fail;
413 // write the CID once after the image creation
414 s->parent_cid = vmdk_read_cid(bs,1);
415 } else {
416 goto fail;
417 }
418
419 /* read the L1 table */
420 l1_size = s->l1_size * sizeof(uint32_t);
421 s->l1_table = qemu_malloc(l1_size);
422 if (bdrv_pread(s->hd, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
423 goto fail;
424 for(i = 0; i < s->l1_size; i++) {
425 le32_to_cpus(&s->l1_table[i]);
426 }
427
428 if (s->l1_backup_table_offset) {
429 s->l1_backup_table = qemu_malloc(l1_size);
430 if (bdrv_pread(s->hd, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
431 goto fail;
432 for(i = 0; i < s->l1_size; i++) {
433 le32_to_cpus(&s->l1_backup_table[i]);
434 }
435 }
436
437 s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
438 return 0;
439 fail:
440 qemu_free(s->l1_backup_table);
441 qemu_free(s->l1_table);
442 qemu_free(s->l2_cache);
443 bdrv_delete(s->hd);
444 return -1;
445 }
446
447 static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
448 uint64_t offset, int allocate);
449
450 static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
451 uint64_t offset, int allocate)
452 {
453 BDRVVmdkState *s = bs->opaque;
454 uint8_t whole_grain[s->cluster_sectors*512]; // 128 sectors * 512 bytes each = grain size 64KB
455
456 // we will be here if it's first write on non-exist grain(cluster).
457 // try to read from parent image, if exist
458 if (bs->backing_hd) {
459 int ret;
460
461 if (!vmdk_is_cid_valid(bs))
462 return -1;
463
464 ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
465 s->cluster_sectors);
466 if (ret < 0) {
467 return -1;
468 }
469
470 //Write grain only into the active image
471 ret = bdrv_write(s->hd, cluster_offset, whole_grain,
472 s->cluster_sectors);
473 if (ret < 0) {
474 return -1;
475 }
476 }
477 return 0;
478 }
479
480 static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
481 {
482 BDRVVmdkState *s = bs->opaque;
483
484 /* update L2 table */
485 if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
486 &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
487 return -1;
488 /* update backup L2 table */
489 if (s->l1_backup_table_offset != 0) {
490 m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
491 if (bdrv_pwrite(s->hd, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
492 &(m_data->offset), sizeof(m_data->offset)) != sizeof(m_data->offset))
493 return -1;
494 }
495
496 return 0;
497 }
498
499 static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
500 uint64_t offset, int allocate)
501 {
502 BDRVVmdkState *s = bs->opaque;
503 unsigned int l1_index, l2_offset, l2_index;
504 int min_index, i, j;
505 uint32_t min_count, *l2_table, tmp = 0;
506 uint64_t cluster_offset;
507
508 if (m_data)
509 m_data->valid = 0;
510
511 l1_index = (offset >> 9) / s->l1_entry_sectors;
512 if (l1_index >= s->l1_size)
513 return 0;
514 l2_offset = s->l1_table[l1_index];
515 if (!l2_offset)
516 return 0;
517 for(i = 0; i < L2_CACHE_SIZE; i++) {
518 if (l2_offset == s->l2_cache_offsets[i]) {
519 /* increment the hit count */
520 if (++s->l2_cache_counts[i] == 0xffffffff) {
521 for(j = 0; j < L2_CACHE_SIZE; j++) {
522 s->l2_cache_counts[j] >>= 1;
523 }
524 }
525 l2_table = s->l2_cache + (i * s->l2_size);
526 goto found;
527 }
528 }
529 /* not found: load a new entry in the least used one */
530 min_index = 0;
531 min_count = 0xffffffff;
532 for(i = 0; i < L2_CACHE_SIZE; i++) {
533 if (s->l2_cache_counts[i] < min_count) {
534 min_count = s->l2_cache_counts[i];
535 min_index = i;
536 }
537 }
538 l2_table = s->l2_cache + (min_index * s->l2_size);
539 if (bdrv_pread(s->hd, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
540 s->l2_size * sizeof(uint32_t))
541 return 0;
542
543 s->l2_cache_offsets[min_index] = l2_offset;
544 s->l2_cache_counts[min_index] = 1;
545 found:
546 l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
547 cluster_offset = le32_to_cpu(l2_table[l2_index]);
548
549 if (!cluster_offset) {
550 if (!allocate)
551 return 0;
552 // Avoid the L2 tables update for the images that have snapshots.
553 if (!s->is_parent) {
554 cluster_offset = bdrv_getlength(s->hd);
555 bdrv_truncate(s->hd, cluster_offset + (s->cluster_sectors << 9));
556
557 cluster_offset >>= 9;
558 tmp = cpu_to_le32(cluster_offset);
559 l2_table[l2_index] = tmp;
560 }
561 /* First of all we write grain itself, to avoid race condition
562 * that may to corrupt the image.
563 * This problem may occur because of insufficient space on host disk
564 * or inappropriate VM shutdown.
565 */
566 if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
567 return 0;
568
569 if (m_data) {
570 m_data->offset = tmp;
571 m_data->l1_index = l1_index;
572 m_data->l2_index = l2_index;
573 m_data->l2_offset = l2_offset;
574 m_data->valid = 1;
575 }
576 }
577 cluster_offset <<= 9;
578 return cluster_offset;
579 }
580
581 static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
582 int nb_sectors, int *pnum)
583 {
584 BDRVVmdkState *s = bs->opaque;
585 int index_in_cluster, n;
586 uint64_t cluster_offset;
587
588 cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
589 index_in_cluster = sector_num % s->cluster_sectors;
590 n = s->cluster_sectors - index_in_cluster;
591 if (n > nb_sectors)
592 n = nb_sectors;
593 *pnum = n;
594 return (cluster_offset != 0);
595 }
596
597 static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
598 uint8_t *buf, int nb_sectors)
599 {
600 BDRVVmdkState *s = bs->opaque;
601 int index_in_cluster, n, ret;
602 uint64_t cluster_offset;
603
604 while (nb_sectors > 0) {
605 cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
606 index_in_cluster = sector_num % s->cluster_sectors;
607 n = s->cluster_sectors - index_in_cluster;
608 if (n > nb_sectors)
609 n = nb_sectors;
610 if (!cluster_offset) {
611 // try to read from parent image, if exist
612 if (bs->backing_hd) {
613 if (!vmdk_is_cid_valid(bs))
614 return -1;
615 ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
616 if (ret < 0)
617 return -1;
618 } else {
619 memset(buf, 0, 512 * n);
620 }
621 } else {
622 if(bdrv_pread(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
623 return -1;
624 }
625 nb_sectors -= n;
626 sector_num += n;
627 buf += n * 512;
628 }
629 return 0;
630 }
631
632 static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
633 const uint8_t *buf, int nb_sectors)
634 {
635 BDRVVmdkState *s = bs->opaque;
636 VmdkMetaData m_data;
637 int index_in_cluster, n;
638 uint64_t cluster_offset;
639 static int cid_update = 0;
640
641 if (sector_num > bs->total_sectors) {
642 fprintf(stderr,
643 "(VMDK) Wrong offset: sector_num=0x%" PRIx64
644 " total_sectors=0x%" PRIx64 "\n",
645 sector_num, bs->total_sectors);
646 return -1;
647 }
648
649 while (nb_sectors > 0) {
650 index_in_cluster = sector_num & (s->cluster_sectors - 1);
651 n = s->cluster_sectors - index_in_cluster;
652 if (n > nb_sectors)
653 n = nb_sectors;
654 cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
655 if (!cluster_offset)
656 return -1;
657
658 if (bdrv_pwrite(s->hd, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
659 return -1;
660 if (m_data.valid) {
661 /* update L2 tables */
662 if (vmdk_L2update(bs, &m_data) == -1)
663 return -1;
664 }
665 nb_sectors -= n;
666 sector_num += n;
667 buf += n * 512;
668
669 // update CID on the first write every time the virtual disk is opened
670 if (!cid_update) {
671 vmdk_write_cid(bs, time(NULL));
672 cid_update++;
673 }
674 }
675 return 0;
676 }
677
678 static int vmdk_create(const char *filename, QEMUOptionParameter *options)
679 {
680 int fd, i;
681 VMDK4Header header;
682 uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
683 static const char desc_template[] =
684 "# Disk DescriptorFile\n"
685 "version=1\n"
686 "CID=%x\n"
687 "parentCID=ffffffff\n"
688 "createType=\"monolithicSparse\"\n"
689 "\n"
690 "# Extent description\n"
691 "RW %" PRId64 " SPARSE \"%s\"\n"
692 "\n"
693 "# The Disk Data Base \n"
694 "#DDB\n"
695 "\n"
696 "ddb.virtualHWVersion = \"%d\"\n"
697 "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
698 "ddb.geometry.heads = \"16\"\n"
699 "ddb.geometry.sectors = \"63\"\n"
700 "ddb.adapterType = \"ide\"\n";
701 char desc[1024];
702 const char *real_filename, *temp_str;
703 int64_t total_size = 0;
704 const char *backing_file = NULL;
705 int flags = 0;
706
707 // Read out options
708 while (options && options->name) {
709 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
710 total_size = options->value.n / 512;
711 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
712 backing_file = options->value.s;
713 } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
714 flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0;
715 }
716 options++;
717 }
718
719 /* XXX: add support for backing file */
720 if (backing_file) {
721 return vmdk_snapshot_create(filename, backing_file);
722 }
723
724 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
725 0644);
726 if (fd < 0)
727 return -1;
728 magic = cpu_to_be32(VMDK4_MAGIC);
729 memset(&header, 0, sizeof(header));
730 header.version = cpu_to_le32(1);
731 header.flags = cpu_to_le32(3); /* ?? */
732 header.capacity = cpu_to_le64(total_size);
733 header.granularity = cpu_to_le64(128);
734 header.num_gtes_per_gte = cpu_to_le32(512);
735
736 grains = (total_size + header.granularity - 1) / header.granularity;
737 gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
738 gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
739 gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
740
741 header.desc_offset = 1;
742 header.desc_size = 20;
743 header.rgd_offset = header.desc_offset + header.desc_size;
744 header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
745 header.grain_offset =
746 ((header.gd_offset + gd_size + (gt_size * gt_count) +
747 header.granularity - 1) / header.granularity) *
748 header.granularity;
749
750 header.desc_offset = cpu_to_le64(header.desc_offset);
751 header.desc_size = cpu_to_le64(header.desc_size);
752 header.rgd_offset = cpu_to_le64(header.rgd_offset);
753 header.gd_offset = cpu_to_le64(header.gd_offset);
754 header.grain_offset = cpu_to_le64(header.grain_offset);
755
756 header.check_bytes[0] = 0xa;
757 header.check_bytes[1] = 0x20;
758 header.check_bytes[2] = 0xd;
759 header.check_bytes[3] = 0xa;
760
761 /* write all the data */
762 write(fd, &magic, sizeof(magic));
763 write(fd, &header, sizeof(header));
764
765 ftruncate(fd, header.grain_offset << 9);
766
767 /* write grain directory */
768 lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
769 for (i = 0, tmp = header.rgd_offset + gd_size;
770 i < gt_count; i++, tmp += gt_size)
771 write(fd, &tmp, sizeof(tmp));
772
773 /* write backup grain directory */
774 lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
775 for (i = 0, tmp = header.gd_offset + gd_size;
776 i < gt_count; i++, tmp += gt_size)
777 write(fd, &tmp, sizeof(tmp));
778
779 /* compose the descriptor */
780 real_filename = filename;
781 if ((temp_str = strrchr(real_filename, '\\')) != NULL)
782 real_filename = temp_str + 1;
783 if ((temp_str = strrchr(real_filename, '/')) != NULL)
784 real_filename = temp_str + 1;
785 if ((temp_str = strrchr(real_filename, ':')) != NULL)
786 real_filename = temp_str + 1;
787 snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
788 total_size, real_filename,
789 (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
790 total_size / (int64_t)(63 * 16));
791
792 /* write the descriptor */
793 lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
794 write(fd, desc, strlen(desc));
795
796 close(fd);
797 return 0;
798 }
799
800 static void vmdk_close(BlockDriverState *bs)
801 {
802 BDRVVmdkState *s = bs->opaque;
803
804 qemu_free(s->l1_table);
805 qemu_free(s->l2_cache);
806 // try to close parent image, if exist
807 vmdk_parent_close(s->hd);
808 bdrv_delete(s->hd);
809 }
810
811 static void vmdk_flush(BlockDriverState *bs)
812 {
813 BDRVVmdkState *s = bs->opaque;
814 bdrv_flush(s->hd);
815 }
816
817
/*
 * Options understood by vmdk_create().  The list ends with an entry whose
 * name is NULL, which is what vmdk_create() stops on when scanning options.
 */
static QEMUOptionParameter vmdk_create_options[] = {
    {
        .name = BLOCK_OPT_SIZE,
        .type = OPT_SIZE,
        .help = "Virtual disk size"
    },
    {
        .name = BLOCK_OPT_BACKING_FILE,
        .type = OPT_STRING,
        .help = "File name of a base image"
    },
    {
        .name = BLOCK_OPT_COMPAT6,
        .type = OPT_FLAG,
        .help = "VMDK version 6 image"
    },
    { NULL }
};
836
/*
 * Driver description for the "vmdk" format: binds the functions of this
 * file to the block layer callbacks and sizes the per-image state.
 */
static BlockDriver bdrv_vmdk = {
    .format_name = "vmdk",
    .instance_size = sizeof(BDRVVmdkState),
    .bdrv_probe = vmdk_probe,
    .bdrv_open = vmdk_open,
    .bdrv_read = vmdk_read,
    .bdrv_write = vmdk_write,
    .bdrv_close = vmdk_close,
    .bdrv_create = vmdk_create,
    .bdrv_flush = vmdk_flush,
    .bdrv_is_allocated = vmdk_is_allocated,

    .create_options = vmdk_create_options,
};
851
/* Register the VMDK driver with the block layer. */
static void bdrv_vmdk_init(void)
{
    bdrv_register(&bdrv_vmdk);
}

/* Hook bdrv_vmdk_init() into the block module initialization. */
block_init(bdrv_vmdk_init);