]> git.proxmox.com Git - qemu.git/blame - block/vmdk.c
scsi-disk: Implement rerror option
[qemu.git] / block / vmdk.c
CommitLineData
ea2384d3
FB
1/*
2 * Block driver for the VMDK format
5fafdf24 3 *
ea2384d3 4 * Copyright (c) 2004 Fabrice Bellard
ff1afc72 5 * Copyright (c) 2005 Filip Navara
5fafdf24 6 *
ea2384d3
FB
7 * Permission is hereby granted, free of charge, to any person obtaining a copy
8 * of this software and associated documentation files (the "Software"), to deal
9 * in the Software without restriction, including without limitation the rights
10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 * copies of the Software, and to permit persons to whom the Software is
12 * furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included in
15 * all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 * THE SOFTWARE.
24 */
5f4da8c0 25
faf07963 26#include "qemu-common.h"
ea2384d3 27#include "block_int.h"
5efa9d5a 28#include "module.h"
ea2384d3 29
ea2384d3
FB
/*
 * Image signatures: the first four bytes of the file, interpreted as a
 * big-endian 32-bit value, spell "COWD" (VMDK3) or "KDMV" (VMDK4) in ASCII.
 */
#define VMDK3_MAGIC 0x434F5744 /* 'C' 'O' 'W' 'D' */
#define VMDK4_MAGIC 0x4B444D56 /* 'K' 'D' 'M' 'V' */
32
/*
 * VMDK3 ("COWD") header as it is read from the image immediately after the
 * 4-byte magic.  vmdk_open() converts the fields it uses with le32_to_cpu().
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    uint32_t disk_sectors;      /* becomes bs->total_sectors */
    uint32_t granularity;       /* becomes the cluster size, in sectors */
    uint32_t l1dir_offset;      /* L1 table position, in 512-byte sectors */
    uint32_t l1dir_size;
    uint32_t file_sectors;
    /* geometry fields; not consulted by this driver */
    uint32_t cylinders;
    uint32_t heads;
    uint32_t sectors_per_track;
} VMDK3Header;
45
/*
 * VMDK4 ("KDMV") header as it is stored in the image immediately after the
 * 4-byte magic.  The structure is packed because it mirrors the on-disk
 * layout byte for byte; fields are little-endian on disk and the *_offset /
 * *_size fields are expressed in 512-byte sectors (callers shift by 9).
 */
typedef struct {
    uint32_t version;
    uint32_t flags;
    int64_t capacity;           /* virtual disk size, in sectors */
    int64_t granularity;        /* cluster (grain) size, in sectors */
    int64_t desc_offset;        /* embedded descriptor position */
    int64_t desc_size;          /* embedded descriptor length */
    int32_t num_gtes_per_gte;   /* entries per L2 table (s->l2_size) */
    int64_t rgd_offset;         /* used as the primary L1 table position */
    int64_t gd_offset;          /* used as the backup L1 table position */
    int64_t grain_offset;       /* end of metadata / start of data */
    char filler[1];
    char check_bytes[4];        /* vmdk_create() stores 0x0a 0x20 0x0d 0x0a */
} __attribute__((packed)) VMDK4Header;
ea2384d3
FB
60
61#define L2_CACHE_SIZE 16
62
63typedef struct BDRVVmdkState {
5f4da8c0 64 BlockDriverState *hd;
ea2384d3 65 int64_t l1_table_offset;
ff1afc72 66 int64_t l1_backup_table_offset;
ea2384d3 67 uint32_t *l1_table;
ff1afc72 68 uint32_t *l1_backup_table;
ea2384d3
FB
69 unsigned int l1_size;
70 uint32_t l1_entry_sectors;
71
72 unsigned int l2_size;
73 uint32_t *l2_cache;
74 uint32_t l2_cache_offsets[L2_CACHE_SIZE];
75 uint32_t l2_cache_counts[L2_CACHE_SIZE];
76
77 unsigned int cluster_sectors;
5f4da8c0 78 uint32_t parent_cid;
ea2384d3
FB
79} BDRVVmdkState;
80
630530a6
TS
/*
 * Description of an L2 entry that get_cluster_offset() has just allocated
 * and that vmdk_L2update() still has to write to the image.
 */
typedef struct VmdkMetaData {
    uint32_t offset;            /* new L2 entry value, already little-endian */
    unsigned int l1_index;      /* index of the owning L1 entry */
    unsigned int l2_index;      /* index of the entry inside its L2 table */
    unsigned int l2_offset;     /* position of the L2 table, in sectors */
    int valid;                  /* non-zero when the fields above are set */
} VmdkMetaData;
88
ea2384d3
FB
89static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
90{
91 uint32_t magic;
92
93 if (buf_size < 4)
94 return 0;
95 magic = be32_to_cpu(*(uint32_t *)buf);
96 if (magic == VMDK3_MAGIC ||
97 magic == VMDK4_MAGIC)
98 return 100;
99 else
100 return 0;
101}
102
5f4da8c0
TS
/* When defined, vmdk_is_cid_valid() compares the stored parentCID with the
 * CID currently found in the backing image. */
#define CHECK_CID 1

#define SECTOR_SIZE 512
/* Parenthesized so the macro stays a single operand in any expression. */
#define DESC_SIZE (20 * SECTOR_SIZE) // 20 sectors of 512 bytes each
#define HEADER_SIZE 512 // first sector of 512 bytes
5f4da8c0
TS
108
109static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
ea2384d3 110{
5f4da8c0
TS
111 char desc[DESC_SIZE];
112 uint32_t cid;
7ccfb2eb 113 const char *p_name, *cid_str;
5f4da8c0
TS
114 size_t cid_str_size;
115
116 /* the descriptor offset = 0x200 */
6511ef77 117 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
5f4da8c0
TS
118 return 0;
119
120 if (parent) {
121 cid_str = "parentCID";
122 cid_str_size = sizeof("parentCID");
123 } else {
124 cid_str = "CID";
125 cid_str_size = sizeof("CID");
126 }
127
511d2b14 128 if ((p_name = strstr(desc,cid_str)) != NULL) {
5f4da8c0
TS
129 p_name += cid_str_size;
130 sscanf(p_name,"%x",&cid);
131 }
132
133 return cid;
134}
135
136static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
137{
5f4da8c0
TS
138 char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
139 char *p_name, *tmp_str;
140
141 /* the descriptor offset = 0x200 */
6511ef77 142 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
5f4da8c0
TS
143 return -1;
144
145 tmp_str = strstr(desc,"parentCID");
363a37d5 146 pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
511d2b14 147 if ((p_name = strstr(desc,"CID")) != NULL) {
5f4da8c0 148 p_name += sizeof("CID");
363a37d5
BS
149 snprintf(p_name, sizeof(desc) - (p_name - desc), "%x\n", cid);
150 pstrcat(desc, sizeof(desc), tmp_desc);
5f4da8c0
TS
151 }
152
b8852e87 153 if (bdrv_pwrite_sync(bs->file, 0x200, desc, DESC_SIZE) < 0)
5f4da8c0
TS
154 return -1;
155 return 0;
156}
157
158static int vmdk_is_cid_valid(BlockDriverState *bs)
159{
160#ifdef CHECK_CID
161 BDRVVmdkState *s = bs->opaque;
b171271a 162 BlockDriverState *p_bs = bs->backing_hd;
5f4da8c0
TS
163 uint32_t cur_pcid;
164
165 if (p_bs) {
166 cur_pcid = vmdk_read_cid(p_bs,0);
167 if (s->parent_cid != cur_pcid)
168 // CID not valid
169 return 0;
170 }
171#endif
172 // CID valid
173 return 1;
174}
175
176static int vmdk_snapshot_create(const char *filename, const char *backing_file)
177{
178 int snp_fd, p_fd;
53c2e716 179 int ret;
5f4da8c0 180 uint32_t p_cid;
5fafdf24 181 char *p_name, *gd_buf, *rgd_buf;
5f4da8c0
TS
182 const char *real_filename, *temp_str;
183 VMDK4Header header;
184 uint32_t gde_entries, gd_size;
185 int64_t gd_offset, rgd_offset, capacity, gt_size;
186 char p_desc[DESC_SIZE], s_desc[DESC_SIZE], hdr[HEADER_SIZE];
7ccfb2eb 187 static const char desc_template[] =
5f4da8c0
TS
188 "# Disk DescriptorFile\n"
189 "version=1\n"
190 "CID=%x\n"
191 "parentCID=%x\n"
192 "createType=\"monolithicSparse\"\n"
193 "parentFileNameHint=\"%s\"\n"
194 "\n"
195 "# Extent description\n"
7ccfb2eb 196 "RW %u SPARSE \"%s\"\n"
5f4da8c0
TS
197 "\n"
198 "# The Disk Data Base \n"
199 "#DDB\n"
200 "\n";
201
202 snp_fd = open(filename, O_RDWR | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE, 0644);
203 if (snp_fd < 0)
53c2e716 204 return -errno;
5f4da8c0
TS
205 p_fd = open(backing_file, O_RDONLY | O_BINARY | O_LARGEFILE);
206 if (p_fd < 0) {
207 close(snp_fd);
53c2e716 208 return -errno;
5f4da8c0
TS
209 }
210
211 /* read the header */
53c2e716
JQ
212 if (lseek(p_fd, 0x0, SEEK_SET) == -1) {
213 ret = -errno;
5f4da8c0 214 goto fail;
53c2e716
JQ
215 }
216 if (read(p_fd, hdr, HEADER_SIZE) != HEADER_SIZE) {
217 ret = -errno;
5f4da8c0 218 goto fail;
53c2e716 219 }
5f4da8c0
TS
220
221 /* write the header */
53c2e716
JQ
222 if (lseek(snp_fd, 0x0, SEEK_SET) == -1) {
223 ret = -errno;
5f4da8c0 224 goto fail;
53c2e716
JQ
225 }
226 if (write(snp_fd, hdr, HEADER_SIZE) == -1) {
227 ret = -errno;
5f4da8c0 228 goto fail;
53c2e716 229 }
5f4da8c0
TS
230
231 memset(&header, 0, sizeof(header));
232 memcpy(&header,&hdr[4], sizeof(header)); // skip the VMDK4_MAGIC
233
53c2e716
JQ
234 if (ftruncate(snp_fd, header.grain_offset << 9)) {
235 ret = -errno;
1640366c 236 goto fail;
53c2e716 237 }
5f4da8c0 238 /* the descriptor offset = 0x200 */
53c2e716
JQ
239 if (lseek(p_fd, 0x200, SEEK_SET) == -1) {
240 ret = -errno;
5f4da8c0 241 goto fail;
53c2e716
JQ
242 }
243 if (read(p_fd, p_desc, DESC_SIZE) != DESC_SIZE) {
244 ret = -errno;
5f4da8c0 245 goto fail;
53c2e716 246 }
5f4da8c0 247
511d2b14 248 if ((p_name = strstr(p_desc,"CID")) != NULL) {
5f4da8c0
TS
249 p_name += sizeof("CID");
250 sscanf(p_name,"%x",&p_cid);
251 }
252
253 real_filename = filename;
254 if ((temp_str = strrchr(real_filename, '\\')) != NULL)
255 real_filename = temp_str + 1;
256 if ((temp_str = strrchr(real_filename, '/')) != NULL)
257 real_filename = temp_str + 1;
258 if ((temp_str = strrchr(real_filename, ':')) != NULL)
259 real_filename = temp_str + 1;
260
363a37d5
BS
261 snprintf(s_desc, sizeof(s_desc), desc_template, p_cid, p_cid, backing_file,
262 (uint32_t)header.capacity, real_filename);
5f4da8c0
TS
263
264 /* write the descriptor */
53c2e716
JQ
265 if (lseek(snp_fd, 0x200, SEEK_SET) == -1) {
266 ret = -errno;
5f4da8c0 267 goto fail;
53c2e716
JQ
268 }
269 if (write(snp_fd, s_desc, strlen(s_desc)) == -1) {
270 ret = -errno;
5f4da8c0 271 goto fail;
53c2e716 272 }
ea2384d3 273
5f4da8c0
TS
274 gd_offset = header.gd_offset * SECTOR_SIZE; // offset of GD table
275 rgd_offset = header.rgd_offset * SECTOR_SIZE; // offset of RGD table
276 capacity = header.capacity * SECTOR_SIZE; // Extent size
277 /*
278 * Each GDE span 32M disk, means:
279 * 512 GTE per GT, each GTE points to grain
280 */
281 gt_size = (int64_t)header.num_gtes_per_gte * header.granularity * SECTOR_SIZE;
53c2e716
JQ
282 if (!gt_size) {
283 ret = -EINVAL;
5f4da8c0 284 goto fail;
53c2e716 285 }
5fafdf24 286 gde_entries = (uint32_t)(capacity / gt_size); // number of gde/rgde
5f4da8c0
TS
287 gd_size = gde_entries * sizeof(uint32_t);
288
289 /* write RGD */
290 rgd_buf = qemu_malloc(gd_size);
53c2e716
JQ
291 if (lseek(p_fd, rgd_offset, SEEK_SET) == -1) {
292 ret = -errno;
5f4da8c0 293 goto fail_rgd;
53c2e716
JQ
294 }
295 if (read(p_fd, rgd_buf, gd_size) != gd_size) {
296 ret = -errno;
5f4da8c0 297 goto fail_rgd;
53c2e716
JQ
298 }
299 if (lseek(snp_fd, rgd_offset, SEEK_SET) == -1) {
300 ret = -errno;
5f4da8c0 301 goto fail_rgd;
53c2e716
JQ
302 }
303 if (write(snp_fd, rgd_buf, gd_size) == -1) {
304 ret = -errno;
5f4da8c0 305 goto fail_rgd;
53c2e716 306 }
5f4da8c0
TS
307
308 /* write GD */
309 gd_buf = qemu_malloc(gd_size);
53c2e716
JQ
310 if (lseek(p_fd, gd_offset, SEEK_SET) == -1) {
311 ret = -errno;
5f4da8c0 312 goto fail_gd;
53c2e716
JQ
313 }
314 if (read(p_fd, gd_buf, gd_size) != gd_size) {
315 ret = -errno;
5f4da8c0 316 goto fail_gd;
53c2e716
JQ
317 }
318 if (lseek(snp_fd, gd_offset, SEEK_SET) == -1) {
319 ret = -errno;
5f4da8c0 320 goto fail_gd;
53c2e716
JQ
321 }
322 if (write(snp_fd, gd_buf, gd_size) == -1) {
323 ret = -errno;
5f4da8c0 324 goto fail_gd;
53c2e716 325 }
3829cb46 326 ret = 0;
5f4da8c0 327
3829cb46 328fail_gd:
5f4da8c0 329 qemu_free(gd_buf);
3829cb46 330fail_rgd:
5f4da8c0 331 qemu_free(rgd_buf);
3829cb46 332fail:
5f4da8c0
TS
333 close(p_fd);
334 close(snp_fd);
53c2e716 335 return ret;
5f4da8c0
TS
336}
337
9949f97e 338static int vmdk_parent_open(BlockDriverState *bs)
5f4da8c0 339{
5fafdf24 340 char *p_name;
5f4da8c0 341 char desc[DESC_SIZE];
5f4da8c0
TS
342
343 /* the descriptor offset = 0x200 */
6511ef77 344 if (bdrv_pread(bs->file, 0x200, desc, DESC_SIZE) != DESC_SIZE)
5f4da8c0
TS
345 return -1;
346
511d2b14 347 if ((p_name = strstr(desc,"parentFileNameHint")) != NULL) {
5f4da8c0 348 char *end_name;
5f4da8c0
TS
349
350 p_name += sizeof("parentFileNameHint") + 1;
511d2b14 351 if ((end_name = strchr(p_name,'\"')) == NULL)
5f4da8c0 352 return -1;
b171271a 353 if ((end_name - p_name) > sizeof (bs->backing_file) - 1)
b34d259a 354 return -1;
3b46e624 355
b171271a 356 pstrcpy(bs->backing_file, end_name - p_name + 1, p_name);
ff1afc72 357 }
5f4da8c0
TS
358
359 return 0;
360}
361
6511ef77 362static int vmdk_open(BlockDriverState *bs, int flags)
5f4da8c0
TS
363{
364 BDRVVmdkState *s = bs->opaque;
365 uint32_t magic;
6511ef77 366 int l1_size, i;
5f4da8c0 367
6511ef77 368 if (bdrv_pread(bs->file, 0, &magic, sizeof(magic)) != sizeof(magic))
ea2384d3 369 goto fail;
5f4da8c0 370
7143c62c 371 magic = be32_to_cpu(magic);
ea2384d3
FB
372 if (magic == VMDK3_MAGIC) {
373 VMDK3Header header;
5f4da8c0 374
6511ef77 375 if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
ea2384d3
FB
376 goto fail;
377 s->cluster_sectors = le32_to_cpu(header.granularity);
378 s->l2_size = 1 << 9;
379 s->l1_size = 1 << 6;
380 bs->total_sectors = le32_to_cpu(header.disk_sectors);
ff1afc72
FB
381 s->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
382 s->l1_backup_table_offset = 0;
ea2384d3
FB
383 s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
384 } else if (magic == VMDK4_MAGIC) {
385 VMDK4Header header;
5f4da8c0 386
6511ef77 387 if (bdrv_pread(bs->file, sizeof(magic), &header, sizeof(header)) != sizeof(header))
ea2384d3 388 goto fail;
bd6ea3c8
FB
389 bs->total_sectors = le64_to_cpu(header.capacity);
390 s->cluster_sectors = le64_to_cpu(header.granularity);
ea2384d3
FB
391 s->l2_size = le32_to_cpu(header.num_gtes_per_gte);
392 s->l1_entry_sectors = s->l2_size * s->cluster_sectors;
393 if (s->l1_entry_sectors <= 0)
394 goto fail;
5fafdf24 395 s->l1_size = (bs->total_sectors + s->l1_entry_sectors - 1)
ea2384d3 396 / s->l1_entry_sectors;
ff1afc72
FB
397 s->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
398 s->l1_backup_table_offset = le64_to_cpu(header.gd_offset) << 9;
5f4da8c0
TS
399
400 // try to open parent images, if exist
9949f97e 401 if (vmdk_parent_open(bs) != 0)
5f4da8c0
TS
402 goto fail;
403 // write the CID once after the image creation
404 s->parent_cid = vmdk_read_cid(bs,1);
ea2384d3
FB
405 } else {
406 goto fail;
407 }
5f4da8c0 408
ea2384d3
FB
409 /* read the L1 table */
410 l1_size = s->l1_size * sizeof(uint32_t);
411 s->l1_table = qemu_malloc(l1_size);
6511ef77 412 if (bdrv_pread(bs->file, s->l1_table_offset, s->l1_table, l1_size) != l1_size)
ea2384d3
FB
413 goto fail;
414 for(i = 0; i < s->l1_size; i++) {
415 le32_to_cpus(&s->l1_table[i]);
416 }
417
ff1afc72
FB
418 if (s->l1_backup_table_offset) {
419 s->l1_backup_table = qemu_malloc(l1_size);
6511ef77 420 if (bdrv_pread(bs->file, s->l1_backup_table_offset, s->l1_backup_table, l1_size) != l1_size)
ff1afc72
FB
421 goto fail;
422 for(i = 0; i < s->l1_size; i++) {
423 le32_to_cpus(&s->l1_backup_table[i]);
424 }
425 }
426
ea2384d3 427 s->l2_cache = qemu_malloc(s->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
ea2384d3
FB
428 return 0;
429 fail:
ff1afc72 430 qemu_free(s->l1_backup_table);
ea2384d3
FB
431 qemu_free(s->l1_table);
432 qemu_free(s->l2_cache);
ea2384d3
FB
433 return -1;
434}
435
630530a6
TS
436static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
437 uint64_t offset, int allocate);
5f4da8c0
TS
438
439static int get_whole_cluster(BlockDriverState *bs, uint64_t cluster_offset,
440 uint64_t offset, int allocate)
441{
5f4da8c0
TS
442 BDRVVmdkState *s = bs->opaque;
443 uint8_t whole_grain[s->cluster_sectors*512]; // 128 sectors * 512 bytes each = grain size 64KB
444
445 // we will be here if it's first write on non-exist grain(cluster).
446 // try to read from parent image, if exist
b171271a 447 if (bs->backing_hd) {
c336500d 448 int ret;
5f4da8c0
TS
449
450 if (!vmdk_is_cid_valid(bs))
451 return -1;
5f4da8c0 452
c336500d
KW
453 ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
454 s->cluster_sectors);
455 if (ret < 0) {
456 return -1;
457 }
630530a6 458
c336500d 459 //Write grain only into the active image
6511ef77 460 ret = bdrv_write(bs->file, cluster_offset, whole_grain,
c336500d
KW
461 s->cluster_sectors);
462 if (ret < 0) {
463 return -1;
630530a6
TS
464 }
465 }
466 return 0;
467}
468
469static int vmdk_L2update(BlockDriverState *bs, VmdkMetaData *m_data)
470{
471 BDRVVmdkState *s = bs->opaque;
472
473 /* update L2 table */
b8852e87
KW
474 if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
475 &(m_data->offset), sizeof(m_data->offset)) < 0)
630530a6
TS
476 return -1;
477 /* update backup L2 table */
478 if (s->l1_backup_table_offset != 0) {
479 m_data->l2_offset = s->l1_backup_table[m_data->l1_index];
b8852e87
KW
480 if (bdrv_pwrite_sync(bs->file, ((int64_t)m_data->l2_offset * 512) + (m_data->l2_index * sizeof(m_data->offset)),
481 &(m_data->offset), sizeof(m_data->offset)) < 0)
5f4da8c0
TS
482 return -1;
483 }
630530a6 484
5f4da8c0
TS
485 return 0;
486}
487
630530a6 488static uint64_t get_cluster_offset(BlockDriverState *bs, VmdkMetaData *m_data,
ff1afc72 489 uint64_t offset, int allocate)
ea2384d3
FB
490{
491 BDRVVmdkState *s = bs->opaque;
492 unsigned int l1_index, l2_offset, l2_index;
493 int min_index, i, j;
630530a6 494 uint32_t min_count, *l2_table, tmp = 0;
ea2384d3 495 uint64_t cluster_offset;
630530a6
TS
496
497 if (m_data)
498 m_data->valid = 0;
499
ea2384d3
FB
500 l1_index = (offset >> 9) / s->l1_entry_sectors;
501 if (l1_index >= s->l1_size)
502 return 0;
503 l2_offset = s->l1_table[l1_index];
504 if (!l2_offset)
505 return 0;
ea2384d3
FB
506 for(i = 0; i < L2_CACHE_SIZE; i++) {
507 if (l2_offset == s->l2_cache_offsets[i]) {
508 /* increment the hit count */
509 if (++s->l2_cache_counts[i] == 0xffffffff) {
510 for(j = 0; j < L2_CACHE_SIZE; j++) {
511 s->l2_cache_counts[j] >>= 1;
512 }
513 }
514 l2_table = s->l2_cache + (i * s->l2_size);
515 goto found;
516 }
517 }
518 /* not found: load a new entry in the least used one */
519 min_index = 0;
520 min_count = 0xffffffff;
521 for(i = 0; i < L2_CACHE_SIZE; i++) {
522 if (s->l2_cache_counts[i] < min_count) {
523 min_count = s->l2_cache_counts[i];
524 min_index = i;
525 }
526 }
527 l2_table = s->l2_cache + (min_index * s->l2_size);
6511ef77 528 if (bdrv_pread(bs->file, (int64_t)l2_offset * 512, l2_table, s->l2_size * sizeof(uint32_t)) !=
5f4da8c0 529 s->l2_size * sizeof(uint32_t))
ea2384d3 530 return 0;
5f4da8c0 531
ea2384d3
FB
532 s->l2_cache_offsets[min_index] = l2_offset;
533 s->l2_cache_counts[min_index] = 1;
534 found:
535 l2_index = ((offset >> 9) / s->cluster_sectors) % s->l2_size;
536 cluster_offset = le32_to_cpu(l2_table[l2_index]);
630530a6 537
ff1afc72
FB
538 if (!cluster_offset) {
539 if (!allocate)
540 return 0;
9949f97e 541
630530a6 542 // Avoid the L2 tables update for the images that have snapshots.
6511ef77
KW
543 cluster_offset = bdrv_getlength(bs->file);
544 bdrv_truncate(bs->file, cluster_offset + (s->cluster_sectors << 9));
9949f97e
KW
545
546 cluster_offset >>= 9;
547 tmp = cpu_to_le32(cluster_offset);
548 l2_table[l2_index] = tmp;
630530a6 549
630530a6
TS
550 /* First of all we write grain itself, to avoid race condition
551 * that may to corrupt the image.
552 * This problem may occur because of insufficient space on host disk
553 * or inappropriate VM shutdown.
554 */
5f4da8c0
TS
555 if (get_whole_cluster(bs, cluster_offset, offset, allocate) == -1)
556 return 0;
630530a6
TS
557
558 if (m_data) {
559 m_data->offset = tmp;
560 m_data->l1_index = l1_index;
561 m_data->l2_index = l2_index;
562 m_data->l2_offset = l2_offset;
563 m_data->valid = 1;
564 }
ff1afc72 565 }
ea2384d3
FB
566 cluster_offset <<= 9;
567 return cluster_offset;
568}
569
5fafdf24 570static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
ea2384d3
FB
571 int nb_sectors, int *pnum)
572{
573 BDRVVmdkState *s = bs->opaque;
574 int index_in_cluster, n;
575 uint64_t cluster_offset;
576
630530a6 577 cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
ea2384d3
FB
578 index_in_cluster = sector_num % s->cluster_sectors;
579 n = s->cluster_sectors - index_in_cluster;
580 if (n > nb_sectors)
581 n = nb_sectors;
582 *pnum = n;
583 return (cluster_offset != 0);
584}
585
5fafdf24 586static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
ea2384d3
FB
587 uint8_t *buf, int nb_sectors)
588{
589 BDRVVmdkState *s = bs->opaque;
5f4da8c0 590 int index_in_cluster, n, ret;
ea2384d3 591 uint64_t cluster_offset;
5f4da8c0 592
ea2384d3 593 while (nb_sectors > 0) {
630530a6 594 cluster_offset = get_cluster_offset(bs, NULL, sector_num << 9, 0);
ea2384d3
FB
595 index_in_cluster = sector_num % s->cluster_sectors;
596 n = s->cluster_sectors - index_in_cluster;
597 if (n > nb_sectors)
598 n = nb_sectors;
599 if (!cluster_offset) {
5f4da8c0 600 // try to read from parent image, if exist
b171271a 601 if (bs->backing_hd) {
5f4da8c0
TS
602 if (!vmdk_is_cid_valid(bs))
603 return -1;
b171271a 604 ret = bdrv_read(bs->backing_hd, sector_num, buf, n);
5f4da8c0
TS
605 if (ret < 0)
606 return -1;
607 } else {
608 memset(buf, 0, 512 * n);
609 }
ea2384d3 610 } else {
6511ef77 611 if(bdrv_pread(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
ea2384d3
FB
612 return -1;
613 }
614 nb_sectors -= n;
615 sector_num += n;
616 buf += n * 512;
617 }
618 return 0;
619}
620
5fafdf24 621static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
ea2384d3
FB
622 const uint8_t *buf, int nb_sectors)
623{
ff1afc72 624 BDRVVmdkState *s = bs->opaque;
630530a6 625 VmdkMetaData m_data;
5f4da8c0 626 int index_in_cluster, n;
ff1afc72 627 uint64_t cluster_offset;
5f4da8c0 628 static int cid_update = 0;
ff1afc72 629
630530a6
TS
630 if (sector_num > bs->total_sectors) {
631 fprintf(stderr,
92868412
JM
632 "(VMDK) Wrong offset: sector_num=0x%" PRIx64
633 " total_sectors=0x%" PRIx64 "\n",
630530a6
TS
634 sector_num, bs->total_sectors);
635 return -1;
636 }
637
ff1afc72
FB
638 while (nb_sectors > 0) {
639 index_in_cluster = sector_num & (s->cluster_sectors - 1);
640 n = s->cluster_sectors - index_in_cluster;
641 if (n > nb_sectors)
642 n = nb_sectors;
630530a6 643 cluster_offset = get_cluster_offset(bs, &m_data, sector_num << 9, 1);
ff1afc72
FB
644 if (!cluster_offset)
645 return -1;
630530a6 646
6511ef77 647 if (bdrv_pwrite(bs->file, cluster_offset + index_in_cluster * 512, buf, n * 512) != n * 512)
ff1afc72 648 return -1;
630530a6
TS
649 if (m_data.valid) {
650 /* update L2 tables */
651 if (vmdk_L2update(bs, &m_data) == -1)
652 return -1;
653 }
ff1afc72
FB
654 nb_sectors -= n;
655 sector_num += n;
656 buf += n * 512;
5f4da8c0
TS
657
658 // update CID on the first write every time the virtual disk is opened
659 if (!cid_update) {
660 vmdk_write_cid(bs, time(NULL));
661 cid_update++;
662 }
ff1afc72
FB
663 }
664 return 0;
ea2384d3
FB
665}
666
0e7e1989 667static int vmdk_create(const char *filename, QEMUOptionParameter *options)
8979b227
FB
668{
669 int fd, i;
670 VMDK4Header header;
671 uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
7ccfb2eb 672 static const char desc_template[] =
8979b227
FB
673 "# Disk DescriptorFile\n"
674 "version=1\n"
675 "CID=%x\n"
676 "parentCID=ffffffff\n"
677 "createType=\"monolithicSparse\"\n"
678 "\n"
679 "# Extent description\n"
7fd6d9fc 680 "RW %" PRId64 " SPARSE \"%s\"\n"
8979b227
FB
681 "\n"
682 "# The Disk Data Base \n"
683 "#DDB\n"
684 "\n"
ec36ba14 685 "ddb.virtualHWVersion = \"%d\"\n"
7fd6d9fc 686 "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
8979b227
FB
687 "ddb.geometry.heads = \"16\"\n"
688 "ddb.geometry.sectors = \"63\"\n"
689 "ddb.adapterType = \"ide\"\n";
690 char desc[1024];
691 const char *real_filename, *temp_str;
0e7e1989
KW
692 int64_t total_size = 0;
693 const char *backing_file = NULL;
694 int flags = 0;
1640366c 695 int ret;
0e7e1989
KW
696
697 // Read out options
698 while (options && options->name) {
699 if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
700 total_size = options->value.n / 512;
701 } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
702 backing_file = options->value.s;
703 } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
704 flags |= options->value.n ? BLOCK_FLAG_COMPAT6: 0;
705 }
706 options++;
707 }
8979b227
FB
708
709 /* XXX: add support for backing file */
5f4da8c0
TS
710 if (backing_file) {
711 return vmdk_snapshot_create(filename, backing_file);
712 }
8979b227
FB
713
714 fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
715 0644);
716 if (fd < 0)
b781cce5 717 return -errno;
8979b227
FB
718 magic = cpu_to_be32(VMDK4_MAGIC);
719 memset(&header, 0, sizeof(header));
720 header.version = cpu_to_le32(1);
721 header.flags = cpu_to_le32(3); /* ?? */
722 header.capacity = cpu_to_le64(total_size);
723 header.granularity = cpu_to_le64(128);
724 header.num_gtes_per_gte = cpu_to_le32(512);
725
726 grains = (total_size + header.granularity - 1) / header.granularity;
727 gt_size = ((header.num_gtes_per_gte * sizeof(uint32_t)) + 511) >> 9;
728 gt_count = (grains + header.num_gtes_per_gte - 1) / header.num_gtes_per_gte;
729 gd_size = (gt_count * sizeof(uint32_t) + 511) >> 9;
730
731 header.desc_offset = 1;
732 header.desc_size = 20;
733 header.rgd_offset = header.desc_offset + header.desc_size;
734 header.gd_offset = header.rgd_offset + gd_size + (gt_size * gt_count);
735 header.grain_offset =
736 ((header.gd_offset + gd_size + (gt_size * gt_count) +
737 header.granularity - 1) / header.granularity) *
738 header.granularity;
739
740 header.desc_offset = cpu_to_le64(header.desc_offset);
741 header.desc_size = cpu_to_le64(header.desc_size);
742 header.rgd_offset = cpu_to_le64(header.rgd_offset);
743 header.gd_offset = cpu_to_le64(header.gd_offset);
744 header.grain_offset = cpu_to_le64(header.grain_offset);
745
746 header.check_bytes[0] = 0xa;
747 header.check_bytes[1] = 0x20;
748 header.check_bytes[2] = 0xd;
749 header.check_bytes[3] = 0xa;
3b46e624
TS
750
751 /* write all the data */
1640366c
KS
752 ret = qemu_write_full(fd, &magic, sizeof(magic));
753 if (ret != sizeof(magic)) {
b781cce5 754 ret = -errno;
1640366c
KS
755 goto exit;
756 }
757 ret = qemu_write_full(fd, &header, sizeof(header));
758 if (ret != sizeof(header)) {
b781cce5 759 ret = -errno;
1640366c
KS
760 goto exit;
761 }
8979b227 762
1640366c
KS
763 ret = ftruncate(fd, header.grain_offset << 9);
764 if (ret < 0) {
b781cce5 765 ret = -errno;
1640366c
KS
766 goto exit;
767 }
8979b227
FB
768
769 /* write grain directory */
770 lseek(fd, le64_to_cpu(header.rgd_offset) << 9, SEEK_SET);
771 for (i = 0, tmp = header.rgd_offset + gd_size;
1640366c
KS
772 i < gt_count; i++, tmp += gt_size) {
773 ret = qemu_write_full(fd, &tmp, sizeof(tmp));
774 if (ret != sizeof(tmp)) {
b781cce5 775 ret = -errno;
1640366c
KS
776 goto exit;
777 }
778 }
3b46e624 779
8979b227
FB
780 /* write backup grain directory */
781 lseek(fd, le64_to_cpu(header.gd_offset) << 9, SEEK_SET);
782 for (i = 0, tmp = header.gd_offset + gd_size;
1640366c
KS
783 i < gt_count; i++, tmp += gt_size) {
784 ret = qemu_write_full(fd, &tmp, sizeof(tmp));
785 if (ret != sizeof(tmp)) {
b781cce5 786 ret = -errno;
1640366c
KS
787 goto exit;
788 }
789 }
8979b227
FB
790
791 /* compose the descriptor */
792 real_filename = filename;
793 if ((temp_str = strrchr(real_filename, '\\')) != NULL)
794 real_filename = temp_str + 1;
795 if ((temp_str = strrchr(real_filename, '/')) != NULL)
796 real_filename = temp_str + 1;
797 if ((temp_str = strrchr(real_filename, ':')) != NULL)
798 real_filename = temp_str + 1;
7ccfb2eb 799 snprintf(desc, sizeof(desc), desc_template, (unsigned int)time(NULL),
7fd6d9fc
BS
800 total_size, real_filename,
801 (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
802 total_size / (int64_t)(63 * 16));
8979b227
FB
803
804 /* write the descriptor */
805 lseek(fd, le64_to_cpu(header.desc_offset) << 9, SEEK_SET);
1640366c
KS
806 ret = qemu_write_full(fd, desc, strlen(desc));
807 if (ret != strlen(desc)) {
b781cce5 808 ret = -errno;
1640366c
KS
809 goto exit;
810 }
8979b227 811
1640366c
KS
812 ret = 0;
813exit:
8979b227 814 close(fd);
1640366c 815 return ret;
8979b227
FB
816}
817
e2731add 818static void vmdk_close(BlockDriverState *bs)
ea2384d3
FB
819{
820 BDRVVmdkState *s = bs->opaque;
5f4da8c0 821
ea2384d3
FB
822 qemu_free(s->l1_table);
823 qemu_free(s->l2_cache);
ea2384d3
FB
824}
825
7a6cba61
PB
826static void vmdk_flush(BlockDriverState *bs)
827{
6511ef77 828 bdrv_flush(bs->file);
7a6cba61
PB
829}
830
0e7e1989
KW
831
832static QEMUOptionParameter vmdk_create_options[] = {
db08adf5
KW
833 {
834 .name = BLOCK_OPT_SIZE,
835 .type = OPT_SIZE,
836 .help = "Virtual disk size"
837 },
838 {
839 .name = BLOCK_OPT_BACKING_FILE,
840 .type = OPT_STRING,
841 .help = "File name of a base image"
842 },
843 {
844 .name = BLOCK_OPT_COMPAT6,
845 .type = OPT_FLAG,
846 .help = "VMDK version 6 image"
847 },
0e7e1989
KW
848 { NULL }
849};
850
5efa9d5a 851static BlockDriver bdrv_vmdk = {
e60f469c
AJ
852 .format_name = "vmdk",
853 .instance_size = sizeof(BDRVVmdkState),
854 .bdrv_probe = vmdk_probe,
6511ef77 855 .bdrv_open = vmdk_open,
e60f469c
AJ
856 .bdrv_read = vmdk_read,
857 .bdrv_write = vmdk_write,
858 .bdrv_close = vmdk_close,
859 .bdrv_create = vmdk_create,
860 .bdrv_flush = vmdk_flush,
861 .bdrv_is_allocated = vmdk_is_allocated,
0e7e1989
KW
862
863 .create_options = vmdk_create_options,
ea2384d3 864};
5efa9d5a
AL
865
866static void bdrv_vmdk_init(void)
867{
868 bdrv_register(&bdrv_vmdk);
869}
870
871block_init(bdrv_vmdk_init);