]> git.proxmox.com Git - mirror_qemu.git/blobdiff - block/vmdk.c
remove double semicolons
[mirror_qemu.git] / block / vmdk.c
index 3b376ed64843bdb70195dbaef3cdf1420053c410..608daaf93093ffbb6e99006613dde9fbf48e20f2 100644 (file)
  */
 
 #include "qemu-common.h"
-#include "block_int.h"
-#include "module.h"
-#include "zlib.h"
+#include "block/block_int.h"
+#include "qemu/module.h"
+#include "migration/migration.h"
+#include <zlib.h>
 
 #define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
 #define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')
 #define VMDK4_COMPRESSION_DEFLATE 1
+#define VMDK4_FLAG_NL_DETECT (1 << 0)
 #define VMDK4_FLAG_RGD (1 << 1)
+/* Zeroed-grain enable bit */
+#define VMDK4_FLAG_ZERO_GRAIN   (1 << 2)
 #define VMDK4_FLAG_COMPRESS (1 << 16)
 #define VMDK4_FLAG_MARKER (1 << 17)
+#define VMDK4_GD_AT_END 0xffffffffffffffffULL
+
+#define VMDK_GTE_ZEROED 0x1
+
+/* VMDK internal error codes */
+#define VMDK_OK      0
+#define VMDK_ERROR   (-1)
+/* Cluster not allocated */
+#define VMDK_UNALLOC (-2)
+#define VMDK_ZEROED  (-3)
+
+#define BLOCK_OPT_ZEROED_GRAIN "zeroed_grain"
 
 typedef struct {
     uint32_t version;
@@ -56,8 +72,8 @@ typedef struct {
     int64_t desc_offset;
     int64_t desc_size;
     int32_t num_gtes_per_gte;
-    int64_t gd_offset;
     int64_t rgd_offset;
+    int64_t gd_offset;
     int64_t grain_offset;
     char filler[1];
     char check_bytes[4];
@@ -71,6 +87,8 @@ typedef struct VmdkExtent {
     bool flat;
     bool compressed;
     bool has_marker;
+    bool has_zero_grain;
+    int version;
     int64_t sectors;
     int64_t end_sector;
     int64_t flat_start_offset;
@@ -97,6 +115,7 @@ typedef struct BDRVVmdkState {
     int num_extents;
     /* Extent array with num_extents entries, ascend ordered by address */
     VmdkExtent *extents;
+    Error *migration_blocker;
 } BDRVVmdkState;
 
 typedef struct VmdkMetaData {
@@ -105,6 +124,7 @@ typedef struct VmdkMetaData {
     unsigned int l2_index;
     unsigned int l2_offset;
     int valid;
+    uint32_t *l2_cache_entry;
 } VmdkMetaData;
 
 typedef struct VmdkGrainMarker {
@@ -113,6 +133,13 @@ typedef struct VmdkGrainMarker {
     uint8_t  data[0];
 } VmdkGrainMarker;
 
+enum {
+    MARKER_END_OF_STREAM    = 0,
+    MARKER_GRAIN_TABLE      = 1,
+    MARKER_GRAIN_DIRECTORY  = 2,
+    MARKER_FOOTER           = 3,
+};
+
 static int vmdk_probe(const uint8_t *buf, int buf_size, const char *filename)
 {
     uint32_t magic;
@@ -208,12 +235,14 @@ static void vmdk_free_last_extent(BlockDriverState *bs)
 static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
 {
     char desc[DESC_SIZE];
-    uint32_t cid;
+    uint32_t cid = 0xffffffff;
     const char *p_name, *cid_str;
     size_t cid_str_size;
     BDRVVmdkState *s = bs->opaque;
+    int ret;
 
-    if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
+    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+    if (ret < 0) {
         return 0;
     }
 
@@ -225,6 +254,7 @@ static uint32_t vmdk_read_cid(BlockDriverState *bs, int parent)
         cid_str_size = sizeof("CID");
     }
 
+    desc[DESC_SIZE - 1] = '\0';
     p_name = strstr(desc, cid_str);
     if (p_name != NULL) {
         p_name += cid_str_size;
@@ -239,13 +269,19 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
     char desc[DESC_SIZE], tmp_desc[DESC_SIZE];
     char *p_name, *tmp_str;
     BDRVVmdkState *s = bs->opaque;
+    int ret;
 
-    memset(desc, 0, sizeof(desc));
-    if (bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE) != DESC_SIZE) {
-        return -EIO;
+    ret = bdrv_pread(bs->file, s->desc_offset, desc, DESC_SIZE);
+    if (ret < 0) {
+        return ret;
     }
 
+    desc[DESC_SIZE - 1] = '\0';
     tmp_str = strstr(desc, "parentCID");
+    if (tmp_str == NULL) {
+        return -EINVAL;
+    }
+
     pstrcpy(tmp_desc, sizeof(tmp_desc), tmp_str);
     p_name = strstr(desc, "CID");
     if (p_name != NULL) {
@@ -254,9 +290,11 @@ static int vmdk_write_cid(BlockDriverState *bs, uint32_t cid)
         pstrcat(desc, sizeof(desc), tmp_desc);
     }
 
-    if (bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE) < 0) {
-        return -EIO;
+    ret = bdrv_pwrite_sync(bs->file, s->desc_offset, desc, DESC_SIZE);
+    if (ret < 0) {
+        return ret;
     }
+
     return 0;
 }
 
@@ -279,6 +317,40 @@ static int vmdk_is_cid_valid(BlockDriverState *bs)
     return 1;
 }
 
+/* Queue extents, if any, for reopen() */
+static int vmdk_reopen_prepare(BDRVReopenState *state,
+                               BlockReopenQueue *queue, Error **errp)
+{
+    BDRVVmdkState *s;
+    int ret = -1;
+    int i;
+    VmdkExtent *e;
+
+    assert(state != NULL);
+    assert(state->bs != NULL);
+
+    if (queue == NULL) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                 "No reopen queue for VMDK extents");
+        goto exit;
+    }
+
+    s = state->bs->opaque;
+
+    assert(s != NULL);
+
+    for (i = 0; i < s->num_extents; i++) {
+        e = &s->extents[i];
+        if (e->file != state->bs->file) {
+            bdrv_reopen_queue(queue, e->file, state->flags);
+        }
+    }
+    ret = 0;
+
+exit:
+    return ret;
+}
+
 static int vmdk_parent_open(BlockDriverState *bs)
 {
     char *p_name;
@@ -438,9 +510,57 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
     if (header.capacity == 0 && header.desc_offset) {
         return vmdk_open_desc_file(bs, flags, header.desc_offset << 9);
     }
+
+    if (le64_to_cpu(header.gd_offset) == VMDK4_GD_AT_END) {
+        /*
+         * The footer takes precedence over the header, so read it in. The
+         * footer starts at offset -1024 from the end: One sector for the
+         * footer, and another one for the end-of-stream marker.
+         */
+        struct {
+            struct {
+                uint64_t val;
+                uint32_t size;
+                uint32_t type;
+                uint8_t pad[512 - 16];
+            } QEMU_PACKED footer_marker;
+
+            uint32_t magic;
+            VMDK4Header header;
+            uint8_t pad[512 - 4 - sizeof(VMDK4Header)];
+
+            struct {
+                uint64_t val;
+                uint32_t size;
+                uint32_t type;
+                uint8_t pad[512 - 16];
+            } QEMU_PACKED eos_marker;
+        } QEMU_PACKED footer;
+
+        ret = bdrv_pread(file,
+            bs->file->total_sectors * 512 - 1536,
+            &footer, sizeof(footer));
+        if (ret < 0) {
+            return ret;
+        }
+
+        /* Some sanity checks for the footer */
+        if (be32_to_cpu(footer.magic) != VMDK4_MAGIC ||
+            le32_to_cpu(footer.footer_marker.size) != 0  ||
+            le32_to_cpu(footer.footer_marker.type) != MARKER_FOOTER ||
+            le64_to_cpu(footer.eos_marker.val) != 0  ||
+            le32_to_cpu(footer.eos_marker.size) != 0  ||
+            le32_to_cpu(footer.eos_marker.type) != MARKER_END_OF_STREAM)
+        {
+            return -EINVAL;
+        }
+
+        header = footer.header;
+    }
+
     l1_entry_sectors = le32_to_cpu(header.num_gtes_per_gte)
                         * le64_to_cpu(header.granularity);
-    if (l1_entry_sectors <= 0) {
+    if (l1_entry_sectors == 0) {
         return -EINVAL;
     }
     l1_size = (le64_to_cpu(header.capacity) + l1_entry_sectors - 1)
@@ -458,6 +578,8 @@ static int vmdk_open_vmdk4(BlockDriverState *bs,
     extent->compressed =
         le16_to_cpu(header.compressAlgorithm) == VMDK4_COMPRESSION_DEFLATE;
     extent->has_marker = le32_to_cpu(header.flags) & VMDK4_FLAG_MARKER;
+    extent->version = le32_to_cpu(header.version);
+    extent->has_zero_grain = le32_to_cpu(header.flags) & VMDK4_FLAG_ZERO_GRAIN;
     ret = vmdk_init_tables(bs, extent);
     if (ret) {
         /* free extent allocated by vmdk_add_extent */
@@ -475,22 +597,22 @@ static int vmdk_parse_description(const char *desc, const char *opt_name,
 
     opt_pos = strstr(desc, opt_name);
     if (!opt_pos) {
-        return -1;
+        return VMDK_ERROR;
     }
     /* Skip "=\"" following opt_name */
     opt_pos += strlen(opt_name) + 2;
     if (opt_pos >= end) {
-        return -1;
+        return VMDK_ERROR;
     }
     opt_end = opt_pos;
     while (opt_end < end && *opt_end != '"') {
         opt_end++;
     }
     if (opt_end == end || buf_size < opt_end - opt_pos + 1) {
-        return -1;
+        return VMDK_ERROR;
     }
     pstrcpy(buf, opt_end - opt_pos + 1, opt_pos);
-    return 0;
+    return VMDK_OK;
 }
 
 /* Open an extent file and append to bs array */
@@ -513,7 +635,7 @@ static int vmdk_open_sparse(BlockDriverState *bs,
             return vmdk_open_vmdk4(bs, file, flags);
             break;
         default:
-            return -EINVAL;
+            return -EMEDIUMTYPE;
             break;
     }
 }
@@ -538,7 +660,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
          * RW [size in sectors] SPARSE "file-name.vmdk"
          */
         flat_offset = -1;
-        ret = sscanf(p, "%10s %" SCNd64 " %10s %511s %" SCNd64,
+        ret = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
                 access, &sectors, type, fname, &flat_offset);
         if (ret < 4 || strcmp(access, "RW")) {
             goto next_line;
@@ -550,14 +672,6 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
             return -EINVAL;
         }
 
-        /* trim the quotation marks around */
-        if (fname[0] == '"') {
-            memmove(fname, fname + 1, strlen(fname));
-            if (strlen(fname) <= 1 || fname[strlen(fname) - 1] != '"') {
-                return -EINVAL;
-            }
-            fname[strlen(fname) - 1] = '\0';
-        }
         if (sectors <= 0 ||
             (strcmp(type, "FLAT") && strcmp(type, "SPARSE")) ||
             (strcmp(access, "RW"))) {
@@ -566,7 +680,7 @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
 
         path_combine(extent_path, sizeof(extent_path),
                 desc_file_path, fname);
-        ret = bdrv_file_open(&extent_file, extent_path, bs->open_flags);
+        ret = bdrv_file_open(&extent_file, extent_path, NULL, bs->open_flags);
         if (ret) {
             return ret;
         }
@@ -615,7 +729,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
     }
     buf[2047] = '\0';
     if (vmdk_parse_description(buf, "createType", ct, sizeof(ct))) {
-        return -EINVAL;
+        return -EMEDIUMTYPE;
     }
     if (strcmp(ct, "monolithicFlat") &&
         strcmp(ct, "twoGbMaxExtentSparse") &&
@@ -628,7 +742,7 @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags,
     return vmdk_parse_extents(buf, bs, bs->file->filename);
 }
 
-static int vmdk_open(BlockDriverState *bs, int flags)
+static int vmdk_open(BlockDriverState *bs, QDict *options, int flags)
 {
     int ret;
     BDRVVmdkState *s = bs->opaque;
@@ -648,7 +762,14 @@ static int vmdk_open(BlockDriverState *bs, int flags)
     }
     s->parent_cid = vmdk_read_cid(bs, 1);
     qemu_co_mutex_init(&s->lock);
-    return ret;
+
+    /* Disable migration when VMDK images are used */
+    error_set(&s->migration_blocker,
+              QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+              "vmdk", bs->device_name, "live migration");
+    migrate_add_blocker(s->migration_blocker);
+
+    return 0;
 
 fail:
     vmdk_free_extents(bs);
@@ -670,7 +791,7 @@ static int get_whole_cluster(BlockDriverState *bs,
         int ret;
 
         if (!vmdk_is_cid_valid(bs)) {
-            return -1;
+            return VMDK_ERROR;
         }
 
         /* floor offset to cluster */
@@ -678,30 +799,31 @@ static int get_whole_cluster(BlockDriverState *bs,
         ret = bdrv_read(bs->backing_hd, offset >> 9, whole_grain,
                 extent->cluster_sectors);
         if (ret < 0) {
-            return -1;
+            return VMDK_ERROR;
         }
 
         /* Write grain only into the active image */
         ret = bdrv_write(extent->file, cluster_offset, whole_grain,
                 extent->cluster_sectors);
         if (ret < 0) {
-            return -1;
+            return VMDK_ERROR;
         }
     }
-    return 0;
+    return VMDK_OK;
 }
 
 static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
 {
+    uint32_t offset;
+    QEMU_BUILD_BUG_ON(sizeof(offset) != sizeof(m_data->offset));
+    offset = cpu_to_le32(m_data->offset);
     /* update L2 table */
     if (bdrv_pwrite_sync(
                 extent->file,
                 ((int64_t)m_data->l2_offset * 512)
                     + (m_data->l2_index * sizeof(m_data->offset)),
-                &(m_data->offset),
-                sizeof(m_data->offset)
-            ) < 0) {
-        return -1;
+                &offset, sizeof(offset)) < 0) {
+        return VMDK_ERROR;
     }
     /* update backup L2 table */
     if (extent->l1_backup_table_offset != 0) {
@@ -710,13 +832,15 @@ static int vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data)
                     extent->file,
                     ((int64_t)m_data->l2_offset * 512)
                         + (m_data->l2_index * sizeof(m_data->offset)),
-                    &(m_data->offset), sizeof(m_data->offset)
-                ) < 0) {
-            return -1;
+                    &offset, sizeof(offset)) < 0) {
+            return VMDK_ERROR;
         }
     }
+    if (m_data->l2_cache_entry) {
+        *m_data->l2_cache_entry = offset;
+    }
 
-    return 0;
+    return VMDK_OK;
 }
 
 static int get_cluster_offset(BlockDriverState *bs,
@@ -728,24 +852,25 @@ static int get_cluster_offset(BlockDriverState *bs,
 {
     unsigned int l1_index, l2_offset, l2_index;
     int min_index, i, j;
-    uint32_t min_count, *l2_table, tmp = 0;
+    uint32_t min_count, *l2_table;
+    bool zeroed = false;
 
     if (m_data) {
         m_data->valid = 0;
     }
     if (extent->flat) {
         *cluster_offset = extent->flat_start_offset;
-        return 0;
+        return VMDK_OK;
     }
 
     offset -= (extent->end_sector - extent->sectors) * SECTOR_SIZE;
     l1_index = (offset >> 9) / extent->l1_entry_sectors;
     if (l1_index >= extent->l1_size) {
-        return -1;
+        return VMDK_ERROR;
     }
     l2_offset = extent->l1_table[l1_index];
     if (!l2_offset) {
-        return -1;
+        return VMDK_UNALLOC;
     }
     for (i = 0; i < L2_CACHE_SIZE; i++) {
         if (l2_offset == extent->l2_cache_offsets[i]) {
@@ -775,7 +900,7 @@ static int get_cluster_offset(BlockDriverState *bs,
                 l2_table,
                 extent->l2_size * sizeof(uint32_t)
             ) != extent->l2_size * sizeof(uint32_t)) {
-        return -1;
+        return VMDK_ERROR;
     }
 
     extent->l2_cache_offsets[min_index] = l2_offset;
@@ -784,9 +909,21 @@ static int get_cluster_offset(BlockDriverState *bs,
     l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
     *cluster_offset = le32_to_cpu(l2_table[l2_index]);
 
-    if (!*cluster_offset) {
+    if (m_data) {
+        m_data->valid = 1;
+        m_data->l1_index = l1_index;
+        m_data->l2_index = l2_index;
+        m_data->offset = *cluster_offset;
+        m_data->l2_offset = l2_offset;
+        m_data->l2_cache_entry = &l2_table[l2_index];
+    }
+    if (extent->has_zero_grain && *cluster_offset == VMDK_GTE_ZEROED) {
+        zeroed = true;
+    }
+
+    if (!*cluster_offset || zeroed) {
         if (!allocate) {
-            return -1;
+            return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
         }
 
         /* Avoid the L2 tables update for the images that have snapshots. */
@@ -799,8 +936,7 @@ static int get_cluster_offset(BlockDriverState *bs,
         }
 
         *cluster_offset >>= 9;
-        tmp = cpu_to_le32(*cluster_offset);
-        l2_table[l2_index] = tmp;
+        l2_table[l2_index] = cpu_to_le32(*cluster_offset);
 
         /* First of all we write grain itself, to avoid race condition
          * that may to corrupt the image.
@@ -809,19 +945,15 @@ static int get_cluster_offset(BlockDriverState *bs,
          */
         if (get_whole_cluster(
                 bs, extent, *cluster_offset, offset, allocate) == -1) {
-            return -1;
+            return VMDK_ERROR;
         }
 
         if (m_data) {
-            m_data->offset = tmp;
-            m_data->l1_index = l1_index;
-            m_data->l2_index = l2_index;
-            m_data->l2_offset = l2_offset;
-            m_data->valid = 1;
+            m_data->offset = *cluster_offset;
         }
     }
     *cluster_offset <<= 9;
-    return 0;
+    return VMDK_OK;
 }
 
 static VmdkExtent *find_extent(BDRVVmdkState *s,
@@ -841,8 +973,8 @@ static VmdkExtent *find_extent(BDRVVmdkState *s,
     return NULL;
 }
 
-static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
-                             int nb_sectors, int *pnum)
+static int coroutine_fn vmdk_co_is_allocated(BlockDriverState *bs,
+        int64_t sector_num, int nb_sectors, int *pnum)
 {
     BDRVVmdkState *s = bs->opaque;
     int64_t index_in_cluster, n, ret;
@@ -853,10 +985,12 @@ static int vmdk_is_allocated(BlockDriverState *bs, int64_t sector_num,
     if (!extent) {
         return 0;
     }
+    qemu_co_mutex_lock(&s->lock);
     ret = get_cluster_offset(bs, extent, NULL,
                             sector_num * 512, 0, &offset);
-    /* get_cluster_offset returning 0 means success */
-    ret = !ret;
+    qemu_co_mutex_unlock(&s->lock);
+
+    ret = (ret == VMDK_OK || ret == VMDK_ZEROED);
 
     index_in_cluster = sector_num % extent->cluster_sectors;
     n = extent->cluster_sectors - index_in_cluster;
@@ -980,6 +1114,7 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
     BDRVVmdkState *s = bs->opaque;
     int ret;
     uint64_t n, index_in_cluster;
+    uint64_t extent_begin_sector, extent_relative_sector_num;
     VmdkExtent *extent = NULL;
     uint64_t cluster_offset;
 
@@ -991,14 +1126,16 @@ static int vmdk_read(BlockDriverState *bs, int64_t sector_num,
         ret = get_cluster_offset(
                             bs, extent, NULL,
                             sector_num << 9, 0, &cluster_offset);
-        index_in_cluster = sector_num % extent->cluster_sectors;
+        extent_begin_sector = extent->end_sector - extent->sectors;
+        extent_relative_sector_num = sector_num - extent_begin_sector;
+        index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
         n = extent->cluster_sectors - index_in_cluster;
         if (n > nb_sectors) {
             n = nb_sectors;
         }
-        if (ret) {
+        if (ret != VMDK_OK) {
             /* if not allocated, try to read from parent image, if exist */
-            if (bs->backing_hd) {
+            if (bs->backing_hd && ret != VMDK_ZEROED) {
                 if (!vmdk_is_cid_valid(bs)) {
                     return -EINVAL;
                 }
@@ -1035,13 +1172,23 @@ static coroutine_fn int vmdk_co_read(BlockDriverState *bs, int64_t sector_num,
     return ret;
 }
 
+/**
+ * vmdk_write:
+ * @zeroed:       buf is ignored (data is zero), use zeroed_grain GTE feature
+ * if possible, otherwise return -ENOTSUP.
+ * @zero_dry_run: used for zeroed == true only, don't update L2 table, just
+ *
+ * Returns: error code with 0 for success.
+ */
 static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
-                     const uint8_t *buf, int nb_sectors)
+                      const uint8_t *buf, int nb_sectors,
+                      bool zeroed, bool zero_dry_run)
 {
     BDRVVmdkState *s = bs->opaque;
     VmdkExtent *extent = NULL;
     int n, ret;
     int64_t index_in_cluster;
+    uint64_t extent_begin_sector, extent_relative_sector_num;
     uint64_t cluster_offset;
     VmdkMetaData m_data;
 
@@ -1065,7 +1212,7 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                                 sector_num << 9, !extent->compressed,
                                 &cluster_offset);
         if (extent->compressed) {
-            if (ret == 0) {
+            if (ret == VMDK_OK) {
                 /* Refuse write to allocated cluster for streamOptimized */
                 fprintf(stderr,
                         "VMDK: can't write to allocated cluster"
@@ -1081,25 +1228,44 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
                                         &cluster_offset);
             }
         }
-        if (ret) {
+        if (ret == VMDK_ERROR) {
             return -EINVAL;
         }
-        index_in_cluster = sector_num % extent->cluster_sectors;
+        extent_begin_sector = extent->end_sector - extent->sectors;
+        extent_relative_sector_num = sector_num - extent_begin_sector;
+        index_in_cluster = extent_relative_sector_num % extent->cluster_sectors;
         n = extent->cluster_sectors - index_in_cluster;
         if (n > nb_sectors) {
             n = nb_sectors;
         }
-
-        ret = vmdk_write_extent(extent,
-                        cluster_offset, index_in_cluster * 512,
-                        buf, n, sector_num);
-        if (ret) {
-            return ret;
-        }
-        if (m_data.valid) {
-            /* update L2 tables */
-            if (vmdk_L2update(extent, &m_data) == -1) {
-                return -EIO;
+        if (zeroed) {
+            /* Do zeroed write, buf is ignored */
+            if (extent->has_zero_grain &&
+                    index_in_cluster == 0 &&
+                    n >= extent->cluster_sectors) {
+                n = extent->cluster_sectors;
+                if (!zero_dry_run) {
+                    m_data.offset = VMDK_GTE_ZEROED;
+                    /* update L2 tables */
+                    if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
+                        return -EIO;
+                    }
+                }
+            } else {
+                return -ENOTSUP;
+            }
+        } else {
+            ret = vmdk_write_extent(extent,
+                            cluster_offset, index_in_cluster * 512,
+                            buf, n, sector_num);
+            if (ret) {
+                return ret;
+            }
+            if (m_data.valid) {
+                /* update L2 tables */
+                if (vmdk_L2update(extent, &m_data) != VMDK_OK) {
+                    return -EIO;
+                }
             }
         }
         nb_sectors -= n;
@@ -1109,7 +1275,10 @@ static int vmdk_write(BlockDriverState *bs, int64_t sector_num,
         /* update CID on the first write every time the virtual disk is
          * opened */
         if (!s->cid_updated) {
-            vmdk_write_cid(bs, time(NULL));
+            ret = vmdk_write_cid(bs, time(NULL));
+            if (ret < 0) {
+                return ret;
+            }
             s->cid_updated = true;
         }
     }
@@ -1122,24 +1291,38 @@ static coroutine_fn int vmdk_co_write(BlockDriverState *bs, int64_t sector_num,
     int ret;
     BDRVVmdkState *s = bs->opaque;
     qemu_co_mutex_lock(&s->lock);
-    ret = vmdk_write(bs, sector_num, buf, nb_sectors);
+    ret = vmdk_write(bs, sector_num, buf, nb_sectors, false, false);
+    qemu_co_mutex_unlock(&s->lock);
+    return ret;
+}
+
+static int coroutine_fn vmdk_co_write_zeroes(BlockDriverState *bs,
+                                             int64_t sector_num,
+                                             int nb_sectors)
+{
+    int ret;
+    BDRVVmdkState *s = bs->opaque;
+    qemu_co_mutex_lock(&s->lock);
+    ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, true);
+    if (!ret) {
+        ret = vmdk_write(bs, sector_num, NULL, nb_sectors, true, false);
+    }
     qemu_co_mutex_unlock(&s->lock);
     return ret;
 }
 
 
 static int vmdk_create_extent(const char *filename, int64_t filesize,
-                              bool flat, bool compress)
+                              bool flat, bool compress, bool zeroed_grain)
 {
     int ret, i;
     int fd = 0;
     VMDK4Header header;
     uint32_t tmp, magic, grains, gd_size, gt_size, gt_count;
 
-    fd = open(
-        filename,
-        O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
-        0644);
+    fd = qemu_open(filename,
+                   O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+                   0644);
     if (fd < 0) {
         return -errno;
     }
@@ -1152,9 +1335,10 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
     }
     magic = cpu_to_be32(VMDK4_MAGIC);
     memset(&header, 0, sizeof(header));
-    header.version = 1;
-    header.flags =
-        3 | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0);
+    header.version = zeroed_grain ? 2 : 1;
+    header.flags = VMDK4_FLAG_RGD | VMDK4_FLAG_NL_DETECT
+                   | (compress ? VMDK4_FLAG_COMPRESS | VMDK4_FLAG_MARKER : 0)
+                   | (zeroed_grain ? VMDK4_FLAG_ZERO_GRAIN : 0);
     header.compressAlgorithm = compress ? VMDK4_COMPRESSION_DEFLATE : 0;
     header.capacity = filesize / 512;
     header.granularity = 128;
@@ -1234,7 +1418,7 @@ static int vmdk_create_extent(const char *filename, int64_t filesize,
 
     ret = 0;
  exit:
-    close(fd);
+    qemu_close(fd);
     return ret;
 }
 
@@ -1245,7 +1429,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
 
     if (filename == NULL || !strlen(filename)) {
         fprintf(stderr, "Vmdk: no filename provided.\n");
-        return -1;
+        return VMDK_ERROR;
     }
     p = strrchr(filename, '/');
     if (p == NULL) {
@@ -1257,7 +1441,7 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
     if (p != NULL) {
         p++;
         if (p - filename >= buf_len) {
-            return -1;
+            return VMDK_ERROR;
         }
         pstrcpy(path, p - filename + 1, filename);
     } else {
@@ -1270,12 +1454,12 @@ static int filename_decompose(const char *filename, char *path, char *prefix,
         postfix[0] = '\0';
     } else {
         if (q - p >= buf_len) {
-            return -1;
+            return VMDK_ERROR;
         }
         pstrcpy(prefix, q - p + 1, p);
         pstrcpy(postfix, buf_len, q);
     }
-    return 0;
+    return VMDK_OK;
 }
 
 static int relative_path(char *dest, int dest_size,
@@ -1291,12 +1475,11 @@ static int relative_path(char *dest, int dest_size,
 #endif
 
     if (!(dest && base && target)) {
-        return -1;
+        return VMDK_ERROR;
     }
     if (path_is_absolute(target)) {
-        dest[dest_size - 1] = '\0';
-        strncpy(dest, target, dest_size - 1);
-        return 0;
+        pstrcpy(dest, dest_size, target);
+        return VMDK_OK;
     }
     while (base[i] == target[i]) {
         i++;
@@ -1315,7 +1498,7 @@ static int relative_path(char *dest, int dest_size,
         pstrcat(dest, dest_size, sep);
     }
     pstrcat(dest, dest_size, q);
-    return 0;
+    return VMDK_OK;
 }
 
 static int vmdk_create(const char *filename, QEMUOptionParameter *options)
@@ -1323,6 +1506,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
     int fd, idx = 0;
     char desc[BUF_SIZE];
     int64_t total_size = 0, filesize;
+    const char *adapter_type = NULL;
     const char *backing_file = NULL;
     const char *fmt = NULL;
     int flags = 0;
@@ -1334,6 +1518,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
     const char *desc_extent_line;
     char parent_desc_line[BUF_SIZE] = "";
     uint32_t parent_cid = 0xffffffff;
+    uint32_t number_heads = 16;
+    bool zeroed_grain = false;
     const char desc_template[] =
         "# Disk DescriptorFile\n"
         "version=1\n"
@@ -1350,9 +1536,9 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
         "\n"
         "ddb.virtualHWVersion = \"%d\"\n"
         "ddb.geometry.cylinders = \"%" PRId64 "\"\n"
-        "ddb.geometry.heads = \"16\"\n"
+        "ddb.geometry.heads = \"%d\"\n"
         "ddb.geometry.sectors = \"63\"\n"
-        "ddb.adapterType = \"ide\"\n";
+        "ddb.adapterType = \"%s\"\n";
 
     if (filename_decompose(filename, path, prefix, postfix, PATH_MAX)) {
         return -EINVAL;
@@ -1361,15 +1547,33 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
     while (options && options->name) {
         if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
             total_size = options->value.n;
+        } else if (!strcmp(options->name, BLOCK_OPT_ADAPTER_TYPE)) {
+            adapter_type = options->value.s;
         } else if (!strcmp(options->name, BLOCK_OPT_BACKING_FILE)) {
             backing_file = options->value.s;
         } else if (!strcmp(options->name, BLOCK_OPT_COMPAT6)) {
             flags |= options->value.n ? BLOCK_FLAG_COMPAT6 : 0;
         } else if (!strcmp(options->name, BLOCK_OPT_SUBFMT)) {
             fmt = options->value.s;
+        } else if (!strcmp(options->name, BLOCK_OPT_ZEROED_GRAIN)) {
+            zeroed_grain |= options->value.n;
         }
         options++;
     }
+    if (!adapter_type) {
+        adapter_type = "ide";
+    } else if (strcmp(adapter_type, "ide") &&
+               strcmp(adapter_type, "buslogic") &&
+               strcmp(adapter_type, "lsilogic") &&
+               strcmp(adapter_type, "legacyESX")) {
+        fprintf(stderr, "VMDK: Unknown adapter type: '%s'.\n", adapter_type);
+        return -EINVAL;
+    }
+    if (strcmp(adapter_type, "ide") != 0) {
+        /* that's the number of heads with which vmware operates when
+           creating, exporting, etc. vmdk files with a non-ide adapter type */
+        number_heads = 255;
+    }
     if (!fmt) {
         /* Default format to monolithicSparse */
         fmt = "monolithicSparse";
@@ -1398,7 +1602,7 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
     if (backing_file) {
         char parent_filename[PATH_MAX];
         BlockDriverState *bs = bdrv_new("");
-        ret = bdrv_open(bs, backing_file, 0, NULL);
+        ret = bdrv_open(bs, backing_file, NULL, 0, NULL);
         if (ret != 0) {
             bdrv_delete(bs);
             return ret;
@@ -1407,7 +1611,6 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
             bdrv_delete(bs);
             return -EINVAL;
         }
-        filesize = bdrv_getlength(bs);
         parent_cid = vmdk_read_cid(bs, 0);
         bdrv_delete(bs);
         relative_path(parent_filename, sizeof(parent_filename),
@@ -1440,7 +1643,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
         snprintf(ext_filename, sizeof(ext_filename), "%s%s",
                 path, desc_filename);
 
-        if (vmdk_create_extent(ext_filename, size, flat, compress)) {
+        if (vmdk_create_extent(ext_filename, size,
+                               flat, compress, zeroed_grain)) {
             return -EINVAL;
         }
         filesize -= size;
@@ -1458,17 +1662,16 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
             parent_desc_line,
             ext_desc_lines,
             (flags & BLOCK_FLAG_COMPAT6 ? 6 : 4),
-            total_size / (int64_t)(63 * 16 * 512));
+            total_size / (int64_t)(63 * number_heads * 512), number_heads,
+                adapter_type);
     if (split || flat) {
-        fd = open(
-                filename,
-                O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
-                0644);
+        fd = qemu_open(filename,
+                       O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
+                       0644);
     } else {
-        fd = open(
-                filename,
-                O_WRONLY | O_BINARY | O_LARGEFILE,
-                0644);
+        fd = qemu_open(filename,
+                       O_WRONLY | O_BINARY | O_LARGEFILE,
+                       0644);
     }
     if (fd < 0) {
         return -errno;
@@ -1485,23 +1688,28 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options)
     }
     ret = 0;
 exit:
-    close(fd);
+    qemu_close(fd);
     return ret;
 }
 
 static void vmdk_close(BlockDriverState *bs)
 {
+    BDRVVmdkState *s = bs->opaque;
+
     vmdk_free_extents(bs);
+
+    migrate_del_blocker(s->migration_blocker);
+    error_free(s->migration_blocker);
 }
 
-static int vmdk_flush(BlockDriverState *bs)
+static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
 {
-    int i, ret, err;
     BDRVVmdkState *s = bs->opaque;
+    int i, err;
+    int ret = 0;
 
-    ret = bdrv_flush(bs->file);
     for (i = 0; i < s->num_extents; i++) {
-        err = bdrv_flush(s->extents[i].file);
+        err = bdrv_co_flush(s->extents[i].file);
         if (err < 0) {
             ret = err;
         }
@@ -1539,6 +1747,12 @@ static QEMUOptionParameter vmdk_create_options[] = {
         .type = OPT_SIZE,
         .help = "Virtual disk size"
     },
+    {
+        .name = BLOCK_OPT_ADAPTER_TYPE,
+        .type = OPT_STRING,
+        .help = "Virtual adapter type, can be one of "
+                "ide (default), lsilogic, buslogic or legacyESX"
+    },
     {
         .name = BLOCK_OPT_BACKING_FILE,
         .type = OPT_STRING,
@@ -1556,6 +1770,11 @@ static QEMUOptionParameter vmdk_create_options[] = {
             "VMDK flat extent format, can be one of "
             "{monolithicSparse (default) | monolithicFlat | twoGbMaxExtentSparse | twoGbMaxExtentFlat | streamOptimized} "
     },
+    {
+        .name = BLOCK_OPT_ZEROED_GRAIN,
+        .type = OPT_FLAG,
+        .help = "Enable efficient zero writes using the zeroed-grain GTE feature"
+    },
     { NULL }
 };
 
@@ -1564,12 +1783,14 @@ static BlockDriver bdrv_vmdk = {
     .instance_size  = sizeof(BDRVVmdkState),
     .bdrv_probe     = vmdk_probe,
     .bdrv_open      = vmdk_open,
+    .bdrv_reopen_prepare = vmdk_reopen_prepare,
     .bdrv_read      = vmdk_co_read,
     .bdrv_write     = vmdk_co_write,
+    .bdrv_co_write_zeroes = vmdk_co_write_zeroes,
     .bdrv_close     = vmdk_close,
     .bdrv_create    = vmdk_create,
-    .bdrv_flush     = vmdk_flush,
-    .bdrv_is_allocated  = vmdk_is_allocated,
+    .bdrv_co_flush_to_disk  = vmdk_co_flush,
+    .bdrv_co_is_allocated   = vmdk_co_is_allocated,
     .bdrv_get_allocated_file_size  = vmdk_get_allocated_file_size,
 
     .create_options = vmdk_create_options,