]> git.proxmox.com Git - mirror_qemu.git/blobdiff - block/sheepdog.c
block-backend: Add flags to blk_truncate()
[mirror_qemu.git] / block / sheepdog.c
index a1edb992ffd4cc11b8bca5df348eebf6249c6f89..76729f40a4d5b8e83b4a177fab338447d9c87b33 100644 (file)
  */
 
 #include "qemu/osdep.h"
-#include "qapi-visit.h"
+#include "qemu-common.h"
 #include "qapi/error.h"
+#include "qapi/qapi-visit-sockets.h"
+#include "qapi/qapi-visit-block-core.h"
 #include "qapi/qmp/qdict.h"
 #include "qapi/qobject-input-visitor.h"
+#include "qapi/qobject-output-visitor.h"
 #include "qemu/uri.h"
 #include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "qemu/module.h"
+#include "qemu/option.h"
 #include "qemu/sockets.h"
 #include "block/block_int.h"
+#include "block/qdict.h"
 #include "sysemu/block-backend.h"
 #include "qemu/bitops.h"
 #include "qemu/cutils.h"
+#include "trace.h"
 
 #define SD_PROTO_VER 0x01
 
@@ -295,19 +303,6 @@ static inline size_t count_data_objs(const struct SheepdogInode *inode)
                         (1UL << inode->block_size_shift));
 }
 
-#undef DPRINTF
-#ifdef DEBUG_SDOG
-#define DEBUG_SDOG_PRINT 1
-#else
-#define DEBUG_SDOG_PRINT 0
-#endif
-#define DPRINTF(fmt, args...)                                           \
-    do {                                                                \
-        if (DEBUG_SDOG_PRINT) {                                         \
-            fprintf(stderr, "%s %d: " fmt, __func__, __LINE__, ##args); \
-        }                                                               \
-    } while (0)
-
 typedef struct SheepdogAIOCB SheepdogAIOCB;
 typedef struct BDRVSheepdogState BDRVSheepdogState;
 
@@ -387,12 +382,12 @@ struct BDRVSheepdogState {
     uint32_t aioreq_seq_num;
 
     /* Every aio request must be linked to either of these queues. */
-    QLIST_HEAD(inflight_aio_head, AIOReq) inflight_aio_head;
-    QLIST_HEAD(failed_aio_head, AIOReq) failed_aio_head;
+    QLIST_HEAD(, AIOReq) inflight_aio_head;
+    QLIST_HEAD(, AIOReq) failed_aio_head;
 
     CoMutex queue_lock;
     CoQueue overlapping_queue;
-    QLIST_HEAD(inflight_aiocb_head, SheepdogAIOCB) inflight_aiocb_head;
+    QLIST_HEAD(, SheepdogAIOCB) inflight_aiocb_head;
 };
 
 typedef struct BDRVSheepdogReopenState {
@@ -400,7 +395,7 @@ typedef struct BDRVSheepdogReopenState {
     int cache_flags;
 } BDRVSheepdogReopenState;
 
-static const char * sd_strerror(int err)
+static const char *sd_strerror(int err)
 {
     int i;
 
@@ -532,47 +527,20 @@ static void sd_aio_setup(SheepdogAIOCB *acb, BDRVSheepdogState *s,
     qemu_co_mutex_unlock(&s->queue_lock);
 }
 
-static SocketAddress *sd_socket_address(const char *path,
-                                        const char *host, const char *port)
-{
-    SocketAddress *addr = g_new0(SocketAddress, 1);
-
-    if (path) {
-        addr->type = SOCKET_ADDRESS_TYPE_UNIX;
-        addr->u.q_unix.path = g_strdup(path);
-    } else {
-        addr->type = SOCKET_ADDRESS_TYPE_INET;
-        addr->u.inet.host = g_strdup(host ?: SD_DEFAULT_ADDR);
-        addr->u.inet.port = g_strdup(port ?: stringify(SD_DEFAULT_PORT));
-    }
-
-    return addr;
-}
-
 static SocketAddress *sd_server_config(QDict *options, Error **errp)
 {
     QDict *server = NULL;
-    QObject *crumpled_server = NULL;
     Visitor *iv = NULL;
     SocketAddress *saddr = NULL;
     Error *local_err = NULL;
 
     qdict_extract_subqdict(options, &server, "server.");
 
-    crumpled_server = qdict_crumple(server, errp);
-    if (!crumpled_server) {
+    iv = qobject_input_visitor_new_flat_confused(server, errp);
+    if (!iv) {
         goto done;
     }
 
-    /*
-     * FIXME .numeric, .to, .ipv4 or .ipv6 don't work with -drive
-     * server.type=inet.  .to doesn't matter, it's ignored anyway.
-     * That's because when @options come from -blockdev or
-     * blockdev_add, members are typed according to the QAPI schema,
-     * but when they come from -drive, they're all QString.  The
-     * visitor expects the former.
-     */
-    iv = qobject_input_visitor_new(crumpled_server);
     visit_type_SocketAddress(iv, NULL, &saddr, &local_err);
     if (local_err) {
         error_propagate(errp, local_err);
@@ -581,8 +549,7 @@ static SocketAddress *sd_server_config(QDict *options, Error **errp)
 
 done:
     visit_free(iv);
-    qobject_decref(crumpled_server);
-    QDECREF(server);
+    qobject_unref(server);
     return saddr;
 }
 
@@ -596,7 +563,7 @@ static int connect_to_sdog(BDRVSheepdogState *s, Error **errp)
     if (s->addr->type == SOCKET_ADDRESS_TYPE_INET && fd >= 0) {
         int ret = socket_set_nodelay(fd);
         if (ret < 0) {
-            error_report("%s", strerror(errno));
+            warn_report("can't set TCP_NODELAY: %s", strerror(errno));
         }
     }
 
@@ -774,7 +741,7 @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
         Error *local_err = NULL;
         s->fd = get_sheep_fd(s, &local_err);
         if (s->fd < 0) {
-            DPRINTF("Wait for connection to be established\n");
+            trace_sheepdog_reconnect_to_sdog();
             error_report_err(local_err);
             qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000000ULL);
         }
@@ -871,7 +838,7 @@ static void coroutine_fn aio_read_response(void *opaque)
         break;
     case AIOCB_FLUSH_CACHE:
         if (rsp.result == SD_RES_INVALID_PARMS) {
-            DPRINTF("disable cache since the server doesn't support it\n");
+            trace_sheepdog_aio_read_response();
             s->cache_flags = SD_FLAG_CMD_DIRECT;
             rsp.result = SD_RES_SUCCESS;
         }
@@ -1050,7 +1017,7 @@ static void sd_parse_uri(SheepdogConfig *cfg, const char *filename,
 
     cfg->uri = uri = uri_parse(filename);
     if (!uri) {
-        error_setg(&err, "invalid URI");
+        error_setg(&err, "invalid URI '%s'", filename);
         goto out;
     }
 
@@ -1248,7 +1215,7 @@ static int find_vdi_name(BDRVSheepdogState *s, const char *filename,
     SheepdogVdiReq hdr;
     SheepdogVdiRsp *rsp = (SheepdogVdiRsp *)&hdr;
     unsigned int wlen, rlen = 0;
-    char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN];
+    char buf[SD_MAX_VDI_LEN + SD_MAX_VDI_TAG_LEN] QEMU_NONSTRING;
 
     fd = connect_to_sdog(s, errp);
     if (fd < 0) {
@@ -1631,7 +1598,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     if (!tag) {
         tag = "";
     }
-    if (tag && strlen(tag) >= SD_MAX_VDI_TAG_LEN) {
+    if (strlen(tag) >= SD_MAX_VDI_TAG_LEN) {
         error_setg(errp, "value of parameter 'tag' is too long");
         ret = -EINVAL;
         goto err_no_fd;
@@ -1663,7 +1630,7 @@ static int sd_open(BlockDriverState *bs, QDict *options, int flags,
     s->discard_supported = true;
 
     if (snap_id || tag[0]) {
-        DPRINTF("%" PRIx32 " snapshot inode was open.\n", vid);
+        trace_sheepdog_open(vid);
         s->is_snapshot = true;
     }
 
@@ -1825,40 +1792,35 @@ static int do_sd_create(BDRVSheepdogState *s, uint32_t *vdi_id, int snapshot,
     return 0;
 }
 
-static int sd_prealloc(const char *filename, Error **errp)
+static int sd_prealloc(BlockDriverState *bs, int64_t old_size, int64_t new_size,
+                       Error **errp)
 {
     BlockBackend *blk = NULL;
-    BDRVSheepdogState *base = NULL;
+    BDRVSheepdogState *base = bs->opaque;
     unsigned long buf_size;
     uint32_t idx, max_idx;
     uint32_t object_size;
-    int64_t vdi_size;
     void *buf = NULL;
     int ret;
 
-    blk = blk_new_open(filename, NULL, NULL,
-                       BDRV_O_RDWR | BDRV_O_RESIZE | BDRV_O_PROTOCOL, errp);
-    if (blk == NULL) {
-        ret = -EIO;
+    blk = blk_new(bdrv_get_aio_context(bs),
+                  BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE | BLK_PERM_RESIZE,
+                  BLK_PERM_ALL);
+
+    ret = blk_insert_bs(blk, bs, errp);
+    if (ret < 0) {
         goto out_with_err_set;
     }
 
     blk_set_allow_write_beyond_eof(blk, true);
 
-    vdi_size = blk_getlength(blk);
-    if (vdi_size < 0) {
-        ret = vdi_size;
-        goto out;
-    }
-
-    base = blk_bs(blk)->opaque;
     object_size = (UINT32_C(1) << base->inode.block_size_shift);
     buf_size = MIN(object_size, SD_DATA_OBJ_SIZE);
     buf = g_malloc0(buf_size);
 
-    max_idx = DIV_ROUND_UP(vdi_size, buf_size);
+    max_idx = DIV_ROUND_UP(new_size, buf_size);
 
-    for (idx = 0; idx < max_idx; idx++) {
+    for (idx = old_size / buf_size; idx < max_idx; idx++) {
         /*
          * The created image can be a cloned image, so we need to read
          * a data from the source image.
@@ -1879,14 +1841,85 @@ out:
         error_setg_errno(errp, -ret, "Can't pre-allocate");
     }
 out_with_err_set:
-    if (blk) {
-        blk_unref(blk);
-    }
+    blk_unref(blk);
     g_free(buf);
 
     return ret;
 }
 
+static int sd_create_prealloc(BlockdevOptionsSheepdog *location, int64_t size,
+                              Error **errp)
+{
+    BlockDriverState *bs;
+    Visitor *v;
+    QObject *obj = NULL;
+    QDict *qdict;
+    int ret;
+
+    v = qobject_output_visitor_new(&obj);
+    visit_type_BlockdevOptionsSheepdog(v, NULL, &location, &error_abort);
+    visit_free(v);
+
+    qdict = qobject_to(QDict, obj);
+    qdict_flatten(qdict);
+
+    qdict_put_str(qdict, "driver", "sheepdog");
+
+    bs = bdrv_open(NULL, NULL, qdict, BDRV_O_PROTOCOL | BDRV_O_RDWR, errp);
+    if (bs == NULL) {
+        ret = -EIO;
+        goto fail;
+    }
+
+    ret = sd_prealloc(bs, 0, size, errp);
+fail:
+    bdrv_unref(bs);
+    qobject_unref(qdict);
+    return ret;
+}
+
+static int parse_redundancy(BDRVSheepdogState *s, SheepdogRedundancy *opt)
+{
+    struct SheepdogInode *inode = &s->inode;
+
+    switch (opt->type) {
+    case SHEEPDOG_REDUNDANCY_TYPE_FULL:
+        if (opt->u.full.copies > SD_MAX_COPIES || opt->u.full.copies < 1) {
+            return -EINVAL;
+        }
+        inode->copy_policy = 0;
+        inode->nr_copies = opt->u.full.copies;
+        return 0;
+
+    case SHEEPDOG_REDUNDANCY_TYPE_ERASURE_CODED:
+    {
+        int64_t copy = opt->u.erasure_coded.data_strips;
+        int64_t parity = opt->u.erasure_coded.parity_strips;
+
+        if (copy != 2 && copy != 4 && copy != 8 && copy != 16) {
+            return -EINVAL;
+        }
+
+        if (parity >= SD_EC_MAX_STRIP || parity < 1) {
+            return -EINVAL;
+        }
+
+        /*
+         * 4 bits for parity and 4 bits for data.
+         * We have to compress upper data bits because it can't represent 16
+         */
+        inode->copy_policy = ((copy / 2) << 4) + parity;
+        inode->nr_copies = copy + parity;
+        return 0;
+    }
+
+    default:
+        g_assert_not_reached();
+    }
+
+    return -EINVAL;
+}
+
 /*
  * Sheepdog support two kinds of redundancy, full replication and erasure
  * coding.
@@ -1897,60 +1930,62 @@ out_with_err_set:
  * # create a erasure coded vdi with x data strips and y parity strips
  * -o redundancy=x:y (x must be one of {2,4,8,16} and 1 <= y < SD_EC_MAX_STRIP)
  */
-static int parse_redundancy(BDRVSheepdogState *s, const char *opt)
+static SheepdogRedundancy *parse_redundancy_str(const char *opt)
 {
-    struct SheepdogInode *inode = &s->inode;
+    SheepdogRedundancy *redundancy;
     const char *n1, *n2;
     long copy, parity;
     char p[10];
+    int ret;
 
     pstrcpy(p, sizeof(p), opt);
     n1 = strtok(p, ":");
     n2 = strtok(NULL, ":");
 
     if (!n1) {
-        return -EINVAL;
+        return NULL;
     }
 
-    copy = strtol(n1, NULL, 10);
-    /* FIXME fix error checking by switching to qemu_strtol() */
-    if (copy > SD_MAX_COPIES || copy < 1) {
-        return -EINVAL;
-    }
-    if (!n2) {
-        inode->copy_policy = 0;
-        inode->nr_copies = copy;
-        return 0;
+    ret = qemu_strtol(n1, NULL, 10, &copy);
+    if (ret < 0) {
+        return NULL;
     }
 
-    if (copy != 2 && copy != 4 && copy != 8 && copy != 16) {
-        return -EINVAL;
-    }
+    redundancy = g_new0(SheepdogRedundancy, 1);
+    if (!n2) {
+        *redundancy = (SheepdogRedundancy) {
+            .type               = SHEEPDOG_REDUNDANCY_TYPE_FULL,
+            .u.full.copies      = copy,
+        };
+    } else {
+        ret = qemu_strtol(n2, NULL, 10, &parity);
+        if (ret < 0) {
+            g_free(redundancy);
+            return NULL;
+        }
 
-    parity = strtol(n2, NULL, 10);
-    /* FIXME fix error checking by switching to qemu_strtol() */
-    if (parity >= SD_EC_MAX_STRIP || parity < 1) {
-        return -EINVAL;
+        *redundancy = (SheepdogRedundancy) {
+            .type               = SHEEPDOG_REDUNDANCY_TYPE_ERASURE_CODED,
+            .u.erasure_coded    = {
+                .data_strips    = copy,
+                .parity_strips  = parity,
+            },
+        };
     }
 
-    /*
-     * 4 bits for parity and 4 bits for data.
-     * We have to compress upper data bits because it can't represent 16
-     */
-    inode->copy_policy = ((copy / 2) << 4) + parity;
-    inode->nr_copies = copy + parity;
-
-    return 0;
+    return redundancy;
 }
 
-static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
+static int parse_block_size_shift(BDRVSheepdogState *s,
+                                  BlockdevCreateOptionsSheepdog *opts)
 {
     struct SheepdogInode *inode = &s->inode;
     uint64_t object_size;
     int obj_order;
 
-    object_size = qemu_opt_get_size_del(opt, BLOCK_OPT_OBJECT_SIZE, 0);
-    if (object_size) {
+    if (opts->has_object_size) {
+        object_size = opts->object_size;
+
         if ((object_size - 1) & object_size) {    /* not a power of 2? */
             return -EINVAL;
         }
@@ -1964,57 +1999,55 @@ static int parse_block_size_shift(BDRVSheepdogState *s, QemuOpts *opt)
     return 0;
 }
 
-static int sd_create(const char *filename, QemuOpts *opts,
-                     Error **errp)
+static int sd_co_create(BlockdevCreateOptions *options, Error **errp)
 {
-    Error *err = NULL;
+    BlockdevCreateOptionsSheepdog *opts = &options->u.sheepdog;
     int ret = 0;
     uint32_t vid = 0;
     char *backing_file = NULL;
     char *buf = NULL;
     BDRVSheepdogState *s;
-    SheepdogConfig cfg;
     uint64_t max_vdi_size;
     bool prealloc = false;
 
+    assert(options->driver == BLOCKDEV_DRIVER_SHEEPDOG);
+
     s = g_new0(BDRVSheepdogState, 1);
 
-    if (strstr(filename, "://")) {
-        sd_parse_uri(&cfg, filename, &err);
-    } else {
-        parse_vdiname(&cfg, filename, &err);
-    }
-    if (err) {
-        error_propagate(errp, err);
+    /* Steal SocketAddress from QAPI, set NULL to prevent double free */
+    s->addr = opts->location->server;
+    opts->location->server = NULL;
+
+    if (strlen(opts->location->vdi) >= sizeof(s->name)) {
+        error_setg(errp, "'vdi' string too long");
+        ret = -EINVAL;
         goto out;
     }
+    pstrcpy(s->name, sizeof(s->name), opts->location->vdi);
 
-    buf = cfg.port ? g_strdup_printf("%d", cfg.port) : NULL;
-    s->addr = sd_socket_address(cfg.path, cfg.host, buf);
-    g_free(buf);
-    strcpy(s->name, cfg.vdi);
-    sd_config_done(&cfg);
+    s->inode.vdi_size = opts->size;
+    backing_file = opts->backing_file;
 
-    s->inode.vdi_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0),
-                                 BDRV_SECTOR_SIZE);
-    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
-    buf = qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC);
-    if (!buf || !strcmp(buf, "off")) {
+    if (!opts->has_preallocation) {
+        opts->preallocation = PREALLOC_MODE_OFF;
+    }
+    switch (opts->preallocation) {
+    case PREALLOC_MODE_OFF:
         prealloc = false;
-    } else if (!strcmp(buf, "full")) {
+        break;
+    case PREALLOC_MODE_FULL:
         prealloc = true;
-    } else {
-        error_setg(errp, "Invalid preallocation mode: '%s'", buf);
+        break;
+    default:
+        error_setg(errp, "Preallocation mode not supported for Sheepdog");
         ret = -EINVAL;
         goto out;
     }
 
-    g_free(buf);
-    buf = qemu_opt_get_del(opts, BLOCK_OPT_REDUNDANCY);
-    if (buf) {
-        ret = parse_redundancy(s, buf);
+    if (opts->has_redundancy) {
+        ret = parse_redundancy(s, opts->redundancy);
         if (ret < 0) {
-            error_setg(errp, "Invalid redundancy mode: '%s'", buf);
+            error_setg(errp, "Invalid redundancy mode");
             goto out;
         }
     }
@@ -2026,20 +2059,20 @@ static int sd_create(const char *filename, QemuOpts *opts,
         goto out;
     }
 
-    if (backing_file) {
+    if (opts->has_backing_file) {
         BlockBackend *blk;
         BDRVSheepdogState *base;
         BlockDriver *drv;
 
         /* Currently, only Sheepdog backing image is supported. */
-        drv = bdrv_find_protocol(backing_file, true, NULL);
+        drv = bdrv_find_protocol(opts->backing_file, true, NULL);
         if (!drv || strcmp(drv->protocol_name, "sheepdog") != 0) {
             error_setg(errp, "backing_file must be a sheepdog image");
             ret = -EINVAL;
             goto out;
         }
 
-        blk = blk_new_open(backing_file, NULL, NULL,
+        blk = blk_new_open(opts->backing_file, NULL, NULL,
                            BDRV_O_PROTOCOL, errp);
         if (blk == NULL) {
             ret = -EIO;
@@ -2107,15 +2140,96 @@ static int sd_create(const char *filename, QemuOpts *opts,
     }
 
     if (prealloc) {
-        ret = sd_prealloc(filename, errp);
+        ret = sd_create_prealloc(opts->location, opts->size, errp);
     }
 out:
     g_free(backing_file);
     g_free(buf);
+    g_free(s->addr);
     g_free(s);
     return ret;
 }
 
+static int coroutine_fn sd_co_create_opts(BlockDriver *drv,
+                                          const char *filename,
+                                          QemuOpts *opts,
+                                          Error **errp)
+{
+    BlockdevCreateOptions *create_options = NULL;
+    QDict *qdict, *location_qdict;
+    Visitor *v;
+    char *redundancy;
+    Error *local_err = NULL;
+    int ret;
+
+    redundancy = qemu_opt_get_del(opts, BLOCK_OPT_REDUNDANCY);
+
+    qdict = qemu_opts_to_qdict(opts, NULL);
+    qdict_put_str(qdict, "driver", "sheepdog");
+
+    location_qdict = qdict_new();
+    qdict_put(qdict, "location", location_qdict);
+
+    sd_parse_filename(filename, location_qdict, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    qdict_flatten(qdict);
+
+    /* Change legacy command line options into QMP ones */
+    static const QDictRenames opt_renames[] = {
+        { BLOCK_OPT_BACKING_FILE,       "backing-file" },
+        { BLOCK_OPT_OBJECT_SIZE,        "object-size" },
+        { NULL, NULL },
+    };
+
+    if (!qdict_rename_keys(qdict, opt_renames, errp)) {
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    /* Get the QAPI object */
+    v = qobject_input_visitor_new_flat_confused(qdict, errp);
+    if (!v) {
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    visit_type_BlockdevCreateOptions(v, NULL, &create_options, &local_err);
+    visit_free(v);
+
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    assert(create_options->driver == BLOCKDEV_DRIVER_SHEEPDOG);
+    create_options->u.sheepdog.size =
+        ROUND_UP(create_options->u.sheepdog.size, BDRV_SECTOR_SIZE);
+
+    if (redundancy) {
+        create_options->u.sheepdog.has_redundancy = true;
+        create_options->u.sheepdog.redundancy =
+            parse_redundancy_str(redundancy);
+        if (create_options->u.sheepdog.redundancy == NULL) {
+            error_setg(errp, "Invalid redundancy mode");
+            ret = -EINVAL;
+            goto fail;
+        }
+    }
+
+    ret = sd_co_create(create_options, errp);
+fail:
+    qapi_free_BlockdevCreateOptions(create_options);
+    qobject_unref(qdict);
+    g_free(redundancy);
+    return ret;
+}
+
 static void sd_close(BlockDriverState *bs)
 {
     Error *local_err = NULL;
@@ -2125,7 +2239,7 @@ static void sd_close(BlockDriverState *bs)
     unsigned int wlen, rlen = 0;
     int fd, ret;
 
-    DPRINTF("%s\n", s->name);
+    trace_sheepdog_close(s->name);
 
     fd = connect_to_sdog(s, &local_err);
     if (fd < 0) {
@@ -2165,22 +2279,24 @@ static int64_t sd_getlength(BlockDriverState *bs)
     return s->inode.vdi_size;
 }
 
-static int sd_truncate(BlockDriverState *bs, int64_t offset,
-                       PreallocMode prealloc, Error **errp)
+static int coroutine_fn sd_co_truncate(BlockDriverState *bs, int64_t offset,
+                                       bool exact, PreallocMode prealloc,
+                                       BdrvRequestFlags flags, Error **errp)
 {
     BDRVSheepdogState *s = bs->opaque;
     int ret, fd;
     unsigned int datalen;
     uint64_t max_vdi_size;
+    int64_t old_size = s->inode.vdi_size;
 
-    if (prealloc != PREALLOC_MODE_OFF) {
+    if (prealloc != PREALLOC_MODE_OFF && prealloc != PREALLOC_MODE_FULL) {
         error_setg(errp, "Unsupported preallocation mode '%s'",
                    PreallocMode_str(prealloc));
         return -ENOTSUP;
     }
 
     max_vdi_size = (UINT64_C(1) << s->inode.block_size_shift) * MAX_DATA_OBJS;
-    if (offset < s->inode.vdi_size) {
+    if (offset < old_size) {
         error_setg(errp, "shrinking is not supported");
         return -EINVAL;
     } else if (offset > max_vdi_size) {
@@ -2194,7 +2310,7 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset,
     }
 
     /* we don't need to update entire object */
-    datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
+    datalen = SD_INODE_HEADER_SIZE;
     s->inode.vdi_size = offset;
     ret = write_object(fd, s->bs, (char *)&s->inode,
                        vid_to_vdi_oid(s->inode.vdi_id), s->inode.nr_copies,
@@ -2203,9 +2319,17 @@ static int sd_truncate(BlockDriverState *bs, int64_t offset,
 
     if (ret < 0) {
         error_setg_errno(errp, -ret, "failed to update an inode");
+        return ret;
     }
 
-    return ret;
+    if (prealloc == PREALLOC_MODE_FULL) {
+        ret = sd_prealloc(bs, old_size, offset, errp);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    return 0;
 }
 
 /*
@@ -2293,7 +2417,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
     char *buf;
     bool deleted;
 
-    DPRINTF("%" PRIx32 " is snapshot.\n", s->inode.vdi_id);
+    trace_sheepdog_create_branch_snapshot(s->inode.vdi_id);
 
     buf = g_malloc(SD_INODE_SIZE);
 
@@ -2309,7 +2433,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
         goto out;
     }
 
-    DPRINTF("%" PRIx32 " is created.\n", vid);
+    trace_sheepdog_create_branch_created(vid);
 
     fd = connect_to_sdog(s, &local_err);
     if (fd < 0) {
@@ -2331,7 +2455,7 @@ static int sd_create_branch(BDRVSheepdogState *s)
 
     s->is_snapshot = false;
     ret = 0;
-    DPRINTF("%" PRIx32 " was newly created.\n", s->inode.vdi_id);
+    trace_sheepdog_create_branch_new(s->inode.vdi_id);
 
 out:
     g_free(buf);
@@ -2425,11 +2549,11 @@ static void coroutine_fn sd_co_rw_vector(SheepdogAIOCB *acb)
         }
 
         if (create) {
-            DPRINTF("update ino (%" PRIu32 ") %" PRIu64 " %" PRIu64 " %ld\n",
-                    inode->vdi_id, oid,
-                    vid_to_data_oid(inode->data_vdi_id[idx], idx), idx);
+            trace_sheepdog_co_rw_vector_update(inode->vdi_id, oid,
+                                  vid_to_data_oid(inode->data_vdi_id[idx], idx),
+                                  idx);
             oid = vid_to_data_oid(inode->vdi_id, idx);
-            DPRINTF("new oid %" PRIx64 "\n", oid);
+            trace_sheepdog_co_rw_vector_new(oid);
         }
 
         aio_req = alloc_aio_req(s, acb, oid, len, offset, flags, create,
@@ -2463,15 +2587,17 @@ static void sd_aio_complete(SheepdogAIOCB *acb)
 }
 
 static coroutine_fn int sd_co_writev(BlockDriverState *bs, int64_t sector_num,
-                        int nb_sectors, QEMUIOVector *qiov)
+                                     int nb_sectors, QEMUIOVector *qiov,
+                                     int flags)
 {
     SheepdogAIOCB acb;
     int ret;
     int64_t offset = (sector_num + nb_sectors) * BDRV_SECTOR_SIZE;
     BDRVSheepdogState *s = bs->opaque;
 
+    assert(!flags);
     if (offset > s->inode.vdi_size) {
-        ret = sd_truncate(bs, offset, PREALLOC_MODE_OFF, NULL);
+        ret = sd_co_truncate(bs, offset, false, PREALLOC_MODE_OFF, 0, NULL);
         if (ret < 0) {
             return ret;
         }
@@ -2532,9 +2658,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     SheepdogInode *inode;
     unsigned int datalen;
 
-    DPRINTF("sn_info: name %s id_str %s s: name %s vm_state_size %" PRId64 " "
-            "is_snapshot %d\n", sn_info->name, sn_info->id_str,
-            s->name, sn_info->vm_state_size, s->is_snapshot);
+    trace_sheepdog_snapshot_create_info(sn_info->name, sn_info->id_str, s->name,
+                                        sn_info->vm_state_size, s->is_snapshot);
 
     if (s->is_snapshot) {
         error_report("You can't create a snapshot of a snapshot VDI, "
@@ -2543,7 +2668,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
         return -EINVAL;
     }
 
-    DPRINTF("%s %s\n", sn_info->name, sn_info->id_str);
+    trace_sheepdog_snapshot_create(sn_info->name, sn_info->id_str);
 
     s->inode.vm_state_size = sn_info->vm_state_size;
     s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
@@ -2552,7 +2677,7 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
      */
     strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag));
     /* we don't need to update entire object */
-    datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
+    datalen = SD_INODE_HEADER_SIZE;
     inode = g_malloc(datalen);
 
     /* refresh inode. */
@@ -2588,8 +2713,8 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
     }
 
     memcpy(&s->inode, inode, datalen);
-    DPRINTF("s->inode: name %s snap_id %x oid %x\n",
-            s->inode.name, s->inode.snap_id, s->inode.vdi_id);
+    trace_sheepdog_snapshot_create_inode(s->inode.name, s->inode.snap_id,
+                                         s->inode.vdi_id);
 
 cleanup:
     g_free(inode);
@@ -2787,13 +2912,14 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
     QEMUSnapshotInfo *sn_tab = NULL;
     unsigned wlen, rlen;
     int found = 0;
-    static SheepdogInode inode;
+    SheepdogInode *inode;
     unsigned long *vdi_inuse;
     unsigned int start_nr;
     uint64_t hval;
     uint32_t vid;
 
     vdi_inuse = g_malloc(max);
+    inode = g_malloc(SD_INODE_HEADER_SIZE);
 
     fd = connect_to_sdog(s, &local_err);
     if (fd < 0) {
@@ -2836,26 +2962,26 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
         }
 
         /* we don't need to read entire object */
-        ret = read_object(fd, s->bs, (char *)&inode,
+        ret = read_object(fd, s->bs, (char *)inode,
                           vid_to_vdi_oid(vid),
-                          0, SD_INODE_SIZE - sizeof(inode.data_vdi_id), 0,
+                          0, SD_INODE_HEADER_SIZE, 0,
                           s->cache_flags);
 
         if (ret) {
             continue;
         }
 
-        if (!strcmp(inode.name, s->name) && is_snapshot(&inode)) {
-            sn_tab[found].date_sec = inode.snap_ctime >> 32;
-            sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff;
-            sn_tab[found].vm_state_size = inode.vm_state_size;
-            sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec;
+        if (!strcmp(inode->name, s->name) && is_snapshot(inode)) {
+            sn_tab[found].date_sec = inode->snap_ctime >> 32;
+            sn_tab[found].date_nsec = inode->snap_ctime & 0xffffffff;
+            sn_tab[found].vm_state_size = inode->vm_state_size;
+            sn_tab[found].vm_clock_nsec = inode->vm_clock_nsec;
 
             snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str),
-                     "%" PRIu32, inode.snap_id);
+                     "%" PRIu32, inode->snap_id);
             pstrcpy(sn_tab[found].name,
-                    MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)),
-                    inode.tag);
+                    MIN(sizeof(sn_tab[found].name), sizeof(inode->tag)),
+                    inode->tag);
             found++;
         }
     }
@@ -2865,6 +2991,7 @@ out:
     *psn_tab = sn_tab;
 
     g_free(vdi_inuse);
+    g_free(inode);
 
     if (ret < 0) {
         return ret;
@@ -2987,19 +3114,19 @@ static coroutine_fn int sd_co_pdiscard(BlockDriverState *bs, int64_t offset,
     return acb.ret;
 }
 
-static coroutine_fn int64_t
-sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
-                       int *pnum, BlockDriverState **file)
+static coroutine_fn int
+sd_co_block_status(BlockDriverState *bs, bool want_zero, int64_t offset,
+                   int64_t bytes, int64_t *pnum, int64_t *map,
+                   BlockDriverState **file)
 {
     BDRVSheepdogState *s = bs->opaque;
     SheepdogInode *inode = &s->inode;
     uint32_t object_size = (UINT32_C(1) << inode->block_size_shift);
-    uint64_t offset = sector_num * BDRV_SECTOR_SIZE;
     unsigned long start = offset / object_size,
-                  end = DIV_ROUND_UP((sector_num + nb_sectors) *
-                                     BDRV_SECTOR_SIZE, object_size);
+                  end = DIV_ROUND_UP(offset + bytes, object_size);
     unsigned long idx;
-    int64_t ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID | offset;
+    *map = offset;
+    int ret = BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID;
 
     for (idx = start; idx < end; idx++) {
         if (inode->data_vdi_id[idx] == 0) {
@@ -3016,9 +3143,9 @@ sd_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
         }
     }
 
-    *pnum = (idx - start) * object_size / BDRV_SECTOR_SIZE;
-    if (*pnum > nb_sectors) {
-        *pnum = nb_sectors;
+    *pnum = (idx - start) * object_size;
+    if (*pnum > bytes) {
+        *pnum = bytes;
     }
     if (ret > 0 && ret & BDRV_BLOCK_OFFSET_VALID) {
         *file = bs;
@@ -3076,112 +3203,130 @@ static QemuOptsList sd_create_opts = {
     }
 };
 
+static const char *const sd_strong_runtime_opts[] = {
+    "vdi",
+    "snap-id",
+    "tag",
+    "server.",
+
+    NULL
+};
+
 static BlockDriver bdrv_sheepdog = {
-    .format_name    = "sheepdog",
-    .protocol_name  = "sheepdog",
-    .instance_size  = sizeof(BDRVSheepdogState),
-    .bdrv_parse_filename    = sd_parse_filename,
-    .bdrv_file_open = sd_open,
-    .bdrv_reopen_prepare    = sd_reopen_prepare,
-    .bdrv_reopen_commit     = sd_reopen_commit,
-    .bdrv_reopen_abort      = sd_reopen_abort,
-    .bdrv_close     = sd_close,
-    .bdrv_create    = sd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_getlength = sd_getlength,
+    .format_name                  = "sheepdog",
+    .protocol_name                = "sheepdog",
+    .instance_size                = sizeof(BDRVSheepdogState),
+    .bdrv_parse_filename          = sd_parse_filename,
+    .bdrv_file_open               = sd_open,
+    .bdrv_reopen_prepare          = sd_reopen_prepare,
+    .bdrv_reopen_commit           = sd_reopen_commit,
+    .bdrv_reopen_abort            = sd_reopen_abort,
+    .bdrv_close                   = sd_close,
+    .bdrv_co_create               = sd_co_create,
+    .bdrv_co_create_opts          = sd_co_create_opts,
+    .bdrv_has_zero_init           = bdrv_has_zero_init_1,
+    .bdrv_has_zero_init_truncate  = bdrv_has_zero_init_1,
+    .bdrv_getlength               = sd_getlength,
     .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
-    .bdrv_truncate  = sd_truncate,
+    .bdrv_co_truncate             = sd_co_truncate,
 
-    .bdrv_co_readv  = sd_co_readv,
-    .bdrv_co_writev = sd_co_writev,
-    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
-    .bdrv_co_pdiscard = sd_co_pdiscard,
-    .bdrv_co_get_block_status = sd_co_get_block_status,
+    .bdrv_co_readv                = sd_co_readv,
+    .bdrv_co_writev               = sd_co_writev,
+    .bdrv_co_flush_to_disk        = sd_co_flush_to_disk,
+    .bdrv_co_pdiscard             = sd_co_pdiscard,
+    .bdrv_co_block_status         = sd_co_block_status,
 
-    .bdrv_snapshot_create   = sd_snapshot_create,
-    .bdrv_snapshot_goto     = sd_snapshot_goto,
-    .bdrv_snapshot_delete   = sd_snapshot_delete,
-    .bdrv_snapshot_list     = sd_snapshot_list,
+    .bdrv_snapshot_create         = sd_snapshot_create,
+    .bdrv_snapshot_goto           = sd_snapshot_goto,
+    .bdrv_snapshot_delete         = sd_snapshot_delete,
+    .bdrv_snapshot_list           = sd_snapshot_list,
 
-    .bdrv_save_vmstate  = sd_save_vmstate,
-    .bdrv_load_vmstate  = sd_load_vmstate,
+    .bdrv_save_vmstate            = sd_save_vmstate,
+    .bdrv_load_vmstate            = sd_load_vmstate,
 
-    .bdrv_detach_aio_context = sd_detach_aio_context,
-    .bdrv_attach_aio_context = sd_attach_aio_context,
+    .bdrv_detach_aio_context      = sd_detach_aio_context,
+    .bdrv_attach_aio_context      = sd_attach_aio_context,
 
-    .create_opts    = &sd_create_opts,
+    .create_opts                  = &sd_create_opts,
+    .strong_runtime_opts          = sd_strong_runtime_opts,
 };
 
 static BlockDriver bdrv_sheepdog_tcp = {
-    .format_name    = "sheepdog",
-    .protocol_name  = "sheepdog+tcp",
-    .instance_size  = sizeof(BDRVSheepdogState),
-    .bdrv_parse_filename    = sd_parse_filename,
-    .bdrv_file_open = sd_open,
-    .bdrv_reopen_prepare    = sd_reopen_prepare,
-    .bdrv_reopen_commit     = sd_reopen_commit,
-    .bdrv_reopen_abort      = sd_reopen_abort,
-    .bdrv_close     = sd_close,
-    .bdrv_create    = sd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_getlength = sd_getlength,
+    .format_name                  = "sheepdog",
+    .protocol_name                = "sheepdog+tcp",
+    .instance_size                = sizeof(BDRVSheepdogState),
+    .bdrv_parse_filename          = sd_parse_filename,
+    .bdrv_file_open               = sd_open,
+    .bdrv_reopen_prepare          = sd_reopen_prepare,
+    .bdrv_reopen_commit           = sd_reopen_commit,
+    .bdrv_reopen_abort            = sd_reopen_abort,
+    .bdrv_close                   = sd_close,
+    .bdrv_co_create               = sd_co_create,
+    .bdrv_co_create_opts          = sd_co_create_opts,
+    .bdrv_has_zero_init           = bdrv_has_zero_init_1,
+    .bdrv_has_zero_init_truncate  = bdrv_has_zero_init_1,
+    .bdrv_getlength               = sd_getlength,
     .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
-    .bdrv_truncate  = sd_truncate,
+    .bdrv_co_truncate             = sd_co_truncate,
 
-    .bdrv_co_readv  = sd_co_readv,
-    .bdrv_co_writev = sd_co_writev,
-    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
-    .bdrv_co_pdiscard = sd_co_pdiscard,
-    .bdrv_co_get_block_status = sd_co_get_block_status,
+    .bdrv_co_readv                = sd_co_readv,
+    .bdrv_co_writev               = sd_co_writev,
+    .bdrv_co_flush_to_disk        = sd_co_flush_to_disk,
+    .bdrv_co_pdiscard             = sd_co_pdiscard,
+    .bdrv_co_block_status         = sd_co_block_status,
 
-    .bdrv_snapshot_create   = sd_snapshot_create,
-    .bdrv_snapshot_goto     = sd_snapshot_goto,
-    .bdrv_snapshot_delete   = sd_snapshot_delete,
-    .bdrv_snapshot_list     = sd_snapshot_list,
+    .bdrv_snapshot_create         = sd_snapshot_create,
+    .bdrv_snapshot_goto           = sd_snapshot_goto,
+    .bdrv_snapshot_delete         = sd_snapshot_delete,
+    .bdrv_snapshot_list           = sd_snapshot_list,
 
-    .bdrv_save_vmstate  = sd_save_vmstate,
-    .bdrv_load_vmstate  = sd_load_vmstate,
+    .bdrv_save_vmstate            = sd_save_vmstate,
+    .bdrv_load_vmstate            = sd_load_vmstate,
 
-    .bdrv_detach_aio_context = sd_detach_aio_context,
-    .bdrv_attach_aio_context = sd_attach_aio_context,
+    .bdrv_detach_aio_context      = sd_detach_aio_context,
+    .bdrv_attach_aio_context      = sd_attach_aio_context,
 
-    .create_opts    = &sd_create_opts,
+    .create_opts                  = &sd_create_opts,
+    .strong_runtime_opts          = sd_strong_runtime_opts,
 };
 
 static BlockDriver bdrv_sheepdog_unix = {
-    .format_name    = "sheepdog",
-    .protocol_name  = "sheepdog+unix",
-    .instance_size  = sizeof(BDRVSheepdogState),
-    .bdrv_parse_filename    = sd_parse_filename,
-    .bdrv_file_open = sd_open,
-    .bdrv_reopen_prepare    = sd_reopen_prepare,
-    .bdrv_reopen_commit     = sd_reopen_commit,
-    .bdrv_reopen_abort      = sd_reopen_abort,
-    .bdrv_close     = sd_close,
-    .bdrv_create    = sd_create,
-    .bdrv_has_zero_init = bdrv_has_zero_init_1,
-    .bdrv_getlength = sd_getlength,
+    .format_name                  = "sheepdog",
+    .protocol_name                = "sheepdog+unix",
+    .instance_size                = sizeof(BDRVSheepdogState),
+    .bdrv_parse_filename          = sd_parse_filename,
+    .bdrv_file_open               = sd_open,
+    .bdrv_reopen_prepare          = sd_reopen_prepare,
+    .bdrv_reopen_commit           = sd_reopen_commit,
+    .bdrv_reopen_abort            = sd_reopen_abort,
+    .bdrv_close                   = sd_close,
+    .bdrv_co_create               = sd_co_create,
+    .bdrv_co_create_opts          = sd_co_create_opts,
+    .bdrv_has_zero_init           = bdrv_has_zero_init_1,
+    .bdrv_has_zero_init_truncate  = bdrv_has_zero_init_1,
+    .bdrv_getlength               = sd_getlength,
     .bdrv_get_allocated_file_size = sd_get_allocated_file_size,
-    .bdrv_truncate  = sd_truncate,
+    .bdrv_co_truncate             = sd_co_truncate,
 
-    .bdrv_co_readv  = sd_co_readv,
-    .bdrv_co_writev = sd_co_writev,
-    .bdrv_co_flush_to_disk  = sd_co_flush_to_disk,
-    .bdrv_co_pdiscard = sd_co_pdiscard,
-    .bdrv_co_get_block_status = sd_co_get_block_status,
+    .bdrv_co_readv                = sd_co_readv,
+    .bdrv_co_writev               = sd_co_writev,
+    .bdrv_co_flush_to_disk        = sd_co_flush_to_disk,
+    .bdrv_co_pdiscard             = sd_co_pdiscard,
+    .bdrv_co_block_status         = sd_co_block_status,
 
-    .bdrv_snapshot_create   = sd_snapshot_create,
-    .bdrv_snapshot_goto     = sd_snapshot_goto,
-    .bdrv_snapshot_delete   = sd_snapshot_delete,
-    .bdrv_snapshot_list     = sd_snapshot_list,
+    .bdrv_snapshot_create         = sd_snapshot_create,
+    .bdrv_snapshot_goto           = sd_snapshot_goto,
+    .bdrv_snapshot_delete         = sd_snapshot_delete,
+    .bdrv_snapshot_list           = sd_snapshot_list,
 
-    .bdrv_save_vmstate  = sd_save_vmstate,
-    .bdrv_load_vmstate  = sd_load_vmstate,
+    .bdrv_save_vmstate            = sd_save_vmstate,
+    .bdrv_load_vmstate            = sd_load_vmstate,
 
-    .bdrv_detach_aio_context = sd_detach_aio_context,
-    .bdrv_attach_aio_context = sd_attach_aio_context,
+    .bdrv_detach_aio_context      = sd_detach_aio_context,
+    .bdrv_attach_aio_context      = sd_attach_aio_context,
 
-    .create_opts    = &sd_create_opts,
+    .create_opts                  = &sd_create_opts,
+    .strong_runtime_opts          = sd_strong_runtime_opts,
 };
 
 static void bdrv_sheepdog_init(void)