]> git.proxmox.com Git - mirror_qemu.git/blobdiff - block/blkdebug.c
blkdebug: Add pass-through write_zero and discard support
[mirror_qemu.git] / block / blkdebug.c
index 20d25bda67fb9e442903d6d9bdf51cc3daef3b0c..a3dc5f6677d3008a04476e75dbdde69eec5cc9d7 100644 (file)
@@ -1,6 +1,7 @@
 /*
  * Block protocol for I/O error injection
  *
+ * Copyright (C) 2016-2017 Red Hat, Inc.
  * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
 typedef struct BDRVBlkdebugState {
     int state;
     int new_state;
+    int align;
+
+    /* For blkdebug_refresh_filename() */
+    char *config_file;
 
     QLIST_HEAD(, BlkdebugRule) rules[BLKDBG__MAX];
     QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
@@ -45,7 +50,6 @@ typedef struct BDRVBlkdebugState {
 
 typedef struct BlkdebugAIOCB {
     BlockAIOCB common;
-    QEMUBH *bh;
     int ret;
 } BlkdebugAIOCB;
 
@@ -55,10 +59,6 @@ typedef struct BlkdebugSuspendedReq {
     QLIST_ENTRY(BlkdebugSuspendedReq) next;
 } BlkdebugSuspendedReq;
 
-static const AIOCBInfo blkdebug_aiocb_info = {
-    .aiocb_size    = sizeof(BlkdebugAIOCB),
-};
-
 enum {
     ACTION_INJECT_ERROR,
     ACTION_SET_STATE,
@@ -74,7 +74,7 @@ typedef struct BlkdebugRule {
             int error;
             int immediately;
             int once;
-            int64_t sector;
+            int64_t offset;
         } inject;
         struct {
             int new_state;
@@ -171,6 +171,7 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
     const char* event_name;
     BlkdebugEvent event;
     struct BlkdebugRule *rule;
+    int64_t sector;
 
     /* Find the right event for the rule */
     event_name = qemu_opt_get(opts, "event");
@@ -197,7 +198,9 @@ static int add_rule(void *opaque, QemuOpts *opts, Error **errp)
         rule->options.inject.once  = qemu_opt_get_bool(opts, "once", 0);
         rule->options.inject.immediately =
             qemu_opt_get_bool(opts, "immediately", 0);
-        rule->options.inject.sector = qemu_opt_get_number(opts, "sector", -1);
+        sector = qemu_opt_get_number(opts, "sector", -1);
+        rule->options.inject.offset =
+            sector == -1 ? -1 : sector * BDRV_SECTOR_SIZE;
         break;
 
     case ACTION_SET_STATE:
@@ -299,7 +302,7 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,
     if (!strstart(filename, "blkdebug:", &filename)) {
         /* There was no prefix; therefore, all options have to be already
            present in the QDict (except for the filename) */
-        qdict_put(options, "x-image", qstring_from_str(filename));
+        qdict_put_str(options, "x-image", filename);
         return;
     }
 
@@ -318,7 +321,7 @@ static void blkdebug_parse_filename(const char *filename, QDict *options,
 
     /* TODO Allow multi-level nesting and set file.filename here */
     filename = c + 1;
-    qdict_put(options, "x-image", qstring_from_str(filename));
+    qdict_put_str(options, "x-image", filename);
 }
 
 static QemuOptsList runtime_opts = {
@@ -350,7 +353,6 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
     BDRVBlkdebugState *s = bs->opaque;
     QemuOpts *opts;
     Error *local_err = NULL;
-    const char *config;
     uint64_t align;
     int ret;
 
@@ -363,8 +365,8 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
     }
 
     /* Read rules from config file or command line options */
-    config = qemu_opt_get(opts, "config");
-    ret = read_config(s, config, options, errp);
+    s->config_file = g_strdup(qemu_opt_get(opts, "config"));
+    ret = read_config(s, s->config_file, options, errp);
     if (ret) {
         goto out;
     }
@@ -381,128 +383,190 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
         goto out;
     }
 
+    bs->supported_write_flags = BDRV_REQ_FUA &
+        bs->file->bs->supported_write_flags;
+    bs->supported_zero_flags = (BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
+        bs->file->bs->supported_zero_flags;
+
     /* Set request alignment */
-    align = qemu_opt_get_size(opts, "align", bs->request_alignment);
-    if (align > 0 && align < INT_MAX && !(align & (align - 1))) {
-        bs->request_alignment = align;
-    } else {
+    align = qemu_opt_get_size(opts, "align", 0);
+    if (align < INT_MAX && is_power_of_2(align)) {
+        s->align = align;
+    } else if (align) {
         error_setg(errp, "Invalid alignment");
         ret = -EINVAL;
-        goto fail_unref;
+        goto out;
     }
 
     ret = 0;
     goto out;
 
-fail_unref:
-    bdrv_unref_child(bs, bs->file);
 out:
+    if (ret < 0) {
+        g_free(s->config_file);
+    }
     qemu_opts_del(opts);
     return ret;
 }
 
-static void error_callback_bh(void *opaque)
-{
-    struct BlkdebugAIOCB *acb = opaque;
-    qemu_bh_delete(acb->bh);
-    acb->common.cb(acb->common.opaque, acb->ret);
-    qemu_aio_unref(acb);
-}
-
-static BlockAIOCB *inject_error(BlockDriverState *bs,
-    BlockCompletionFunc *cb, void *opaque, BlkdebugRule *rule)
+static int rule_check(BlockDriverState *bs, uint64_t offset, uint64_t bytes)
 {
     BDRVBlkdebugState *s = bs->opaque;
-    int error = rule->options.inject.error;
-    struct BlkdebugAIOCB *acb;
-    QEMUBH *bh;
-    bool immediately = rule->options.inject.immediately;
+    BlkdebugRule *rule = NULL;
+    int error;
+    bool immediately;
+
+    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
+        uint64_t inject_offset = rule->options.inject.offset;
+
+        if (inject_offset == -1 ||
+            (bytes && inject_offset >= offset &&
+             inject_offset < offset + bytes))
+        {
+            break;
+        }
+    }
+
+    if (!rule || !rule->options.inject.error) {
+        return 0;
+    }
+
+    immediately = rule->options.inject.immediately;
+    error = rule->options.inject.error;
 
     if (rule->options.inject.once) {
         QSIMPLEQ_REMOVE(&s->active_rules, rule, BlkdebugRule, active_next);
         remove_rule(rule);
     }
 
-    if (immediately) {
-        return NULL;
+    if (!immediately) {
+        aio_co_schedule(qemu_get_current_aio_context(), qemu_coroutine_self());
+        qemu_coroutine_yield();
     }
 
-    acb = qemu_aio_get(&blkdebug_aiocb_info, bs, cb, opaque);
-    acb->ret = -error;
+    return -error;
+}
 
-    bh = aio_bh_new(bdrv_get_aio_context(bs), error_callback_bh, acb);
-    acb->bh = bh;
-    qemu_bh_schedule(bh);
+static int coroutine_fn
+blkdebug_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                   QEMUIOVector *qiov, int flags)
+{
+    int err;
 
-    return &acb->common;
+    /* Sanity check block layer guarantees */
+    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
+    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
+    if (bs->bl.max_transfer) {
+        assert(bytes <= bs->bl.max_transfer);
+    }
+
+    err = rule_check(bs, offset, bytes);
+    if (err) {
+        return err;
+    }
+
+    return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
 }
 
-static BlockAIOCB *blkdebug_aio_readv(BlockDriverState *bs,
-    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-    BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn
+blkdebug_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+                    QEMUIOVector *qiov, int flags)
 {
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
+    int err;
 
-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1 ||
-            (rule->options.inject.sector >= sector_num &&
-             rule->options.inject.sector < sector_num + nb_sectors)) {
-            break;
-        }
+    /* Sanity check block layer guarantees */
+    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
+    assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
+    if (bs->bl.max_transfer) {
+        assert(bytes <= bs->bl.max_transfer);
     }
 
-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
+    err = rule_check(bs, offset, bytes);
+    if (err) {
+        return err;
     }
 
-    return bdrv_aio_readv(bs->file->bs, sector_num, qiov, nb_sectors,
-                          cb, opaque);
+    return bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
 }
 
-static BlockAIOCB *blkdebug_aio_writev(BlockDriverState *bs,
-    int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
-    BlockCompletionFunc *cb, void *opaque)
+static int blkdebug_co_flush(BlockDriverState *bs)
 {
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
+    int err = rule_check(bs, 0, 0);
 
-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1 ||
-            (rule->options.inject.sector >= sector_num &&
-             rule->options.inject.sector < sector_num + nb_sectors)) {
-            break;
-        }
+    if (err) {
+        return err;
     }
 
-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
+    return bdrv_co_flush(bs->file->bs);
+}
+
+static int coroutine_fn blkdebug_co_pwrite_zeroes(BlockDriverState *bs,
+                                                  int64_t offset, int count,
+                                                  BdrvRequestFlags flags)
+{
+    uint32_t align = MAX(bs->bl.request_alignment,
+                         bs->bl.pwrite_zeroes_alignment);
+    int err;
+
+    /* Only pass through requests that are larger than requested
+     * preferred alignment (so that we test the fallback to writes on
+     * unaligned portions), and check that the block layer never hands
+     * us anything unaligned that crosses an alignment boundary.  */
+    if (count < align) {
+        assert(QEMU_IS_ALIGNED(offset, align) ||
+               QEMU_IS_ALIGNED(offset + count, align) ||
+               DIV_ROUND_UP(offset, align) ==
+               DIV_ROUND_UP(offset + count, align));
+        return -ENOTSUP;
+    }
+    assert(QEMU_IS_ALIGNED(offset, align));
+    assert(QEMU_IS_ALIGNED(count, align));
+    if (bs->bl.max_pwrite_zeroes) {
+        assert(count <= bs->bl.max_pwrite_zeroes);
     }
 
-    return bdrv_aio_writev(bs->file->bs, sector_num, qiov, nb_sectors,
-                           cb, opaque);
+    err = rule_check(bs, offset, count);
+    if (err) {
+        return err;
+    }
+
+    return bdrv_co_pwrite_zeroes(bs->file, offset, count, flags);
 }
 
-static BlockAIOCB *blkdebug_aio_flush(BlockDriverState *bs,
-    BlockCompletionFunc *cb, void *opaque)
+static int coroutine_fn blkdebug_co_pdiscard(BlockDriverState *bs,
+                                             int64_t offset, int count)
 {
-    BDRVBlkdebugState *s = bs->opaque;
-    BlkdebugRule *rule = NULL;
+    uint32_t align = bs->bl.pdiscard_alignment;
+    int err;
 
-    QSIMPLEQ_FOREACH(rule, &s->active_rules, active_next) {
-        if (rule->options.inject.sector == -1) {
-            break;
-        }
+    /* Only pass through requests that are larger than requested
+     * minimum alignment, and ensure that unaligned requests do not
+     * cross optimum discard boundaries. */
+    if (count < bs->bl.request_alignment) {
+        assert(QEMU_IS_ALIGNED(offset, align) ||
+               QEMU_IS_ALIGNED(offset + count, align) ||
+               DIV_ROUND_UP(offset, align) ==
+               DIV_ROUND_UP(offset + count, align));
+        return -ENOTSUP;
+    }
+    assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
+    assert(QEMU_IS_ALIGNED(count, bs->bl.request_alignment));
+    if (align && count >= align) {
+        assert(QEMU_IS_ALIGNED(offset, align));
+        assert(QEMU_IS_ALIGNED(count, align));
+    }
+    if (bs->bl.max_pdiscard) {
+        assert(count <= bs->bl.max_pdiscard);
     }
 
-    if (rule && rule->options.inject.error) {
-        return inject_error(bs, cb, opaque, rule);
+    err = rule_check(bs, offset, count);
+    if (err) {
+        return err;
     }
 
-    return bdrv_aio_flush(bs->file->bs, cb, opaque);
+    return bdrv_co_pdiscard(bs->file->bs, offset, count);
 }
 
-
 static void blkdebug_close(BlockDriverState *bs)
 {
     BDRVBlkdebugState *s = bs->opaque;
@@ -514,6 +578,8 @@ static void blkdebug_close(BlockDriverState *bs)
             remove_rule(rule);
         }
     }
+
+    g_free(s->config_file);
 }
 
 static void suspend_request(BlockDriverState *bs, BlkdebugRule *rule)
@@ -620,7 +686,7 @@ static int blkdebug_debug_resume(BlockDriverState *bs, const char *tag)
 
     QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, next) {
         if (!strcmp(r->tag, tag)) {
-            qemu_coroutine_enter(r->co, NULL);
+            qemu_coroutine_enter(r->co);
             return 0;
         }
     }
@@ -646,7 +712,7 @@ static int blkdebug_debug_remove_breakpoint(BlockDriverState *bs,
     }
     QLIST_FOREACH_SAFE(r, &s->suspended_reqs, next, r_next) {
         if (!strcmp(r->tag, tag)) {
-            qemu_coroutine_enter(r->co, NULL);
+            qemu_coroutine_enter(r->co);
             ret = 0;
         }
     }
@@ -673,11 +739,12 @@ static int64_t blkdebug_getlength(BlockDriverState *bs)
 
 static int blkdebug_truncate(BlockDriverState *bs, int64_t offset)
 {
-    return bdrv_truncate(bs->file->bs, offset);
+    return bdrv_truncate(bs->file, offset, NULL);
 }
 
 static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
 {
+    BDRVBlkdebugState *s = bs->opaque;
     QDict *opts;
     const QDictEntry *e;
     bool force_json = false;
@@ -699,16 +766,15 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
 
     if (!force_json && bs->file->bs->exact_filename[0]) {
         snprintf(bs->exact_filename, sizeof(bs->exact_filename),
-                 "blkdebug:%s:%s",
-                 qdict_get_try_str(options, "config") ?: "",
+                 "blkdebug:%s:%s", s->config_file ?: "",
                  bs->file->bs->exact_filename);
     }
 
     opts = qdict_new();
-    qdict_put_obj(opts, "driver", QOBJECT(qstring_from_str("blkdebug")));
+    qdict_put_str(opts, "driver", "blkdebug");
 
     QINCREF(bs->file->bs->full_open_options);
-    qdict_put_obj(opts, "image", QOBJECT(bs->file->bs->full_open_options));
+    qdict_put(opts, "image", bs->file->bs->full_open_options);
 
     for (e = qdict_first(options); e; e = qdict_next(options, e)) {
         if (strcmp(qdict_entry_key(e), "x-image")) {
@@ -720,6 +786,15 @@ static void blkdebug_refresh_filename(BlockDriverState *bs, QDict *options)
     bs->full_open_options = opts;
 }
 
+static void blkdebug_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    BDRVBlkdebugState *s = bs->opaque;
+
+    if (s->align) {
+        bs->bl.request_alignment = s->align;
+    }
+}
+
 static int blkdebug_reopen_prepare(BDRVReopenState *reopen_state,
                                    BlockReopenQueue *queue, Error **errp)
 {
@@ -735,13 +810,18 @@ static BlockDriver bdrv_blkdebug = {
     .bdrv_file_open         = blkdebug_open,
     .bdrv_close             = blkdebug_close,
     .bdrv_reopen_prepare    = blkdebug_reopen_prepare,
+    .bdrv_child_perm        = bdrv_filter_default_perms,
+
     .bdrv_getlength         = blkdebug_getlength,
     .bdrv_truncate          = blkdebug_truncate,
     .bdrv_refresh_filename  = blkdebug_refresh_filename,
+    .bdrv_refresh_limits    = blkdebug_refresh_limits,
 
-    .bdrv_aio_readv         = blkdebug_aio_readv,
-    .bdrv_aio_writev        = blkdebug_aio_writev,
-    .bdrv_aio_flush         = blkdebug_aio_flush,
+    .bdrv_co_preadv         = blkdebug_co_preadv,
+    .bdrv_co_pwritev        = blkdebug_co_pwritev,
+    .bdrv_co_flush_to_disk  = blkdebug_co_flush,
+    .bdrv_co_pwrite_zeroes  = blkdebug_co_pwrite_zeroes,
+    .bdrv_co_pdiscard       = blkdebug_co_pdiscard,
 
     .bdrv_debug_event           = blkdebug_debug_event,
     .bdrv_debug_breakpoint      = blkdebug_debug_breakpoint,