git.proxmox.com Git - mirror_qemu.git/blobdiff - block.c
blockjob: Don't touch BDS iostatus
[mirror_qemu.git] / block.c
diff --git a/block.c b/block.c
index 4e27c49a753f3c6540a1cb96c387c14888e695a9..fd4cf81096442edfdd93ae3ee57f60dc59641f26 100644 (file)
--- a/block.c
+++ b/block.c
@@ -38,7 +38,6 @@
 #include "qmp-commands.h"
 #include "qemu/timer.h"
 #include "qapi-event.h"
-#include "block/throttle-groups.h"
 #include "qemu/cutils.h"
 #include "qemu/id.h"
 
@@ -218,8 +217,6 @@ void bdrv_get_full_backing_filename(BlockDriverState *bs, char *dest, size_t sz,
 
 void bdrv_register(BlockDriver *bdrv)
 {
-    bdrv_setup_io_funcs(bdrv);
-
     QLIST_INSERT_HEAD(&bdrv_drivers, bdrv, list);
 }
 
@@ -239,8 +236,6 @@ BlockDriverState *bdrv_new(void)
         QLIST_INIT(&bs->op_blockers[i]);
     }
     notifier_with_return_list_init(&bs->before_write_notifiers);
-    qemu_co_queue_init(&bs->throttled_reqs[0]);
-    qemu_co_queue_init(&bs->throttled_reqs[1]);
     bs->refcnt = 1;
     bs->aio_context = qemu_get_aio_context();
 
@@ -645,21 +640,23 @@ int bdrv_parse_discard_flags(const char *mode, int *flags)
  *
  * Return 0 on success, -1 if the cache mode was invalid.
  */
-int bdrv_parse_cache_flags(const char *mode, int *flags)
+int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
 {
     *flags &= ~BDRV_O_CACHE_MASK;
 
     if (!strcmp(mode, "off") || !strcmp(mode, "none")) {
-        *flags |= BDRV_O_NOCACHE | BDRV_O_CACHE_WB;
+        *writethrough = false;
+        *flags |= BDRV_O_NOCACHE;
     } else if (!strcmp(mode, "directsync")) {
+        *writethrough = true;
         *flags |= BDRV_O_NOCACHE;
     } else if (!strcmp(mode, "writeback")) {
-        *flags |= BDRV_O_CACHE_WB;
+        *writethrough = false;
     } else if (!strcmp(mode, "unsafe")) {
-        *flags |= BDRV_O_CACHE_WB;
+        *writethrough = false;
         *flags |= BDRV_O_NO_FLUSH;
     } else if (!strcmp(mode, "writethrough")) {
-        /* this is the default */
+        *writethrough = true;
     } else {
         return -1;
     }
@@ -667,23 +664,6 @@ int bdrv_parse_cache_flags(const char *mode, int *flags)
     return 0;
 }
 
-int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough)
-{
-    int ret = bdrv_parse_cache_flags(mode, flags);
-    if (ret < 0) {
-        return ret;
-    }
-
-    if (*flags & BDRV_O_CACHE_WB) {
-        *flags &= ~BDRV_O_CACHE_WB;
-        *writethrough = false;
-    } else {
-        *writethrough = true;
-    }
-
-    return 0;
-}
-
 /*
  * Returns the options and flags that a temporary snapshot should get, based on
  * the originally requested flags (the originally requested image will have
@@ -695,7 +675,6 @@ static void bdrv_temp_snapshot_options(int *child_flags, QDict *child_options,
     *child_flags = (parent_flags & ~BDRV_O_SNAPSHOT) | BDRV_O_TEMPORARY;
 
     /* For temporary files, unconditional cache=unsafe is fine */
-    qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
     qdict_set_default_str(child_options, BDRV_OPT_CACHE_DIRECT, "off");
     qdict_set_default_str(child_options, BDRV_OPT_CACHE_NO_FLUSH, "on");
 }
@@ -720,7 +699,6 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options,
     /* Our block drivers take care to send flushes and respect unmap policy,
      * so we can default to enable both on lower layers regardless of the
      * corresponding parent options. */
-    qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
     flags |= BDRV_O_UNMAP;
 
     /* Clear flags that only apply to the top layer */
@@ -763,7 +741,6 @@ static void bdrv_backing_options(int *child_flags, QDict *child_options,
 
     /* The cache mode is inherited unmodified for backing files; except WCE,
      * which is only applied on the top level (BlockBackend) */
-    qdict_set_default_str(child_options, BDRV_OPT_CACHE_WB, "on");
     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_DIRECT);
     qdict_copy_default(child_options, parent_options, BDRV_OPT_CACHE_NO_FLUSH);
 
@@ -782,7 +759,7 @@ static const BdrvChildRole child_backing = {
 
 static int bdrv_open_flags(BlockDriverState *bs, int flags)
 {
-    int open_flags = flags | BDRV_O_CACHE_WB;
+    int open_flags = flags;
 
     /*
      * Clear flags that are internal to the block layer before opening the
@@ -804,11 +781,6 @@ static void update_flags_from_options(int *flags, QemuOpts *opts)
 {
     *flags &= ~BDRV_O_CACHE_MASK;
 
-    assert(qemu_opt_find(opts, BDRV_OPT_CACHE_WB));
-    if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_WB, false)) {
-        *flags |= BDRV_O_CACHE_WB;
-    }
-
     assert(qemu_opt_find(opts, BDRV_OPT_CACHE_NO_FLUSH));
     if (qemu_opt_get_bool(opts, BDRV_OPT_CACHE_NO_FLUSH, false)) {
         *flags |= BDRV_O_NO_FLUSH;
@@ -822,10 +794,6 @@ static void update_flags_from_options(int *flags, QemuOpts *opts)
 
 static void update_options_from_flags(QDict *options, int flags)
 {
-    if (!qdict_haskey(options, BDRV_OPT_CACHE_WB)) {
-        qdict_put(options, BDRV_OPT_CACHE_WB,
-                  qbool_from_bool(flags & BDRV_O_CACHE_WB));
-    }
     if (!qdict_haskey(options, BDRV_OPT_CACHE_DIRECT)) {
         qdict_put(options, BDRV_OPT_CACHE_DIRECT,
                   qbool_from_bool(flags & BDRV_O_NOCACHE));
@@ -887,11 +855,6 @@ static QemuOptsList bdrv_runtime_opts = {
             .type = QEMU_OPT_STRING,
             .help = "Block driver to use for the node",
         },
-        {
-            .name = BDRV_OPT_CACHE_WB,
-            .type = QEMU_OPT_BOOL,
-            .help = "Enable writeback mode",
-        },
         {
             .name = BDRV_OPT_CACHE_DIRECT,
             .type = QEMU_OPT_BOOL,
@@ -999,14 +962,6 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
     /* Apply cache mode options */
     update_flags_from_options(&bs->open_flags, opts);
 
-    if (!bs->blk && (bs->open_flags & BDRV_O_CACHE_WB) == 0) {
-        error_setg(errp, "Can't set writethrough mode except for the root");
-        ret = -EINVAL;
-        goto free_and_fail;
-    }
-
-    bdrv_set_enable_write_cache(bs, bs->open_flags & BDRV_O_CACHE_WB);
-
     /* Open the image, either directly or using a protocol */
     open_flags = bdrv_open_flags(bs, bs->open_flags);
     if (drv->bdrv_file_open) {
@@ -1216,10 +1171,10 @@ BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
     return child;
 }
 
-static BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
-                                    BlockDriverState *child_bs,
-                                    const char *child_name,
-                                    const BdrvChildRole *child_role)
+BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
+                             BlockDriverState *child_bs,
+                             const char *child_name,
+                             const BdrvChildRole *child_role)
 {
     BdrvChild *child = bdrv_root_attach_child(child_bs, child_name, child_role);
     QLIST_INSERT_HEAD(&parent_bs->children, child, next);
@@ -1259,6 +1214,27 @@ void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
     bdrv_root_unref_child(child);
 }
 
+
+static void bdrv_parent_cb_change_media(BlockDriverState *bs, bool load)
+{
+    BdrvChild *c;
+    QLIST_FOREACH(c, &bs->parents, next_parent) {
+        if (c->role->change_media) {
+            c->role->change_media(c, load);
+        }
+    }
+}
+
+static void bdrv_parent_cb_resize(BlockDriverState *bs)
+{
+    BdrvChild *c;
+    QLIST_FOREACH(c, &bs->parents, next_parent) {
+        if (c->role->resize) {
+            c->role->resize(c);
+        }
+    }
+}
+
 /*
  * Sets the backing file link of a BDS. A new reference is created; callers
  * which don't need their own reference any more must call bdrv_unref().
@@ -1566,6 +1542,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
         if (!bs) {
             return -ENODEV;
         }
+
         bdrv_ref(bs);
         *pbs = bs;
         return 0;
@@ -1717,9 +1694,7 @@ static int bdrv_open_inherit(BlockDriverState **pbs, const char *filename,
     }
 
     if (!bdrv_key_required(bs)) {
-        if (bs->blk) {
-            blk_dev_change_media_cb(bs->blk, true);
-        }
+        bdrv_parent_cb_change_media(bs, true);
     } else if (!runstate_check(RUN_STATE_PRELAUNCH)
                && !runstate_check(RUN_STATE_INMIGRATE)
                && !runstate_check(RUN_STATE_PAUSED)) { /* HACK */
@@ -2028,16 +2003,6 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
 
     update_flags_from_options(&reopen_state->flags, opts);
 
-    /* WCE is a BlockBackend level option, can't change it */
-    bool old_wce = bdrv_enable_write_cache(reopen_state->bs);
-    bool new_wce = (reopen_state->flags & BDRV_O_CACHE_WB);
-
-    if (old_wce != new_wce) {
-        error_setg(errp, "Cannot change cache.writeback");
-        ret = -EINVAL;
-        goto error;
-    }
-
     /* node-name and driver must be unchanged. Put them back into the QDict, so
      * that they are checked at the end of this function. */
     value = qemu_opt_get(opts, "node-name");
@@ -2139,8 +2104,6 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)
     reopen_state->bs->open_flags         = reopen_state->flags;
     reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
 
-    bdrv_set_enable_write_cache(reopen_state->bs,
-                                !!(reopen_state->flags & BDRV_O_CACHE_WB));
     bdrv_refresh_limits(reopen_state->bs, NULL);
 }
 
@@ -2170,11 +2133,6 @@ static void bdrv_close(BlockDriverState *bs)
 
     assert(!bs->job);
 
-    /* Disable I/O limits and drain all pending throttled requests */
-    if (bs->throttle_state) {
-        bdrv_io_limits_disable(bs);
-    }
-
     bdrv_drained_begin(bs); /* complete I/O */
     bdrv_flush(bs);
     bdrv_drain(bs); /* in case flush left pending I/O */
@@ -2182,9 +2140,7 @@ static void bdrv_close(BlockDriverState *bs)
     bdrv_release_named_dirty_bitmaps(bs);
     assert(QLIST_EMPTY(&bs->dirty_bitmaps));
 
-    if (bs->blk) {
-        blk_dev_change_media_cb(bs->blk, false);
-    }
+    bdrv_parent_cb_change_media(bs, false);
 
     if (bs->drv) {
         BdrvChild *child, *next;
@@ -2265,13 +2221,6 @@ void bdrv_close_all(void)
     }
 }
 
-/* Fields that need to stay with the top-level BDS */
-static void bdrv_move_feature_fields(BlockDriverState *bs_dest,
-                                     BlockDriverState *bs_src)
-{
-    /* move some fields that need to stay attached to the device */
-}
-
 static void change_parent_backing_link(BlockDriverState *from,
                                        BlockDriverState *to)
 {
@@ -2295,23 +2244,6 @@ static void change_parent_backing_link(BlockDriverState *from,
     }
 }
 
-static void swap_feature_fields(BlockDriverState *bs_top,
-                                BlockDriverState *bs_new)
-{
-    BlockDriverState tmp;
-
-    bdrv_move_feature_fields(&tmp, bs_top);
-    bdrv_move_feature_fields(bs_top, bs_new);
-    bdrv_move_feature_fields(bs_new, &tmp);
-
-    assert(!bs_new->throttle_state);
-    if (bs_top->throttle_state) {
-        assert(bs_top->io_limits_enabled);
-        bdrv_io_limits_enable(bs_new, throttle_group_get_name(bs_top));
-        bdrv_io_limits_disable(bs_top);
-    }
-}
-
 /*
  * Add new bs contents at the top of an image chain while the chain is
  * live, while keeping required fields on the top layer.
@@ -2334,11 +2266,8 @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
     assert(!bdrv_requests_pending(bs_new));
 
     bdrv_ref(bs_top);
-    change_parent_backing_link(bs_top, bs_new);
-
-    /* Some fields always stay on top of the backing file chain */
-    swap_feature_fields(bs_top, bs_new);
 
+    change_parent_backing_link(bs_top, bs_new);
     bdrv_set_backing_hd(bs_new, bs_top);
     bdrv_unref(bs_top);
 
@@ -2354,16 +2283,6 @@ void bdrv_replace_in_backing_chain(BlockDriverState *old, BlockDriverState *new)
 
     bdrv_ref(old);
 
-    if (old->blk) {
-        /* As long as these fields aren't in BlockBackend, but in the top-level
-         * BlockDriverState, it's not possible for a BDS to have two BBs.
-         *
-         * We really want to copy the fields from old to new, but we go for a
-         * swap instead so that pointers aren't duplicated and cause trouble.
-         * (Also, bdrv_swap() used to do the same.) */
-        assert(!new->blk);
-        swap_feature_fields(old, new);
-    }
     change_parent_backing_link(old, new);
 
     /* Change backing files if a previously independent node is added to the
@@ -2672,9 +2591,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset)
     if (ret == 0) {
         ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
         bdrv_dirty_bitmap_truncate(bs);
-        if (bs->blk) {
-            blk_dev_resize_cb(bs->blk);
-        }
+        bdrv_parent_cb_resize(bs);
     }
     return ret;
 }
@@ -2747,29 +2664,6 @@ int bdrv_is_sg(BlockDriverState *bs)
     return bs->sg;
 }
 
-int bdrv_enable_write_cache(BlockDriverState *bs)
-{
-    if (bs->blk) {
-        return blk_enable_write_cache(bs->blk);
-    } else {
-        return true;
-    }
-}
-
-void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
-{
-    if (bs->blk) {
-        blk_set_enable_write_cache(bs->blk, wce);
-    }
-
-    /* so a reopen() will preserve wce */
-    if (wce) {
-        bs->open_flags |= BDRV_O_CACHE_WB;
-    } else {
-        bs->open_flags &= ~BDRV_O_CACHE_WB;
-    }
-}
-
 int bdrv_is_encrypted(BlockDriverState *bs)
 {
     if (bs->backing && bs->backing->bs->encrypted) {
@@ -2807,11 +2701,9 @@ int bdrv_set_key(BlockDriverState *bs, const char *key)
     if (ret < 0) {
         bs->valid_key = 0;
     } else if (!bs->valid_key) {
+        /* call the change callback now, we skipped it on open */
         bs->valid_key = 1;
-        if (bs->blk) {
-            /* call the change callback now, we skipped it on open */
-            blk_dev_change_media_cb(bs->blk, true);
-        }
+        bdrv_parent_cb_change_media(bs, true);
     }
     return ret;
 }
@@ -3002,10 +2894,28 @@ const char *bdrv_get_node_name(const BlockDriverState *bs)
     return bs->node_name;
 }
 
+static const char *bdrv_get_parent_name(const BlockDriverState *bs)
+{
+    BdrvChild *c;
+    const char *name;
+
+    /* If multiple parents have a name, just pick the first one. */
+    QLIST_FOREACH(c, &bs->parents, next_parent) {
+        if (c->role->get_name) {
+            name = c->role->get_name(c);
+            if (name && *name) {
+                return name;
+            }
+        }
+    }
+
+    return NULL;
+}
+
 /* TODO check what callers really want: bs->node_name or blk_name() */
 const char *bdrv_get_device_name(const BlockDriverState *bs)
 {
-    return bs->blk ? blk_name(bs->blk) : "";
+    return bdrv_get_parent_name(bs) ?: "";
 }
 
 /* This can be used to identify nodes that might not have a device
@@ -3014,7 +2924,7 @@ const char *bdrv_get_device_name(const BlockDriverState *bs)
  * absent, then this returns an empty (non-null) string. */
 const char *bdrv_get_device_or_node_name(const BlockDriverState *bs)
 {
-    return bs->blk ? blk_name(bs->blk) : bs->node_name;
+    return bdrv_get_parent_name(bs) ?: bs->node_name;
 }
 
 int bdrv_get_flags(BlockDriverState *bs)
@@ -3269,6 +3179,7 @@ void bdrv_init_with_whitelist(void)
 
 void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
 {
+    BdrvChild *child;
     Error *local_err = NULL;
     int ret;
 
@@ -3283,13 +3194,20 @@ void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp)
 
     if (bs->drv->bdrv_invalidate_cache) {
         bs->drv->bdrv_invalidate_cache(bs, &local_err);
-    } else if (bs->file) {
-        bdrv_invalidate_cache(bs->file->bs, &local_err);
+        if (local_err) {
+            bs->open_flags |= BDRV_O_INACTIVE;
+            error_propagate(errp, local_err);
+            return;
+        }
     }
-    if (local_err) {
-        bs->open_flags |= BDRV_O_INACTIVE;
-        error_propagate(errp, local_err);
-        return;
+
+    QLIST_FOREACH(child, &bs->children, next) {
+        bdrv_invalidate_cache(child->bs, &local_err);
+        if (local_err) {
+            bs->open_flags |= BDRV_O_INACTIVE;
+            error_propagate(errp, local_err);
+            return;
+        }
     }
 
     ret = refresh_total_sectors(bs, bs->total_sectors);
@@ -3318,38 +3236,63 @@ void bdrv_invalidate_cache_all(Error **errp)
     }
 }
 
-static int bdrv_inactivate(BlockDriverState *bs)
+static int bdrv_inactivate_recurse(BlockDriverState *bs,
+                                   bool setting_flag)
 {
+    BdrvChild *child;
     int ret;
 
-    if (bs->drv->bdrv_inactivate) {
+    if (!setting_flag && bs->drv->bdrv_inactivate) {
         ret = bs->drv->bdrv_inactivate(bs);
         if (ret < 0) {
             return ret;
         }
     }
 
-    bs->open_flags |= BDRV_O_INACTIVE;
+    QLIST_FOREACH(child, &bs->children, next) {
+        ret = bdrv_inactivate_recurse(child->bs, setting_flag);
+        if (ret < 0) {
+            return ret;
+        }
+    }
+
+    if (setting_flag) {
+        bs->open_flags |= BDRV_O_INACTIVE;
+    }
     return 0;
 }
 
 int bdrv_inactivate_all(void)
 {
     BlockDriverState *bs = NULL;
-    int ret;
+    int ret = 0;
+    int pass;
 
     while ((bs = bdrv_next(bs)) != NULL) {
-        AioContext *aio_context = bdrv_get_aio_context(bs);
+        aio_context_acquire(bdrv_get_aio_context(bs));
+    }
 
-        aio_context_acquire(aio_context);
-        ret = bdrv_inactivate(bs);
-        aio_context_release(aio_context);
-        if (ret < 0) {
-            return ret;
+    /* We do two passes of inactivation. The first pass calls the drivers'
+     * .bdrv_inactivate callbacks recursively so all caches are flushed to
+     * disk; the second pass sets the BDRV_O_INACTIVE flag so that no further
+     * writes are allowed. */
+    for (pass = 0; pass < 2; pass++) {
+        bs = NULL;
+        while ((bs = bdrv_next(bs)) != NULL) {
+            ret = bdrv_inactivate_recurse(bs, pass);
+            if (ret < 0) {
+                goto out;
+            }
         }
     }
 
-    return 0;
+out:
+    bs = NULL;
+    while ((bs = bdrv_next(bs)) != NULL) {
+        aio_context_release(bdrv_get_aio_context(bs));
+    }
+
+    return ret;
 }
 
 /**************************************************************/
@@ -3620,7 +3563,7 @@ void bdrv_img_create(const char *filename, const char *fmt,
             }
 
             /* backing files always opened read-only */
-            back_flags = flags | BDRV_O_CACHE_WB;
+            back_flags = flags;
             back_flags &= ~(BDRV_O_RDWR | BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING);
 
             if (backing_fmt) {
@@ -3700,9 +3643,6 @@ void bdrv_detach_aio_context(BlockDriverState *bs)
         baf->detach_aio_context(baf->opaque);
     }
 
-    if (bs->throttle_state) {
-        throttle_timers_detach_aio_context(&bs->throttle_timers);
-    }
     if (bs->drv->bdrv_detach_aio_context) {
         bs->drv->bdrv_detach_aio_context(bs);
     }
@@ -3736,9 +3676,6 @@ void bdrv_attach_aio_context(BlockDriverState *bs,
     if (bs->drv->bdrv_attach_aio_context) {
         bs->drv->bdrv_attach_aio_context(bs, new_context);
     }
-    if (bs->throttle_state) {
-        throttle_timers_attach_aio_context(&bs->throttle_timers, new_context);
-    }
 
     QLIST_FOREACH(ban, &bs->aio_notifiers, list) {
         ban->attached_aio_context(new_context, ban->opaque);
@@ -4049,3 +3986,52 @@ void bdrv_refresh_filename(BlockDriverState *bs)
         QDECREF(json);
     }
 }
+
+/*
+ * Hot add/remove a BDS's child, so that the user can take a child offline
+ * when it is broken and bring a new child online.
+ */
+void bdrv_add_child(BlockDriverState *parent_bs, BlockDriverState *child_bs,
+                    Error **errp)
+{
+
+    if (!parent_bs->drv || !parent_bs->drv->bdrv_add_child) {
+        error_setg(errp, "The node %s does not support adding a child",
+                   bdrv_get_device_or_node_name(parent_bs));
+        return;
+    }
+
+    if (!QLIST_EMPTY(&child_bs->parents)) {
+        error_setg(errp, "The node %s already has a parent",
+                   child_bs->node_name);
+        return;
+    }
+
+    parent_bs->drv->bdrv_add_child(parent_bs, child_bs, errp);
+}
+
+void bdrv_del_child(BlockDriverState *parent_bs, BdrvChild *child, Error **errp)
+{
+    BdrvChild *tmp;
+
+    if (!parent_bs->drv || !parent_bs->drv->bdrv_del_child) {
+        error_setg(errp, "The node %s does not support removing a child",
+                   bdrv_get_device_or_node_name(parent_bs));
+        return;
+    }
+
+    QLIST_FOREACH(tmp, &parent_bs->children, next) {
+        if (tmp == child) {
+            break;
+        }
+    }
+
+    if (!tmp) {
+        error_setg(errp, "The node %s does not have a child named %s",
+                   bdrv_get_device_or_node_name(parent_bs),
+                   bdrv_get_device_or_node_name(child->bs));
+        return;
+    }
+
+    parent_bs->drv->bdrv_del_child(parent_bs, child, errp);
+}