#ifndef BLOCK_INT_COMMON_H
#define BLOCK_INT_COMMON_H
-#include "block/accounting.h"
-#include "block/block.h"
-#include "block/aio-wait.h"
-#include "qemu/queue.h"
-#include "qemu/coroutine.h"
-#include "qemu/stats64.h"
-#include "qemu/timer.h"
-#include "qemu/hbitmap.h"
+#include "block/aio.h"
+#include "block/block-common.h"
+#include "block/block-global-state.h"
#include "block/snapshot.h"
-#include "qemu/throttle.h"
+#include "qemu/iov.h"
#include "qemu/rcu.h"
+#include "qemu/stats64.h"
#define BLOCK_FLAG_LAZY_REFCOUNTS 8
* (And this filtered child must then be bs->file or bs->backing.)
*/
bool is_filter;
+ /*
+ * Only makes sense for filter drivers; for all others it must be false.
+ * If true, the filtered child is bs->backing. Otherwise it's bs->file.
+ * Two internal filters use bs->backing as their filtered child and have
+ * this field set to true: mirror_top and commit_top. There are also two
+ * such test filters in tests/unit/test-bdrv-graph-mod.c.
+ *
+ * Never create any more such filters!
+ *
+ * TODO: figure out how to deprecate this behavior and make all filters
+ * work the same way, using bs->file as the filtered child.
+ */
+ bool filtered_child_is_backing;
+
/*
* Set to true if the BlockDriver is a format driver. Format nodes
* generally do not expect their children to be other format nodes
* that it can do IOMMU mapping with VFIO etc., in order to get better
* performance. In the case of VFIO drivers, this callback is used to do
* DMA mapping for hot buffers.
+ *
+ * Returns: true on success, false on failure
*/
- void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
- void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
+ bool (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size,
+ Error **errp);
+ void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host, size_t size);
/*
* This field is modified only under the BQL, and is part of
bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file);
+ /*
+ * Snapshot-access API.
+ *
+ * A block driver may provide a snapshot-access API: special functions to
+ * access some internal "snapshot". The functions are similar to the normal
+ * read/block_status/discard handlers, but get no special handling in the
+ * generic block layer: no serializing, no alignment, no tracked requests.
+ * So a block driver that implements this API is fully responsible for
+ * synchronization between the snapshot-access API and normal I/O requests.
+ *
+ * TODO: To be able to support qcow2's internal snapshots, this API will
+ * need to be extended to:
+ * - be able to select a specific snapshot
+ * - receive the snapshot's actual length (which may differ from bs's
+ * length)
+ */
+ int coroutine_fn (*bdrv_co_preadv_snapshot)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset);
+ int coroutine_fn (*bdrv_co_snapshot_block_status)(BlockDriverState *bs,
+ bool want_zero, int64_t offset, int64_t bytes, int64_t *pnum,
+ int64_t *map, BlockDriverState **file);
+ int coroutine_fn (*bdrv_co_pdiscard_snapshot)(BlockDriverState *bs,
+ int64_t offset, int64_t bytes);
+
/*
* Invalidate any cached meta-data.
*/
- void coroutine_fn (*bdrv_co_invalidate_cache)(BlockDriverState *bs,
- Error **errp);
+ void coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_invalidate_cache)(
+ BlockDriverState *bs, Error **errp);
/*
* Flushes all data for all layers by calling bdrv_co_flush for underlying
Error **errp);
BlockStatsSpecific *(*bdrv_get_specific_stats)(BlockDriverState *bs);
- int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
- QEMUIOVector *qiov,
- int64_t pos);
- int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
- QEMUIOVector *qiov,
- int64_t pos);
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_save_vmstate)(
+ BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
+
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_load_vmstate)(
+ BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
/* removable device specific */
bool (*bdrv_is_inserted)(BlockDriverState *bs);
* Returns 0 for completed check, -errno for internal errors.
* The check results are stored in result.
*/
- int coroutine_fn (*bdrv_co_check)(BlockDriverState *bs,
- BdrvCheckResult *result,
- BdrvCheckMode fix);
+ int coroutine_fn GRAPH_RDLOCK_PTR (*bdrv_co_check)(
+ BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix);
void (*bdrv_debug_event)(BlockDriverState *bs, BlkdebugEvent event);
void (*bdrv_io_unplug)(BlockDriverState *bs);
/**
- * bdrv_co_drain_begin is called if implemented in the beginning of a
+ * bdrv_drain_begin is called, if implemented, at the beginning of a
* drain operation to drain and stop any internal sources of requests in
* the driver.
- * bdrv_co_drain_end is called if implemented at the end of the drain.
+ * bdrv_drain_end is called, if implemented, at the end of the drain.
*
* They should be used by the driver to e.g. manage scheduled I/O
* requests, or toggle an internal state. After the end of the drain new
* requests will continue normally.
+ *
+ * Implementations of both functions must not call aio_poll().
*/
- void coroutine_fn (*bdrv_co_drain_begin)(BlockDriverState *bs);
- void coroutine_fn (*bdrv_co_drain_end)(BlockDriverState *bs);
+ void (*bdrv_drain_begin)(BlockDriverState *bs);
+ void (*bdrv_drain_end)(BlockDriverState *bs);
bool (*bdrv_supports_persistent_dirty_bitmap)(BlockDriverState *bs);
- bool (*bdrv_co_can_store_new_dirty_bitmap)(BlockDriverState *bs,
- const char *name,
- uint32_t granularity,
- Error **errp);
- int (*bdrv_co_remove_persistent_dirty_bitmap)(BlockDriverState *bs,
- const char *name,
- Error **errp);
+ bool coroutine_fn (*bdrv_co_can_store_new_dirty_bitmap)(
+ BlockDriverState *bs, const char *name, uint32_t granularity,
+ Error **errp);
+ int coroutine_fn (*bdrv_co_remove_persistent_dirty_bitmap)(
+ BlockDriverState *bs, const char *name, Error **errp);
};
static inline bool block_driver_can_compress(BlockDriver *drv)
*/
bool parent_is_bds;
+ /*
+ * Global state (GS) API. These functions run under the BQL.
+ *
+ * See include/block/block-global-state.h for more information about
+ * the GS API.
+ */
void (*inherit_options)(BdrvChildRole role, bool parent_is_format,
int *child_flags, QDict *child_options,
int parent_flags, QDict *parent_options);
-
void (*change_media)(BdrvChild *child, bool load);
+
+ /*
+ * Returns a malloced string that describes the parent of the child for a
+ * human reader. This could be a node-name, BlockBackend name, qdev ID or
+ * QOM path of the device owning the BlockBackend, job type and ID etc. The
+ * caller is responsible for freeing the memory.
+ */
+ char *(*get_parent_desc)(BdrvChild *child);
+
+ /*
+ * Notifies the parent that the child has been activated/inactivated (e.g.
+ * when migration is completing) and it can start/stop requesting
+ * permissions and doing I/O on it.
+ */
+ void (*activate)(BdrvChild *child, Error **errp);
+ int (*inactivate)(BdrvChild *child);
+
+ void GRAPH_WRLOCK_PTR (*attach)(BdrvChild *child);
+ void GRAPH_WRLOCK_PTR (*detach)(BdrvChild *child);
+
+ /*
+ * Notifies the parent that the filename of its child has changed (e.g.
+ * because the direct child was removed from the backing chain), so that it
+ * can update its reference.
+ */
+ int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
+ const char *filename, Error **errp);
+
+ bool (*change_aio_ctx)(BdrvChild *child, AioContext *ctx,
+ GHashTable *visited, Transaction *tran,
+ Error **errp);
+
+ /*
+ * I/O API functions. These functions are thread-safe.
+ *
+ * See include/block/block-io.h for more information about
+ * the I/O API.
+ */
+
void (*resize)(BdrvChild *child);
/*
*/
const char *(*get_name)(BdrvChild *child);
- /*
- * Returns a malloced string that describes the parent of the child for a
- * human reader. This could be a node-name, BlockBackend name, qdev ID or
- * QOM path of the device owning the BlockBackend, job type and ID etc. The
- * caller is responsible for freeing the memory.
- */
- char *(*get_parent_desc)(BdrvChild *child);
+ AioContext *(*get_parent_aio_context)(BdrvChild *child);
/*
* If this pair of functions is implemented, the parent doesn't issue new
* These functions must not change the graph (and therefore also must not
* call aio_poll(), which could change the graph indirectly).
*
- * If drained_end() schedules background operations, it must atomically
- * increment *drained_end_counter for each such operation and atomically
- * decrement it once the operation has settled.
- *
* Note that this can be nested. If drained_begin() was called twice, new
* I/O is allowed only after drained_end() was called twice, too.
*/
void (*drained_begin)(BdrvChild *child);
- void (*drained_end)(BdrvChild *child, int *drained_end_counter);
+ void (*drained_end)(BdrvChild *child);
/*
* Returns whether the parent has pending requests for the child. This
* activity on the child has stopped.
*/
bool (*drained_poll)(BdrvChild *child);
-
- /*
- * Notifies the parent that the child has been activated/inactivated (e.g.
- * when migration is completing) and it can start/stop requesting
- * permissions and doing I/O on it.
- */
- void (*activate)(BdrvChild *child, Error **errp);
- int (*inactivate)(BdrvChild *child);
-
- void (*attach)(BdrvChild *child);
- void (*detach)(BdrvChild *child);
-
- /*
- * Notifies the parent that the filename of its child has changed (e.g.
- * because the direct child was removed from the backing chain), so that it
- * can update its reference.
- */
- int (*update_filename)(BdrvChild *child, BlockDriverState *new_base,
- const char *filename, Error **errp);
-
- bool (*can_set_aio_ctx)(BdrvChild *child, AioContext *ctx,
- GSList **ignore, Error **errp);
- void (*set_aio_ctx)(BdrvChild *child, AioContext *ctx, GSList **ignore);
-
- AioContext *(*get_parent_aio_context)(BdrvChild *child);
};
extern const BdrvChildClass child_of_bds;
bool frozen;
/*
- * How many times the parent of this child has been drained
+ * True if the parent of this child has been drained by this BdrvChild
* (through klass->drained_*).
- * Usually, this is equal to bs->quiesce_counter (potentially
- * reduced by bdrv_drain_all_count). It may differ while the
+ *
+ * It is generally true if bs->quiesce_counter > 0. It may differ while the
* child is entering or leaving a drained section.
*/
- int parent_quiesce_counter;
+ bool quiesced_parent;
QLIST_ENTRY(BdrvChild) next;
QLIST_ENTRY(BdrvChild) next_parent;
QDict *full_open_options;
char exact_filename[PATH_MAX];
- BdrvChild *backing;
- BdrvChild *file;
-
/* I/O Limits */
BlockLimits bl;
/*
* Flags honored during pread
*/
- unsigned int supported_read_flags;
+ BdrvRequestFlags supported_read_flags;
/*
* Flags honored during pwrite (so far: BDRV_REQ_FUA,
* BDRV_REQ_WRITE_UNCHANGED).
* flag), or they have to explicitly take the WRITE permission for
* their children.
*/
- unsigned int supported_write_flags;
+ BdrvRequestFlags supported_write_flags;
/*
* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
* BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED)
*/
- unsigned int supported_zero_flags;
+ BdrvRequestFlags supported_zero_flags;
/*
* Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
*
* that any added space reads as all zeros. If this can't be guaranteed,
* the operation must fail.
*/
- unsigned int supported_truncate_flags;
+ BdrvRequestFlags supported_truncate_flags;
/* the following member gives a name to every node on the bs graph. */
char node_name[32];
* parent node of this node.
*/
BlockDriverState *inherits_from;
+
+ /*
+ * @backing and @file each point to one of @children, or are NULL. All
+ * three fields (@file, @backing and @children) are modified only in
+ * bdrv_child_cb_attach() and bdrv_child_cb_detach().
+ *
+ * See also the comment in include/block/block.h to learn how @backing and
+ * @file are connected with BdrvChildRole.
+ */
QLIST_HEAD(, BdrvChild) children;
+ BdrvChild *backing;
+ BdrvChild *file;
+
QLIST_HEAD(, BdrvChild) parents;
QDict *options;
/* Accessed with atomic ops. */
int quiesce_counter;
- int recursive_quiesce_counter;
unsigned int write_gen; /* Current data generation */
}
int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp);
-int get_tmp_filename(char *filename, int size);
+char *create_tmp_file(Error **errp);
void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
QDict *options);