#include "block/aio.h"
#include "qapi/qapi-types-block-core.h"
+#include "block/aio-wait.h"
#include "qemu/iov.h"
#include "qemu/coroutine.h"
#include "block/accounting.h"
BDRV_REQ_FUA = 0x10,
BDRV_REQ_WRITE_COMPRESSED = 0x20,
+ /* Signifies that this write request will not change the visible disk
+ * content. */
+ BDRV_REQ_WRITE_UNCHANGED = 0x40,
+
/* Mask of valid flags */
- BDRV_REQ_MASK = 0x3f,
+ BDRV_REQ_MASK = 0x7f,
} BdrvRequestFlags;
typedef struct BlockSizes {
* BDRV_BLOCK_ZERO: offset reads as zero
* BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
* BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
- * layer (short for DATA || ZERO), set by block layer
- * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this layer
+ * layer rather than any backing, set by block layer
+ * BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
+ * layer, set by block layer
*
* Internal flag:
* BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
* that the block layer recompute the answer from the returned
* BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
*
- * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 (BDRV_BLOCK_OFFSET_MASK) of
- * the return value (old interface) or the entire map parameter (new
- * interface) represent the offset in the returned BDS that is allocated for
- * the corresponding raw data. However, whether that offset actually
- * contains data also depends on BDRV_BLOCK_DATA, as follows:
+ * If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
+ * host offset within the returned BDS that is allocated for the
+ * corresponding raw guest data. However, whether that offset
+ * actually contains data also depends on BDRV_BLOCK_DATA, as follows:
*
* DATA ZERO OFFSET_VALID
* t t t sectors read as zero, returned file is zero at offset
* This permission (which is weaker than BLK_PERM_WRITE) is both enough and
* required for writes to the block node when the caller promises that
* the visible disk content doesn't change.
+ *
+ * As the BLK_PERM_WRITE permission is strictly stronger, either is
+ * sufficient to perform an unchanging write.
*/
BLK_PERM_WRITE_UNCHANGED = 0x04,
void bdrv_init(void);
void bdrv_init_with_whitelist(void);
bool bdrv_uses_whitelist(void);
+int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);
BlockDriver *bdrv_find_protocol(const char *filename,
bool allow_protocol_prefix,
Error **errp);
BlockDriverState* parent,
const BdrvChildRole *child_role,
bool allow_none, Error **errp);
+BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
Error **errp);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov);
int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
const void *buf, int count);
-int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov);
-int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov);
/*
* Efficiently zero a region of the disk image. Note that this is a regular
* I/O request like read or write and should have a reasonable size. This
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
const char *backing_file);
void bdrv_refresh_filename(BlockDriverState *bs);
+
+int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset,
+ PreallocMode prealloc, Error **errp);
int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
Error **errp);
+
int64_t bdrv_nb_sectors(BlockDriverState *bs);
int64_t bdrv_getlength(BlockDriverState *bs);
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
int64_t total_work_size, void *opaque);
int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb, void *cb_opaque);
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+ Error **errp);
/* external snapshots */
bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
void bdrv_drain_all_end(void);
void bdrv_drain_all(void);
+/* Returns NULL when bs == NULL */
+AioWait *bdrv_get_aio_wait(BlockDriverState *bs);
+
#define BDRV_POLL_WHILE(bs, cond) ({ \
- bool waited_ = false; \
- bool busy_ = true; \
BlockDriverState *bs_ = (bs); \
- AioContext *ctx_ = bdrv_get_aio_context(bs_); \
- if (aio_context_in_iothread(ctx_)) { \
- while ((cond) || busy_) { \
- busy_ = aio_poll(ctx_, (cond)); \
- waited_ |= !!(cond) | busy_; \
- } \
- } else { \
- assert(qemu_get_current_aio_context() == \
- qemu_get_aio_context()); \
- /* Ask bdrv_dec_in_flight to wake up the main \
- * QEMU AioContext. Extra I/O threads never take \
- * other I/O threads' AioContexts (see for example \
- * block_job_defer_to_main_loop for how to do it). \
- */ \
- assert(!bs_->wakeup); \
- /* Set bs->wakeup before evaluating cond. */ \
- atomic_mb_set(&bs_->wakeup, true); \
- while (busy_) { \
- if ((cond)) { \
- waited_ = busy_ = true; \
- aio_context_release(ctx_); \
- aio_poll(qemu_get_aio_context(), true); \
- aio_context_acquire(ctx_); \
- } else { \
- busy_ = aio_poll(ctx_, false); \
- waited_ |= busy_; \
- } \
- } \
- atomic_set(&bs_->wakeup, false); \
- } \
- waited_; })
+ AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_), \
+ bdrv_get_aio_context(bs_), \
+ cond); })
int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
bool ignore_allow_rdw, Error **errp);
int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp);
+bool bdrv_is_writable(BlockDriverState *bs);
bool bdrv_is_sg(BlockDriverState *bs);
bool bdrv_is_inserted(BlockDriverState *bs);
void bdrv_lock_medium(BlockDriverState *bs, bool locked);
Error **errp);
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
BlockDriverState *bdrv_next_node(BlockDriverState *bs);
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
typedef struct BdrvNextIterator {
enum {
* Begin a quiesced section of all users of @bs. This is part of
* bdrv_drained_begin.
*/
-void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore);
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
+ bool ignore_bds_parents);
/**
* bdrv_parent_drained_end:
* End a quiesced section of all users of @bs. This is part of
* bdrv_drained_end.
*/
-void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
+ bool ignore_bds_parents);
+
+/**
+ * bdrv_drain_poll:
+ *
+ * Poll for pending requests in @bs, its parents (except for @ignore_parent),
+ * and if @recursive is true its children as well (used for subtree drain).
+ *
+ * If @ignore_bds_parents is true, parents that are BlockDriverStates must
+ * ignore the drain request because they will be drained separately (used for
+ * drain_all).
+ *
+ * This is part of bdrv_drained_begin.
+ */
+bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
+ BdrvChild *ignore_parent, bool ignore_bds_parents);
/**
* bdrv_drained_begin:
*/
void bdrv_drained_begin(BlockDriverState *bs);
+/**
+ * bdrv_do_drained_begin_quiesce:
+ *
+ * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
+ * running requests to complete.
+ */
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
+ BdrvChild *parent, bool ignore_bds_parents);
+
/**
* Like bdrv_drained_begin, but recursively begins a quiesced section for
* exclusive access to all child nodes as well.
*/
void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
void bdrv_unregister_buf(BlockDriverState *bs, void *host);
+
+/**
+ *
+ * bdrv_co_copy_range:
+ *
+ * Do offloaded copy between two children. If the operation is not implemented
+ * by the driver, or if the backend storage doesn't support it, a negative
+ * error code will be returned.
+ *
+ * Note: block layer doesn't emulate or fallback to a bounce buffer approach
+ * because usually the caller shouldn't attempt offloaded copy any more (e.g.
+ * calling copy_file_range(2)) after the first error, thus it should fall back
+ * to a read+write path in the caller level.
+ *
+ * @src: Source child to copy data from
+ * @src_offset: offset in @src image to read data
+ * @dst: Destination child to copy data to
+ * @dst_offset: offset in @dst image to write data
+ * @bytes: number of bytes to copy
+ * @flags: request flags. Must be one of:
+ * 0 - actually read data from src;
+ * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
+ * write on @dst as if bdrv_co_pwrite_zeroes is
+ * called. Used to simplify caller code, or
+ * during BlockDriver.bdrv_co_copy_range_from()
+ * recursion.
+ *
+ * Returns: 0 if succeeded; negative error code if failed.
+ **/
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
+ BdrvChild *dst, uint64_t dst_offset,
+ uint64_t bytes, BdrvRequestFlags flags);
#endif