typedef enum {
BDRV_REQ_COPY_ON_READ = 0x1,
BDRV_REQ_ZERO_WRITE = 0x2,
- /* The BDRV_REQ_MAY_UNMAP flag is used to indicate that the block driver
- * is allowed to optimize a write zeroes request by unmapping (discarding)
- * blocks if it is guaranteed that the result will read back as
- * zeroes. The flag is only passed to the driver if the block device is
- * opened with BDRV_O_UNMAP.
+
+ /*
+ * The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
+ * that the block driver should unmap (discard) blocks if it is guaranteed
+ * that the result will read back as zeroes. The flag is only passed to the
+ * driver if the block device is opened with BDRV_O_UNMAP.
*/
BDRV_REQ_MAY_UNMAP = 0x4,
+
+ /*
+ * The BDRV_REQ_NO_SERIALISING flag is only valid for reads and means that
+ * we don't want wait_serialising_requests() during the read operation.
+ *
+ * This flag is used for backup copy-on-write operations, when we need to
+ * read old data before write (write notifier triggered). It is okay since
+ * we already waited for other serializing requests in the initiating write
+ * (see bdrv_aligned_pwritev), and it is necessary if the initiating write
+ * is already serializing (without the flag, the read would deadlock
+ * waiting for the serialising write to complete).
+ */
BDRV_REQ_NO_SERIALISING = 0x8,
BDRV_REQ_FUA = 0x10,
BDRV_REQ_WRITE_COMPRESSED = 0x20,
+ /* Signifies that this write request will not change the visible disk
+ * content. */
+ BDRV_REQ_WRITE_UNCHANGED = 0x40,
+
+ /*
+ * BDRV_REQ_SERIALISING forces request serialisation for writes.
+ * It is used to ensure that writes to the backing file of a backup process
+ * target cannot race with a read of the backup target that defers to the
+ * backing file.
+ *
+ * Note, that BDRV_REQ_SERIALISING is _not_ opposite in meaning to
+ * BDRV_REQ_NO_SERIALISING. A more descriptive name for the latter might be
+ * _DO_NOT_WAIT_FOR_SERIALISING, except that is too long.
+ */
+ BDRV_REQ_SERIALISING = 0x80,
+
+ /* Execute the request only if the operation can be offloaded or otherwise
+ * be executed efficiently, but return an error instead of using a slow
+ * fallback. */
+ BDRV_REQ_NO_FALLBACK = 0x100,
+
/* Mask of valid flags */
- BDRV_REQ_MASK = 0x3f,
+ BDRV_REQ_MASK = 0x1ff,
} BdrvRequestFlags;
typedef struct BlockSizes {
select an appropriate protocol driver,
ignoring the format layer */
#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */
+#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening read-write fails */
#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
#define BDRV_OPT_CACHE_DIRECT "cache.direct"
#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
#define BDRV_OPT_READ_ONLY "read-only"
+#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
#define BDRV_OPT_DISCARD "discard"
#define BDRV_OPT_FORCE_SHARE "force-share"
* BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
* layer, set by block layer
*
- * Internal flag:
+ * Internal flags:
* BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
* that the block layer recompute the answer from the returned
* BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
+ * BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
+ * zeroes in file child of current block node inside
+ * returned region. Only valid together with both
+ * BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
+ * appear with BDRV_BLOCK_ZERO.
*
* If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
* host offset within the returned BDS that is allocated for the
#define BDRV_BLOCK_RAW 0x08
#define BDRV_BLOCK_ALLOCATED 0x10
#define BDRV_BLOCK_EOF 0x20
+#define BDRV_BLOCK_RECURSE 0x40
#define BDRV_BLOCK_OFFSET_MASK BDRV_SECTOR_MASK
typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
typedef struct BDRVReopenState {
BlockDriverState *bs;
int flags;
+ BlockdevDetectZeroesOptions detect_zeroes;
+ bool backing_missing;
+ bool replace_backing_bs; /* new_backing_bs is ignored if this is false */
+ BlockDriverState *new_backing_bs; /* If NULL then detach the current bs */
uint64_t perm, shared_perm;
QDict *options;
QDict *explicit_options;
* This permission (which is weaker than BLK_PERM_WRITE) is both enough and
* required for writes to the block node when the caller promises that
* the visible disk content doesn't change.
+ *
+ * As the BLK_PERM_WRITE permission is strictly stronger, either is
+ * sufficient to perform an unchanging write.
*/
BLK_PERM_WRITE_UNCHANGED = 0x04,
BLK_PERM_GRAPH_MOD = 0x10,
BLK_PERM_ALL = 0x1f,
+
+ DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ
+ | BLK_PERM_WRITE
+ | BLK_PERM_WRITE_UNCHANGED
+ | BLK_PERM_RESIZE,
+
+ DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
};
char *bdrv_perm_names(uint64_t perm);
BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
int flags, Error **errp);
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
- BlockDriverState *bs,
- QDict *options, int flags);
-int bdrv_reopen_multiple(AioContext *ctx, BlockReopenQueue *bs_queue, Error **errp);
-int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
+ BlockDriverState *bs, QDict *options,
+ bool keep_old_opts);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
+ Error **errp);
int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
BlockReopenQueue *queue, Error **errp);
void bdrv_reopen_commit(BDRVReopenState *reopen_state);
void bdrv_reopen_abort(BDRVReopenState *reopen_state);
-int bdrv_read(BdrvChild *child, int64_t sector_num,
- uint8_t *buf, int nb_sectors);
-int bdrv_write(BdrvChild *child, int64_t sector_num,
- const uint8_t *buf, int nb_sectors);
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int bytes, BdrvRequestFlags flags);
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
int bdrv_pwritev(BdrvChild *child, int64_t offset, QEMUIOVector *qiov);
int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
const void *buf, int count);
-int coroutine_fn bdrv_co_readv(BdrvChild *child, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov);
-int coroutine_fn bdrv_co_writev(BdrvChild *child, int64_t sector_num,
- int nb_sectors, QEMUIOVector *qiov);
/*
* Efficiently zero a region of the disk image. Note that this is a regular
* I/O request like read or write and should have a reasonable size. This
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
const char *backing_file);
void bdrv_refresh_filename(BlockDriverState *bs);
+
+int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset,
+ PreallocMode prealloc, Error **errp);
int bdrv_truncate(BdrvChild *child, int64_t offset, PreallocMode prealloc,
Error **errp);
+
int64_t bdrv_nb_sectors(BlockDriverState *bs);
int64_t bdrv_getlength(BlockDriverState *bs);
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
BlockDriverState *bs);
BlockDriverState *bdrv_find_base(BlockDriverState *bs);
+bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
+ Error **errp);
+int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
+ Error **errp);
+void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base);
typedef struct BdrvCheckResult {
typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
int64_t total_work_size, void *opaque);
int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
- BlockDriverAmendStatusCB *status_cb, void *cb_opaque);
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+ Error **errp);
/* external snapshots */
bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs,
void bdrv_drain_all_end(void);
void bdrv_drain_all(void);
-/* Returns NULL when bs == NULL */
-AioWait *bdrv_get_aio_wait(BlockDriverState *bs);
-
#define BDRV_POLL_WHILE(bs, cond) ({ \
BlockDriverState *bs_ = (bs); \
- AIO_WAIT_WHILE(bdrv_get_aio_wait(bs_), \
- bdrv_get_aio_context(bs_), \
+ AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
cond); })
-int bdrv_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
-int bdrv_co_pdiscard(BlockDriverState *bs, int64_t offset, int bytes);
+int bdrv_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
+int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
int bdrv_has_zero_init_1(BlockDriverState *bs);
int bdrv_has_zero_init(BlockDriverState *bs);
bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs);
int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum);
int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
- int64_t offset, int64_t bytes, int64_t *pnum);
+ bool include_base, int64_t offset, int64_t bytes,
+ int64_t *pnum);
bool bdrv_is_read_only(BlockDriverState *bs);
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
bool ignore_allow_rdw, Error **errp);
-int bdrv_set_read_only(BlockDriverState *bs, bool read_only, Error **errp);
+int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
+ Error **errp);
+bool bdrv_is_writable(BlockDriverState *bs);
bool bdrv_is_sg(BlockDriverState *bs);
bool bdrv_is_inserted(BlockDriverState *bs);
void bdrv_lock_medium(BlockDriverState *bs, bool locked);
const char *bdrv_get_format_name(BlockDriverState *bs);
BlockDriverState *bdrv_find_node(const char *node_name);
BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp);
+XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp);
BlockDriverState *bdrv_lookup_bs(const char *device,
const char *node_name,
Error **errp);
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
BlockDriverState *bdrv_next_node(BlockDriverState *bs);
+BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
typedef struct BdrvNextIterator {
enum {
BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
bool bdrv_is_encrypted(BlockDriverState *bs);
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
- void *opaque);
+ void *opaque, bool read_only);
const char *bdrv_get_node_name(const BlockDriverState *bs);
const char *bdrv_get_device_name(const BlockDriverState *bs);
const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
int bdrv_get_flags(BlockDriverState *bs);
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
-ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs);
+ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
+ Error **errp);
void bdrv_round_to_clusters(BlockDriverState *bs,
int64_t offset, int64_t bytes,
int64_t *cluster_offset,
int64_t *cluster_bytes);
-const char *bdrv_get_encrypted_filename(BlockDriverState *bs);
void bdrv_get_backing_filename(BlockDriverState *bs,
char *filename, int filename_size);
-void bdrv_get_full_backing_filename(BlockDriverState *bs,
- char *dest, size_t sz, Error **errp);
-void bdrv_get_full_backing_filename_from_filename(const char *backed,
- const char *backing,
- char *dest, size_t sz,
- Error **errp);
+char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp);
+char *bdrv_get_full_backing_filename_from_filename(const char *backed,
+ const char *backing,
+ Error **errp);
+char *bdrv_dirname(BlockDriverState *bs, Error **errp);
int path_has_protocol(const char *path);
int path_is_absolute(const char *path);
-void path_combine(char *dest, int dest_size,
- const char *base_path,
- const char *filename);
+char *path_combine(const char *base_path, const char *filename);
int bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
*/
void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co);
-/**
- * bdrv_set_aio_context:
- *
- * Changes the #AioContext used for fd handlers, timers, and BHs by this
- * BlockDriverState and all its children.
- *
- * This function must be called with iothread lock held.
- */
-void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context);
+void bdrv_set_aio_context_ignore(BlockDriverState *bs,
+ AioContext *new_context, GSList **ignore);
+int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
+ Error **errp);
+int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
+ BdrvChild *ignore_child, Error **errp);
+bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
+ GSList **ignore, Error **errp);
+bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
+ GSList **ignore, Error **errp);
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
* Begin a quiesced section of all users of @bs. This is part of
* bdrv_drained_begin.
*/
-void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore);
+void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
+ bool ignore_bds_parents);
+
+/**
+ * bdrv_parent_drained_begin_single:
+ *
+ * Begin a quiesced section for the parent of @c. If @poll is true, wait for
+ * any pending activity to cease.
+ */
+void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
/**
* bdrv_parent_drained_end:
* End a quiesced section of all users of @bs. This is part of
* bdrv_drained_end.
*/
-void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore);
+void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
+ bool ignore_bds_parents);
+
+/**
+ * bdrv_drain_poll:
+ *
+ * Poll for pending requests in @bs, its parents (except for @ignore_parent),
+ * and if @recursive is true its children as well (used for subtree drain).
+ *
+ * If @ignore_bds_parents is true, parents that are BlockDriverStates must
+ * ignore the drain request because they will be drained separately (used for
+ * drain_all).
+ *
+ * This is part of bdrv_drained_begin.
+ */
+bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
+ BdrvChild *ignore_parent, bool ignore_bds_parents);
/**
* bdrv_drained_begin:
*/
void bdrv_drained_begin(BlockDriverState *bs);
+/**
+ * bdrv_do_drained_begin_quiesce:
+ *
+ * Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
+ * running requests to complete.
+ */
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
+ BdrvChild *parent, bool ignore_bds_parents);
+
/**
* Like bdrv_drained_begin, but recursively begins a quiesced section for
* exclusive access to all child nodes as well.
*/
void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
void bdrv_unregister_buf(BlockDriverState *bs, void *host);
+
+/**
+ *
+ * bdrv_co_copy_range:
+ *
+ * Do offloaded copy between two children. If the operation is not implemented
+ * by the driver, or if the backend storage doesn't support it, a negative
+ * error code will be returned.
+ *
+ * Note: block layer doesn't emulate or fallback to a bounce buffer approach
+ * because usually the caller shouldn't attempt offloaded copy any more (e.g.
+ * calling copy_file_range(2)) after the first error, thus it should fall back
+ * to a read+write path in the caller level.
+ *
+ * @src: Source child to copy data from
+ * @src_offset: offset in @src image to read data
+ * @dst: Destination child to copy data to
+ * @dst_offset: offset in @dst image to write data
+ * @bytes: number of bytes to copy
+ * @flags: request flags. Supported flags:
+ * BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
+ * write on @dst as if bdrv_co_pwrite_zeroes is
+ * called. Used to simplify caller code, or
+ * during BlockDriver.bdrv_co_copy_range_from()
+ * recursion.
+ * BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
+ * requests currently in flight.
+ *
+ * Returns: 0 if succeeded; negative error code if failed.
+ **/
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
+ BdrvChild *dst, uint64_t dst_offset,
+ uint64_t bytes, BdrvRequestFlags read_flags,
+ BdrvRequestFlags write_flags);
#endif