#include "block/accounting.h"
#include "block/block.h"
-#include "block/throttle-groups.h"
#include "qemu/option.h"
#include "qemu/queue.h"
#include "qemu/coroutine.h"
#include "qemu/throttle.h"
#define BLOCK_FLAG_ENCRYPT 1
-#define BLOCK_FLAG_COMPAT6 4
#define BLOCK_FLAG_LAZY_REFCOUNTS 8
#define BLOCK_OPT_SIZE "size"
#define BLOCK_OPT_ENCRYPT "encryption"
#define BLOCK_OPT_COMPAT6 "compat6"
+#define BLOCK_OPT_HWVERSION "hwversion"
#define BLOCK_OPT_BACKING_FILE "backing_file"
#define BLOCK_OPT_BACKING_FMT "backing_fmt"
#define BLOCK_OPT_CLUSTER_SIZE "cluster_size"
Error **errp);
int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags,
Error **errp);
- int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
- uint8_t *buf, int nb_sectors);
- int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num,
- const uint8_t *buf, int nb_sectors);
void (*bdrv_close)(BlockDriverState *bs);
int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp);
int (*bdrv_set_key)(BlockDriverState *bs, const char *key);
BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs,
BlockCompletionFunc *cb, void *opaque);
- BlockAIOCB *(*bdrv_aio_discard)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
+ BlockAIOCB *(*bdrv_aio_pdiscard)(BlockDriverState *bs,
+ int64_t offset, int count,
BlockCompletionFunc *cb, void *opaque);
int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, QEMUIOVector *qiov);
+ int coroutine_fn (*bdrv_co_writev_flags)(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags);
+ int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags);
+
/*
* Efficiently zero a region of the disk image. Typically an image format
* would use a compact metadata representation to implement this. This
- * function pointer may be NULL and .bdrv_co_writev() will be called
- * instead.
+ * function pointer may be NULL or return -ENOSUP and .bdrv_co_writev()
+ * will be called instead.
*/
- int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors, BdrvRequestFlags flags);
- int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors);
+ int coroutine_fn (*bdrv_co_pwrite_zeroes)(BlockDriverState *bs,
+ int64_t offset, int count, BdrvRequestFlags flags);
+ int coroutine_fn (*bdrv_co_pdiscard)(BlockDriverState *bs,
+ int64_t offset, int count);
int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs,
int64_t sector_num, int nb_sectors, int *pnum,
BlockDriverState **file);
int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi);
ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs);
- int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov,
- int64_t pos);
- int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf,
- int64_t pos, int size);
+ int coroutine_fn (*bdrv_save_vmstate)(BlockDriverState *bs,
+ QEMUIOVector *qiov,
+ int64_t pos);
+ int coroutine_fn (*bdrv_load_vmstate)(BlockDriverState *bs,
+ QEMUIOVector *qiov,
+ int64_t pos);
int (*bdrv_change_backing_file)(BlockDriverState *bs,
const char *backing_file, const char *backing_fmt);
/* io queue for linux-aio */
void (*bdrv_io_plug)(BlockDriverState *bs);
void (*bdrv_io_unplug)(BlockDriverState *bs);
- void (*bdrv_flush_io_queue)(BlockDriverState *bs);
/**
* Try to get @bs's logical and physical block size.
*/
void (*bdrv_drain)(BlockDriverState *bs);
+ void (*bdrv_add_child)(BlockDriverState *parent, BlockDriverState *child,
+ Error **errp);
+ void (*bdrv_del_child)(BlockDriverState *parent, BdrvChild *child,
+ Error **errp);
+
QLIST_ENTRY(BlockDriver) list;
};
typedef struct BlockLimits {
- /* maximum number of sectors that can be discarded at once */
- int max_discard;
-
- /* optimal alignment for discard requests in sectors */
- int64_t discard_alignment;
-
- /* maximum number of sectors that can zeroized at once */
- int max_write_zeroes;
-
- /* optimal alignment for write zeroes requests in sectors */
- int64_t write_zeroes_alignment;
-
- /* optimal transfer length in sectors */
- int opt_transfer_length;
-
- /* maximal transfer length in sectors */
- int max_transfer_length;
-
- /* memory alignment so that no bounce buffer is needed */
+ /* Alignment requirement, in bytes, for offset/length of I/O
+ * requests. Must be a power of 2 less than INT_MAX; defaults to
+ * 1 for drivers with modern byte interfaces, and to 512
+ * otherwise. */
+ uint32_t request_alignment;
+
+ /* Maximum number of bytes that can be discarded at once (since it
+ * is signed, it must be < 2G, if set). Must be multiple of
+ * pdiscard_alignment, but need not be power of 2. May be 0 if no
+ * inherent 32-bit limit */
+ int32_t max_pdiscard;
+
+ /* Optimal alignment for discard requests in bytes. A power of 2
+ * is best but not mandatory. Must be a multiple of
+ * bl.request_alignment, and must be less than max_pdiscard if
+ * that is set. May be 0 if bl.request_alignment is good enough */
+ uint32_t pdiscard_alignment;
+
+ /* Maximum number of bytes that can zeroized at once (since it is
+ * signed, it must be < 2G, if set). Must be multiple of
+ * pwrite_zeroes_alignment. May be 0 if no inherent 32-bit limit */
+ int32_t max_pwrite_zeroes;
+
+ /* Optimal alignment for write zeroes requests in bytes. A power
+ * of 2 is best but not mandatory. Must be a multiple of
+ * bl.request_alignment, and must be less than max_pwrite_zeroes
+ * if that is set. May be 0 if bl.request_alignment is good
+ * enough */
+ uint32_t pwrite_zeroes_alignment;
+
+ /* Optimal transfer length in bytes. A power of 2 is best but not
+ * mandatory. Must be a multiple of bl.request_alignment, or 0 if
+ * no preferred size */
+ uint32_t opt_transfer;
+
+ /* Maximal transfer length in bytes. Need not be power of 2, but
+ * must be multiple of opt_transfer and bl.request_alignment, or 0
+ * for no 32-bit limit. For now, anything larger than INT_MAX is
+ * clamped down. */
+ uint32_t max_transfer;
+
+ /* memory alignment, in bytes so that no bounce buffer is needed */
size_t min_mem_alignment;
- /* memory alignment for bounce buffer */
+ /* memory alignment, in bytes, for bounce buffer */
size_t opt_mem_alignment;
/* maximum number of iovec elements */
void (*detach_aio_context)(void *opaque);
void *opaque;
+ bool deleted;
QLIST_ENTRY(BdrvAioNotifier) list;
} BdrvAioNotifier;
struct BdrvChildRole {
void (*inherit_options)(int *child_flags, QDict *child_options,
int parent_flags, QDict *parent_options);
+
+ void (*change_media)(BdrvChild *child, bool load);
+ void (*resize)(BdrvChild *child);
+
+ /* Returns a name that is supposedly more useful for human users than the
+ * node name for identifying the node in question (in particular, a BB
+ * name), or NULL if the parent can't provide a better name. */
+ const char* (*get_name)(BdrvChild *child);
+
+ /*
+ * If this pair of functions is implemented, the parent doesn't issue new
+ * requests after returning from .drained_begin() until .drained_end() is
+ * called.
+ *
+ * Note that this can be nested. If drained_begin() was called twice, new
+ * I/O is allowed only after drained_end() was called twice, too.
+ */
+ void (*drained_begin)(BdrvChild *child);
+ void (*drained_end)(BdrvChild *child);
};
extern const BdrvChildRole child_file;
BlockDriverState *bs;
char *name;
const BdrvChildRole *role;
+ void *opaque;
QLIST_ENTRY(BdrvChild) next;
QLIST_ENTRY(BdrvChild) next_parent;
};
struct BlockDriverState {
int64_t total_sectors; /* if we are reading a disk image, give its
size in sectors */
- int read_only; /* if true, the media is read only */
int open_flags; /* flags used to open the file, re-used for re-open */
- int encrypted; /* if true, the media is encrypted */
- int valid_key; /* if true, a valid encryption key has been set */
- int sg; /* if true, the device is a /dev/sg* */
- int copy_on_read; /* if true, copy read backing sectors into image
+ bool read_only; /* if true, the media is read only */
+ bool encrypted; /* if true, the media is encrypted */
+ bool valid_key; /* if true, a valid encryption key has been set */
+ bool sg; /* if true, the device is a /dev/sg* */
+ bool probed; /* if true, format was probed rather than specified */
+
+ int copy_on_read; /* if nonzero, copy read backing sectors into image.
note this is a reference count */
- bool probed;
+
+ CoQueue flush_queue; /* Serializing flush queue */
+ unsigned int write_gen; /* Current data generation */
+ unsigned int flush_started_gen; /* Generation for which flush has started */
+ unsigned int flushed_gen; /* Flushed write generation */
BlockDriver *drv; /* NULL means no media */
void *opaque;
- BlockBackend *blk; /* owning backend, if any */
-
AioContext *aio_context; /* event loop used for fd handlers, timers, etc */
/* long-running tasks intended to always use the same AioContext as this
* BDS may register themselves in this list to be notified of changes
* regarding this BDS's context */
QLIST_HEAD(, BdrvAioNotifier) aio_notifiers;
+ bool walking_aio_notifiers; /* to make removal during iteration safe */
char filename[PATH_MAX];
char backing_file[PATH_MAX]; /* if non zero, the image is a diff of
/* number of in-flight serialising requests */
unsigned int serialising_in_flight;
- /* I/O throttling.
- * throttle_state tells us if this BDS has I/O limits configured.
- * io_limits_enabled tells us if they are currently being
- * enforced, but it can be temporarily set to false */
- CoQueue throttled_reqs[2];
- bool io_limits_enabled;
- /* The following fields are protected by the ThrottleGroup lock.
- * See the ThrottleGroup documentation for details. */
- ThrottleState *throttle_state;
- ThrottleTimers throttle_timers;
- unsigned pending_reqs[2];
- QLIST_ENTRY(BlockDriverState) round_robin;
-
/* Offset after the highest byte written to */
uint64_t wr_highest_offset;
/* I/O Limits */
BlockLimits bl;
- /* Whether produces zeros when read beyond eof */
- bool zero_beyond_eof;
-
- /* Alignment requirement for offset/length of I/O requests */
- unsigned int request_alignment;
-
- /* do we need to tell the quest if we have a volatile write cache? */
- int enable_write_cache;
+ /* Flags honored during pwrite (so far: BDRV_REQ_FUA) */
+ unsigned int supported_write_flags;
+ /* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
+ * BDRV_REQ_MAY_UNMAP) */
+ unsigned int supported_zero_flags;
/* the following member gives a name to every node on the bs graph. */
char node_name[32];
uint64_t write_threshold_offset;
NotifierWithReturn write_threshold_notifier;
+ /* counters for nested bdrv_io_plug and bdrv_io_unplugged_begin */
+ unsigned io_plugged;
+ unsigned io_plug_disabled;
+
int quiesce_counter;
};
int open_flags;
bool read_only;
BlockdevDetectZeroesOptions detect_zeroes;
-
- char *throttle_group;
- ThrottleState *throttle_state;
};
+typedef enum BlockMirrorBackingMode {
+ /* Reuse the existing backing chain from the source for the target.
+ * - sync=full: Set backing BDS to NULL.
+ * - sync=top: Use source's backing BDS.
+ * - sync=none: Use source as the backing BDS. */
+ MIRROR_SOURCE_BACKING_CHAIN,
+
+ /* Open the target's backing chain completely anew */
+ MIRROR_OPEN_BACKING_CHAIN,
+
+ /* Do not change the target's backing BDS after job completion */
+ MIRROR_LEAVE_BACKING_CHAIN,
+} BlockMirrorBackingMode;
+
static inline BlockDriverState *backing_bs(BlockDriverState *bs)
{
return bs->backing ? bs->backing->bs : NULL;
*/
void bdrv_setup_io_funcs(BlockDriver *bdrv);
-int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
+int coroutine_fn bdrv_co_preadv(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
-int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
+int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
const char *filename);
-void bdrv_set_io_limits(BlockDriverState *bs,
- ThrottleConfig *cfg);
-
/**
* bdrv_add_before_write_notifier:
/**
* stream_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
* @bs: Block device to operate on.
* @base: Block device that will become the new base, or %NULL to
* flatten the whole backing file chain onto @bs.
- * @base_id: The file name that will be written to @bs as the new
- * backing file if the job completes. Ignored if @base is %NULL.
+ * @backing_file_str: The file name that will be written to @bs as the
+ * the new backing file if the job completes. Ignored if @base is %NULL.
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @on_error: The action to take upon error.
* @cb: Completion function for the job.
* in @bs, but allocated in any image between @base and @bs (both
* exclusive) will be written to @bs. At the end of a successful
* streaming job, the backing file of @bs will be changed to
- * @base_id in the written image and to @base in the live BlockDriverState.
+ * @backing_file_str in the written image and to @base in the live
+ * BlockDriverState.
*/
-void stream_start(BlockDriverState *bs, BlockDriverState *base,
- const char *base_id, int64_t speed, BlockdevOnError on_error,
- BlockCompletionFunc *cb,
- void *opaque, Error **errp);
+void stream_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, const char *backing_file_str,
+ int64_t speed, BlockdevOnError on_error,
+ BlockCompletionFunc *cb, void *opaque, Error **errp);
/**
* commit_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
* @bs: Active block device.
* @top: Top block device to be committed.
* @base: Block device that will be written into, and become the new top.
* @errp: Error object.
*
*/
-void commit_start(BlockDriverState *bs, BlockDriverState *base,
- BlockDriverState *top, int64_t speed,
- BlockdevOnError on_error, BlockCompletionFunc *cb,
- void *opaque, const char *backing_file_str, Error **errp);
+void commit_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, BlockDriverState *top, int64_t speed,
+ BlockdevOnError on_error, BlockCompletionFunc *cb,
+ void *opaque, const char *backing_file_str, Error **errp);
/**
* commit_active_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
* @bs: Active block device to be committed.
* @base: Block device that will be written into, and become the new top.
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @errp: Error object.
*
*/
-void commit_active_start(BlockDriverState *bs, BlockDriverState *base,
- int64_t speed,
+void commit_active_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *base, int64_t speed,
BlockdevOnError on_error,
BlockCompletionFunc *cb,
void *opaque, Error **errp);
/*
* mirror_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
* @bs: Block device to operate on.
* @target: Block device to write to.
* @replaces: Block graph node name to replace once the mirror is done. Can
* @granularity: The chosen granularity for the dirty bitmap.
* @buf_size: The amount of data that can be in flight at one time.
* @mode: Whether to collapse all images in the chain to the target.
+ * @backing_mode: How to establish the target's backing chain after completion.
* @on_source_error: The action to take upon error reading from the source.
* @on_target_error: The action to take upon error writing to the target.
* @unmap: Whether to unmap target where source sectors only contain zeroes.
* manually completed. At the end of a successful mirroring job,
* @bs will be switched to read from @target.
*/
-void mirror_start(BlockDriverState *bs, BlockDriverState *target,
- const char *replaces,
+void mirror_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, const char *replaces,
int64_t speed, uint32_t granularity, int64_t buf_size,
- MirrorSyncMode mode, BlockdevOnError on_source_error,
+ MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
+ BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
bool unmap,
BlockCompletionFunc *cb,
/*
* backup_start:
+ * @job_id: The id of the newly-created job, or %NULL to use the
+ * device name of @bs.
* @bs: Block device to operate on.
* @target: Block device to write to.
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* Start a backup operation on @bs. Clusters in @bs are written to @target
* until the job is cancelled or manually completed.
*/
-void backup_start(BlockDriverState *bs, BlockDriverState *target,
- int64_t speed, MirrorSyncMode sync_mode,
- BdrvDirtyBitmap *sync_bitmap,
+void backup_start(const char *job_id, BlockDriverState *bs,
+ BlockDriverState *target, int64_t speed,
+ MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
BlockCompletionFunc *cb, void *opaque,
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
- const BdrvChildRole *child_role);
+ const BdrvChildRole *child_role,
+ void *opaque);
void bdrv_root_unref_child(BdrvChild *child);
+const char *bdrv_get_parent_name(const BlockDriverState *bs);
void blk_dev_change_media_cb(BlockBackend *blk, bool load);
bool blk_dev_has_removable_media(BlockBackend *blk);
bool blk_dev_has_tray(BlockBackend *blk);
void blk_dev_eject_request(BlockBackend *blk, bool force);
bool blk_dev_is_tray_open(BlockBackend *blk);
bool blk_dev_is_medium_locked(BlockBackend *blk);
-void blk_dev_resize_cb(BlockBackend *blk);
-void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors);
+void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int64_t nr_sect);
bool bdrv_requests_pending(BlockDriverState *bs);
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);