#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "block/coroutines.h"
+#include "block/dirty-bitmap.h"
#include "block/write-threshold.h"
#include "qemu/cutils.h"
#include "qemu/memalign.h"
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
int64_t offset, int64_t bytes, BdrvRequestFlags flags);
-static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
- bool ignore_bds_parents)
+static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore)
{
BdrvChild *c, *next;
QLIST_FOREACH_SAFE(c, &bs->parents, next_parent, next) {
- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
+ if (c == ignore) {
continue;
}
- bdrv_parent_drained_begin_single(c, false);
+ bdrv_parent_drained_begin_single(c);
}
}
void bdrv_parent_drained_end_single(BdrvChild *c)
{
-    IO_OR_GS_CODE();
-    assert(c->parent_quiesce_counter > 0);
-    c->parent_quiesce_counter--;
+    GLOBAL_STATE_CODE();
+
+    assert(c->quiesced_parent);
+    c->quiesced_parent = false;
if (c->klass->drained_end) {
c->klass->drained_end(c);
}
}
-static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore,
- bool ignore_bds_parents)
+static void bdrv_parent_drained_end(BlockDriverState *bs, BdrvChild *ignore)
{
BdrvChild *c;
QLIST_FOREACH(c, &bs->parents, next_parent) {
- if (c == ignore || (ignore_bds_parents && c->klass->parent_is_bds)) {
+ if (c == ignore) {
continue;
}
bdrv_parent_drained_end_single(c);
}
}
-static bool bdrv_parent_drained_poll_single(BdrvChild *c)
+bool bdrv_parent_drained_poll_single(BdrvChild *c)
{
if (c->klass->drained_poll) {
return c->klass->drained_poll(c);
return busy;
}
-void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
+void bdrv_parent_drained_begin_single(BdrvChild *c)
{
- AioContext *ctx = bdrv_child_get_parent_aio_context(c);
- IO_OR_GS_CODE();
- c->parent_quiesce_counter++;
+ GLOBAL_STATE_CODE();
+
+ assert(!c->quiesced_parent);
+ c->quiesced_parent = true;
+
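+    /*
+     * Note that the poll parameter is gone: callers that need to wait for the
+     * parent to actually quiesce now poll bdrv_parent_drained_poll_single()
+     * themselves.
+     */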
if (c->klass->drained_begin) {
c->klass->drained_begin(c);
}
- if (poll) {
- AIO_WAIT_WHILE(ctx, bdrv_parent_drained_poll_single(c));
- }
}
static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
bdrv_merge_limits(&bs->bl, &c->bs->bl);
have_limits = true;
}
+
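+    /*
+     * A filter node's length mirrors that of its filtered child, so a
+     * variable-length child makes this node variable-length as well.
+     */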
+ if (c->role & BDRV_CHILD_FILTERED) {
+ bs->bl.has_variable_length |= c->bs->bl.has_variable_length;
+ }
}
if (!have_limits) {
bool begin;
bool poll;
BdrvChild *parent;
- bool ignore_bds_parents;
} BdrvCoDrainData;
/* Returns true if BDRV_POLL_WHILE() should go into a blocking aio_poll() */
bool bdrv_drain_poll(BlockDriverState *bs, BdrvChild *ignore_parent,
bool ignore_bds_parents)
{
- IO_OR_GS_CODE();
+ GLOBAL_STATE_CODE();
if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
return true;
}
static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
- bool ignore_bds_parents, bool poll);
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
- bool ignore_bds_parents);
+ bool poll);
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent);
static void bdrv_co_drain_bh_cb(void *opaque)
{
aio_context_acquire(ctx);
bdrv_dec_in_flight(bs);
if (data->begin) {
- bdrv_do_drained_begin(bs, data->parent, data->ignore_bds_parents,
- data->poll);
+ bdrv_do_drained_begin(bs, data->parent, data->poll);
} else {
assert(!data->poll);
- bdrv_do_drained_end(bs, data->parent, data->ignore_bds_parents);
+ bdrv_do_drained_end(bs, data->parent);
}
aio_context_release(ctx);
} else {
static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
bool begin,
BdrvChild *parent,
- bool ignore_bds_parents,
bool poll)
{
BdrvCoDrainData data;
.done = false,
.begin = begin,
.parent = parent,
- .ignore_bds_parents = ignore_bds_parents,
.poll = poll,
};
if (ctx != co_ctx) {
aio_context_release(ctx);
}
- replay_bh_schedule_oneshot_event(ctx, bdrv_co_drain_bh_cb, &data);
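+    /*
+     * Schedule the BH in the main loop: drained_begin/end are now
+     * GLOBAL_STATE_CODE() and must run in the main AioContext.
+     */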
+ replay_bh_schedule_oneshot_event(qemu_get_aio_context(),
+ bdrv_co_drain_bh_cb, &data);
qemu_coroutine_yield();
/* If we are resumed from some other event (such as an aio completion or a
* timer callback), it is a bug in the caller that should be fixed. */
assert(data.done);
- /* Reaquire the AioContext of bs if we dropped it */
+ /* Reacquire the AioContext of bs if we dropped it */
if (ctx != co_ctx) {
aio_context_acquire(ctx);
}
}
-void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
- BdrvChild *parent, bool ignore_bds_parents)
+static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
+ bool poll)
{
IO_OR_GS_CODE();
- assert(!qemu_in_coroutine());
-
- /* Stop things in parent-to-child order */
- if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
- aio_disable_external(bdrv_get_aio_context(bs));
- }
-
- bdrv_parent_drained_begin(bs, parent, ignore_bds_parents);
- if (bs->drv && bs->drv->bdrv_drain_begin) {
- bs->drv->bdrv_drain_begin(bs);
- }
-}
-static void bdrv_do_drained_begin(BlockDriverState *bs, BdrvChild *parent,
- bool ignore_bds_parents, bool poll)
-{
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, true, parent, ignore_bds_parents, poll);
+ bdrv_co_yield_to_drain(bs, true, parent, poll);
return;
}
- bdrv_do_drained_begin_quiesce(bs, parent, ignore_bds_parents);
+ GLOBAL_STATE_CODE();
+
+ /* Stop things in parent-to-child order */
+ if (qatomic_fetch_inc(&bs->quiesce_counter) == 0) {
+ bdrv_parent_drained_begin(bs, parent);
+ if (bs->drv && bs->drv->bdrv_drain_begin) {
+ bs->drv->bdrv_drain_begin(bs);
+ }
+ }
/*
* Wait for drained requests to finish.
* nodes.
*/
if (poll) {
- assert(!ignore_bds_parents);
BDRV_POLL_WHILE(bs, bdrv_drain_poll_top_level(bs, parent));
}
}
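+/*
+ * Quiesce a node without polling; the caller is responsible for waiting until
+ * in-flight requests have finished.
+ */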
+void bdrv_do_drained_begin_quiesce(BlockDriverState *bs, BdrvChild *parent)
+{
+ bdrv_do_drained_begin(bs, parent, false);
+}
+
void bdrv_drained_begin(BlockDriverState *bs)
{
IO_OR_GS_CODE();
- bdrv_do_drained_begin(bs, NULL, false, true);
+ bdrv_do_drained_begin(bs, NULL, true);
}
/**
* This function does not poll, nor must any of its recursively called
* functions.
*/
-static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent,
- bool ignore_bds_parents)
+static void bdrv_do_drained_end(BlockDriverState *bs, BdrvChild *parent)
{
int old_quiesce_counter;
+ IO_OR_GS_CODE();
+
if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(bs, false, parent, ignore_bds_parents, false);
+ bdrv_co_yield_to_drain(bs, false, parent, false);
return;
}
assert(bs->quiesce_counter > 0);
+ GLOBAL_STATE_CODE();
/* Re-enable things in child-to-parent order */
- if (bs->drv && bs->drv->bdrv_drain_end) {
- bs->drv->bdrv_drain_end(bs);
- }
- bdrv_parent_drained_end(bs, parent, ignore_bds_parents);
-
old_quiesce_counter = qatomic_fetch_dec(&bs->quiesce_counter);
if (old_quiesce_counter == 1) {
- aio_enable_external(bdrv_get_aio_context(bs));
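+        /*
+         * Driver and parent callbacks now run only when the outermost
+         * drained section ends, mirroring bdrv_do_drained_begin().
+         */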
+ if (bs->drv && bs->drv->bdrv_drain_end) {
+ bs->drv->bdrv_drain_end(bs);
+ }
+ bdrv_parent_drained_end(bs, parent);
}
}
void bdrv_drained_end(BlockDriverState *bs)
{
IO_OR_GS_CODE();
- bdrv_do_drained_end(bs, NULL, false);
+ bdrv_do_drained_end(bs, NULL);
}
void bdrv_drain(BlockDriverState *bs)
* NOTE: no new block jobs or BlockDriverStates can be created between
* the bdrv_drain_all_begin() and bdrv_drain_all_end() calls.
*/
-void bdrv_drain_all_begin(void)
+void bdrv_drain_all_begin_nopoll(void)
{
BlockDriverState *bs = NULL;
GLOBAL_STATE_CODE();
- if (qemu_in_coroutine()) {
- bdrv_co_yield_to_drain(NULL, true, NULL, true, true);
- return;
- }
-
/*
* bdrv queue is managed by record/replay,
* waiting for finishing the I/O requests may
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_begin(bs, NULL, true, false);
+ bdrv_do_drained_begin(bs, NULL, false);
aio_context_release(aio_context);
}
+}
+
+void bdrv_drain_all_begin(void)
+{
+ BlockDriverState *bs = NULL;
+
+ if (qemu_in_coroutine()) {
+ bdrv_co_yield_to_drain(NULL, true, NULL, true);
+ return;
+ }
+
+ /*
+ * bdrv queue is managed by record/replay,
+ * waiting for finishing the I/O requests may
+ * be infinite
+ */
+ if (replay_events_enabled()) {
+ return;
+ }
+
+ bdrv_drain_all_begin_nopoll();
/* Now poll the in-flight requests */
- AIO_WAIT_WHILE(NULL, bdrv_drain_all_poll());
+ AIO_WAIT_WHILE_UNLOCKED(NULL, bdrv_drain_all_poll());
while ((bs = bdrv_next_all_states(bs))) {
bdrv_drain_assert_idle(bs);
g_assert(!bs->refcnt);
while (bs->quiesce_counter) {
- bdrv_do_drained_end(bs, NULL, true);
+ bdrv_do_drained_end(bs, NULL);
}
}
AioContext *aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context);
- bdrv_do_drained_end(bs, NULL, true);
+ bdrv_do_drained_end(bs, NULL);
aio_context_release(aio_context);
}
qatomic_dec(&req->bs->serialising_in_flight);
}
- qemu_co_mutex_lock(&req->bs->reqs_lock);
+ qemu_mutex_lock(&req->bs->reqs_lock);
QLIST_REMOVE(req, list);
+ qemu_mutex_unlock(&req->bs->reqs_lock);
+
+ /*
+ * At this point qemu_co_queue_wait(&req->wait_queue, ...) won't be called
+ * anymore because the request has been removed from the list, so it's safe
+ * to restart the queue outside reqs_lock to minimize the critical section.
+ */
qemu_co_queue_restart_all(&req->wait_queue);
- qemu_co_mutex_unlock(&req->bs->reqs_lock);
}
/**
qemu_co_queue_init(&req->wait_queue);
- qemu_co_mutex_lock(&bs->reqs_lock);
+ qemu_mutex_lock(&bs->reqs_lock);
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
- qemu_co_mutex_unlock(&bs->reqs_lock);
+ qemu_mutex_unlock(&bs->reqs_lock);
}
static bool tracked_request_overlaps(BdrvTrackedRequest *req,
}
/**
- * Round a region to cluster boundaries
+ * Round a region to subcluster (if supported) or cluster boundaries
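+ *
+ * With formats that expose subclusters (e.g. qcow2 with extended L2 entries),
+ * this rounds to the subcluster size; otherwise bdi.subcluster_size simply
+ * equals the cluster size and the old behaviour is preserved.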
*/
-void bdrv_round_to_clusters(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- int64_t *cluster_offset,
- int64_t *cluster_bytes)
+void coroutine_fn GRAPH_RDLOCK
+bdrv_round_to_subclusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ int64_t *align_offset, int64_t *align_bytes)
{
BlockDriverInfo bdi;
IO_CODE();
- if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
- *cluster_offset = offset;
- *cluster_bytes = bytes;
+ if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.subcluster_size == 0) {
+ *align_offset = offset;
+ *align_bytes = bytes;
} else {
- int64_t c = bdi.cluster_size;
- *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
- *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
+ int64_t c = bdi.subcluster_size;
+ *align_offset = QEMU_ALIGN_DOWN(offset, c);
+ *align_bytes = QEMU_ALIGN_UP(offset - *align_offset + bytes, c);
}
}
-static int bdrv_get_cluster_size(BlockDriverState *bs)
+static int coroutine_fn GRAPH_RDLOCK bdrv_get_cluster_size(BlockDriverState *bs)
{
BlockDriverInfo bdi;
int ret;
- ret = bdrv_get_info(bs, &bdi);
+ ret = bdrv_co_get_info(bs, &bdi);
if (ret < 0 || bdi.cluster_size == 0) {
return bs->bl.request_alignment;
} else {
return;
}
- qemu_co_mutex_lock(&bs->reqs_lock);
+ qemu_mutex_lock(&bs->reqs_lock);
bdrv_wait_serialising_requests_locked(self);
- qemu_co_mutex_unlock(&bs->reqs_lock);
+ qemu_mutex_unlock(&bs->reqs_lock);
}
void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
{
IO_CODE();
- qemu_co_mutex_lock(&req->bs->reqs_lock);
+ qemu_mutex_lock(&req->bs->reqs_lock);
tracked_request_set_serialising(req, align);
bdrv_wait_serialising_requests_locked(req);
- qemu_co_mutex_unlock(&req->bs->reqs_lock);
+ qemu_mutex_unlock(&req->bs->reqs_lock);
}
int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
{
int ret;
IO_CODE();
+ assert_bdrv_graph_readable();
ret = bdrv_co_pwrite(child, offset, bytes, buf, flags);
if (ret < 0) {
aio_co_wake(co->coroutine);
}
-static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov,
- size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_driver_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
BlockDriver *drv = bs->drv;
int64_t sector_num;
unsigned int nb_sectors;
QEMUIOVector local_qiov;
int ret;
+ assert_bdrv_graph_readable();
bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
assert(!(flags & ~bs->supported_read_flags));
return ret;
}
-static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
- int64_t offset, int64_t bytes,
- QEMUIOVector *qiov,
- size_t qiov_offset,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_driver_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
bool emulate_fua = false;
unsigned int nb_sectors;
QEMUIOVector local_qiov;
int ret;
+ assert_bdrv_graph_readable();
bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
return ret;
}
-static int coroutine_fn
+static int coroutine_fn GRAPH_RDLOCK
bdrv_driver_pwritev_compressed(BlockDriverState *bs, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
size_t qiov_offset)
BlockDriver *drv = bs->drv;
QEMUIOVector local_qiov;
int ret;
+ assert_bdrv_graph_readable();
bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
return ret;
}
-static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
- int64_t offset, int64_t bytes, QEMUIOVector *qiov,
- size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_do_copy_on_readv(BdrvChild *child, int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
BlockDriverState *bs = child->bs;
void *bounce_buffer = NULL;
BlockDriver *drv = bs->drv;
- int64_t cluster_offset;
- int64_t cluster_bytes;
+ int64_t align_offset;
+ int64_t align_bytes;
int64_t skip_bytes;
int ret;
int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
* BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
* is one reason we loop rather than doing it all at once.
*/
- bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
- skip_bytes = offset - cluster_offset;
+ bdrv_round_to_subclusters(bs, offset, bytes, &align_offset, &align_bytes);
+ skip_bytes = offset - align_offset;
trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
- cluster_offset, cluster_bytes);
+ align_offset, align_bytes);
- while (cluster_bytes) {
+ while (align_bytes) {
int64_t pnum;
if (skip_write) {
ret = 1; /* "already allocated", so nothing will be copied */
- pnum = MIN(cluster_bytes, max_transfer);
+ pnum = MIN(align_bytes, max_transfer);
} else {
- ret = bdrv_is_allocated(bs, cluster_offset,
- MIN(cluster_bytes, max_transfer), &pnum);
+ ret = bdrv_is_allocated(bs, align_offset,
+ MIN(align_bytes, max_transfer), &pnum);
if (ret < 0) {
/*
* Safe to treat errors in querying allocation as if
* unallocated; we'll probably fail again soon on the
* read, but at least that will set a decent errno.
*/
- pnum = MIN(cluster_bytes, max_transfer);
+ pnum = MIN(align_bytes, max_transfer);
}
/* Stop at EOF if the image ends in the middle of the cluster */
/* Must copy-on-read; use the bounce buffer */
pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
if (!bounce_buffer) {
- int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
+ int64_t max_we_need = MAX(pnum, align_bytes - pnum);
int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);
}
qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
- ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
+ ret = bdrv_driver_preadv(bs, align_offset, pnum,
&local_qiov, 0, 0);
if (ret < 0) {
goto err;
}
- bdrv_debug_event(bs, BLKDBG_COR_WRITE);
+ bdrv_co_debug_event(bs, BLKDBG_COR_WRITE);
if (drv->bdrv_co_pwrite_zeroes &&
buffer_is_zero(bounce_buffer, pnum)) {
/* FIXME: Should we (perhaps conditionally) be setting
* BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
* that still correctly reads as zero? */
- ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
+ ret = bdrv_co_do_pwrite_zeroes(bs, align_offset, pnum,
BDRV_REQ_WRITE_UNCHANGED);
} else {
/* This does not change the data on the disk, it is not
* necessary to flush even in cache=writethrough mode.
*/
- ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
+ ret = bdrv_driver_pwritev(bs, align_offset, pnum,
&local_qiov, 0,
BDRV_REQ_WRITE_UNCHANGED);
}
}
}
- cluster_offset += pnum;
- cluster_bytes -= pnum;
+ align_offset += pnum;
+ align_bytes -= pnum;
progress += pnum - skip_bytes;
skip_bytes = 0;
}
* handles copy on read, zeroing after EOF, and fragmentation of large
* reads; any other features must be implemented by the caller.
*/
-static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
- BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
- int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_aligned_preadv(BdrvChild *child, BdrvTrackedRequest *req,
+ int64_t offset, int64_t bytes, int64_t align,
+ QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
BlockDriverState *bs = child->bs;
int64_t total_bytes, max_bytes;
}
/* Forward the request to the BlockDriver, possibly fragmenting it */
- total_bytes = bdrv_getlength(bs);
+ total_bytes = bdrv_co_getlength(bs);
if (total_bytes < 0) {
ret = total_bytes;
goto out;
* @merge_reads is true for small requests,
* if @buf_len == @head + bytes + @tail. In this case it is possible that both
* head and tail exist but @buf_len == align and @tail_buf == @buf.
+ *
+ * @write is true for write requests, false for read requests.
+ *
+ * If padding makes the vector too long (exceeding IOV_MAX), then we need to
+ * merge existing vector elements into a single one. @collapse_bounce_buf acts
+ * as the bounce buffer in such cases. @pre_collapse_qiov holds the
+ * pre-collapse I/O vector elements, so that for read requests the data can be
+ * copied back after the read is done.
*/
typedef struct BdrvRequestPadding {
uint8_t *buf;
size_t head;
size_t tail;
bool merge_reads;
+ bool write;
QEMUIOVector local_qiov;
+
+ uint8_t *collapse_bounce_buf;
+ size_t collapse_len;
+ QEMUIOVector pre_collapse_qiov;
} BdrvRequestPadding;
static bool bdrv_init_padding(BlockDriverState *bs,
int64_t offset, int64_t bytes,
+ bool write,
BdrvRequestPadding *pad)
{
int64_t align = bs->bl.request_alignment;
pad->tail_buf = pad->buf + pad->buf_len - align;
}
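+    /*
+     * Remember the direction so that bdrv_padding_finalize() knows whether
+     * collapsed read data must be copied back out of the bounce buffer.
+     */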
+ pad->write = write;
+
return true;
}
-static coroutine_fn int bdrv_padding_rmw_read(BdrvChild *child,
- BdrvTrackedRequest *req,
- BdrvRequestPadding *pad,
- bool zero_middle)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_padding_rmw_read(BdrvChild *child, BdrvTrackedRequest *req,
+ BdrvRequestPadding *pad, bool zero_middle)
{
QEMUIOVector local_qiov;
BlockDriverState *bs = child->bs;
qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
if (pad->head) {
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
+ bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
}
if (pad->merge_reads && pad->tail) {
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
+ bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
}
ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
align, &local_qiov, 0, 0);
return ret;
}
if (pad->head) {
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+ bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
}
if (pad->merge_reads && pad->tail) {
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+ bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
}
if (pad->merge_reads) {
if (pad->tail) {
qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
+ bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
ret = bdrv_aligned_preadv(
child, req,
req->overlap_offset + req->overlap_bytes - align,
if (ret < 0) {
return ret;
}
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+ bdrv_co_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
}
zero_mem:
return 0;
}
-static void bdrv_padding_destroy(BdrvRequestPadding *pad)
+/**
+ * Free *pad's associated buffers, and perform any necessary finalization steps.
+ */
+static void bdrv_padding_finalize(BdrvRequestPadding *pad)
{
+ if (pad->collapse_bounce_buf) {
+ if (!pad->write) {
+ /*
+ * If padding required elements in the vector to be collapsed into a
+ * bounce buffer, copy the bounce buffer content back
+ */
+ qemu_iovec_from_buf(&pad->pre_collapse_qiov, 0,
+ pad->collapse_bounce_buf, pad->collapse_len);
+ }
+ qemu_vfree(pad->collapse_bounce_buf);
+ qemu_iovec_destroy(&pad->pre_collapse_qiov);
+ }
if (pad->buf) {
qemu_vfree(pad->buf);
qemu_iovec_destroy(&pad->local_qiov);
memset(pad, 0, sizeof(*pad));
}
+/*
+ * Create pad->local_qiov by wrapping @iov in the padding head and tail, while
+ * ensuring that the resulting vector will not exceed IOV_MAX elements.
+ *
+ * To ensure this, when necessary, the first two or three elements of @iov are
+ * merged into pad->collapse_bounce_buf and replaced by a reference to that
+ * bounce buffer in pad->local_qiov.
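+ *
+ * For example, if @iov already has IOV_MAX elements and both head and tail
+ * padding are needed, the concatenation would have IOV_MAX + 2 elements: the
+ * surplus is 2, so the first three elements are merged into a single bounce
+ * buffer entry, bringing the total back down to IOV_MAX.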
+ *
+ * After performing a read request, the data from the bounce buffer must be
+ * copied back into pad->pre_collapse_qiov (e.g. by bdrv_padding_finalize()).
+ */
+static int bdrv_create_padded_qiov(BlockDriverState *bs,
+ BdrvRequestPadding *pad,
+ struct iovec *iov, int niov,
+ size_t iov_offset, size_t bytes)
+{
+ int padded_niov, surplus_count, collapse_count;
+
+    /* The caller must not pass a vector that already exceeds IOV_MAX */
+ assert(niov <= IOV_MAX);
+
+ /*
+ * Cannot pad if resulting length would exceed SIZE_MAX. Returning an error
+ * to the guest is not ideal, but there is little else we can do. At least
+ * this will practically never happen on 64-bit systems.
+ */
+ if (SIZE_MAX - pad->head < bytes ||
+ SIZE_MAX - pad->head - bytes < pad->tail)
+ {
+ return -EINVAL;
+ }
+
+ /* Length of the resulting IOV if we just concatenated everything */
+ padded_niov = !!pad->head + niov + !!pad->tail;
+
+ qemu_iovec_init(&pad->local_qiov, MIN(padded_niov, IOV_MAX));
+
+ if (pad->head) {
+ qemu_iovec_add(&pad->local_qiov, pad->buf, pad->head);
+ }
+
+ /*
+ * If padded_niov > IOV_MAX, we cannot just concatenate everything.
+ * Instead, merge the first two or three elements of @iov to reduce the
+ * number of vector elements as necessary.
+ */
+ if (padded_niov > IOV_MAX) {
+ /*
+         * Only head and tail can have led to the number of entries exceeding
+ * IOV_MAX, so we can exceed it by the head and tail at most. We need
+ * to reduce the number of elements by `surplus_count`, so we merge that
+ * many elements plus one into one element.
+ */
+ surplus_count = padded_niov - IOV_MAX;
+ assert(surplus_count <= !!pad->head + !!pad->tail);
+ collapse_count = surplus_count + 1;
+
+ /*
+ * Move the elements to collapse into `pad->pre_collapse_qiov`, then
+ * advance `iov` (and associated variables) by those elements.
+ */
+ qemu_iovec_init(&pad->pre_collapse_qiov, collapse_count);
+ qemu_iovec_concat_iov(&pad->pre_collapse_qiov, iov,
+ collapse_count, iov_offset, SIZE_MAX);
+ iov += collapse_count;
+ iov_offset = 0;
+ niov -= collapse_count;
+ bytes -= pad->pre_collapse_qiov.size;
+
+ /*
+ * Construct the bounce buffer to match the length of the to-collapse
+ * vector elements, and for write requests, initialize it with the data
+ * from those elements. Then add it to `pad->local_qiov`.
+ */
+ pad->collapse_len = pad->pre_collapse_qiov.size;
+ pad->collapse_bounce_buf = qemu_blockalign(bs, pad->collapse_len);
+ if (pad->write) {
+ qemu_iovec_to_buf(&pad->pre_collapse_qiov, 0,
+ pad->collapse_bounce_buf, pad->collapse_len);
+ }
+ qemu_iovec_add(&pad->local_qiov,
+ pad->collapse_bounce_buf, pad->collapse_len);
+ }
+
+ qemu_iovec_concat_iov(&pad->local_qiov, iov, niov, iov_offset, bytes);
+
+ if (pad->tail) {
+ qemu_iovec_add(&pad->local_qiov,
+ pad->buf + pad->buf_len - pad->tail, pad->tail);
+ }
+
+ assert(pad->local_qiov.niov == MIN(padded_niov, IOV_MAX));
+ return 0;
+}
+
/*
* bdrv_pad_request
*
* read of padding, bdrv_padding_rmw_read() should be called separately if
* needed.
*
+ * @write is true for write requests, false for read requests.
+ *
 * Request parameters (@qiov, @qiov_offset, @offset, @bytes) are in-out:
 * - on function start they represent the original request
 * - on failure or when padding is not needed they are unchanged
static int bdrv_pad_request(BlockDriverState *bs,
QEMUIOVector **qiov, size_t *qiov_offset,
int64_t *offset, int64_t *bytes,
+ bool write,
BdrvRequestPadding *pad, bool *padded,
BdrvRequestFlags *flags)
{
int ret;
+ struct iovec *sliced_iov;
+ int sliced_niov;
+ size_t sliced_head, sliced_tail;
- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
+ /* Should have been checked by the caller already */
+ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
+ if (ret < 0) {
+ return ret;
+ }
- if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
+ if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
if (padded) {
*padded = false;
}
return 0;
}
- ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
- *qiov, *qiov_offset, *bytes,
- pad->buf + pad->buf_len - pad->tail,
- pad->tail);
+ sliced_iov = qemu_iovec_slice(*qiov, *qiov_offset, *bytes,
+ &sliced_head, &sliced_tail,
+ &sliced_niov);
+
+ /* Guaranteed by bdrv_check_request32() */
+ assert(*bytes <= SIZE_MAX);
+ ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
+ sliced_head, *bytes);
if (ret < 0) {
- bdrv_padding_destroy(pad);
+ bdrv_padding_finalize(pad);
return ret;
}
*bytes += pad->head + pad->tail;
trace_bdrv_co_preadv_part(bs, offset, bytes, flags);
- if (!bdrv_is_inserted(bs)) {
+ if (!bdrv_co_is_inserted(bs)) {
return -ENOMEDIUM;
}
flags |= BDRV_REQ_COPY_ON_READ;
}
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
- NULL, &flags);
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, false,
+ &pad, NULL, &flags);
if (ret < 0) {
goto fail;
}
bs->bl.request_alignment,
qiov, qiov_offset, flags);
tracked_request_end(&req);
- bdrv_padding_destroy(&pad);
+ bdrv_padding_finalize(&pad);
fail:
bdrv_dec_in_flight(bs);
return ret;
}
-static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int64_t bytes, BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
QEMUIOVector qiov;
bs->bl.request_alignment);
int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
+ assert_bdrv_graph_readable();
bdrv_check_request(offset, bytes, &error_abort);
if (!drv) {
return ret;
}
-static inline int coroutine_fn
+static inline int coroutine_fn GRAPH_RDLOCK
bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
BdrvTrackedRequest *req, int flags)
{
* Forwards an already correctly aligned write request to the BlockDriver,
* after possibly fragmenting it.
*/
-static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
- BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
- int64_t align, QEMUIOVector *qiov, size_t qiov_offset,
- BdrvRequestFlags flags)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_aligned_pwritev(BdrvChild *child, BdrvTrackedRequest *req,
+ int64_t offset, int64_t bytes, int64_t align,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
flags |= BDRV_REQ_MAY_UNMAP;
}
+
+ /* Can't use optimization hint with bufferless zero write */
+ flags &= ~BDRV_REQ_REGISTERED_BUF;
}
if (ret < 0) {
/* Do nothing, write notifier decided to fail this request */
} else if (flags & BDRV_REQ_ZERO_WRITE) {
- bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
+ bdrv_co_debug_event(bs, BLKDBG_PWRITEV_ZERO);
ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
} else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
ret = bdrv_driver_pwritev_compressed(bs, offset, bytes,
qiov, qiov_offset);
} else if (bytes <= max_transfer) {
- bdrv_debug_event(bs, BLKDBG_PWRITEV);
+ bdrv_co_debug_event(bs, BLKDBG_PWRITEV);
ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags);
} else {
- bdrv_debug_event(bs, BLKDBG_PWRITEV);
+ bdrv_co_debug_event(bs, BLKDBG_PWRITEV);
while (bytes_remaining) {
int num = MIN(bytes_remaining, max_transfer);
int local_flags = flags;
bytes_remaining -= num;
}
}
- bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
+ bdrv_co_debug_event(bs, BLKDBG_PWRITEV_DONE);
if (ret >= 0) {
ret = 0;
return ret;
}
-static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
- int64_t offset,
- int64_t bytes,
- BdrvRequestFlags flags,
- BdrvTrackedRequest *req)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_do_zero_pwritev(BdrvChild *child, int64_t offset, int64_t bytes,
+ BdrvRequestFlags flags, BdrvTrackedRequest *req)
{
BlockDriverState *bs = child->bs;
QEMUIOVector local_qiov;
/* This flag doesn't make sense for padding or zero writes */
flags &= ~BDRV_REQ_REGISTERED_BUF;
- padding = bdrv_init_padding(bs, offset, bytes, &pad);
+ padding = bdrv_init_padding(bs, offset, bytes, true, &pad);
if (padding) {
assert(!(flags & BDRV_REQ_NO_WAIT));
bdrv_make_request_serialising(req, align);
}
out:
- bdrv_padding_destroy(&pad);
+ bdrv_padding_finalize(&pad);
return ret;
}
trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags);
- if (!bdrv_is_inserted(bs)) {
+ if (!bdrv_co_is_inserted(bs)) {
return -ENOMEDIUM;
}
* bdrv_co_do_zero_pwritev() does aligning by itself, so, we do
* alignment only if there is no ZERO flag.
*/
- ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
- &padded, &flags);
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, true,
+ &pad, &padded, &flags);
if (ret < 0) {
return ret;
}
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
qiov, qiov_offset, flags);
- bdrv_padding_destroy(&pad);
+ bdrv_padding_finalize(&pad);
out:
tracked_request_end(&req);
{
IO_CODE();
trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
+ assert_bdrv_graph_readable();
if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
flags &= ~BDRV_REQ_MAY_UNMAP;
* BDRV_BLOCK_OFFSET_VALID bit is set, 'map' and 'file' (if non-NULL) are
* set to the host mapping and BDS corresponding to the guest offset.
*/
-static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
- bool want_zero,
- int64_t offset, int64_t bytes,
- int64_t *pnum, int64_t *map,
- BlockDriverState **file)
+static int coroutine_fn GRAPH_RDLOCK
+bdrv_co_block_status(BlockDriverState *bs, bool want_zero,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map, BlockDriverState **file)
{
int64_t total_size;
int64_t n; /* bytes */
bool has_filtered_child;
assert(pnum);
+ assert_bdrv_graph_readable();
*pnum = 0;
- total_size = bdrv_getlength(bs);
+ total_size = bdrv_co_getlength(bs);
if (total_size < 0) {
ret = total_size;
goto early_out;
bytes = n;
}
- /* Must be non-NULL or bdrv_getlength() would have failed */
+ /* Must be non-NULL or bdrv_co_getlength() would have failed */
assert(bs->drv);
has_filtered_child = bdrv_filter_child(bs);
if (!bs->drv->bdrv_co_block_status && !has_filtered_child) {
if (!cow_bs) {
ret |= BDRV_BLOCK_ZERO;
} else if (want_zero) {
- int64_t size2 = bdrv_getlength(cow_bs);
+ int64_t size2 = bdrv_co_getlength(cow_bs);
if (size2 >= 0 && offset >= size2) {
ret |= BDRV_BLOCK_ZERO;
IO_CODE();
assert(!include_base || base); /* Can't include NULL base */
+ assert_bdrv_graph_readable();
if (!depth) {
depth = &dummy;
return ret;
}
+int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
+ BlockDriverState *base,
+ int64_t offset, int64_t bytes,
+ int64_t *pnum, int64_t *map,
+ BlockDriverState **file)
+{
+ IO_CODE();
+ return bdrv_co_common_block_status_above(bs, base, false, true, offset,
+ bytes, pnum, map, file, NULL);
+}
+
int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file)
return (pnum == bytes) && (ret & BDRV_BLOCK_ZERO);
}
+int coroutine_fn bdrv_co_is_allocated(BlockDriverState *bs, int64_t offset,
+ int64_t bytes, int64_t *pnum)
+{
+ int ret;
+ int64_t dummy;
+ IO_CODE();
+
+ ret = bdrv_co_common_block_status_above(bs, bs, true, false, offset,
+ bytes, pnum ? pnum : &dummy, NULL,
+ NULL, NULL);
+ if (ret < 0) {
+ return ret;
+ }
+ return !!(ret & BDRV_BLOCK_ALLOCATED);
+}
+
int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum)
{
return !!(ret & BDRV_BLOCK_ALLOCATED);
}
+/* See bdrv_is_allocated_above for documentation */
+int coroutine_fn bdrv_co_is_allocated_above(BlockDriverState *top,
+ BlockDriverState *base,
+ bool include_base, int64_t offset,
+ int64_t bytes, int64_t *pnum)
+{
+ int depth;
+ int ret;
+ IO_CODE();
+
+ ret = bdrv_co_common_block_status_above(top, base, include_base, false,
+ offset, bytes, pnum, NULL, NULL,
+ &depth);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (ret & BDRV_BLOCK_ALLOCATED) {
+ return depth;
+ }
+ return 0;
+}
+
/*
* Given an image chain: ... -> [BASE] -> [INTER1] -> [INTER2] -> [TOP]
*
int64_t bytes, int64_t *pnum)
{
int depth;
- int ret = bdrv_common_block_status_above(top, base, include_base, false,
- offset, bytes, pnum, NULL, NULL,
- &depth);
+ int ret;
IO_CODE();
+
+ ret = bdrv_common_block_status_above(top, base, include_base, false,
+ offset, bytes, pnum, NULL, NULL,
+ &depth);
if (ret < 0) {
return ret;
}
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret;
IO_CODE();
+ assert_bdrv_graph_readable();
ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
if (ret < 0) {
bdrv_inc_in_flight(bs);
- if (drv->bdrv_load_vmstate) {
- ret = drv->bdrv_load_vmstate(bs, qiov, pos);
+ if (drv->bdrv_co_load_vmstate) {
+ ret = drv->bdrv_co_load_vmstate(bs, qiov, pos);
} else if (child_bs) {
ret = bdrv_co_readv_vmstate(child_bs, qiov, pos);
} else {
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret;
IO_CODE();
+ assert_bdrv_graph_readable();
ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
if (ret < 0) {
bdrv_inc_in_flight(bs);
- if (drv->bdrv_save_vmstate) {
- ret = drv->bdrv_save_vmstate(bs, qiov, pos);
+ if (drv->bdrv_co_save_vmstate) {
+ ret = drv->bdrv_co_save_vmstate(bs, qiov, pos);
} else if (child_bs) {
ret = bdrv_co_writev_vmstate(child_bs, qiov, pos);
} else {
int ret = 0;
IO_CODE();
+ assert_bdrv_graph_readable();
bdrv_inc_in_flight(bs);
- if (!bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
+ if (!bdrv_co_is_inserted(bs) || bdrv_is_read_only(bs) ||
bdrv_is_sg(bs)) {
goto early_exit;
}
- qemu_co_mutex_lock(&bs->reqs_lock);
+ qemu_mutex_lock(&bs->reqs_lock);
current_gen = qatomic_read(&bs->write_gen);
/* Wait until any previous flushes are completed */
/* Flushes reach this point in nondecreasing current_gen order. */
bs->active_flush_req = true;
- qemu_co_mutex_unlock(&bs->reqs_lock);
+ qemu_mutex_unlock(&bs->reqs_lock);
/* Write back all layers by calling one driver function */
if (bs->drv->bdrv_co_flush) {
}
/* Write back cached data to the OS even with cache=unsafe */
- BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
+ BLKDBG_CO_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
if (bs->drv->bdrv_co_flush_to_os) {
ret = bs->drv->bdrv_co_flush_to_os(bs);
if (ret < 0) {
goto flush_children;
}
- BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
+ BLKDBG_CO_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
if (!bs->drv) {
/* bs->drv->bdrv_co_flush() might have ejected the BDS
* (even in case of apparent success) */
bs->flushed_gen = current_gen;
}
- qemu_co_mutex_lock(&bs->reqs_lock);
+ qemu_mutex_lock(&bs->reqs_lock);
bs->active_flush_req = false;
/* Return value is ignored - it's ok if wait queue is empty */
qemu_co_queue_next(&bs->flush_queue);
- qemu_co_mutex_unlock(&bs->reqs_lock);
+ qemu_mutex_unlock(&bs->reqs_lock);
early_exit:
bdrv_dec_in_flight(bs);
int head, tail, align;
BlockDriverState *bs = child->bs;
IO_CODE();
+ assert_bdrv_graph_readable();
- if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
+ if (!bs || !bs->drv || !bdrv_co_is_inserted(bs)) {
return -ENOMEDIUM;
}
};
BlockAIOCB *acb;
IO_CODE();
+ assert_bdrv_graph_readable();
bdrv_inc_in_flight(bs);
if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
return co.ret;
}
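+/*
+ * Report zone descriptors starting at @offset. *nr_zones is in-out: it passes
+ * the capacity of @zones in and returns the number of descriptors filled.
+ */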
+int coroutine_fn bdrv_co_zone_report(BlockDriverState *bs, int64_t offset,
+ unsigned int *nr_zones,
+ BlockZoneDescriptor *zones)
+{
+ BlockDriver *drv = bs->drv;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+ IO_CODE();
+
+ bdrv_inc_in_flight(bs);
+ if (!drv || !drv->bdrv_co_zone_report || bs->bl.zoned == BLK_Z_NONE) {
+ co.ret = -ENOTSUP;
+ goto out;
+ }
+ co.ret = drv->bdrv_co_zone_report(bs, offset, nr_zones, zones);
+out:
+ bdrv_dec_in_flight(bs);
+ return co.ret;
+}
+
+int coroutine_fn bdrv_co_zone_mgmt(BlockDriverState *bs, BlockZoneOp op,
+ int64_t offset, int64_t len)
+{
+ BlockDriver *drv = bs->drv;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+ IO_CODE();
+
+ bdrv_inc_in_flight(bs);
+ if (!drv || !drv->bdrv_co_zone_mgmt || bs->bl.zoned == BLK_Z_NONE) {
+ co.ret = -ENOTSUP;
+ goto out;
+ }
+ co.ret = drv->bdrv_co_zone_mgmt(bs, op, offset, len);
+out:
+ bdrv_dec_in_flight(bs);
+ return co.ret;
+}
+
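+/*
+ * Append @qiov at the write pointer of the zone containing *offset; on
+ * success, the driver reports the offset at which the data was actually
+ * written back through *offset.
+ */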
+int coroutine_fn bdrv_co_zone_append(BlockDriverState *bs, int64_t *offset,
+ QEMUIOVector *qiov,
+ BdrvRequestFlags flags)
+{
+ int ret;
+ BlockDriver *drv = bs->drv;
+ CoroutineIOCompletion co = {
+ .coroutine = qemu_coroutine_self(),
+ };
+ IO_CODE();
+
+ ret = bdrv_check_qiov_request(*offset, qiov->size, qiov, 0, NULL);
+ if (ret < 0) {
+ return ret;
+ }
+
+ bdrv_inc_in_flight(bs);
+ if (!drv || !drv->bdrv_co_zone_append || bs->bl.zoned == BLK_Z_NONE) {
+ co.ret = -ENOTSUP;
+ goto out;
+ }
+ co.ret = drv->bdrv_co_zone_append(bs, offset, qiov, flags);
+out:
+ bdrv_dec_in_flight(bs);
+ return co.ret;
+}
+
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
IO_CODE();
return mem;
}
-void bdrv_io_plug(BlockDriverState *bs)
-{
- BdrvChild *child;
- IO_CODE();
-
- QLIST_FOREACH(child, &bs->children, next) {
- bdrv_io_plug(child->bs);
- }
-
- if (qatomic_fetch_inc(&bs->io_plugged) == 0) {
- BlockDriver *drv = bs->drv;
- if (drv && drv->bdrv_io_plug) {
- drv->bdrv_io_plug(bs);
- }
- }
-}
-
-void bdrv_io_unplug(BlockDriverState *bs)
-{
- BdrvChild *child;
- IO_CODE();
-
- assert(bs->io_plugged);
- if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
- BlockDriver *drv = bs->drv;
- if (drv && drv->bdrv_io_unplug) {
- drv->bdrv_io_unplug(bs);
- }
- }
-
- QLIST_FOREACH(child, &bs->children, next) {
- bdrv_io_unplug(child->bs);
- }
-}
-
/* Helper that undoes bdrv_register_buf() when it fails partway through */
-static void bdrv_register_buf_rollback(BlockDriverState *bs,
- void *host,
- size_t size,
- BdrvChild *final_child)
+static void GRAPH_RDLOCK
+bdrv_register_buf_rollback(BlockDriverState *bs, void *host, size_t size,
+ BdrvChild *final_child)
{
BdrvChild *child;
+ GLOBAL_STATE_CODE();
+ assert_bdrv_graph_readable();
+
QLIST_FOREACH(child, &bs->children, next) {
if (child == final_child) {
break;
BdrvChild *child;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
if (bs->drv && bs->drv->bdrv_register_buf) {
if (!bs->drv->bdrv_register_buf(bs, host, size, errp)) {
return false;
BdrvChild *child;
GLOBAL_STATE_CODE();
+ GRAPH_RDLOCK_GUARD_MAINLOOP();
+
if (bs->drv && bs->drv->bdrv_unregister_buf) {
bs->drv->bdrv_unregister_buf(bs, host, size);
}
}
}
-static int coroutine_fn bdrv_co_copy_range_internal(
+static int coroutine_fn GRAPH_RDLOCK bdrv_co_copy_range_internal(
BdrvChild *src, int64_t src_offset, BdrvChild *dst,
int64_t dst_offset, int64_t bytes,
BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
{
BdrvTrackedRequest req;
int ret;
+ assert_bdrv_graph_readable();
/* TODO We can support BDRV_REQ_NO_FALLBACK here */
assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
assert(!(read_flags & BDRV_REQ_NO_WAIT));
assert(!(write_flags & BDRV_REQ_NO_WAIT));
- if (!dst || !dst->bs || !bdrv_is_inserted(dst->bs)) {
+ if (!dst || !dst->bs || !bdrv_co_is_inserted(dst->bs)) {
return -ENOMEDIUM;
}
ret = bdrv_check_request32(dst_offset, bytes, NULL, 0);
return bdrv_co_pwrite_zeroes(dst, dst_offset, bytes, write_flags);
}
- if (!src || !src->bs || !bdrv_is_inserted(src->bs)) {
+ if (!src || !src->bs || !bdrv_co_is_inserted(src->bs)) {
return -ENOMEDIUM;
}
ret = bdrv_check_request32(src_offset, bytes, NULL, 0);
BdrvRequestFlags write_flags)
{
IO_CODE();
+ assert_bdrv_graph_readable();
trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
read_flags, write_flags);
return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
BdrvRequestFlags write_flags)
{
IO_CODE();
+ assert_bdrv_graph_readable();
trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
read_flags, write_flags);
return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
BdrvRequestFlags write_flags)
{
IO_CODE();
+ assert_bdrv_graph_readable();
+
return bdrv_co_copy_range_from(src, src_offset,
dst, dst_offset,
bytes, read_flags, write_flags);
int64_t old_size, new_bytes;
int ret;
IO_CODE();
+ assert_bdrv_graph_readable();
/* if bs->drv == NULL, bs is closed, so there's nothing to do here */
if (!drv) {
return ret;
}
- old_size = bdrv_getlength(bs);
+ old_size = bdrv_co_getlength(bs);
if (old_size < 0) {
error_setg_errno(errp, -old_size, "Failed to get old image size");
return old_size;
if (new_bytes && backing) {
int64_t backing_len;
- backing_len = bdrv_getlength(backing->bs);
+ backing_len = bdrv_co_getlength(backing->bs);
if (backing_len < 0) {
ret = backing_len;
error_setg_errno(errp, -ret, "Could not get backing file size");
goto out;
}
- ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
+ ret = bdrv_co_refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
if (ret < 0) {
error_setg_errno(errp, -ret, "Could not refresh total sector count");
} else {
offset = bs->total_sectors * BDRV_SECTOR_SIZE;
}
- /* It's possible that truncation succeeded but refresh_total_sectors
+ /*
+ * It's possible that truncation succeeded but bdrv_refresh_total_sectors
* failed, but the latter doesn't affect how we should finish the request.
- * Pass 0 as the last parameter so that dirty bitmaps etc. are handled. */
+ * Pass 0 as the last parameter so that dirty bitmaps etc. are handled.
+ */
bdrv_co_write_req_finish(child, offset - new_bytes, new_bytes, &req, 0);
out:
BlockDriver *drv = bs->drv;
int ret;
IO_CODE();
+ assert_bdrv_graph_readable();
if (!drv) {
return -ENOMEDIUM;
BlockDriver *drv = bs->drv;
int ret;
IO_CODE();
+ assert_bdrv_graph_readable();
if (!drv) {
return -ENOMEDIUM;
BlockDriver *drv = bs->drv;
int ret;
IO_CODE();
+ assert_bdrv_graph_readable();
if (!drv) {
return -ENOMEDIUM;