#include "block/blockjob_int.h"
#include "block/block_int.h"
#include "block/coroutines.h"
+#include "block/write-threshold.h"
#include "qemu/cutils.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
static void bdrv_parent_cb_resize(BlockDriverState *bs);
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int bytes, BdrvRequestFlags flags);
+ int64_t offset, int64_t bytes, BdrvRequestFlags flags);
static void bdrv_parent_drained_begin(BlockDriverState *bs, BdrvChild *ignore,
bool ignore_bds_parents)
static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
{
+ dst->pdiscard_alignment = MAX(dst->pdiscard_alignment,
+ src->pdiscard_alignment);
dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
+ dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer,
+ src->max_hw_transfer);
dst->opt_mem_alignment = MAX(dst->opt_mem_alignment,
src->opt_mem_alignment);
dst->min_mem_alignment = MAX(dst->min_mem_alignment,
dst->max_iov = MIN_NON_ZERO(dst->max_iov, src->max_iov);
}
-void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
+typedef struct BdrvRefreshLimitsState { /* undo record for one bdrv_refresh_limits() call */
+ BlockDriverState *bs; /* node whose limits are being refreshed */
+ BlockLimits old_bl; /* copy of bs->bl taken before the refresh */
+} BdrvRefreshLimitsState;
+
+static void bdrv_refresh_limits_abort(void *opaque)
+{
+ BdrvRefreshLimitsState *s = opaque;
+ /*
+ * Transaction .abort callback: put back the limits that
+ * bdrv_refresh_limits() saved before it recomputed them.
+ */
+ s->bs->bl = s->old_bl;
+}
+
+static TransactionActionDrv bdrv_refresh_limits_drv = { /* used with tran_add() in bdrv_refresh_limits() */
+ .abort = bdrv_refresh_limits_abort, /* put back the saved bs->bl */
+ .clean = g_free, /* the opaque state was allocated with g_new() */
+};
+
+/* @tran may be NULL; in that case no rollback is possible. */
+void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
{
ERRP_GUARD();
BlockDriver *drv = bs->drv;
BdrvChild *c;
bool have_limits;
+ if (tran) {
+ BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
+ *s = (BdrvRefreshLimitsState) {
+ .bs = bs,
+ .old_bl = bs->bl,
+ };
+ tran_add(tran, &bdrv_refresh_limits_drv, s);
+ }
+
memset(&bs->bl, 0, sizeof(bs->bl));
if (!drv) {
QLIST_FOREACH(c, &bs->children, next) {
if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
{
- bdrv_refresh_limits(c->bs, errp);
+ bdrv_refresh_limits(c->bs, tran, errp);
if (*errp) {
return;
}
static void tracked_request_begin(BdrvTrackedRequest *req,
BlockDriverState *bs,
int64_t offset,
- uint64_t bytes,
+ int64_t bytes,
enum BdrvTrackedRequestType type)
{
- assert(bytes <= INT64_MAX && offset <= INT64_MAX - bytes);
+ bdrv_check_request(offset, bytes, &error_abort);
*req = (BdrvTrackedRequest){
.bs = bs,
}
static bool tracked_request_overlaps(BdrvTrackedRequest *req,
- int64_t offset, uint64_t bytes)
+ int64_t offset, int64_t bytes)
{
+ bdrv_check_request(offset, bytes, &error_abort);
+
/* aaaa bbbb */
if (offset >= req->overlap_offset + req->overlap_bytes) {
return false;
return true;
}
-static bool coroutine_fn
-bdrv_wait_serialising_requests_locked(BlockDriverState *bs,
- BdrvTrackedRequest *self)
+/*
+ * Called with self->bs->reqs_lock held.
+ *
+ * Scans self->bs->tracked_requests for a request that overlaps @self's
+ * overlap range, where at least one of the two is serialising, and that
+ * is not itself already waiting for another request.
+ *
+ * Returns the first such request, or NULL if there is none.
+ */
+static BdrvTrackedRequest *
+bdrv_find_conflicting_request(BdrvTrackedRequest *self)
{
BdrvTrackedRequest *req;
- bool retry;
- bool waited = false;
- do {
- retry = false;
- QLIST_FOREACH(req, &bs->tracked_requests, list) {
- if (req == self || (!req->serialising && !self->serialising)) {
- continue;
- }
- if (tracked_request_overlaps(req, self->overlap_offset,
- self->overlap_bytes))
- {
- /* Hitting this means there was a reentrant request, for
- * example, a block driver issuing nested requests. This must
- * never happen since it means deadlock.
- */
- assert(qemu_coroutine_self() != req->co);
-
- /* If the request is already (indirectly) waiting for us, or
- * will wait for us as soon as it wakes up, then just go on
- * (instead of producing a deadlock in the former case). */
- if (!req->waiting_for) {
- self->waiting_for = req;
- qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
- self->waiting_for = NULL;
- retry = true;
- waited = true;
- break;
- }
+ QLIST_FOREACH(req, &self->bs->tracked_requests, list) {
+ if (req == self || (!req->serialising && !self->serialising)) { /* skip ourselves and pairs where neither side is serialising */
+ continue;
+ }
+ if (tracked_request_overlaps(req, self->overlap_offset,
+ self->overlap_bytes))
+ {
+ /*
+ * Hitting this means there was a reentrant request, for
+ * example, a block driver issuing nested requests. This must
+ * never happen since it means deadlock.
+ */
+ assert(qemu_coroutine_self() != req->co);
+
+ /*
+ * If the request is already (indirectly) waiting for us, or
+ * will wait for us as soon as it wakes up, then just go on
+ * (instead of producing a deadlock in the former case).
+ */
+ if (!req->waiting_for) {
+ return req;
}
}
- } while (retry);
+ }
+
+ return NULL;
+}
+
+/*
+ * Called with self->bs->reqs_lock held.
+ *
+ * Waits, one conflicting request at a time, until
+ * bdrv_find_conflicting_request() reports no more conflicts for @self.
+ * While waiting, self->waiting_for points at the request being waited on.
+ *
+ * Returns true if it had to wait at least once.
+ */
+static bool coroutine_fn
+bdrv_wait_serialising_requests_locked(BdrvTrackedRequest *self)
+{
+ BdrvTrackedRequest *req;
+ bool waited = false;
+
+ while ((req = bdrv_find_conflicting_request(self))) { /* re-scan the list after every wake-up */
+ self->waiting_for = req;
+ qemu_co_queue_wait(&req->wait_queue, &self->bs->reqs_lock);
+ self->waiting_for = NULL;
+ waited = true;
+ }
+
return waited;
}
-bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
+/* Called with req->bs->reqs_lock held */
+static void tracked_request_set_serialising(BdrvTrackedRequest *req,
+ uint64_t align)
{
- BlockDriverState *bs = req->bs;
int64_t overlap_offset = req->offset & ~(align - 1);
- uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
- - overlap_offset;
- bool waited;
+ int64_t overlap_bytes =
+ ROUND_UP(req->offset + req->bytes, align) - overlap_offset;
+
+ bdrv_check_request(req->offset, req->bytes, &error_abort);
- qemu_co_mutex_lock(&bs->reqs_lock);
if (!req->serialising) {
qatomic_inc(&req->bs->serialising_in_flight);
req->serialising = true;
req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
- waited = bdrv_wait_serialising_requests_locked(bs, req);
- qemu_co_mutex_unlock(&bs->reqs_lock);
- return waited;
}
/**
}
qemu_co_mutex_lock(&bs->reqs_lock);
- waited = bdrv_wait_serialising_requests_locked(bs, self);
+ waited = bdrv_wait_serialising_requests_locked(self);
qemu_co_mutex_unlock(&bs->reqs_lock);
return waited;
}
-int bdrv_check_request(int64_t offset, int64_t bytes)
+bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
+ uint64_t align)
+{
+ bool waited;
+ /*
+ * With req->bs->reqs_lock taken: mark @req as serialising (its overlap
+ * range is computed from @align by tracked_request_set_serialising()),
+ * then wait for conflicting requests. Returns true if it had to wait.
+ */
+ qemu_co_mutex_lock(&req->bs->reqs_lock);
+
+ tracked_request_set_serialising(req, align);
+ waited = bdrv_wait_serialising_requests_locked(req);
+
+ qemu_co_mutex_unlock(&req->bs->reqs_lock);
+
+ return waited;
+}
+
+static int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ Error **errp)
{
- if (offset < 0 || bytes < 0) {
+ /*
+ * Validates a request. Returns 0 when every check passes; otherwise sets
+ * @errp and returns -EIO. With a NULL @qiov only @offset and @bytes are
+ * checked.
+ *
+ * Check generic offset/bytes correctness: neither may be negative, and
+ * neither they nor their sum may exceed BDRV_MAX_LENGTH.
+ */
+
+ if (offset < 0) {
+ error_setg(errp, "offset is negative: %" PRIi64, offset);
+ return -EIO;
+ }
+
+ if (bytes < 0) {
+ error_setg(errp, "bytes is negative: %" PRIi64, bytes);
return -EIO;
}
if (bytes > BDRV_MAX_LENGTH) {
+ error_setg(errp, "bytes(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
+ bytes, BDRV_MAX_LENGTH);
+ return -EIO;
+ }
+
+ if (offset > BDRV_MAX_LENGTH) {
+ error_setg(errp, "offset(%" PRIi64 ") exceeds maximum(%" PRIi64 ")",
+ offset, BDRV_MAX_LENGTH);
return -EIO;
}
if (offset > BDRV_MAX_LENGTH - bytes) {
+ error_setg(errp, "sum of offset(%" PRIi64 ") and bytes(%" PRIi64 ") "
+ "exceeds maximum(%" PRIi64 ")", offset, bytes,
+ BDRV_MAX_LENGTH);
+ return -EIO;
+ }
+
+ if (!qiov) { /* no vector supplied: nothing more to check */
+ return 0;
+ }
+
+ /*
+ * Check qiov and qiov_offset: @qiov_offset must lie within the vector,
+ * and @bytes must fit in what remains after it.
+ */
+
+ if (qiov_offset > qiov->size) {
+ error_setg(errp, "qiov_offset(%zu) overflow io vector size(%zu)",
+ qiov_offset, qiov->size);
+ return -EIO;
+ }
+
+ if (bytes > qiov->size - qiov_offset) { /* qiov_offset <= qiov->size was checked above, so the subtraction cannot wrap */
+ error_setg(errp, "bytes(%" PRIi64 ") + qiov_offset(%zu) overflow io "
+ "vector size(%zu)", bytes, qiov_offset, qiov->size);
return -EIO;
}
return 0;
}
-static int bdrv_check_request32(int64_t offset, int64_t bytes)
+int bdrv_check_request(int64_t offset, int64_t bytes, Error **errp)
{
- int ret = bdrv_check_request(offset, bytes);
+ return bdrv_check_qiov_request(offset, bytes, NULL, 0, errp); /* NULL qiov: only @offset and @bytes are validated */
+}
+
+static int bdrv_check_request32(int64_t offset, int64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset)
+{
+ int ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, NULL);
if (ret < 0) {
return ret;
}
}
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
- int bytes, BdrvRequestFlags flags)
+ int64_t bytes, BdrvRequestFlags flags) /* forwards to bdrv_pwritev() with no data (NULL) and BDRV_REQ_ZERO_WRITE added to @flags */
{
return bdrv_pwritev(child, offset, bytes, NULL,
BDRV_REQ_ZERO_WRITE | flags);
}
/* See bdrv_pwrite() for the return codes */
-int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int bytes)
+int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes)
{
int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
-EINVAL Invalid offset or number of bytes
-EACCES Trying to write a read-only device
*/
-int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf, int bytes)
+int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
+ int64_t bytes)
{
int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
* Returns 0 on success, -errno in error cases.
*/
int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
- const void *buf, int count)
+ const void *buf, int64_t count)
{
int ret;
}
static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
- uint64_t offset, uint64_t bytes,
+ int64_t offset, int64_t bytes,
QEMUIOVector *qiov,
size_t qiov_offset, int flags)
{
QEMUIOVector local_qiov;
int ret;
+ bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
assert(!(flags & ~BDRV_REQ_MASK));
assert(!(flags & BDRV_REQ_NO_FALLBACK));
}
static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
- uint64_t offset, uint64_t bytes,
+ int64_t offset, int64_t bytes,
QEMUIOVector *qiov,
size_t qiov_offset, int flags)
{
QEMUIOVector local_qiov;
int ret;
+ bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
assert(!(flags & ~BDRV_REQ_MASK));
assert(!(flags & BDRV_REQ_NO_FALLBACK));
}
static int coroutine_fn
-bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
- uint64_t bytes, QEMUIOVector *qiov,
+bdrv_driver_pwritev_compressed(BlockDriverState *bs, int64_t offset,
+ int64_t bytes, QEMUIOVector *qiov,
size_t qiov_offset)
{
BlockDriver *drv = bs->drv;
QEMUIOVector local_qiov;
int ret;
+ bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
+
if (!drv) {
return -ENOMEDIUM;
}
}
static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
- int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov,
size_t qiov_offset, int flags)
{
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
int64_t cluster_offset;
int64_t cluster_bytes;
- size_t skip_bytes;
+ int64_t skip_bytes;
int ret;
int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
BDRV_REQUEST_MAX_BYTES);
- unsigned int progress = 0;
+ int64_t progress = 0;
bool skip_write;
+ bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
+
if (!drv) {
return -ENOMEDIUM;
}
* reads; any other features must be implemented by the caller.
*/
static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
- BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+ BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
BlockDriverState *bs = child->bs;
int64_t total_bytes, max_bytes;
int ret = 0;
- uint64_t bytes_remaining = bytes;
+ int64_t bytes_remaining = bytes;
int max_transfer;
+ bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
assert(is_power_of_2(align));
assert((offset & (align - 1)) == 0);
assert((bytes & (align - 1)) == 0);
* with each other for the same cluster. For example, in copy-on-read
* it ensures that the CoR read and write operations are atomic and
* guest writes cannot interleave between them. */
- bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
+ bdrv_make_request_serialising(req, bdrv_get_cluster_size(bs));
} else {
bdrv_wait_serialising_requests(req);
}
if (flags & BDRV_REQ_COPY_ON_READ) {
int64_t pnum;
+ /* The flag BDRV_REQ_COPY_ON_READ has reached its addressee */
+ flags &= ~BDRV_REQ_COPY_ON_READ;
+
ret = bdrv_is_allocated(bs, offset, bytes, &pnum);
if (ret < 0) {
goto out;
goto out;
}
+ assert(!(flags & ~bs->supported_read_flags));
+
max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
if (bytes <= max_bytes && bytes <= max_transfer) {
- ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
+ ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, flags);
goto out;
}
while (bytes_remaining) {
- int num;
+ int64_t num;
if (max_bytes) {
num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
num, qiov,
- qiov_offset + bytes - bytes_remaining, 0);
+ qiov_offset + bytes - bytes_remaining,
+ flags);
max_bytes -= num;
} else {
num = bytes_remaining;
int64_t offset, int64_t bytes,
BdrvRequestPadding *pad)
{
- uint64_t align = bs->bl.request_alignment;
- size_t sum;
+ int64_t align = bs->bl.request_alignment;
+ int64_t sum;
+
+ bdrv_check_request(offset, bytes, &error_abort);
+ assert(align <= INT_MAX); /* documented in block/block_int.h */
+ assert(align <= SIZE_MAX / 2); /* so we can allocate the buffer */
memset(pad, 0, sizeof(*pad));
assert(req->serialising && pad->buf);
if (pad->head || pad->merge_reads) {
- uint64_t bytes = pad->merge_reads ? pad->buf_len : align;
+ int64_t bytes = pad->merge_reads ? pad->buf_len : align;
qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
qemu_vfree(pad->buf);
qemu_iovec_destroy(&pad->local_qiov);
}
+ memset(pad, 0, sizeof(*pad));
}
/*
* read of padding, bdrv_padding_rmw_read() should be called separately if
* needed.
*
- * All parameters except @bs are in-out: they represent original request at
- * function call and padded (if padding needed) at function finish.
- *
- * Function always succeeds.
+ * Request parameters (@qiov, @qiov_offset, @offset, @bytes) are in-out:
+ * - on function start they represent original request
+ * - on failure or when padding is not needed they are unchanged
+ * - on success when padding is needed they represent padded request
*/
-static bool bdrv_pad_request(BlockDriverState *bs,
- QEMUIOVector **qiov, size_t *qiov_offset,
- int64_t *offset, unsigned int *bytes,
- BdrvRequestPadding *pad)
+static int bdrv_pad_request(BlockDriverState *bs,
+ QEMUIOVector **qiov, size_t *qiov_offset,
+ int64_t *offset, int64_t *bytes,
+ BdrvRequestPadding *pad, bool *padded)
{
+ int ret;
+
+ bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
+
if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
- return false;
+ if (padded) {
+ *padded = false;
+ }
+ return 0;
}
- qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
- *qiov, *qiov_offset, *bytes,
- pad->buf + pad->buf_len - pad->tail, pad->tail);
+ ret = qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head,
+ *qiov, *qiov_offset, *bytes,
+ pad->buf + pad->buf_len - pad->tail,
+ pad->tail);
+ if (ret < 0) {
+ bdrv_padding_destroy(pad);
+ return ret;
+ }
*bytes += pad->head + pad->tail;
*offset -= pad->head;
*qiov = &pad->local_qiov;
*qiov_offset = 0;
+ if (padded) {
+ *padded = true;
+ }
- return true;
+ return 0;
}
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
- int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, /* same as bdrv_co_preadv_part() with a qiov_offset of 0 */
BdrvRequestFlags flags)
{
return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
}
int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
- int64_t offset, unsigned int bytes,
+ int64_t offset, int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags)
{
BdrvRequestPadding pad;
int ret;
- trace_bdrv_co_preadv(bs, offset, bytes, flags);
+ trace_bdrv_co_preadv_part(bs, offset, bytes, flags);
if (!bdrv_is_inserted(bs)) {
return -ENOMEDIUM;
}
- ret = bdrv_check_request32(offset, bytes);
+ ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
if (ret < 0) {
return ret;
}
flags |= BDRV_REQ_COPY_ON_READ;
}
- bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad);
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
+ NULL);
+ if (ret < 0) {
+ return ret;
+ }
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
ret = bdrv_aligned_preadv(child, &req, offset, bytes,
}
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
- int64_t offset, int bytes, BdrvRequestFlags flags)
+ int64_t offset, int64_t bytes, BdrvRequestFlags flags)
{
BlockDriver *drv = bs->drv;
QEMUIOVector qiov;
bs->bl.request_alignment);
int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer, MAX_BOUNCE_BUFFER);
+ bdrv_check_request(offset, bytes, &error_abort);
+
if (!drv) {
return -ENOMEDIUM;
}
assert(max_write_zeroes >= bs->bl.request_alignment);
while (bytes > 0 && !ret) {
- int num = bytes;
+ int64_t num = bytes;
/* Align request. Block drivers can expect the "bulk" of the request
* to be aligned, and that unaligned requests do not cross cluster
}
static inline int coroutine_fn
-bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
+bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, int64_t bytes,
BdrvTrackedRequest *req, int flags)
{
BlockDriverState *bs = child->bs;
- bool waited;
- int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
- if (bs->read_only) {
+ bdrv_check_request(offset, bytes, &error_abort);
+
+ if (bdrv_is_read_only(bs)) {
return -EPERM;
}
assert(!(bs->open_flags & BDRV_O_INACTIVE));
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
assert(!(flags & ~BDRV_REQ_MASK));
+ assert(!((flags & BDRV_REQ_NO_WAIT) && !(flags & BDRV_REQ_SERIALISING)));
if (flags & BDRV_REQ_SERIALISING) {
- waited = bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
- /*
- * For a misaligned request we should have already waited earlier,
- * because we come after bdrv_padding_rmw_read which must be called
- * with the request already marked as serialising.
- */
- assert(!waited ||
- (req->offset == req->overlap_offset &&
- req->bytes == req->overlap_bytes));
+ QEMU_LOCK_GUARD(&bs->reqs_lock);
+
+ tracked_request_set_serialising(req, bdrv_get_cluster_size(bs));
+
+ if ((flags & BDRV_REQ_NO_WAIT) && bdrv_find_conflicting_request(req)) {
+ return -EBUSY;
+ }
+
+ bdrv_wait_serialising_requests_locked(req);
} else {
bdrv_wait_serialising_requests(req);
}
assert(req->overlap_offset <= offset);
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
- assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
+ assert(offset + bytes <= bs->total_sectors * BDRV_SECTOR_SIZE ||
+ child->perm & BLK_PERM_RESIZE);
switch (req->type) {
case BDRV_TRACKED_WRITE:
} else {
assert(child->perm & BLK_PERM_WRITE);
}
- return notifier_with_return_list_notify(&bs->before_write_notifiers,
- req);
+ bdrv_write_threshold_check_write(bs, offset, bytes);
+ return 0;
case BDRV_TRACKED_TRUNCATE:
assert(child->perm & BLK_PERM_RESIZE);
return 0;
}
static inline void coroutine_fn
-bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes,
+bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, int64_t bytes,
BdrvTrackedRequest *req, int ret)
{
int64_t end_sector = DIV_ROUND_UP(offset + bytes, BDRV_SECTOR_SIZE);
BlockDriverState *bs = child->bs;
+ bdrv_check_request(offset, bytes, &error_abort);
+
qatomic_inc(&bs->write_gen);
/*
* after possibly fragmenting it.
*/
static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
- BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
+ BdrvTrackedRequest *req, int64_t offset, int64_t bytes,
int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
int ret;
- uint64_t bytes_remaining = bytes;
+ int64_t bytes_remaining = bytes;
int max_transfer;
+ bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
+
if (!drv) {
return -ENOMEDIUM;
}
assert(is_power_of_2(align));
assert((offset & (align - 1)) == 0);
assert((bytes & (align - 1)) == 0);
- assert(!qiov || qiov_offset + bytes <= qiov->size);
max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
align);
static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
int64_t offset,
- unsigned int bytes,
+ int64_t bytes,
BdrvRequestFlags flags,
BdrvTrackedRequest *req)
{
padding = bdrv_init_padding(bs, offset, bytes, &pad);
if (padding) {
- bdrv_mark_request_serialising(req, align);
+ bdrv_make_request_serialising(req, align);
bdrv_padding_rmw_read(child, req, &pad, true);
assert(!bytes || (offset & (align - 1)) == 0);
if (bytes >= align) {
/* Write the aligned part in the middle. */
- uint64_t aligned_bytes = bytes & ~(align - 1);
+ int64_t aligned_bytes = bytes & ~(align - 1);
ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
NULL, 0, flags);
if (ret < 0) {
* Handle a write request in coroutine context
*/
int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
- int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, /* same as bdrv_co_pwritev_part() with a qiov_offset of 0 */
BdrvRequestFlags flags)
{
return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
}
int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
- int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
+ int64_t offset, int64_t bytes, QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags)
{
BlockDriverState *bs = child->bs;
uint64_t align = bs->bl.request_alignment;
BdrvRequestPadding pad;
int ret;
+ bool padded = false;
- trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
+ trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags);
if (!bdrv_is_inserted(bs)) {
return -ENOMEDIUM;
}
- ret = bdrv_check_request32(offset, bytes);
+ ret = bdrv_check_request32(offset, bytes, qiov, qiov_offset);
if (ret < 0) {
return ret;
}
return 0;
}
+ if (!(flags & BDRV_REQ_ZERO_WRITE)) {
+ /*
+ * Pad the request for the following read-modify-write cycle.
+ * bdrv_co_do_zero_pwritev() does its own aligning, so we pad here
+ * only when BDRV_REQ_ZERO_WRITE is not set.
+ */
+ ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
+ &padded);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+
bdrv_inc_in_flight(bs);
- /*
- * Align write if necessary by performing a read-modify-write cycle.
- * Pad qiov with the read parts and be sure to have a tracked request not
- * only for bdrv_aligned_pwritev, but also for the reads of the RMW cycle.
- */
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_WRITE);
if (flags & BDRV_REQ_ZERO_WRITE) {
+ assert(!padded);
ret = bdrv_co_do_zero_pwritev(child, offset, bytes, flags, &req);
goto out;
}
- if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) {
- bdrv_mark_request_serialising(&req, align);
+ if (padded) {
+ /*
+ * Request was unaligned to request_alignment and therefore
+ * padded. We are going to do read-modify-write, and must
+ * serialize the request to prevent interactions of the
+ * widened region with other transactions.
+ */
+ bdrv_make_request_serialising(&req, align);
bdrv_padding_rmw_read(child, &req, &pad, false);
}
}
int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
- int bytes, BdrvRequestFlags flags)
+ int64_t bytes, BdrvRequestFlags flags)
{
trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
return -EPERM;
}
- ret = bdrv_check_request(offset, bytes);
+ ret = bdrv_check_request(offset, bytes, NULL);
if (ret < 0) {
return ret;
}
return true;
}
-void bdrv_add_before_write_notifier(BlockDriverState *bs,
- NotifierWithReturn *notifier)
-{
- notifier_with_return_list_add(&bs->before_write_notifiers, notifier);
-}
-
void bdrv_io_plug(BlockDriverState *bs)
{
BdrvChild *child;
}
static int coroutine_fn bdrv_co_copy_range_internal(
- BdrvChild *src, uint64_t src_offset, BdrvChild *dst,
- uint64_t dst_offset, uint64_t bytes,
+ BdrvChild *src, int64_t src_offset, BdrvChild *dst,
+ int64_t dst_offset, int64_t bytes,
BdrvRequestFlags read_flags, BdrvRequestFlags write_flags,
bool recurse_src)
{
if (!dst || !dst->bs || !bdrv_is_inserted(dst->bs)) {
return -ENOMEDIUM;
}
- ret = bdrv_check_request32(dst_offset, bytes);
+ ret = bdrv_check_request32(dst_offset, bytes, NULL, 0);
if (ret) {
return ret;
}
if (!src || !src->bs || !bdrv_is_inserted(src->bs)) {
return -ENOMEDIUM;
}
- ret = bdrv_check_request32(src_offset, bytes);
+ ret = bdrv_check_request32(src_offset, bytes, NULL, 0);
if (ret) {
return ret;
}
*
* See the comment of bdrv_co_copy_range for the parameter and return value
* semantics. */
-int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, uint64_t src_offset,
- BdrvChild *dst, uint64_t dst_offset,
- uint64_t bytes,
+int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes,
BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
*
* See the comment of bdrv_co_copy_range for the parameter and return value
* semantics. */
-int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, uint64_t src_offset,
- BdrvChild *dst, uint64_t dst_offset,
- uint64_t bytes,
+int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes,
BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
bytes, read_flags, write_flags, false);
}
-int coroutine_fn bdrv_co_copy_range(BdrvChild *src, uint64_t src_offset,
- BdrvChild *dst, uint64_t dst_offset,
- uint64_t bytes, BdrvRequestFlags read_flags,
+int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
+ BdrvChild *dst, int64_t dst_offset,
+ int64_t bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
return bdrv_co_copy_range_from(src, src_offset,
return -EINVAL;
}
- ret = bdrv_check_request(offset, 0);
+ ret = bdrv_check_request(offset, 0, errp);
if (ret < 0) {
- error_setg(errp, "Required too big image size, it must be not greater "
- "than %" PRId64, BDRV_MAX_LENGTH);
return ret;
}
return old_size;
}
+ if (bdrv_is_read_only(bs)) {
+ error_setg(errp, "Image is read-only");
+ return -EACCES;
+ }
+
if (offset > old_size) {
new_bytes = offset - old_size;
} else {
* new area, we need to make sure that no write requests are made to it
* concurrently or they might be overwritten by preallocation. */
if (new_bytes) {
- bdrv_mark_request_serialising(&req, 1);
- }
- if (bs->read_only) {
- error_setg(errp, "Image is read-only");
- ret = -EACCES;
- goto out;
+ bdrv_make_request_serialising(&req, 1);
}
ret = bdrv_co_write_req_prepare(child, offset - new_bytes, new_bytes, &req,
0);
return ret;
}
+
+void bdrv_cancel_in_flight(BlockDriverState *bs)
+{
+ if (!bs || !bs->drv) { /* no node or no driver attached: nothing to do */
+ return;
+ }
+ /* The driver callback is optional; call it only when it is provided. */
+ if (bs->drv->bdrv_cancel_in_flight) {
+ bs->drv->bdrv_cancel_in_flight(bs);
+ }
+}