/* Default alignment based on whether driver has byte interface */
bs->bl.request_alignment = (drv->bdrv_co_preadv ||
- drv->bdrv_aio_preadv) ? 1 : 512;
+ drv->bdrv_aio_preadv ||
+ drv->bdrv_co_preadv_part) ? 1 : 512;
/* Take some limits from the children as a default */
if (bs->file) {
static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
- QEMUIOVector *qiov, int flags)
+ QEMUIOVector *qiov,
+ size_t qiov_offset, int flags)
{
BlockDriver *drv = bs->drv;
int64_t sector_num;
unsigned int nb_sectors;
+ QEMUIOVector local_qiov;
+ int ret;
assert(!(flags & ~BDRV_REQ_MASK));
assert(!(flags & BDRV_REQ_NO_FALLBACK));
return -ENOMEDIUM;
}
+ if (drv->bdrv_co_preadv_part) {
+ return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset,
+ flags);
+ }
+
+ if (qiov_offset > 0 || bytes != qiov->size) {
+ qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
+ qiov = &local_qiov;
+ }
+
if (drv->bdrv_co_preadv) {
- return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
+ ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags);
+ goto out;
}
if (drv->bdrv_aio_preadv) {
acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags,
bdrv_co_io_em_complete, &co);
if (acb == NULL) {
- return -EIO;
+ ret = -EIO;
+ goto out;
} else {
qemu_coroutine_yield();
- return co.ret;
+ ret = co.ret;
+ goto out;
}
}
assert(bytes <= BDRV_REQUEST_MAX_BYTES);
assert(drv->bdrv_co_readv);
- return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+ ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov);
+
+out:
+ if (qiov == &local_qiov) {
+ qemu_iovec_destroy(&local_qiov);
+ }
+
+ return ret;
}
static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
- QEMUIOVector *qiov, int flags)
+ QEMUIOVector *qiov,
+ size_t qiov_offset, int flags)
{
BlockDriver *drv = bs->drv;
int64_t sector_num;
unsigned int nb_sectors;
+ QEMUIOVector local_qiov;
int ret;
assert(!(flags & ~BDRV_REQ_MASK));
return -ENOMEDIUM;
}
+ if (drv->bdrv_co_pwritev_part) {
+ ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
+ flags & bs->supported_write_flags);
+ flags &= ~bs->supported_write_flags;
+ goto emulate_flags;
+ }
+
+ if (qiov_offset > 0 || bytes != qiov->size) {
+ qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes);
+ qiov = &local_qiov;
+ }
+
if (drv->bdrv_co_pwritev) {
ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
flags & bs->supported_write_flags);
ret = bdrv_co_flush(bs);
}
+ if (qiov == &local_qiov) {
+ qemu_iovec_destroy(&local_qiov);
+ }
+
return ret;
}
+/*
+ * Issue a compressed write of @bytes bytes at @offset, taking the data from
+ * @qiov starting at @qiov_offset.
+ *
+ * Returns -ENOMEDIUM without a driver, -ENOTSUP if the driver cannot
+ * compress, otherwise whatever the driver callback returns.
+ */
static int coroutine_fn
bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
- uint64_t bytes, QEMUIOVector *qiov)
+ uint64_t bytes, QEMUIOVector *qiov,
+ size_t qiov_offset)
{
BlockDriver *drv = bs->drv;
+ QEMUIOVector slice;
+ int rc;
if (!drv) {
return -ENOMEDIUM;
}
- if (!drv->bdrv_co_pwritev_compressed) {
+ if (!block_driver_can_compress(drv)) {
return -ENOTSUP;
}
- return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
+ /* Prefer the callback that understands qiov_offset natively */
+ if (drv->bdrv_co_pwritev_compressed_part) {
+ return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes,
+ qiov, qiov_offset);
+ }
+
+ if (qiov_offset != 0) {
+ /* The plain callback has no offset argument: give it a slice */
+ qemu_iovec_init_slice(&slice, qiov, qiov_offset, bytes);
+ rc = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &slice);
+ qemu_iovec_destroy(&slice);
+
+ return rc;
+ }
+
+ return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov);
}
static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
- int flags)
+ size_t qiov_offset, int flags)
{
BlockDriverState *bs = child->bs;
* modifying the image file. This is critical for zero-copy guest I/O
* where anything might happen inside guest memory.
*/
- void *bounce_buffer;
+ void *bounce_buffer = NULL;
BlockDriver *drv = bs->drv;
- QEMUIOVector local_qiov;
int64_t cluster_offset;
int64_t cluster_bytes;
size_t skip_bytes;
trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
cluster_offset, cluster_bytes);
- bounce_buffer = qemu_try_blockalign(bs,
- MIN(MIN(max_transfer, cluster_bytes),
- MAX_BOUNCE_BUFFER));
- if (bounce_buffer == NULL) {
- ret = -ENOMEM;
- goto err;
- }
-
while (cluster_bytes) {
int64_t pnum;
assert(skip_bytes < pnum);
if (ret <= 0) {
+ QEMUIOVector local_qiov;
+
/* Must copy-on-read; use the bounce buffer */
pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
+ if (!bounce_buffer) {
+ int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
+ int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
+ int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);
+
+ bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len);
+ if (!bounce_buffer) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ }
qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
- &local_qiov, 0);
+ &local_qiov, 0, 0);
if (ret < 0) {
goto err;
}
* necessary to flush even in cache=writethrough mode.
*/
ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
- &local_qiov,
+ &local_qiov, 0,
BDRV_REQ_WRITE_UNCHANGED);
}
}
if (!(flags & BDRV_REQ_PREFETCH)) {
- qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes,
+ qemu_iovec_from_buf(qiov, qiov_offset + progress,
+ bounce_buffer + skip_bytes,
pnum - skip_bytes);
}
} else if (!(flags & BDRV_REQ_PREFETCH)) {
/* Read directly into the destination */
- qemu_iovec_init(&local_qiov, qiov->niov);
- qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes);
- ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size,
- &local_qiov, 0);
- qemu_iovec_destroy(&local_qiov);
+ ret = bdrv_driver_preadv(bs, offset + progress,
+ MIN(pnum - skip_bytes, bytes - progress),
+ qiov, qiov_offset + progress, 0);
if (ret < 0) {
goto err;
}
*/
static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
- int64_t align, QEMUIOVector *qiov, int flags)
+ int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
BlockDriverState *bs = child->bs;
int64_t total_bytes, max_bytes;
assert(is_power_of_2(align));
assert((offset & (align - 1)) == 0);
assert((bytes & (align - 1)) == 0);
- assert(!qiov || bytes == qiov->size);
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
align);
}
if (!ret || pnum != bytes) {
- ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov, flags);
+ ret = bdrv_co_do_copy_on_readv(child, offset, bytes,
+ qiov, qiov_offset, flags);
goto out;
} else if (flags & BDRV_REQ_PREFETCH) {
goto out;
max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
if (bytes <= max_bytes && bytes <= max_transfer) {
- ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0);
+ ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0);
goto out;
}
int num;
if (max_bytes) {
- QEMUIOVector local_qiov;
-
num = MIN(bytes_remaining, MIN(max_bytes, max_transfer));
assert(num);
- qemu_iovec_init(&local_qiov, qiov->niov);
- qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
+ /* Chunk position in qiov is relative to the caller's qiov_offset */
ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining,
- num, &local_qiov, 0);
+ num, qiov,
+ qiov_offset + bytes - bytes_remaining, 0);
max_bytes -= num;
- qemu_iovec_destroy(&local_qiov);
} else {
num = bytes_remaining;
- ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0,
+ /* Zero the same slice of qiov that a read would have filled */
+ ret = qemu_iovec_memset(qiov,
+ qiov_offset + bytes - bytes_remaining, 0,
}
/*
- * Handle a read request in coroutine context
+ * Request padding
+ *
+ * |<---- align ----->| |<----- align ---->|
+ * |<- head ->|<------------- bytes ------------->|<-- tail -->|
+ * | | | | | |
+ * -*----------$-------*-------- ... --------*-----$------------*---
+ * | | | | | |
+ * | offset | | end |
+ * ALIGN_DOWN(offset) ALIGN_UP(offset) ALIGN_DOWN(end) ALIGN_UP(end)
+ * [buf ... ) [tail_buf )
+ *
+ * @buf is an aligned allocation needed to store the @head and @tail paddings.
+ * @head is placed at the beginning of @buf and @tail at the end of @buf.
+ *
+ * @tail_buf is a pointer to sub-buffer, corresponding to align-sized chunk
+ * around tail, if tail exists.
+ *
+ * @merge_reads is true for small requests,
+ * if @buf_len == @head + bytes + @tail. In this case it is possible that both
+ * head and tail exist but @buf_len == align and @tail_buf == @buf.
+ */
+typedef struct BdrvRequestPadding {
+ uint8_t *buf; /* aligned allocation of buf_len bytes */
+ size_t buf_len; /* align or 2 * align */
+ uint8_t *tail_buf; /* last align bytes of buf; NULL if there is no tail */
+ size_t head; /* bytes from ALIGN_DOWN(offset) to offset */
+ size_t tail; /* bytes from end to ALIGN_UP(end) */
+ bool merge_reads; /* head + bytes + tail == buf_len */
+ QEMUIOVector local_qiov; /* vector set up by bdrv_pad_request() */
+} BdrvRequestPadding;
+
+/*
+ * bdrv_init_padding
+ *
+ * Compute the head/tail padding that aligns [@offset, @offset + @bytes) to
+ * bs->bl.request_alignment and, if any is needed, allocate @pad->buf.
+ *
+ * @pad is always zeroed first. Returns false without allocating when the
+ * request is empty or already aligned, true otherwise.
+ */
+static bool bdrv_init_padding(BlockDriverState *bs,
+ int64_t offset, int64_t bytes,
+ BdrvRequestPadding *pad)
+{
+ const uint64_t align = bs->bl.request_alignment;
+ const uint64_t mask = align - 1;
+ size_t past_end;
+ size_t total;
+
+ memset(pad, 0, sizeof(*pad));
+
+ /* Distance of each request edge from its alignment boundary */
+ pad->head = offset & mask;
+ past_end = (offset + bytes) & mask;
+ pad->tail = past_end ? align - past_end : 0;
+
+ if (!bytes || (!pad->head && !pad->tail)) {
+ return false;
+ }
+
+ total = pad->head + bytes + pad->tail;
+ if (total > align && pad->head && pad->tail) {
+ /* Head and tail fall into different blocks: one block for each */
+ pad->buf_len = 2 * align;
+ } else {
+ pad->buf_len = align;
+ }
+ pad->buf = qemu_blockalign(bs, pad->buf_len);
+ pad->merge_reads = (total == pad->buf_len);
+ if (pad->tail) {
+ pad->tail_buf = pad->buf + pad->buf_len - align;
+ }
+
+ return true;
+}
+
+/*
+ * bdrv_padding_rmw_read
+ *
+ * Read step of a read-modify-write cycle: fill @pad->buf with the current
+ * contents of the aligned block(s) holding the head and/or tail padding.
+ * The head block is read at req->overlap_offset, the tail block from the last
+ * align bytes of [overlap_offset, overlap_offset + overlap_bytes).
+ *
+ * @req must already be serialising and @pad must own a buffer (asserted).
+ * If @zero_middle is true, the buffer bytes between head and tail are zeroed
+ * after reading.
+ *
+ * Marked coroutine_fn because it calls bdrv_aligned_preadv().
+ *
+ * Returns 0 on success, or the negative value of the read that failed.
+ */
+static int coroutine_fn bdrv_padding_rmw_read(BdrvChild *child,
+ BdrvTrackedRequest *req,
+ BdrvRequestPadding *pad,
+ bool zero_middle)
+{
+ QEMUIOVector local_qiov;
+ BlockDriverState *bs = child->bs;
+ uint64_t align = bs->bl.request_alignment;
+ int ret;
+
+ assert(req->serialising && pad->buf);
+
+ if (pad->head || pad->merge_reads) {
+ /* A merged read covers the whole buffer, otherwise just the head */
+ uint64_t bytes = pad->merge_reads ? pad->buf_len : align;
+
+ qemu_iovec_init_buf(&local_qiov, pad->buf, bytes);
+
+ if (pad->head) {
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
+ }
+ if (pad->merge_reads && pad->tail) {
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
+ }
+ ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes,
+ align, &local_qiov, 0, 0);
+ if (ret < 0) {
+ return ret;
+ }
+ if (pad->head) {
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
+ }
+ if (pad->merge_reads && pad->tail) {
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+ }
+
+ if (pad->merge_reads) {
+ /* The tail was part of the read above */
+ goto zero_mem;
+ }
+ }
+
+ if (pad->tail) {
+ qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align);
+
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
+ ret = bdrv_aligned_preadv(
+ child, req,
+ req->overlap_offset + req->overlap_bytes - align,
+ align, align, &local_qiov, 0, 0);
+ if (ret < 0) {
+ return ret;
+ }
+ bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+ }
+
+zero_mem:
+ if (zero_middle) {
+ memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail);
+ }
+
+ return 0;
+}
+
+/* Release what @pad owns; does nothing when no padding buffer is set. */
+static void bdrv_padding_destroy(BdrvRequestPadding *pad)
+{
+ if (!pad->buf) {
+ return;
+ }
+
+ qemu_vfree(pad->buf);
+ qemu_iovec_destroy(&pad->local_qiov);
+}
+
+/*
+ * bdrv_pad_request
+ *
+ * Exchange request parameters with padded request if needed. Don't include RMW
+ * read of padding, bdrv_padding_rmw_read() should be called separately if
+ * needed.
+ *
+ * All parameters except @bs are in-out: they describe the original request on
+ * entry and the padded request (if padding was needed) on return.
+ *
+ * Function always succeeds. Returns true if the request was padded, false if
+ * no padding was needed.
*/
+static bool bdrv_pad_request(BlockDriverState *bs,
+ QEMUIOVector **qiov, size_t *qiov_offset,
+ int64_t *offset, unsigned int *bytes,
+ BdrvRequestPadding *pad)
+{
+ uint8_t *tail_start;
+
+ if (!bdrv_init_padding(bs, *offset, *bytes, pad)) {
+ /* Nothing to pad: the request parameters stay as they are */
+ return false;
+ }
+
+ /* Pass head padding, the caller's slice and tail padding, in that order */
+ tail_start = pad->buf + pad->buf_len - pad->tail;
+ qemu_iovec_init_extended(&pad->local_qiov,
+ pad->buf, pad->head,
+ *qiov, *qiov_offset, *bytes,
+ tail_start, pad->tail);
+
+ /* Point the caller at the padded request */
+ *offset -= pad->head;
+ *bytes += pad->head + pad->tail;
+ *qiov_offset = 0;
+ *qiov = &pad->local_qiov;
+
+ return true;
+}
+
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
+{
+ return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
+}
+
+int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
+ int64_t offset, unsigned int bytes,
+ QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
BlockDriverState *bs = child->bs;
- BlockDriver *drv = bs->drv;
BdrvTrackedRequest req;
-
- uint64_t align = bs->bl.request_alignment;
- uint8_t *head_buf = NULL;
- uint8_t *tail_buf = NULL;
- QEMUIOVector local_qiov;
- bool use_local_qiov = false;
+ BdrvRequestPadding pad;
int ret;
- trace_bdrv_co_preadv(child->bs, offset, bytes, flags);
-
- if (!drv) {
- return -ENOMEDIUM;
- }
+ trace_bdrv_co_preadv(bs, offset, bytes, flags);
ret = bdrv_check_byte_request(bs, offset, bytes);
if (ret < 0) {
flags |= BDRV_REQ_COPY_ON_READ;
}
- /* Align read if necessary by padding qiov */
- if (offset & (align - 1)) {
- head_buf = qemu_blockalign(bs, align);
- qemu_iovec_init(&local_qiov, qiov->niov + 2);
- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
- use_local_qiov = true;
-
- bytes += offset & (align - 1);
- offset = offset & ~(align - 1);
- }
-
- if ((offset + bytes) & (align - 1)) {
- if (!use_local_qiov) {
- qemu_iovec_init(&local_qiov, qiov->niov + 1);
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
- use_local_qiov = true;
- }
- tail_buf = qemu_blockalign(bs, align);
- qemu_iovec_add(&local_qiov, tail_buf,
- align - ((offset + bytes) & (align - 1)));
-
- bytes = ROUND_UP(bytes, align);
- }
+ bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad);
tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ);
- ret = bdrv_aligned_preadv(child, &req, offset, bytes, align,
- use_local_qiov ? &local_qiov : qiov,
- flags);
+ ret = bdrv_aligned_preadv(child, &req, offset, bytes,
+ bs->bl.request_alignment,
+ qiov, qiov_offset, flags);
tracked_request_end(&req);
bdrv_dec_in_flight(bs);
- if (use_local_qiov) {
- qemu_iovec_destroy(&local_qiov);
- qemu_vfree(head_buf);
- qemu_vfree(tail_buf);
- }
+ bdrv_padding_destroy(&pad);
return ret;
}
}
qemu_iovec_init_buf(&qiov, buf, num);
- ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags);
+ ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags);
/* Keep bounce buffer around if it is big enough for all
* all future requests.
*/
static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child,
BdrvTrackedRequest *req, int64_t offset, unsigned int bytes,
- int64_t align, QEMUIOVector *qiov, int flags)
+ int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
BlockDriverState *bs = child->bs;
BlockDriver *drv = bs->drv;
assert(is_power_of_2(align));
assert((offset & (align - 1)) == 0);
assert((bytes & (align - 1)) == 0);
- assert(!qiov || bytes == qiov->size);
+ assert(!qiov || qiov_offset + bytes <= qiov->size);
max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
align);
if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF &&
!(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes &&
- qemu_iovec_is_zero(qiov)) {
+ qemu_iovec_is_zero(qiov, qiov_offset, bytes)) {
flags |= BDRV_REQ_ZERO_WRITE;
if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) {
flags |= BDRV_REQ_MAY_UNMAP;
bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO);
ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags);
} else if (flags & BDRV_REQ_WRITE_COMPRESSED) {
- ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov);
+ ret = bdrv_driver_pwritev_compressed(bs, offset, bytes,
+ qiov, qiov_offset);
} else if (bytes <= max_transfer) {
bdrv_debug_event(bs, BLKDBG_PWRITEV);
- ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags);
+ ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags);
} else {
bdrv_debug_event(bs, BLKDBG_PWRITEV);
while (bytes_remaining) {
int num = MIN(bytes_remaining, max_transfer);
- QEMUIOVector local_qiov;
int local_flags = flags;
assert(num);
* need to flush on the last iteration */
local_flags &= ~BDRV_REQ_FUA;
}
- qemu_iovec_init(&local_qiov, qiov->niov);
- qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num);
+ /* Chunk position in qiov is relative to the caller's qiov_offset */
ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining,
- num, &local_qiov, local_flags);
- qemu_iovec_destroy(&local_qiov);
+ num, qiov,
+ qiov_offset + bytes - bytes_remaining,
+ local_flags);
if (ret < 0) {
break;
}
BdrvTrackedRequest *req)
{
BlockDriverState *bs = child->bs;
- uint8_t *buf = NULL;
QEMUIOVector local_qiov;
uint64_t align = bs->bl.request_alignment;
- unsigned int head_padding_bytes, tail_padding_bytes;
int ret = 0;
+ bool padding;
+ BdrvRequestPadding pad;
- head_padding_bytes = offset & (align - 1);
- tail_padding_bytes = (align - (offset + bytes)) & (align - 1);
-
-
- assert(flags & BDRV_REQ_ZERO_WRITE);
- if (head_padding_bytes || tail_padding_bytes) {
- buf = qemu_blockalign(bs, align);
- qemu_iovec_init_buf(&local_qiov, buf, align);
- }
- if (head_padding_bytes) {
- uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes);
-
- /* RMW the unaligned part before head. */
+ padding = bdrv_init_padding(bs, offset, bytes, &pad);
+ if (padding) {
mark_request_serialising(req, align);
wait_serialising_requests(req);
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
- ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align,
- align, &local_qiov, 0);
- if (ret < 0) {
- goto fail;
- }
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
- memset(buf + head_padding_bytes, 0, zero_bytes);
- ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align,
- align, &local_qiov,
- flags & ~BDRV_REQ_ZERO_WRITE);
- if (ret < 0) {
- goto fail;
+ /* Do not write back padding that could not be read */
+ ret = bdrv_padding_rmw_read(child, req, &pad, true);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (pad.head || pad.merge_reads) {
+ int64_t aligned_offset = offset & ~(align - 1);
+ int64_t write_bytes = pad.merge_reads ? pad.buf_len : align;
+
+ qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes);
+ ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes,
+ align, &local_qiov, 0,
+ flags & ~BDRV_REQ_ZERO_WRITE);
+ if (ret < 0 || pad.merge_reads) {
+ /* Error or all work is done */
+ goto out;
+ }
+ offset += write_bytes - pad.head;
+ bytes -= write_bytes - pad.head;
}
- offset += zero_bytes;
- bytes -= zero_bytes;
}
assert(!bytes || (offset & (align - 1)) == 0);
/* Write the aligned part in the middle. */
uint64_t aligned_bytes = bytes & ~(align - 1);
ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align,
- NULL, flags);
+ NULL, 0, flags);
if (ret < 0) {
- goto fail;
+ goto out;
}
bytes -= aligned_bytes;
offset += aligned_bytes;
assert(!bytes || (offset & (align - 1)) == 0);
if (bytes) {
- assert(align == tail_padding_bytes + bytes);
- /* RMW the unaligned part after tail. */
- mark_request_serialising(req, align);
- wait_serialising_requests(req);
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
- ret = bdrv_aligned_preadv(child, req, offset, align,
- align, &local_qiov, 0);
- if (ret < 0) {
- goto fail;
- }
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
+ assert(align == pad.tail + bytes);
- memset(buf, 0, bytes);
+ qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align);
ret = bdrv_aligned_pwritev(child, req, offset, align, align,
- &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE);
+ &local_qiov, 0,
+ flags & ~BDRV_REQ_ZERO_WRITE);
}
-fail:
- qemu_vfree(buf);
- return ret;
+out:
+ bdrv_padding_destroy(&pad);
+
+ return ret;
}
/*
int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
int64_t offset, unsigned int bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
+{
+ return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
+}
+
+int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
+ int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset,
+ BdrvRequestFlags flags)
{
BlockDriverState *bs = child->bs;
BdrvTrackedRequest req;
uint64_t align = bs->bl.request_alignment;
- uint8_t *head_buf = NULL;
- uint8_t *tail_buf = NULL;
- QEMUIOVector local_qiov;
- bool use_local_qiov = false;
+ BdrvRequestPadding pad;
int ret;
trace_bdrv_co_pwritev(child->bs, offset, bytes, flags);
goto out;
}
- if (offset & (align - 1)) {
- QEMUIOVector head_qiov;
-
+ if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) {
mark_request_serialising(&req, align);
wait_serialising_requests(&req);
-
- head_buf = qemu_blockalign(bs, align);
- qemu_iovec_init_buf(&head_qiov, head_buf, align);
-
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD);
- ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align,
- align, &head_qiov, 0);
- if (ret < 0) {
- goto fail;
- }
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD);
-
- qemu_iovec_init(&local_qiov, qiov->niov + 2);
- qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1));
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
- use_local_qiov = true;
-
- bytes += offset & (align - 1);
- offset = offset & ~(align - 1);
-
- /* We have read the tail already if the request is smaller
- * than one aligned block.
- */
- if (bytes < align) {
- qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes);
- bytes = align;
- }
- }
-
- if ((offset + bytes) & (align - 1)) {
- QEMUIOVector tail_qiov;
- size_t tail_bytes;
- bool waited;
-
- mark_request_serialising(&req, align);
- waited = wait_serialising_requests(&req);
- assert(!waited || !use_local_qiov);
-
- tail_buf = qemu_blockalign(bs, align);
- qemu_iovec_init_buf(&tail_qiov, tail_buf, align);
-
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL);
- ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1),
- align, align, &tail_qiov, 0);
- if (ret < 0) {
- goto fail;
- }
- bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL);
-
- if (!use_local_qiov) {
- qemu_iovec_init(&local_qiov, qiov->niov + 1);
- qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size);
- use_local_qiov = true;
- }
-
- tail_bytes = (offset + bytes) & (align - 1);
- qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes);
-
- bytes = ROUND_UP(bytes, align);
+ /* Propagate RMW read errors instead of writing unread padding */
+ ret = bdrv_padding_rmw_read(child, &req, &pad, false);
+ if (ret < 0) {
+ goto fail;
+ }
}
ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align,
- use_local_qiov ? &local_qiov : qiov,
- flags);
+ qiov, qiov_offset, flags);
fail:
+ bdrv_padding_destroy(&pad);
- if (use_local_qiov) {
- qemu_iovec_destroy(&local_qiov);
- }
- qemu_vfree(head_buf);
- qemu_vfree(tail_buf);
out:
tracked_request_end(&req);
bdrv_dec_in_flight(bs);
+
return ret;
}
uint64_t file_cluster_offset,
uint64_t offset,
uint64_t bytes,
- QEMUIOVector *qiov);
+ QEMUIOVector *qiov,
+ size_t qiov_offset);
static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename)
{
return ret;
}
-static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset,
- uint64_t bytes, QEMUIOVector *qiov,
- int flags)
+static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov,
+ size_t qiov_offset, int flags)
{
BDRVQcow2State *s = bs->opaque;
int offset_in_cluster;
int ret;
unsigned int cur_bytes; /* number of bytes in current iteration */
uint64_t cluster_offset = 0;
- uint64_t bytes_done = 0;
- QEMUIOVector hd_qiov;
uint8_t *cluster_data = NULL;
- qemu_iovec_init(&hd_qiov, qiov->niov);
-
while (bytes != 0) {
/* prepare next request */
offset_in_cluster = offset_into_cluster(s, offset);
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
-
switch (ret) {
case QCOW2_CLUSTER_UNALLOCATED:
if (bs->backing) {
BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
- ret = bdrv_co_preadv(bs->backing, offset, cur_bytes,
- &hd_qiov, 0);
+ ret = bdrv_co_preadv_part(bs->backing, offset, cur_bytes,
+ qiov, qiov_offset, 0);
if (ret < 0) {
goto fail;
}
} else {
/* Note: in this case, no need to wait */
- qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
+ qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
}
break;
case QCOW2_CLUSTER_ZERO_PLAIN:
case QCOW2_CLUSTER_ZERO_ALLOC:
- qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes);
+ qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
break;
case QCOW2_CLUSTER_COMPRESSED:
ret = qcow2_co_preadv_compressed(bs, cluster_offset,
offset, cur_bytes,
- &hd_qiov);
+ qiov, qiov_offset);
if (ret < 0) {
goto fail;
}
}
assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
- }
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- ret = bdrv_co_preadv(s->data_file,
- cluster_offset + offset_in_cluster,
- cur_bytes, &hd_qiov, 0);
- if (ret < 0) {
- goto fail;
- }
- if (bs->encrypted) {
- assert(s->crypto);
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ ret = bdrv_co_pread(s->data_file,
+ cluster_offset + offset_in_cluster,
+ cur_bytes, cluster_data, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+
assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
if (qcow2_co_decrypt(bs, cluster_offset, offset,
ret = -EIO;
goto fail;
}
- qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes);
+ qemu_iovec_from_buf(qiov, qiov_offset, cluster_data, cur_bytes);
+ } else {
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ ret = bdrv_co_preadv_part(s->data_file,
+ cluster_offset + offset_in_cluster,
+ cur_bytes, qiov, qiov_offset, 0);
+ if (ret < 0) {
+ goto fail;
+ }
}
break;
bytes -= cur_bytes;
offset += cur_bytes;
- bytes_done += cur_bytes;
+ qiov_offset += cur_bytes;
}
ret = 0;
fail:
- qemu_iovec_destroy(&hd_qiov);
qemu_vfree(cluster_data);
return ret;
/* Check if it's possible to merge a write request with the writing of
* the data from the COW regions */
static bool merge_cow(uint64_t offset, unsigned bytes,
- QEMUIOVector *hd_qiov, QCowL2Meta *l2meta)
+ QEMUIOVector *qiov, size_t qiov_offset,
+ QCowL2Meta *l2meta)
{
QCowL2Meta *m;
/* Make sure that adding both COW regions to the QEMUIOVector
* does not exceed IOV_MAX */
- if (hd_qiov->niov > IOV_MAX - 2) {
+ if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) {
continue;
}
- m->data_qiov = hd_qiov;
+ m->data_qiov = qiov;
+ m->data_qiov_offset = qiov_offset;
return true;
}
return 0;
}
-static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
- uint64_t bytes, QEMUIOVector *qiov,
- int flags)
+static coroutine_fn int qcow2_co_pwritev_part(
+ BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
BDRVQcow2State *s = bs->opaque;
int offset_in_cluster;
int ret;
unsigned int cur_bytes; /* number of sectors in current iteration */
uint64_t cluster_offset;
- QEMUIOVector hd_qiov;
+ QEMUIOVector encrypted_qiov;
uint64_t bytes_done = 0;
uint8_t *cluster_data = NULL;
QCowL2Meta *l2meta = NULL;
trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
- qemu_iovec_init(&hd_qiov, qiov->niov);
-
qemu_co_mutex_lock(&s->lock);
while (bytes != 0) {
qemu_co_mutex_unlock(&s->lock);
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
-
if (bs->encrypted) {
assert(s->crypto);
if (!cluster_data) {
}
}
- assert(hd_qiov.size <=
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
- qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
+ assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ qemu_iovec_to_buf(qiov, qiov_offset + bytes_done,
+ cluster_data, cur_bytes);
if (qcow2_co_encrypt(bs, cluster_offset, offset,
cluster_data, cur_bytes) < 0) {
goto out_unlocked;
}
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
+ qemu_iovec_init_buf(&encrypted_qiov, cluster_data, cur_bytes);
}
/* Try to efficiently initialize the physical space with zeroes */
* writing of the guest data together with that of the COW regions.
* If it's not possible (or not necessary) then write the
* guest data now. */
- if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) {
+ if (!merge_cow(offset, cur_bytes,
+ bs->encrypted ? &encrypted_qiov : qiov,
+ bs->encrypted ? 0 : qiov_offset + bytes_done, l2meta))
+ {
BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
trace_qcow2_writev_data(qemu_coroutine_self(),
cluster_offset + offset_in_cluster);
- ret = bdrv_co_pwritev(s->data_file,
- cluster_offset + offset_in_cluster,
- cur_bytes, &hd_qiov, 0);
+ ret = bdrv_co_pwritev_part(
+ s->data_file, cluster_offset + offset_in_cluster, cur_bytes,
+ bs->encrypted ? &encrypted_qiov : qiov,
+ bs->encrypted ? 0 : qiov_offset + bytes_done, 0);
if (ret < 0) {
goto out_unlocked;
}
qemu_co_mutex_unlock(&s->lock);
- qemu_iovec_destroy(&hd_qiov);
qemu_vfree(cluster_data);
trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static coroutine_fn int
-qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
- uint64_t bytes, QEMUIOVector *qiov)
+qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset)
{
BDRVQcow2State *s = bs->opaque;
int ret;
/* Zero-pad last write if image size is not cluster aligned */
memset(buf + bytes, 0, s->cluster_size - bytes);
}
- qemu_iovec_to_buf(qiov, 0, buf, bytes);
+ qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes);
out_buf = g_malloc(s->cluster_size);
buf, s->cluster_size);
if (out_len == -ENOMEM) {
/* could not compress: write normal cluster */
- ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
+ ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0);
if (ret < 0) {
goto fail;
}
uint64_t file_cluster_offset,
uint64_t offset,
uint64_t bytes,
- QEMUIOVector *qiov)
+ QEMUIOVector *qiov,
+ size_t qiov_offset)
{
BDRVQcow2State *s = bs->opaque;
int ret = 0, csize, nb_csectors;
goto fail;
}
- qemu_iovec_from_buf(qiov, 0, out_buf + offset_in_cluster, bytes);
+ qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes);
fail:
qemu_vfree(out_buf);
BDRVQcow2State *s = bs->opaque;
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
- return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos,
- qiov->size, qiov, 0);
+ return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos,
+ qiov->size, qiov, 0, 0);
}
static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
BDRVQcow2State *s = bs->opaque;
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
- return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos,
- qiov->size, qiov, 0);
+ return bs->drv->bdrv_co_preadv_part(bs, qcow2_vm_state_offset(s) + pos,
+ qiov->size, qiov, 0, 0);
}
/*
.bdrv_has_zero_init_truncate = bdrv_has_zero_init_1,
.bdrv_co_block_status = qcow2_co_block_status,
- .bdrv_co_preadv = qcow2_co_preadv,
- .bdrv_co_pwritev = qcow2_co_pwritev,
+ .bdrv_co_preadv_part = qcow2_co_preadv_part,
+ .bdrv_co_pwritev_part = qcow2_co_pwritev_part,
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
.bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
.bdrv_co_copy_range_from = qcow2_co_copy_range_from,
.bdrv_co_copy_range_to = qcow2_co_copy_range_to,
.bdrv_co_truncate = qcow2_co_truncate,
- .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
+ .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part,
.bdrv_make_empty = qcow2_make_empty,
.bdrv_snapshot_create = qcow2_snapshot_create,