* timer callback), it is a bug in the caller that should be fixed. */
assert(data.done);
- /* Reaquire the AioContext of bs if we dropped it */
+ /* Reacquire the AioContext of bs if we dropped it */
if (ctx != co_ctx) {
aio_context_acquire(ctx);
}
qatomic_dec(&req->bs->serialising_in_flight);
}
- qemu_co_mutex_lock(&req->bs->reqs_lock);
+ qemu_mutex_lock(&req->bs->reqs_lock);
QLIST_REMOVE(req, list);
+ qemu_mutex_unlock(&req->bs->reqs_lock);
+
+ /*
+ * At this point qemu_co_queue_wait(&req->wait_queue, ...) won't be called
+ * anymore because the request has been removed from the list, so it's safe
+ * to restart the queue outside reqs_lock to minimize the critical section.
+ */
qemu_co_queue_restart_all(&req->wait_queue);
- qemu_co_mutex_unlock(&req->bs->reqs_lock);
}
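
A note on the lock conversion running through these hunks: qemu_co_mutex_lock() suspends only the calling coroutine when contended, while qemu_mutex_lock() blocks the whole thread, so a QemuMutex is only appropriate for short critical sections that never yield while the lock is held. The hunk above keeps to that rule by unlinking the request under the lock and waking waiters only after dropping it. A minimal sketch of the resulting pattern, with illustrative comments:

    /* Sketch: unlink under the plain mutex, wake waiters outside it. */
    qemu_mutex_lock(&bs->reqs_lock);
    QLIST_REMOVE(req, list);          /* no new waiter can find req now */
    qemu_mutex_unlock(&bs->reqs_lock);

    /* Waking may schedule coroutines; keep it out of the critical section. */
    qemu_co_queue_restart_all(&req->wait_queue);
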
/**
qemu_co_queue_init(&req->wait_queue);
- qemu_co_mutex_lock(&bs->reqs_lock);
+ qemu_mutex_lock(&bs->reqs_lock);
QLIST_INSERT_HEAD(&bs->tracked_requests, req, list);
- qemu_co_mutex_unlock(&bs->reqs_lock);
+ qemu_mutex_unlock(&bs->reqs_lock);
}
static bool tracked_request_overlaps(BdrvTrackedRequest *req,
}
/**
- * Round a region to cluster boundaries
+ * Round a region to subcluster (if supported) or cluster boundaries
*/
void coroutine_fn GRAPH_RDLOCK
-bdrv_round_to_clusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
- int64_t *cluster_offset, int64_t *cluster_bytes)
+bdrv_round_to_subclusters(BlockDriverState *bs, int64_t offset, int64_t bytes,
+ int64_t *align_offset, int64_t *align_bytes)
{
BlockDriverInfo bdi;
IO_CODE();
- if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
- *cluster_offset = offset;
- *cluster_bytes = bytes;
+ if (bdrv_co_get_info(bs, &bdi) < 0 || bdi.subcluster_size == 0) {
+ *align_offset = offset;
+ *align_bytes = bytes;
} else {
- int64_t c = bdi.cluster_size;
- *cluster_offset = QEMU_ALIGN_DOWN(offset, c);
- *cluster_bytes = QEMU_ALIGN_UP(offset - *cluster_offset + bytes, c);
+ int64_t c = bdi.subcluster_size;
+ *align_offset = QEMU_ALIGN_DOWN(offset, c);
+ *align_bytes = QEMU_ALIGN_UP(offset - *align_offset + bytes, c);
}
}
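
To make the rounding arithmetic concrete, assume a subcluster size of 4096 bytes and a request covering [5000, 8000); all values below are hypothetical:

    int64_t c = 4096;                     /* assumed bdi.subcluster_size */
    int64_t offset = 5000, bytes = 3000;  /* request covers [5000, 8000) */

    int64_t align_offset = QEMU_ALIGN_DOWN(offset, c);        /* 4096 */
    int64_t align_bytes = QEMU_ALIGN_UP(offset - align_offset + bytes, c);
                                          /* ALIGN_UP(3904, 4096) = 4096 */

    /* The rounded region [4096, 8192) covers the original request and is
     * aligned on both ends to the 4 KiB subcluster grid. */
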
return;
}
- qemu_co_mutex_lock(&bs->reqs_lock);
+ qemu_mutex_lock(&bs->reqs_lock);
bdrv_wait_serialising_requests_locked(self);
- qemu_co_mutex_unlock(&bs->reqs_lock);
+ qemu_mutex_unlock(&bs->reqs_lock);
}
void coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
{
IO_CODE();
- qemu_co_mutex_lock(&req->bs->reqs_lock);
+ qemu_mutex_lock(&req->bs->reqs_lock);
tracked_request_set_serialising(req, align);
bdrv_wait_serialising_requests_locked(req);
- qemu_co_mutex_unlock(&req->bs->reqs_lock);
+ qemu_mutex_unlock(&req->bs->reqs_lock);
}
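
bdrv_wait_serialising_requests_locked() can still put the coroutine to sleep while reqs_lock is held: qemu_co_queue_wait() accepts any QemuLockable, so it works unchanged with a QemuMutex, dropping the lock while the coroutine sleeps and reacquiring it before returning. A sketch of that waiter side, where find_conflicting_request() is a hypothetical stand-in for the real lookup over bs->tracked_requests:

    static void coroutine_fn wait_serialising(BlockDriverState *bs,
                                              BdrvTrackedRequest *self)
    {
        BdrvTrackedRequest *req;

        qemu_mutex_lock(&bs->reqs_lock);
        /* hypothetical helper; the real code scans bs->tracked_requests */
        while ((req = find_conflicting_request(bs, self)) != NULL) {
            /* releases reqs_lock while sleeping, retakes it on wakeup */
            qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
        }
        qemu_mutex_unlock(&bs->reqs_lock);
    }
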
int bdrv_check_qiov_request(int64_t offset, int64_t bytes,
void *bounce_buffer = NULL;
BlockDriver *drv = bs->drv;
- int64_t cluster_offset;
- int64_t cluster_bytes;
+ int64_t align_offset;
+ int64_t align_bytes;
int64_t skip_bytes;
int ret;
int max_transfer = MIN_NON_ZERO(bs->bl.max_transfer,
* BDRV_REQUEST_MAX_BYTES (even when the original read did not), which
* is one reason we loop rather than doing it all at once.
*/
- bdrv_round_to_clusters(bs, offset, bytes, &cluster_offset, &cluster_bytes);
- skip_bytes = offset - cluster_offset;
+ bdrv_round_to_subclusters(bs, offset, bytes, &align_offset, &align_bytes);
+ skip_bytes = offset - align_offset;
trace_bdrv_co_do_copy_on_readv(bs, offset, bytes,
- cluster_offset, cluster_bytes);
+ align_offset, align_bytes);
- while (cluster_bytes) {
+ while (align_bytes) {
int64_t pnum;
if (skip_write) {
ret = 1; /* "already allocated", so nothing will be copied */
- pnum = MIN(cluster_bytes, max_transfer);
+ pnum = MIN(align_bytes, max_transfer);
} else {
- ret = bdrv_is_allocated(bs, cluster_offset,
- MIN(cluster_bytes, max_transfer), &pnum);
+ ret = bdrv_is_allocated(bs, align_offset,
+ MIN(align_bytes, max_transfer), &pnum);
if (ret < 0) {
/*
* Safe to treat errors in querying allocation as if
* unallocated; we'll probably fail again soon on the
* read, but at least that will set a decent errno.
*/
- pnum = MIN(cluster_bytes, max_transfer);
+ pnum = MIN(align_bytes, max_transfer);
}
/* Stop at EOF if the image ends in the middle of the cluster */
/* Must copy-on-read; use the bounce buffer */
pnum = MIN(pnum, MAX_BOUNCE_BUFFER);
if (!bounce_buffer) {
- int64_t max_we_need = MAX(pnum, cluster_bytes - pnum);
+ int64_t max_we_need = MAX(pnum, align_bytes - pnum);
int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER);
int64_t bounce_buffer_len = MIN(max_we_need, max_allowed);
}
qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum);
- ret = bdrv_driver_preadv(bs, cluster_offset, pnum,
+ ret = bdrv_driver_preadv(bs, align_offset, pnum,
&local_qiov, 0, 0);
if (ret < 0) {
goto err;
/* FIXME: Should we (perhaps conditionally) be setting
* BDRV_REQ_MAY_UNMAP, if it will allow for a sparser copy
* that still correctly reads as zero? */
- ret = bdrv_co_do_pwrite_zeroes(bs, cluster_offset, pnum,
+ ret = bdrv_co_do_pwrite_zeroes(bs, align_offset, pnum,
BDRV_REQ_WRITE_UNCHANGED);
} else {
/* This does not change the data on the disk, it is not
* necessary to flush even in cache=writethrough mode.
*/
- ret = bdrv_driver_pwritev(bs, cluster_offset, pnum,
+ ret = bdrv_driver_pwritev(bs, align_offset, pnum,
&local_qiov, 0,
BDRV_REQ_WRITE_UNCHANGED);
}
}
}
- cluster_offset += pnum;
- cluster_bytes -= pnum;
+ align_offset += pnum;
+ align_bytes -= pnum;
progress += pnum - skip_bytes;
skip_bytes = 0;
}
}
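
The loop above fragments the rounded region into max_transfer-sized pieces; skip_bytes accounts for the head of the first fragment that the guest never asked for. A worked pass with assumed numbers (request [5000, 13000) rounded to [4096, 16384), 4 KiB transfers):

    int64_t align_offset = 4096, align_bytes = 12288; /* rounded region */
    int64_t skip_bytes = 904;     /* head before the guest's offset 5000 */
    int64_t max_transfer = 4096, progress = 0;

    while (align_bytes) {
        int64_t pnum = MIN(align_bytes, max_transfer);  /* 4096 per pass */
        /* ... copy-on-read pnum bytes at align_offset ... */
        align_offset += pnum;
        align_bytes -= pnum;
        progress += pnum - skip_bytes;   /* 3192, then 4096, then 4096 */
        skip_bytes = 0;                  /* only the first pass has a head */
    }
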
/* Forward the request to the BlockDriver, possibly fragmenting it */
- total_bytes = bdrv_getlength(bs);
+ total_bytes = bdrv_co_getlength(bs);
if (total_bytes < 0) {
ret = total_bytes;
goto out;
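
bdrv_getlength() is a generated co_wrapper around bdrv_co_getlength(); calling it from coroutine context would create and poll a nested coroutine, so coroutine_fn code should call the _co_ variant directly. Paraphrasing the declaration pattern from include/block/block-io.h (the exact attributes may differ between versions):

    int64_t coroutine_fn GRAPH_RDLOCK bdrv_co_getlength(BlockDriverState *bs);

    /* generated wrapper: usable from both coroutine and non-coroutine
     * context; outside a coroutine it spawns one and polls until done */
    int64_t co_wrapper_mixed_bdrv_rdlock bdrv_getlength(BlockDriverState *bs);
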
int sliced_niov;
size_t sliced_head, sliced_tail;
- bdrv_check_qiov_request(*offset, *bytes, *qiov, *qiov_offset, &error_abort);
+ /* Should have been checked by the caller already */
+ ret = bdrv_check_request32(*offset, *bytes, *qiov, *qiov_offset);
+ if (ret < 0) {
+ return ret;
+ }
if (!bdrv_init_padding(bs, *offset, *bytes, write, pad)) {
if (padded) {
&sliced_head, &sliced_tail,
&sliced_niov);
- /* Guaranteed by bdrv_check_qiov_request() */
+ /* Guaranteed by bdrv_check_request32() */
assert(*bytes <= SIZE_MAX);
ret = bdrv_create_padded_qiov(bs, pad, sliced_iov, sliced_niov,
sliced_head, *bytes);
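
For reference, bdrv_check_request32() builds on bdrv_check_qiov_request() and additionally caps the request at BDRV_REQUEST_MAX_BYTES; paraphrased from block/io.c:

    static int bdrv_check_request32(int64_t offset, int64_t bytes,
                                    QEMUIOVector *qiov, size_t qiov_offset)
    {
        int ret = bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset,
                                          NULL);
        if (ret < 0) {
            return ret;
        }

        /* keep the request representable in 32 bits */
        if (bytes > BDRV_REQUEST_MAX_BYTES) {
            return -EIO;
        }

        return 0;
    }

This cap is why the `assert(*bytes <= SIZE_MAX)` that follows is guaranteed to hold.
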
assert(pnum);
assert_bdrv_graph_readable();
*pnum = 0;
- total_size = bdrv_getlength(bs);
+ total_size = bdrv_co_getlength(bs);
if (total_size < 0) {
ret = total_size;
goto early_out;
bytes = n;
}
- /* Must be non-NULL or bdrv_getlength() would have failed */
+ /* Must be non-NULL or bdrv_co_getlength() would have failed */
assert(bs->drv);
has_filtered_child = bdrv_filter_child(bs);
if (!bs->drv->bdrv_co_block_status && !has_filtered_child) {
if (!cow_bs) {
ret |= BDRV_BLOCK_ZERO;
} else if (want_zero) {
- int64_t size2 = bdrv_getlength(cow_bs);
+ int64_t size2 = bdrv_co_getlength(cow_bs);
if (size2 >= 0 && offset >= size2) {
ret |= BDRV_BLOCK_ZERO;
goto early_exit;
}
- qemu_co_mutex_lock(&bs->reqs_lock);
+ qemu_mutex_lock(&bs->reqs_lock);
current_gen = qatomic_read(&bs->write_gen);
/* Wait until any previous flushes are completed */
/* Flushes reach this point in nondecreasing current_gen order. */
bs->active_flush_req = true;
- qemu_co_mutex_unlock(&bs->reqs_lock);
+ qemu_mutex_unlock(&bs->reqs_lock);
/* Write back all layers by calling one driver function */
if (bs->drv->bdrv_co_flush) {
}
/* Write back cached data to the OS even with cache=unsafe */
- BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
+ BLKDBG_CO_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
if (bs->drv->bdrv_co_flush_to_os) {
ret = bs->drv->bdrv_co_flush_to_os(bs);
if (ret < 0) {
goto flush_children;
}
- BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
+ BLKDBG_CO_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
if (!bs->drv) {
/* bs->drv->bdrv_co_flush() might have ejected the BDS
* (even in case of apparent success) */
bs->flushed_gen = current_gen;
}
- qemu_co_mutex_lock(&bs->reqs_lock);
+ qemu_mutex_lock(&bs->reqs_lock);
bs->active_flush_req = false;
/* Return value is ignored - it's ok if wait queue is empty */
qemu_co_queue_next(&bs->flush_queue);
- qemu_co_mutex_unlock(&bs->reqs_lock);
+ qemu_mutex_unlock(&bs->reqs_lock);
early_exit:
bdrv_dec_in_flight(bs);
return ret;
}
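
The hunks above convert the flush serialization to the same plain mutex; the wait loop between the two comments is elided in the excerpt. A condensed reconstruction of the whole pattern (illustrative, not verbatim):

    qemu_mutex_lock(&bs->reqs_lock);
    current_gen = qatomic_read(&bs->write_gen);

    /* only one flush at a time; later flushers queue up here */
    while (bs->active_flush_req) {
        qemu_co_queue_wait(&bs->flush_queue, &bs->reqs_lock);
    }
    bs->active_flush_req = true;
    qemu_mutex_unlock(&bs->reqs_lock);

    /* ... flush to the OS and to disk via the driver callbacks ... */

    qemu_mutex_lock(&bs->reqs_lock);
    bs->active_flush_req = false;
    qemu_co_queue_next(&bs->flush_queue);   /* hand off to the next flusher */
    qemu_mutex_unlock(&bs->reqs_lock);
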
- old_size = bdrv_getlength(bs);
+ old_size = bdrv_co_getlength(bs);
if (old_size < 0) {
error_setg_errno(errp, -old_size, "Failed to get old image size");
return old_size;