void *opaque,
bool is_write);
static void coroutine_fn bdrv_co_do_rw(void *opaque);
+static int coroutine_fn bdrv_co_do_write_zeroes(BlockDriverState *bs,
+ int64_t sector_num, int nb_sectors);
static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
bool is_write, double elapsed_time, uint64_t *wait);
void bdrv_close(BlockDriverState *bs)
{
+ bdrv_flush(bs);
if (bs->drv) {
+ if (bs->job) {
+ block_job_cancel_sync(bs->job);
+ }
+ bdrv_drain_all();
+
if (bs == bs_snapshots) {
bs_snapshots = NULL;
}
bs->device_name[0] = '\0';
}
+/*
+ * Add new bs contents at the top of an image chain while the chain is
+ * live, while keeping required fields on the top layer.
+ *
+ * This will modify the BlockDriverState fields, and swap contents
+ * between bs_new and bs_top. Both bs_new and bs_top are modified.
+ *
+ * bs_new is required to be anonymous.
+ *
+ * This function does not create any image files.
+ */
+void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top)
+{
+ BlockDriverState tmp;
+
+ /* bs_new must be anonymous */
+ assert(bs_new->device_name[0] == '\0');
+
+ tmp = *bs_new;
+
+ /* there are some fields that need to stay on the top layer: */
+
+ /* dev info */
+ tmp.dev_ops = bs_top->dev_ops;
+ tmp.dev_opaque = bs_top->dev_opaque;
+ tmp.dev = bs_top->dev;
+ tmp.buffer_alignment = bs_top->buffer_alignment;
+ tmp.copy_on_read = bs_top->copy_on_read;
+
+ /* i/o timing parameters */
+ tmp.slice_time = bs_top->slice_time;
+ tmp.slice_start = bs_top->slice_start;
+ tmp.slice_end = bs_top->slice_end;
+ tmp.io_limits = bs_top->io_limits;
+ tmp.io_base = bs_top->io_base;
+ tmp.throttled_reqs = bs_top->throttled_reqs;
+ tmp.block_timer = bs_top->block_timer;
+ tmp.io_limits_enabled = bs_top->io_limits_enabled;
+
+ /* geometry */
+ tmp.cyls = bs_top->cyls;
+ tmp.heads = bs_top->heads;
+ tmp.secs = bs_top->secs;
+ tmp.translation = bs_top->translation;
+
+ /* r/w error */
+ tmp.on_read_error = bs_top->on_read_error;
+ tmp.on_write_error = bs_top->on_write_error;
+
+ /* i/o status */
+ tmp.iostatus_enabled = bs_top->iostatus_enabled;
+ tmp.iostatus = bs_top->iostatus;
+
+ /* keep the same entry in bdrv_states */
+ pstrcpy(tmp.device_name, sizeof(tmp.device_name), bs_top->device_name);
+ tmp.list = bs_top->list;
+
+ /* The contents of 'tmp' will become bs_top, as we are
+ * swapping bs_new and bs_top contents. */
+ tmp.backing_hd = bs_new;
+ pstrcpy(tmp.backing_file, sizeof(tmp.backing_file), bs_top->filename);
+ bdrv_get_format(bs_top, tmp.backing_format, sizeof(tmp.backing_format));
+
+ /* swap contents of the fixed new bs and the current top */
+ *bs_new = *bs_top;
+ *bs_top = tmp;
+
+ /* device_name[] was carried over from the old bs_top. bs_new
+ * shouldn't be in bdrv_states, so we need to make device_name[]
+ * reflect the anonymity of bs_new
+ */
+ bs_new->device_name[0] = '\0';
+
+ /* clear the copied fields in the new backing file */
+ bdrv_detach_dev(bs_new, bs_new->dev);
+
+ qemu_co_queue_init(&bs_new->throttled_reqs);
+ memset(&bs_new->io_base, 0, sizeof(bs_new->io_base));
+ memset(&bs_new->io_limits, 0, sizeof(bs_new->io_limits));
+ bdrv_iostatus_disable(bs_new);
+
+ /* we don't use bdrv_io_limits_disable() for this, because we don't want
+ * to affect or delete the block_timer, as it has been moved to bs_top */
+ bs_new->io_limits_enabled = false;
+ bs_new->block_timer = NULL;
+ bs_new->slice_time = 0;
+ bs_new->slice_start = 0;
+ bs_new->slice_end = 0;
+}
+
void bdrv_delete(BlockDriverState *bs)
{
assert(!bs->dev);
+ assert(!bs->job);
+ assert(!bs->in_use);
/* remove from list, if necessary */
bdrv_make_anon(bs);
return ret;
}
-void bdrv_commit_all(void)
+int bdrv_commit_all(void)
{
BlockDriverState *bs;
QTAILQ_FOREACH(bs, &bdrv_states, list) {
- bdrv_commit(bs);
+ int ret = bdrv_commit(bs);
+ if (ret < 0) {
+ return ret;
+ }
}
+ return 0;
}
struct BdrvTrackedRequest {
qemu_iovec_init_external(&qiov, &iov, 1);
+ /**
+ * In sync call context, when the vcpu is blocked, this throttling timer
+ * will not fire; so the I/O throttling function has to be disabled here
+ * if it has been enabled.
+ */
+ if (bs->io_limits_enabled) {
+ fprintf(stderr, "Disabling I/O throttling on '%s' due "
+ "to synchronous I/O.\n", bdrv_get_device_name(bs));
+ bdrv_io_limits_disable(bs);
+ }
+
if (qemu_in_coroutine()) {
/* Fast-path if already in coroutine context */
bdrv_rw_co_entry(&rwco);
if (drv->bdrv_co_write_zeroes &&
buffer_is_zero(bounce_buffer, iov.iov_len)) {
- ret = drv->bdrv_co_write_zeroes(bs, cluster_sector_num,
- cluster_nb_sectors);
+ ret = bdrv_co_do_write_zeroes(bs, cluster_sector_num,
+ cluster_nb_sectors);
} else {
ret = drv->bdrv_co_writev(bs, cluster_sector_num, cluster_nb_sectors,
&bounce_qiov);
struct iovec iov;
int ret;
+ /* TODO Emulate only part of misaligned requests instead of letting block
+ * drivers return -ENOTSUP and emulate everything */
+
/* First try the efficient write zeroes operation */
if (drv->bdrv_co_write_zeroes) {
- return drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+ ret = drv->bdrv_co_write_zeroes(bs, sector_num, nb_sectors);
+ if (ret != -ENOTSUP) {
+ return ret;
+ }
}
/* Fall back to bounce buffer if write zeroes is unsupported */
struct partition *p;
uint32_t nr_sects;
uint64_t nb_sectors;
+ bool enabled;
bdrv_get_geometry(bs, &nb_sectors);
+ /**
+ * The function will be invoked during startup not only in sync I/O mode,
+ * but also in async I/O mode. So the I/O throttling function has to
+ * be disabled temporarily here, not permanently.
+ */
+ enabled = bs->io_limits_enabled;
+ bs->io_limits_enabled = false;
ret = bdrv_read(bs, 0, buf, 1);
+ bs->io_limits_enabled = enabled;
if (ret < 0)
return -1;
/* test msdos magic */
BlockDriverState *bs;
QTAILQ_FOREACH(bs, &bdrv_states, list) {
- if (!bdrv_is_read_only(bs) && bdrv_is_inserted(bs)) {
- bdrv_flush(bs);
- }
+ bdrv_flush(bs);
}
}
BlockDriverCompletionFunc *cb;
void *opaque;
QEMUIOVector *free_qiov;
- void *free_buf;
} callbacks[];
} MultiwriteCB;
qemu_iovec_destroy(mcb->callbacks[i].free_qiov);
}
g_free(mcb->callbacks[i].free_qiov);
- qemu_vfree(mcb->callbacks[i].free_buf);
}
}
int merge = 0;
int64_t oldreq_last = reqs[outidx].sector + reqs[outidx].nb_sectors;
- // This handles the cases that are valid for all block drivers, namely
- // exactly sequential writes and overlapping writes.
+ // Handle exactly sequential writes and overlapping writes.
if (reqs[i].sector <= oldreq_last) {
merge = 1;
}
- // The block driver may decide that it makes sense to combine requests
- // even if there is a gap of some sectors between them. In this case,
- // the gap is filled with zeros (therefore only applicable for yet
- // unused space in format like qcow2).
- if (!merge && bs->drv->bdrv_merge_requests) {
- merge = bs->drv->bdrv_merge_requests(bs, &reqs[outidx], &reqs[i]);
- }
-
if (reqs[outidx].qiov->niov + reqs[i].qiov->niov + 1 > IOV_MAX) {
merge = 0;
}
size = (reqs[i].sector - reqs[outidx].sector) << 9;
qemu_iovec_concat(qiov, reqs[outidx].qiov, size);
- // We might need to add some zeros between the two requests
- if (reqs[i].sector > oldreq_last) {
- size_t zero_bytes = (reqs[i].sector - oldreq_last) << 9;
- uint8_t *buf = qemu_blockalign(bs, zero_bytes);
- memset(buf, 0, zero_bytes);
- qemu_iovec_add(qiov, buf, zero_bytes);
- mcb->callbacks[i].free_buf = buf;
- }
+ // We should need to add any zeros between the two requests
+ assert (reqs[i].sector <= oldreq_last);
// Add the second request
qemu_iovec_concat(qiov, reqs[i].qiov, reqs[i].qiov->size);
{
int ret;
- if (!bs->drv) {
+ if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
return 0;
}
}
if (bs->drv->bdrv_co_flush_to_disk) {
- return bs->drv->bdrv_co_flush_to_disk(bs);
+ ret = bs->drv->bdrv_co_flush_to_disk(bs);
} else if (bs->drv->bdrv_aio_flush) {
BlockDriverAIOCB *acb;
CoroutineIOCompletion co = {
acb = bs->drv->bdrv_aio_flush(bs, bdrv_co_io_em_complete, &co);
if (acb == NULL) {
- return -EIO;
+ ret = -EIO;
} else {
qemu_coroutine_yield();
- return co.ret;
+ ret = co.ret;
}
} else {
/*
*
* Let's hope the user knows what he's doing.
*/
- return 0;
+ ret = 0;
}
+ if (ret < 0) {
+ return ret;
+ }
+
+ /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
+ * in the case of cache=unsafe, so there are no useless flushes.
+ */
+ return bdrv_co_flush(bs->file);
}
void bdrv_invalidate_cache(BlockDriverState *bs)
}
}
+void bdrv_clear_incoming_migration_all(void)
+{
+ BlockDriverState *bs;
+
+ QTAILQ_FOREACH(bs, &bdrv_states, list) {
+ bs->open_flags = bs->open_flags & ~(BDRV_O_INCOMING);
+ }
+}
+
int bdrv_flush(BlockDriverState *bs)
{
Coroutine *co;
}
void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
- BlockDriverCompletionFunc *cb, void *opaque)
+ int64_t speed, BlockDriverCompletionFunc *cb,
+ void *opaque, Error **errp)
{
BlockJob *job;
if (bs->job || bdrv_in_use(bs)) {
+ error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
return NULL;
}
bdrv_set_in_use(bs, 1);
job->cb = cb;
job->opaque = opaque;
bs->job = job;
+
+ /* Only set speed when necessary to avoid NotSupported error */
+ if (speed != 0) {
+ Error *local_err = NULL;
+
+ block_job_set_speed(job, speed, &local_err);
+ if (error_is_set(&local_err)) {
+ bs->job = NULL;
+ g_free(job);
+ bdrv_set_in_use(bs, 0);
+ error_propagate(errp, local_err);
+ return NULL;
+ }
+ }
return job;
}
bdrv_set_in_use(bs, 0);
}
-int block_job_set_speed(BlockJob *job, int64_t value)
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
+ Error *local_err = NULL;
+
if (!job->job_type->set_speed) {
- return -ENOTSUP;
+ error_set(errp, QERR_NOT_SUPPORTED);
+ return;
+ }
+ job->job_type->set_speed(job, speed, &local_err);
+ if (error_is_set(&local_err)) {
+ error_propagate(errp, local_err);
+ return;
}
- return job->job_type->set_speed(job, value);
+
+ job->speed = speed;
}
void block_job_cancel(BlockJob *job)
{
return job->cancelled;
}
+
+void block_job_cancel_sync(BlockJob *job)
+{
+ BlockDriverState *bs = job->bs;
+
+ assert(bs->job == job);
+ block_job_cancel(job);
+ while (bs->job != NULL && bs->job->busy) {
+ qemu_aio_wait();
+ }
+}