#include "qapi/qobject-input-visitor.h"
#include "qapi/qapi-visit-block-core.h"
#include "crypto.h"
+#include "block/aio_task.h"
/*
Differences with QCOW:
return -EINVAL;
}
- if (bitmaps_ext.bitmap_directory_offset & (s->cluster_size - 1)) {
+ if (offset_into_cluster(s, bitmaps_ext.bitmap_directory_offset)) {
error_setg(errp, "bitmaps_ext: "
"invalid bitmap directory offset");
return -EINVAL;
return 0;
}
+static void qcow2_add_check_result(BdrvCheckResult *out,
+ const BdrvCheckResult *src,
+ bool set_allocation_info)
+{
+ out->corruptions += src->corruptions;
+ out->leaks += src->leaks;
+ out->check_errors += src->check_errors;
+ out->corruptions_fixed += src->corruptions_fixed;
+ out->leaks_fixed += src->leaks_fixed;
+
+ if (set_allocation_info) {
+ out->image_end_offset = src->image_end_offset;
+ out->bfi = src->bfi;
+ }
+}
+
static int coroutine_fn qcow2_co_check_locked(BlockDriverState *bs,
BdrvCheckResult *result,
BdrvCheckMode fix)
{
- int ret = qcow2_check_refcounts(bs, result, fix);
+ BdrvCheckResult snapshot_res = {};
+ BdrvCheckResult refcount_res = {};
+ int ret;
+
+ memset(result, 0, sizeof(*result));
+
+ ret = qcow2_check_read_snapshot_table(bs, &snapshot_res, fix);
+ if (ret < 0) {
+ qcow2_add_check_result(result, &snapshot_res, false);
+ return ret;
+ }
+
+ ret = qcow2_check_refcounts(bs, &refcount_res, fix);
+ qcow2_add_check_result(result, &refcount_res, true);
+ if (ret < 0) {
+ qcow2_add_check_result(result, &snapshot_res, false);
+ return ret;
+ }
+
+ ret = qcow2_check_fix_snapshot_table(bs, &snapshot_res, fix);
+ qcow2_add_check_result(result, &snapshot_res, false);
if (ret < 0) {
return ret;
}
bool l2_cache_entry_size_set;
int min_refcount_cache = MIN_REFCOUNT_CACHE_SIZE * s->cluster_size;
uint64_t virtual_disk_size = bs->total_sectors * BDRV_SECTOR_SIZE;
- uint64_t max_l2_cache = virtual_disk_size / (s->cluster_size / 8);
+ uint64_t max_l2_entries = DIV_ROUND_UP(virtual_disk_size, s->cluster_size);
+ /* An L2 table is always one cluster in size so the max cache size
+ * should be a multiple of the cluster size. */
+ uint64_t max_l2_cache = ROUND_UP(max_l2_entries * sizeof(uint64_t),
+ s->cluster_size);
combined_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_CACHE_SIZE);
l2_cache_size_set = qemu_opt_get(opts, QCOW2_OPT_L2_CACHE_SIZE);
goto fail;
}
- /* The total size in bytes of the snapshot table is checked in
- * qcow2_read_snapshots() because the size of each snapshot is
- * variable and we don't know it yet.
- * Here we only check the offset and number of snapshots. */
- ret = qcow2_validate_table(bs, header.snapshots_offset,
- header.nb_snapshots,
- sizeof(QCowSnapshotHeader),
- sizeof(QCowSnapshotHeader) * QCOW_MAX_SNAPSHOTS,
- "Snapshot table", errp);
- if (ret < 0) {
- goto fail;
+ if (!(flags & BDRV_O_CHECK)) {
+ /*
+ * The total size in bytes of the snapshot table is checked in
+ * qcow2_read_snapshots() because the size of each snapshot is
+ * variable and we don't know it yet.
+ * Here we only check the offset and number of snapshots.
+ */
+ ret = qcow2_validate_table(bs, header.snapshots_offset,
+ header.nb_snapshots,
+ sizeof(QCowSnapshotHeader),
+ sizeof(QCowSnapshotHeader) *
+ QCOW_MAX_SNAPSHOTS,
+ "Snapshot table", errp);
+ if (ret < 0) {
+ goto fail;
+ }
}
/* read the level 1 table */
s->image_backing_file = g_strdup(bs->auto_backing_file);
}
- /* Internal snapshots */
- s->snapshots_offset = header.snapshots_offset;
- s->nb_snapshots = header.nb_snapshots;
+ /*
+ * Internal snapshots; skip reading them in check mode, because
+ * we do not need them then, and we do not want to abort because
+ * of a broken table.
+ */
+ if (!(flags & BDRV_O_CHECK)) {
+ s->snapshots_offset = header.snapshots_offset;
+ s->nb_snapshots = header.nb_snapshots;
- ret = qcow2_read_snapshots(bs);
- if (ret < 0) {
- error_setg_errno(errp, -ret, "Could not read snapshots");
- goto fail;
+ ret = qcow2_read_snapshots(bs, errp);
+ if (ret < 0) {
+ goto fail;
+ }
}
/* Clear unknown autoclear feature bits */
if (!(bdrv_get_flags(bs) & BDRV_O_INACTIVE)) {
/* It's case 1, 2 or 3.2. Or 3.1 which is BUG in management layer. */
bool header_updated = qcow2_load_dirty_bitmaps(bs, &local_err);
+ if (local_err != NULL) {
+ error_propagate(errp, local_err);
+ ret = -EINVAL;
+ goto fail;
+ }
update_header = update_header && !header_updated;
}
- if (local_err != NULL) {
- error_propagate(errp, local_err);
- ret = -EINVAL;
- goto fail;
- }
if (update_header) {
ret = qcow2_update_header(bs);
}
}
- bs->supported_zero_flags = header.version >= 3 ? BDRV_REQ_MAY_UNMAP : 0;
+ bs->supported_zero_flags = header.version >= 3 ?
+ BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK : 0;
/* Repair image if dirty */
if (!(flags & (BDRV_O_CHECK | BDRV_O_INACTIVE)) && !bs->read_only &&
static void qcow2_reopen_commit(BDRVReopenState *state)
{
qcow2_update_options_commit(state->bs, state->opaque);
+ if (state->flags & BDRV_O_RDWR) {
+ Error *local_err = NULL;
+
+ if (qcow2_reopen_bitmaps_rw(state->bs, &local_err) < 0) {
+ /*
+ * This is not fatal, bitmaps just left read-only, so all following
+ * writes will fail. User can remove read-only bitmaps to unblock
+ * writes or retry reopen.
+ */
+ error_reportf_err(local_err,
+ "%s: Failed to make dirty bitmaps writable: ",
+ bdrv_get_node_name(state->bs));
+ }
+ }
g_free(state->opaque);
}
{
BDRVQcow2State *s = bs->opaque;
uint64_t cluster_offset;
- int index_in_cluster, ret;
unsigned int bytes;
- int status = 0;
+ int ret, status = 0;
+
+ qemu_co_mutex_lock(&s->lock);
if (!s->metadata_preallocation_checked) {
ret = qcow2_detect_metadata_preallocation(bs);
}
bytes = MIN(INT_MAX, count);
- qemu_co_mutex_lock(&s->lock);
ret = qcow2_get_cluster_offset(bs, offset, &bytes, &cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
if ((ret == QCOW2_CLUSTER_NORMAL || ret == QCOW2_CLUSTER_ZERO_ALLOC) &&
!s->crypto) {
- index_in_cluster = offset & (s->cluster_size - 1);
- *map = cluster_offset | index_in_cluster;
+ *map = cluster_offset | offset_into_cluster(s, offset);
*file = s->data_file->bs;
status |= BDRV_BLOCK_OFFSET_VALID;
}
return ret;
}
+static coroutine_fn int
+qcow2_co_preadv_encrypted(BlockDriverState *bs,
+ uint64_t file_cluster_offset,
+ uint64_t offset,
+ uint64_t bytes,
+ QEMUIOVector *qiov,
+ uint64_t qiov_offset)
+{
+ int ret;
+ BDRVQcow2State *s = bs->opaque;
+ uint8_t *buf;
+
+ assert(bs->encrypted && s->crypto);
+ assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+
+ /*
+ * For encrypted images, read everything into a temporary
+ * contiguous buffer on which the AES functions can work.
+ * Also, decryption in a separate buffer is better as it
+ * prevents the guest from learning information about the
+ * encrypted nature of the virtual disk.
+ */
+
+ buf = qemu_try_blockalign(s->data_file->bs, bytes);
+ if (buf == NULL) {
+ return -ENOMEM;
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ ret = bdrv_co_pread(s->data_file,
+ file_cluster_offset + offset_into_cluster(s, offset),
+ bytes, buf, 0);
+ if (ret < 0) {
+ goto fail;
+ }
+
+ assert(QEMU_IS_ALIGNED(offset, BDRV_SECTOR_SIZE));
+ assert(QEMU_IS_ALIGNED(bytes, BDRV_SECTOR_SIZE));
+ if (qcow2_co_decrypt(bs,
+ file_cluster_offset + offset_into_cluster(s, offset),
+ offset, buf, bytes) < 0)
+ {
+ ret = -EIO;
+ goto fail;
+ }
+ qemu_iovec_from_buf(qiov, qiov_offset, buf, bytes);
+
+fail:
+ qemu_vfree(buf);
+
+ return ret;
+}
+
+typedef struct Qcow2AioTask {
+ AioTask task;
+
+ BlockDriverState *bs;
+ QCow2ClusterType cluster_type; /* only for read */
+ uint64_t file_cluster_offset;
+ uint64_t offset;
+ uint64_t bytes;
+ QEMUIOVector *qiov;
+ uint64_t qiov_offset;
+ QCowL2Meta *l2meta; /* only for write */
+} Qcow2AioTask;
+
+static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task);
+static coroutine_fn int qcow2_add_task(BlockDriverState *bs,
+ AioTaskPool *pool,
+ AioTaskFunc func,
+ QCow2ClusterType cluster_type,
+ uint64_t file_cluster_offset,
+ uint64_t offset,
+ uint64_t bytes,
+ QEMUIOVector *qiov,
+ size_t qiov_offset,
+ QCowL2Meta *l2meta)
+{
+ Qcow2AioTask local_task;
+ Qcow2AioTask *task = pool ? g_new(Qcow2AioTask, 1) : &local_task;
+
+ *task = (Qcow2AioTask) {
+ .task.func = func,
+ .bs = bs,
+ .cluster_type = cluster_type,
+ .qiov = qiov,
+ .file_cluster_offset = file_cluster_offset,
+ .offset = offset,
+ .bytes = bytes,
+ .qiov_offset = qiov_offset,
+ .l2meta = l2meta,
+ };
+
+ trace_qcow2_add_task(qemu_coroutine_self(), bs, pool,
+ func == qcow2_co_preadv_task_entry ? "read" : "write",
+ cluster_type, file_cluster_offset, offset, bytes,
+ qiov, qiov_offset);
+
+ if (!pool) {
+ return func(&task->task);
+ }
+
+ aio_task_pool_start_task(pool, &task->task);
+
+ return 0;
+}
+
+static coroutine_fn int qcow2_co_preadv_task(BlockDriverState *bs,
+ QCow2ClusterType cluster_type,
+ uint64_t file_cluster_offset,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov,
+ size_t qiov_offset)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int offset_in_cluster = offset_into_cluster(s, offset);
+
+ switch (cluster_type) {
+ case QCOW2_CLUSTER_ZERO_PLAIN:
+ case QCOW2_CLUSTER_ZERO_ALLOC:
+ /* Both zero types are handled in qcow2_co_preadv_part */
+ g_assert_not_reached();
+
+ case QCOW2_CLUSTER_UNALLOCATED:
+ assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */
+
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
+ return bdrv_co_preadv_part(bs->backing, offset, bytes,
+ qiov, qiov_offset, 0);
+
+ case QCOW2_CLUSTER_COMPRESSED:
+ return qcow2_co_preadv_compressed(bs, file_cluster_offset,
+ offset, bytes, qiov, qiov_offset);
+
+ case QCOW2_CLUSTER_NORMAL:
+ if ((file_cluster_offset & 511) != 0) {
+ return -EIO;
+ }
+
+ if (bs->encrypted) {
+ return qcow2_co_preadv_encrypted(bs, file_cluster_offset,
+ offset, bytes, qiov, qiov_offset);
+ }
+
+ BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
+ return bdrv_co_preadv_part(s->data_file,
+ file_cluster_offset + offset_in_cluster,
+ bytes, qiov, qiov_offset, 0);
+
+ default:
+ g_assert_not_reached();
+ }
+
+ g_assert_not_reached();
+}
+
+static coroutine_fn int qcow2_co_preadv_task_entry(AioTask *task)
+{
+ Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
+
+ assert(!t->l2meta);
+
+ return qcow2_co_preadv_task(t->bs, t->cluster_type, t->file_cluster_offset,
+ t->offset, t->bytes, t->qiov, t->qiov_offset);
+}
+
static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs,
uint64_t offset, uint64_t bytes,
QEMUIOVector *qiov,
size_t qiov_offset, int flags)
{
BDRVQcow2State *s = bs->opaque;
- int offset_in_cluster;
- int ret;
+ int ret = 0;
unsigned int cur_bytes; /* number of bytes in current iteration */
uint64_t cluster_offset = 0;
- uint8_t *cluster_data = NULL;
-
- while (bytes != 0) {
+ AioTaskPool *aio = NULL;
+ while (bytes != 0 && aio_task_pool_status(aio) == 0) {
/* prepare next request */
cur_bytes = MIN(bytes, INT_MAX);
if (s->crypto) {
ret = qcow2_get_cluster_offset(bs, offset, &cur_bytes, &cluster_offset);
qemu_co_mutex_unlock(&s->lock);
if (ret < 0) {
- goto fail;
+ goto out;
}
- offset_in_cluster = offset_into_cluster(s, offset);
-
- switch (ret) {
- case QCOW2_CLUSTER_UNALLOCATED:
-
- if (bs->backing) {
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
- ret = bdrv_co_preadv_part(bs->backing, offset, cur_bytes,
- qiov, qiov_offset, 0);
- if (ret < 0) {
- goto fail;
- }
- } else {
- /* Note: in this case, no need to wait */
- qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
- }
- break;
-
- case QCOW2_CLUSTER_ZERO_PLAIN:
- case QCOW2_CLUSTER_ZERO_ALLOC:
+ if (ret == QCOW2_CLUSTER_ZERO_PLAIN ||
+ ret == QCOW2_CLUSTER_ZERO_ALLOC ||
+ (ret == QCOW2_CLUSTER_UNALLOCATED && !bs->backing))
+ {
qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes);
- break;
-
- case QCOW2_CLUSTER_COMPRESSED:
- ret = qcow2_co_preadv_compressed(bs, cluster_offset,
- offset, cur_bytes,
- qiov, qiov_offset);
- if (ret < 0) {
- goto fail;
- }
-
- break;
-
- case QCOW2_CLUSTER_NORMAL:
- if ((cluster_offset & 511) != 0) {
- ret = -EIO;
- goto fail;
+ } else {
+ if (!aio && cur_bytes != bytes) {
+ aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
}
-
- if (bs->encrypted) {
- assert(s->crypto);
-
- /*
- * For encrypted images, read everything into a temporary
- * contiguous buffer on which the AES functions can work.
- */
- if (!cluster_data) {
- cluster_data =
- qemu_try_blockalign(s->data_file->bs,
- QCOW_MAX_CRYPT_CLUSTERS
- * s->cluster_size);
- if (cluster_data == NULL) {
- ret = -ENOMEM;
- goto fail;
- }
- }
-
- assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
-
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- ret = bdrv_co_pread(s->data_file,
- cluster_offset + offset_in_cluster,
- cur_bytes, cluster_data, 0);
- if (ret < 0) {
- goto fail;
- }
-
- assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0);
- assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0);
- if (qcow2_co_decrypt(bs, cluster_offset, offset,
- cluster_data, cur_bytes) < 0) {
- ret = -EIO;
- goto fail;
- }
- qemu_iovec_from_buf(qiov, qiov_offset, cluster_data, cur_bytes);
- } else {
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
- ret = bdrv_co_preadv_part(s->data_file,
- cluster_offset + offset_in_cluster,
- cur_bytes, qiov, qiov_offset, 0);
- if (ret < 0) {
- goto fail;
- }
+ ret = qcow2_add_task(bs, aio, qcow2_co_preadv_task_entry, ret,
+ cluster_offset, offset, cur_bytes,
+ qiov, qiov_offset, NULL);
+ if (ret < 0) {
+ goto out;
}
- break;
-
- default:
- g_assert_not_reached();
- ret = -EIO;
- goto fail;
}
bytes -= cur_bytes;
offset += cur_bytes;
qiov_offset += cur_bytes;
}
- ret = 0;
-fail:
- qemu_vfree(cluster_data);
+out:
+ if (aio) {
+ aio_task_pool_wait_all(aio);
+ if (ret == 0) {
+ ret = aio_task_pool_status(aio);
+ }
+ g_free(aio);
+ }
return ret;
}
/* Check if it's possible to merge a write request with the writing of
* the data from the COW regions */
static bool merge_cow(uint64_t offset, unsigned bytes,
- QEMUIOVector *hd_qiov, QCowL2Meta *l2meta)
+ QEMUIOVector *qiov, size_t qiov_offset,
+ QCowL2Meta *l2meta)
{
QCowL2Meta *m;
/* Make sure that adding both COW regions to the QEMUIOVector
* does not exceed IOV_MAX */
- if (hd_qiov->niov > IOV_MAX - 2) {
+ if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) {
continue;
}
- m->data_qiov = hd_qiov;
+ m->data_qiov = qiov;
+ m->data_qiov_offset = qiov_offset;
return true;
}
return 0;
}
-static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset,
- uint64_t bytes, QEMUIOVector *qiov,
- int flags)
+/*
+ * qcow2_co_pwritev_task
+ * Called with s->lock unlocked
+ * l2meta - if not NULL, qcow2_co_pwritev_task() will consume it. Caller must
+ * not use it somehow after qcow2_co_pwritev_task() call
+ */
+static coroutine_fn int qcow2_co_pwritev_task(BlockDriverState *bs,
+ uint64_t file_cluster_offset,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov,
+ uint64_t qiov_offset,
+ QCowL2Meta *l2meta)
+{
+ int ret;
+ BDRVQcow2State *s = bs->opaque;
+ void *crypt_buf = NULL;
+ int offset_in_cluster = offset_into_cluster(s, offset);
+ QEMUIOVector encrypted_qiov;
+
+ if (bs->encrypted) {
+ assert(s->crypto);
+ assert(bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
+ crypt_buf = qemu_try_blockalign(bs->file->bs, bytes);
+ if (crypt_buf == NULL) {
+ ret = -ENOMEM;
+ goto out_unlocked;
+ }
+ qemu_iovec_to_buf(qiov, qiov_offset, crypt_buf, bytes);
+
+ if (qcow2_co_encrypt(bs, file_cluster_offset + offset_in_cluster,
+ offset, crypt_buf, bytes) < 0)
+ {
+ ret = -EIO;
+ goto out_unlocked;
+ }
+
+ qemu_iovec_init_buf(&encrypted_qiov, crypt_buf, bytes);
+ qiov = &encrypted_qiov;
+ qiov_offset = 0;
+ }
+
+ /* Try to efficiently initialize the physical space with zeroes */
+ ret = handle_alloc_space(bs, l2meta);
+ if (ret < 0) {
+ goto out_unlocked;
+ }
+
+ /*
+ * If we need to do COW, check if it's possible to merge the
+ * writing of the guest data together with that of the COW regions.
+ * If it's not possible (or not necessary) then write the
+ * guest data now.
+ */
+ if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) {
+ BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
+ trace_qcow2_writev_data(qemu_coroutine_self(),
+ file_cluster_offset + offset_in_cluster);
+ ret = bdrv_co_pwritev_part(s->data_file,
+ file_cluster_offset + offset_in_cluster,
+ bytes, qiov, qiov_offset, 0);
+ if (ret < 0) {
+ goto out_unlocked;
+ }
+ }
+
+ qemu_co_mutex_lock(&s->lock);
+
+ ret = qcow2_handle_l2meta(bs, &l2meta, true);
+ goto out_locked;
+
+out_unlocked:
+ qemu_co_mutex_lock(&s->lock);
+
+out_locked:
+ qcow2_handle_l2meta(bs, &l2meta, false);
+ qemu_co_mutex_unlock(&s->lock);
+
+ qemu_vfree(crypt_buf);
+
+ return ret;
+}
+
+static coroutine_fn int qcow2_co_pwritev_task_entry(AioTask *task)
+{
+ Qcow2AioTask *t = container_of(task, Qcow2AioTask, task);
+
+ assert(!t->cluster_type);
+
+ return qcow2_co_pwritev_task(t->bs, t->file_cluster_offset,
+ t->offset, t->bytes, t->qiov, t->qiov_offset,
+ t->l2meta);
+}
+
+static coroutine_fn int qcow2_co_pwritev_part(
+ BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset, int flags)
{
BDRVQcow2State *s = bs->opaque;
int offset_in_cluster;
int ret;
unsigned int cur_bytes; /* number of sectors in current iteration */
uint64_t cluster_offset;
- QEMUIOVector hd_qiov;
- uint64_t bytes_done = 0;
- uint8_t *cluster_data = NULL;
QCowL2Meta *l2meta = NULL;
+ AioTaskPool *aio = NULL;
trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes);
- qemu_iovec_init(&hd_qiov, qiov->niov);
-
- qemu_co_mutex_lock(&s->lock);
-
- while (bytes != 0) {
+ while (bytes != 0 && aio_task_pool_status(aio) == 0) {
l2meta = NULL;
- offset_in_cluster);
}
+ qemu_co_mutex_lock(&s->lock);
+
ret = qcow2_alloc_cluster_offset(bs, offset, &cur_bytes,
&cluster_offset, &l2meta);
if (ret < 0) {
qemu_co_mutex_unlock(&s->lock);
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes);
-
- if (bs->encrypted) {
- assert(s->crypto);
- if (!cluster_data) {
- cluster_data = qemu_try_blockalign(bs->file->bs,
- QCOW_MAX_CRYPT_CLUSTERS
- * s->cluster_size);
- if (cluster_data == NULL) {
- ret = -ENOMEM;
- goto out_unlocked;
- }
- }
-
- assert(hd_qiov.size <=
- QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size);
- qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size);
-
- if (qcow2_co_encrypt(bs, cluster_offset, offset,
- cluster_data, cur_bytes) < 0) {
- ret = -EIO;
- goto out_unlocked;
- }
-
- qemu_iovec_reset(&hd_qiov);
- qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes);
+ if (!aio && cur_bytes != bytes) {
+ aio = aio_task_pool_new(QCOW2_MAX_WORKERS);
}
-
- /* Try to efficiently initialize the physical space with zeroes */
- ret = handle_alloc_space(bs, l2meta);
+ ret = qcow2_add_task(bs, aio, qcow2_co_pwritev_task_entry, 0,
+ cluster_offset, offset, cur_bytes,
+ qiov, qiov_offset, l2meta);
+ l2meta = NULL; /* l2meta is consumed by qcow2_co_pwritev_task() */
if (ret < 0) {
- goto out_unlocked;
- }
-
- /* If we need to do COW, check if it's possible to merge the
- * writing of the guest data together with that of the COW regions.
- * If it's not possible (or not necessary) then write the
- * guest data now. */
- if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) {
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
- trace_qcow2_writev_data(qemu_coroutine_self(),
- cluster_offset + offset_in_cluster);
- ret = bdrv_co_pwritev(s->data_file,
- cluster_offset + offset_in_cluster,
- cur_bytes, &hd_qiov, 0);
- if (ret < 0) {
- goto out_unlocked;
- }
- }
-
- qemu_co_mutex_lock(&s->lock);
-
- ret = qcow2_handle_l2meta(bs, &l2meta, true);
- if (ret) {
- goto out_locked;
+ goto fail_nometa;
}
bytes -= cur_bytes;
offset += cur_bytes;
- bytes_done += cur_bytes;
+ qiov_offset += cur_bytes;
trace_qcow2_writev_done_part(qemu_coroutine_self(), cur_bytes);
}
ret = 0;
- goto out_locked;
-out_unlocked:
qemu_co_mutex_lock(&s->lock);
out_locked:
qemu_co_mutex_unlock(&s->lock);
- qemu_iovec_destroy(&hd_qiov);
- qemu_vfree(cluster_data);
+fail_nometa:
+ if (aio) {
+ aio_task_pool_wait_all(aio);
+ if (ret == 0) {
+ ret = aio_task_pool_status(aio);
+ }
+ g_free(aio);
+ }
+
trace_qcow2_writev_done_req(qemu_coroutine_self(), ret);
return ret;
int ret, result = 0;
Error *local_err = NULL;
- qcow2_store_persistent_dirty_bitmaps(bs, &local_err);
+ qcow2_store_persistent_dirty_bitmaps(bs, true, &local_err);
if (local_err != NULL) {
result = -EINVAL;
error_reportf_err(local_err, "Lost persistent bitmaps during "
if (mode == PREALLOC_MODE_METADATA) {
mode = PREALLOC_MODE_OFF;
}
- ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, mode,
- errp);
+ ret = bdrv_co_truncate(s->data_file, host_offset + cur_bytes, false,
+ mode, errp);
if (ret < 0) {
return ret;
}
}
blk_set_allow_write_beyond_eof(blk, true);
- /* Clear the protocol layer and preallocate it if necessary */
- ret = blk_truncate(blk, 0, PREALLOC_MODE_OFF, errp);
- if (ret < 0) {
- goto out;
- }
-
/* Write the header */
QEMU_BUILD_BUG_ON((1 << MIN_CLUSTER_BITS) < sizeof(*header));
header = g_malloc0(cluster_size);
}
/* Okay, now that we have a valid image, let's give it the right size */
- ret = blk_truncate(blk, qcow2_opts->size, qcow2_opts->preallocation, errp);
+ ret = blk_truncate(blk, qcow2_opts->size, false, qcow2_opts->preallocation,
+ errp);
if (ret < 0) {
error_prepend(errp, "Could not resize image: ");
goto out;
}
static int coroutine_fn qcow2_co_truncate(BlockDriverState *bs, int64_t offset,
- PreallocMode prealloc, Error **errp)
+ bool exact, PreallocMode prealloc,
+ Error **errp)
{
BDRVQcow2State *s = bs->opaque;
uint64_t old_length;
if ((last_cluster + 1) * s->cluster_size < old_file_size) {
Error *local_err = NULL;
+ /*
+ * Do not pass @exact here: It will not help the user if
+ * we get an error here just because they wanted to shrink
+ * their qcow2 image (on a block device) with qemu-img.
+ * (And on the qcow2 layer, the @exact requirement is
+ * always fulfilled, so there is no need to pass it on.)
+ */
bdrv_co_truncate(bs->file, (last_cluster + 1) * s->cluster_size,
- PREALLOC_MODE_OFF, &local_err);
+ false, PREALLOC_MODE_OFF, &local_err);
if (local_err) {
warn_reportf_err(local_err,
"Failed to truncate the tail of the image: ");
switch (prealloc) {
case PREALLOC_MODE_OFF:
if (has_data_file(bs)) {
- ret = bdrv_co_truncate(s->data_file, offset, prealloc, errp);
+ /*
+ * If the caller wants an exact resize, the external data
+ * file should be resized to the exact target size, too,
+ * so we pass @exact here.
+ */
+ ret = bdrv_co_truncate(s->data_file, offset, exact, prealloc, errp);
if (ret < 0) {
goto fail;
}
/* Allocate the data area */
new_file_size = allocation_start +
nb_new_data_clusters * s->cluster_size;
- ret = bdrv_co_truncate(bs->file, new_file_size, prealloc, errp);
+ /* Image file grows, so @exact does not matter */
+ ret = bdrv_co_truncate(bs->file, new_file_size, false, prealloc, errp);
if (ret < 0) {
error_prepend(errp, "Failed to resize underlying file: ");
qcow2_free_clusters(bs, allocation_start,
/* XXX: put compressed sectors first, then all the cluster aligned
tables to avoid losing bytes in alignment */
static coroutine_fn int
-qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
- uint64_t bytes, QEMUIOVector *qiov)
+qcow2_co_pwritev_compressed_part(BlockDriverState *bs,
+ uint64_t offset, uint64_t bytes,
+ QEMUIOVector *qiov, size_t qiov_offset)
{
BDRVQcow2State *s = bs->opaque;
int ret;
if (len < 0) {
return len;
}
- return bdrv_co_truncate(bs->file, len, PREALLOC_MODE_OFF, NULL);
+ return bdrv_co_truncate(bs->file, len, false, PREALLOC_MODE_OFF, NULL);
}
if (offset_into_cluster(s, offset)) {
/* Zero-pad last write if image size is not cluster aligned */
memset(buf + bytes, 0, s->cluster_size - bytes);
}
- qemu_iovec_to_buf(qiov, 0, buf, bytes);
+ qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes);
out_buf = g_malloc(s->cluster_size);
buf, s->cluster_size);
if (out_len == -ENOMEM) {
/* could not compress: write normal cluster */
- ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
+ ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0);
if (ret < 0) {
goto fail;
}
goto fail;
}
- ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size,
+ ret = bdrv_truncate(bs->file, (3 + l1_clusters) * s->cluster_size, false,
PREALLOC_MODE_OFF, &local_err);
if (ret < 0) {
error_report_err(local_err);
BDRVQcow2State *s = bs->opaque;
BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
- return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos,
- qiov->size, qiov, 0);
+ return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos,
+ qiov->size, qiov, 0, 0);
}
static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
return 0;
}
+/*
+ * Upgrades an image's version. While newer versions encompass all
+ * features of older versions, some things may have to be presented
+ * differently.
+ */
+static int qcow2_upgrade(BlockDriverState *bs, int target_version,
+ BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ bool need_snapshot_update;
+ int current_version = s->qcow_version;
+ int i;
+ int ret;
+
+ /* This is qcow2_upgrade(), not qcow2_downgrade() */
+ assert(target_version > current_version);
+
+ /* There are no other versions (yet) that you can upgrade to */
+ assert(target_version == 3);
+
+ status_cb(bs, 0, 2, cb_opaque);
+
+ /*
+ * In v2, snapshots do not need to have extra data. v3 requires
+ * the 64-bit VM state size and the virtual disk size to be
+ * present.
+ * qcow2_write_snapshots() will always write the list in the
+ * v3-compliant format.
+ */
+ need_snapshot_update = false;
+ for (i = 0; i < s->nb_snapshots; i++) {
+ if (s->snapshots[i].extra_data_size <
+ sizeof_field(QCowSnapshotExtraData, vm_state_size_large) +
+ sizeof_field(QCowSnapshotExtraData, disk_size))
+ {
+ need_snapshot_update = true;
+ break;
+ }
+ }
+ if (need_snapshot_update) {
+ ret = qcow2_write_snapshots(bs);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to update the snapshot table");
+ return ret;
+ }
+ }
+ status_cb(bs, 1, 2, cb_opaque);
+
+ s->qcow_version = target_version;
+ ret = qcow2_update_header(bs);
+ if (ret < 0) {
+ s->qcow_version = current_version;
+ error_setg_errno(errp, -ret, "Failed to update the image header");
+ return ret;
+ }
+ status_cb(bs, 2, 2, cb_opaque);
+
+ return 0;
+}
+
typedef enum Qcow2AmendOperation {
/* This is the value Qcow2AmendHelperCBInfo::last_operation will be
* statically initialized to so that the helper CB can discern the first
* invocation from an operation change */
QCOW2_NO_OPERATION = 0,
+ QCOW2_UPGRADING,
QCOW2_CHANGING_REFCOUNT_ORDER,
QCOW2_DOWNGRADING,
} Qcow2AmendOperation;
helper_cb_info = (Qcow2AmendHelperCBInfo){
.original_status_cb = status_cb,
.original_cb_opaque = cb_opaque,
- .total_operations = (new_version < old_version)
+ .total_operations = (new_version != old_version)
+ (s->refcount_bits != refcount_bits)
};
/* Upgrade first (some features may require compat=1.1) */
if (new_version > old_version) {
- s->qcow_version = new_version;
- ret = qcow2_update_header(bs);
+ helper_cb_info.current_operation = QCOW2_UPGRADING;
+ ret = qcow2_upgrade(bs, new_version, &qcow2_amend_helper_cb,
+ &helper_cb_info, errp);
if (ret < 0) {
- s->qcow_version = old_version;
- error_setg_errno(errp, -ret, "Failed to update the image header");
return ret;
}
}
return ret;
}
- ret = blk_truncate(blk, new_size, PREALLOC_MODE_OFF, errp);
+ /*
+ * Amending image options should ensure that the image has
+ * exactly the given new values, so pass exact=true here.
+ */
+ ret = blk_truncate(blk, new_size, true, PREALLOC_MODE_OFF, errp);
blk_unref(blk);
if (ret < 0) {
return ret;
.bdrv_co_block_status = qcow2_co_block_status,
.bdrv_co_preadv_part = qcow2_co_preadv_part,
- .bdrv_co_pwritev = qcow2_co_pwritev,
+ .bdrv_co_pwritev_part = qcow2_co_pwritev_part,
.bdrv_co_flush_to_os = qcow2_co_flush_to_os,
.bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes,
.bdrv_co_copy_range_from = qcow2_co_copy_range_from,
.bdrv_co_copy_range_to = qcow2_co_copy_range_to,
.bdrv_co_truncate = qcow2_co_truncate,
- .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed,
+ .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part,
.bdrv_make_empty = qcow2_make_empty,
.bdrv_snapshot_create = qcow2_snapshot_create,
.bdrv_detach_aio_context = qcow2_detach_aio_context,
.bdrv_attach_aio_context = qcow2_attach_aio_context,
- .bdrv_reopen_bitmaps_rw = qcow2_reopen_bitmaps_rw,
- .bdrv_can_store_new_dirty_bitmap = qcow2_can_store_new_dirty_bitmap,
- .bdrv_remove_persistent_dirty_bitmap = qcow2_remove_persistent_dirty_bitmap,
+ .bdrv_co_can_store_new_dirty_bitmap = qcow2_co_can_store_new_dirty_bitmap,
+ .bdrv_co_remove_persistent_dirty_bitmap =
+ qcow2_co_remove_persistent_dirty_bitmap,
};
static void bdrv_qcow2_init(void)