*/
#include "qemu/osdep.h"
+#include "block/block-io.h"
#include "qapi/error.h"
#include "qcow2.h"
#include "qemu/range.h"
#include "qemu/bswap.h"
#include "qemu/cutils.h"
+#include "qemu/memalign.h"
#include "trace.h"
static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
uint64_t max);
-static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
- int64_t offset, int64_t length, uint64_t addend,
- bool decrease, enum qcow2_discard_type type);
+
+G_GNUC_WARN_UNUSED_RESULT
+static int update_refcount(BlockDriverState *bs,
+ int64_t offset, int64_t length, uint64_t addend,
+ bool decrease, enum qcow2_discard_type type);
static uint64_t get_refcount_ro0(const void *refcount_array, uint64_t index);
static uint64_t get_refcount_ro1(const void *refcount_array, uint64_t index);
s->max_refcount_table_index = i;
}
-int qcow2_refcount_init(BlockDriverState *bs)
+int coroutine_fn qcow2_refcount_init(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
unsigned int refcount_table_size2, i;
ret = -ENOMEM;
goto fail;
}
- BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
- ret = bdrv_pread(bs->file, s->refcount_table_offset,
- s->refcount_table, refcount_table_size2);
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
+ ret = bdrv_co_pread(bs->file, s->refcount_table_offset,
+ refcount_table_size2, s->refcount_table, 0);
if (ret < 0) {
goto fail;
}
}
-static int load_refcount_block(BlockDriverState *bs,
- int64_t refcount_block_offset,
- void **refcount_block)
+static int GRAPH_RDLOCK
+load_refcount_block(BlockDriverState *bs, int64_t refcount_block_offset,
+ void **refcount_block)
{
BDRVQcow2State *s = bs->opaque;
*
* Returns 0 on success or -errno in error case
*/
-static int alloc_refcount_block(BlockDriverState *bs,
- int64_t cluster_index, void **refcount_block)
+static int GRAPH_RDLOCK
+alloc_refcount_block(BlockDriverState *bs, int64_t cluster_index,
+ void **refcount_block)
{
BDRVQcow2State *s = bs->opaque;
unsigned int refcount_table_index;
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_HOOKUP);
ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset +
refcount_table_index * REFTABLE_ENTRY_SIZE,
- &data64, sizeof(data64));
+ sizeof(data64), &data64, 0);
if (ret < 0) {
goto fail;
}
}
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE);
- ret = bdrv_pwrite_sync(bs->file, table_offset, new_table,
- table_size * REFTABLE_ENTRY_SIZE);
+ ret = bdrv_pwrite_sync(bs->file, table_offset,
+ table_size * REFTABLE_ENTRY_SIZE, new_table, 0);
if (ret < 0) {
goto fail;
}
BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE);
ret = bdrv_pwrite_sync(bs->file,
offsetof(QCowHeader, refcount_table_offset),
- &data, sizeof(data));
+ sizeof(data), &data, 0);
if (ret < 0) {
goto fail;
}
/* XXX: cache several refcount block clusters ? */
/* @addend is the absolute value of the addend; if @decrease is set, @addend
* will be subtracted from the current refcount, otherwise it will be added */
-static int QEMU_WARN_UNUSED_RESULT update_refcount(BlockDriverState *bs,
- int64_t offset,
- int64_t length,
- uint64_t addend,
- bool decrease,
- enum qcow2_discard_type type)
+static int GRAPH_RDLOCK
+update_refcount(BlockDriverState *bs, int64_t offset, int64_t length,
+ uint64_t addend, bool decrease, enum qcow2_discard_type type)
{
BDRVQcow2State *s = bs->opaque;
int64_t start, last, cluster_offset;
/* return < 0 if error */
-static int64_t alloc_clusters_noref(BlockDriverState *bs, uint64_t size,
- uint64_t max)
+static int64_t GRAPH_RDLOCK
+alloc_clusters_noref(BlockDriverState *bs, uint64_t size, uint64_t max)
{
BDRVQcow2State *s = bs->opaque;
uint64_t i, nb_clusters, refcount;
return offset;
}
-int64_t qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
- int64_t nb_clusters)
+int64_t coroutine_fn qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
+ int64_t nb_clusters)
{
BDRVQcow2State *s = bs->opaque;
uint64_t cluster_index, refcount;
/* only used to allocate compressed sectors. We try to allocate
contiguous sectors. size must be <= cluster_size */
-int64_t qcow2_alloc_bytes(BlockDriverState *bs, int size)
+int64_t coroutine_fn GRAPH_RDLOCK qcow2_alloc_bytes(BlockDriverState *bs, int size)
{
BDRVQcow2State *s = bs->opaque;
int64_t offset;
size_t free_in_cluster;
int ret;
- BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
assert(size > 0 && size <= s->cluster_size);
assert(!s->free_byte_offset || offset_into_cluster(s, s->free_byte_offset));
}
}
-int coroutine_fn qcow2_write_caches(BlockDriverState *bs)
+int qcow2_write_caches(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
int ret;
return 0;
}
-int coroutine_fn qcow2_flush_caches(BlockDriverState *bs)
+int qcow2_flush_caches(BlockDriverState *bs)
{
int ret = qcow2_write_caches(bs);
if (ret < 0) {
}
l1_allocated = true;
- ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size2);
+ ret = bdrv_pread(bs->file, l1_table_offset, l1_size2, l1_table, 0);
if (ret < 0) {
goto fail;
}
cpu_to_be64s(&l1_table[i]);
}
- ret = bdrv_pwrite_sync(bs->file, l1_table_offset,
- l1_table, l1_size2);
+ ret = bdrv_pwrite_sync(bs->file, l1_table_offset, l1_size2, l1_table,
+ 0);
for (i = 0; i < l1_size; i++) {
be64_to_cpus(&l1_table[i]);
*
* Modifies the number of errors in res.
*/
-int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
- void **refcount_table,
- int64_t *refcount_table_size,
- int64_t offset, int64_t size)
+int coroutine_fn GRAPH_RDLOCK
+qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
+ void **refcount_table,
+ int64_t *refcount_table_size,
+ int64_t offset, int64_t size)
{
BDRVQcow2State *s = bs->opaque;
uint64_t start, last, cluster_offset, k, refcount;
return 0;
}
- file_len = bdrv_getlength(bs->file->bs);
+ file_len = bdrv_co_getlength(bs->file->bs);
if (file_len < 0) {
return file_len;
}
*
* On failure in-memory @l2_table may be modified.
*/
-static int fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res,
- uint64_t l2_offset,
- uint64_t *l2_table, int l2_index, bool active,
- bool *metadata_overlap)
+static int coroutine_fn GRAPH_RDLOCK
+fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res,
+ uint64_t l2_offset, uint64_t *l2_table,
+ int l2_index, bool active,
+ bool *metadata_overlap)
{
BDRVQcow2State *s = bs->opaque;
int ret;
goto fail;
}
- ret = bdrv_pwrite_sync(bs->file, l2e_offset, &l2_table[idx],
- l2_entry_size(s));
+ ret = bdrv_co_pwrite_sync(bs->file, l2e_offset, l2_entry_size(s),
+ &l2_table[idx], 0);
if (ret < 0) {
fprintf(stderr, "ERROR: Failed to overwrite L2 "
"table entry: %s\n", strerror(-ret));
* Returns the number of errors found by the checks or -errno if an internal
* error occurred.
*/
-static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
- void **refcount_table,
- int64_t *refcount_table_size, int64_t l2_offset,
- int flags, BdrvCheckMode fix, bool active)
+static int coroutine_fn GRAPH_RDLOCK
+check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
+ void **refcount_table,
+ int64_t *refcount_table_size, int64_t l2_offset,
+ int flags, BdrvCheckMode fix, bool active)
{
BDRVQcow2State *s = bs->opaque;
uint64_t l2_entry, l2_bitmap;
bool metadata_overlap;
/* Read L2 table from disk */
- ret = bdrv_pread(bs->file, l2_offset, l2_table, l2_size_bytes);
+ ret = bdrv_co_pread(bs->file, l2_offset, l2_size_bytes, l2_table, 0);
if (ret < 0) {
fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
res->check_errors++;
* Returns the number of errors found by the checks or -errno if an internal
* error occurred.
*/
-static int check_refcounts_l1(BlockDriverState *bs,
- BdrvCheckResult *res,
- void **refcount_table,
- int64_t *refcount_table_size,
- int64_t l1_table_offset, int l1_size,
- int flags, BdrvCheckMode fix, bool active)
+static int coroutine_fn GRAPH_RDLOCK
+check_refcounts_l1(BlockDriverState *bs, BdrvCheckResult *res,
+ void **refcount_table, int64_t *refcount_table_size,
+ int64_t l1_table_offset, int l1_size,
+ int flags, BdrvCheckMode fix, bool active)
{
BDRVQcow2State *s = bs->opaque;
size_t l1_size_bytes = l1_size * L1E_SIZE;
}
/* Read L1 table entries from disk */
- ret = bdrv_pread(bs->file, l1_table_offset, l1_table, l1_size_bytes);
+ ret = bdrv_co_pread(bs->file, l1_table_offset, l1_size_bytes, l1_table, 0);
if (ret < 0) {
fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
res->check_errors++;
* have been already detected and sufficiently signaled by the calling function
* (qcow2_check_refcounts) by the time this function is called).
*/
-static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix)
+static int coroutine_fn GRAPH_RDLOCK
+check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
{
BDRVQcow2State *s = bs->opaque;
uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size);
}
}
- ret = bdrv_pread(bs->file, l2_offset, l2_table,
- s->l2_size * l2_entry_size(s));
+ ret = bdrv_co_pread(bs->file, l2_offset, s->l2_size * l2_entry_size(s),
+ l2_table, 0);
if (ret < 0) {
fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
strerror(-ret));
goto fail;
}
- ret = bdrv_pwrite(bs->file, l2_offset, l2_table,
- s->cluster_size);
+ ret = bdrv_co_pwrite(bs->file, l2_offset, s->cluster_size, l2_table, 0);
if (ret < 0) {
fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
strerror(-ret));
* Checks consistency of refblocks and accounts for each refblock in
* *refcount_table.
*/
-static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix, bool *rebuild,
- void **refcount_table, int64_t *nb_clusters)
+static int coroutine_fn GRAPH_RDLOCK
+check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix, bool *rebuild,
+ void **refcount_table, int64_t *nb_clusters)
{
BDRVQcow2State *s = bs->opaque;
int64_t i, size;
goto resize_fail;
}
- ret = bdrv_truncate(bs->file, offset + s->cluster_size, false,
- PREALLOC_MODE_OFF, 0, &local_err);
+ ret = bdrv_co_truncate(bs->file, offset + s->cluster_size, false,
+ PREALLOC_MODE_OFF, 0, &local_err);
if (ret < 0) {
error_report_err(local_err);
goto resize_fail;
}
- size = bdrv_getlength(bs->file->bs);
+ size = bdrv_co_getlength(bs->file->bs);
if (size < 0) {
ret = size;
goto resize_fail;
/*
* Calculates an in-memory refcount table.
*/
-static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix, bool *rebuild,
- void **refcount_table, int64_t *nb_clusters)
+static int coroutine_fn GRAPH_RDLOCK
+calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix, bool *rebuild,
+ void **refcount_table, int64_t *nb_clusters)
{
BDRVQcow2State *s = bs->opaque;
int64_t i;
* Compares the actual reference count for each cluster in the image against the
* refcount as reported by the refcount structures on-disk.
*/
-static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix, bool *rebuild,
- int64_t *highest_cluster,
- void *refcount_table, int64_t nb_clusters)
+static void coroutine_fn GRAPH_RDLOCK
+compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
+ BdrvCheckMode fix, bool *rebuild,
+ int64_t *highest_cluster,
+ void *refcount_table, int64_t nb_clusters)
{
BDRVQcow2State *s = bs->opaque;
int64_t i;
}
/*
- * Creates a new refcount structure based solely on the in-memory information
- * given through *refcount_table. All necessary allocations will be reflected
- * in that array.
+ * Helper function for rebuild_refcount_structure().
*
- * On success, the old refcount structure is leaked (it will be covered by the
- * new refcount structure).
+ * Scan the range of clusters [first_cluster, end_cluster) for allocated
+ * clusters and write all corresponding refblocks to disk. The refblock
+ * and allocation data is taken from the in-memory refcount table
+ * *refcount_table[] (of size *nb_clusters), which is basically one big
+ * (unlimited size) refblock for the whole image.
+ *
+ * For these refblocks, clusters are allocated using said in-memory
+ * refcount table. Care is taken that these allocations are reflected
+ * in the refblocks written to disk.
+ *
+ * The refblocks' offsets are written into a reftable, which is
+ * *on_disk_reftable_ptr[] (of size *on_disk_reftable_entries_ptr). If
+ * that reftable is of insufficient size, it will be resized to fit.
+ * This reftable is not written to disk.
+ *
+ * (If *on_disk_reftable_ptr is not NULL, the entries within are assumed
+ * to point to existing valid refblocks that do not need to be allocated
+ * again.)
+ *
+ * Return whether the on-disk reftable array was resized (true/false),
+ * or -errno on error.
*/
-static int rebuild_refcount_structure(BlockDriverState *bs,
- BdrvCheckResult *res,
- void **refcount_table,
- int64_t *nb_clusters)
+static int coroutine_fn GRAPH_RDLOCK
+rebuild_refcounts_write_refblocks(
+ BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters,
+ int64_t first_cluster, int64_t end_cluster,
+ uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr,
+ Error **errp
+ )
{
BDRVQcow2State *s = bs->opaque;
- int64_t first_free_cluster = 0, reftable_offset = -1, cluster = 0;
+ int64_t cluster;
int64_t refblock_offset, refblock_start, refblock_index;
- uint32_t reftable_size = 0;
- uint64_t *on_disk_reftable = NULL;
+ int64_t first_free_cluster = 0;
+ uint64_t *on_disk_reftable = *on_disk_reftable_ptr;
+ uint32_t on_disk_reftable_entries = *on_disk_reftable_entries_ptr;
void *on_disk_refblock;
- int ret = 0;
- struct {
- uint64_t reftable_offset;
- uint32_t reftable_clusters;
- } QEMU_PACKED reftable_offset_and_clusters;
-
- qcow2_cache_empty(bs, s->refcount_block_cache);
+ bool reftable_grown = false;
+ int ret;
-write_refblocks:
- for (; cluster < *nb_clusters; cluster++) {
+ for (cluster = first_cluster; cluster < end_cluster; cluster++) {
+ /* Check all clusters to find refblocks that contain non-zero entries */
if (!s->get_refcount(*refcount_table, cluster)) {
continue;
}
+ /*
+ * This cluster is allocated, so we need to create a refblock
+ * for it. The data we will write to disk is just the
+ * respective slice from *refcount_table, so it will contain
+ * accurate refcounts for all clusters belonging to this
+ * refblock. After we have written it, we will therefore skip
+ * all remaining clusters in this refblock.
+ */
+
refblock_index = cluster >> s->refcount_block_bits;
refblock_start = refblock_index << s->refcount_block_bits;
- /* Don't allocate a cluster in a refblock already written to disk */
- if (first_free_cluster < refblock_start) {
- first_free_cluster = refblock_start;
- }
- refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
- nb_clusters, &first_free_cluster);
- if (refblock_offset < 0) {
- fprintf(stderr, "ERROR allocating refblock: %s\n",
- strerror(-refblock_offset));
- res->check_errors++;
- ret = refblock_offset;
- goto fail;
- }
+ if (on_disk_reftable_entries > refblock_index &&
+ on_disk_reftable[refblock_index])
+ {
+ /*
+ * We can get here after a `goto write_refblocks`: We have a
+ * reftable from a previous run, and the refblock is already
+ * allocated. No need to allocate it again.
+ */
+ refblock_offset = on_disk_reftable[refblock_index];
+ } else {
+ int64_t refblock_cluster_index;
- if (reftable_size <= refblock_index) {
- uint32_t old_reftable_size = reftable_size;
- uint64_t *new_on_disk_reftable;
+ /* Don't allocate a cluster in a refblock already written to disk */
+ if (first_free_cluster < refblock_start) {
+ first_free_cluster = refblock_start;
+ }
+ refblock_offset = alloc_clusters_imrt(bs, 1, refcount_table,
+ nb_clusters,
+ &first_free_cluster);
+ if (refblock_offset < 0) {
+ error_setg_errno(errp, -refblock_offset,
+ "ERROR allocating refblock");
+ return refblock_offset;
+ }
- reftable_size = ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
- s->cluster_size) / REFTABLE_ENTRY_SIZE;
- new_on_disk_reftable = g_try_realloc(on_disk_reftable,
- reftable_size *
- REFTABLE_ENTRY_SIZE);
- if (!new_on_disk_reftable) {
- res->check_errors++;
- ret = -ENOMEM;
- goto fail;
+ refblock_cluster_index = refblock_offset / s->cluster_size;
+ if (refblock_cluster_index >= end_cluster) {
+ /*
+ * We must write the refblock that holds this refblock's
+ * refcount
+ */
+ end_cluster = refblock_cluster_index + 1;
}
- on_disk_reftable = new_on_disk_reftable;
- memset(on_disk_reftable + old_reftable_size, 0,
- (reftable_size - old_reftable_size) * REFTABLE_ENTRY_SIZE);
+ if (on_disk_reftable_entries <= refblock_index) {
+ on_disk_reftable_entries =
+ ROUND_UP((refblock_index + 1) * REFTABLE_ENTRY_SIZE,
+ s->cluster_size) / REFTABLE_ENTRY_SIZE;
+ on_disk_reftable =
+ g_try_realloc(on_disk_reftable,
+ on_disk_reftable_entries *
+ REFTABLE_ENTRY_SIZE);
+ if (!on_disk_reftable) {
+ error_setg(errp, "ERROR allocating reftable memory");
+ return -ENOMEM;
+ }
- /* The offset we have for the reftable is now no longer valid;
- * this will leak that range, but we can easily fix that by running
- * a leak-fixing check after this rebuild operation */
- reftable_offset = -1;
- } else {
- assert(on_disk_reftable);
- }
- on_disk_reftable[refblock_index] = refblock_offset;
+ memset(on_disk_reftable + *on_disk_reftable_entries_ptr, 0,
+ (on_disk_reftable_entries -
+ *on_disk_reftable_entries_ptr) *
+ REFTABLE_ENTRY_SIZE);
- /* If this is apparently the last refblock (for now), try to squeeze the
- * reftable in */
- if (refblock_index == (*nb_clusters - 1) >> s->refcount_block_bits &&
- reftable_offset < 0)
- {
- uint64_t reftable_clusters = size_to_clusters(s, reftable_size *
- REFTABLE_ENTRY_SIZE);
- reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
- refcount_table, nb_clusters,
- &first_free_cluster);
- if (reftable_offset < 0) {
- fprintf(stderr, "ERROR allocating reftable: %s\n",
- strerror(-reftable_offset));
- res->check_errors++;
- ret = reftable_offset;
- goto fail;
+ *on_disk_reftable_ptr = on_disk_reftable;
+ *on_disk_reftable_entries_ptr = on_disk_reftable_entries;
+
+ reftable_grown = true;
+ } else {
+ assert(on_disk_reftable);
}
+ on_disk_reftable[refblock_index] = refblock_offset;
}
+ /* Refblock is allocated, write it to disk */
+
ret = qcow2_pre_write_overlap_check(bs, 0, refblock_offset,
s->cluster_size, false);
if (ret < 0) {
- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
- goto fail;
+ error_setg_errno(errp, -ret, "ERROR writing refblock");
+ return ret;
}
- /* The size of *refcount_table is always cluster-aligned, therefore the
- * write operation will not overflow */
+ /*
+ * The refblock is simply a slice of *refcount_table.
+ * Note that the size of *refcount_table is always aligned to
+ * whole clusters, so the write operation will not result in
+ * out-of-bounds accesses.
+ */
on_disk_refblock = (void *)((char *) *refcount_table +
refblock_index * s->cluster_size);
- ret = bdrv_pwrite(bs->file, refblock_offset, on_disk_refblock,
- s->cluster_size);
+ ret = bdrv_co_pwrite(bs->file, refblock_offset, s->cluster_size,
+ on_disk_refblock, 0);
if (ret < 0) {
- fprintf(stderr, "ERROR writing refblock: %s\n", strerror(-ret));
- goto fail;
+ error_setg_errno(errp, -ret, "ERROR writing refblock");
+ return ret;
}
- /* Go to the end of this refblock */
+ /* This refblock is done, skip to its end */
cluster = refblock_start + s->refcount_block_size - 1;
}
- if (reftable_offset < 0) {
- uint64_t post_refblock_start, reftable_clusters;
+ return reftable_grown;
+}
+
+/*
+ * Creates a new refcount structure based solely on the in-memory information
+ * given through *refcount_table (this in-memory information is basically just
+ * the concatenation of all refblocks). All necessary allocations will be
+ * reflected in that array.
+ *
+ * On success, the old refcount structure is leaked (it will be covered by the
+ * new refcount structure).
+ */
+static int coroutine_fn GRAPH_RDLOCK
+rebuild_refcount_structure(BlockDriverState *bs, BdrvCheckResult *res,
+ void **refcount_table, int64_t *nb_clusters,
+ Error **errp)
+{
+ BDRVQcow2State *s = bs->opaque;
+ int64_t reftable_offset = -1;
+ int64_t reftable_length = 0;
+ int64_t reftable_clusters;
+ int64_t refblock_index;
+ uint32_t on_disk_reftable_entries = 0;
+ uint64_t *on_disk_reftable = NULL;
+ int ret = 0;
+ int reftable_size_changed = 0;
+ struct {
+ uint64_t reftable_offset;
+ uint32_t reftable_clusters;
+ } QEMU_PACKED reftable_offset_and_clusters;
+
+ qcow2_cache_empty(bs, s->refcount_block_cache);
+
+ /*
+ * For each refblock containing entries, we try to allocate a
+ * cluster (in the in-memory refcount table) and write its offset
+ * into on_disk_reftable[]. We then write the whole refblock to
+ * disk (as a slice of the in-memory refcount table).
+ * This is done by rebuild_refcounts_write_refblocks().
+ *
+ * Once we have scanned all clusters, we try to find space for the
+ * reftable. This will dirty the in-memory refcount table (i.e.
+ * make it differ from the refblocks we have already written), so we
+ * need to run rebuild_refcounts_write_refblocks() again for the
+ * range of clusters where the reftable has been allocated.
+ *
+ * This second run might make the reftable grow again, in which case
+ * we will need to allocate another space for it, which is why we
+ * repeat all this until the reftable stops growing.
+ *
+ * (This loop will terminate, because with every cluster the
+ * reftable grows, it can accommodate a multitude of more refcounts,
+ * so that at some point this must be able to cover the reftable
+ * and all refblocks describing it.)
+ *
+ * We then convert the reftable to big-endian and write it to disk.
+ *
+ * Note that we never free any reftable allocations. Doing so would
+ * needlessly complicate the algorithm: The eventual second check
+ * run we do will clean up all leaks we have caused.
+ */
+
+ reftable_size_changed =
+ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
+ 0, *nb_clusters,
+ &on_disk_reftable,
+ &on_disk_reftable_entries, errp);
+ if (reftable_size_changed < 0) {
+ res->check_errors++;
+ ret = reftable_size_changed;
+ goto fail;
+ }
+
+ /*
+ * There was no reftable before, so rebuild_refcounts_write_refblocks()
+ * must have increased its size (from 0 to something).
+ */
+ assert(reftable_size_changed);
+
+ do {
+ int64_t reftable_start_cluster, reftable_end_cluster;
+ int64_t first_free_cluster = 0;
+
+ reftable_length = on_disk_reftable_entries * REFTABLE_ENTRY_SIZE;
+ reftable_clusters = size_to_clusters(s, reftable_length);
- post_refblock_start = ROUND_UP(*nb_clusters, s->refcount_block_size);
- reftable_clusters =
- size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE);
- /* Not pretty but simple */
- if (first_free_cluster < post_refblock_start) {
- first_free_cluster = post_refblock_start;
- }
reftable_offset = alloc_clusters_imrt(bs, reftable_clusters,
refcount_table, nb_clusters,
&first_free_cluster);
if (reftable_offset < 0) {
- fprintf(stderr, "ERROR allocating reftable: %s\n",
- strerror(-reftable_offset));
+ error_setg_errno(errp, -reftable_offset,
+ "ERROR allocating reftable");
res->check_errors++;
ret = reftable_offset;
goto fail;
}
- goto write_refblocks;
- }
+ /*
+ * We need to update the affected refblocks, so re-run the
+ * write_refblocks loop for the reftable's range of clusters.
+ */
+ assert(offset_into_cluster(s, reftable_offset) == 0);
+ reftable_start_cluster = reftable_offset / s->cluster_size;
+ reftable_end_cluster = reftable_start_cluster + reftable_clusters;
+ reftable_size_changed =
+ rebuild_refcounts_write_refblocks(bs, refcount_table, nb_clusters,
+ reftable_start_cluster,
+ reftable_end_cluster,
+ &on_disk_reftable,
+ &on_disk_reftable_entries, errp);
+ if (reftable_size_changed < 0) {
+ res->check_errors++;
+ ret = reftable_size_changed;
+ goto fail;
+ }
+
+ /*
+ * If the reftable size has changed, we will need to find a new
+ * allocation, repeating the loop.
+ */
+ } while (reftable_size_changed);
+
+ /* The above loop must have run at least once */
+ assert(reftable_offset >= 0);
+
+ /*
+ * All allocations are done, all refblocks are written, convert the
+ * reftable to big-endian and write it to disk.
+ */
- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
+ for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
+ refblock_index++)
+ {
cpu_to_be64s(&on_disk_reftable[refblock_index]);
}
- ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset,
- reftable_size * REFTABLE_ENTRY_SIZE,
+ ret = qcow2_pre_write_overlap_check(bs, 0, reftable_offset, reftable_length,
false);
if (ret < 0) {
- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+ error_setg_errno(errp, -ret, "ERROR writing reftable");
goto fail;
}
- assert(reftable_size < INT_MAX / REFTABLE_ENTRY_SIZE);
- ret = bdrv_pwrite(bs->file, reftable_offset, on_disk_reftable,
- reftable_size * REFTABLE_ENTRY_SIZE);
+ assert(reftable_length < INT_MAX);
+ ret = bdrv_co_pwrite(bs->file, reftable_offset, reftable_length,
+ on_disk_reftable, 0);
if (ret < 0) {
- fprintf(stderr, "ERROR writing reftable: %s\n", strerror(-ret));
+ error_setg_errno(errp, -ret, "ERROR writing reftable");
goto fail;
}
/* Enter new reftable into the image header */
reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
reftable_offset_and_clusters.reftable_clusters =
- cpu_to_be32(size_to_clusters(s, reftable_size * REFTABLE_ENTRY_SIZE));
- ret = bdrv_pwrite_sync(bs->file,
- offsetof(QCowHeader, refcount_table_offset),
- &reftable_offset_and_clusters,
- sizeof(reftable_offset_and_clusters));
+ cpu_to_be32(reftable_clusters);
+ ret = bdrv_co_pwrite_sync(bs->file,
+ offsetof(QCowHeader, refcount_table_offset),
+ sizeof(reftable_offset_and_clusters),
+ &reftable_offset_and_clusters, 0);
if (ret < 0) {
- fprintf(stderr, "ERROR setting reftable: %s\n", strerror(-ret));
+ error_setg_errno(errp, -ret, "ERROR setting reftable");
goto fail;
}
- for (refblock_index = 0; refblock_index < reftable_size; refblock_index++) {
+ for (refblock_index = 0; refblock_index < on_disk_reftable_entries;
+ refblock_index++)
+ {
be64_to_cpus(&on_disk_reftable[refblock_index]);
}
s->refcount_table = on_disk_reftable;
s->refcount_table_offset = reftable_offset;
- s->refcount_table_size = reftable_size;
+ s->refcount_table_size = on_disk_reftable_entries;
update_max_refcount_table_index(s);
return 0;
* Returns 0 if no errors are found, the number of errors in case the image is
* detected as corrupted, and -errno when an internal error occurred.
*/
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
- BdrvCheckMode fix)
+int coroutine_fn GRAPH_RDLOCK
+qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
{
BDRVQcow2State *s = bs->opaque;
BdrvCheckResult pre_compare_res;
bool rebuild = false;
int ret;
- size = bdrv_getlength(bs->file->bs);
+ size = bdrv_co_getlength(bs->file->bs);
if (size < 0) {
res->check_errors++;
return size;
if (rebuild && (fix & BDRV_FIX_ERRORS)) {
BdrvCheckResult old_res = *res;
int fresh_leaks = 0;
+ Error *local_err = NULL;
fprintf(stderr, "Rebuilding refcount structure\n");
ret = rebuild_refcount_structure(bs, res, &refcount_table,
- &nb_clusters);
+ &nb_clusters, &local_err);
if (ret < 0) {
+ error_report_err(local_err);
goto fail;
}
return -ENOMEM;
}
- ret = bdrv_pread(bs->file, l1_ofs, l1, l1_sz2);
+ ret = bdrv_pread(bs->file, l1_ofs, l1_sz2, l1, 0);
if (ret < 0) {
g_free(l1);
return ret;
*
* @allocated should be set to true if a new cluster has been allocated.
*/
-typedef int (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
- uint64_t reftable_index, uint64_t *reftable_size,
- void *refblock, bool refblock_empty,
- bool *allocated, Error **errp);
+typedef int /* GRAPH_RDLOCK_PTR */
+ (RefblockFinishOp)(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty,
+ bool *allocated, Error **errp);
/**
* This "operation" for walk_over_reftable() allocates the refblock on disk (if
* it is not empty) and inserts its offset into the new reftable. The size of
* this new reftable is increased as required.
*/
-static int alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
- uint64_t reftable_index, uint64_t *reftable_size,
- void *refblock, bool refblock_empty, bool *allocated,
- Error **errp)
+static int GRAPH_RDLOCK
+alloc_refblock(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty, bool *allocated,
+ Error **errp)
{
BDRVQcow2State *s = bs->opaque;
int64_t offset;
* offset specified by the new reftable's entry. It does not modify the new
* reftable or change any refcounts.
*/
-static int flush_refblock(BlockDriverState *bs, uint64_t **reftable,
- uint64_t reftable_index, uint64_t *reftable_size,
- void *refblock, bool refblock_empty, bool *allocated,
- Error **errp)
+static int GRAPH_RDLOCK
+flush_refblock(BlockDriverState *bs, uint64_t **reftable,
+ uint64_t reftable_index, uint64_t *reftable_size,
+ void *refblock, bool refblock_empty, bool *allocated,
+ Error **errp)
{
BDRVQcow2State *s = bs->opaque;
int64_t offset;
return ret;
}
- ret = bdrv_pwrite(bs->file, offset, refblock, s->cluster_size);
+ ret = bdrv_pwrite(bs->file, offset, s->cluster_size, refblock, 0);
if (ret < 0) {
error_setg_errno(errp, -ret, "Failed to write refblock");
return ret;
*
* @allocated is set to true if a new cluster has been allocated.
*/
-static int walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
- uint64_t *new_reftable_index,
- uint64_t *new_reftable_size,
- void *new_refblock, int new_refblock_size,
- int new_refcount_bits,
- RefblockFinishOp *operation, bool *allocated,
- Qcow2SetRefcountFunc *new_set_refcount,
- BlockDriverAmendStatusCB *status_cb,
- void *cb_opaque, int index, int total,
- Error **errp)
+static int GRAPH_RDLOCK
+walk_over_reftable(BlockDriverState *bs, uint64_t **new_reftable,
+ uint64_t *new_reftable_index,
+ uint64_t *new_reftable_size,
+ void *new_refblock, int new_refblock_size,
+ int new_refcount_bits,
+ RefblockFinishOp *operation, bool *allocated,
+ Qcow2SetRefcountFunc *new_set_refcount,
+ BlockDriverAmendStatusCB *status_cb,
+ void *cb_opaque, int index, int total,
+ Error **errp)
{
BDRVQcow2State *s = bs->opaque;
uint64_t reftable_index;
cpu_to_be64s(&new_reftable[i]);
}
- ret = bdrv_pwrite(bs->file, new_reftable_offset, new_reftable,
- new_reftable_size * REFTABLE_ENTRY_SIZE);
+ ret = bdrv_pwrite(bs->file, new_reftable_offset,
+ new_reftable_size * REFTABLE_ENTRY_SIZE, new_reftable,
+ 0);
for (i = 0; i < new_reftable_size; i++) {
be64_to_cpus(&new_reftable[i]);
return ret;
}
-static int64_t get_refblock_offset(BlockDriverState *bs, uint64_t offset)
+static int64_t coroutine_fn GRAPH_RDLOCK
+get_refblock_offset(BlockDriverState *bs, uint64_t offset)
{
BDRVQcow2State *s = bs->opaque;
uint32_t index = offset_to_reftable_index(s, offset);
return covering_refblock_offset;
}
-static int qcow2_discard_refcount_block(BlockDriverState *bs,
- uint64_t discard_block_offs)
+static int coroutine_fn GRAPH_RDLOCK
+qcow2_discard_refcount_block(BlockDriverState *bs, uint64_t discard_block_offs)
{
BDRVQcow2State *s = bs->opaque;
int64_t refblock_offs;
return 0;
}
-int qcow2_shrink_reftable(BlockDriverState *bs)
+int coroutine_fn qcow2_shrink_reftable(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
uint64_t *reftable_tmp =
reftable_tmp[i] = unused_block ? 0 : cpu_to_be64(s->refcount_table[i]);
}
- ret = bdrv_pwrite_sync(bs->file, s->refcount_table_offset, reftable_tmp,
- s->refcount_table_size * REFTABLE_ENTRY_SIZE);
+ ret = bdrv_co_pwrite_sync(bs->file, s->refcount_table_offset,
+ s->refcount_table_size * REFTABLE_ENTRY_SIZE,
+ reftable_tmp, 0);
/*
* If the write in the reftable failed the image may contain a partially
* overwritten reftable. In this case it would be better to clear the
return ret;
}
-int64_t qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
+int64_t coroutine_fn qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
{
BDRVQcow2State *s = bs->opaque;
int64_t i;
return -EIO;
}
-int qcow2_detect_metadata_preallocation(BlockDriverState *bs)
+int coroutine_fn GRAPH_RDLOCK
+qcow2_detect_metadata_preallocation(BlockDriverState *bs)
{
BDRVQcow2State *s = bs->opaque;
int64_t i, end_cluster, cluster_count = 0, threshold;
qemu_co_mutex_assert_locked(&s->lock);
- file_length = bdrv_getlength(bs->file->bs);
+ file_length = bdrv_co_getlength(bs->file->bs);
if (file_length < 0) {
return file_length;
}
- real_allocation = bdrv_get_allocated_file_size(bs->file->bs);
+ real_allocation = bdrv_co_get_allocated_file_size(bs->file->bs);
if (real_allocation < 0) {
return real_allocation;
}