#include "block/qcow2.h"
#include "trace.h"
-int qcow2_grow_l1_table(BlockDriverState *bs, int min_size, bool exact_size)
+int qcow2_grow_l1_table(BlockDriverState *bs, uint64_t min_size,
+ bool exact_size)
{
BDRVQcowState *s = bs->opaque;
- int new_l1_size, new_l1_size2, ret, i;
+ int new_l1_size2, ret, i;
uint64_t *new_l1_table;
- int64_t new_l1_table_offset;
+ int64_t new_l1_table_offset, new_l1_size;
uint8_t data[12];
if (min_size <= s->l1_size)
}
}
+ if (new_l1_size > INT_MAX) {
+ return -EFBIG;
+ }
+
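The new guard matters because `new_l1_size` is now 64-bit while `new_l1_size2` stays an `int`. The actual size computation is elided above; below is a minimal, hypothetical sketch of a doubling growth policy with the same guard (`grow_l1_size()` and its signature are invented for illustration, not the elided qcow2 code):

```c
#include <errno.h>
#include <inttypes.h>
#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical growth policy; the point is the 64-bit guard, since the
 * result is later multiplied by sizeof(uint64_t) and stored in an int. */
static int64_t grow_l1_size(int64_t cur_size, uint64_t min_size, bool exact)
{
    if (min_size > INT_MAX) {
        return -EFBIG;
    }
    if (exact) {
        return (int64_t)min_size;
    }

    int64_t new_size = cur_size ? cur_size : 1;
    while (new_size < (int64_t)min_size) {
        new_size *= 2;                  /* grow geometrically */
        if (new_size > INT_MAX) {
            return -EFBIG;
        }
    }
    return new_size;
}

int main(void)
{
    printf("%" PRId64 "\n", grow_l1_size(16, 1000, false));   /* 1024 */
    return 0;
}
```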
#ifdef DEBUG_ALLOC2
- fprintf(stderr, "grow l1_table from %d to %d\n", s->l1_size, new_l1_size);
+ fprintf(stderr, "grow l1_table from %d to %" PRId64 "\n",
+ s->l1_size, new_l1_size);
#endif
new_l1_size2 = sizeof(uint64_t) * new_l1_size;
goto fail;
}
+ /* the L1 position has not yet been updated, so these clusters must
+ * indeed be completely free */
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+ new_l1_table_offset, new_l1_size2);
+ if (ret < 0) {
+ goto fail;
+ }
+
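`qcow2_pre_write_overlap_check()` refuses writes that would land on top of known metadata structures selected by the `QCOW2_OL_*` mask. A minimal sketch of the underlying idea, assuming a hypothetical flat table of metadata ranges rather than the real per-structure walk:

```c
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical metadata range table; the real check walks the image's
 * known structures (header, L1/L2 tables, refcount blocks, ...). */
struct meta_range {
    uint64_t offset;
    uint64_t size;
};

static int overlap_check(const struct meta_range *r, size_t n,
                         uint64_t offset, uint64_t size)
{
    for (size_t i = 0; i < n; i++) {
        /* half-open interval intersection test */
        if (offset < r[i].offset + r[i].size && r[i].offset < offset + size) {
            return -EIO;    /* refuse: the write would clobber metadata */
        }
    }
    return 0;
}

int main(void)
{
    struct meta_range meta[] = { { 0, 512 }, { 0x10000, 0x10000 } };
    printf("%d\n", overlap_check(meta, 2, 0x8000, 0x1000));    /* 0: ok */
    printf("%d\n", overlap_check(meta, 2, 0x18000, 0x1000));   /* -EIO */
    return 0;
}
```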
BLKDBG_EVENT(bs->file, BLKDBG_L1_GROW_WRITE_TABLE);
for(i = 0; i < s->l1_size; i++)
new_l1_table[i] = cpu_to_be64(new_l1_table[i]);
goto fail;
}
g_free(s->l1_table);
- qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t));
+ qcow2_free_clusters(bs, s->l1_table_offset, s->l1_size * sizeof(uint64_t),
+ QCOW2_DISCARD_OTHER);
s->l1_table_offset = new_l1_table_offset;
s->l1_table = new_l1_table;
s->l1_size = new_l1_size;
return 0;
fail:
g_free(new_l1_table);
- qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2);
+ qcow2_free_clusters(bs, new_l1_table_offset, new_l1_size2,
+ QCOW2_DISCARD_OTHER);
return ret;
}
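The `cpu_to_be64()` loop above converts the table in place because qcow2 stores all table entries big-endian on disk. A self-contained sketch of the conversion (`to_be64()` is a portable stand-in, not the QEMU macro):

```c
#include <stdint.h>
#include <stdio.h>

/* Store the most significant byte first so the in-memory image matches
 * the on-disk big-endian layout regardless of host byte order. */
static uint64_t to_be64(uint64_t v)
{
    uint64_t r;
    uint8_t *p = (uint8_t *)&r;
    for (int i = 0; i < 8; i++) {
        p[i] = (uint8_t)(v >> (56 - 8 * i));
    }
    return r;
}

int main(void)
{
    uint64_t l1[4] = { 0x10000, 0x20000, 0, 0x30000 };
    for (int i = 0; i < 4; i++) {
        l1[i] = to_be64(l1[i]);   /* now safe to write verbatim to disk */
    }
    printf("first byte on disk: 0x%02x\n", ((uint8_t *)l1)[0]);
    return 0;
}
```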
* and we really don't want bdrv_pread to perform a read-modify-write)
*/
#define L1_ENTRIES_PER_SECTOR (512 / 8)
-static int write_l1_entry(BlockDriverState *bs, int l1_index)
+int qcow2_write_l1_entry(BlockDriverState *bs, int l1_index)
{
BDRVQcowState *s = bs->opaque;
uint64_t buf[L1_ENTRIES_PER_SECTOR];
buf[i] = cpu_to_be64(s->l1_table[l1_start_index + i]);
}
+ ret = qcow2_pre_write_overlap_check(bs,
+ QCOW2_OL_DEFAULT & ~QCOW2_OL_ACTIVE_L1,
+ s->l1_table_offset + 8 * l1_start_index, sizeof(buf));
+ if (ret < 0) {
+ return ret;
+ }
+
BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
ret = bdrv_pwrite_sync(bs->file, s->l1_table_offset + 8 * l1_start_index,
buf, sizeof(buf));
/* update the L1 entry */
trace_qcow2_l2_allocate_write_l1(bs, l1_index);
s->l1_table[l1_index] = l2_offset | QCOW_OFLAG_COPIED;
- ret = write_l1_entry(bs, l1_index);
+ ret = qcow2_write_l1_entry(bs, l1_index);
if (ret < 0) {
goto fail;
}
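`qcow2_write_l1_entry()` always rewrites the full 512-byte sector containing the entry, per the comment above `L1_ENTRIES_PER_SECTOR`, so `bs->file` never sees a sub-sector write. A sketch of the index rounding, assuming the usual power-of-two alignment (helper name is invented):

```c
#include <stdint.h>
#include <stdio.h>

#define SECTOR_SIZE           512
#define L1_ENTRIES_PER_SECTOR (SECTOR_SIZE / 8)

/* Assumed rounding: the first entry sharing a sector with l1_index. */
static int l1_sector_start(int l1_index)
{
    return l1_index & ~(L1_ENTRIES_PER_SECTOR - 1);
}

int main(void)
{
    int l1_index = 70;
    int start = l1_sector_start(l1_index);

    /* entries [start, start + 64) share one sector; staging all of them
     * big-endian in a bounce buffer yields a single aligned 512-byte
     * write and never a read-modify-write. */
    printf("entry %d shares a sector with entries starting at %d,\n",
           l1_index, start);
    printf("file offset l1_table_offset + %d\n", 8 * start);
    return 0;
}
```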
&s->aes_encrypt_key);
}
+ ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT,
+ cluster_offset + n_start * BDRV_SECTOR_SIZE, n * BDRV_SECTOR_SIZE);
+ if (ret < 0) {
+ goto out;
+ }
+
BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
ret = bdrv_co_writev(bs->file, (cluster_offset >> 9) + n_start, n, &qiov);
if (ret < 0) {
int *num, uint64_t *cluster_offset)
{
BDRVQcowState *s = bs->opaque;
- unsigned int l1_index, l2_index;
- uint64_t l2_offset, *l2_table;
+ unsigned int l2_index;
+ uint64_t l1_index, l2_offset, *l2_table;
int l1_bits, c;
unsigned int index_in_cluster, nb_clusters;
uint64_t nb_available, nb_needed;
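Widening `l1_index` to `uint64_t` matters because it is derived by shifting a 64-bit guest offset; with small clusters and large images the index no longer fits in 32 bits. A sketch with an assumed geometry of 512-byte clusters:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    /* assumed geometry: 512-byte clusters, so cluster_bits = 9 and
     * l2_bits = cluster_bits - 3 = 6, giving l1_bits = 15 */
    int l1_bits = 15;
    uint64_t offset = 200ULL << 40;              /* ~200 TB guest offset */

    uint64_t l1_index = offset >> l1_bits;       /* 0x190000000 */
    printf("l1_index = 0x%" PRIx64 "\n", l1_index);

    unsigned int truncated = (unsigned int)l1_index;   /* the old bug */
    printf("as 32-bit: 0x%x\n", truncated);            /* 0x90000000 */
    return 0;
}
```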
int *new_l2_index)
{
BDRVQcowState *s = bs->opaque;
- unsigned int l1_index, l2_index;
- uint64_t l2_offset;
+ unsigned int l2_index;
+ uint64_t l1_index, l2_offset;
uint64_t *l2_table = NULL;
int ret;
}
}
+ assert(l1_index < s->l1_size);
l2_offset = s->l1_table[l1_index] & L1E_OFFSET_MASK;
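An L1 entry packs the L2 table offset together with flag bits, which is why the offset must be masked out (the new assert bounds-checks the index first). A sketch using the mask and flag values from block/qcow2.h:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Flag and mask values as defined in block/qcow2.h. */
#define QCOW_OFLAG_COPIED (1ULL << 63)   /* refcount == 1: writable in place */
#define L1E_OFFSET_MASK   0x00fffffffffffe00ULL

int main(void)
{
    uint64_t l1_entry = 0x50000ULL | QCOW_OFLAG_COPIED;

    uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK;   /* flags stripped */
    int copied = (l1_entry & QCOW_OFLAG_COPIED) != 0;

    printf("l2_offset=0x%" PRIx64 " copied=%d\n", l2_offset, copied);
    return 0;
}
```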
/* seek the l2 table of the given l2 offset */
/* Then decrease the refcount of the old table */
if (l2_offset) {
- qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t));
+ qcow2_free_clusters(bs, l2_offset, s->l2_size * sizeof(uint64_t),
+ QCOW2_DISCARD_OTHER);
}
}
/*
* If this was a COW, we need to decrease the refcount of the old cluster.
* Also flush bs->file to get the right order for L2 and refcount update.
+ *
+ * Don't discard clusters that reach a refcount of 0 (e.g. compressed
+ * clusters); the next write will reuse them anyway.
*/
if (j != 0) {
for (i = 0; i < j; i++) {
- qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1);
+ qcow2_free_any_clusters(bs, be64_to_cpu(old_cluster[i]), 1,
+ QCOW2_DISCARD_NEVER);
}
}
* must start over anyway, so consider *cur_bytes undefined.
*/
static int handle_dependencies(BlockDriverState *bs, uint64_t guest_offset,
- uint64_t *cur_bytes)
+ uint64_t *cur_bytes, QCowL2Meta **m)
{
BDRVQcowState *s = bs->opaque;
QCowL2Meta *old_alloc;
bytes = 0;
}
+ /* Stop if an l2meta already exists. After yielding, it wouldn't
+ * be valid any more, so we'd have to clean up the old L2Metas
+ * and deal with requests depending on them before starting to
+ * gather new ones. Not worth the trouble. */
+ if (bytes == 0 && *m) {
+ *cur_bytes = 0;
+ return 0;
+ }
+
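`handle_dependencies()` now takes the l2meta list so it can stop gathering once one already exists. The list itself is built by prepending, as the `.next = old_m` initializer further down shows. A stripped-down sketch of that chaining (struct and helper names are invented):

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Stripped-down stand-in for QCowL2Meta: one in-flight allocation. */
struct l2meta {
    uint64_t offset;          /* guest offset covered by this allocation */
    int nb_clusters;
    struct l2meta *next;      /* older allocations of the same request */
};

/* Prepend a new element, mirroring the `.next = old_m` pattern. */
static struct l2meta *l2meta_prepend(struct l2meta *old, uint64_t offset,
                                     int nb_clusters)
{
    struct l2meta *m = calloc(1, sizeof(*m));
    if (!m) {
        abort();
    }
    m->offset = offset;
    m->nb_clusters = nb_clusters;
    m->next = old;
    return m;
}

int main(void)
{
    struct l2meta *m = NULL;
    m = l2meta_prepend(m, 0x10000, 4);
    m = l2meta_prepend(m, 0x50000, 2);     /* head is the newest element */

    for (struct l2meta *it = m; it; it = it->next) {
        printf("offset=0x%llx clusters=%d\n",
               (unsigned long long)it->offset, it->nb_clusters);
    }
    while (m) {                            /* caller frees the whole chain */
        struct l2meta *next = m->next;
        free(m);
        m = next;
    }
    return 0;
}
```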
if (bytes == 0) {
/* Wait for the dependency to complete. We need to recheck
* the free/allocated clusters when we continue. */
nb_clusters = count_cow_clusters(s, nb_clusters, l2_table, l2_index);
}
+ /* This function is only called when there were no non-COW clusters, so if
+ * we can't find any unallocated or COW clusters either, something is
+ * wrong with our code. */
+ assert(nb_clusters > 0);
+
ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
if (ret < 0) {
return ret;
}
- if (nb_clusters == 0) {
- *bytes = 0;
- return 0;
- }
-
/* Allocate, if necessary at a given offset in the image file */
alloc_cluster_offset = start_of_cluster(s, *host_offset);
ret = do_alloc_cluster_offset(bs, guest_offset, &alloc_cluster_offset,
int alloc_n_start = offset_into_cluster(s, guest_offset)
>> BDRV_SECTOR_BITS;
int nb_sectors = MIN(requested_sectors, avail_sectors);
+ QCowL2Meta *old_m = *m;
*m = g_malloc0(sizeof(**m));
**m = (QCowL2Meta) {
+ .next = old_m,
+
.alloc_offset = alloc_cluster_offset,
.offset = start_of_cluster(s, guest_offset),
.nb_clusters = nb_clusters,
remaining = (n_end - n_start) << BDRV_SECTOR_BITS;
cluster_offset = 0;
*host_offset = 0;
+ cur_bytes = 0;
+ *m = NULL;
- /*
- * Now start gathering as many contiguous clusters as possible:
- *
- * 1. Check for overlaps with in-flight allocations
- *
- * a) Overlap not in the first cluster -> shorten this request and let
- * the caller handle the rest in its next loop iteration.
- *
- * b) Real overlaps of two requests. Yield and restart the search for
- * contiguous clusters (the situation could have changed while we
- * were sleeping)
- *
- * c) TODO: Request starts in the same cluster as the in-flight
- * allocation ends. Shorten the COW of the in-fight allocation, set
- * cluster_offset to write to the same cluster and set up the right
- * synchronisation between the in-flight request and the new one.
- */
- cur_bytes = remaining;
- ret = handle_dependencies(bs, start, &cur_bytes);
- if (ret == -EAGAIN) {
- goto again;
- } else if (ret < 0) {
- return ret;
- } else {
- /* handle_dependencies() may have decreased cur_bytes (shortened
- * the allocations below) so that the next dependency is processed
- * correctly during the next loop iteration. */
- }
+ while (true) {
- /*
- * 2. Count contiguous COPIED clusters.
- */
- ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
- if (ret < 0) {
- return ret;
- } else if (ret) {
if (!*host_offset) {
*host_offset = start_of_cluster(s, cluster_offset);
}
+ assert(remaining >= cur_bytes);
+
start += cur_bytes;
remaining -= cur_bytes;
cluster_offset += cur_bytes;
+ if (remaining == 0) {
+ break;
+ }
+
cur_bytes = remaining;
- } else if (cur_bytes == 0) {
- goto done;
- }
- /* If there is something left to allocate, do that now */
- if (remaining == 0) {
- goto done;
- }
+ /*
+ * Now start gathering as many contiguous clusters as possible:
+ *
+ * 1. Check for overlaps with in-flight allocations
+ *
+ * a) Overlap not in the first cluster -> shorten this request and
+ * let the caller handle the rest in its next loop iteration.
+ *
+ * b) Real overlaps of two requests. Yield and restart the search
+ * for contiguous clusters (the situation could have changed
+ * while we were sleeping)
+ *
+ * c) TODO: Request starts in the same cluster as the in-flight
+ * allocation ends. Shorten the COW of the in-flight allocation,
+ * set cluster_offset to write to the same cluster and set up
+ * the right synchronisation between the in-flight request and
+ * the new one.
+ */
+ ret = handle_dependencies(bs, start, &cur_bytes, m);
+ if (ret == -EAGAIN) {
+ /* Currently handle_dependencies() doesn't yield if we already had
+ * an allocation. If it did, we would have to clean up the L2Meta
+ * structs before starting over. */
+ assert(*m == NULL);
+ goto again;
+ } else if (ret < 0) {
+ return ret;
+ } else if (cur_bytes == 0) {
+ break;
+ } else {
+ /* handle_dependencies() may have decreased cur_bytes (shortened
+ * the allocations below) so that the next dependency is processed
+ * correctly during the next loop iteration. */
+ }
- /*
- * 3. If the request still hasn't completed, allocate new clusters,
- * considering any cluster_offset of steps 1c or 2.
- */
- ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
- if (ret < 0) {
- return ret;
- } else if (ret) {
- if (!*host_offset) {
- *host_offset = start_of_cluster(s, cluster_offset);
+ /*
+ * 2. Count contiguous COPIED clusters.
+ */
+ ret = handle_copied(bs, start, &cluster_offset, &cur_bytes, m);
+ if (ret < 0) {
+ return ret;
+ } else if (ret) {
+ continue;
+ } else if (cur_bytes == 0) {
+ break;
}
- start += cur_bytes;
- remaining -= cur_bytes;
- cluster_offset += cur_bytes;
+ /*
+ * 3. If the request still hasn't completed, allocate new clusters,
+ * considering any cluster_offset of steps 1c or 2.
+ */
+ ret = handle_alloc(bs, start, &cluster_offset, &cur_bytes, m);
+ if (ret < 0) {
+ return ret;
+ } else if (ret) {
+ continue;
+ } else {
+ assert(cur_bytes == 0);
+ break;
+ }
}
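The rewritten loop replaces the goto-based flow: each pass first consumes whatever the previous helper mapped, then lets `handle_dependencies()`, `handle_copied()` and `handle_alloc()` take a turn, where `continue` means "range mapped, account for it at the top" and `cur_bytes == 0` means "request shortened, stop". A toy model of that control flow (handlers are invented stand-ins, dependency handling omitted):

```c
#include <stdint.h>
#include <stdio.h>

/* Toy stand-ins: each may shorten *cur_bytes and returns >0 if it
 * mapped the current range, 0 to fall through to the next helper. */
static int toy_handle_copied(uint64_t start, uint64_t *cur_bytes)
{
    if (start < 0x10000) {               /* pretend 0..64k is COPIED */
        if (start + *cur_bytes > 0x10000) {
            *cur_bytes = 0x10000 - start;
        }
        return 1;
    }
    return 0;
}

static int toy_handle_alloc(uint64_t start, uint64_t *cur_bytes)
{
    (void)start;
    return *cur_bytes > 0;               /* allocate whatever remains */
}

int main(void)
{
    uint64_t start = 0, remaining = 0x18000, cur_bytes = 0;

    while (1) {
        start += cur_bytes;              /* consume the last pass's range */
        remaining -= cur_bytes;
        if (remaining == 0) {
            break;
        }
        cur_bytes = remaining;

        if (toy_handle_copied(start, &cur_bytes)) {
            printf("copied: start=0x%llx bytes=0x%llx\n",
                   (unsigned long long)start, (unsigned long long)cur_bytes);
            continue;
        }
        if (toy_handle_alloc(start, &cur_bytes)) {
            printf("alloc:  start=0x%llx bytes=0x%llx\n",
                   (unsigned long long)start, (unsigned long long)cur_bytes);
            continue;
        }
        break;                           /* nothing more to do */
    }
    return 0;
}
```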
- /* Some cleanup work */
-done:
*num = (n_end - n_start) - (remaining >> BDRV_SECTOR_BITS);
assert(*num > 0);
assert(*host_offset != 0);
* clusters.
*/
static int discard_single_l2(BlockDriverState *bs, uint64_t offset,
- unsigned int nb_clusters)
+ unsigned int nb_clusters, enum qcow2_discard_type type)
{
BDRVQcowState *s = bs->opaque;
uint64_t *l2_table;
l2_table[l2_index + i] = cpu_to_be64(0);
/* Then decrease the refcount */
- qcow2_free_any_clusters(bs, old_offset, 1);
+ qcow2_free_any_clusters(bs, old_offset, 1, type);
}
ret = qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
}
int qcow2_discard_clusters(BlockDriverState *bs, uint64_t offset,
- int nb_sectors)
+ int nb_sectors, enum qcow2_discard_type type)
{
BDRVQcowState *s = bs->opaque;
uint64_t end_offset;
nb_clusters = size_to_clusters(s, end_offset - offset);
+ s->cache_discards = true;
+
/* Each L2 table is handled by its own loop iteration */
while (nb_clusters > 0) {
- ret = discard_single_l2(bs, offset, nb_clusters);
+ ret = discard_single_l2(bs, offset, nb_clusters, type);
if (ret < 0) {
- return ret;
+ goto fail;
}
nb_clusters -= ret;
offset += (ret * s->cluster_size);
}
- return 0;
+ ret = 0;
+fail:
+ s->cache_discards = false;
+ qcow2_process_discards(bs, ret);
+
+ return ret;
}
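Setting `s->cache_discards` defers the per-cluster discards queued inside the loop; `qcow2_process_discards(bs, ret)` then submits the whole batch in one pass, or drops it when the caller failed, which is why the success path also falls through the `fail:` label. A generic sketch of this defer-and-flush pattern, with hypothetical names throughout:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_BATCH 16

/* While `deferring` is set, discards are only queued; process_discards()
 * later submits the whole batch, or drops it on error. */
struct discard_batch {
    bool deferring;
    int n;
    struct { uint64_t offset, size; } q[MAX_BATCH];
};

static void discard(struct discard_batch *b, uint64_t off, uint64_t sz)
{
    if (b->deferring && b->n < MAX_BATCH) {
        b->q[b->n].offset = off;
        b->q[b->n].size = sz;
        b->n++;
        return;
    }
    printf("discard now 0x%llx+0x%llx\n",
           (unsigned long long)off, (unsigned long long)sz);
}

static void process_discards(struct discard_batch *b, int ret)
{
    for (int i = 0; i < b->n; i++) {
        if (ret >= 0) {
            printf("discard 0x%llx+0x%llx\n",
                   (unsigned long long)b->q[i].offset,
                   (unsigned long long)b->q[i].size);
        }
        /* on error the queued ranges are simply dropped */
    }
    b->n = 0;
}

int main(void)
{
    struct discard_batch b = { .deferring = true };

    discard(&b, 0x10000, 0x10000);    /* queued, one per loop iteration */
    discard(&b, 0x30000, 0x20000);

    b.deferring = false;
    process_discards(&b, 0);          /* single pass on the fail: path */
    return 0;
}
```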
/*
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_table);
if (old_offset & QCOW_OFLAG_COMPRESSED) {
l2_table[l2_index + i] = cpu_to_be64(QCOW_OFLAG_ZERO);
- qcow2_free_any_clusters(bs, old_offset, 1);
+ qcow2_free_any_clusters(bs, old_offset, 1, QCOW2_DISCARD_REQUEST);
} else {
l2_table[l2_index + i] |= cpu_to_be64(QCOW_OFLAG_ZERO);
}
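The two branches differ because a compressed cluster has no host mapping worth keeping: its entry is replaced outright by the zero flag and the old data freed, while a normal cluster keeps its mapping and only gains the flag, so a later write can reuse the cluster. A sketch of that entry transformation, using the flag values from block/qcow2.h:

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define QCOW_OFLAG_COMPRESSED (1ULL << 62)
#define QCOW_OFLAG_ZERO       (1ULL << 0)

/* Mirrors the zero_single_l2() update shown above. */
static uint64_t zero_l2_entry(uint64_t entry)
{
    if (entry & QCOW_OFLAG_COMPRESSED) {
        return QCOW_OFLAG_ZERO;          /* old cluster gets freed */
    }
    return entry | QCOW_OFLAG_ZERO;      /* mapping kept for reuse */
}

int main(void)
{
    printf("0x%" PRIx64 "\n", zero_l2_entry(0x50000));   /* 0x50001 */
    printf("0x%" PRIx64 "\n", zero_l2_entry(0x50000 | QCOW_OFLAG_COMPRESSED));
    return 0;
}
```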
/* Each L2 table is handled by its own loop iteration */
nb_clusters = size_to_clusters(s, nb_sectors << BDRV_SECTOR_BITS);
+ s->cache_discards = true;
+
while (nb_clusters > 0) {
ret = zero_single_l2(bs, offset, nb_clusters);
if (ret < 0) {
- return ret;
+ goto fail;
}
nb_clusters -= ret;
offset += (ret * s->cluster_size);
}
- return 0;
+ ret = 0;
+fail:
+ s->cache_discards = false;
+ qcow2_process_discards(bs, ret);
+
+ return ret;
}