]> git.proxmox.com Git - mirror_qemu.git/blame - block/qcow2-cache.c
Merge tag 'for-upstream' of https://gitlab.com/bonzini/qemu into staging
[mirror_qemu.git] / block / qcow2-cache.c
CommitLineData
49381094
KW
1/*
2 * L2/refcount table cache for the QCOW2 format
3 *
4 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
80c71a24 25#include "qemu/osdep.h"
5df022cf 26#include "qemu/memalign.h"
49381094 27#include "qcow2.h"
3cce16f4 28#include "trace.h"
49381094
KW
29
/* Bookkeeping for a single table slot of a Qcow2Cache. */
typedef struct Qcow2CachedTable {
    /* Offset of the table held in this slot; 0 marks the slot as unused */
    int64_t offset;
    /* Value of the cache-wide LRU counter when the last reference was put */
    uint64_t lru_counter;
    /* Number of references currently handed out for this slot */
    int ref;
    /* True while the in-memory table still has to be written back */
    bool dirty;
} Qcow2CachedTable;
36
37struct Qcow2Cache {
d1b4efe5
AG
38 Qcow2CachedTable *entries;
39 struct Qcow2Cache *depends;
bf595021 40 int size;
03019d73 41 int table_size;
3de0a294 42 bool depends_on_flush;
72e80b89 43 void *table_array;
2693310e 44 uint64_t lru_counter;
279621c0 45 uint64_t cache_clean_lru_counter;
49381094
KW
46};
47
9869b27b 48static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, int table)
72e80b89 49{
03019d73 50 return (uint8_t *) c->table_array + (size_t) table * c->table_size;
72e80b89
AG
51}
52
b3b8b6d9 53static inline int qcow2_cache_get_table_idx(Qcow2Cache *c, void *table)
baf07d60 54{
baf07d60 55 ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
03019d73
AG
56 int idx = table_offset / c->table_size;
57 assert(idx >= 0 && idx < c->size && table_offset % c->table_size == 0);
baf07d60
AG
58 return idx;
59}
60
4efb1f7c
HR
61static inline const char *qcow2_cache_get_name(BDRVQcow2State *s, Qcow2Cache *c)
62{
63 if (c == s->refcount_block_cache) {
64 return "refcount block";
65 } else if (c == s->l2_table_cache) {
66 return "L2 table";
67 } else {
68 /* Do not abort, because this is not critical */
69 return "unknown";
70 }
71}
72
ebe988f3 73static void qcow2_cache_table_release(Qcow2Cache *c, int i, int num_tables)
355ee2d0 74{
2f2c8d6b
AG
75/* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
76#ifdef CONFIG_LINUX
9869b27b 77 void *t = qcow2_cache_get_table_addr(c, i);
8e3b0cbb 78 int align = qemu_real_host_page_size();
03019d73 79 size_t mem_size = (size_t) c->table_size * num_tables;
355ee2d0
AG
80 size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
81 size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
08546bcf 82 if (mem_size > offset && length > 0) {
2f2c8d6b 83 madvise((uint8_t *) t + offset, length, MADV_DONTNEED);
355ee2d0
AG
84 }
85#endif
86}
87
279621c0
AG
88static inline bool can_clean_entry(Qcow2Cache *c, int i)
89{
90 Qcow2CachedTable *t = &c->entries[i];
91 return t->ref == 0 && !t->dirty && t->offset != 0 &&
92 t->lru_counter <= c->cache_clean_lru_counter;
93}
94
b2f68bff 95void qcow2_cache_clean_unused(Qcow2Cache *c)
279621c0
AG
96{
97 int i = 0;
98 while (i < c->size) {
99 int to_clean = 0;
100
101 /* Skip the entries that we don't need to clean */
102 while (i < c->size && !can_clean_entry(c, i)) {
103 i++;
104 }
105
106 /* And count how many we can clean in a row */
107 while (i < c->size && can_clean_entry(c, i)) {
108 c->entries[i].offset = 0;
109 c->entries[i].lru_counter = 0;
110 i++;
111 to_clean++;
112 }
113
114 if (to_clean > 0) {
ebe988f3 115 qcow2_cache_table_release(c, i - to_clean, to_clean);
279621c0
AG
116 }
117 }
118
119 c->cache_clean_lru_counter = c->lru_counter;
120}
121
1221fe6f
AG
122Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables,
123 unsigned table_size)
49381094 124{
ff99129a 125 BDRVQcow2State *s = bs->opaque;
49381094 126 Qcow2Cache *c;
49381094 127
1221fe6f
AG
128 assert(num_tables > 0);
129 assert(is_power_of_2(table_size));
130 assert(table_size >= (1 << MIN_CLUSTER_BITS));
131 assert(table_size <= s->cluster_size);
132
02004bd4 133 c = g_new0(Qcow2Cache, 1);
49381094 134 c->size = num_tables;
1221fe6f 135 c->table_size = table_size;
02004bd4 136 c->entries = g_try_new0(Qcow2CachedTable, num_tables);
9a4f4c31 137 c->table_array = qemu_try_blockalign(bs->file->bs,
03019d73 138 (size_t) num_tables * c->table_size);
72e80b89
AG
139
140 if (!c->entries || !c->table_array) {
141 qemu_vfree(c->table_array);
142 g_free(c->entries);
143 g_free(c);
144 c = NULL;
49381094
KW
145 }
146
147 return c;
148}
149
e64d4072 150int qcow2_cache_destroy(Qcow2Cache *c)
49381094
KW
151{
152 int i;
153
154 for (i = 0; i < c->size; i++) {
155 assert(c->entries[i].ref == 0);
49381094
KW
156 }
157
72e80b89 158 qemu_vfree(c->table_array);
7267c094
AL
159 g_free(c->entries);
160 g_free(c);
49381094
KW
161
162 return 0;
163}
164
165static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
166{
167 int ret;
168
169 ret = qcow2_cache_flush(bs, c->depends);
170 if (ret < 0) {
171 return ret;
172 }
173
174 c->depends = NULL;
3de0a294
KW
175 c->depends_on_flush = false;
176
49381094
KW
177 return 0;
178}
179
180static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
181{
ff99129a 182 BDRVQcow2State *s = bs->opaque;
3de0a294 183 int ret = 0;
49381094
KW
184
185 if (!c->entries[i].dirty || !c->entries[i].offset) {
186 return 0;
187 }
188
3cce16f4
KW
189 trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
190 c == s->l2_table_cache, i);
191
49381094
KW
192 if (c->depends) {
193 ret = qcow2_cache_flush_dependency(bs, c);
3de0a294 194 } else if (c->depends_on_flush) {
9a4f4c31 195 ret = bdrv_flush(bs->file->bs);
3de0a294
KW
196 if (ret >= 0) {
197 c->depends_on_flush = false;
49381094
KW
198 }
199 }
200
3de0a294
KW
201 if (ret < 0) {
202 return ret;
203 }
204
cf93980e 205 if (c == s->refcount_block_cache) {
231bb267 206 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
966b000f 207 c->entries[i].offset, c->table_size, false);
cf93980e 208 } else if (c == s->l2_table_cache) {
231bb267 209 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
966b000f 210 c->entries[i].offset, c->table_size, false);
cf93980e 211 } else {
231bb267 212 ret = qcow2_pre_write_overlap_check(bs, 0,
966b000f 213 c->entries[i].offset, c->table_size, false);
cf93980e
HR
214 }
215
216 if (ret < 0) {
217 return ret;
218 }
219
29c1a730
KW
220 if (c == s->refcount_block_cache) {
221 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
222 } else if (c == s->l2_table_cache) {
223 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
224 }
225
d9ca2ea2 226 ret = bdrv_pwrite(bs->file, c->entries[i].offset,
9869b27b 227 qcow2_cache_get_table_addr(c, i), c->table_size);
49381094
KW
228 if (ret < 0) {
229 return ret;
230 }
231
232 c->entries[i].dirty = false;
233
234 return 0;
235}
236
f3c3b87d 237int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c)
49381094 238{
ff99129a 239 BDRVQcow2State *s = bs->opaque;
49381094
KW
240 int result = 0;
241 int ret;
242 int i;
243
3cce16f4
KW
244 trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
245
49381094
KW
246 for (i = 0; i < c->size; i++) {
247 ret = qcow2_cache_entry_flush(bs, c, i);
248 if (ret < 0 && result != -ENOSPC) {
249 result = ret;
250 }
251 }
252
f3c3b87d
DL
253 return result;
254}
255
256int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
257{
258 int result = qcow2_cache_write(bs, c);
259
49381094 260 if (result == 0) {
f3c3b87d 261 int ret = bdrv_flush(bs->file->bs);
49381094
KW
262 if (ret < 0) {
263 result = ret;
264 }
265 }
266
267 return result;
268}
269
270int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
271 Qcow2Cache *dependency)
272{
273 int ret;
274
275 if (dependency->depends) {
276 ret = qcow2_cache_flush_dependency(bs, dependency);
277 if (ret < 0) {
278 return ret;
279 }
280 }
281
282 if (c->depends && (c->depends != dependency)) {
283 ret = qcow2_cache_flush_dependency(bs, c);
284 if (ret < 0) {
285 return ret;
286 }
287 }
288
289 c->depends = dependency;
290 return 0;
291}
292
3de0a294
KW
293void qcow2_cache_depends_on_flush(Qcow2Cache *c)
294{
295 c->depends_on_flush = true;
296}
297
e7108fea
HR
298int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
299{
300 int ret, i;
301
302 ret = qcow2_cache_flush(bs, c);
303 if (ret < 0) {
304 return ret;
305 }
306
307 for (i = 0; i < c->size; i++) {
308 assert(c->entries[i].ref == 0);
309 c->entries[i].offset = 0;
2693310e 310 c->entries[i].lru_counter = 0;
e7108fea
HR
311 }
312
ebe988f3 313 qcow2_cache_table_release(c, 0, c->size);
355ee2d0 314
2693310e
AG
315 c->lru_counter = 0;
316
e7108fea
HR
317 return 0;
318}
319
49381094
KW
320static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
321 uint64_t offset, void **table, bool read_from_disk)
322{
ff99129a 323 BDRVQcow2State *s = bs->opaque;
49381094
KW
324 int i;
325 int ret;
812e4082 326 int lookup_index;
fdfbca82
AG
327 uint64_t min_lru_counter = UINT64_MAX;
328 int min_lru_index = -1;
49381094 329
4efb1f7c
HR
330 assert(offset != 0);
331
3cce16f4
KW
332 trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
333 offset, read_from_disk);
334
03019d73 335 if (!QEMU_IS_ALIGNED(offset, c->table_size)) {
4efb1f7c
HR
336 qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s "
337 "cache: Offset %#" PRIx64 " is unaligned",
338 qcow2_cache_get_name(s, c), offset);
339 return -EIO;
340 }
341
49381094 342 /* Check if the table is already cached */
03019d73 343 i = lookup_index = (offset / c->table_size * 4) % c->size;
812e4082 344 do {
fdfbca82
AG
345 const Qcow2CachedTable *t = &c->entries[i];
346 if (t->offset == offset) {
49381094
KW
347 goto found;
348 }
fdfbca82
AG
349 if (t->ref == 0 && t->lru_counter < min_lru_counter) {
350 min_lru_counter = t->lru_counter;
351 min_lru_index = i;
352 }
812e4082
AG
353 if (++i == c->size) {
354 i = 0;
355 }
356 } while (i != lookup_index);
fdfbca82
AG
357
358 if (min_lru_index == -1) {
359 /* This can't happen in current synchronous code, but leave the check
360 * here as a reminder for whoever starts using AIO with the cache */
361 abort();
49381094
KW
362 }
363
fdfbca82
AG
364 /* Cache miss: write a table back and replace it */
365 i = min_lru_index;
3cce16f4
KW
366 trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
367 c == s->l2_table_cache, i);
49381094
KW
368
369 ret = qcow2_cache_entry_flush(bs, c, i);
370 if (ret < 0) {
371 return ret;
372 }
373
3cce16f4
KW
374 trace_qcow2_cache_get_read(qemu_coroutine_self(),
375 c == s->l2_table_cache, i);
49381094
KW
376 c->entries[i].offset = 0;
377 if (read_from_disk) {
29c1a730
KW
378 if (c == s->l2_table_cache) {
379 BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
380 }
381
cf2ab8fc 382 ret = bdrv_pread(bs->file, offset,
9869b27b 383 qcow2_cache_get_table_addr(c, i),
03019d73 384 c->table_size);
49381094
KW
385 if (ret < 0) {
386 return ret;
387 }
388 }
389
49381094
KW
390 c->entries[i].offset = offset;
391
392 /* And return the right table */
393found:
49381094 394 c->entries[i].ref++;
9869b27b 395 *table = qcow2_cache_get_table_addr(c, i);
3cce16f4
KW
396
397 trace_qcow2_cache_get_done(qemu_coroutine_self(),
398 c == s->l2_table_cache, i);
399
49381094
KW
400 return 0;
401}
402
403int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
404 void **table)
405{
406 return qcow2_cache_do_get(bs, c, offset, table, true);
407}
408
409int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
410 void **table)
411{
412 return qcow2_cache_do_get(bs, c, offset, table, false);
413}
414
2013c3d4 415void qcow2_cache_put(Qcow2Cache *c, void **table)
49381094 416{
b3b8b6d9 417 int i = qcow2_cache_get_table_idx(c, *table);
49381094 418
49381094
KW
419 c->entries[i].ref--;
420 *table = NULL;
421
2693310e
AG
422 if (c->entries[i].ref == 0) {
423 c->entries[i].lru_counter = ++c->lru_counter;
424 }
425
49381094 426 assert(c->entries[i].ref >= 0);
49381094
KW
427}
428
2d135ee9 429void qcow2_cache_entry_mark_dirty(Qcow2Cache *c, void *table)
49381094 430{
b3b8b6d9 431 int i = qcow2_cache_get_table_idx(c, table);
baf07d60 432 assert(c->entries[i].offset != 0);
49381094
KW
433 c->entries[i].dirty = true;
434}
f71c08ea 435
6e6fa760 436void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset)
f71c08ea
PB
437{
438 int i;
439
440 for (i = 0; i < c->size; i++) {
441 if (c->entries[i].offset == offset) {
9869b27b 442 return qcow2_cache_get_table_addr(c, i);
f71c08ea
PB
443 }
444 }
445 return NULL;
446}
447
77aadd7b 448void qcow2_cache_discard(Qcow2Cache *c, void *table)
f71c08ea 449{
b3b8b6d9 450 int i = qcow2_cache_get_table_idx(c, table);
f71c08ea
PB
451
452 assert(c->entries[i].ref == 0);
453
454 c->entries[i].offset = 0;
455 c->entries[i].lru_counter = 0;
456 c->entries[i].dirty = false;
457
ebe988f3 458 qcow2_cache_table_release(c, i, 1);
f71c08ea 459}