]> git.proxmox.com Git - mirror_qemu.git/blame - block/qcow2-cache.c
qcow2: Remove BDS parameter from qcow2_cache_get_table_addr()
[mirror_qemu.git] / block / qcow2-cache.c
CommitLineData
49381094
KW
1/*
2 * L2/refcount table cache for the QCOW2 format
3 *
4 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
23 */
24
80c71a24 25#include "qemu/osdep.h"
737e150e 26#include "block/block_int.h"
49381094
KW
27#include "qemu-common.h"
28#include "qcow2.h"
3cce16f4 29#include "trace.h"
49381094
KW
30
/* Bookkeeping for a single slot of the table cache. */
typedef struct Qcow2CachedTable {
    /* Image offset of the table held in this slot; 0 marks the slot unused */
    int64_t offset;
    /* Value of the cache-wide LRU counter when the last reference was put */
    uint64_t lru_counter;
    /* Number of outstanding references handed out and not yet put back */
    int ref;
    /* True while the in-memory table has changes not yet written out */
    bool dirty;
} Qcow2CachedTable;
37
38struct Qcow2Cache {
d1b4efe5
AG
39 Qcow2CachedTable *entries;
40 struct Qcow2Cache *depends;
bf595021 41 int size;
03019d73 42 int table_size;
3de0a294 43 bool depends_on_flush;
72e80b89 44 void *table_array;
2693310e 45 uint64_t lru_counter;
279621c0 46 uint64_t cache_clean_lru_counter;
49381094
KW
47};
48
9869b27b 49static inline void *qcow2_cache_get_table_addr(Qcow2Cache *c, int table)
72e80b89 50{
03019d73 51 return (uint8_t *) c->table_array + (size_t) table * c->table_size;
72e80b89
AG
52}
53
baf07d60
AG
54static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
55 Qcow2Cache *c, void *table)
56{
baf07d60 57 ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
03019d73
AG
58 int idx = table_offset / c->table_size;
59 assert(idx >= 0 && idx < c->size && table_offset % c->table_size == 0);
baf07d60
AG
60 return idx;
61}
62
4efb1f7c
HR
63static inline const char *qcow2_cache_get_name(BDRVQcow2State *s, Qcow2Cache *c)
64{
65 if (c == s->refcount_block_cache) {
66 return "refcount block";
67 } else if (c == s->l2_table_cache) {
68 return "L2 table";
69 } else {
70 /* Do not abort, because this is not critical */
71 return "unknown";
72 }
73}
74
355ee2d0
AG
75static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
76 int i, int num_tables)
77{
2f2c8d6b
AG
78/* Using MADV_DONTNEED to discard memory is a Linux-specific feature */
79#ifdef CONFIG_LINUX
9869b27b 80 void *t = qcow2_cache_get_table_addr(c, i);
355ee2d0 81 int align = getpagesize();
03019d73 82 size_t mem_size = (size_t) c->table_size * num_tables;
355ee2d0
AG
83 size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
84 size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
08546bcf 85 if (mem_size > offset && length > 0) {
2f2c8d6b 86 madvise((uint8_t *) t + offset, length, MADV_DONTNEED);
355ee2d0
AG
87 }
88#endif
89}
90
279621c0
AG
91static inline bool can_clean_entry(Qcow2Cache *c, int i)
92{
93 Qcow2CachedTable *t = &c->entries[i];
94 return t->ref == 0 && !t->dirty && t->offset != 0 &&
95 t->lru_counter <= c->cache_clean_lru_counter;
96}
97
98void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
99{
100 int i = 0;
101 while (i < c->size) {
102 int to_clean = 0;
103
104 /* Skip the entries that we don't need to clean */
105 while (i < c->size && !can_clean_entry(c, i)) {
106 i++;
107 }
108
109 /* And count how many we can clean in a row */
110 while (i < c->size && can_clean_entry(c, i)) {
111 c->entries[i].offset = 0;
112 c->entries[i].lru_counter = 0;
113 i++;
114 to_clean++;
115 }
116
117 if (to_clean > 0) {
118 qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
119 }
120 }
121
122 c->cache_clean_lru_counter = c->lru_counter;
123}
124
6af4e9ea 125Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
49381094 126{
ff99129a 127 BDRVQcow2State *s = bs->opaque;
49381094 128 Qcow2Cache *c;
49381094 129
02004bd4 130 c = g_new0(Qcow2Cache, 1);
49381094 131 c->size = num_tables;
03019d73 132 c->table_size = s->cluster_size;
02004bd4 133 c->entries = g_try_new0(Qcow2CachedTable, num_tables);
9a4f4c31 134 c->table_array = qemu_try_blockalign(bs->file->bs,
03019d73 135 (size_t) num_tables * c->table_size);
72e80b89
AG
136
137 if (!c->entries || !c->table_array) {
138 qemu_vfree(c->table_array);
139 g_free(c->entries);
140 g_free(c);
141 c = NULL;
49381094
KW
142 }
143
144 return c;
145}
146
d1b4efe5 147int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
49381094
KW
148{
149 int i;
150
151 for (i = 0; i < c->size; i++) {
152 assert(c->entries[i].ref == 0);
49381094
KW
153 }
154
72e80b89 155 qemu_vfree(c->table_array);
7267c094
AL
156 g_free(c->entries);
157 g_free(c);
49381094
KW
158
159 return 0;
160}
161
162static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
163{
164 int ret;
165
166 ret = qcow2_cache_flush(bs, c->depends);
167 if (ret < 0) {
168 return ret;
169 }
170
171 c->depends = NULL;
3de0a294
KW
172 c->depends_on_flush = false;
173
49381094
KW
174 return 0;
175}
176
177static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
178{
ff99129a 179 BDRVQcow2State *s = bs->opaque;
3de0a294 180 int ret = 0;
49381094
KW
181
182 if (!c->entries[i].dirty || !c->entries[i].offset) {
183 return 0;
184 }
185
3cce16f4
KW
186 trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
187 c == s->l2_table_cache, i);
188
49381094
KW
189 if (c->depends) {
190 ret = qcow2_cache_flush_dependency(bs, c);
3de0a294 191 } else if (c->depends_on_flush) {
9a4f4c31 192 ret = bdrv_flush(bs->file->bs);
3de0a294
KW
193 if (ret >= 0) {
194 c->depends_on_flush = false;
49381094
KW
195 }
196 }
197
3de0a294
KW
198 if (ret < 0) {
199 return ret;
200 }
201
cf93980e 202 if (c == s->refcount_block_cache) {
231bb267 203 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
03019d73 204 c->entries[i].offset, c->table_size);
cf93980e 205 } else if (c == s->l2_table_cache) {
231bb267 206 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
03019d73 207 c->entries[i].offset, c->table_size);
cf93980e 208 } else {
231bb267 209 ret = qcow2_pre_write_overlap_check(bs, 0,
03019d73 210 c->entries[i].offset, c->table_size);
cf93980e
HR
211 }
212
213 if (ret < 0) {
214 return ret;
215 }
216
29c1a730
KW
217 if (c == s->refcount_block_cache) {
218 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
219 } else if (c == s->l2_table_cache) {
220 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
221 }
222
d9ca2ea2 223 ret = bdrv_pwrite(bs->file, c->entries[i].offset,
9869b27b 224 qcow2_cache_get_table_addr(c, i), c->table_size);
49381094
KW
225 if (ret < 0) {
226 return ret;
227 }
228
229 c->entries[i].dirty = false;
230
231 return 0;
232}
233
f3c3b87d 234int qcow2_cache_write(BlockDriverState *bs, Qcow2Cache *c)
49381094 235{
ff99129a 236 BDRVQcow2State *s = bs->opaque;
49381094
KW
237 int result = 0;
238 int ret;
239 int i;
240
3cce16f4
KW
241 trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
242
49381094
KW
243 for (i = 0; i < c->size; i++) {
244 ret = qcow2_cache_entry_flush(bs, c, i);
245 if (ret < 0 && result != -ENOSPC) {
246 result = ret;
247 }
248 }
249
f3c3b87d
DL
250 return result;
251}
252
253int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
254{
255 int result = qcow2_cache_write(bs, c);
256
49381094 257 if (result == 0) {
f3c3b87d 258 int ret = bdrv_flush(bs->file->bs);
49381094
KW
259 if (ret < 0) {
260 result = ret;
261 }
262 }
263
264 return result;
265}
266
267int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
268 Qcow2Cache *dependency)
269{
270 int ret;
271
272 if (dependency->depends) {
273 ret = qcow2_cache_flush_dependency(bs, dependency);
274 if (ret < 0) {
275 return ret;
276 }
277 }
278
279 if (c->depends && (c->depends != dependency)) {
280 ret = qcow2_cache_flush_dependency(bs, c);
281 if (ret < 0) {
282 return ret;
283 }
284 }
285
286 c->depends = dependency;
287 return 0;
288}
289
3de0a294
KW
290void qcow2_cache_depends_on_flush(Qcow2Cache *c)
291{
292 c->depends_on_flush = true;
293}
294
e7108fea
HR
295int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
296{
297 int ret, i;
298
299 ret = qcow2_cache_flush(bs, c);
300 if (ret < 0) {
301 return ret;
302 }
303
304 for (i = 0; i < c->size; i++) {
305 assert(c->entries[i].ref == 0);
306 c->entries[i].offset = 0;
2693310e 307 c->entries[i].lru_counter = 0;
e7108fea
HR
308 }
309
355ee2d0
AG
310 qcow2_cache_table_release(bs, c, 0, c->size);
311
2693310e
AG
312 c->lru_counter = 0;
313
e7108fea
HR
314 return 0;
315}
316
49381094
KW
317static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
318 uint64_t offset, void **table, bool read_from_disk)
319{
ff99129a 320 BDRVQcow2State *s = bs->opaque;
49381094
KW
321 int i;
322 int ret;
812e4082 323 int lookup_index;
fdfbca82
AG
324 uint64_t min_lru_counter = UINT64_MAX;
325 int min_lru_index = -1;
49381094 326
4efb1f7c
HR
327 assert(offset != 0);
328
3cce16f4
KW
329 trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
330 offset, read_from_disk);
331
03019d73 332 if (!QEMU_IS_ALIGNED(offset, c->table_size)) {
4efb1f7c
HR
333 qcow2_signal_corruption(bs, true, -1, -1, "Cannot get entry from %s "
334 "cache: Offset %#" PRIx64 " is unaligned",
335 qcow2_cache_get_name(s, c), offset);
336 return -EIO;
337 }
338
49381094 339 /* Check if the table is already cached */
03019d73 340 i = lookup_index = (offset / c->table_size * 4) % c->size;
812e4082 341 do {
fdfbca82
AG
342 const Qcow2CachedTable *t = &c->entries[i];
343 if (t->offset == offset) {
49381094
KW
344 goto found;
345 }
fdfbca82
AG
346 if (t->ref == 0 && t->lru_counter < min_lru_counter) {
347 min_lru_counter = t->lru_counter;
348 min_lru_index = i;
349 }
812e4082
AG
350 if (++i == c->size) {
351 i = 0;
352 }
353 } while (i != lookup_index);
fdfbca82
AG
354
355 if (min_lru_index == -1) {
356 /* This can't happen in current synchronous code, but leave the check
357 * here as a reminder for whoever starts using AIO with the cache */
358 abort();
49381094
KW
359 }
360
fdfbca82
AG
361 /* Cache miss: write a table back and replace it */
362 i = min_lru_index;
3cce16f4
KW
363 trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
364 c == s->l2_table_cache, i);
49381094
KW
365
366 ret = qcow2_cache_entry_flush(bs, c, i);
367 if (ret < 0) {
368 return ret;
369 }
370
3cce16f4
KW
371 trace_qcow2_cache_get_read(qemu_coroutine_self(),
372 c == s->l2_table_cache, i);
49381094
KW
373 c->entries[i].offset = 0;
374 if (read_from_disk) {
29c1a730
KW
375 if (c == s->l2_table_cache) {
376 BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
377 }
378
cf2ab8fc 379 ret = bdrv_pread(bs->file, offset,
9869b27b 380 qcow2_cache_get_table_addr(c, i),
03019d73 381 c->table_size);
49381094
KW
382 if (ret < 0) {
383 return ret;
384 }
385 }
386
49381094
KW
387 c->entries[i].offset = offset;
388
389 /* And return the right table */
390found:
49381094 391 c->entries[i].ref++;
9869b27b 392 *table = qcow2_cache_get_table_addr(c, i);
3cce16f4
KW
393
394 trace_qcow2_cache_get_done(qemu_coroutine_self(),
395 c == s->l2_table_cache, i);
396
49381094
KW
397 return 0;
398}
399
400int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
401 void **table)
402{
403 return qcow2_cache_do_get(bs, c, offset, table, true);
404}
405
406int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
407 void **table)
408{
409 return qcow2_cache_do_get(bs, c, offset, table, false);
410}
411
a3f1afb4 412void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
49381094 413{
baf07d60 414 int i = qcow2_cache_get_table_idx(bs, c, *table);
49381094 415
49381094
KW
416 c->entries[i].ref--;
417 *table = NULL;
418
2693310e
AG
419 if (c->entries[i].ref == 0) {
420 c->entries[i].lru_counter = ++c->lru_counter;
421 }
422
49381094 423 assert(c->entries[i].ref >= 0);
49381094
KW
424}
425
72e80b89
AG
426void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
427 void *table)
49381094 428{
baf07d60
AG
429 int i = qcow2_cache_get_table_idx(bs, c, table);
430 assert(c->entries[i].offset != 0);
49381094
KW
431 c->entries[i].dirty = true;
432}
f71c08ea
PB
433
434void *qcow2_cache_is_table_offset(BlockDriverState *bs, Qcow2Cache *c,
435 uint64_t offset)
436{
437 int i;
438
439 for (i = 0; i < c->size; i++) {
440 if (c->entries[i].offset == offset) {
9869b27b 441 return qcow2_cache_get_table_addr(c, i);
f71c08ea
PB
442 }
443 }
444 return NULL;
445}
446
447void qcow2_cache_discard(BlockDriverState *bs, Qcow2Cache *c, void *table)
448{
449 int i = qcow2_cache_get_table_idx(bs, c, table);
450
451 assert(c->entries[i].ref == 0);
452
453 c->entries[i].offset = 0;
454 c->entries[i].lru_counter = 0;
455 c->entries[i].dirty = false;
456
457 qcow2_cache_table_release(bs, c, i, 1);
458}