]> git.proxmox.com Git - mirror_qemu.git/blame - block/qcow2-cache.c
qemu-iotests: Reduce racy output in 028
[mirror_qemu.git] / block / qcow2-cache.c
CommitLineData
49381094
KW
/*
 * L2/refcount table cache for the QCOW2 format
 *
 * Copyright (c) 2010 Kevin Wolf <kwolf@redhat.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
24
355ee2d0
AG
25/* Needed for CONFIG_MADVISE */
26#include "config-host.h"
27
28#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
29#include <sys/mman.h>
30#endif
31
737e150e 32#include "block/block_int.h"
49381094 33#include "qemu-common.h"
355ee2d0 34#include "qemu/osdep.h"
49381094 35#include "qcow2.h"
3cce16f4 36#include "trace.h"
49381094
KW
37
/*
 * Bookkeeping for a single slot of a Qcow2Cache. Only metadata lives here;
 * the table contents are stored in the owning cache's table_array.
 */
typedef struct Qcow2CachedTable {
    /* Image offset of the cached table; 0 means the slot holds no table */
    int64_t offset;
    /* Value of the cache-wide LRU counter when the last reference was put */
    uint64_t lru_counter;
    /* Number of references handed out by get and not yet returned by put */
    int ref;
    /* True when the in-memory table still has to be written to the image */
    bool dirty;
} Qcow2CachedTable;
44
45struct Qcow2Cache {
d1b4efe5
AG
46 Qcow2CachedTable *entries;
47 struct Qcow2Cache *depends;
bf595021 48 int size;
3de0a294 49 bool depends_on_flush;
72e80b89 50 void *table_array;
2693310e 51 uint64_t lru_counter;
279621c0 52 uint64_t cache_clean_lru_counter;
49381094
KW
53};
54
72e80b89
AG
55static inline void *qcow2_cache_get_table_addr(BlockDriverState *bs,
56 Qcow2Cache *c, int table)
57{
ff99129a 58 BDRVQcow2State *s = bs->opaque;
72e80b89
AG
59 return (uint8_t *) c->table_array + (size_t) table * s->cluster_size;
60}
61
baf07d60
AG
62static inline int qcow2_cache_get_table_idx(BlockDriverState *bs,
63 Qcow2Cache *c, void *table)
64{
ff99129a 65 BDRVQcow2State *s = bs->opaque;
baf07d60
AG
66 ptrdiff_t table_offset = (uint8_t *) table - (uint8_t *) c->table_array;
67 int idx = table_offset / s->cluster_size;
68 assert(idx >= 0 && idx < c->size && table_offset % s->cluster_size == 0);
69 return idx;
70}
71
355ee2d0
AG
72static void qcow2_cache_table_release(BlockDriverState *bs, Qcow2Cache *c,
73 int i, int num_tables)
74{
75#if QEMU_MADV_DONTNEED != QEMU_MADV_INVALID
ff99129a 76 BDRVQcow2State *s = bs->opaque;
355ee2d0
AG
77 void *t = qcow2_cache_get_table_addr(bs, c, i);
78 int align = getpagesize();
79 size_t mem_size = (size_t) s->cluster_size * num_tables;
80 size_t offset = QEMU_ALIGN_UP((uintptr_t) t, align) - (uintptr_t) t;
81 size_t length = QEMU_ALIGN_DOWN(mem_size - offset, align);
82 if (length > 0) {
83 qemu_madvise((uint8_t *) t + offset, length, QEMU_MADV_DONTNEED);
84 }
85#endif
86}
87
279621c0
AG
88static inline bool can_clean_entry(Qcow2Cache *c, int i)
89{
90 Qcow2CachedTable *t = &c->entries[i];
91 return t->ref == 0 && !t->dirty && t->offset != 0 &&
92 t->lru_counter <= c->cache_clean_lru_counter;
93}
94
95void qcow2_cache_clean_unused(BlockDriverState *bs, Qcow2Cache *c)
96{
97 int i = 0;
98 while (i < c->size) {
99 int to_clean = 0;
100
101 /* Skip the entries that we don't need to clean */
102 while (i < c->size && !can_clean_entry(c, i)) {
103 i++;
104 }
105
106 /* And count how many we can clean in a row */
107 while (i < c->size && can_clean_entry(c, i)) {
108 c->entries[i].offset = 0;
109 c->entries[i].lru_counter = 0;
110 i++;
111 to_clean++;
112 }
113
114 if (to_clean > 0) {
115 qcow2_cache_table_release(bs, c, i - to_clean, to_clean);
116 }
117 }
118
119 c->cache_clean_lru_counter = c->lru_counter;
120}
121
6af4e9ea 122Qcow2Cache *qcow2_cache_create(BlockDriverState *bs, int num_tables)
49381094 123{
ff99129a 124 BDRVQcow2State *s = bs->opaque;
49381094 125 Qcow2Cache *c;
49381094 126
02004bd4 127 c = g_new0(Qcow2Cache, 1);
49381094 128 c->size = num_tables;
02004bd4 129 c->entries = g_try_new0(Qcow2CachedTable, num_tables);
9a4f4c31 130 c->table_array = qemu_try_blockalign(bs->file->bs,
72e80b89
AG
131 (size_t) num_tables * s->cluster_size);
132
133 if (!c->entries || !c->table_array) {
134 qemu_vfree(c->table_array);
135 g_free(c->entries);
136 g_free(c);
137 c = NULL;
49381094
KW
138 }
139
140 return c;
141}
142
d1b4efe5 143int qcow2_cache_destroy(BlockDriverState *bs, Qcow2Cache *c)
49381094
KW
144{
145 int i;
146
147 for (i = 0; i < c->size; i++) {
148 assert(c->entries[i].ref == 0);
49381094
KW
149 }
150
72e80b89 151 qemu_vfree(c->table_array);
7267c094
AL
152 g_free(c->entries);
153 g_free(c);
49381094
KW
154
155 return 0;
156}
157
158static int qcow2_cache_flush_dependency(BlockDriverState *bs, Qcow2Cache *c)
159{
160 int ret;
161
162 ret = qcow2_cache_flush(bs, c->depends);
163 if (ret < 0) {
164 return ret;
165 }
166
167 c->depends = NULL;
3de0a294
KW
168 c->depends_on_flush = false;
169
49381094
KW
170 return 0;
171}
172
173static int qcow2_cache_entry_flush(BlockDriverState *bs, Qcow2Cache *c, int i)
174{
ff99129a 175 BDRVQcow2State *s = bs->opaque;
3de0a294 176 int ret = 0;
49381094
KW
177
178 if (!c->entries[i].dirty || !c->entries[i].offset) {
179 return 0;
180 }
181
3cce16f4
KW
182 trace_qcow2_cache_entry_flush(qemu_coroutine_self(),
183 c == s->l2_table_cache, i);
184
49381094
KW
185 if (c->depends) {
186 ret = qcow2_cache_flush_dependency(bs, c);
3de0a294 187 } else if (c->depends_on_flush) {
9a4f4c31 188 ret = bdrv_flush(bs->file->bs);
3de0a294
KW
189 if (ret >= 0) {
190 c->depends_on_flush = false;
49381094
KW
191 }
192 }
193
3de0a294
KW
194 if (ret < 0) {
195 return ret;
196 }
197
cf93980e 198 if (c == s->refcount_block_cache) {
231bb267 199 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_REFCOUNT_BLOCK,
cf93980e
HR
200 c->entries[i].offset, s->cluster_size);
201 } else if (c == s->l2_table_cache) {
231bb267 202 ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_ACTIVE_L2,
cf93980e
HR
203 c->entries[i].offset, s->cluster_size);
204 } else {
231bb267 205 ret = qcow2_pre_write_overlap_check(bs, 0,
cf93980e
HR
206 c->entries[i].offset, s->cluster_size);
207 }
208
209 if (ret < 0) {
210 return ret;
211 }
212
29c1a730
KW
213 if (c == s->refcount_block_cache) {
214 BLKDBG_EVENT(bs->file, BLKDBG_REFBLOCK_UPDATE_PART);
215 } else if (c == s->l2_table_cache) {
216 BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
217 }
218
9a4f4c31 219 ret = bdrv_pwrite(bs->file->bs, c->entries[i].offset,
72e80b89 220 qcow2_cache_get_table_addr(bs, c, i), s->cluster_size);
49381094
KW
221 if (ret < 0) {
222 return ret;
223 }
224
225 c->entries[i].dirty = false;
226
227 return 0;
228}
229
230int qcow2_cache_flush(BlockDriverState *bs, Qcow2Cache *c)
231{
ff99129a 232 BDRVQcow2State *s = bs->opaque;
49381094
KW
233 int result = 0;
234 int ret;
235 int i;
236
3cce16f4
KW
237 trace_qcow2_cache_flush(qemu_coroutine_self(), c == s->l2_table_cache);
238
49381094
KW
239 for (i = 0; i < c->size; i++) {
240 ret = qcow2_cache_entry_flush(bs, c, i);
241 if (ret < 0 && result != -ENOSPC) {
242 result = ret;
243 }
244 }
245
246 if (result == 0) {
9a4f4c31 247 ret = bdrv_flush(bs->file->bs);
49381094
KW
248 if (ret < 0) {
249 result = ret;
250 }
251 }
252
253 return result;
254}
255
256int qcow2_cache_set_dependency(BlockDriverState *bs, Qcow2Cache *c,
257 Qcow2Cache *dependency)
258{
259 int ret;
260
261 if (dependency->depends) {
262 ret = qcow2_cache_flush_dependency(bs, dependency);
263 if (ret < 0) {
264 return ret;
265 }
266 }
267
268 if (c->depends && (c->depends != dependency)) {
269 ret = qcow2_cache_flush_dependency(bs, c);
270 if (ret < 0) {
271 return ret;
272 }
273 }
274
275 c->depends = dependency;
276 return 0;
277}
278
3de0a294
KW
279void qcow2_cache_depends_on_flush(Qcow2Cache *c)
280{
281 c->depends_on_flush = true;
282}
283
e7108fea
HR
284int qcow2_cache_empty(BlockDriverState *bs, Qcow2Cache *c)
285{
286 int ret, i;
287
288 ret = qcow2_cache_flush(bs, c);
289 if (ret < 0) {
290 return ret;
291 }
292
293 for (i = 0; i < c->size; i++) {
294 assert(c->entries[i].ref == 0);
295 c->entries[i].offset = 0;
2693310e 296 c->entries[i].lru_counter = 0;
e7108fea
HR
297 }
298
355ee2d0
AG
299 qcow2_cache_table_release(bs, c, 0, c->size);
300
2693310e
AG
301 c->lru_counter = 0;
302
e7108fea
HR
303 return 0;
304}
305
49381094
KW
306static int qcow2_cache_do_get(BlockDriverState *bs, Qcow2Cache *c,
307 uint64_t offset, void **table, bool read_from_disk)
308{
ff99129a 309 BDRVQcow2State *s = bs->opaque;
49381094
KW
310 int i;
311 int ret;
812e4082 312 int lookup_index;
fdfbca82
AG
313 uint64_t min_lru_counter = UINT64_MAX;
314 int min_lru_index = -1;
49381094 315
3cce16f4
KW
316 trace_qcow2_cache_get(qemu_coroutine_self(), c == s->l2_table_cache,
317 offset, read_from_disk);
318
49381094 319 /* Check if the table is already cached */
812e4082
AG
320 i = lookup_index = (offset / s->cluster_size * 4) % c->size;
321 do {
fdfbca82
AG
322 const Qcow2CachedTable *t = &c->entries[i];
323 if (t->offset == offset) {
49381094
KW
324 goto found;
325 }
fdfbca82
AG
326 if (t->ref == 0 && t->lru_counter < min_lru_counter) {
327 min_lru_counter = t->lru_counter;
328 min_lru_index = i;
329 }
812e4082
AG
330 if (++i == c->size) {
331 i = 0;
332 }
333 } while (i != lookup_index);
fdfbca82
AG
334
335 if (min_lru_index == -1) {
336 /* This can't happen in current synchronous code, but leave the check
337 * here as a reminder for whoever starts using AIO with the cache */
338 abort();
49381094
KW
339 }
340
fdfbca82
AG
341 /* Cache miss: write a table back and replace it */
342 i = min_lru_index;
3cce16f4
KW
343 trace_qcow2_cache_get_replace_entry(qemu_coroutine_self(),
344 c == s->l2_table_cache, i);
49381094
KW
345
346 ret = qcow2_cache_entry_flush(bs, c, i);
347 if (ret < 0) {
348 return ret;
349 }
350
3cce16f4
KW
351 trace_qcow2_cache_get_read(qemu_coroutine_self(),
352 c == s->l2_table_cache, i);
49381094
KW
353 c->entries[i].offset = 0;
354 if (read_from_disk) {
29c1a730
KW
355 if (c == s->l2_table_cache) {
356 BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
357 }
358
9a4f4c31
KW
359 ret = bdrv_pread(bs->file->bs, offset,
360 qcow2_cache_get_table_addr(bs, c, i),
72e80b89 361 s->cluster_size);
49381094
KW
362 if (ret < 0) {
363 return ret;
364 }
365 }
366
49381094
KW
367 c->entries[i].offset = offset;
368
369 /* And return the right table */
370found:
49381094 371 c->entries[i].ref++;
72e80b89 372 *table = qcow2_cache_get_table_addr(bs, c, i);
3cce16f4
KW
373
374 trace_qcow2_cache_get_done(qemu_coroutine_self(),
375 c == s->l2_table_cache, i);
376
49381094
KW
377 return 0;
378}
379
380int qcow2_cache_get(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
381 void **table)
382{
383 return qcow2_cache_do_get(bs, c, offset, table, true);
384}
385
386int qcow2_cache_get_empty(BlockDriverState *bs, Qcow2Cache *c, uint64_t offset,
387 void **table)
388{
389 return qcow2_cache_do_get(bs, c, offset, table, false);
390}
391
a3f1afb4 392void qcow2_cache_put(BlockDriverState *bs, Qcow2Cache *c, void **table)
49381094 393{
baf07d60 394 int i = qcow2_cache_get_table_idx(bs, c, *table);
49381094 395
49381094
KW
396 c->entries[i].ref--;
397 *table = NULL;
398
2693310e
AG
399 if (c->entries[i].ref == 0) {
400 c->entries[i].lru_counter = ++c->lru_counter;
401 }
402
49381094 403 assert(c->entries[i].ref >= 0);
49381094
KW
404}
405
72e80b89
AG
406void qcow2_cache_entry_mark_dirty(BlockDriverState *bs, Qcow2Cache *c,
407 void *table)
49381094 408{
baf07d60
AG
409 int i = qcow2_cache_get_table_idx(bs, c, table);
410 assert(c->entries[i].offset != 0);
49381094
KW
411 c->entries[i].dirty = true;
412}