/*
 * Copyright (C) 2011 Citrix Ltd.
 *
 * This work is licensed under the terms of the GNU GPL, version 2.  See
 * the COPYING file in the top-level directory.
 *
 * Contributions after 2012-01-13 are licensed under the terms of the
 * GNU GPL, version 2 or (at your option) any later version.
 */

#include "qemu/osdep.h"
#include "qemu/units.h"
#include "qemu/error-report.h"

#include <sys/resource.h>

#include "hw/xen/xen_backend.h"
#include "qemu/bitmap.h"

#include <xen/hvm/params.h>

#include "sysemu/xen-mapcache.h"
#include "trace.h"


//#define MAPCACHE_DEBUG

#ifdef MAPCACHE_DEBUG
#  define DPRINTF(fmt, ...) do { \
    fprintf(stderr, "xen_mapcache: " fmt, ## __VA_ARGS__); \
} while (0)
#else
#  define DPRINTF(fmt, ...) do { } while (0)
#endif

#if HOST_LONG_BITS == 32
#  define MCACHE_BUCKET_SHIFT 16
#  define MCACHE_MAX_SIZE     (1UL<<31) /* 2GB Cap */
#else
#  define MCACHE_BUCKET_SHIFT 20
#  define MCACHE_MAX_SIZE     (1UL<<35) /* 32GB Cap */
#endif
#define MCACHE_BUCKET_SIZE (1UL << MCACHE_BUCKET_SHIFT)

/* This is the size of the virtual address space reserved to QEMU that will
 * not be used by the MapCache.
 * From empirical tests, QEMU was observed to use about 75MB more than
 * max_mcache_size.
 */
#define NON_MCACHE_MEMORY_SIZE (80 * MiB)

typedef struct MapCacheEntry {
    hwaddr paddr_index;
    uint8_t *vaddr_base;
    unsigned long *valid_mapping;
    uint8_t lock;
#define XEN_MAPCACHE_ENTRY_DUMMY (1 << 0)
    uint8_t flags;
    hwaddr size;
    struct MapCacheEntry *next;
} MapCacheEntry;

typedef struct MapCacheRev {
    uint8_t *vaddr_req;
    hwaddr paddr_index;
    hwaddr size;
    QTAILQ_ENTRY(MapCacheRev) next;
    bool dma;
} MapCacheRev;

typedef struct MapCache {
    MapCacheEntry *entry;
    unsigned long nr_buckets;
    QTAILQ_HEAD(map_cache_head, MapCacheRev) locked_entries;

    /* For most cases (>99.9%), the page address is the same. */
    MapCacheEntry *last_entry;
    unsigned long max_mcache_size;
    unsigned int mcache_bucket_shift;

    phys_offset_to_gaddr_t phys_offset_to_gaddr;
    QemuMutex lock;
    void *opaque;
} MapCache;

static MapCache *mapcache;

static inline void mapcache_lock(void)
{
    qemu_mutex_lock(&mapcache->lock);
}

static inline void mapcache_unlock(void)
{
    qemu_mutex_unlock(&mapcache->lock);
}

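/*
 * Return 1 if all bits in [nr, nr + size) are set in the bitmap at @addr,
 * 0 otherwise.  Used to check that every page of a mapping is valid.
 */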
static inline int test_bits(int nr, int size, const unsigned long *addr)
{
    unsigned long res = find_next_zero_bit(addr, size + nr, nr);
    if (res >= nr + size) {
        return 1;
    } else {
        return 0;
    }
}

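/*
 * Allocate and initialize the global mapcache.  When running as root the
 * cache may grow to MCACHE_MAX_SIZE; an unprivileged QEMU is additionally
 * bounded by RLIMIT_AS, minus NON_MCACHE_MEMORY_SIZE kept free for QEMU's
 * own allocations.
 */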
void xen_map_cache_init(phys_offset_to_gaddr_t f, void *opaque)
{
    unsigned long size;
    struct rlimit rlimit_as;

    mapcache = g_malloc0(sizeof (MapCache));

    mapcache->phys_offset_to_gaddr = f;
    mapcache->opaque = opaque;
    qemu_mutex_init(&mapcache->lock);

    QTAILQ_INIT(&mapcache->locked_entries);

    if (geteuid() == 0) {
        rlimit_as.rlim_cur = RLIM_INFINITY;
        rlimit_as.rlim_max = RLIM_INFINITY;
        mapcache->max_mcache_size = MCACHE_MAX_SIZE;
    } else {
        getrlimit(RLIMIT_AS, &rlimit_as);
        rlimit_as.rlim_cur = rlimit_as.rlim_max;

        if (rlimit_as.rlim_max != RLIM_INFINITY) {
            warn_report("QEMU's maximum size of virtual"
                        " memory is not infinity");
        }
        if (rlimit_as.rlim_max < MCACHE_MAX_SIZE + NON_MCACHE_MEMORY_SIZE) {
            mapcache->max_mcache_size = rlimit_as.rlim_max -
                NON_MCACHE_MEMORY_SIZE;
        } else {
            mapcache->max_mcache_size = MCACHE_MAX_SIZE;
        }
    }

    setrlimit(RLIMIT_AS, &rlimit_as);

    mapcache->nr_buckets =
        (((mapcache->max_mcache_size >> XC_PAGE_SHIFT) +
          (1UL << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) - 1) >>
         (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT));

    size = mapcache->nr_buckets * sizeof (MapCacheEntry);
    size = (size + XC_PAGE_SIZE - 1) & ~(XC_PAGE_SIZE - 1);
    DPRINTF("%s, nr_buckets = %lx size %lu\n", __func__,
            mapcache->nr_buckets, size);
    mapcache->entry = g_malloc0(size);
}

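/*
 * (Re)create the mapping backing @entry for @size bytes of guest memory
 * starting at bucket @address_index, tearing down any previous mapping
 * first.  A real entry gets a foreign mapping of the guest's frames; a
 * dummy entry gets anonymous memory instead (see the comment below).
 * Per-page success of the foreign mapping is recorded in
 * entry->valid_mapping.
 */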
static void xen_remap_bucket(MapCacheEntry *entry,
                             void *vaddr,
                             hwaddr size,
                             hwaddr address_index,
                             bool dummy)
{
    uint8_t *vaddr_base;
    xen_pfn_t *pfns;
    int *err;
    unsigned int i;
    hwaddr nb_pfn = size >> XC_PAGE_SHIFT;

    trace_xen_remap_bucket(address_index);

    pfns = g_malloc0(nb_pfn * sizeof (xen_pfn_t));
    err = g_malloc0(nb_pfn * sizeof (int));

    if (entry->vaddr_base != NULL) {
        if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
            ram_block_notify_remove(entry->vaddr_base, entry->size);
        }
        if (munmap(entry->vaddr_base, entry->size) != 0) {
            perror("unmap fails");
            exit(-1);
        }
    }
    g_free(entry->valid_mapping);
    entry->valid_mapping = NULL;

    for (i = 0; i < nb_pfn; i++) {
        pfns[i] = (address_index << (MCACHE_BUCKET_SHIFT - XC_PAGE_SHIFT)) + i;
    }

    if (!dummy) {
        vaddr_base = xenforeignmemory_map2(xen_fmem, xen_domid, vaddr,
                                           PROT_READ | PROT_WRITE, 0,
                                           nb_pfn, pfns, err);
        if (vaddr_base == NULL) {
            perror("xenforeignmemory_map2");
            exit(-1);
        }
    } else {
        /*
         * We create dummy mappings where we are unable to create a foreign
         * mapping immediately due to certain circumstances (e.g. on resume).
         */
        vaddr_base = mmap(vaddr, size, PROT_READ | PROT_WRITE,
                          MAP_ANON | MAP_SHARED, -1, 0);
        if (vaddr_base == MAP_FAILED) {
            perror("mmap");
            exit(-1);
        }
    }

    if (!(entry->flags & XEN_MAPCACHE_ENTRY_DUMMY)) {
        ram_block_notify_add(vaddr_base, size);
    }

    entry->vaddr_base = vaddr_base;
    entry->paddr_index = address_index;
    entry->size = size;
    entry->valid_mapping = (unsigned long *) g_malloc0(sizeof(unsigned long) *
            BITS_TO_LONGS(size >> XC_PAGE_SHIFT));

    if (dummy) {
        entry->flags |= XEN_MAPCACHE_ENTRY_DUMMY;
    } else {
        entry->flags &= ~(XEN_MAPCACHE_ENTRY_DUMMY);
    }

    bitmap_zero(entry->valid_mapping, nb_pfn);
    for (i = 0; i < nb_pfn; i++) {
        if (!err[i]) {
            bitmap_set(entry->valid_mapping, i, 1);
        }
    }

    g_free(pfns);
    g_free(err);
}

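/*
 * Look up (and if necessary create) a mapping for @size bytes of guest
 * memory at @phys_addr, returning a pointer into the mapping.  Entries
 * hash by bucket index and chain via entry->next; a locked request must
 * not evict a locked entry, so it either reuses an unlocked entry in the
 * chain or appends a new one.  During incoming migration, frames that
 * cannot be mapped yet get a dummy mapping, to be fixed up later by
 * xen_replace_cache_entry().  Called with mapcache->lock held.
 */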
static uint8_t *xen_map_cache_unlocked(hwaddr phys_addr, hwaddr size,
                                       uint8_t lock, bool dma)
{
    MapCacheEntry *entry, *pentry = NULL,
                  *free_entry = NULL, *free_pentry = NULL;
    hwaddr address_index;
    hwaddr address_offset;
    hwaddr cache_size = size;
    hwaddr test_bit_size;
    bool translated G_GNUC_UNUSED = false;
    bool dummy = false;

tryagain:
    address_index  = phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = phys_addr & (MCACHE_BUCKET_SIZE - 1);

    trace_xen_map_cache(phys_addr);

    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    if (size) {
        test_bit_size = size + (phys_addr & (XC_PAGE_SIZE - 1));

        if (test_bit_size % XC_PAGE_SIZE) {
            test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
        }
    } else {
        test_bit_size = XC_PAGE_SIZE;
    }

    if (mapcache->last_entry != NULL &&
        mapcache->last_entry->paddr_index == address_index &&
        !lock && !size &&
        test_bits(address_offset >> XC_PAGE_SHIFT,
                  test_bit_size >> XC_PAGE_SHIFT,
                  mapcache->last_entry->valid_mapping)) {
        trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset);
        return mapcache->last_entry->vaddr_base + address_offset;
    }

    /* size is always a multiple of MCACHE_BUCKET_SIZE */
    if (size) {
        cache_size = size + address_offset;
        if (cache_size % MCACHE_BUCKET_SIZE) {
            cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE);
        }
    } else {
        cache_size = MCACHE_BUCKET_SIZE;
    }

    entry = &mapcache->entry[address_index % mapcache->nr_buckets];

    while (entry && (lock || entry->lock) && entry->vaddr_base &&
            (entry->paddr_index != address_index || entry->size != cache_size ||
             !test_bits(address_offset >> XC_PAGE_SHIFT,
                        test_bit_size >> XC_PAGE_SHIFT,
                        entry->valid_mapping))) {
        if (!free_entry && !entry->lock) {
            free_entry = entry;
            free_pentry = pentry;
        }
        pentry = entry;
        entry = entry->next;
    }
    if (!entry && free_entry) {
        entry = free_entry;
        pentry = free_pentry;
    }
    if (!entry) {
        entry = g_malloc0(sizeof (MapCacheEntry));
        pentry->next = entry;
        xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
    } else if (!entry->lock) {
        if (!entry->vaddr_base || entry->paddr_index != address_index ||
                entry->size != cache_size ||
                !test_bits(address_offset >> XC_PAGE_SHIFT,
                           test_bit_size >> XC_PAGE_SHIFT,
                           entry->valid_mapping)) {
            xen_remap_bucket(entry, NULL, cache_size, address_index, dummy);
        }
    }

    if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                   test_bit_size >> XC_PAGE_SHIFT,
                   entry->valid_mapping)) {
        mapcache->last_entry = NULL;
#ifdef XEN_COMPAT_PHYSMAP
        if (!translated && mapcache->phys_offset_to_gaddr) {
            phys_addr = mapcache->phys_offset_to_gaddr(phys_addr, size);
            translated = true;
            goto tryagain;
        }
#endif
        if (!dummy && runstate_check(RUN_STATE_INMIGRATE)) {
            dummy = true;
            goto tryagain;
        }
        trace_xen_map_cache_return(NULL);
        return NULL;
    }

    mapcache->last_entry = entry;
    if (lock) {
        MapCacheRev *reventry = g_malloc0(sizeof(MapCacheRev));
        entry->lock++;
        reventry->dma = dma;
        reventry->vaddr_req = mapcache->last_entry->vaddr_base + address_offset;
        reventry->paddr_index = mapcache->last_entry->paddr_index;
        reventry->size = entry->size;
        QTAILQ_INSERT_HEAD(&mapcache->locked_entries, reventry, next);
    }

    trace_xen_map_cache_return(mapcache->last_entry->vaddr_base + address_offset);
    return mapcache->last_entry->vaddr_base + address_offset;
}

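/*
 * Public entry point: map @size bytes of guest physical memory starting
 * at @phys_addr and return a host pointer into the mapping.  A non-zero
 * @lock pins the mapping until it is released with
 * xen_invalidate_map_cache_entry().  A rough usage sketch (gpa and len
 * are placeholder names):
 *
 *     uint8_t *p = xen_map_cache(gpa, len, 1, true);
 *     // ... access len bytes at p ...
 *     xen_invalidate_map_cache_entry(p);
 */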
uint8_t *xen_map_cache(hwaddr phys_addr, hwaddr size,
                       uint8_t lock, bool dma)
{
    uint8_t *p;

    mapcache_lock();
    p = xen_map_cache_unlocked(phys_addr, size, lock, dma);
    mapcache_unlock();
    return p;
}

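/*
 * Translate a host pointer previously returned by a locked xen_map_cache()
 * call back into a guest ram_addr_t.  Aborts if the pointer is not found
 * among the locked entries.
 */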
ram_addr_t xen_ram_addr_from_mapcache(void *ptr)
{
    MapCacheEntry *entry = NULL;
    MapCacheRev *reventry;
    hwaddr paddr_index;
    hwaddr size;
    ram_addr_t raddr;
    int found = 0;

    mapcache_lock();
    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (reventry->vaddr_req == ptr) {
            paddr_index = reventry->paddr_index;
            size = reventry->size;
            found = 1;
            break;
        }
    }
    if (!found) {
        fprintf(stderr, "%s, could not find %p\n", __func__, ptr);
        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
            DPRINTF("   "TARGET_FMT_plx" -> %p is present\n",
                    reventry->paddr_index, reventry->vaddr_req);
        }
        abort();
        return 0;
    }

    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
    while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to find address %p that is not in the mapcache!\n", ptr);
        raddr = 0;
    } else {
        raddr = (reventry->paddr_index << MCACHE_BUCKET_SHIFT) +
             ((unsigned long) ptr - (unsigned long) entry->vaddr_base);
    }
    mapcache_unlock();
    return raddr;
}

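/*
 * Release one lock reference on the mapping that contains @buffer.  When
 * the last reference is dropped and the entry is not the head of its
 * bucket chain, the mapping is unmapped and the entry freed.  Called with
 * mapcache->lock held.
 */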
static void xen_invalidate_map_cache_entry_unlocked(uint8_t *buffer)
{
    MapCacheEntry *entry = NULL, *pentry = NULL;
    MapCacheRev *reventry;
    hwaddr paddr_index;
    hwaddr size;
    int found = 0;

    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (reventry->vaddr_req == buffer) {
            paddr_index = reventry->paddr_index;
            size = reventry->size;
            found = 1;
            break;
        }
    }
    if (!found) {
        DPRINTF("%s, could not find %p\n", __func__, buffer);
        QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
            DPRINTF("   "TARGET_FMT_plx" -> %p is present\n",
                    reventry->paddr_index, reventry->vaddr_req);
        }
        return;
    }
    QTAILQ_REMOVE(&mapcache->locked_entries, reventry, next);
    g_free(reventry);

    if (mapcache->last_entry != NULL &&
        mapcache->last_entry->paddr_index == paddr_index) {
        mapcache->last_entry = NULL;
    }

    entry = &mapcache->entry[paddr_index % mapcache->nr_buckets];
    while (entry && (entry->paddr_index != paddr_index || entry->size != size)) {
        pentry = entry;
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to unmap address %p that is not in the mapcache!\n", buffer);
        return;
    }
    entry->lock--;
    if (entry->lock > 0 || pentry == NULL) {
        return;
    }

    pentry->next = entry->next;
    ram_block_notify_remove(entry->vaddr_base, entry->size);
    if (munmap(entry->vaddr_base, entry->size) != 0) {
        perror("unmap fails");
        exit(-1);
    }
    g_free(entry->valid_mapping);
    g_free(entry);
}

void xen_invalidate_map_cache_entry(uint8_t *buffer)
{
    mapcache_lock();
    xen_invalidate_map_cache_entry_unlocked(buffer);
    mapcache_unlock();
}

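/*
 * Drop every unlocked mapping in the cache, e.g. when the guest's physmap
 * changes.  Locked DMA mappings that are still outstanding are reported
 * but left in place.
 */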
void xen_invalidate_map_cache(void)
{
    unsigned long i;
    MapCacheRev *reventry;

    /* Flush pending AIO before destroying the mapcache */
    bdrv_drain_all();

    mapcache_lock();

    QTAILQ_FOREACH(reventry, &mapcache->locked_entries, next) {
        if (!reventry->dma) {
            continue;
        }
        fprintf(stderr, "Locked DMA mapping while invalidating mapcache!"
                " "TARGET_FMT_plx" -> %p is present\n",
                reventry->paddr_index, reventry->vaddr_req);
    }

    for (i = 0; i < mapcache->nr_buckets; i++) {
        MapCacheEntry *entry = &mapcache->entry[i];

        if (entry->vaddr_base == NULL) {
            continue;
        }
        if (entry->lock > 0) {
            continue;
        }

        if (munmap(entry->vaddr_base, entry->size) != 0) {
            perror("unmap fails");
            exit(-1);
        }

        entry->paddr_index = 0;
        entry->vaddr_base = NULL;
        entry->size = 0;
        g_free(entry->valid_mapping);
        entry->valid_mapping = NULL;
    }

    mapcache->last_entry = NULL;

    mapcache_unlock();
}

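/*
 * Swap the (dummy) mapping for @old_phys_addr with a real foreign mapping
 * of @new_phys_addr, reusing the entry's existing virtual address range.
 * Used once the guest frames become mappable, e.g. after an incoming
 * migration completes.  Called with mapcache->lock held.
 */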
static uint8_t *xen_replace_cache_entry_unlocked(hwaddr old_phys_addr,
                                                 hwaddr new_phys_addr,
                                                 hwaddr size)
{
    MapCacheEntry *entry;
    hwaddr address_index, address_offset;
    hwaddr test_bit_size, cache_size = size;

    address_index  = old_phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = old_phys_addr & (MCACHE_BUCKET_SIZE - 1);

    assert(size);
    /* test_bit_size is always a multiple of XC_PAGE_SIZE */
    test_bit_size = size + (old_phys_addr & (XC_PAGE_SIZE - 1));
    if (test_bit_size % XC_PAGE_SIZE) {
        test_bit_size += XC_PAGE_SIZE - (test_bit_size % XC_PAGE_SIZE);
    }
    cache_size = size + address_offset;
    if (cache_size % MCACHE_BUCKET_SIZE) {
        cache_size += MCACHE_BUCKET_SIZE - (cache_size % MCACHE_BUCKET_SIZE);
    }

    entry = &mapcache->entry[address_index % mapcache->nr_buckets];
    while (entry && !(entry->paddr_index == address_index &&
                      entry->size == cache_size)) {
        entry = entry->next;
    }
    if (!entry) {
        DPRINTF("Trying to update an entry for "TARGET_FMT_plx \
                " that is not in the mapcache!\n", old_phys_addr);
        return NULL;
    }

    address_index  = new_phys_addr >> MCACHE_BUCKET_SHIFT;
    address_offset = new_phys_addr & (MCACHE_BUCKET_SIZE - 1);

    fprintf(stderr, "Replacing a dummy mapcache entry for "TARGET_FMT_plx \
            " with "TARGET_FMT_plx"\n", old_phys_addr, new_phys_addr);

    xen_remap_bucket(entry, entry->vaddr_base,
                     cache_size, address_index, false);
    if (!test_bits(address_offset >> XC_PAGE_SHIFT,
                   test_bit_size >> XC_PAGE_SHIFT,
                   entry->valid_mapping)) {
        DPRINTF("Unable to update a mapcache entry for "TARGET_FMT_plx"!\n",
                old_phys_addr);
        return NULL;
    }

    return entry->vaddr_base + address_offset;
}

uint8_t *xen_replace_cache_entry(hwaddr old_phys_addr,
                                 hwaddr new_phys_addr,
                                 hwaddr size)
{
    uint8_t *p;

    mapcache_lock();
    p = xen_replace_cache_entry_unlocked(old_phys_addr, new_phys_addr, size);
    mapcache_unlock();
    return p;
}