/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    assert(mmap_lock_count > 0);
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}
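
/*
 * The lock is recursive for its owning thread: mmap_lock_count is
 * thread-local, so nested acquisition only bumps the count.  That matters
 * because e.g. target_mmap() below calls target_mprotect() while already
 * holding the lock.  A minimal sketch of the intended usage:
 *
 *     mmap_lock();
 *     mmap_lock();        // nested: count goes 1 -> 2, mutex untouched
 *     assert(have_mmap_lock());
 *     mmap_unlock();
 *     mmap_unlock();      // count returns to 0, mutex released
 */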

/* Grab lock to make sure things are in a consistent state after fork(). */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/*
 * Validate target prot bitmask.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}
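
/*
 * The read/write/exec prot bits line up with QEMU's PAGE_* flags, which is
 * what "(prot & PAGE_BITS)" above relies on.  As a sketch of the common
 * case, validate_prot_to_pageflags(PROT_READ | PROT_WRITE) returns
 * PAGE_READ | PAGE_WRITE | PAGE_VALID, while any prot value with a bit
 * outside the accepted set returns 0 and the caller reports EINVAL.
 */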

/*
 * For the host, we need not pass anything except read/write/exec.
 * While PROT_SEM is allowed by all hosts, it is also ignored, so
 * don't bother transforming guest bit to host bit.  Any other
 * target-specific prot bits will not be understood by the host
 * and will need to be encoded into page_flags for qemu emulation.
 *
 * Pages that are executable by the guest will never be executed
 * by the host, but the host will need to be able to read them.
 */
static int target_to_host_prot(int prot)
{
    return (prot & (PROT_READ | PROT_WRITE)) |
           (prot & PROT_EXEC ? PROT_READ : 0);
}
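
/*
 * For example, target_to_host_prot(PROT_EXEC) == PROT_READ: guest code
 * pages are only ever read by the translator, never executed directly,
 * so guest-exec becomes host-read.
 */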

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong starts[3];
    abi_ulong lens[3];
    int prots[3];
    abi_ulong host_start, host_last, last;
    int prot1, ret, page_flags, nranges;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    if (len == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }

    last = start + len - 1;
    host_start = start & qemu_host_page_mask;
    host_last = HOST_PAGE_ALIGN(last) - 1;
    nranges = 0;

    mmap_lock();

    if (host_last - host_start < qemu_host_page_size) {
        /* Single host page contains all guest pages: sum the prot. */
        prot1 = target_prot;
        for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a);
        }
        for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(a + 1);
        }
        starts[nranges] = host_start;
        lens[nranges] = qemu_host_page_size;
        prots[nranges] = prot1;
        nranges++;
    } else {
        if (host_start < start) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                starts[nranges] = host_start;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
                host_start += qemu_host_page_size;
            }
        }

        if (last < host_last) {
            /* Host page contains more than one guest page: sum the prot. */
            prot1 = target_prot;
            for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(a + 1);
            }
            /* If the resulting sum differs, create a new range. */
            if (prot1 != target_prot) {
                host_last -= qemu_host_page_size;
                starts[nranges] = host_last + 1;
                lens[nranges] = qemu_host_page_size;
                prots[nranges] = prot1;
                nranges++;
            }
        }

        /* Create a range for the middle, if any remains. */
        if (host_start < host_last) {
            starts[nranges] = host_start;
            lens[nranges] = host_last - host_start + 1;
            prots[nranges] = target_prot;
            nranges++;
        }
    }

    for (int i = 0; i < nranges; ++i) {
        ret = mprotect(g2h_untagged(starts[i]), lens[i],
                       target_to_host_prot(prots[i]));
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, last, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}

/* map an incomplete host page */
static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
                      int prot, int flags, int fd, off_t offset)
{
    abi_ulong real_last;
    void *host_start;
    int prot_old, prot_new;
    int host_prot_old, host_prot_new;

    if (!(flags & MAP_ANONYMOUS)
        && (flags & MAP_TYPE) == MAP_SHARED
        && (prot & PROT_WRITE)) {
        /*
         * msync() won't work with the partial page, so we return an
         * error if write is possible while it is a shared mapping.
         */
        errno = EINVAL;
        return false;
    }

    real_last = real_start + qemu_host_page_size - 1;
    host_start = g2h_untagged(real_start);

    /* Get the protection of the target pages outside the mapping. */
    prot_old = 0;
    for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }
    for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
        prot_old |= page_get_flags(a);
    }

    if (prot_old == 0) {
        /*
         * Since !(prot_old & PAGE_VALID), there were no guest pages
         * outside of the fragment we need to map.  Allocate a new host
         * page to cover, discarding whatever else may have been present.
         */
        void *p = mmap(host_start, qemu_host_page_size,
                       target_to_host_prot(prot),
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return false;
        }
        prot_old = prot;
    }
    prot_new = prot | prot_old;

    host_prot_old = target_to_host_prot(prot_old);
    host_prot_new = target_to_host_prot(prot_new);

    /* Adjust protection to be able to write. */
    if (!(host_prot_old & PROT_WRITE)) {
        host_prot_old |= PROT_WRITE;
        mprotect(host_start, qemu_host_page_size, host_prot_old);
    }

    /* Read or zero the new guest pages. */
    if (flags & MAP_ANONYMOUS) {
        memset(g2h_untagged(start), 0, last - start + 1);
    } else {
        if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
            return false;
        }
    }

    /* Put final protection */
    if (host_prot_new != host_prot_old) {
        mprotect(host_start, qemu_host_page_size, host_prot_new);
    }
    return true;
}
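
/*
 * Fragments arise when qemu_host_page_size > TARGET_PAGE_SIZE.  As a
 * hypothetical example, with 4k guest pages on a 64k-page host, a guest
 * mapping of [0x11000, 0x12fff] covers only part of the host page
 * [0x10000, 0x1ffff]; mmap_frag() must preserve the other guest pages
 * already present in that host page, so the new range is read or zeroed
 * by hand rather than letting a host mmap() replace the whole page.
 */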

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE 0x5500000000
#else
# define TASK_UNMAPPED_BASE (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE 0xfa000000
#else
# define TASK_UNMAPPED_BASE 0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated
 * a chunk of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    target_ulong ret;

    ret = page_find_range_empty(start, reserved_va, size, align);
    if (ret == -1 && start > mmap_min_addr) {
        /* Restart at the beginning of the address space. */
        ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
    }

    return ret;
}

/*
 * Find and reserve a free memory area of size 'size'.  The search
 * starts at 'start'.  It must be called with mmap_lock() held.
 * Return -1 on error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM if the host address space has no memory. */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}
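
/*
 * For example, target_mremap() below calls
 * mmap_find_vma(0, new_size, TARGET_PAGE_SIZE): with start == 0 the
 * search begins at mmap_next_start, so successive allocations tend to
 * be laid out upward from TASK_UNMAPPED_BASE.
 */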

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, off_t offset)
{
    abi_ulong ret, last, real_start, real_last, retaddr, host_len;
    abi_ulong passthrough_start = -1, passthrough_last = 0;
    int page_flags;
    off_t host_offset;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K.  But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF, aligned with
             * the host's real page size.  Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
        uintptr_t host_start;
        int host_prot;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        host_prot = target_to_host_prot(target_prot);

        /*
         * Note: we prefer to control the mapping address.  It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (uintptr_t)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        last = start + len - 1;
        passthrough_start = start;
        passthrough_last = last;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        last = start + len - 1;
        real_last = HOST_PAGE_ALIGN(last) - 1;

        /*
         * Test if the requested memory area fits the target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, host mmap() handles
         * this error correctly.
         */
        if (last < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* Validate that the chosen range is empty. */
        if ((flags & MAP_FIXED_NOREPLACE)
            && !page_check_range_empty(start, last)) {
            errno = EEXIST;
            goto fail;
        }

        /*
         * worst case: we cannot map the file because the offset is not
         * aligned, so we read it
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping
             */
            if ((flags & MAP_TYPE) == MAP_SHARED
                && (target_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
                                  | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(target_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_last == real_start + qemu_host_page_size - 1) {
                /* one single host page */
                if (!mmap_frag(real_start, start, last,
                               target_prot, flags, fd, offset)) {
                    goto fail;
                }
                goto the_end1;
            }
            if (!mmap_frag(real_start, start,
                           real_start + qemu_host_page_size - 1,
                           target_prot, flags, fd, offset)) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (last < real_last) {
            abi_ulong real_page = real_last - qemu_host_page_size + 1;
            if (!mmap_frag(real_page, real_page, last,
                           target_prot, flags, fd,
                           offset + real_page - start)) {
                goto fail;
            }
            real_last -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_last) {
            void *p;
            off_t offset1;

            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            p = mmap(g2h_untagged(real_start), real_last - real_start + 1,
                     target_to_host_prot(target_prot), flags, fd, offset1);
            if (p == MAP_FAILED) {
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_last = real_last;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start > passthrough_last) {
        page_set_flags(start, last, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_last,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_last < last) {
            page_set_flags(passthrough_last + 1, last, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
 fail:
    mmap_unlock();
    return -1;
}
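
/*
 * Illustrative call, assuming the usual syscall path: a guest
 * mmap(NULL, 8192, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS,
 * -1, 0) arrives here as target_mmap(0, 8192, ...), which picks a guest
 * address via mmap_find_vma() and returns it, or -1 with errno set.
 */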

static void mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
    abi_ulong real_start;
    abi_ulong real_last;
    abi_ulong real_len;
    abi_ulong last;
    abi_ulong a;
    void *host_start;
    int prot;

    last = start + len - 1;
    real_start = start & qemu_host_page_mask;
    real_last = HOST_PAGE_ALIGN(last) - 1;

    /*
     * If guest pages remain on the first or last host pages,
     * adjust the deallocation to retain those guest pages.
     * The single page special case is required for the last page,
     * lest real_start overflow to zero.
     */
    if (real_last - real_start < qemu_host_page_size) {
        prot = 0;
        for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            return;
        }
    } else {
        for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a);
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }

        for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(a + 1);
        }
        if (prot != 0) {
            real_last -= qemu_host_page_size;
        }

        if (real_last < real_start) {
            return;
        }
    }

    real_len = real_last - real_start + 1;
    host_start = g2h_untagged(real_start);

    if (reserved_va) {
        void *ptr = mmap(host_start, real_len, PROT_NONE,
                         MAP_FIXED | MAP_ANONYMOUS
                         | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
        assert(ptr == host_start);
    } else {
        int ret = munmap(host_start, real_len);
        assert(ret == 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    mmap_reserve_or_unmap(start, len);
    page_set_flags(start, start + len - 1, 0);
    mmap_unlock();

    return 0;
}

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve_or_unmap(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve_or_unmap(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve_or_unmap(old_addr + old_size,
                                          old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    if (len_in == 0) {
        return 0;
    }
    len = TARGET_PAGE_ALIGN(len_in);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }
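
    /*
     * For example, Alpha's TARGET_MADV_DONTNEED differs numerically from
     * the generic MADV_DONTNEED, so passing the guest value through
     * unchanged would request the wrong advice from the host kernel.
     */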

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
     * same semantics for the host as for the guest.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise.  Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}