/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

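/*
 * The mmap lock protects the guest page flags and the host mappings that
 * back them.  It is recursive: each thread keeps its own nesting count,
 * and only the outermost mmap_lock()/mmap_unlock() pair touches the
 * pthread mutex.
 */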
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

/* Grab lock to make sure things are in a consistent state after fork(). */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/*
 * Validate target prot bitmask.
 * Return the prot bitmask for the host in *HOST_PROT.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int *host_prot, int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

    /*
     * For the host, we need not pass anything except read/write/exec.
     * While PROT_SEM is allowed by all hosts, it is also ignored, so
     * don't bother transforming guest bit to host bit.  Any other
     * target-specific prot bits will not be understood by the host
     * and will need to be encoded into page_flags for qemu emulation.
     *
     * Pages that are executable by the guest will never be executed
     * by the host, but the host will need to be able to read them.
     */
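    /*
     * For example, a guest request of PROT_READ | PROT_EXEC yields a host
     * protection of just PROT_READ (guest-executable pages are read, never
     * executed, by the host), while the returned page_flags still record
     * the guest's read and exec permissions together with PAGE_VALID.
     */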
    *host_prot = (prot & (PROT_READ | PROT_WRITE))
               | (prot & PROT_EXEC ? PROT_READ : 0);

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

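/*
 * When the host page size is larger than TARGET_PAGE_SIZE, a guest range
 * may start or end in the middle of a host page.  Those boundary host
 * pages are given the union of the requested protection and whatever the
 * neighbouring guest pages in them already need; only fully covered host
 * pages get exactly the requested protection.
 */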
/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret, page_flags, host_prot;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }
    if (len == 0) {
        return 0;
    }

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = host_prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = host_prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
                       qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h_untagged(host_start),
                       host_end - host_start, host_prot);
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, start + len - 1, page_flags);
    ret = 0;

 error:
    mmap_unlock();
    return ret;
}

/* map an incomplete host page */
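/*
 * real_start is the host-page-aligned address of the host page touched;
 * [start, end) is the portion of the guest request that lies inside it.
 * For example, with 4K target pages and a 64K host page, mapping a single
 * 4K guest page means allocating (or reusing) the surrounding 64K host
 * page and filling only the requested 4K from the file.
 */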
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h_untagged(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end) {
            prot1 |= page_get_flags(addr);
        }
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return -1;
        }
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /*
         * msync() won't work here, so we return an error if write is
         * possible while it is a shared mapping.
         */
        if ((flags & MAP_TYPE) == MAP_SHARED && (prot & PROT_WRITE)) {
            return -1;
        }

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
        }

        /* read the corresponding file data */
        if (pread(fd, g2h_untagged(start), end - start, offset) == -1) {
            return -1;
        }

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h_untagged(start), 0, end - start);
        }
    }
    return 0;
}

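/*
 * Default search base for mmap_find_vma(): when the guest passes a null
 * address hint, the search for free guest address space starts here.
 */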
#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE  0xfa000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/*
 * Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
 * of guest address space.
 */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    /*
     * Start at the top of the address space, ignoring the last page.
     * If reserved_va == UINT32_MAX, then end_addr wraps to 0,
     * throwing the rest of the calculations off.
     * TODO: rewrite using last_addr instead.
     * TODO: use the interval tree instead of probing every page.
     */
    if (start > reserved_va - size) {
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use. */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched. */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space (see above). */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page. */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free. */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}

/*
 * Find and reserve a free memory area of size 'size'.  The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /*
         * Count the number of sequential returns of the same address.
         * This is used to modify the search algorithm below.
         */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /*
                 * Assume the result that the kernel gave us is the
                 * first with enough free space, so start again at the
                 * next higher target page.
                 */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /*
                 * Sometimes the kernel decides to perform the allocation
                 * at the top end of memory instead.
                 */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /*
             * Since the result the kernel gave didn't fit, start
             * again at low memory.  If any repetition, fail.
             */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /*
             * Don't actually use 0 when wrapping, instead indicate
             * that we'd truly like an allocation in low memory.
             */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

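/*
 * Outline of the mapping strategy:
 *
 *  - If the kernel is to choose the address (!MAP_FIXED), first reserve
 *    enough host address space with an anonymous mapping, then map the
 *    file over it so the final location stays under our control.
 *  - For MAP_FIXED requests, host pages that are only partially covered
 *    by the guest range are handled by mmap_frag(), and file mappings
 *    whose offset is not host-page aligned fall back to an anonymous
 *    mapping filled with pread().
 */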
/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len,
              passthrough_start = -1, passthrough_end = -1;
    int page_flags, host_prot;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /*
     * If the user is asking for the kernel to find a location, do that
     * before we truncate the length for mapping files below.
     */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /*
     * When mapping files into a memory area larger than the file, accesses
     * to pages beyond the file size will cause a SIGBUS.
     *
     * For example, if mmapping a file of 100 bytes on a host with 4K pages
     * emulating a target with 8K pages, the target expects to be able to
     * access the first 8K.  But the host will trap us on any access beyond
     * 4K.
     *
     * When emulating a target with a larger page size than the host's, we
     * may need to truncate file maps at EOF and add extra anonymous pages
     * up to the target's page boundary.
     */
    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /*
             * If so, truncate the file map at EOF aligned with
             * the host's real page size.  Additional anonymous maps
             * will be created beyond EOF.
             */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /*
         * Note: we prefer to control the mapping address.  It is
         * especially important if qemu_host_page_size >
         * qemu_real_host_page_size.
         */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        passthrough_start = start;
        passthrough_end = start + len;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, the host mmap() handles
         * this error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /*
         * Worst case: we cannot map the file because the offset is not
         * aligned, so we read it.
         */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /*
             * msync() won't work here, so we return an error if write is
             * possible while it is a shared mapping.
             */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (host_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(host_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                host_prot, flags, fd, offset);
                if (ret == -1) {
                    goto fail;
                }
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            host_prot, flags, fd, offset);
            if (ret == -1) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            host_prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1) {
                goto fail;
            }
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     host_prot, flags, fd, offset1);
            if (p == MAP_FAILED) {
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_end = real_end;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start == passthrough_end) {
        page_set_flags(start, start + len - 1, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start - 1, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_end - 1,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_end < start + len) {
            page_set_flags(passthrough_end, start + len - 1, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}

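/*
 * Replace a mapped guest range with PROT_NONE placeholder pages so the
 * address space stays reserved (used when reserved_va is in effect).
 * Host pages at the edges that still contain other live guest pages are
 * left untouched.
 */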
static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
             -1, 0);
    }
}

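/*
 * Unmap a guest range.  Host pages that are only partially covered by the
 * range (possible when the host page size is larger than TARGET_PAGE_SIZE)
 * are kept if any other guest page within them is still mapped.
 */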
int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len - 1, 0);
    }
    mmap_unlock();
    return ret;
}

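/*
 * Resize or move a guest mapping.  Three cases are handled:
 *  - MREMAP_FIXED: remap directly to the requested new guest address;
 *  - MREMAP_MAYMOVE: let mmap_find_vma() pick a new guest range first;
 *  - otherwise: grow in place, but only if the pages immediately after
 *    the old mapping are unused (checked explicitly when reserved_va is
 *    in effect, since the host then sees them as occupied placeholders).
 */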
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /*
             * If new and old addresses overlap then the above mremap will
             * already have failed with EINVAL.
             */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve(old_addr + old_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size - 1, 0);
        page_set_flags(new_addr, new_addr + new_size - 1,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}

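/*
 * madvise() can only be forwarded to the host when the range is aligned
 * to host pages and every page in it was mapped straight through from the
 * guest request (PAGE_PASSTHROUGH), i.e. qemu did not substitute an
 * anonymous mapping with different semantics.
 */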
static bool can_passthrough_madvise(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
            return false;
        }
    }

    return true;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * can_passthrough_madvise() helps to check if a passthrough is possible by
     * comparing mappings that are known to have the same semantics in the host
     * and the guest.  In this case passthrough is safe.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise.  Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (can_passthrough_madvise(start, end)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len - 1);
            }
        }
    }
    mmap_unlock();

    return ret;
}