/*
 *  mmap support for qemu
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

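/*
 * mmap_lock() is a counted, recursive-style lock: only the outermost
 * lock/unlock pair touches the mutex, so a thread already holding the
 * lock may re-enter these helpers safely.  The count is per-thread
 * (__thread), while the mutex itself serializes threads.  The fork
 * helpers below take the mutex around fork() so that the child
 * inherits it in a consistent state.
 */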
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

/*
 * Validate target prot bitmask.
 * Return the prot bitmask for the host in *HOST_PROT.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int *host_prot, int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

    /*
     * For the host, we need not pass anything except read/write/exec.
     * While PROT_SEM is allowed by all hosts, it is also ignored, so
     * don't bother transforming guest bit to host bit.  Any other
     * target-specific prot bits will not be understood by the host
     * and will need to be encoded into page_flags for qemu emulation.
     *
     * Pages that are executable by the guest will never be executed
     * by the host, but the host will need to be able to read them.
     */
    *host_prot = (prot & (PROT_READ | PROT_WRITE))
               | (prot & PROT_EXEC ? PROT_READ : 0);

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#elif defined(TARGET_HPPA)
    valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif

    return prot & ~valid ? 0 : page_flags;
}

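/*
 * For example, a guest request of PROT_READ | PROT_EXEC yields
 * *host_prot == PROT_READ: guest-executable pages are only ever read
 * by the host (for translation), while the returned page_flags still
 * carry the guest exec bit plus PAGE_VALID for qemu's bookkeeping.
 */
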
/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret, page_flags, host_prot;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }
    if (len == 0) {
        return 0;
    }

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = host_prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = host_prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
                       qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h_untagged(host_start),
                       host_end - host_start, host_prot);
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, start + len, page_flags);
    ret = 0;

error:
    mmap_unlock();
    return ret;
}

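/*
 * For instance, with 4K target pages on a 16K-page host, mprotecting
 * guest [0x5000, 0x9000) cannot simply mprotect whole host pages: the
 * host pages at 0x4000 and 0x8000 each also contain unrelated target
 * pages, so their existing flags are OR'ed into the new protection,
 * and only fully-covered host pages in the middle get host_prot as-is.
 */
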
/* map an incomplete host page */
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h_untagged(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end) {
            prot1 |= page_get_flags(addr);
        }
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return -1;
        }
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /* msync() won't work here, so we return an error if write is
           possible while it is a shared mapping */
        if ((flags & MAP_TYPE) == MAP_SHARED && (prot & PROT_WRITE)) {
            return -1;
        }

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
        }

        /* read the corresponding file data */
        if (pread(fd, g2h_untagged(start), end - start, offset) == -1) {
            return -1;
        }

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h_untagged(start), 0, end - start);
        }
    }
    return 0;
}

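/*
 * Example: with 4K guest pages on an 8K-page host, a guest mmap of
 * [0x5000, 0x6000) only partially covers the host page at 0x4000.
 * mmap_frag(0x4000, 0x5000, 0x6000, ...) keeps whatever already lives
 * in [0x4000, 0x5000) and pread()s the file data into the fragment
 * instead of mapping it, since the host cannot map half of its page.
 */
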
#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE  0xfa000000
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
   of guest address space.  */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    if (start > reserved_va - size) {
        /* Start at the top of the address space. */
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use. */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched. */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space. */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page. */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free. */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}

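/*
 * Note that addr and end_addr are unsigned, so "addr > end_addr" after
 * "addr -= incr" detects wrap-around below zero: the downward search
 * has fallen off the bottom of the reserved region, and it restarts
 * from the top exactly once before giving up.
 */
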
/*
 * Find and reserve a free memory area of size 'size'.  The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 if error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /* Count the number of sequential returns of the same address.
           This is used to modify the search algorithm below. */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /* Assume the result that the kernel gave us is the
                   first with enough free space, so start again at the
                   next higher target page. */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /* Sometimes the kernel decides to perform the allocation
                   at the top end of memory instead. */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /* Since the result the kernel gave didn't fit, start
               again at low memory.  If any repetition, fail. */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /* Don't actually use 0 when wrapping, instead indicate
               that we'd truly like an allocation in low memory. */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}

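/*
 * In short: "repeat" counts how many times in a row the kernel handed
 * back the same misaligned address.  The first retry probes the next
 * aligned address above it, the second the aligned address below, the
 * third restarts from low memory, and a fourth identical answer gives
 * up with ENOMEM.
 */
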
/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len,
        passthrough_start = -1, passthrough_end = -1;
    int page_flags, host_prot;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /* If the user is asking for the kernel to find a location, do that
       before we truncate the length for mapping files below. */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmapping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K.  But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page size than the host's, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the target's page boundary. */

    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at EOF aligned with
               the host's real page size.  Additional anonymous maps
               will be created beyond EOF. */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

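    /* Concretely, for the 100-byte example above: len (8K) extends past
       EOF, so it is truncated to REAL_HOST_PAGE_ALIGN(100) == 4K.  The
       file then backs only the first 4K, and the remainder of the 8K
       target page is left to the surrounding anonymous mapping. */
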
    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /* Note: we prefer to control the mapping address.  It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size. */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
        passthrough_start = start;
        passthrough_end = start + len;
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, host mmap() handles
         * this error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (host_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(host_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                host_prot, flags, fd, offset);
                if (ret == -1) {
                    goto fail;
                }
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            host_prot, flags, fd, offset);
            if (ret == -1) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            host_prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1) {
                goto fail;
            }
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     host_prot, flags, fd, offset1);
            if (p == MAP_FAILED) {
                goto fail;
            }
            passthrough_start = real_start;
            passthrough_end = real_end;
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    if (passthrough_start == passthrough_end) {
        page_set_flags(start, start + len, page_flags);
    } else {
        if (start < passthrough_start) {
            page_set_flags(start, passthrough_start, page_flags);
        }
        page_set_flags(passthrough_start, passthrough_end,
                       page_flags | PAGE_PASSTHROUGH);
        if (passthrough_end < start + len) {
            page_set_flags(passthrough_end, start + len, page_flags);
        }
    }
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}

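/*
 * With reserved_va, guest address space is never handed back to the
 * host: "unmapping" a range means re-mmapping it as PROT_NONE anonymous
 * memory, so it stays reserved for future guest allocations.  Host
 * pages shared with still-live guest pages at either end are kept.
 */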
static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
             -1, 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len, 0);
    }
    mmap_unlock();
    return ret;
}

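/*
 * target_mremap() below takes one of three paths: MREMAP_FIXED is
 * passed straight to host mremap(); MREMAP_MAYMOVE first picks a
 * destination with mmap_find_vma() and then forces MREMAP_FIXED;
 * otherwise the resize happens in place, which with reserved_va is
 * only allowed when no guest page already occupies the region being
 * grown into.
 */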
abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /* If new and old addresses overlap then the above mremap will
               already have failed with EINVAL. */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve(old_addr + old_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size, 0);
        page_set_flags(new_addr, new_addr + new_size,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    mmap_unlock();
    return new_addr;
}

static bool can_passthrough_madvise(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
            return false;
        }
    }

    return true;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /* Translate for some architectures which have different MADV_xxx values */
    switch (advice) {
    case TARGET_MADV_DONTNEED:      /* alpha */
        advice = MADV_DONTNEED;
        break;
    case TARGET_MADV_WIPEONFORK:    /* parisc */
        advice = MADV_WIPEONFORK;
        break;
    case TARGET_MADV_KEEPONFORK:    /* parisc */
        advice = MADV_KEEPONFORK;
        break;
    /* we do not care about the other MADV_xxx values yet */
    }

    /*
     * Most advice values are hints, so ignoring and returning success is ok.
     *
     * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
     * MADV_KEEPONFORK are not hints and need to be emulated.
     *
     * A straight passthrough for those may not be safe because qemu sometimes
     * turns private file-backed mappings into anonymous mappings.
     * can_passthrough_madvise() helps to check if a passthrough is possible by
     * comparing mappings that are known to have the same semantics in the host
     * and the guest.  In this case passthrough is safe.
     *
     * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
     * return failure if not.
     *
     * MADV_DONTNEED is passed through as well, if possible.
     * If passthrough isn't possible, we nevertheless (wrongly!) return
     * success, which is broken but some userspace programs fail to work
     * otherwise.  Completely implementing such emulation is quite complicated
     * though.
     */
    mmap_lock();
    switch (advice) {
    case MADV_WIPEONFORK:
    case MADV_KEEPONFORK:
        ret = -EINVAL;
        /* fall through */
    case MADV_DONTNEED:
        if (can_passthrough_madvise(start, end)) {
            ret = get_errno(madvise(g2h_untagged(start), len, advice));
            if ((advice == MADV_DONTNEED) && (ret == 0)) {
                page_reset_target_data(start, start + len);
            }
        }
    }
    mmap_unlock();

    return ret;
}