/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"

static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}
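
/*
 * Note: the lock is recursive per thread; only the outermost
 * mmap_lock()/mmap_unlock() pair touches the pthread mutex.  A nested
 * use looks like this (a sketch, not a call sequence from this file):
 *
 *     mmap_lock();     // count 0 -> 1, mutex acquired
 *     mmap_lock();     // count 1 -> 2, mutex untouched
 *     mmap_unlock();   // count 2 -> 1, mutex untouched
 *     mmap_unlock();   // count 1 -> 0, mutex released
 */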

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

/* Grab lock to make sure things are in a consistent state after fork().  */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}
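
/*
 * fork() protocol, as used by the fork path of the syscall emulation:
 * the parent takes mmap_mutex before the host fork() so that no thread
 * holds it across the fork; afterwards the child re-initializes the
 * mutex while the parent simply unlocks it.
 */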

/*
 * Validate target prot bitmask.
 * Return the prot bitmask for the host in *HOST_PROT.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int *host_prot, int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

    /*
     * For the host, we need not pass anything except read/write/exec.
     * While PROT_SEM is allowed by all hosts, it is also ignored, so
     * don't bother transforming guest bit to host bit.  Any other
     * target-specific prot bits will not be understood by the host
     * and will need to be encoded into page_flags for qemu emulation.
     *
     * Pages that are executable by the guest will never be executed
     * by the host, but the host will need to be able to read them.
     */
    *host_prot = (prot & (PROT_READ | PROT_WRITE))
               | (prot & PROT_EXEC ? PROT_READ : 0);

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#endif

    return prot & ~valid ? 0 : page_flags;
}
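
/*
 * Example: a guest mapping requested with PROT_EXEC only comes back with
 * *host_prot == PROT_READ -- the host never executes guest pages
 * directly, but the translator must be able to read them -- while the
 * returned page_flags still carry the exec bit for guest-side checks.
 */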

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret, page_flags, host_prot;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }
    if (len == 0) {
        return 0;
    }

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = host_prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = host_prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
                       qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h_untagged(host_start),
                       host_end - host_start, host_prot);
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, start + len, page_flags);
    tb_invalidate_phys_range(start, start + len);
    ret = 0;

error:
    mmap_unlock();
    return ret;
}
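
/*
 * Worked example for the boundary handling above: with 4 KiB target pages
 * on a 64 KiB host page (hypothetical sizes), mprotect'ing the guest range
 * [0x1000, 0x3000) cannot change host protection for those bytes alone.
 * The containing host page is given the union of host_prot and the flags
 * of every other target page within it, and only fully covered host pages
 * in the middle receive exactly host_prot.
 */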

/* map an incomplete host page */
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h_untagged(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end) {
            prot1 |= page_get_flags(addr);
        }
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return -1;
        }
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /* msync() won't work here, so we return an error if write is
           possible while it is a shared mapping */
        if ((flags & MAP_TYPE) == MAP_SHARED && (prot & PROT_WRITE)) {
            return -1;
        }

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
        }

        /* read the corresponding file data */
        if (pread(fd, g2h_untagged(start), end - start, offset) == -1) {
            return -1;
        }

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h_untagged(start), 0, end - start);
        }
    }
    return 0;
}
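
/*
 * Example: with 4 KiB guest pages inside a 64 KiB host page (hypothetical
 * sizes), a file mapping that begins mid host page cannot be a real host
 * mmap of the file.  mmap_frag() instead backs the host page with
 * anonymous memory if it was empty, pread()s the file bytes into the
 * fragment, and then applies the combined protection.
 */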

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE 0x5500000000
#else
# define TASK_UNMAPPED_BASE (1ul << 38)
#endif
#else
#ifdef TARGET_HPPA
# define TASK_UNMAPPED_BASE 0xfa000000
#else
# define TASK_UNMAPPED_BASE 0x40000000
#endif
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;

/* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
   of guest address space.  */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    if (start > reserved_va - size) {
        /* Start at the top of the address space. */
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use. */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched. */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space. */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page. */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free. */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}
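
/*
 * The reserved_va search above runs downward: a busy page at ADDR re-bases
 * the candidate window to end at ((ADDR - size) & -align) + size, and a
 * wrap below zero restarts once from the top of the reservation before
 * giving up.
 */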

/*
 * Find and reserve a free memory area of size 'size'.  The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 on error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = 0;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /* Count the number of sequential returns of the same address.
           This is used to modify the search algorithm below. */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /* Assume the result that the kernel gave us is the
                   first with enough free space, so start again at the
                   next higher target page. */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /* Sometimes the kernel decides to perform the allocation
                   at the top end of memory instead. */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /* Since the result the kernel gave didn't fit, start
               again at low memory.  If any repetition, fail. */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /* Don't actually use 0 when wrapping, instead indicate
               that we'd truly like an allocation in low memory. */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}
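
/*
 * Usage sketch (mirroring what target_mmap() does below): with mmap_lock()
 * held,
 *
 *     start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
 *     if (start == (abi_ulong)-1) {
 *         errno = ENOMEM;
 *         goto fail;
 *     }
 *     ... then replace the PROT_NONE reservation via mmap(MAP_FIXED) ...
 */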

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
    int page_flags, host_prot;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /* If the user is asking for the kernel to find a location, do that
       before we truncate the length for mapping files below. */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmapping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K.  But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page size than the host's, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the target's page boundary. */

    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at EOF aligned with
               the host's real page size.  Additional anonymous maps
               will be created beyond EOF. */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /* Note: we prefer to control the mapping address.  It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size. */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, the host mmap()
         * handles this error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (host_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(host_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                host_prot, flags, fd, offset);
                if (ret == -1) {
                    goto fail;
                }
                goto the_end1;
            }
            ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
                            host_prot, flags, fd, offset);
            if (ret == -1) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            host_prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1) {
                goto fail;
            }
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     host_prot, flags, fd, offset1);
            if (p == MAP_FAILED) {
                goto fail;
            }
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    page_set_flags(start, start + len, page_flags);
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    tb_invalidate_phys_range(start, start + len);
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}
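
/*
 * Flow sketch for a non-fixed file mapping (hypothetical arguments): a
 * guest mmap(0, 0x2000, PROT_READ, MAP_PRIVATE, fd, 0) first reserves
 * host_len bytes through mmap_find_vma(), remaps the file over that
 * reservation with MAP_FIXED, and finally records the guest view with
 * page_set_flags(start, start + len, page_flags).
 */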

static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
             -1, 0);
    }
}
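
/*
 * mmap_reserve() is the reserved_va counterpart of munmap(): rather than
 * punching a hole in the one big host reservation, the released range is
 * re-covered with PROT_NONE | MAP_NORESERVE so the host kernel cannot
 * hand those addresses to an unrelated mapping while the guest still
 * owns the reservation.
 */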

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len, 0);
        tb_invalidate_phys_range(start, start + len);
    }
    mmap_unlock();
    return ret;
}
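
/*
 * Note the edge handling above: a host page whose other target pages are
 * still live is never munmap'ed; e.g. releasing 4 KiB out of a 64 KiB
 * host page (hypothetical sizes) only clears the guest page flags, and
 * the host memory goes away once the whole host page is free.
 */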

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /* If new and old addresses overlap then the above mremap will
               already have failed with EINVAL. */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve(old_addr + old_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size, 0);
        page_set_flags(new_addr, new_addr + new_size,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    tb_invalidate_phys_range(new_addr, new_addr + new_size);
    mmap_unlock();
    return new_addr;
}
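
/*
 * Example of the in-place branch above: growing a block with neither
 * MREMAP_FIXED nor MREMAP_MAYMOVE under reserved_va first scans the tail
 * [old_addr + old_size, old_addr + new_size) for live guest pages; any
 * hit means the grow would collide with an existing mapping, so the call
 * fails with ENOMEM instead of letting the host mremap() act.
 */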

static bool can_passthrough_madv_dontneed(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_ANON)) {
            return false;
        }
    }

    return true;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /*
     * A straight passthrough may not be safe because qemu sometimes turns
     * private file-backed mappings into anonymous mappings.
     *
     * This is a hint, so ignoring and returning success is ok.
     *
     * This breaks MADV_DONTNEED, and implementing it completely is quite
     * complicated.  However, there is one low-hanging fruit: host-page-aligned
     * anonymous mappings.  In this case passthrough is safe, so do it.
     */
    mmap_lock();
    if (advice == TARGET_MADV_DONTNEED &&
        can_passthrough_madv_dontneed(start, end)) {
        ret = get_errno(madvise(g2h_untagged(start), len, MADV_DONTNEED));
        if (ret == 0) {
            page_reset_target_data(start, start + len);
        }
    }
    mmap_unlock();

    return ret;
}
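
/*
 * Example: MADV_DONTNEED on a host-page-aligned anonymous guest range is
 * passed through, so the pages read back as zeros afterwards, as the
 * guest expects.  The same advice on a file-backed range is accepted
 * (ret == 0) without discarding anything, which is permissible because
 * madvise is only advisory.
 */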