/*
 * mmap support for qemu
 *
 * Copyright (c) 2003 Fabrice Bellard
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
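
/*
 * All changes to the guest address space are serialized by a single
 * mutex.  The lock is recursive per thread: mmap_lock_count tracks the
 * depth, and only the outermost mmap_lock()/mmap_unlock() pair touches
 * the pthread mutex (target_mmap(), for instance, calls
 * target_mprotect() while already holding the lock).
 */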
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;

void mmap_lock(void)
{
    if (mmap_lock_count++ == 0) {
        pthread_mutex_lock(&mmap_mutex);
    }
}

void mmap_unlock(void)
{
    if (--mmap_lock_count == 0) {
        pthread_mutex_unlock(&mmap_mutex);
    }
}

bool have_mmap_lock(void)
{
    return mmap_lock_count > 0;
}

/* Grab lock to make sure things are in a consistent state after fork(). */
void mmap_fork_start(void)
{
    if (mmap_lock_count) {
        abort();
    }
    pthread_mutex_lock(&mmap_mutex);
}

void mmap_fork_end(int child)
{
    if (child) {
        pthread_mutex_init(&mmap_mutex, NULL);
    } else {
        pthread_mutex_unlock(&mmap_mutex);
    }
}
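
/*
 * Note: fork() happens with mmap_mutex held (see mmap_fork_start above),
 * so the child inherits the mutex in the locked state.  The thread that
 * locked it does not exist in the child, which is why the child
 * re-initializes the mutex instead of unlocking it.
 */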

/*
 * Validate target prot bitmask.
 * Return the prot bitmask for the host in *HOST_PROT.
 * Return 0 if the target prot bitmask is invalid, otherwise
 * the internal qemu page_flags (which will include PAGE_VALID).
 */
static int validate_prot_to_pageflags(int *host_prot, int prot)
{
    int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
    int page_flags = (prot & PAGE_BITS) | PAGE_VALID;

    /*
     * For the host, we need not pass anything except read/write/exec.
     * While PROT_SEM is allowed by all hosts, it is also ignored, so
     * don't bother transforming guest bit to host bit.  Any other
     * target-specific prot bits will not be understood by the host
     * and will need to be encoded into page_flags for qemu emulation.
     *
     * Pages that are executable by the guest will never be executed
     * by the host, but the host will need to be able to read them.
     */
    *host_prot = (prot & (PROT_READ | PROT_WRITE))
               | (prot & PROT_EXEC ? PROT_READ : 0);

#ifdef TARGET_AARCH64
    {
        ARMCPU *cpu = ARM_CPU(thread_cpu);

        /*
         * The PROT_BTI bit is only accepted if the cpu supports the feature.
         * Since this is the unusual case, don't bother checking unless
         * the bit has been requested.  If set and valid, record the bit
         * within QEMU's page_flags.
         */
        if ((prot & TARGET_PROT_BTI) && cpu_isar_feature(aa64_bti, cpu)) {
            valid |= TARGET_PROT_BTI;
            page_flags |= PAGE_BTI;
        }
        /* Similarly for the PROT_MTE bit. */
        if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
            valid |= TARGET_PROT_MTE;
            page_flags |= PAGE_MTE;
        }
    }
#endif

    return prot & ~valid ? 0 : page_flags;
}
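
/*
 * Example (illustrative): a guest request of PROT_READ | PROT_EXEC
 * yields *host_prot = PROT_READ, because guest-executable pages are
 * only ever read by the translator on the host side, while the
 * returned page_flags are PAGE_READ | PAGE_EXEC | PAGE_VALID for
 * qemu's own page tracking.
 */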

/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
    abi_ulong end, host_start, host_end, addr;
    int prot1, ret, page_flags, host_prot;

    trace_target_mprotect(start, len, target_prot);

    if ((start & ~TARGET_PAGE_MASK) != 0) {
        return -TARGET_EINVAL;
    }
    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    end = start + len;
    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_ENOMEM;
    }
    if (len == 0) {
        return 0;
    }

    mmap_lock();
    host_start = start & qemu_host_page_mask;
    host_end = HOST_PAGE_ALIGN(end);
    if (start > host_start) {
        /* handle host page containing start */
        prot1 = host_prot;
        for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        if (host_end == host_start + qemu_host_page_size) {
            for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
                prot1 |= page_get_flags(addr);
            }
            end = host_end;
        }
        ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
                       prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_start += qemu_host_page_size;
    }
    if (end < host_end) {
        prot1 = host_prot;
        for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
            prot1 |= page_get_flags(addr);
        }
        ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
                       qemu_host_page_size, prot1 & PAGE_BITS);
        if (ret != 0) {
            goto error;
        }
        host_end -= qemu_host_page_size;
    }

    /* handle the pages in the middle */
    if (host_start < host_end) {
        ret = mprotect(g2h_untagged(host_start),
                       host_end - host_start, host_prot);
        if (ret != 0) {
            goto error;
        }
    }

    page_set_flags(start, start + len, page_flags);
    tb_invalidate_phys_range(start, start + len);
    ret = 0;

error:
    mmap_unlock();
    return ret;
}
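
/*
 * Illustration (hypothetical numbers): with 4K target pages on a 64K
 * host-page configuration, an mprotect of guest pages [60K, 68K)
 * touches two host pages, and the [0K, 64K) host page keeps the OR of
 * the protections of its other fifteen target pages, as computed by
 * the loops above.
 */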

/*
 * Map an incomplete host page.
 *
 * Here qemu_host_page_size > TARGET_PAGE_SIZE, so several target pages
 * share one host page, which cannot be mapped in isolation.  The
 * protections of the target pages outside [start, end) are OR-ed into
 * the host mapping, and file-backed contents are brought in with
 * pread() rather than mmap().
 */
static int mmap_frag(abi_ulong real_start,
                     abi_ulong start, abi_ulong end,
                     int prot, int flags, int fd, abi_ulong offset)
{
    abi_ulong real_end, addr;
    void *host_start;
    int prot1, prot_new;

    real_end = real_start + qemu_host_page_size;
    host_start = g2h_untagged(real_start);

    /* get the protection of the target pages outside the mapping */
    prot1 = 0;
    for (addr = real_start; addr < real_end; addr++) {
        if (addr < start || addr >= end) {
            prot1 |= page_get_flags(addr);
        }
    }

    if (prot1 == 0) {
        /* no page was there, so we allocate one */
        void *p = mmap(host_start, qemu_host_page_size, prot,
                       flags | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            return -1;
        }
        prot1 = prot;
    }
    prot1 &= PAGE_BITS;

    prot_new = prot | prot1;
    if (!(flags & MAP_ANONYMOUS)) {
        /* msync() won't work here, so we return an error if write is
           possible while it is a shared mapping */
        if ((flags & MAP_TYPE) == MAP_SHARED && (prot & PROT_WRITE)) {
            return -1;
        }

        /* adjust protection to be able to read */
        if (!(prot1 & PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
        }

        /* read the corresponding file data */
        if (pread(fd, g2h_untagged(start), end - start, offset) == -1) {
            return -1;
        }

        /* put final protection */
        if (prot_new != (prot1 | PROT_WRITE)) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
    } else {
        if (prot_new != prot1) {
            mprotect(host_start, qemu_host_page_size, prot_new);
        }
        if (prot_new & PROT_WRITE) {
            memset(g2h_untagged(start), 0, end - start);
        }
    }
    return 0;
}

#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
#ifdef TARGET_AARCH64
# define TASK_UNMAPPED_BASE  0x5500000000
#else
# define TASK_UNMAPPED_BASE  (1ul << 38)
#endif
#else
# define TASK_UNMAPPED_BASE  0x40000000
#endif
abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;

unsigned long last_brk;
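
/*
 * Worked example for the alignment arithmetic used below (numbers are
 * illustrative): with reserved_va = 0xffffffff, size = 0x4000 and
 * align = 0x1000,
 *     ((reserved_va - size) & -align) + size
 *   = (0xffffbfff & 0xfffff000) + 0x4000
 *   = 0xfffff000
 * which places END_ADDR just past 0xffffb000, the highest aligned
 * start whose whole [addr, addr + size) range fits in reserved space.
 */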

/* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
   of guest address space. */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
                                        abi_ulong align)
{
    abi_ulong addr, end_addr, incr = qemu_host_page_size;
    int prot;
    bool looped = false;

    if (size > reserved_va) {
        return (abi_ulong)-1;
    }

    /* Note that start and size have already been aligned by mmap_find_vma. */

    end_addr = start + size;
    if (start > reserved_va - size) {
        /* Start at the top of the address space. */
        end_addr = ((reserved_va - size) & -align) + size;
        looped = true;
    }

    /* Search downward from END_ADDR, checking to see if a page is in use. */
    addr = end_addr;
    while (1) {
        addr -= incr;
        if (addr > end_addr) {
            if (looped) {
                /* Failure.  The entire address space has been searched. */
                return (abi_ulong)-1;
            }
            /* Re-start at the top of the address space. */
            addr = end_addr = ((reserved_va - size) & -align) + size;
            looped = true;
        } else {
            prot = page_get_flags(addr);
            if (prot) {
                /* Page in use.  Restart below this page. */
                addr = end_addr = ((addr - size) & -align) + size;
            } else if (addr && addr + size == end_addr) {
                /* Success!  All pages between ADDR and END_ADDR are free. */
                if (start == mmap_next_start) {
                    mmap_next_start = addr;
                }
                return addr;
            }
        }
    }
}

/*
 * Find and reserve a free memory area of size 'size'.  The search
 * starts at 'start'.
 * It must be called with mmap_lock() held.
 * Return -1 on error.
 */
abi_ulong mmap_find_vma(abi_ulong start, abi_ulong size, abi_ulong align)
{
    void *ptr, *prev;
    abi_ulong addr;
    int wrapped, repeat;

    align = MAX(align, qemu_host_page_size);

    /* If 'start' == 0, then a default start address is used. */
    if (start == 0) {
        start = mmap_next_start;
    } else {
        start &= qemu_host_page_mask;
    }
    start = ROUND_UP(start, align);

    size = HOST_PAGE_ALIGN(size);

    if (reserved_va) {
        return mmap_find_vma_reserved(start, size, align);
    }

    addr = start;
    wrapped = repeat = 0;
    prev = NULL;

    for (;; prev = ptr) {
        /*
         * Reserve needed memory area to avoid a race.
         * It should be discarded using:
         *  - mmap() with MAP_FIXED flag
         *  - mremap() with MREMAP_FIXED flag
         *  - shmat() with SHM_REMAP flag
         */
        ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
                   MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);

        /* ENOMEM, if host address space has no memory */
        if (ptr == MAP_FAILED) {
            return (abi_ulong)-1;
        }

        /* Count the number of sequential returns of the same address.
           This is used to modify the search algorithm below. */
        repeat = (ptr == prev ? repeat + 1 : 0);

        if (h2g_valid(ptr + size - 1)) {
            addr = h2g(ptr);

            if ((addr & (align - 1)) == 0) {
                /* Success. */
                if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
                    mmap_next_start = addr + size;
                }
                return addr;
            }

            /* The address is not properly aligned for the target. */
            switch (repeat) {
            case 0:
                /* Assume the result that the kernel gave us is the
                   first with enough free space, so start again at the
                   next higher target page. */
                addr = ROUND_UP(addr, align);
                break;
            case 1:
                /* Sometimes the kernel decides to perform the allocation
                   at the top end of memory instead. */
                addr &= -align;
                break;
            case 2:
                /* Start over at low memory. */
                addr = 0;
                break;
            default:
                /* Fail.  This unaligned block must be the last. */
                addr = -1;
                break;
            }
        } else {
            /* Since the result the kernel gave didn't fit, start
               again at low memory.  If any repetition, fail. */
            addr = (repeat ? -1 : 0);
        }

        /* Unmap and try again. */
        munmap(ptr, size);

        /* ENOMEM if we checked the whole of the target address space. */
        if (addr == (abi_ulong)-1) {
            return (abi_ulong)-1;
        } else if (addr == 0) {
            if (wrapped) {
                return (abi_ulong)-1;
            }
            wrapped = 1;
            /* Don't actually use 0 when wrapping, instead indicate
               that we'd truly like an allocation in low memory. */
            addr = (mmap_min_addr > TARGET_PAGE_SIZE
                    ? TARGET_PAGE_ALIGN(mmap_min_addr)
                    : TARGET_PAGE_SIZE);
        } else if (wrapped && addr >= start) {
            return (abi_ulong)-1;
        }
    }
}
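
/*
 * target_mmap() below has two main paths.  Without MAP_FIXED, a host
 * range is reserved via mmap_find_vma() and the file (if any) is then
 * mapped over it at a fixed address.  With MAP_FIXED, the range may
 * have to be split into a leading fragment, a directly mapped middle,
 * and a trailing fragment, because host and target page sizes can
 * differ (see mmap_frag() above).
 */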

/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
                     int flags, int fd, abi_ulong offset)
{
    abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
    int page_flags, host_prot;

    mmap_lock();
    trace_target_mmap(start, len, target_prot, flags, fd, offset);

    if (!len) {
        errno = EINVAL;
        goto fail;
    }

    page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
    if (!page_flags) {
        errno = EINVAL;
        goto fail;
    }

    /* Also check for overflows... */
    len = TARGET_PAGE_ALIGN(len);
    if (!len) {
        errno = ENOMEM;
        goto fail;
    }

    if (offset & ~TARGET_PAGE_MASK) {
        errno = EINVAL;
        goto fail;
    }

    /*
     * If we're mapping shared memory, ensure we generate code for parallel
     * execution and flush old translations.  This will work up to the level
     * supported by the host -- anything that requires EXCP_ATOMIC will not
     * be atomic with respect to an external process.
     */
    if (flags & MAP_SHARED) {
        CPUState *cpu = thread_cpu;
        if (!(cpu->tcg_cflags & CF_PARALLEL)) {
            cpu->tcg_cflags |= CF_PARALLEL;
            tb_flush(cpu);
        }
    }

    real_start = start & qemu_host_page_mask;
    host_offset = offset & qemu_host_page_mask;

    /* If the user is asking for the kernel to find a location, do that
       before we truncate the length for mapping files below. */
    if (!(flags & MAP_FIXED)) {
        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);
        start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
        if (start == (abi_ulong)-1) {
            errno = ENOMEM;
            goto fail;
        }
    }

    /* When mapping files into a memory area larger than the file, accesses
       to pages beyond the file size will cause a SIGBUS.

       For example, if mmapping a file of 100 bytes on a host with 4K pages
       emulating a target with 8K pages, the target expects to be able to
       access the first 8K.  But the host will trap us on any access beyond
       4K.

       When emulating a target with a larger page size than the host's, we
       may need to truncate file maps at EOF and add extra anonymous pages
       up to the target's page boundary. */

    if ((qemu_real_host_page_size() < qemu_host_page_size) &&
        !(flags & MAP_ANONYMOUS)) {
        struct stat sb;

        if (fstat(fd, &sb) == -1) {
            goto fail;
        }

        /* Are we trying to create a map beyond EOF? */
        if (offset + len > sb.st_size) {
            /* If so, truncate the file map at EOF aligned with
               the host's real page size.  Additional anonymous maps
               will be created beyond EOF. */
            len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
        }
    }

    if (!(flags & MAP_FIXED)) {
        unsigned long host_start;
        void *p;

        host_len = len + offset - host_offset;
        host_len = HOST_PAGE_ALIGN(host_len);

        /* Note: we prefer to control the mapping address.  It is
           especially important if qemu_host_page_size >
           qemu_real_host_page_size. */
        p = mmap(g2h_untagged(start), host_len, host_prot,
                 flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
        if (p == MAP_FAILED) {
            goto fail;
        }
        /* update start so that it points to the file position at 'offset' */
        host_start = (unsigned long)p;
        if (!(flags & MAP_ANONYMOUS)) {
            p = mmap(g2h_untagged(start), len, host_prot,
                     flags | MAP_FIXED, fd, host_offset);
            if (p == MAP_FAILED) {
                munmap(g2h_untagged(start), host_len);
                goto fail;
            }
            host_start += offset - host_offset;
        }
        start = h2g(host_start);
    } else {
        if (start & ~TARGET_PAGE_MASK) {
            errno = EINVAL;
            goto fail;
        }
        end = start + len;
        real_end = HOST_PAGE_ALIGN(end);

        /*
         * Test if requested memory area fits target address space.
         * It can fail only on a 64-bit host with a 32-bit target.
         * On any other target/host combination, the host mmap() handles
         * this error correctly.
         */
        if (end < start || !guest_range_valid_untagged(start, len)) {
            errno = ENOMEM;
            goto fail;
        }

        /* worst case: we cannot map the file because the offset is not
           aligned, so we read it */
        if (!(flags & MAP_ANONYMOUS) &&
            (offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
            /* msync() won't work here, so we return an error if write is
               possible while it is a shared mapping */
            if ((flags & MAP_TYPE) == MAP_SHARED &&
                (host_prot & PROT_WRITE)) {
                errno = EINVAL;
                goto fail;
            }
            retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
                                  MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
                                  -1, 0);
            if (retaddr == -1) {
                goto fail;
            }
            if (pread(fd, g2h_untagged(start), len, offset) == -1) {
                goto fail;
            }
            if (!(host_prot & PROT_WRITE)) {
                ret = target_mprotect(start, len, target_prot);
                assert(ret == 0);
            }
            goto the_end;
        }

        /* handle the start of the mapping */
        if (start > real_start) {
            if (real_end == real_start + qemu_host_page_size) {
                /* one single host page */
                ret = mmap_frag(real_start, start, end,
                                host_prot, flags, fd, offset);
                if (ret == -1) {
                    goto fail;
                }
                goto the_end1;
            }
            ret = mmap_frag(real_start, start,
                            real_start + qemu_host_page_size,
                            host_prot, flags, fd, offset);
            if (ret == -1) {
                goto fail;
            }
            real_start += qemu_host_page_size;
        }
        /* handle the end of the mapping */
        if (end < real_end) {
            ret = mmap_frag(real_end - qemu_host_page_size,
                            real_end - qemu_host_page_size, end,
                            host_prot, flags, fd,
                            offset + real_end - qemu_host_page_size - start);
            if (ret == -1) {
                goto fail;
            }
            real_end -= qemu_host_page_size;
        }

        /* map the middle (easier) */
        if (real_start < real_end) {
            void *p;
            unsigned long offset1;
            if (flags & MAP_ANONYMOUS) {
                offset1 = 0;
            } else {
                offset1 = offset + real_start - start;
            }
            p = mmap(g2h_untagged(real_start), real_end - real_start,
                     host_prot, flags, fd, offset1);
            if (p == MAP_FAILED) {
                goto fail;
            }
        }
    }
 the_end1:
    if (flags & MAP_ANONYMOUS) {
        page_flags |= PAGE_ANON;
    }
    page_flags |= PAGE_RESET;
    page_set_flags(start, start + len, page_flags);
 the_end:
    trace_target_mmap_complete(start);
    if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
        FILE *f = qemu_log_trylock();
        if (f) {
            fprintf(f, "page layout changed following mmap\n");
            page_dump(f);
            qemu_log_unlock(f);
        }
    }
    tb_invalidate_phys_range(start, start + len);
    mmap_unlock();
    return start;
fail:
    mmap_unlock();
    return -1;
}
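
/*
 * When reserved_va is in use, the guest address space lives inside one
 * big host reservation, so "unmapping" must reinstall a PROT_NONE
 * placeholder mapping rather than call munmap(); otherwise the hole
 * could be reused by an unrelated host allocation.  That is what
 * mmap_reserve() below does.
 */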

static void mmap_reserve(abi_ulong start, abi_ulong size)
{
    abi_ulong real_start;
    abi_ulong real_end;
    abi_ulong addr;
    abi_ulong end;
    int prot;

    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(start + size);
    end = start + size;
    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }
    if (real_start != real_end) {
        mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
             MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
             -1, 0);
    }
}

int target_munmap(abi_ulong start, abi_ulong len)
{
    abi_ulong end, real_start, real_end, addr;
    int prot, ret;

    trace_target_munmap(start, len);

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len);
    if (len == 0 || !guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    mmap_lock();
    end = start + len;
    real_start = start & qemu_host_page_mask;
    real_end = HOST_PAGE_ALIGN(end);

    if (start > real_start) {
        /* handle host page containing start */
        prot = 0;
        for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (real_end == real_start + qemu_host_page_size) {
            for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
                prot |= page_get_flags(addr);
            }
            end = real_end;
        }
        if (prot != 0) {
            real_start += qemu_host_page_size;
        }
    }
    if (end < real_end) {
        prot = 0;
        for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
            prot |= page_get_flags(addr);
        }
        if (prot != 0) {
            real_end -= qemu_host_page_size;
        }
    }

    ret = 0;
    /* unmap what we can */
    if (real_start < real_end) {
        if (reserved_va) {
            mmap_reserve(real_start, real_end - real_start);
        } else {
            ret = munmap(g2h_untagged(real_start), real_end - real_start);
        }
    }

    if (ret == 0) {
        page_set_flags(start, start + len, 0);
        tb_invalidate_phys_range(start, start + len);
    }
    mmap_unlock();
    return ret;
}
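
/*
 * target_mremap() mirrors the kernel's three cases: MREMAP_FIXED moves
 * the mapping to a caller-chosen address, MREMAP_MAYMOVE lets qemu pick
 * a new guest range via mmap_find_vma(), and otherwise the mapping is
 * resized in place -- which, under reserved_va, is allowed to grow only
 * if the pages just above it are unused.
 */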

abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size,
                       abi_ulong new_size, unsigned long flags,
                       abi_ulong new_addr)
{
    int prot;
    void *host_addr;

    if (!guest_range_valid_untagged(old_addr, old_size) ||
        ((flags & MREMAP_FIXED) &&
         !guest_range_valid_untagged(new_addr, new_size)) ||
        ((flags & MREMAP_MAYMOVE) == 0 &&
         !guest_range_valid_untagged(old_addr, new_size))) {
        errno = ENOMEM;
        return -1;
    }

    mmap_lock();

    if (flags & MREMAP_FIXED) {
        host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                           flags, g2h_untagged(new_addr));

        if (reserved_va && host_addr != MAP_FAILED) {
            /* If new and old addresses overlap then the above mremap will
               already have failed with EINVAL. */
            mmap_reserve(old_addr, old_size);
        }
    } else if (flags & MREMAP_MAYMOVE) {
        abi_ulong mmap_start;

        mmap_start = mmap_find_vma(0, new_size, TARGET_PAGE_SIZE);

        if (mmap_start == -1) {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        } else {
            host_addr = mremap(g2h_untagged(old_addr), old_size, new_size,
                               flags | MREMAP_FIXED,
                               g2h_untagged(mmap_start));
            if (reserved_va) {
                mmap_reserve(old_addr, old_size);
            }
        }
    } else {
        int prot = 0;
        if (reserved_va && old_size < new_size) {
            abi_ulong addr;
            for (addr = old_addr + old_size;
                 addr < old_addr + new_size;
                 addr++) {
                prot |= page_get_flags(addr);
            }
        }
        if (prot == 0) {
            host_addr = mremap(g2h_untagged(old_addr),
                               old_size, new_size, flags);

            if (host_addr != MAP_FAILED) {
                /* Check if address fits target address space */
                if (!guest_range_valid_untagged(h2g(host_addr), new_size)) {
                    /* Revert mremap() changes */
                    host_addr = mremap(g2h_untagged(old_addr),
                                       new_size, old_size, flags);
                    errno = ENOMEM;
                    host_addr = MAP_FAILED;
                } else if (reserved_va && old_size > new_size) {
                    mmap_reserve(old_addr + old_size, old_size - new_size);
                }
            }
        } else {
            errno = ENOMEM;
            host_addr = MAP_FAILED;
        }
    }

    if (host_addr == MAP_FAILED) {
        new_addr = -1;
    } else {
        new_addr = h2g(host_addr);
        prot = page_get_flags(old_addr);
        page_set_flags(old_addr, old_addr + old_size, 0);
        page_set_flags(new_addr, new_addr + new_size,
                       prot | PAGE_VALID | PAGE_RESET);
    }
    tb_invalidate_phys_range(new_addr, new_addr + new_size);
    mmap_unlock();
    return new_addr;
}
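
/*
 * Passthrough of madvise(MADV_DONTNEED) is only safe when the range is
 * host-page aligned and every target page in it is anonymous: host
 * pages shared with file-backed data or with target pages outside the
 * range must not be dropped behind the guest's back.
 */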

static bool can_passthrough_madv_dontneed(abi_ulong start, abi_ulong end)
{
    ulong addr;

    if ((start | end) & ~qemu_host_page_mask) {
        return false;
    }

    for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
        if (!(page_get_flags(addr) & PAGE_ANON)) {
            return false;
        }
    }

    return true;
}

abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
    abi_ulong len, end;
    int ret = 0;

    if (start & ~TARGET_PAGE_MASK) {
        return -TARGET_EINVAL;
    }
    len = TARGET_PAGE_ALIGN(len_in);

    if (len_in && !len) {
        return -TARGET_EINVAL;
    }

    end = start + len;
    if (end < start) {
        return -TARGET_EINVAL;
    }

    if (end == start) {
        return 0;
    }

    if (!guest_range_valid_untagged(start, len)) {
        return -TARGET_EINVAL;
    }

    /*
     * A straight passthrough may not be safe because qemu sometimes turns
     * private file-backed mappings into anonymous mappings.
     *
     * This is a hint, so ignoring and returning success is ok.
     *
     * This breaks MADV_DONTNEED, and implementing it completely would be
     * quite complicated.  However, there is one low-hanging fruit:
     * host-page-aligned anonymous mappings.  In that case passthrough is
     * safe, so do it.
     */
    mmap_lock();
    if (advice == MADV_DONTNEED &&
        can_passthrough_madv_dontneed(start, end)) {
        ret = get_errno(madvise(g2h_untagged(start), len, MADV_DONTNEED));
        if (ret == 0) {
            page_reset_target_data(start, start + len);
        }
    }
    mmap_unlock();

    return ret;
}