* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
+#include <sys/shm.h>
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
+#include "target_mman.h"
+#include "qemu/interval-tree.h"
+
+#ifdef TARGET_ARM
+#include "target/arm/cpu-features.h"
+#endif
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;
void mmap_unlock(void)
{
+ assert(mmap_lock_count > 0);
if (--mmap_lock_count == 0) {
pthread_mutex_unlock(&mmap_mutex);
}
void mmap_fork_end(int child)
{
- if (child)
+ if (child) {
pthread_mutex_init(&mmap_mutex, NULL);
- else
+ } else {
pthread_mutex_unlock(&mmap_mutex);
+ }
}
+
+/* Protected by mmap_lock. */
+static IntervalTreeRoot shm_regions;
+
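+/* Record [start, last] as an attached shm region, for later lookup by shmdt. */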
+static void shm_region_add(abi_ptr start, abi_ptr last)
+{
+ IntervalTreeNode *i = g_new0(IntervalTreeNode, 1);
+
+ i->start = start;
+ i->last = last;
+ interval_tree_insert(i, &shm_regions);
+}
+
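+/* Return the last byte of the region starting at START, or 0 if none. */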
+static abi_ptr shm_region_find(abi_ptr start)
+{
+ IntervalTreeNode *i;
+
+ for (i = interval_tree_iter_first(&shm_regions, start, start); i;
+ i = interval_tree_iter_next(i, start, start)) {
+ if (i->start == start) {
+ return i->last;
+ }
+ }
+ return 0;
+}
+
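+/* Remove all regions that fall entirely within [start, last]. */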
+static void shm_region_rm_complete(abi_ptr start, abi_ptr last)
+{
+ IntervalTreeNode *i, *n;
+
+ for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) {
+ n = interval_tree_iter_next(i, start, last);
+ if (i->start >= start && i->last <= last) {
+ interval_tree_remove(i, &shm_regions);
+ g_free(i);
+ }
+ }
}
/*
* Return 0 if the target prot bitmask is invalid, otherwise
* the internal qemu page_flags (which will include PAGE_VALID).
*/
-static int validate_prot_to_pageflags(int *host_prot, int prot)
+static int validate_prot_to_pageflags(int prot)
{
int valid = PROT_READ | PROT_WRITE | PROT_EXEC | TARGET_PROT_SEM;
int page_flags = (prot & PAGE_BITS) | PAGE_VALID;
- /*
- * For the host, we need not pass anything except read/write/exec.
- * While PROT_SEM is allowed by all hosts, it is also ignored, so
- * don't bother transforming guest bit to host bit. Any other
- * target-specific prot bits will not be understood by the host
- * and will need to be encoded into page_flags for qemu emulation.
- *
- * Pages that are executable by the guest will never be executed
- * by the host, but the host will need to be able to read them.
- */
- *host_prot = (prot & (PROT_READ | PROT_WRITE))
- | (prot & PROT_EXEC ? PROT_READ : 0);
-
#ifdef TARGET_AARCH64
{
ARMCPU *cpu = ARM_CPU(thread_cpu);

/* The PROT_MTE bit is only accepted if the cpu supports the feature. */
if ((prot & TARGET_PROT_MTE) && cpu_isar_feature(aa64_mte, cpu)) {
valid |= TARGET_PROT_MTE;
page_flags |= PAGE_MTE;
}
}
+#elif defined(TARGET_HPPA)
+ valid |= PROT_GROWSDOWN | PROT_GROWSUP;
#endif
return prot & ~valid ? 0 : page_flags;
}
+/*
+ * For the host, we need not pass anything except read/write/exec.
+ * While PROT_SEM is allowed by all hosts, it is also ignored, so
+ * don't bother transforming guest bit to host bit. Any other
+ * target-specific prot bits will not be understood by the host
+ * and will need to be encoded into page_flags for qemu emulation.
+ *
+ * Pages that are executable by the guest will never be executed
+ * by the host, but the host will need to be able to read them.
+ */
+static int target_to_host_prot(int prot)
+{
+ return (prot & (PROT_READ | PROT_WRITE)) |
+ (prot & PROT_EXEC ? PROT_READ : 0);
+}
+
/* NOTE: all the constants are the HOST ones, but addresses are target. */
int target_mprotect(abi_ulong start, abi_ulong len, int target_prot)
{
- abi_ulong end, host_start, host_end, addr;
- int prot1, ret, page_flags, host_prot;
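+ /*
+  * The operation may span up to three host ranges: an unaligned head
+  * page, an aligned middle, and an unaligned tail page, each of which
+  * may need a different merged protection.
+  */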
+ abi_ulong starts[3];
+ abi_ulong lens[3];
+ int prots[3];
+ abi_ulong host_start, host_last, last;
+ int prot1, ret, page_flags, nranges;
trace_target_mprotect(start, len, target_prot);
if ((start & ~TARGET_PAGE_MASK) != 0) {
return -TARGET_EINVAL;
}
- page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
+ page_flags = validate_prot_to_pageflags(target_prot);
if (!page_flags) {
return -TARGET_EINVAL;
}
+ if (len == 0) {
+ return 0;
+ }
len = TARGET_PAGE_ALIGN(len);
- end = start + len;
if (!guest_range_valid_untagged(start, len)) {
return -TARGET_ENOMEM;
}
- if (len == 0) {
- return 0;
- }
- mmap_lock();
+ last = start + len - 1;
host_start = start & qemu_host_page_mask;
- host_end = HOST_PAGE_ALIGN(end);
- if (start > host_start) {
- /* handle host page containing start */
- prot1 = host_prot;
- for (addr = host_start; addr < start; addr += TARGET_PAGE_SIZE) {
- prot1 |= page_get_flags(addr);
- }
- if (host_end == host_start + qemu_host_page_size) {
- for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
- prot1 |= page_get_flags(addr);
- }
- end = host_end;
+ host_last = HOST_PAGE_ALIGN(last) - 1;
+ nranges = 0;
+
+ mmap_lock();
+
+ if (host_last - host_start < qemu_host_page_size) {
+ /* Single host page contains all guest pages: sum the prot. */
+ prot1 = target_prot;
+ for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
+ prot1 |= page_get_flags(a);
}
- ret = mprotect(g2h_untagged(host_start), qemu_host_page_size,
- prot1 & PAGE_BITS);
- if (ret != 0) {
- goto error;
+ for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
+ prot1 |= page_get_flags(a + 1);
}
- host_start += qemu_host_page_size;
- }
- if (end < host_end) {
- prot1 = host_prot;
- for (addr = end; addr < host_end; addr += TARGET_PAGE_SIZE) {
- prot1 |= page_get_flags(addr);
+ starts[nranges] = host_start;
+ lens[nranges] = qemu_host_page_size;
+ prots[nranges] = prot1;
+ nranges++;
+ } else {
+ if (host_start < start) {
+ /* Host page contains more than one guest page: sum the prot. */
+ prot1 = target_prot;
+ for (abi_ulong a = host_start; a < start; a += TARGET_PAGE_SIZE) {
+ prot1 |= page_get_flags(a);
+ }
+ /* If the resulting sum differs, create a new range. */
+ if (prot1 != target_prot) {
+ starts[nranges] = host_start;
+ lens[nranges] = qemu_host_page_size;
+ prots[nranges] = prot1;
+ nranges++;
+ host_start += qemu_host_page_size;
+ }
}
- ret = mprotect(g2h_untagged(host_end - qemu_host_page_size),
- qemu_host_page_size, prot1 & PAGE_BITS);
- if (ret != 0) {
- goto error;
+
+ if (last < host_last) {
+ /* Host page contains more than one guest page: sum the prot. */
+ prot1 = target_prot;
+ for (abi_ulong a = last; a < host_last; a += TARGET_PAGE_SIZE) {
+ prot1 |= page_get_flags(a + 1);
+ }
+ /* If the resulting sum differs, create a new range. */
+ if (prot1 != target_prot) {
+ host_last -= qemu_host_page_size;
+ starts[nranges] = host_last + 1;
+ lens[nranges] = qemu_host_page_size;
+ prots[nranges] = prot1;
+ nranges++;
+ }
+ }
+
+ /* Create a range for the middle, if any remains. */
+ if (host_start < host_last) {
+ starts[nranges] = host_start;
+ lens[nranges] = host_last - host_start + 1;
+ prots[nranges] = target_prot;
+ nranges++;
}
- host_end -= qemu_host_page_size;
}
- /* handle the pages in the middle */
- if (host_start < host_end) {
- ret = mprotect(g2h_untagged(host_start),
- host_end - host_start, host_prot);
+ for (int i = 0; i < nranges; ++i) {
+ ret = mprotect(g2h_untagged(starts[i]), lens[i],
+ target_to_host_prot(prots[i]));
if (ret != 0) {
goto error;
}
}
- page_set_flags(start, start + len, page_flags);
- tb_invalidate_phys_range(start, start + len);
+ page_set_flags(start, last, page_flags);
ret = 0;
-error:
+ error:
mmap_unlock();
return ret;
}
/* map an incomplete host page */
-static int mmap_frag(abi_ulong real_start,
- abi_ulong start, abi_ulong end,
- int prot, int flags, int fd, abi_ulong offset)
+static bool mmap_frag(abi_ulong real_start, abi_ulong start, abi_ulong last,
+ int prot, int flags, int fd, off_t offset)
{
- abi_ulong real_end, addr;
+ abi_ulong real_last;
void *host_start;
- int prot1, prot_new;
+ int prot_old, prot_new;
+ int host_prot_old, host_prot_new;
+
+ if (!(flags & MAP_ANONYMOUS)
+ && (flags & MAP_TYPE) == MAP_SHARED
+ && (prot & PROT_WRITE)) {
+ /*
+ * msync() won't work with the partial page, so we return an
+ * error if write is possible while it is a shared mapping.
+ */
+ errno = EINVAL;
+ return false;
+ }
- real_end = real_start + qemu_host_page_size;
+ real_last = real_start + qemu_host_page_size - 1;
host_start = g2h_untagged(real_start);
- /* get the protection of the target pages outside the mapping */
- prot1 = 0;
- for(addr = real_start; addr < real_end; addr++) {
- if (addr < start || addr >= end)
- prot1 |= page_get_flags(addr);
+ /* Get the protection of the target pages outside the mapping. */
+ prot_old = 0;
+ for (abi_ulong a = real_start; a < start; a += TARGET_PAGE_SIZE) {
+ prot_old |= page_get_flags(a);
+ }
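+ /* Likewise for the pages after the fragment, walking back from real_last. */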
+ for (abi_ulong a = real_last; a > last; a -= TARGET_PAGE_SIZE) {
+ prot_old |= page_get_flags(a);
}
- if (prot1 == 0) {
- /* no page was there, so we allocate one */
- void *p = mmap(host_start, qemu_host_page_size, prot,
+ if (prot_old == 0) {
+ /*
+ * Since !(prot_old & PAGE_VALID), there were no guest pages
+ * outside of the fragment we need to map. Allocate a new host
+ * page to cover, discarding whatever else may have been present.
+ */
+ void *p = mmap(host_start, qemu_host_page_size,
+ target_to_host_prot(prot),
flags | MAP_ANONYMOUS, -1, 0);
- if (p == MAP_FAILED)
- return -1;
- prot1 = prot;
- }
- prot1 &= PAGE_BITS;
-
- prot_new = prot | prot1;
- if (!(flags & MAP_ANONYMOUS)) {
- /* msync() won't work here, so we return an error if write is
- possible while it is a shared mapping */
- if ((flags & MAP_TYPE) == MAP_SHARED &&
- (prot & PROT_WRITE))
- return -1;
-
- /* adjust protection to be able to read */
- if (!(prot1 & PROT_WRITE))
- mprotect(host_start, qemu_host_page_size, prot1 | PROT_WRITE);
-
- /* read the corresponding file data */
- if (pread(fd, g2h_untagged(start), end - start, offset) == -1)
- return -1;
-
- /* put final protection */
- if (prot_new != (prot1 | PROT_WRITE))
- mprotect(host_start, qemu_host_page_size, prot_new);
- } else {
- if (prot_new != prot1) {
- mprotect(host_start, qemu_host_page_size, prot_new);
+ if (p != host_start) {
+ if (p != MAP_FAILED) {
+ munmap(p, qemu_host_page_size);
+ errno = EEXIST;
+ }
+ return false;
}
- if (prot_new & PROT_WRITE) {
- memset(g2h_untagged(start), 0, end - start);
+ prot_old = prot;
+ }
+ prot_new = prot | prot_old;
+
+ host_prot_old = target_to_host_prot(prot_old);
+ host_prot_new = target_to_host_prot(prot_new);
+
+ /* Adjust protection to be able to write. */
+ if (!(host_prot_old & PROT_WRITE)) {
+ host_prot_old |= PROT_WRITE;
+ mprotect(host_start, qemu_host_page_size, host_prot_old);
+ }
+
+ /* Read or zero the new guest pages. */
+ if (flags & MAP_ANONYMOUS) {
+ memset(g2h_untagged(start), 0, last - start + 1);
+ } else {
+ if (pread(fd, g2h_untagged(start), last - start + 1, offset) == -1) {
+ return false;
}
}
- return 0;
-}
-#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
-#ifdef TARGET_AARCH64
-# define TASK_UNMAPPED_BASE 0x5500000000
-#else
-# define TASK_UNMAPPED_BASE (1ul << 38)
-#endif
-#else
-# define TASK_UNMAPPED_BASE 0x40000000
-#endif
-abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
+ /* Put final protection. */
+ if (host_prot_new != host_prot_old) {
+ mprotect(host_start, qemu_host_page_size, host_prot_new);
+ }
+ return true;
+}
-unsigned long last_brk;
+abi_ulong task_unmapped_base;
+abi_ulong elf_et_dyn_base;
+abi_ulong mmap_next_start;
-/* Subroutine of mmap_find_vma, used when we have pre-allocated a chunk
- of guest address space. */
+/*
+ * Subroutine of mmap_find_vma, used when we have pre-allocated
+ * a chunk of guest address space.
+ */
static abi_ulong mmap_find_vma_reserved(abi_ulong start, abi_ulong size,
abi_ulong align)
{
- abi_ulong addr, end_addr, incr = qemu_host_page_size;
- int prot;
- bool looped = false;
-
- if (size > reserved_va) {
- return (abi_ulong)-1;
- }
+ target_ulong ret;
- /* Note that start and size have already been aligned by mmap_find_vma. */
-
- end_addr = start + size;
- if (start > reserved_va - size) {
- /* Start at the top of the address space. */
- end_addr = ((reserved_va - size) & -align) + size;
- looped = true;
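+ /* Find a hole of at least SIZE bytes, ALIGN-aligned, in [start, reserved_va]. */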
+ ret = page_find_range_empty(start, reserved_va, size, align);
+ if (ret == -1 && start > mmap_min_addr) {
+ /* Restart at the beginning of the address space. */
+ ret = page_find_range_empty(mmap_min_addr, start - 1, size, align);
}
- /* Search downward from END_ADDR, checking to see if a page is in use. */
- addr = end_addr;
- while (1) {
- addr -= incr;
- if (addr > end_addr) {
- if (looped) {
- /* Failure. The entire address space has been searched. */
- return (abi_ulong)-1;
- }
- /* Re-start at the top of the address space. */
- addr = end_addr = ((reserved_va - size) & -align) + size;
- looped = true;
- } else {
- prot = page_get_flags(addr);
- if (prot) {
- /* Page in use. Restart below this page. */
- addr = end_addr = ((addr - size) & -align) + size;
- } else if (addr && addr + size == end_addr) {
- /* Success! All pages between ADDR and END_ADDR are free. */
- if (start == mmap_next_start) {
- mmap_next_start = addr;
- }
- return addr;
- }
- }
- }
+ return ret;
}
/*
* - shmat() with SHM_REMAP flag
*/
ptr = mmap(g2h_untagged(addr), size, PROT_NONE,
- MAP_ANONYMOUS|MAP_PRIVATE|MAP_NORESERVE, -1, 0);
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
/* ENOMEM, if host address space has no memory */
if (ptr == MAP_FAILED) {
return (abi_ulong)-1;
}
- /* Count the number of sequential returns of the same address.
- This is used to modify the search algorithm below. */
+ /*
+ * Count the number of sequential returns of the same address.
+ * This is used to modify the search algorithm below.
+ */
repeat = (ptr == prev ? repeat + 1 : 0);
if (h2g_valid(ptr + size - 1)) {
if ((addr & (align - 1)) == 0) {
/* Success. */
- if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
+ if (start == mmap_next_start && addr >= task_unmapped_base) {
mmap_next_start = addr + size;
}
return addr;
/* The address is not properly aligned for the target. */
switch (repeat) {
case 0:
- /* Assume the result that the kernel gave us is the
- first with enough free space, so start again at the
- next higher target page. */
+ /*
+ * Assume the result that the kernel gave us is the
+ * first with enough free space, so start again at the
+ * next higher target page.
+ */
addr = ROUND_UP(addr, align);
break;
case 1:
- /* Sometimes the kernel decides to perform the allocation
- at the top end of memory instead. */
+ /*
+ * Sometimes the kernel decides to perform the allocation
+ * at the top end of memory instead.
+ */
addr &= -align;
break;
case 2:
break;
}
} else {
- /* Since the result the kernel gave didn't fit, start
- again at low memory. If any repetition, fail. */
+ /*
+ * Since the result the kernel gave didn't fit, start
+ * again at low memory. If any repetition, fail.
+ */
addr = (repeat ? -1 : 0);
}
return (abi_ulong)-1;
}
wrapped = 1;
- /* Don't actually use 0 when wrapping, instead indicate
- that we'd truly like an allocation in low memory. */
+ /*
+ * Don't actually use 0 when wrapping, instead indicate
+ * that we'd truly like an allocation in low memory.
+ */
addr = (mmap_min_addr > TARGET_PAGE_SIZE
? TARGET_PAGE_ALIGN(mmap_min_addr)
: TARGET_PAGE_SIZE);
/* NOTE: all the constants are the HOST ones */
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
- int flags, int fd, abi_ulong offset)
+ int flags, int fd, off_t offset)
{
- abi_ulong ret, end, real_start, real_end, retaddr, host_offset, host_len;
- int page_flags, host_prot;
+ abi_ulong ret, last, real_start, real_last, retaddr, host_len;
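+ /* Track pages mapped 1:1 with the host; they can take PAGE_PASSTHROUGH. */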
+ abi_ulong passthrough_start = -1, passthrough_last = 0;
+ int page_flags;
+ off_t host_offset;
mmap_lock();
trace_target_mmap(start, len, target_prot, flags, fd, offset);
goto fail;
}
- page_flags = validate_prot_to_pageflags(&host_prot, target_prot);
+ page_flags = validate_prot_to_pageflags(target_prot);
if (!page_flags) {
errno = EINVAL;
goto fail;
real_start = start & qemu_host_page_mask;
host_offset = offset & qemu_host_page_mask;
- /* If the user is asking for the kernel to find a location, do that
- before we truncate the length for mapping files below. */
- if (!(flags & MAP_FIXED)) {
+ /*
+ * If the user is asking for the kernel to find a location, do that
+ * before we truncate the length for mapping files below.
+ */
+ if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
host_len = len + offset - host_offset;
host_len = HOST_PAGE_ALIGN(host_len);
start = mmap_find_vma(real_start, host_len, TARGET_PAGE_SIZE);
}
}
- /* When mapping files into a memory area larger than the file, accesses
- to pages beyond the file size will cause a SIGBUS.
-
- For example, if mmaping a file of 100 bytes on a host with 4K pages
- emulating a target with 8K pages, the target expects to be able to
- access the first 8K. But the host will trap us on any access beyond
- 4K.
-
- When emulating a target with a larger page-size than the hosts, we
- may need to truncate file maps at EOF and add extra anonymous pages
- up to the targets page boundary. */
-
+ /*
+ * When mapping files into a memory area larger than the file, accesses
+ * to pages beyond the file size will cause a SIGBUS.
+ *
+ * For example, if mmapping a file of 100 bytes on a host with 4K pages
+ * emulating a target with 8K pages, the target expects to be able to
+ * access the first 8K. But the host will trap us on any access beyond
+ * 4K.
+ *
+ * When emulating a target with a larger page size than the host's, we
+ * may need to truncate file maps at EOF and add extra anonymous pages
+ * up to the target's page boundary.
+ */
if ((qemu_real_host_page_size() < qemu_host_page_size) &&
!(flags & MAP_ANONYMOUS)) {
struct stat sb;
- if (fstat (fd, &sb) == -1)
- goto fail;
+ if (fstat(fd, &sb) == -1) {
+ goto fail;
+ }
- /* Are we trying to create a map beyond EOF?. */
- if (offset + len > sb.st_size) {
- /* If so, truncate the file map at eof aligned with
- the hosts real pagesize. Additional anonymous maps
- will be created beyond EOF. */
- len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
- }
+ /* Are we trying to create a map beyond EOF? */
+ if (offset + len > sb.st_size) {
+ /*
+ * If so, truncate the file map at EOF, aligned with the
+ * host's real page size. Additional anonymous maps will
+ * be created beyond EOF.
+ */
+ len = REAL_HOST_PAGE_ALIGN(sb.st_size - offset);
+ }
}
- if (!(flags & MAP_FIXED)) {
- unsigned long host_start;
+ if (!(flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))) {
+ uintptr_t host_start;
+ int host_prot;
void *p;
host_len = len + offset - host_offset;
host_len = HOST_PAGE_ALIGN(host_len);
+ host_prot = target_to_host_prot(target_prot);
- /* Note: we prefer to control the mapping address. It is
- especially important if qemu_host_page_size >
- qemu_real_host_page_size */
+ /*
+ * Note: we prefer to control the mapping address. It is
+ * especially important if qemu_host_page_size >
+ * qemu_real_host_page_size.
+ */
p = mmap(g2h_untagged(start), host_len, host_prot,
flags | MAP_FIXED | MAP_ANONYMOUS, -1, 0);
if (p == MAP_FAILED) {
goto fail;
}
/* update start so that it points to the file position at 'offset' */
- host_start = (unsigned long)p;
+ host_start = (uintptr_t)p;
if (!(flags & MAP_ANONYMOUS)) {
p = mmap(g2h_untagged(start), len, host_prot,
flags | MAP_FIXED, fd, host_offset);
host_start += offset - host_offset;
}
start = h2g(host_start);
+ last = start + len - 1;
+ passthrough_start = start;
+ passthrough_last = last;
} else {
if (start & ~TARGET_PAGE_MASK) {
errno = EINVAL;
goto fail;
}
- end = start + len;
- real_end = HOST_PAGE_ALIGN(end);
+ last = start + len - 1;
+ real_last = HOST_PAGE_ALIGN(last) - 1;
/*
* Test if requested memory area fits target address space
* It can fail only on 64-bit host with 32-bit target.
* On any other target/host host mmap() handles this error correctly.
*/
- if (end < start || !guest_range_valid_untagged(start, len)) {
+ if (last < start || !guest_range_valid_untagged(start, len)) {
errno = ENOMEM;
goto fail;
}
- /* worst case: we cannot map the file because the offset is not
- aligned, so we read it */
+ if (flags & MAP_FIXED_NOREPLACE) {
+ /* Validate that the chosen range is empty. */
+ if (!page_check_range_empty(start, last)) {
+ errno = EEXIST;
+ goto fail;
+ }
+
+ /*
+ * With reserved_va, the entire address space is mmaped in the
+ * host to ensure it isn't accidentally used for something else.
+ * We have just checked that the guest address is not mapped
+ * within the guest, but need to replace the host reservation.
+ *
+ * Without reserved_va, despite the guest address check above,
+ * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
+ * any host address mappings.
+ */
+ if (reserved_va) {
+ flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
+ }
+ }
+
+ /*
+ * Worst case: we cannot map the file because the offset is not
+ * aligned, so we read it.
+ */
if (!(flags & MAP_ANONYMOUS) &&
(offset & ~qemu_host_page_mask) != (start & ~qemu_host_page_mask)) {
- /* msync() won't work here, so we return an error if write is
- possible while it is a shared mapping */
- if ((flags & MAP_TYPE) == MAP_SHARED &&
- (host_prot & PROT_WRITE)) {
+ /*
+ * msync() won't work here, so we return an error if write is
+ * possible while it is a shared mapping.
+ */
+ if ((flags & MAP_TYPE) == MAP_SHARED
+ && (target_prot & PROT_WRITE)) {
errno = EINVAL;
goto fail;
}
retaddr = target_mmap(start, len, target_prot | PROT_WRITE,
- MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS,
+ (flags & (MAP_FIXED | MAP_FIXED_NOREPLACE))
+ | MAP_PRIVATE | MAP_ANONYMOUS,
-1, 0);
- if (retaddr == -1)
+ if (retaddr == -1) {
goto fail;
- if (pread(fd, g2h_untagged(start), len, offset) == -1)
+ }
+ if (pread(fd, g2h_untagged(start), len, offset) == -1) {
goto fail;
- if (!(host_prot & PROT_WRITE)) {
+ }
+ if (!(target_prot & PROT_WRITE)) {
ret = target_mprotect(start, len, target_prot);
assert(ret == 0);
}
goto the_end;
}
- 
+
/* handle the start of the mapping */
if (start > real_start) {
- if (real_end == real_start + qemu_host_page_size) {
+ if (real_last == real_start + qemu_host_page_size - 1) {
/* one single host page */
- ret = mmap_frag(real_start, start, end,
- host_prot, flags, fd, offset);
- if (ret == -1)
+ if (!mmap_frag(real_start, start, last,
+ target_prot, flags, fd, offset)) {
goto fail;
+ }
goto the_end1;
}
- ret = mmap_frag(real_start, start, real_start + qemu_host_page_size,
- host_prot, flags, fd, offset);
- if (ret == -1)
+ if (!mmap_frag(real_start, start,
+ real_start + qemu_host_page_size - 1,
+ target_prot, flags, fd, offset)) {
goto fail;
+ }
real_start += qemu_host_page_size;
}
/* handle the end of the mapping */
- if (end < real_end) {
- ret = mmap_frag(real_end - qemu_host_page_size,
- real_end - qemu_host_page_size, end,
- host_prot, flags, fd,
- offset + real_end - qemu_host_page_size - start);
- if (ret == -1)
+ if (last < real_last) {
+ abi_ulong real_page = real_last - qemu_host_page_size + 1;
+ if (!mmap_frag(real_page, real_page, last,
+ target_prot, flags, fd,
+ offset + real_page - start)) {
goto fail;
- real_end -= qemu_host_page_size;
+ }
+ real_last -= qemu_host_page_size;
}
/* map the middle (easier) */
- if (real_start < real_end) {
- void *p;
- unsigned long offset1;
- if (flags & MAP_ANONYMOUS)
+ if (real_start < real_last) {
+ void *p, *want_p;
+ off_t offset1;
+ size_t len1;
+
+ if (flags & MAP_ANONYMOUS) {
offset1 = 0;
- else
+ } else {
offset1 = offset + real_start - start;
- p = mmap(g2h_untagged(real_start), real_end - real_start,
- host_prot, flags, fd, offset1);
- if (p == MAP_FAILED)
+ }
+ len1 = real_last - real_start + 1;
+ want_p = g2h_untagged(real_start);
+
+ p = mmap(want_p, len1, target_to_host_prot(target_prot),
+ flags, fd, offset1);
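+ /*
+ * Pre-4.17 kernels ignore MAP_FIXED_NOREPLACE and may have mapped
+ * at another address; undo that here and report EEXIST ourselves.
+ */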
+ if (p != want_p) {
+ if (p != MAP_FAILED) {
+ munmap(p, len1);
+ errno = EEXIST;
+ }
goto fail;
+ }
+ passthrough_start = real_start;
+ passthrough_last = real_last;
}
}
the_end1:
page_flags |= PAGE_ANON;
}
page_flags |= PAGE_RESET;
- page_set_flags(start, start + len, page_flags);
+ if (passthrough_start > passthrough_last) {
+ page_set_flags(start, last, page_flags);
+ } else {
+ if (start < passthrough_start) {
+ page_set_flags(start, passthrough_start - 1, page_flags);
+ }
+ page_set_flags(passthrough_start, passthrough_last,
+ page_flags | PAGE_PASSTHROUGH);
+ if (passthrough_last < last) {
+ page_set_flags(passthrough_last + 1, last, page_flags);
+ }
+ }
+ shm_region_rm_complete(start, last);
the_end:
trace_target_mmap_complete(start);
if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
qemu_log_unlock(f);
}
}
- tb_invalidate_phys_range(start, start + len);
mmap_unlock();
return start;
fail:
return -1;
}
-static void mmap_reserve(abi_ulong start, abi_ulong size)
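+/*
+ * Release the guest range: with reserved_va, replace it with a PROT_NONE
+ * reservation; otherwise munmap it. Host pages still partly covered by
+ * other guest pages are retained. Returns 0 on success.
+ */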
+static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
abi_ulong real_start;
- abi_ulong real_end;
- abi_ulong addr;
- abi_ulong end;
+ abi_ulong real_last;
+ abi_ulong real_len;
+ abi_ulong last;
+ abi_ulong a;
+ void *host_start;
int prot;
+ last = start + len - 1;
real_start = start & qemu_host_page_mask;
- real_end = HOST_PAGE_ALIGN(start + size);
- end = start + size;
- if (start > real_start) {
- /* handle host page containing start */
+ real_last = HOST_PAGE_ALIGN(last) - 1;
+
+ /*
+ * If guest pages remain on the first or last host pages,
+ * adjust the deallocation to retain those guest pages.
+ * The single page special case is required for the last page,
+ * lest real_start overflow to zero.
+ */
+ if (real_last - real_start < qemu_host_page_size) {
prot = 0;
- for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
- prot |= page_get_flags(addr);
+ for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
+ prot |= page_get_flags(a);
}
- if (real_end == real_start + qemu_host_page_size) {
- for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
- prot |= page_get_flags(addr);
- }
- end = real_end;
+ for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
+ prot |= page_get_flags(a + 1);
+ }
+ if (prot != 0) {
+ return 0;
}
- if (prot != 0)
+ } else {
+ for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
+ prot |= page_get_flags(a);
+ }
+ if (prot != 0) {
real_start += qemu_host_page_size;
- }
- if (end < real_end) {
- prot = 0;
- for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
- prot |= page_get_flags(addr);
}
- if (prot != 0)
- real_end -= qemu_host_page_size;
+
+ for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
+ prot |= page_get_flags(a + 1);
+ }
+ if (prot != 0) {
+ real_last -= qemu_host_page_size;
+ }
+
+ if (real_last < real_start) {
+ return 0;
+ }
}
- if (real_start != real_end) {
- mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
- MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
- -1, 0);
+
+ real_len = real_last - real_start + 1;
+ host_start = g2h_untagged(real_start);
+
+ if (reserved_va) {
+ void *ptr = mmap(host_start, real_len, PROT_NONE,
+ MAP_FIXED | MAP_ANONYMOUS
+ | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
+ return ptr == host_start ? 0 : -1;
}
+ return munmap(host_start, real_len);
}
int target_munmap(abi_ulong start, abi_ulong len)
{
- abi_ulong end, real_start, real_end, addr;
- int prot, ret;
+ int ret;
trace_target_munmap(start, len);
- if (start & ~TARGET_PAGE_MASK)
- return -TARGET_EINVAL;
+ if (start & ~TARGET_PAGE_MASK) {
+ errno = EINVAL;
+ return -1;
+ }
len = TARGET_PAGE_ALIGN(len);
if (len == 0 || !guest_range_valid_untagged(start, len)) {
- return -TARGET_EINVAL;
+ errno = EINVAL;
+ return -1;
}
mmap_lock();
- end = start + len;
- real_start = start & qemu_host_page_mask;
- real_end = HOST_PAGE_ALIGN(end);
-
- if (start > real_start) {
- /* handle host page containing start */
- prot = 0;
- for(addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
- prot |= page_get_flags(addr);
- }
- if (real_end == real_start + qemu_host_page_size) {
- for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
- prot |= page_get_flags(addr);
- }
- end = real_end;
- }
- if (prot != 0)
- real_start += qemu_host_page_size;
- }
- if (end < real_end) {
- prot = 0;
- for(addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
- prot |= page_get_flags(addr);
- }
- if (prot != 0)
- real_end -= qemu_host_page_size;
- }
-
- ret = 0;
- /* unmap what we can */
- if (real_start < real_end) {
- if (reserved_va) {
- mmap_reserve(real_start, real_end - real_start);
- } else {
- ret = munmap(g2h_untagged(real_start), real_end - real_start);
- }
- }
-
- if (ret == 0) {
- page_set_flags(start, start + len, 0);
- tb_invalidate_phys_range(start, start + len);
+ ret = mmap_reserve_or_unmap(start, len);
+ if (likely(ret == 0)) {
+ page_set_flags(start, start + len - 1, 0);
+ shm_region_rm_complete(start, start + len - 1);
}
mmap_unlock();
+
return ret;
}
flags, g2h_untagged(new_addr));
if (reserved_va && host_addr != MAP_FAILED) {
- /* If new and old addresses overlap then the above mremap will
- already have failed with EINVAL. */
- mmap_reserve(old_addr, old_size);
+ /*
+ * If new and old addresses overlap then the above mremap will
+ * already have failed with EINVAL.
+ */
+ mmap_reserve_or_unmap(old_addr, old_size);
}
} else if (flags & MREMAP_MAYMOVE) {
abi_ulong mmap_start;
flags | MREMAP_FIXED,
g2h_untagged(mmap_start));
if (reserved_va) {
- mmap_reserve(old_addr, old_size);
+ mmap_reserve_or_unmap(old_addr, old_size);
}
}
} else {
- int prot = 0;
+ int page_flags = 0;
if (reserved_va && old_size < new_size) {
abi_ulong addr;
for (addr = old_addr + old_size;
addr < old_addr + new_size;
addr++) {
- prot |= page_get_flags(addr);
+ page_flags |= page_get_flags(addr);
}
}
- if (prot == 0) {
+ if (page_flags == 0) {
host_addr = mremap(g2h_untagged(old_addr),
old_size, new_size, flags);
errno = ENOMEM;
host_addr = MAP_FAILED;
} else if (reserved_va && old_size > new_size) {
- mmap_reserve(old_addr + old_size, old_size - new_size);
+ mmap_reserve_or_unmap(old_addr + old_size,
+ old_size - new_size);
}
}
} else {
} else {
new_addr = h2g(host_addr);
prot = page_get_flags(old_addr);
- page_set_flags(old_addr, old_addr + old_size, 0);
- page_set_flags(new_addr, new_addr + new_size,
+ page_set_flags(old_addr, old_addr + old_size - 1, 0);
+ shm_region_rm_complete(old_addr, old_addr + old_size - 1);
+ page_set_flags(new_addr, new_addr + new_size - 1,
prot | PAGE_VALID | PAGE_RESET);
+ shm_region_rm_complete(new_addr, new_addr + new_size - 1);
}
- tb_invalidate_phys_range(new_addr, new_addr + new_size);
mmap_unlock();
return new_addr;
}
-static bool can_passthrough_madv_dontneed(abi_ulong start, abi_ulong end)
+abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
- ulong addr;
+ abi_ulong len;
+ int ret = 0;
- if ((start | end) & ~qemu_host_page_mask) {
- return false;
+ if (start & ~TARGET_PAGE_MASK) {
+ return -TARGET_EINVAL;
+ }
+ if (len_in == 0) {
+ return 0;
+ }
+ len = TARGET_PAGE_ALIGN(len_in);
+ if (len == 0 || !guest_range_valid_untagged(start, len)) {
+ return -TARGET_EINVAL;
}
- for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
- if (!(page_get_flags(addr) & PAGE_ANON)) {
- return false;
+ /* Translate for some architectures which have different MADV_xxx values */
+ switch (advice) {
+ case TARGET_MADV_DONTNEED: /* alpha */
+ advice = MADV_DONTNEED;
+ break;
+ case TARGET_MADV_WIPEONFORK: /* parisc */
+ advice = MADV_WIPEONFORK;
+ break;
+ case TARGET_MADV_KEEPONFORK: /* parisc */
+ advice = MADV_KEEPONFORK;
+ break;
+ /* we do not care about the other MADV_xxx values yet */
+ }
+
+ /*
+ * Most advice values are hints, so ignoring and returning success is ok.
+ *
+ * However, some advice values such as MADV_DONTNEED, MADV_WIPEONFORK and
+ * MADV_KEEPONFORK are not hints and need to be emulated.
+ *
+ * A straight passthrough for those may not be safe because qemu sometimes
+ * turns private file-backed mappings into anonymous mappings.
+ * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
+ * same semantics for the host as for the guest.
+ *
+ * We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
+ * return failure if not.
+ *
+ * MADV_DONTNEED is passed through as well, if possible.
+ * If passthrough isn't possible, we nevertheless (wrongly!) return
+ * success, which is broken but some userspace programs fail to work
+ * otherwise. Completely implementing such emulation is quite complicated
+ * though.
+ */
+ mmap_lock();
+ switch (advice) {
+ case MADV_WIPEONFORK:
+ case MADV_KEEPONFORK:
+ ret = -EINVAL;
+ /* fall through */
+ case MADV_DONTNEED:
+ if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
+ ret = get_errno(madvise(g2h_untagged(start), len, advice));
+ if ((advice == MADV_DONTNEED) && (ret == 0)) {
+ page_reset_target_data(start, start + len - 1);
+ }
}
}
+ mmap_unlock();
- return true;
+ return ret;
}
-abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
+#ifndef TARGET_FORCE_SHMLBA
+/*
+ * For most architectures, SHMLBA is the same as the page size;
+ * some architectures have larger values, in which case they should
+ * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
+ * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
+ * and defining its own value for SHMLBA.
+ *
+ * The kernel also permits SHMLBA to be set by the architecture to a
+ * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
+ * this means that addresses are rounded to the large size if
+ * SHM_RND is set but addresses not aligned to that size are not rejected
+ * as long as they are at least page-aligned. Since the only architecture
+ * which uses this is ia64, this code doesn't provide for that oddity.
+ */
+static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
{
- abi_ulong len, end;
- int ret = 0;
+ return TARGET_PAGE_SIZE;
+}
+#endif
- if (start & ~TARGET_PAGE_MASK) {
- return -TARGET_EINVAL;
+abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
+ abi_ulong shmaddr, int shmflg)
+{
+ CPUState *cpu = env_cpu(cpu_env);
+ abi_ulong raddr;
+ struct shmid_ds shm_info;
+ int ret;
+ abi_ulong shmlba;
+
+ /* shmat pointers are always untagged */
+
+ /* find out the length of the shared memory segment */
+ ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
+ if (is_error(ret)) {
+ /* can't get length, bail out */
+ return ret;
}
- len = TARGET_PAGE_ALIGN(len_in);
- if (len_in && !len) {
- return -TARGET_EINVAL;
- }
+ shmlba = target_shmlba(cpu_env);
- end = start + len;
- if (end < start) {
+ if (shmaddr & (shmlba - 1)) {
+ if (shmflg & SHM_RND) {
+ shmaddr &= ~(shmlba - 1);
+ } else {
+ return -TARGET_EINVAL;
+ }
+ }
+ if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
return -TARGET_EINVAL;
}
- if (end == start) {
- return 0;
- }
+ WITH_MMAP_LOCK_GUARD() {
+ void *host_raddr;
+ abi_ulong last;
- if (!guest_range_valid_untagged(start, len)) {
- return -TARGET_EINVAL;
+ if (shmaddr) {
+ host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
+ } else {
+ abi_ulong mmap_start;
+
+ /* In order to use the host shmat, we need to honor host SHMLBA. */
+ mmap_start = mmap_find_vma(0, shm_info.shm_segsz,
+ MAX(SHMLBA, shmlba));
+
+ if (mmap_start == -1) {
+ return -TARGET_ENOMEM;
+ }
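+ /* mmap_find_vma leaves its probe mapping in place; SHM_REMAP replaces it. */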
+ host_raddr = shmat(shmid, g2h_untagged(mmap_start),
+ shmflg | SHM_REMAP);
+ }
+
+ if (host_raddr == (void *)-1) {
+ return get_errno(-1);
+ }
+ raddr = h2g(host_raddr);
+ last = raddr + shm_info.shm_segsz - 1;
+
+ page_set_flags(raddr, last,
+ PAGE_VALID | PAGE_RESET | PAGE_READ |
+ (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE));
+
+ shm_region_rm_complete(raddr, last);
+ shm_region_add(raddr, last);
}
/*
- * A straight passthrough may not be safe because qemu sometimes turns
- * private file-backed mappings into anonymous mappings.
- *
- * This is a hint, so ignoring and returning success is ok.
- *
- * This breaks MADV_DONTNEED, completely implementing which is quite
- * complicated. However, there is one low-hanging fruit: host-page-aligned
- * anonymous mappings. In this case passthrough is safe, so do it.
+ * We're mapping shared memory, so ensure we generate code for parallel
+ * execution and flush old translations. This will work up to the level
+ * supported by the host -- anything that requires EXCP_ATOMIC will not
+ * be atomic with respect to an external process.
*/
- mmap_lock();
- if (advice == MADV_DONTNEED &&
- can_passthrough_madv_dontneed(start, end)) {
- ret = get_errno(madvise(g2h_untagged(start), len, MADV_DONTNEED));
- if (ret == 0) {
- page_reset_target_data(start, start + len);
- }
+ if (!(cpu->tcg_cflags & CF_PARALLEL)) {
+ cpu->tcg_cflags |= CF_PARALLEL;
+ tb_flush(cpu);
}
- mmap_unlock();
- return ret;
+ return raddr;
+}
+
+abi_long target_shmdt(abi_ulong shmaddr)
+{
+ abi_long rv;
+
+ /* shmdt pointers are always untagged */
+
+ WITH_MMAP_LOCK_GUARD() {
+ abi_ulong last = shm_region_find(shmaddr);
+ if (last == 0) {
+ return -TARGET_EINVAL;
+ }
+
+ rv = get_errno(shmdt(g2h_untagged(shmaddr)));
+ if (rv == 0) {
+ abi_ulong size = last - shmaddr + 1;
+
+ page_set_flags(shmaddr, last, 0);
+ shm_region_rm_complete(shmaddr, last);
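+ /* shmdt dropped the host mapping; restore the reservation if reserved_va. */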
+ mmap_reserve_or_unmap(shmaddr, size);
+ }
+ }
+ return rv;
}