* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
#include "qemu/osdep.h"
+#include <sys/shm.h>
#include "trace.h"
#include "exec/log.h"
#include "qemu.h"
#include "user-internals.h"
#include "user-mmap.h"
#include "target_mman.h"
+#include "qemu/interval-tree.h"
+
+#ifdef TARGET_ARM
+#include "target/arm/cpu-features.h"
+#endif
static pthread_mutex_t mmap_mutex = PTHREAD_MUTEX_INITIALIZER;
static __thread int mmap_lock_count;
void mmap_unlock(void)
{
+ assert(mmap_lock_count > 0);
if (--mmap_lock_count == 0) {
pthread_mutex_unlock(&mmap_mutex);
}
}
}
+/* Protected by mmap_lock. */
+static IntervalTreeRoot shm_regions;
+
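+/*
+ * Record that [start, last] is occupied by a SysV shm attach,
+ * so that target_shmdt() can later recover the segment's extent.
+ */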
+static void shm_region_add(abi_ptr start, abi_ptr last)
+{
+ IntervalTreeNode *i = g_new0(IntervalTreeNode, 1);
+
+ i->start = start;
+ i->last = last;
+ interval_tree_insert(i, &shm_regions);
+}
+
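+/* Return the last byte of the attach that begins exactly at START, or 0. */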
+static abi_ptr shm_region_find(abi_ptr start)
+{
+ IntervalTreeNode *i;
+
+ for (i = interval_tree_iter_first(&shm_regions, start, start); i;
+ i = interval_tree_iter_next(i, start, start)) {
+ if (i->start == start) {
+ return i->last;
+ }
+ }
+ return 0;
+}
+
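+/* Remove every recorded region lying entirely within [start, last]. */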
+static void shm_region_rm_complete(abi_ptr start, abi_ptr last)
+{
+ IntervalTreeNode *i, *n;
+
+ for (i = interval_tree_iter_first(&shm_regions, start, last); i; i = n) {
+ n = interval_tree_iter_next(i, start, last);
+ if (i->start >= start && i->last <= last) {
+ interval_tree_remove(i, &shm_regions);
+ g_free(i);
+ }
+ }
+}
+
/*
* Validate target prot bitmask.
* Return the prot bitmask for the host in *HOST_PROT.
void *p = mmap(host_start, qemu_host_page_size,
target_to_host_prot(prot),
flags | MAP_ANONYMOUS, -1, 0);
- if (p == MAP_FAILED) {
+ if (p != host_start) {
+ if (p != MAP_FAILED) {
+ munmap(p, qemu_host_page_size);
+ errno = EEXIST;
+ }
return false;
}
prot_old = prot;
return true;
}
-#if HOST_LONG_BITS == 64 && TARGET_ABI_BITS == 64
-#ifdef TARGET_AARCH64
-# define TASK_UNMAPPED_BASE 0x5500000000
-#else
-# define TASK_UNMAPPED_BASE (1ul << 38)
-#endif
-#else
-#ifdef TARGET_HPPA
-# define TASK_UNMAPPED_BASE 0xfa000000
-#else
-# define TASK_UNMAPPED_BASE 0x40000000
-#endif
-#endif
-abi_ulong mmap_next_start = TASK_UNMAPPED_BASE;
-
-unsigned long last_brk;
+abi_ulong task_unmapped_base;
+abi_ulong elf_et_dyn_base;
+abi_ulong mmap_next_start;
/*
* Subroutine of mmap_find_vma, used when we have pre-allocated
if ((addr & (align - 1)) == 0) {
/* Success. */
- if (start == mmap_next_start && addr >= TASK_UNMAPPED_BASE) {
+ if (start == mmap_next_start && addr >= task_unmapped_base) {
mmap_next_start = addr + size;
}
return addr;
abi_long target_mmap(abi_ulong start, abi_ulong len, int target_prot,
int flags, int fd, off_t offset)
{
- abi_ulong ret, end, real_start, real_end, retaddr, host_len,
- passthrough_start = -1, passthrough_end = -1;
+ abi_ulong ret, last, real_start, real_last, retaddr, host_len;
+ abi_ulong passthrough_start = -1, passthrough_last = 0;
int page_flags;
off_t host_offset;
host_start += offset - host_offset;
}
start = h2g(host_start);
+ last = start + len - 1;
passthrough_start = start;
- passthrough_end = start + len;
+ passthrough_last = last;
} else {
if (start & ~TARGET_PAGE_MASK) {
errno = EINVAL;
goto fail;
}
- end = start + len;
- real_end = HOST_PAGE_ALIGN(end);
+ last = start + len - 1;
+ real_last = HOST_PAGE_ALIGN(last) - 1;
/*
* Test if requested memory area fits target address space
* It can fail only on 64-bit host with 32-bit target.
* On any other target/host host mmap() handles this error correctly.
*/
- if (end < start || !guest_range_valid_untagged(start, len)) {
+ if (last < start || !guest_range_valid_untagged(start, len)) {
errno = ENOMEM;
goto fail;
}
- /* Validate that the chosen range is empty. */
- if ((flags & MAP_FIXED_NOREPLACE)
- && !page_check_range_empty(start, end - 1)) {
- errno = EEXIST;
- goto fail;
+ if (flags & MAP_FIXED_NOREPLACE) {
+ /* Validate that the chosen range is empty. */
+ if (!page_check_range_empty(start, last)) {
+ errno = EEXIST;
+ goto fail;
+ }
+
+ /*
+ * With reserved_va, the entire address space is mmapped in the
+ * host to ensure it isn't accidentally used for something else.
+ * We have just checked that the guest address is not mapped
+ * within the guest, but need to replace the host reservation.
+ *
+ * Without reserved_va, despite the guest address check above,
+ * keep MAP_FIXED_NOREPLACE so that the guest does not overwrite
+ * any host address mappings.
+ */
+ if (reserved_va) {
+ flags = (flags & ~MAP_FIXED_NOREPLACE) | MAP_FIXED;
+ }
}
/*
/* handle the start of the mapping */
if (start > real_start) {
- if (real_end == real_start + qemu_host_page_size) {
+ if (real_last == real_start + qemu_host_page_size - 1) {
/* one single host page */
- if (!mmap_frag(real_start, start, end - 1,
+ if (!mmap_frag(real_start, start, last,
target_prot, flags, fd, offset)) {
goto fail;
}
real_start += qemu_host_page_size;
}
/* handle the end of the mapping */
- if (end < real_end) {
- if (!mmap_frag(real_end - qemu_host_page_size,
- real_end - qemu_host_page_size, end - 1,
+ if (last < real_last) {
+ abi_ulong real_page = real_last - qemu_host_page_size + 1;
+ if (!mmap_frag(real_page, real_page, last,
target_prot, flags, fd,
- offset + real_end - qemu_host_page_size - start)) {
+ offset + real_page - start)) {
goto fail;
}
- real_end -= qemu_host_page_size;
+ real_last -= qemu_host_page_size;
}
/* map the middle (easier) */
- if (real_start < real_end) {
- void *p;
+ if (real_start < real_last) {
+ void *p, *want_p;
off_t offset1;
+ size_t len1;
if (flags & MAP_ANONYMOUS) {
offset1 = 0;
} else {
offset1 = offset + real_start - start;
}
- p = mmap(g2h_untagged(real_start), real_end - real_start,
- target_to_host_prot(target_prot), flags, fd, offset1);
- if (p == MAP_FAILED) {
+ len1 = real_last - real_start + 1;
+ want_p = g2h_untagged(real_start);
+
+ p = mmap(want_p, len1, target_to_host_prot(target_prot),
+ flags, fd, offset1);
+ if (p != want_p) {
+ if (p != MAP_FAILED) {
+ munmap(p, len1);
+ errno = EEXIST;
+ }
goto fail;
}
passthrough_start = real_start;
- passthrough_end = real_end;
+ passthrough_last = real_last;
}
}
the_end1:
page_flags |= PAGE_ANON;
}
page_flags |= PAGE_RESET;
- if (passthrough_start == passthrough_end) {
- page_set_flags(start, start + len - 1, page_flags);
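+ /*
+ * passthrough_start/passthrough_last were initialized to an empty
+ * range (start > last); they only describe a real range when part
+ * of the mapping went straight through to the host mmap.
+ */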
+ if (passthrough_start > passthrough_last) {
+ page_set_flags(start, last, page_flags);
} else {
if (start < passthrough_start) {
page_set_flags(start, passthrough_start - 1, page_flags);
}
- page_set_flags(passthrough_start, passthrough_end - 1,
+ page_set_flags(passthrough_start, passthrough_last,
page_flags | PAGE_PASSTHROUGH);
- if (passthrough_end < start + len) {
- page_set_flags(passthrough_end, start + len - 1, page_flags);
+ if (passthrough_last < last) {
+ page_set_flags(passthrough_last + 1, last, page_flags);
}
}
+ shm_region_rm_complete(start, last);
the_end:
trace_target_mmap_complete(start);
if (qemu_loglevel_mask(CPU_LOG_PAGE)) {
return -1;
}
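+/*
+ * Release the guest range [start, start + len): with reserved_va,
+ * replace it with a PROT_NONE reservation; otherwise munmap it.
+ * Host pages that still hold live guest pages are preserved.
+ * Returns 0 on success, -1 with errno set on failure.
+ */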
-static void mmap_reserve(abi_ulong start, abi_ulong size)
+static int mmap_reserve_or_unmap(abi_ulong start, abi_ulong len)
{
abi_ulong real_start;
- abi_ulong real_end;
- abi_ulong addr;
- abi_ulong end;
+ abi_ulong real_last;
+ abi_ulong real_len;
+ abi_ulong last;
+ abi_ulong a;
+ void *host_start;
int prot;
+ last = start + len - 1;
real_start = start & qemu_host_page_mask;
- real_end = HOST_PAGE_ALIGN(start + size);
- end = start + size;
- if (start > real_start) {
- /* handle host page containing start */
+ real_last = HOST_PAGE_ALIGN(last) - 1;
+
+ /*
+ * If guest pages remain on the first or last host pages,
+ * adjust the deallocation to retain those guest pages.
+ * The single page special case is required for the last page,
+ * lest real_start overflow to zero.
+ */
+ if (real_last - real_start < qemu_host_page_size) {
prot = 0;
- for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
- prot |= page_get_flags(addr);
+ for (a = real_start; a < start; a += TARGET_PAGE_SIZE) {
+ prot |= page_get_flags(a);
}
- if (real_end == real_start + qemu_host_page_size) {
- for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
- prot |= page_get_flags(addr);
- }
- end = real_end;
+ for (a = last; a < real_last; a += TARGET_PAGE_SIZE) {
+ prot |= page_get_flags(a + 1);
+ }
+ if (prot != 0) {
+ return 0;
+ }
+ } else {
+ for (prot = 0, a = real_start; a < start; a += TARGET_PAGE_SIZE) {
+ prot |= page_get_flags(a);
}
if (prot != 0) {
real_start += qemu_host_page_size;
}
- }
- if (end < real_end) {
- prot = 0;
- for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
- prot |= page_get_flags(addr);
+
+ for (prot = 0, a = last; a < real_last; a += TARGET_PAGE_SIZE) {
+ prot |= page_get_flags(a + 1);
}
if (prot != 0) {
- real_end -= qemu_host_page_size;
+ real_last -= qemu_host_page_size;
+ }
+
+ if (real_last < real_start) {
+ return 0;
}
}
- if (real_start != real_end) {
- mmap(g2h_untagged(real_start), real_end - real_start, PROT_NONE,
- MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE,
- -1, 0);
+
+ real_len = real_last - real_start + 1;
+ host_start = g2h_untagged(real_start);
+
+ if (reserved_va) {
+ void *ptr = mmap(host_start, real_len, PROT_NONE,
+ MAP_FIXED | MAP_ANONYMOUS
+ | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
+ return ptr == host_start ? 0 : -1;
}
+ return munmap(host_start, real_len);
}
int target_munmap(abi_ulong start, abi_ulong len)
{
- abi_ulong end, real_start, real_end, addr;
- int prot, ret;
+ int ret;
trace_target_munmap(start, len);
if (start & ~TARGET_PAGE_MASK) {
- return -TARGET_EINVAL;
+ errno = EINVAL;
+ return -1;
}
len = TARGET_PAGE_ALIGN(len);
if (len == 0 || !guest_range_valid_untagged(start, len)) {
- return -TARGET_EINVAL;
+ errno = EINVAL;
+ return -1;
}
mmap_lock();
- end = start + len;
- real_start = start & qemu_host_page_mask;
- real_end = HOST_PAGE_ALIGN(end);
-
- if (start > real_start) {
- /* handle host page containing start */
- prot = 0;
- for (addr = real_start; addr < start; addr += TARGET_PAGE_SIZE) {
- prot |= page_get_flags(addr);
- }
- if (real_end == real_start + qemu_host_page_size) {
- for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
- prot |= page_get_flags(addr);
- }
- end = real_end;
- }
- if (prot != 0) {
- real_start += qemu_host_page_size;
- }
- }
- if (end < real_end) {
- prot = 0;
- for (addr = end; addr < real_end; addr += TARGET_PAGE_SIZE) {
- prot |= page_get_flags(addr);
- }
- if (prot != 0) {
- real_end -= qemu_host_page_size;
- }
- }
-
- ret = 0;
- /* unmap what we can */
- if (real_start < real_end) {
- if (reserved_va) {
- mmap_reserve(real_start, real_end - real_start);
- } else {
- ret = munmap(g2h_untagged(real_start), real_end - real_start);
- }
- }
-
- if (ret == 0) {
+ ret = mmap_reserve_or_unmap(start, len);
+ if (likely(ret == 0)) {
page_set_flags(start, start + len - 1, 0);
+ shm_region_rm_complete(start, start + len - 1);
}
mmap_unlock();
+
return ret;
}
* If new and old addresses overlap then the above mremap will
* already have failed with EINVAL.
*/
- mmap_reserve(old_addr, old_size);
+ mmap_reserve_or_unmap(old_addr, old_size);
}
} else if (flags & MREMAP_MAYMOVE) {
abi_ulong mmap_start;
flags | MREMAP_FIXED,
g2h_untagged(mmap_start));
if (reserved_va) {
- mmap_reserve(old_addr, old_size);
+ mmap_reserve_or_unmap(old_addr, old_size);
}
}
} else {
- int prot = 0;
+ int page_flags = 0;
if (reserved_va && old_size < new_size) {
abi_ulong addr;
for (addr = old_addr + old_size;
addr < old_addr + new_size;
addr++) {
- prot |= page_get_flags(addr);
+ page_flags |= page_get_flags(addr);
}
}
- if (prot == 0) {
+ if (page_flags == 0) {
host_addr = mremap(g2h_untagged(old_addr),
old_size, new_size, flags);
errno = ENOMEM;
host_addr = MAP_FAILED;
} else if (reserved_va && old_size > new_size) {
- mmap_reserve(old_addr + old_size, old_size - new_size);
+ mmap_reserve_or_unmap(old_addr + old_size,
+ old_size - new_size);
}
}
} else {
new_addr = h2g(host_addr);
prot = page_get_flags(old_addr);
page_set_flags(old_addr, old_addr + old_size - 1, 0);
+ shm_region_rm_complete(old_addr, old_addr + old_size - 1);
page_set_flags(new_addr, new_addr + new_size - 1,
prot | PAGE_VALID | PAGE_RESET);
+ shm_region_rm_complete(new_addr, new_addr + new_size - 1);
}
mmap_unlock();
return new_addr;
}
-static bool can_passthrough_madvise(abi_ulong start, abi_ulong end)
-{
- ulong addr;
-
- if ((start | end) & ~qemu_host_page_mask) {
- return false;
- }
-
- for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
- if (!(page_get_flags(addr) & PAGE_PASSTHROUGH)) {
- return false;
- }
- }
-
- return true;
-}
-
abi_long target_madvise(abi_ulong start, abi_ulong len_in, int advice)
{
- abi_ulong len, end;
+ abi_ulong len;
int ret = 0;
if (start & ~TARGET_PAGE_MASK) {
return -TARGET_EINVAL;
}
- len = TARGET_PAGE_ALIGN(len_in);
-
- if (len_in && !len) {
- return -TARGET_EINVAL;
- }
-
- end = start + len;
- if (end < start) {
- return -TARGET_EINVAL;
- }
-
- if (end == start) {
+ if (len_in == 0) {
return 0;
}
-
- if (!guest_range_valid_untagged(start, len)) {
+ len = TARGET_PAGE_ALIGN(len_in);
+ if (len == 0 || !guest_range_valid_untagged(start, len)) {
return -TARGET_EINVAL;
}
*
* A straight passthrough for those may not be safe because qemu sometimes
* turns private file-backed mappings into anonymous mappings.
- * can_passthrough_madvise() helps to check if a passthrough is possible by
- * comparing mappings that are known to have the same semantics in the host
- * and the guest. In this case passthrough is safe.
+ * If all guest pages have PAGE_PASSTHROUGH set, mappings have the
+ * same semantics for the host as for the guest.
*
* We pass through MADV_WIPEONFORK and MADV_KEEPONFORK if possible and
* return failure if not.
ret = -EINVAL;
/* fall through */
case MADV_DONTNEED:
- if (can_passthrough_madvise(start, end)) {
+ if (page_check_range(start, len, PAGE_PASSTHROUGH)) {
ret = get_errno(madvise(g2h_untagged(start), len, advice));
if ((advice == MADV_DONTNEED) && (ret == 0)) {
page_reset_target_data(start, start + len - 1);
return ret;
}
+
+#ifndef TARGET_FORCE_SHMLBA
+/*
+ * For most architectures, SHMLBA is the same as the page size;
+ * some architectures have larger values, in which case they should
+ * define TARGET_FORCE_SHMLBA and provide a target_shmlba() function.
+ * This corresponds to the kernel arch code defining __ARCH_FORCE_SHMLBA
+ * and defining its own value for SHMLBA.
+ *
+ * The kernel also permits SHMLBA to be set by the architecture to a
+ * value larger than the page size without setting __ARCH_FORCE_SHMLBA;
+ * this means that addresses are rounded to the larger size if
+ * SHM_RND is set, but addresses not aligned to that size are not
+ * rejected as long as they are at least page-aligned. Since the only
+ * architecture which uses this is ia64, this code doesn't provide for
+ * that oddity.
+ */
+static inline abi_ulong target_shmlba(CPUArchState *cpu_env)
+{
+ return TARGET_PAGE_SIZE;
+}
+#endif
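+/*
+ * Illustrative only (not part of this change): a target needing a larger
+ * SHMLBA would define TARGET_FORCE_SHMLBA in its target headers and supply
+ * its own target_shmlba(), e.g. returning 0x40000 as the MIPS port does.
+ */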
+
+abi_ulong target_shmat(CPUArchState *cpu_env, int shmid,
+ abi_ulong shmaddr, int shmflg)
+{
+ CPUState *cpu = env_cpu(cpu_env);
+ abi_ulong raddr;
+ struct shmid_ds shm_info;
+ int ret;
+ abi_ulong shmlba;
+
+ /* shmat pointers are always untagged */
+
+ /* find out the length of the shared memory segment */
+ ret = get_errno(shmctl(shmid, IPC_STAT, &shm_info));
+ if (is_error(ret)) {
+ /* can't get length, bail out */
+ return ret;
+ }
+
+ shmlba = target_shmlba(cpu_env);
+
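+ /*
+ * As in the kernel's do_shmat(): round a misaligned attach address
+ * down to SHMLBA when SHM_RND is given, otherwise reject it.
+ */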
+ if (shmaddr & (shmlba - 1)) {
+ if (shmflg & SHM_RND) {
+ shmaddr &= ~(shmlba - 1);
+ } else {
+ return -TARGET_EINVAL;
+ }
+ }
+ if (!guest_range_valid_untagged(shmaddr, shm_info.shm_segsz)) {
+ return -TARGET_EINVAL;
+ }
+
+ WITH_MMAP_LOCK_GUARD() {
+ void *host_raddr;
+ abi_ulong last;
+
+ if (shmaddr) {
+ host_raddr = shmat(shmid, (void *)g2h_untagged(shmaddr), shmflg);
+ } else {
+ abi_ulong mmap_start;
+
+ /* In order to use the host shmat, we need to honor host SHMLBA. */
+ mmap_start = mmap_find_vma(0, shm_info.shm_segsz,
+ MAX(SHMLBA, shmlba));
+
+ if (mmap_start == -1) {
+ return -TARGET_ENOMEM;
+ }
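+ /*
+ * With reserved_va, the range found above is already backed by
+ * the host reservation mapping, so SHM_REMAP is needed for the
+ * host shmat() to replace it.
+ */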
+ host_raddr = shmat(shmid, g2h_untagged(mmap_start),
+ shmflg | SHM_REMAP);
+ }
+
+ if (host_raddr == (void *)-1) {
+ return get_errno(-1);
+ }
+ raddr = h2g(host_raddr);
+ last = raddr + shm_info.shm_segsz - 1;
+
+ page_set_flags(raddr, last,
+ PAGE_VALID | PAGE_RESET | PAGE_READ |
+ (shmflg & SHM_RDONLY ? 0 : PAGE_WRITE));
+
+ shm_region_rm_complete(raddr, last);
+ shm_region_add(raddr, last);
+ }
+
+ /*
+ * We're mapping shared memory, so ensure we generate code for parallel
+ * execution and flush old translations. This will work up to the level
+ * supported by the host -- anything that requires EXCP_ATOMIC will not
+ * be atomic with respect to an external process.
+ */
+ if (!(cpu->tcg_cflags & CF_PARALLEL)) {
+ cpu->tcg_cflags |= CF_PARALLEL;
+ tb_flush(cpu);
+ }
+
+ return raddr;
+}
+
+abi_long target_shmdt(abi_ulong shmaddr)
+{
+ abi_long rv;
+
+ /* shmdt pointers are always untagged */
+
+ WITH_MMAP_LOCK_GUARD() {
+ abi_ulong last = shm_region_find(shmaddr);
+ if (last == 0) {
+ return -TARGET_EINVAL;
+ }
+
+ rv = get_errno(shmdt(g2h_untagged(shmaddr)));
+ if (rv == 0) {
+ abi_ulong size = last - shmaddr + 1;
+
+ page_set_flags(shmaddr, last, 0);
+ shm_region_rm_complete(shmaddr, last);
+ mmap_reserve_or_unmap(shmaddr, size);
+ }
+ }
+ return rv;
+}