#include <sys/param.h>
#include <sys/resource.h>
+#include <sys/shm.h>
#include "qemu.h"
#include "disas/disas.h"
#include "qemu/path.h"
+#include "qemu/queue.h"
+#include "qemu/guest-random.h"
+#include "qemu/units.h"
+#include "qemu/selfmap.h"
#ifdef _ARCH_PPC64
#undef ARCH_DLINFO
/* The commpage only exists for 32 bit kernels */
-/* Return 1 if the proposed guest space is suitable for the guest.
- * Return 0 if the proposed guest space isn't suitable, but another
- * address space should be tried.
- * Return -1 if there is no way the proposed guest space can be
- * valid regardless of the base.
- * The guest code may leave a page mapped and populate it if the
- * address is suitable.
- */
-static int init_guest_commpage(unsigned long guest_base,
- unsigned long guest_size)
-{
- unsigned long real_start, test_page_addr;
-
- /* We need to check that we can force a fault on access to the
- * commpage at 0xffff0fxx
- */
- test_page_addr = guest_base + (0xffff0f00 & qemu_host_page_mask);
+#define ARM_COMMPAGE (intptr_t)0xffff0f00u
- /* If the commpage lies within the already allocated guest space,
- * then there is no way we can allocate it.
- *
- * You may be thinking that that this check is redundant because
- * we already validated the guest size against MAX_RESERVED_VA;
- * but if qemu_host_page_mask is unusually large, then
- * test_page_addr may be lower.
- */
- if (test_page_addr >= guest_base
- && test_page_addr < (guest_base + guest_size)) {
- return -1;
- }
-
- /* Note it needs to be writeable to let us initialise it */
- real_start = (unsigned long)
- mmap((void *)test_page_addr, qemu_host_page_size,
- PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+static bool init_guest_commpage(void)
+{
+ void *want = g2h(ARM_COMMPAGE & -qemu_host_page_size);
+ void *addr = mmap(want, qemu_host_page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
- /* If we can't map it then try another address */
- if (real_start == -1ul) {
- return 0;
+ if (addr == MAP_FAILED) {
+ perror("Allocating guest commpage");
+ exit(EXIT_FAILURE);
}
-
- if (real_start != test_page_addr) {
- /* OS didn't put the page where we asked - unmap and reject */
- munmap((void *)real_start, qemu_host_page_size);
- return 0;
+ if (addr != want) {
+ return false;
}
- /* Leave the page mapped
- * Populate it (mmap should have left it all 0'd)
- */
-
- /* Kernel helper versions */
- __put_user(5, (uint32_t *)g2h(0xffff0ffcul));
+ /*
+ * At this point the host page containing ARM_COMMPAGE is mapped
+ * read/write at its g2h() address: a failed mmap has already exited
+ * the process, and a mapping at any other address has returned false.
+ *
+ * NOTE(review): the mapping is requested with MAP_FIXED, so anything
+ * already mapped at that host address is replaced; presumably this is
+ * intended so the page can overlay an earlier reservation -- confirm.
+ *
+ * Set kernel helper versions; rest of page is 0.
+ * The page is switched to read-only by the mprotect() that follows,
+ * and the function then returns true.
+ */
+ __put_user(5, (uint32_t *)g2h(0xffff0ffcu));
- /* Now it's populated make it RO */
- if (mprotect((void *)test_page_addr, qemu_host_page_size, PROT_READ)) {
+ if (mprotect(addr, qemu_host_page_size, PROT_READ)) {
perror("Protecting guest commpage");
- exit(-1);
+ exit(EXIT_FAILURE);
}
-
- return 1; /* All good */
+ return true;
}
#define ELF_HWCAP get_elf_hwcap()
/* EDSP is in v5TE and above, but all our v5 CPUs are v5TE */
GET_FEATURE(ARM_FEATURE_V5, ARM_HWCAP_ARM_EDSP);
- GET_FEATURE(ARM_FEATURE_VFP, ARM_HWCAP_ARM_VFP);
GET_FEATURE(ARM_FEATURE_IWMMXT, ARM_HWCAP_ARM_IWMMXT);
GET_FEATURE(ARM_FEATURE_THUMB2EE, ARM_HWCAP_ARM_THUMBEE);
GET_FEATURE(ARM_FEATURE_NEON, ARM_HWCAP_ARM_NEON);
- GET_FEATURE(ARM_FEATURE_VFP3, ARM_HWCAP_ARM_VFPv3);
GET_FEATURE(ARM_FEATURE_V6K, ARM_HWCAP_ARM_TLS);
- GET_FEATURE(ARM_FEATURE_VFP4, ARM_HWCAP_ARM_VFPv4);
- GET_FEATURE_ID(arm_div, ARM_HWCAP_ARM_IDIVA);
- GET_FEATURE_ID(thumb_div, ARM_HWCAP_ARM_IDIVT);
- /* All QEMU's VFPv3 CPUs have 32 registers, see VFP_DREG in translate.c.
- * Note that the ARM_HWCAP_ARM_VFPv3D16 bit is always the inverse of
- * ARM_HWCAP_ARM_VFPD32 (and so always clear for QEMU); it is unrelated
- * to our VFP_FP16 feature bit.
- */
- GET_FEATURE(ARM_FEATURE_VFP3, ARM_HWCAP_ARM_VFPD32);
GET_FEATURE(ARM_FEATURE_LPAE, ARM_HWCAP_ARM_LPAE);
+ GET_FEATURE_ID(aa32_arm_div, ARM_HWCAP_ARM_IDIVA);
+ GET_FEATURE_ID(aa32_thumb_div, ARM_HWCAP_ARM_IDIVT);
+ GET_FEATURE_ID(aa32_vfp, ARM_HWCAP_ARM_VFP);
+
+ if (cpu_isar_feature(aa32_fpsp_v3, cpu) ||
+ cpu_isar_feature(aa32_fpdp_v3, cpu)) {
+ hwcaps |= ARM_HWCAP_ARM_VFPv3;
+ if (cpu_isar_feature(aa32_simd_r32, cpu)) {
+ hwcaps |= ARM_HWCAP_ARM_VFPD32;
+ } else {
+ hwcaps |= ARM_HWCAP_ARM_VFPv3D16;
+ }
+ }
+ GET_FEATURE_ID(aa32_simdfmac, ARM_HWCAP_ARM_VFPv4);
return hwcaps;
}
ARM_HWCAP_A64_SB = 1 << 29,
ARM_HWCAP_A64_PACA = 1 << 30,
ARM_HWCAP_A64_PACG = 1UL << 31,
+
+ ARM_HWCAP2_A64_DCPODP = 1 << 0,
+ ARM_HWCAP2_A64_SVE2 = 1 << 1,
+ ARM_HWCAP2_A64_SVEAES = 1 << 2,
+ ARM_HWCAP2_A64_SVEPMULL = 1 << 3,
+ ARM_HWCAP2_A64_SVEBITPERM = 1 << 4,
+ ARM_HWCAP2_A64_SVESHA3 = 1 << 5,
+ ARM_HWCAP2_A64_SVESM4 = 1 << 6,
+ ARM_HWCAP2_A64_FLAGM2 = 1 << 7,
+ ARM_HWCAP2_A64_FRINT = 1 << 8,
};
-#define ELF_HWCAP get_elf_hwcap()
+#define ELF_HWCAP get_elf_hwcap()
+#define ELF_HWCAP2 get_elf_hwcap2()
+
+#define GET_FEATURE_ID(feat, hwcap) \
+ do { if (cpu_isar_feature(feat, cpu)) { hwcaps |= hwcap; } } while (0)
static uint32_t get_elf_hwcap(void)
{
hwcaps |= ARM_HWCAP_A64_CPUID;
/* probe for the extra features */
-#define GET_FEATURE_ID(feat, hwcap) \
- do { if (cpu_isar_feature(feat, cpu)) { hwcaps |= hwcap; } } while (0)
GET_FEATURE_ID(aa64_aes, ARM_HWCAP_A64_AES);
GET_FEATURE_ID(aa64_pmull, ARM_HWCAP_A64_PMULL);
GET_FEATURE_ID(aa64_jscvt, ARM_HWCAP_A64_JSCVT);
GET_FEATURE_ID(aa64_sb, ARM_HWCAP_A64_SB);
GET_FEATURE_ID(aa64_condm_4, ARM_HWCAP_A64_FLAGM);
+ GET_FEATURE_ID(aa64_dcpop, ARM_HWCAP_A64_DCPOP);
+ GET_FEATURE_ID(aa64_rcpc_8_3, ARM_HWCAP_A64_LRCPC);
+ GET_FEATURE_ID(aa64_rcpc_8_4, ARM_HWCAP_A64_ILRCPC);
-#undef GET_FEATURE_ID
+ return hwcaps;
+}
+/*
+ * Compute the value that ELF_HWCAP2 expands to: the ARM_HWCAP2_A64_*
+ * bits whose cpu_isar_feature() test passes for ARM_CPU(thread_cpu).
+ * Bits without a GET_FEATURE_ID line below are always left clear.
+ */
+static uint32_t get_elf_hwcap2(void)
+{
+ ARMCPU *cpu = ARM_CPU(thread_cpu);
+ uint32_t hwcaps = 0;
+
+ GET_FEATURE_ID(aa64_dcpodp, ARM_HWCAP2_A64_DCPODP);
+ GET_FEATURE_ID(aa64_condm_5, ARM_HWCAP2_A64_FLAGM2);
+ GET_FEATURE_ID(aa64_frint, ARM_HWCAP2_A64_FRINT);
return hwcaps;
}
+#undef GET_FEATURE_ID
+
#endif /* not TARGET_AARCH64 */
#endif /* TARGET_ARM */
QEMU_PPC_FEATURE2_HAS_EBB = 0x10000000, /* Event Base Branching */
QEMU_PPC_FEATURE2_HAS_ISEL = 0x08000000, /* Integer Select */
QEMU_PPC_FEATURE2_HAS_TAR = 0x04000000, /* Target Address Register */
+ QEMU_PPC_FEATURE2_VEC_CRYPTO = 0x02000000,
+ QEMU_PPC_FEATURE2_HTM_NOSC = 0x01000000,
QEMU_PPC_FEATURE2_ARCH_3_00 = 0x00800000, /* ISA 3.00 */
+ QEMU_PPC_FEATURE2_HAS_IEEE128 = 0x00400000, /* VSX IEEE Bin Float 128-bit */
+ QEMU_PPC_FEATURE2_DARN = 0x00200000, /* darn random number insn */
+ QEMU_PPC_FEATURE2_SCV = 0x00100000, /* scv syscall */
+ QEMU_PPC_FEATURE2_HTM_NO_SUSPEND = 0x00080000, /* TM w/o suspended state */
};
#define ELF_HWCAP get_elf_hwcap()
GET_FEATURE(PPC_ISEL, QEMU_PPC_FEATURE2_HAS_ISEL);
GET_FEATURE2(PPC2_BCTAR_ISA207, QEMU_PPC_FEATURE2_HAS_TAR);
GET_FEATURE2((PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
- PPC2_ISA207S), QEMU_PPC_FEATURE2_ARCH_2_07);
- GET_FEATURE2(PPC2_ISA300, QEMU_PPC_FEATURE2_ARCH_3_00);
+ PPC2_ISA207S), QEMU_PPC_FEATURE2_ARCH_2_07 |
+ QEMU_PPC_FEATURE2_VEC_CRYPTO);
+ GET_FEATURE2(PPC2_ISA300, QEMU_PPC_FEATURE2_ARCH_3_00 |
+ QEMU_PPC_FEATURE2_DARN);
#undef GET_FEATURE
#undef GET_FEATURE2
#define ELF_DATA ELFDATA2MSB
#define ELF_ARCH EM_S390
+#include "elf.h"
+
+#define ELF_HWCAP get_elf_hwcap()
+
+#define GET_FEATURE(_feat, _hwcap) \
+ do { if (s390_has_feat(_feat)) { hwcap |= _hwcap; } } while (0)
+/*
+ * Compute the value that ELF_HWCAP expands to for the s390 target.
+ *
+ * Three bits are set unconditionally (see the comment in the body).
+ * Every other bit is set only when s390_has_feat() reports the matching
+ * facility; HWCAP_S390_ETF3EH needs two facilities to be present at once.
+ */
+static uint32_t get_elf_hwcap(void)
+{
+ /*
+ * Let's assume we always have esan3 and zarch.
+ * 31-bit processes can use 64-bit registers (high gprs).
+ */
+ uint32_t hwcap = HWCAP_S390_ESAN3 | HWCAP_S390_ZARCH | HWCAP_S390_HIGH_GPRS;
+
+ GET_FEATURE(S390_FEAT_STFLE, HWCAP_S390_STFLE);
+ GET_FEATURE(S390_FEAT_MSA, HWCAP_S390_MSA);
+ GET_FEATURE(S390_FEAT_LONG_DISPLACEMENT, HWCAP_S390_LDISP);
+ GET_FEATURE(S390_FEAT_EXTENDED_IMMEDIATE, HWCAP_S390_EIMM);
+ if (s390_has_feat(S390_FEAT_EXTENDED_TRANSLATION_3) &&
+ s390_has_feat(S390_FEAT_ETF3_ENH)) {
+ hwcap |= HWCAP_S390_ETF3EH;
+ }
+ GET_FEATURE(S390_FEAT_VECTOR, HWCAP_S390_VXRS);
+
+ return hwcap;
+}
+
static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop)
{
regs->psw.addr = infop->entry;
~(abi_ulong)(TARGET_ELF_EXEC_PAGESIZE-1))
#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE-1))
-#define DLINFO_ITEMS 15
+#define DLINFO_ITEMS 16
static inline void memcpy_fromfs(void * to, const void * from, unsigned long n)
{
}
/*
- * Generate 16 random bytes for userspace PRNG seeding (not
- * cryptically secure but it's not the aim of QEMU).
+ * Generate 16 random bytes for userspace PRNG seeding.
*/
- for (i = 0; i < 16; i++) {
- k_rand_bytes[i] = rand();
- }
+ qemu_guest_getrandom_nofail(k_rand_bytes, sizeof(k_rand_bytes));
if (STACK_GROWS_DOWN) {
sp -= 16;
u_rand_bytes = sp;
NEW_AUX_ENT(AT_CLKTCK, (abi_ulong) sysconf(_SC_CLK_TCK));
NEW_AUX_ENT(AT_RANDOM, (abi_ulong) u_rand_bytes);
NEW_AUX_ENT(AT_SECURE, (abi_ulong) qemu_getauxval(AT_SECURE));
+ NEW_AUX_ENT(AT_EXECFN, info->file_string);
#ifdef ELF_HWCAP2
NEW_AUX_ENT(AT_HWCAP2, (abi_ulong) ELF_HWCAP2);
return sp;
}
-unsigned long init_guest_space(unsigned long host_start,
- unsigned long host_size,
- unsigned long guest_start,
- bool fixed)
+#ifndef ARM_COMMPAGE
+#define ARM_COMMPAGE 0
+#define init_guest_commpage() true
+#endif
+/*
+ * Report that the virtual address space required by @image_name is
+ * already in use, then exit with EXIT_FAILURE. Does not return.
+ * The message points the user at the -B option.
+ */
+static void pgb_fail_in_use(const char *image_name)
+{
+ error_report("%s: requires virtual address space that is in use "
+ "(omit the -B option or choose a different value)",
+ image_name);
+ exit(EXIT_FAILURE);
+}
+/*
+ * Validate and reserve guest address space when guest_base has already
+ * been chosen (probe_guest_base() takes this path when have_guest_base
+ * is set).
+ *
+ * guest_base must be a multiple of @align. The image range
+ * [@guest_loaddr, @guest_hiaddr) is checked against reserved_va when that
+ * is non-zero and is then widened to cover it; otherwise, on hosts
+ * narrower than the target ABI, the size is checked against the host
+ * pointer range. The resulting range is mapped PROT_NONE at its g2h()
+ * address. Every failure prints a message and exits the process.
+ */
+static void pgb_have_guest_base(const char *image_name, abi_ulong guest_loaddr,
+ abi_ulong guest_hiaddr, long align)
{
- unsigned long current_start, aligned_start;
- int flags;
+ const int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE;
+ void *addr, *test;
- assert(host_start || host_size);
+ if (!QEMU_IS_ALIGNED(guest_base, align)) {
+ fprintf(stderr, "Requested guest base 0x%lx does not satisfy "
+ "host minimum alignment (0x%lx)\n",
+ guest_base, align);
+ exit(EXIT_FAILURE);
+ }
- /* If just a starting address is given, then just verify that
- * address. */
- if (host_start && !host_size) {
-#if defined(TARGET_ARM) && !defined(TARGET_AARCH64)
- if (init_guest_commpage(host_start, host_size) != 1) {
- return (unsigned long)-1;
+ /* Sanity check the guest binary. */
+ if (reserved_va) {
+ if (guest_hiaddr > reserved_va) {
+ error_report("%s: requires more than reserved virtual "
+ "address space (0x%" PRIx64 " > 0x%lx)",
+ image_name, (uint64_t)guest_hiaddr, reserved_va);
+ exit(EXIT_FAILURE);
+ }
+ } else {
+#if HOST_LONG_BITS < TARGET_ABI_BITS
+ if ((guest_hiaddr - guest_base) > ~(uintptr_t)0) {
+ error_report("%s: requires more virtual address space "
+ "than the host can provide (0x%" PRIx64 ")",
+ image_name, (uint64_t)guest_hiaddr - guest_base);
+ exit(EXIT_FAILURE);
}
#endif
- return host_start;
}
- /* Setup the initial flags and start address. */
- current_start = host_start & qemu_host_page_mask;
- flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE;
- if (fixed) {
- flags |= MAP_FIXED;
+ /*
+ * Expand the allocation to the entire reserved_va.
+ * Exclude the mmap_min_addr hole.
+ */
+ if (reserved_va) {
+ guest_loaddr = (guest_base >= mmap_min_addr ? 0
+ : mmap_min_addr - guest_base);
+ guest_hiaddr = reserved_va;
}
- /* Otherwise, a non-zero size region of memory needs to be mapped
- * and validated. */
+ /*
+ * Reserve the address space for the binary, or reserved_va.
+ * The flags do not include MAP_FIXED, so the address is only a hint:
+ * any result other than the requested address (MAP_FAILED included)
+ * is treated as the range being in use.
+ */
+ test = g2h(guest_loaddr);
+ addr = mmap(test, guest_hiaddr - guest_loaddr, PROT_NONE, flags, -1, 0);
+ if (test != addr) {
+ pgb_fail_in_use(image_name);
+ }
+}
-#if defined(TARGET_ARM) && !defined(TARGET_AARCH64)
- /* On 32-bit ARM, we need to map not just the usable memory, but
- * also the commpage. Try to find a suitable place by allocating
- * a big chunk for all of it. If host_start, then the naive
- * strategy probably does good enough.
- */
- if (!host_start) {
- unsigned long guest_full_size, host_full_size, real_start;
-
- guest_full_size =
- (0xffff0f00 & qemu_host_page_mask) + qemu_host_page_size;
- host_full_size = guest_full_size - guest_start;
- real_start = (unsigned long)
- mmap(NULL, host_full_size, PROT_NONE, flags, -1, 0);
- if (real_start == (unsigned long)-1) {
- if (host_size < host_full_size - qemu_host_page_size) {
- /* We failed to map a continous segment, but we're
- * allowed to have a gap between the usable memory and
- * the commpage where other things can be mapped.
- * This sparseness gives us more flexibility to find
- * an address range.
- */
- goto naive;
- }
- return (unsigned long)-1;
- }
- munmap((void *)real_start, host_full_size);
- if (real_start & ~qemu_host_page_mask) {
- /* The same thing again, but with an extra qemu_host_page_size
- * so that we can shift around alignment.
- */
- unsigned long real_size = host_full_size + qemu_host_page_size;
- real_start = (unsigned long)
- mmap(NULL, real_size, PROT_NONE, flags, -1, 0);
- if (real_start == (unsigned long)-1) {
- if (host_size < host_full_size - qemu_host_page_size) {
- goto naive;
- }
- return (unsigned long)-1;
+/**
+ * pgd_find_hole_fallback: probe the host address space for a free region
+ * @guest_size: size of the region to probe for
+ * @brk: location of break
+ * @align: memory alignment
+ * @offset: added to the end of the range that is tested against @brk,
+ * and to the address that is returned
+ *
+ * This is a fallback method for finding a hole in the host address
+ * space if we don't have the benefit of being able to access
+ * /proc/self/maps. It can potentially take a very long time as we can
+ * only dumbly iterate up the host address space seeing if the
+ * allocation would work.
+ *
+ * Returns the start of the hole plus @offset, or -1 if the search ran
+ * off the top of the address space. Nothing is left mapped on return.
+ */
+static uintptr_t pgd_find_hole_fallback(uintptr_t guest_size, uintptr_t brk,
+ long align, uintptr_t offset)
+{
+ uintptr_t base;
+
+ /* Start (aligned) at the bottom and work our way up */
+ base = ROUND_UP(mmap_min_addr, align);
+
+ while (true) {
+ uintptr_t align_start, end;
+ align_start = ROUND_UP(base, align);
+ end = align_start + guest_size + offset;
+
+ /* if brk is anywhere in the range give ourselves some room to grow. */
+ if (align_start <= brk && brk < end) {
+ base = brk + (16 * MiB);
+ continue;
+ } else if (align_start + guest_size < align_start) {
+ /* we have run out of space */
+ return -1;
+ } else {
+ /*
+ * Plain MAP_FIXED must not be used for this probe: it silently
+ * replaces whatever is already mapped at the address, so every
+ * probe would "succeed" and the munmap below would then destroy
+ * the existing host mapping. Pass the address as a hint (made
+ * strict with MAP_FIXED_NOREPLACE where the host defines it) and
+ * accept the hole only if the kernel honoured it exactly.
+ */
+ int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE;
+ void *mmap_start;
+
+#ifdef MAP_FIXED_NOREPLACE
+ flags |= MAP_FIXED_NOREPLACE;
+#endif
+ mmap_start = mmap((void *) align_start, guest_size,
+ PROT_NONE, flags, -1, 0);
+ if (mmap_start != MAP_FAILED) {
+ /* This was only a probe: release whatever we were given. */
+ munmap(mmap_start, guest_size);
+ if (mmap_start == (void *) align_start) {
+ return (uintptr_t) mmap_start + offset;
+ }
}
- munmap((void *)real_start, real_size);
- real_start = HOST_PAGE_ALIGN(real_start);
+ base += qemu_host_page_size;
}
- current_start = real_start;
}
- naive:
-#endif
+}
- while (1) {
- unsigned long real_start, real_size, aligned_size;
- aligned_size = real_size = host_size;
+/*
+ * Find a value for guest_base such that the guest range
+ * [@guest_loaddr, @guest_loaddr + @guest_size) lands in a hole of the
+ * host address space. @guest_loaddr must be a multiple of @align.
+ *
+ * Holes are the gaps in front of each entry returned by read_self_maps(),
+ * starting from mmap_min_addr; when no map list is available the search
+ * is delegated to pgd_find_hole_fallback(). Within a hole the candidate
+ * start is (hole start + @offset) rounded up to @align, and it is moved
+ * further up when the hole also contains the current brk. The lowest
+ * fitting hole is remembered, a hole that can hold the identity mapping
+ * selects a result of 0, and the scan stops at the first fitting hole
+ * whose end reaches @guest_loaddr.
+ *
+ * NOTE(review): space above the last map entry is never examined, and
+ * (ret < 0) doubles as the not-yet-found test on a signed copy of the
+ * result -- confirm both are acceptable on 32-bit hosts.
+ *
+ * Return value for guest_base, or -1 if no hole found.
+ */
+static uintptr_t pgb_find_hole(uintptr_t guest_loaddr, uintptr_t guest_size,
+ long align, uintptr_t offset)
+{
+ GSList *maps, *iter;
+ uintptr_t this_start, this_end, next_start, brk;
+ intptr_t ret = -1;
- /* Do not use mmap_find_vma here because that is limited to the
- * guest address space. We are going to make the
- * guest address space fit whatever we're given.
- */
- real_start = (unsigned long)
- mmap((void *)current_start, host_size, PROT_NONE, flags, -1, 0);
- if (real_start == (unsigned long)-1) {
- return (unsigned long)-1;
- }
+ assert(QEMU_IS_ALIGNED(guest_loaddr, align));
+
+ maps = read_self_maps();
- /* Check to see if the address is valid. */
- if (host_start && real_start != current_start) {
- goto try_again;
+ /* Read brk after we've read the maps, which will malloc. */
+ brk = (uintptr_t)sbrk(0);
+
+ if (!maps) {
+ return pgd_find_hole_fallback(guest_size, brk, align, offset);
+ }
+
+ /* The first hole is before the first map entry. */
+ this_start = mmap_min_addr;
+
+ for (iter = maps; iter;
+ this_start = next_start, iter = g_slist_next(iter)) {
+ uintptr_t align_start, hole_size;
+
+ this_end = ((MapInfo *)iter->data)->start;
+ next_start = ((MapInfo *)iter->data)->end;
+ align_start = ROUND_UP(this_start + offset, align);
+
+ /* Skip holes that are too small. */
+ if (align_start >= this_end) {
+ continue;
+ }
+ hole_size = this_end - align_start;
+ if (hole_size < guest_size) {
+ continue;
}
- /* Ensure the address is properly aligned. */
- if (real_start & ~qemu_host_page_mask) {
- /* Ideally, we adjust like
- *
- * pages: [ ][ ][ ][ ][ ]
- * old: [ real ]
- * [ aligned ]
- * new: [ real ]
- * [ aligned ]
- *
- * But if there is something else mapped right after it,
- * then obviously it won't have room to grow, and the
- * kernel will put the new larger real someplace else with
- * unknown alignment (if we made it to here, then
- * fixed=false). Which is why we grow real by a full page
- * size, instead of by part of one; so that even if we get
- * moved, we can still guarantee alignment. But this does
- * mean that there is a padding of < 1 page both before
- * and after the aligned range; the "after" could could
- * cause problems for ARM emulation where it could butt in
- * to where we need to put the commpage.
- */
- munmap((void *)real_start, host_size);
- real_size = aligned_size + qemu_host_page_size;
- real_start = (unsigned long)
- mmap((void *)real_start, real_size, PROT_NONE, flags, -1, 0);
- if (real_start == (unsigned long)-1) {
- return (unsigned long)-1;
+ /* If this hole contains brk, give ourselves some room to grow. */
+ if (this_start <= brk && brk < this_end) {
+ hole_size -= guest_size;
+ if (sizeof(uintptr_t) == 8 && hole_size >= 1 * GiB) {
+ align_start += 1 * GiB;
+ } else if (hole_size >= 16 * MiB) {
+ align_start += 16 * MiB;
+ } else {
+ align_start = (this_end - guest_size) & -align;
+ if (align_start < this_start) {
+ continue;
+ }
}
- aligned_start = HOST_PAGE_ALIGN(real_start);
- } else {
- aligned_start = real_start;
}
-#if defined(TARGET_ARM) && !defined(TARGET_AARCH64)
- /* On 32-bit ARM, we need to also be able to map the commpage. */
- int valid = init_guest_commpage(aligned_start - guest_start,
- aligned_size + guest_start);
- if (valid == -1) {
- munmap((void *)real_start, real_size);
- return (unsigned long)-1;
- } else if (valid == 0) {
- goto try_again;
+ /* Record the lowest successful match. */
+ if (ret < 0) {
+ ret = align_start - guest_loaddr;
}
-#endif
+ /* If this hole contains the identity map, select it. */
+ if (align_start <= guest_loaddr &&
+ guest_loaddr + guest_size <= this_end) {
+ ret = 0;
+ }
+ /* If this hole ends above the identity map, stop looking. */
+ if (this_end >= guest_loaddr) {
+ break;
+ }
+ }
+ free_self_maps(maps);
- /* If nothing has said `return -1` or `goto try_again` yet,
- * then the address we have is good.
- */
- break;
-
- try_again:
- /* That address didn't work. Unmap and try a different one.
- * The address the host picked because is typically right at
- * the top of the host address space and leaves the guest with
- * no usable address space. Resort to a linear search. We
- * already compensated for mmap_min_addr, so this should not
- * happen often. Probably means we got unlucky and host
- * address space randomization put a shared library somewhere
- * inconvenient.
- *
- * This is probably a good strategy if host_start, but is
- * probably a bad strategy if not, which means we got here
- * because of trouble with ARM commpage setup.
+ return ret;
+}
+/*
+ * Choose guest_base for an image that occupies guest addresses
+ * [@orig_loaddr, @orig_hiaddr) and store it in the global guest_base.
+ *
+ * Exits with an error if @orig_hiaddr does not survive conversion to a
+ * host uintptr_t, or if pgb_find_hole() cannot place the range. The load
+ * address is rounded down to @align first. When ARM_COMMPAGE is non-zero
+ * the search is adjusted so that the commpage can be mapped as well.
+ */
+static void pgb_static(const char *image_name, abi_ulong orig_loaddr,
+ abi_ulong orig_hiaddr, long align)
+{
+ uintptr_t loaddr = orig_loaddr;
+ uintptr_t hiaddr = orig_hiaddr;
+ uintptr_t offset = 0;
+ uintptr_t addr;
+
+ if (hiaddr != orig_hiaddr) {
+ error_report("%s: requires virtual address space that the "
+ "host cannot provide (0x%" PRIx64 ")",
+ image_name, (uint64_t)orig_hiaddr);
+ exit(EXIT_FAILURE);
+ }
+
+ loaddr &= -align;
+ if (ARM_COMMPAGE) {
+ /*
+ * Extend the allocation to include the commpage.
+ * For a 64-bit host, this is just 4GiB; for a 32-bit host we
+ * need to ensure there is space below the guest_base so we
+ * can map the commpage in the place needed when the address
+ * arithmetic wraps around.
*/
- munmap((void *)real_start, real_size);
- current_start += qemu_host_page_size;
- if (host_start == current_start) {
- /* Theoretically possible if host doesn't have any suitably
- * aligned areas. Normally the first mmap will fail.
- */
- return (unsigned long)-1;
+ if (sizeof(uintptr_t) == 8 || loaddr >= 0x80000000u) {
+ hiaddr = (uintptr_t) 4 << 30;
+ } else {
+ offset = -(ARM_COMMPAGE & -align);
}
}
- qemu_log_mask(CPU_LOG_PAGE, "Reserved 0x%lx bytes of guest address space\n", host_size);
+ addr = pgb_find_hole(loaddr, hiaddr - loaddr, align, offset);
+ if (addr == -1) {
+ /*
+ * If ARM_COMMPAGE, there *might* be a non-consecutive allocation
+ * that can satisfy both. But as the normal arm32 link base address
+ * is ~32k, and we extend down to include the commpage, making the
+ * overhead only ~96k, this is unlikely.
+ */
+ error_report("%s: Unable to allocate %#zx bytes of "
+ "virtual address space", image_name,
+ (size_t)(hiaddr - loaddr));
+ exit(EXIT_FAILURE);
+ }
- return aligned_start;
+ guest_base = addr;
}
-static void probe_guest_base(const char *image_name,
- abi_ulong loaddr, abi_ulong hiaddr)
+static void pgb_dynamic(const char *image_name, long align)
{
- /* Probe for a suitable guest base address, if the user has not set
- * it explicitly, and set guest_base appropriately.
- * In case of error we will print a suitable message and exit.
+ /*
+ * The executable is dynamic and does not require a fixed address.
+ * All we need is a commpage that satisfies align.
+ * If we do not need a commpage, leave guest_base == 0.
+ *
+ * @image_name is not used on this path: failing to find a hole is
+ * an assertion failure rather than a reported error.
*/
- const char *errmsg;
- if (!have_guest_base && !reserved_va) {
- unsigned long host_start, real_start, host_size;
+ if (ARM_COMMPAGE) {
+ uintptr_t addr, commpage;
- /* Round addresses to page boundaries. */
- loaddr &= qemu_host_page_mask;
- hiaddr = HOST_PAGE_ALIGN(hiaddr);
-
- if (loaddr < mmap_min_addr) {
- host_start = HOST_PAGE_ALIGN(mmap_min_addr);
- } else {
- host_start = loaddr;
- if (host_start != loaddr) {
- errmsg = "Address overflow loading ELF binary";
- goto exit_errmsg;
- }
- }
- host_size = hiaddr - loaddr;
+ /* 64-bit hosts should have used reserved_va. */
+ assert(sizeof(uintptr_t) == 4);
- /* Setup the initial guest memory space with ranges gleaned from
- * the ELF image that is being loaded.
+ /*
+ * By putting the commpage at the first hole, that puts guest_base
+ * just above that, and maximises the positive guest addresses.
+ *
+ * The search covers guest addresses from the align-rounded
+ * commpage up to the top of the 32-bit space (-commpage bytes),
+ * with no extra offset; its result becomes guest_base.
*/
- real_start = init_guest_space(host_start, host_size, loaddr, false);
- if (real_start == (unsigned long)-1) {
- errmsg = "Unable to find space for application";
- goto exit_errmsg;
- }
- guest_base = real_start - loaddr;
+ commpage = ARM_COMMPAGE & -align;
+ addr = pgb_find_hole(commpage, -commpage, align, 0);
+ assert(addr != -1);
+ guest_base = addr;
+ }
+}
- qemu_log_mask(CPU_LOG_PAGE, "Relocating guest address space from 0x"
- TARGET_ABI_FMT_lx " to 0x%lx\n",
- loaddr, real_start);
+static void pgb_reserved_va(const char *image_name, abi_ulong guest_loaddr,
+ abi_ulong guest_hiaddr, long align)
+{
+ int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE;
+ void *addr, *test;
+ /*
+ * Choose guest_base and reserve the whole reserved_va range on the
+ * host (probe_guest_base() takes this path when reserved_va is set
+ * and have_guest_base is not). @guest_loaddr is not consulted here;
+ * the image only has to end at or below reserved_va.
+ */
+ if (guest_hiaddr > reserved_va) {
+ error_report("%s: requires more than reserved virtual "
+ "address space (0x%" PRIx64 " > 0x%lx)",
+ image_name, (uint64_t)guest_hiaddr, reserved_va);
+ exit(EXIT_FAILURE);
}
- return;
-exit_errmsg:
- fprintf(stderr, "%s: %s\n", image_name, errmsg);
- exit(-1);
+ /* Widen the "image" to the entire reserved address space. */
+ pgb_static(image_name, 0, reserved_va, align);
+
+#ifdef MAP_FIXED_NOREPLACE
+ flags |= MAP_FIXED_NOREPLACE;
+#endif
+
+ /*
+ * Reserve the memory on the host.
+ * The range starts at g2h(0) and is mapped PROT_NONE. A failed mmap
+ * is reported and exits; a mapping at any other address trips the
+ * final assertion.
+ */
+ assert(guest_base != 0);
+ test = g2h(0);
+ addr = mmap(test, reserved_va, PROT_NONE, flags, -1, 0);
+ if (addr == MAP_FAILED) {
+ error_report("Unable to reserve 0x%lx bytes of virtual address "
+ "space (%s) for use as guest address space (check your "
+ "virtual memory ulimit setting or reserve less "
+ "using -R option)", reserved_va, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ assert(addr == test);
}
+void probe_guest_base(const char *image_name, abi_ulong guest_loaddr,
+ abi_ulong guest_hiaddr)
+{
+ /*
+ * Select guest_base for the image named @image_name, then set up the
+ * commpage. Exactly one strategy is used, tested in this order:
+ * have_guest_base set -> pgb_have_guest_base()
+ * reserved_va non-zero -> pgb_reserved_va()
+ * @guest_loaddr non-zero -> pgb_static()
+ * otherwise -> pgb_dynamic()
+ * On return guest_base is asserted to be a multiple of the alignment
+ * computed below.
+ *
+ * In order to use host shmat, we must be able to honor SHMLBA.
+ */
+ uintptr_t align = MAX(SHMLBA, qemu_host_page_size);
+
+ if (have_guest_base) {
+ pgb_have_guest_base(image_name, guest_loaddr, guest_hiaddr, align);
+ } else if (reserved_va) {
+ pgb_reserved_va(image_name, guest_loaddr, guest_hiaddr, align);
+ } else if (guest_loaddr) {
+ pgb_static(image_name, guest_loaddr, guest_hiaddr, align);
+ } else {
+ pgb_dynamic(image_name, align);
+ }
+
+ /* Reserve and initialize the commpage. */
+ if (!init_guest_commpage()) {
+ /*
+ * With have_guest_base, the user has selected the address and
+ * we are trying to work with that. Otherwise, we have selected
+ * free space and init_guest_commpage must succeed.
+ */
+ assert(have_guest_base);
+ pgb_fail_in_use(image_name);
+ }
+
+ assert(QEMU_IS_ALIGNED(guest_base, align));
+ qemu_log_mask(CPU_LOG_PAGE, "Locating guest address space "
+ "@ 0x%" PRIx64 "\n", (uint64_t)guest_base);
+}
/* Load an ELF image into the address space.
}
}
- load_addr = loaddr;
- if (ehdr->e_type == ET_DYN) {
- /* The image indicates that it can be loaded anywhere. Find a
- location that can hold the memory space required. If the
- image is pre-linked, LOADDR will be non-zero. Since we do
- not supply MAP_FIXED here we'll use that address if and
- only if it remains available. */
- load_addr = target_mmap(loaddr, hiaddr - loaddr, PROT_NONE,
- MAP_PRIVATE | MAP_ANON | MAP_NORESERVE,
- -1, 0);
- if (load_addr == -1) {
- goto exit_perror;
+ if (pinterp_name != NULL) {
+ /*
+ * This is the main executable.
+ *
+ * Reserve extra space for brk.
+ * We hold on to this space while placing the interpreter
+ * and the stack, lest they be placed immediately after
+ * the data segment and block allocation from the brk.
+ *
+ * 16MB is chosen as "large enough" without being so large
+ * as to allow the result to not fit with a 32-bit guest on
+ * a 32-bit host.
+ */
+ info->reserve_brk = 16 * MiB;
+ hiaddr += info->reserve_brk;
+
+ if (ehdr->e_type == ET_EXEC) {
+ /*
+ * Make sure that the low address does not conflict with
+ * MMAP_MIN_ADDR or the QEMU application itself.
+ */
+ probe_guest_base(image_name, loaddr, hiaddr);
+ } else {
+ /*
+ * The binary is dynamic, but we still need to
+ * select guest_base. In this case we pass a size.
+ */
+ probe_guest_base(image_name, 0, hiaddr - loaddr);
}
- } else if (pinterp_name != NULL) {
- /* This is the main executable. Make sure that the low
- address does not conflict with MMAP_MIN_ADDR or the
- QEMU application itself. */
- probe_guest_base(image_name, loaddr, hiaddr);
+ }
+
+ /*
+ * Reserve address space for all of this.
+ *
+ * In the case of ET_EXEC, we supply MAP_FIXED so that we get
+ * exactly the address range that is required.
+ *
+ * Otherwise this is ET_DYN, and we are searching for a location
+ * that can hold the memory space required. If the image is
+ * pre-linked, LOADDR will be non-zero, and the kernel should
+ * honor that address if it happens to be free.
+ *
+ * In both cases, we will overwrite pages in this range with mappings
+ * from the executable.
+ */
+ load_addr = target_mmap(loaddr, hiaddr - loaddr, PROT_NONE,
+ MAP_PRIVATE | MAP_ANON | MAP_NORESERVE |
+ (ehdr->e_type == ET_EXEC ? MAP_FIXED : 0),
+ -1, 0);
+ if (load_addr == -1) {
+ goto exit_perror;
}
load_bias = load_addr - loaddr;
}
info->load_bias = load_bias;
+ info->code_offset = load_bias;
+ info->data_offset = load_bias;
info->load_addr = load_addr;
info->entry = ehdr->e_entry + load_bias;
info->start_code = -1;
vaddr_ps = TARGET_ELF_PAGESTART(vaddr);
vaddr_len = TARGET_ELF_PAGELENGTH(eppnt->p_filesz + vaddr_po);
- error = target_mmap(vaddr_ps, vaddr_len,
- elf_prot, MAP_PRIVATE | MAP_FIXED,
- image_fd, eppnt->p_offset - vaddr_po);
- if (error == -1) {
- goto exit_perror;
+ /*
+ * Some segments may be completely empty without any backing file
+ * segment, in that case just let zero_bss allocate an empty buffer
+ * for it.
+ */
+ if (eppnt->p_filesz != 0) {
+ error = target_mmap(vaddr_ps, vaddr_len, elf_prot,
+ MAP_PRIVATE | MAP_FIXED,
+ image_fd, eppnt->p_offset - vaddr_po);
+
+ if (error == -1) {
+ goto exit_perror;
+ }
}
vaddr_ef = vaddr + eppnt->p_filesz;
char *elf_interpreter = NULL;
char *scratch;
+ memset(&interp_info, 0, sizeof(interp_info));
+#ifdef TARGET_MIPS
+ interp_info.fp_abi = MIPS_ABI_FP_UNKNOWN;
+#endif
+
info->start_mmap = (abi_ulong)ELF_START_MMAP;
load_elf_image(bprm->filename, bprm->fd, info,
bprm->core_dump = &elf_core_dump;
#endif
+ /*
+ * If we reserved extra space for brk, release it now.
+ * The implementation of do_brk in syscalls.c expects to be able
+ * to mmap pages in this space.
+ */
+ if (info->reserve_brk) {
+ abi_ulong start_brk = HOST_PAGE_ALIGN(info->brk);
+ abi_ulong end_brk = HOST_PAGE_ALIGN(info->brk + info->reserve_brk);
+ target_munmap(start_brk, end_brk - start_brk);
+ }
+
return 0;
}
target_gid_t pr_gid;
target_pid_t pr_pid, pr_ppid, pr_pgrp, pr_sid;
/* Lots missing */
- char pr_fname[16]; /* filename of executable */
+ char pr_fname[16] QEMU_NONSTRING; /* filename of executable */
char pr_psargs[ELF_PRARGSZ]; /* initial part of arg list */
};
static void fill_thread_info(struct elf_note_info *info, const CPUArchState *env)
{
- CPUState *cpu = ENV_GET_CPU((CPUArchState *)env);
+ CPUState *cpu = env_cpu((CPUArchState *)env);
TaskState *ts = (TaskState *)cpu->opaque;
struct elf_thread_status *ets;
long signr, const CPUArchState *env)
{
#define NUMNOTES 3
- CPUState *cpu = ENV_GET_CPU((CPUArchState *)env);
+ CPUState *cpu = env_cpu((CPUArchState *)env);
TaskState *ts = (TaskState *)cpu->opaque;
int i;
*/
static int elf_core_dump(int signr, const CPUArchState *env)
{
- const CPUState *cpu = ENV_GET_CPU((CPUArchState *)env);
+ const CPUState *cpu = env_cpu((CPUArchState *)env);
const TaskState *ts = (const TaskState *)cpu->opaque;
struct vm_area_struct *vma = NULL;
char corefile[PATH_MAX];