#include "qemu/guest-random.h"
#include "qemu/units.h"
#include "qemu/selfmap.h"
+#include "qemu/lockable.h"
#include "qapi/error.h"
#include "qemu/error-report.h"
#include "target_signal.h"
#undef ELF_ARCH
#endif
+#ifndef TARGET_ARCH_HAS_SIGTRAMP_PAGE
+#define TARGET_ARCH_HAS_SIGTRAMP_PAGE 0
+#endif
+
+typedef struct {
+    const uint8_t *image;       /* the vdso ELF image itself */
+    const uint32_t *relocs;     /* image offsets of words needing relocation */
+    unsigned image_size;
+    unsigned reloc_count;
+    unsigned sigreturn_ofs;     /* sigreturn trampoline offset, or 0 */
+    unsigned rt_sigreturn_ofs;  /* rt_sigreturn trampoline offset, or 0 */
+} VdsoImageInfo;
+
#define ELF_OSABI ELFOSABI_SYSV
/* from personality.h */
}
#ifdef TARGET_X86_64
-#define ELF_START_MMAP 0x2aaaaab000ULL
-
#define ELF_CLASS ELFCLASS64
#define ELF_ARCH EM_X86_64
#endif
#else
-#define ELF_START_MMAP 0x80000000
-
/*
* This is used to ensure we don't load something for the wrong architecture.
*/
(*regs)[15] = tswapreg(env->regs[R_ESP]);
(*regs)[16] = tswapreg(env->segs[R_SS].selector & 0xffff);
}
-#endif
+
+/*
+ * i386 is the only target which supplies AT_SYSINFO for the vdso.
+ * All others only supply AT_SYSINFO_EHDR.
+ */
+#define DLINFO_ARCH_ITEMS (vdso_info != NULL)
+#define ARCH_DLINFO \
+ do { \
+ if (vdso_info) { \
+ NEW_AUX_ENT(AT_SYSINFO, vdso_info->entry); \
+ } \
+ } while (0)
+
+#endif /* TARGET_X86_64 */
+
+#define VDSO_HEADER "vdso.c.inc"
#define USE_ELF_CORE_DUMP
#define ELF_EXEC_PAGESIZE 4096
-#endif
+#endif /* TARGET_I386 */
#ifdef TARGET_ARM
#ifndef TARGET_AARCH64
/* 32 bit ARM definitions */
-#define ELF_START_MMAP 0x80000000
-
#define ELF_ARCH EM_ARM
#define ELF_CLASS ELFCLASS32
#define EXSTACK_DEFAULT true
ARM_HWCAP_ARM_VFPD32 = 1 << 19,
ARM_HWCAP_ARM_LPAE = 1 << 20,
ARM_HWCAP_ARM_EVTSTRM = 1 << 21,
+ ARM_HWCAP_ARM_FPHP = 1 << 22,
+ ARM_HWCAP_ARM_ASIMDHP = 1 << 23,
+ ARM_HWCAP_ARM_ASIMDDP = 1 << 24,
+ ARM_HWCAP_ARM_ASIMDFHM = 1 << 25,
+ ARM_HWCAP_ARM_ASIMDBF16 = 1 << 26,
+ ARM_HWCAP_ARM_I8MM = 1 << 27,
};
enum {
ARM_HWCAP2_ARM_SHA1 = 1 << 2,
ARM_HWCAP2_ARM_SHA2 = 1 << 3,
ARM_HWCAP2_ARM_CRC32 = 1 << 4,
+ ARM_HWCAP2_ARM_SB = 1 << 5,
+ ARM_HWCAP2_ARM_SSBS = 1 << 6,
};
/* The commpage only exists for 32 bit kernels */
static bool init_guest_commpage(void)
{
- abi_ptr commpage = HI_COMMPAGE & -qemu_host_page_size;
- void *want = g2h_untagged(commpage);
- void *addr = mmap(want, qemu_host_page_size, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ARMCPU *cpu = ARM_CPU(thread_cpu);
+ abi_ptr commpage;
+ void *want;
+ void *addr;
+
+ /*
+     * M-profile allocates a maximum of 2GB of address space, so it can
+     * never allocate the commpage. Skip it.
+ */
+ if (arm_feature(&cpu->env, ARM_FEATURE_M)) {
+ return true;
+ }
+
+ commpage = HI_COMMPAGE & -qemu_host_page_size;
+ want = g2h_untagged(commpage);
+ addr = mmap(want, qemu_host_page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (addr == MAP_FAILED) {
perror("Allocating guest commpage");
#define ELF_HWCAP get_elf_hwcap()
#define ELF_HWCAP2 get_elf_hwcap2()
-static uint32_t get_elf_hwcap(void)
+uint32_t get_elf_hwcap(void)
{
ARMCPU *cpu = ARM_CPU(thread_cpu);
uint32_t hwcaps = 0;
}
}
GET_FEATURE_ID(aa32_simdfmac, ARM_HWCAP_ARM_VFPv4);
+ /*
+ * MVFR1.FPHP and .SIMDHP must be in sync, and QEMU uses the same
+ * isar_feature function for both. The kernel reports them as two hwcaps.
+ */
+ GET_FEATURE_ID(aa32_fp16_arith, ARM_HWCAP_ARM_FPHP);
+ GET_FEATURE_ID(aa32_fp16_arith, ARM_HWCAP_ARM_ASIMDHP);
+ GET_FEATURE_ID(aa32_dp, ARM_HWCAP_ARM_ASIMDDP);
+ GET_FEATURE_ID(aa32_fhm, ARM_HWCAP_ARM_ASIMDFHM);
+ GET_FEATURE_ID(aa32_bf16, ARM_HWCAP_ARM_ASIMDBF16);
+ GET_FEATURE_ID(aa32_i8mm, ARM_HWCAP_ARM_I8MM);
return hwcaps;
}
-static uint32_t get_elf_hwcap2(void)
+uint32_t get_elf_hwcap2(void)
{
ARMCPU *cpu = ARM_CPU(thread_cpu);
uint32_t hwcaps = 0;
GET_FEATURE_ID(aa32_sha1, ARM_HWCAP2_ARM_SHA1);
GET_FEATURE_ID(aa32_sha2, ARM_HWCAP2_ARM_SHA2);
GET_FEATURE_ID(aa32_crc32, ARM_HWCAP2_ARM_CRC32);
+ GET_FEATURE_ID(aa32_sb, ARM_HWCAP2_ARM_SB);
+ GET_FEATURE_ID(aa32_ssbs, ARM_HWCAP2_ARM_SSBS);
return hwcaps;
}
+const char *elf_hwcap_str(uint32_t bit)
+{
+ static const char *hwcap_str[] = {
+ [__builtin_ctz(ARM_HWCAP_ARM_SWP )] = "swp",
+ [__builtin_ctz(ARM_HWCAP_ARM_HALF )] = "half",
+ [__builtin_ctz(ARM_HWCAP_ARM_THUMB )] = "thumb",
+ [__builtin_ctz(ARM_HWCAP_ARM_26BIT )] = "26bit",
+ [__builtin_ctz(ARM_HWCAP_ARM_FAST_MULT)] = "fast_mult",
+ [__builtin_ctz(ARM_HWCAP_ARM_FPA )] = "fpa",
+ [__builtin_ctz(ARM_HWCAP_ARM_VFP )] = "vfp",
+ [__builtin_ctz(ARM_HWCAP_ARM_EDSP )] = "edsp",
+ [__builtin_ctz(ARM_HWCAP_ARM_JAVA )] = "java",
+ [__builtin_ctz(ARM_HWCAP_ARM_IWMMXT )] = "iwmmxt",
+ [__builtin_ctz(ARM_HWCAP_ARM_CRUNCH )] = "crunch",
+ [__builtin_ctz(ARM_HWCAP_ARM_THUMBEE )] = "thumbee",
+ [__builtin_ctz(ARM_HWCAP_ARM_NEON )] = "neon",
+ [__builtin_ctz(ARM_HWCAP_ARM_VFPv3 )] = "vfpv3",
+ [__builtin_ctz(ARM_HWCAP_ARM_VFPv3D16 )] = "vfpv3d16",
+ [__builtin_ctz(ARM_HWCAP_ARM_TLS )] = "tls",
+ [__builtin_ctz(ARM_HWCAP_ARM_VFPv4 )] = "vfpv4",
+ [__builtin_ctz(ARM_HWCAP_ARM_IDIVA )] = "idiva",
+ [__builtin_ctz(ARM_HWCAP_ARM_IDIVT )] = "idivt",
+ [__builtin_ctz(ARM_HWCAP_ARM_VFPD32 )] = "vfpd32",
+ [__builtin_ctz(ARM_HWCAP_ARM_LPAE )] = "lpae",
+ [__builtin_ctz(ARM_HWCAP_ARM_EVTSTRM )] = "evtstrm",
+ [__builtin_ctz(ARM_HWCAP_ARM_FPHP )] = "fphp",
+ [__builtin_ctz(ARM_HWCAP_ARM_ASIMDHP )] = "asimdhp",
+ [__builtin_ctz(ARM_HWCAP_ARM_ASIMDDP )] = "asimddp",
+ [__builtin_ctz(ARM_HWCAP_ARM_ASIMDFHM )] = "asimdfhm",
+ [__builtin_ctz(ARM_HWCAP_ARM_ASIMDBF16)] = "asimdbf16",
+ [__builtin_ctz(ARM_HWCAP_ARM_I8MM )] = "i8mm",
+ };
+
+ return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL;
+}
+
+const char *elf_hwcap2_str(uint32_t bit)
+{
+ static const char *hwcap_str[] = {
+ [__builtin_ctz(ARM_HWCAP2_ARM_AES )] = "aes",
+ [__builtin_ctz(ARM_HWCAP2_ARM_PMULL)] = "pmull",
+ [__builtin_ctz(ARM_HWCAP2_ARM_SHA1 )] = "sha1",
+ [__builtin_ctz(ARM_HWCAP2_ARM_SHA2 )] = "sha2",
+ [__builtin_ctz(ARM_HWCAP2_ARM_CRC32)] = "crc32",
+ [__builtin_ctz(ARM_HWCAP2_ARM_SB )] = "sb",
+ [__builtin_ctz(ARM_HWCAP2_ARM_SSBS )] = "ssbs",
+ };
+
+ return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL;
+}
+
#undef GET_FEATURE
#undef GET_FEATURE_ID
static const char *get_elf_platform(void)
{
- CPUARMState *env = thread_cpu->env_ptr;
+ CPUARMState *env = cpu_env(thread_cpu);
#if TARGET_BIG_ENDIAN
# define END "b"
#else
/* 64 bit ARM definitions */
-#define ELF_START_MMAP 0x80000000
#define ELF_ARCH EM_AARCH64
#define ELF_CLASS ELFCLASS64
ARM_HWCAP2_A64_SME_B16F32 = 1 << 28,
ARM_HWCAP2_A64_SME_F32F32 = 1 << 29,
ARM_HWCAP2_A64_SME_FA64 = 1 << 30,
+ ARM_HWCAP2_A64_WFXT = 1ULL << 31,
+ ARM_HWCAP2_A64_EBF16 = 1ULL << 32,
+ ARM_HWCAP2_A64_SVE_EBF16 = 1ULL << 33,
+ ARM_HWCAP2_A64_CSSC = 1ULL << 34,
+ ARM_HWCAP2_A64_RPRFM = 1ULL << 35,
+ ARM_HWCAP2_A64_SVE2P1 = 1ULL << 36,
+ ARM_HWCAP2_A64_SME2 = 1ULL << 37,
+ ARM_HWCAP2_A64_SME2P1 = 1ULL << 38,
+ ARM_HWCAP2_A64_SME_I16I32 = 1ULL << 39,
+ ARM_HWCAP2_A64_SME_BI32I32 = 1ULL << 40,
+ ARM_HWCAP2_A64_SME_B16B16 = 1ULL << 41,
+ ARM_HWCAP2_A64_SME_F16F16 = 1ULL << 42,
+ ARM_HWCAP2_A64_MOPS = 1ULL << 43,
+ ARM_HWCAP2_A64_HBC = 1ULL << 44,
};
#define ELF_HWCAP get_elf_hwcap()
#define GET_FEATURE_ID(feat, hwcap) \
do { if (cpu_isar_feature(feat, cpu)) { hwcaps |= hwcap; } } while (0)
-static uint32_t get_elf_hwcap(void)
+uint32_t get_elf_hwcap(void)
{
ARMCPU *cpu = ARM_CPU(thread_cpu);
uint32_t hwcaps = 0;
return hwcaps;
}
-static uint32_t get_elf_hwcap2(void)
+uint64_t get_elf_hwcap2(void)
{
ARMCPU *cpu = ARM_CPU(thread_cpu);
-    uint32_t hwcaps = 0;
+    uint64_t hwcaps = 0;
GET_FEATURE_ID(aa64_sme_f64f64, ARM_HWCAP2_A64_SME_F64F64);
GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64);
GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64);
+ GET_FEATURE_ID(aa64_hbc, ARM_HWCAP2_A64_HBC);
+ GET_FEATURE_ID(aa64_mops, ARM_HWCAP2_A64_MOPS);
return hwcaps;
}
+const char *elf_hwcap_str(uint32_t bit)
+{
+ static const char *hwcap_str[] = {
+ [__builtin_ctz(ARM_HWCAP_A64_FP )] = "fp",
+ [__builtin_ctz(ARM_HWCAP_A64_ASIMD )] = "asimd",
+ [__builtin_ctz(ARM_HWCAP_A64_EVTSTRM )] = "evtstrm",
+ [__builtin_ctz(ARM_HWCAP_A64_AES )] = "aes",
+ [__builtin_ctz(ARM_HWCAP_A64_PMULL )] = "pmull",
+ [__builtin_ctz(ARM_HWCAP_A64_SHA1 )] = "sha1",
+ [__builtin_ctz(ARM_HWCAP_A64_SHA2 )] = "sha2",
+ [__builtin_ctz(ARM_HWCAP_A64_CRC32 )] = "crc32",
+ [__builtin_ctz(ARM_HWCAP_A64_ATOMICS )] = "atomics",
+ [__builtin_ctz(ARM_HWCAP_A64_FPHP )] = "fphp",
+ [__builtin_ctz(ARM_HWCAP_A64_ASIMDHP )] = "asimdhp",
+ [__builtin_ctz(ARM_HWCAP_A64_CPUID )] = "cpuid",
+ [__builtin_ctz(ARM_HWCAP_A64_ASIMDRDM)] = "asimdrdm",
+ [__builtin_ctz(ARM_HWCAP_A64_JSCVT )] = "jscvt",
+ [__builtin_ctz(ARM_HWCAP_A64_FCMA )] = "fcma",
+ [__builtin_ctz(ARM_HWCAP_A64_LRCPC )] = "lrcpc",
+ [__builtin_ctz(ARM_HWCAP_A64_DCPOP )] = "dcpop",
+ [__builtin_ctz(ARM_HWCAP_A64_SHA3 )] = "sha3",
+ [__builtin_ctz(ARM_HWCAP_A64_SM3 )] = "sm3",
+ [__builtin_ctz(ARM_HWCAP_A64_SM4 )] = "sm4",
+ [__builtin_ctz(ARM_HWCAP_A64_ASIMDDP )] = "asimddp",
+ [__builtin_ctz(ARM_HWCAP_A64_SHA512 )] = "sha512",
+ [__builtin_ctz(ARM_HWCAP_A64_SVE )] = "sve",
+ [__builtin_ctz(ARM_HWCAP_A64_ASIMDFHM)] = "asimdfhm",
+ [__builtin_ctz(ARM_HWCAP_A64_DIT )] = "dit",
+ [__builtin_ctz(ARM_HWCAP_A64_USCAT )] = "uscat",
+ [__builtin_ctz(ARM_HWCAP_A64_ILRCPC )] = "ilrcpc",
+ [__builtin_ctz(ARM_HWCAP_A64_FLAGM )] = "flagm",
+ [__builtin_ctz(ARM_HWCAP_A64_SSBS )] = "ssbs",
+ [__builtin_ctz(ARM_HWCAP_A64_SB )] = "sb",
+ [__builtin_ctz(ARM_HWCAP_A64_PACA )] = "paca",
+ [__builtin_ctz(ARM_HWCAP_A64_PACG )] = "pacg",
+ };
+
+ return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL;
+}
+
+const char *elf_hwcap2_str(uint32_t bit)
+{
+ static const char *hwcap_str[] = {
+ [__builtin_ctz(ARM_HWCAP2_A64_DCPODP )] = "dcpodp",
+ [__builtin_ctz(ARM_HWCAP2_A64_SVE2 )] = "sve2",
+ [__builtin_ctz(ARM_HWCAP2_A64_SVEAES )] = "sveaes",
+ [__builtin_ctz(ARM_HWCAP2_A64_SVEPMULL )] = "svepmull",
+ [__builtin_ctz(ARM_HWCAP2_A64_SVEBITPERM )] = "svebitperm",
+ [__builtin_ctz(ARM_HWCAP2_A64_SVESHA3 )] = "svesha3",
+ [__builtin_ctz(ARM_HWCAP2_A64_SVESM4 )] = "svesm4",
+ [__builtin_ctz(ARM_HWCAP2_A64_FLAGM2 )] = "flagm2",
+ [__builtin_ctz(ARM_HWCAP2_A64_FRINT )] = "frint",
+ [__builtin_ctz(ARM_HWCAP2_A64_SVEI8MM )] = "svei8mm",
+ [__builtin_ctz(ARM_HWCAP2_A64_SVEF32MM )] = "svef32mm",
+ [__builtin_ctz(ARM_HWCAP2_A64_SVEF64MM )] = "svef64mm",
+ [__builtin_ctz(ARM_HWCAP2_A64_SVEBF16 )] = "svebf16",
+ [__builtin_ctz(ARM_HWCAP2_A64_I8MM )] = "i8mm",
+ [__builtin_ctz(ARM_HWCAP2_A64_BF16 )] = "bf16",
+ [__builtin_ctz(ARM_HWCAP2_A64_DGH )] = "dgh",
+ [__builtin_ctz(ARM_HWCAP2_A64_RNG )] = "rng",
+ [__builtin_ctz(ARM_HWCAP2_A64_BTI )] = "bti",
+ [__builtin_ctz(ARM_HWCAP2_A64_MTE )] = "mte",
+ [__builtin_ctz(ARM_HWCAP2_A64_ECV )] = "ecv",
+ [__builtin_ctz(ARM_HWCAP2_A64_AFP )] = "afp",
+ [__builtin_ctz(ARM_HWCAP2_A64_RPRES )] = "rpres",
+ [__builtin_ctz(ARM_HWCAP2_A64_MTE3 )] = "mte3",
+ [__builtin_ctz(ARM_HWCAP2_A64_SME )] = "sme",
+ [__builtin_ctz(ARM_HWCAP2_A64_SME_I16I64 )] = "smei16i64",
+ [__builtin_ctz(ARM_HWCAP2_A64_SME_F64F64 )] = "smef64f64",
+ [__builtin_ctz(ARM_HWCAP2_A64_SME_I8I32 )] = "smei8i32",
+ [__builtin_ctz(ARM_HWCAP2_A64_SME_F16F32 )] = "smef16f32",
+ [__builtin_ctz(ARM_HWCAP2_A64_SME_B16F32 )] = "smeb16f32",
+ [__builtin_ctz(ARM_HWCAP2_A64_SME_F32F32 )] = "smef32f32",
+ [__builtin_ctz(ARM_HWCAP2_A64_SME_FA64 )] = "smefa64",
+ [__builtin_ctz(ARM_HWCAP2_A64_WFXT )] = "wfxt",
+ [__builtin_ctzll(ARM_HWCAP2_A64_EBF16 )] = "ebf16",
+ [__builtin_ctzll(ARM_HWCAP2_A64_SVE_EBF16 )] = "sveebf16",
+ [__builtin_ctzll(ARM_HWCAP2_A64_CSSC )] = "cssc",
+ [__builtin_ctzll(ARM_HWCAP2_A64_RPRFM )] = "rprfm",
+ [__builtin_ctzll(ARM_HWCAP2_A64_SVE2P1 )] = "sve2p1",
+ [__builtin_ctzll(ARM_HWCAP2_A64_SME2 )] = "sme2",
+ [__builtin_ctzll(ARM_HWCAP2_A64_SME2P1 )] = "sme2p1",
+ [__builtin_ctzll(ARM_HWCAP2_A64_SME_I16I32 )] = "smei16i32",
+ [__builtin_ctzll(ARM_HWCAP2_A64_SME_BI32I32)] = "smebi32i32",
+ [__builtin_ctzll(ARM_HWCAP2_A64_SME_B16B16 )] = "smeb16b16",
+ [__builtin_ctzll(ARM_HWCAP2_A64_SME_F16F16 )] = "smef16f16",
+ [__builtin_ctzll(ARM_HWCAP2_A64_MOPS )] = "mops",
+ [__builtin_ctzll(ARM_HWCAP2_A64_HBC )] = "hbc",
+ };
+
+ return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL;
+}
+
#undef GET_FEATURE_ID
+#if TARGET_BIG_ENDIAN
+# define VDSO_HEADER "vdso-be.c.inc"
+#else
+# define VDSO_HEADER "vdso-le.c.inc"
+#endif
+
#endif /* not TARGET_AARCH64 */
#endif /* TARGET_ARM */
#ifdef TARGET_SPARC
#ifdef TARGET_SPARC64
-#define ELF_START_MMAP 0x80000000
#define ELF_HWCAP (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | HWCAP_SPARC_SWAP \
| HWCAP_SPARC_MULDIV | HWCAP_SPARC_V9)
#ifndef TARGET_ABI32
#define ELF_CLASS ELFCLASS64
#define ELF_ARCH EM_SPARCV9
#else
-#define ELF_START_MMAP 0x80000000
#define ELF_HWCAP (HWCAP_SPARC_FLUSH | HWCAP_SPARC_STBAR | HWCAP_SPARC_SWAP \
| HWCAP_SPARC_MULDIV)
#define ELF_CLASS ELFCLASS32
#ifdef TARGET_PPC
#define ELF_MACHINE PPC_ELF_MACHINE
-#define ELF_START_MMAP 0x80000000
#if defined(TARGET_PPC64)
#ifdef TARGET_LOONGARCH64
-#define ELF_START_MMAP 0x80000000
-
#define ELF_CLASS ELFCLASS64
#define ELF_ARCH EM_LOONGARCH
#define EXSTACK_DEFAULT true
hwcaps |= HWCAP_LOONGARCH_LAM;
}
+ if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LSX)) {
+ hwcaps |= HWCAP_LOONGARCH_LSX;
+ }
+
+ if (FIELD_EX32(cpu->env.cpucfg[2], CPUCFG2, LASX)) {
+ hwcaps |= HWCAP_LOONGARCH_LASX;
+ }
+
return hwcaps;
}
#ifdef TARGET_MIPS
-#define ELF_START_MMAP 0x80000000
-
#ifdef TARGET_MIPS64
#define ELF_CLASS ELFCLASS64
#else
#ifdef TARGET_MICROBLAZE
-#define ELF_START_MMAP 0x80000000
-
#define elf_check_arch(x) ( (x) == EM_MICROBLAZE || (x) == EM_MICROBLAZE_OLD)
#define ELF_CLASS ELFCLASS32
#ifdef TARGET_NIOS2
-#define ELF_START_MMAP 0x80000000
-
#define elf_check_arch(x) ((x) == EM_ALTERA_NIOS2)
#define ELF_CLASS ELFCLASS32
#ifdef TARGET_OPENRISC
-#define ELF_START_MMAP 0x08000000
-
#define ELF_ARCH EM_OPENRISC
#define ELF_CLASS ELFCLASS32
#define ELF_DATA ELFDATA2MSB
#ifdef TARGET_SH4
-#define ELF_START_MMAP 0x80000000
-
#define ELF_CLASS ELFCLASS32
#define ELF_ARCH EM_SH
#ifdef TARGET_CRIS
-#define ELF_START_MMAP 0x80000000
-
#define ELF_CLASS ELFCLASS32
#define ELF_ARCH EM_CRIS
#ifdef TARGET_M68K
-#define ELF_START_MMAP 0x80000000
-
#define ELF_CLASS ELFCLASS32
#define ELF_ARCH EM_68K
#ifdef TARGET_ALPHA
-#define ELF_START_MMAP (0x30000000000ULL)
-
#define ELF_CLASS ELFCLASS64
#define ELF_ARCH EM_ALPHA
#ifdef TARGET_S390X
-#define ELF_START_MMAP (0x20000000000ULL)
-
#define ELF_CLASS ELFCLASS64
#define ELF_DATA ELFDATA2MSB
#define ELF_ARCH EM_S390
#define GET_FEATURE(_feat, _hwcap) \
do { if (s390_has_feat(_feat)) { hwcap |= _hwcap; } } while (0)
-static uint32_t get_elf_hwcap(void)
+uint32_t get_elf_hwcap(void)
{
/*
* Let's assume we always have esan3 and zarch.
}
GET_FEATURE(S390_FEAT_VECTOR, HWCAP_S390_VXRS);
GET_FEATURE(S390_FEAT_VECTOR_ENH, HWCAP_S390_VXRS_EXT);
+ GET_FEATURE(S390_FEAT_VECTOR_ENH2, HWCAP_S390_VXRS_EXT2);
return hwcap;
}
+const char *elf_hwcap_str(uint32_t bit)
+{
+ static const char *hwcap_str[] = {
+ [HWCAP_S390_NR_ESAN3] = "esan3",
+ [HWCAP_S390_NR_ZARCH] = "zarch",
+ [HWCAP_S390_NR_STFLE] = "stfle",
+ [HWCAP_S390_NR_MSA] = "msa",
+ [HWCAP_S390_NR_LDISP] = "ldisp",
+ [HWCAP_S390_NR_EIMM] = "eimm",
+ [HWCAP_S390_NR_DFP] = "dfp",
+ [HWCAP_S390_NR_HPAGE] = "edat",
+ [HWCAP_S390_NR_ETF3EH] = "etf3eh",
+ [HWCAP_S390_NR_HIGH_GPRS] = "highgprs",
+ [HWCAP_S390_NR_TE] = "te",
+ [HWCAP_S390_NR_VXRS] = "vx",
+ [HWCAP_S390_NR_VXRS_BCD] = "vxd",
+ [HWCAP_S390_NR_VXRS_EXT] = "vxe",
+ [HWCAP_S390_NR_GS] = "gs",
+ [HWCAP_S390_NR_VXRS_EXT2] = "vxe2",
+ [HWCAP_S390_NR_VXRS_PDE] = "vxp",
+ [HWCAP_S390_NR_SORT] = "sort",
+ [HWCAP_S390_NR_DFLT] = "dflt",
+ [HWCAP_S390_NR_NNPA] = "nnpa",
+ [HWCAP_S390_NR_PCI_MIO] = "pcimio",
+ [HWCAP_S390_NR_SIE] = "sie",
+ };
+
+ return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL;
+}
+
static inline void init_thread(struct target_pt_regs *regs, struct image_info *infop)
{
regs->psw.addr = infop->entry;
- regs->psw.mask = PSW_MASK_64 | PSW_MASK_32;
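+    /*
+     * Enable DAT, interrupts and machine checks, and run in 64-bit
+     * problem state, matching the PSW mask the kernel builds for a
+     * new user process.
+     */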
+ regs->psw.mask = PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \
+ PSW_MASK_MCHECK | PSW_MASK_PSTATE | PSW_MASK_64 | \
+ PSW_MASK_32;
regs->gprs[15] = infop->start_stack;
}
#ifdef TARGET_RISCV
-#define ELF_START_MMAP 0x80000000
#define ELF_ARCH EM_RISCV
#ifdef TARGET_RISCV32
#define MISA_BIT(EXT) (1 << (EXT - 'A'))
RISCVCPU *cpu = RISCV_CPU(thread_cpu);
uint32_t mask = MISA_BIT('I') | MISA_BIT('M') | MISA_BIT('A')
- | MISA_BIT('F') | MISA_BIT('D') | MISA_BIT('C');
+ | MISA_BIT('F') | MISA_BIT('D') | MISA_BIT('C')
+ | MISA_BIT('V');
return cpu->env.misa_ext & mask;
#undef MISA_BIT
#ifdef TARGET_HPPA
-#define ELF_START_MMAP 0x80000000
#define ELF_CLASS ELFCLASS32
#define ELF_ARCH EM_PARISC
#define ELF_PLATFORM "PARISC"
#ifdef TARGET_XTENSA
-#define ELF_START_MMAP 0x20000000
-
#define ELF_CLASS ELFCLASS32
#define ELF_ARCH EM_XTENSA
#ifdef TARGET_HEXAGON
-#define ELF_START_MMAP 0x20000000
-
#define ELF_CLASS ELFCLASS32
#define ELF_ARCH EM_HEXAGON
#define ZMAGIC 0413
#define QMAGIC 0314
-/* Necessary parameters */
-#define TARGET_ELF_EXEC_PAGESIZE \
- (((eppnt->p_align & ~qemu_host_page_mask) != 0) ? \
- TARGET_PAGE_SIZE : MAX(qemu_host_page_size, TARGET_PAGE_SIZE))
-#define TARGET_ELF_PAGELENGTH(_v) ROUND_UP((_v), TARGET_ELF_EXEC_PAGESIZE)
-#define TARGET_ELF_PAGESTART(_v) ((_v) & \
- ~(abi_ulong)(TARGET_ELF_EXEC_PAGESIZE-1))
-#define TARGET_ELF_PAGEOFFSET(_v) ((_v) & (TARGET_ELF_EXEC_PAGESIZE-1))
-
#define DLINFO_ITEMS 16
static inline void memcpy_fromfs(void * to, const void * from, unsigned long n)
#ifdef USE_ELF_CORE_DUMP
static int elf_core_dump(int, const CPUArchState *);
#endif /* USE_ELF_CORE_DUMP */
-static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias);
+static void load_symbols(struct elfhdr *hdr, const ImageSource *src,
+ abi_ulong load_bias);
/* Verify the portions of EHDR within E_IDENT for the target.
This can be performed before bswapping the entire header. */
}
}
-/* Map and zero the bss. We need to explicitly zero any fractional pages
- after the data section (i.e. bss). */
-static void zero_bss(abi_ulong elf_bss, abi_ulong last_bss, int prot)
-{
- uintptr_t host_start, host_map_start, host_end;
-
- last_bss = TARGET_PAGE_ALIGN(last_bss);
-
- /* ??? There is confusion between qemu_real_host_page_size and
- qemu_host_page_size here and elsewhere in target_mmap, which
- may lead to the end of the data section mapping from the file
- not being mapped. At least there was an explicit test and
- comment for that here, suggesting that "the file size must
- be known". The comment probably pre-dates the introduction
- of the fstat system call in target_mmap which does in fact
- find out the size. What isn't clear is if the workaround
- here is still actually needed. For now, continue with it,
- but merge it with the "normal" mmap that would allocate the bss. */
-
- host_start = (uintptr_t) g2h_untagged(elf_bss);
- host_end = (uintptr_t) g2h_untagged(last_bss);
- host_map_start = REAL_HOST_PAGE_ALIGN(host_start);
-
- if (host_map_start < host_end) {
- void *p = mmap((void *)host_map_start, host_end - host_map_start,
- prot, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (p == MAP_FAILED) {
- perror("cannot mmap brk");
- exit(-1);
- }
+/**
+ * zero_bss:
+ *
+ * Map and zero the bss. We need to explicitly zero any fractional pages
+ * after the data section (i.e. bss). Return false on mapping failure.
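+ *
+ * In detail: the tail of the last file-backed page, from @start_bss
+ * up to its page boundary, is cleared by hand, while any remaining
+ * whole pages up to @end_bss receive a fresh anonymous mapping
+ * (which the kernel supplies pre-zeroed).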
+ */
+static bool zero_bss(abi_ulong start_bss, abi_ulong end_bss,
+ int prot, Error **errp)
+{
+ abi_ulong align_bss;
+
+ /* We only expect writable bss; the code segment shouldn't need this. */
+ if (!(prot & PROT_WRITE)) {
+ error_setg(errp, "PT_LOAD with non-writable bss");
+ return false;
}
- /* Ensure that the bss page(s) are valid */
- if ((page_get_flags(last_bss-1) & prot) != prot) {
- page_set_flags(elf_bss & TARGET_PAGE_MASK, last_bss - 1,
- prot | PAGE_VALID);
+ align_bss = TARGET_PAGE_ALIGN(start_bss);
+ end_bss = TARGET_PAGE_ALIGN(end_bss);
+
+ if (start_bss < align_bss) {
+ int flags = page_get_flags(start_bss);
+
+ if (!(flags & PAGE_BITS)) {
+ /*
+ * The whole address space of the executable was reserved
+ * at the start, therefore all pages will be VALID.
+ * But assuming there are no PROT_NONE PT_LOAD segments,
+             * a PROT_NONE page means no data, i.e. all bss, and we can
+             * simply extend the new anon mapping back to the start
+             * of the page of bss.
+ */
+ align_bss -= TARGET_PAGE_SIZE;
+ } else {
+ /*
+ * The start of the bss shares a page with something.
+ * The only thing that we expect is the data section,
+ * which would already be marked writable.
+ * Overlapping the RX code segment seems malformed.
+ */
+ if (!(flags & PAGE_WRITE)) {
+ error_setg(errp, "PT_LOAD with bss overlapping "
+ "non-writable page");
+ return false;
+ }
+
+ /* The page is already mapped and writable. */
+ memset(g2h_untagged(start_bss), 0, align_bss - start_bss);
+ }
}
- if (host_start < host_map_start) {
- memset((void *)host_start, 0, host_map_start - host_start);
+ if (align_bss < end_bss &&
+ target_mmap(align_bss, end_bss - align_bss, prot,
+ MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == -1) {
+ error_setg_errno(errp, errno, "Error mapping bss");
+ return false;
}
+ return true;
}
#if defined(TARGET_ARM)
static abi_ulong create_elf_tables(abi_ulong p, int argc, int envc,
struct elfhdr *exec,
struct image_info *info,
- struct image_info *interp_info)
+ struct image_info *interp_info,
+ struct image_info *vdso_info)
{
abi_ulong sp;
abi_ulong u_argc, u_argv, u_envp, u_auxv;
}
size = (DLINFO_ITEMS + 1) * 2;
- if (k_base_platform)
+ if (k_base_platform) {
size += 2;
- if (k_platform)
+ }
+ if (k_platform) {
+ size += 2;
+ }
+ if (vdso_info) {
size += 2;
+ }
#ifdef DLINFO_ARCH_ITEMS
size += DLINFO_ARCH_ITEMS * 2;
#endif
if (u_platform) {
NEW_AUX_ENT(AT_PLATFORM, u_platform);
}
+ if (vdso_info) {
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, vdso_info->load_addr);
+ }
NEW_AUX_ENT (AT_NULL, 0);
#undef NEW_AUX_ENT
#endif
#endif
-static void pgb_fail_in_use(const char *image_name)
+/**
+ * pgb_try_mmap:
+ * @addr: host start address
+ * @addr_last: host last address
+ * @keep: do not unmap the probe region
+ *
+ * Return 1 if [@addr, @addr_last] is not mapped in the host,
+ * return 0 if it is not available to map, and -1 on mmap error.
+ * If @keep, the region is left mapped on success, otherwise unmapped.
+ */
+static int pgb_try_mmap(uintptr_t addr, uintptr_t addr_last, bool keep)
{
- error_report("%s: requires virtual address space that is in use "
- "(omit the -B option or choose a different value)",
- image_name);
- exit(EXIT_FAILURE);
+ size_t size = addr_last - addr + 1;
+ void *p = mmap((void *)addr, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE |
+ MAP_NORESERVE | MAP_FIXED_NOREPLACE, -1, 0);
+ int ret;
+
+ if (p == MAP_FAILED) {
+ return errno == EEXIST ? 0 : -1;
+ }
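+    /*
+     * If the host lacks MAP_FIXED_NOREPLACE (osdep.h defines it as 0
+     * when missing), the kernel may have placed the mapping elsewhere;
+     * p != addr then means the requested range is unavailable.
+     */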
+ ret = p == (void *)addr;
+ if (!keep || !ret) {
+ munmap(p, size);
+ }
+ return ret;
}
-static void pgb_have_guest_base(const char *image_name, abi_ulong guest_loaddr,
- abi_ulong guest_hiaddr, long align)
+/**
+ * pgb_try_mmap_skip_brk:
+ * @addr: host start address
+ * @addr_last: host last address
+ * @brk: host brk
+ * @keep: do not unmap the probe region
+ *
+ * Like pgb_try_mmap, but additionally decline any range overlapping
+ * the 16 MiB following @brk, keeping that space free for brk growth.
+ */
+static int pgb_try_mmap_skip_brk(uintptr_t addr, uintptr_t addr_last,
+ uintptr_t brk, bool keep)
{
- const int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE;
- void *addr, *test;
+ uintptr_t brk_last = brk + 16 * MiB - 1;
- if (!QEMU_IS_ALIGNED(guest_base, align)) {
- fprintf(stderr, "Requested guest base %p does not satisfy "
- "host minimum alignment (0x%lx)\n",
- (void *)guest_base, align);
- exit(EXIT_FAILURE);
+ /* Do not map anything close to the host brk. */
+ if (addr <= brk_last && brk <= addr_last) {
+ return 0;
}
+ return pgb_try_mmap(addr, addr_last, keep);
+}
- /* Sanity check the guest binary. */
- if (reserved_va) {
- if (guest_hiaddr > reserved_va) {
- error_report("%s: requires more than reserved virtual "
- "address space (0x%" PRIx64 " > 0x%lx)",
- image_name, (uint64_t)guest_hiaddr, reserved_va);
- exit(EXIT_FAILURE);
+/**
+ * pgb_try_mmap_set:
+ * @ga: set of guest addrs
+ * @base: guest_base
+ * @brk: host brk
+ *
+ * Return true if all @ga can be mapped by the host at @base.
+ * On success, retain the mapping at index 0 for reserved_va.
+ */
+
+typedef struct PGBAddrs {
+ uintptr_t bounds[3][2]; /* start/last pairs */
+ int nbounds;
+} PGBAddrs;
+
+static bool pgb_try_mmap_set(const PGBAddrs *ga, uintptr_t base, uintptr_t brk)
+{
+ for (int i = ga->nbounds - 1; i >= 0; --i) {
+ if (pgb_try_mmap_skip_brk(ga->bounds[i][0] + base,
+ ga->bounds[i][1] + base,
+ brk, i == 0 && reserved_va) <= 0) {
+ return false;
}
+ }
+ return true;
+}
+
+/**
+ * pgb_addr_set:
+ * @ga: output set of guest addrs
+ * @guest_loaddr: guest image low address
+ * @guest_loaddr: guest image high address
+ * @identity: create for identity mapping
+ *
+ * Fill in @ga with the image, COMMPAGE and NULL page.
+ */
+static bool pgb_addr_set(PGBAddrs *ga, abi_ulong guest_loaddr,
+ abi_ulong guest_hiaddr, bool try_identity)
+{
+ int n;
+
+ /*
+ * With a low commpage, or a guest mapped very low,
+ * we may not be able to use the identity map.
+ */
+ if (try_identity) {
+ if (LO_COMMPAGE != -1 && LO_COMMPAGE < mmap_min_addr) {
+ return false;
+ }
+ if (guest_loaddr != 0 && guest_loaddr < mmap_min_addr) {
+ return false;
+ }
+ }
+
+ memset(ga, 0, sizeof(*ga));
+ n = 0;
+
+ if (reserved_va) {
+ ga->bounds[n][0] = try_identity ? mmap_min_addr : 0;
+ ga->bounds[n][1] = reserved_va;
+ n++;
+ /* LO_COMMPAGE and NULL handled by reserving from 0. */
} else {
-#if HOST_LONG_BITS < TARGET_ABI_BITS
- if ((guest_hiaddr - guest_base) > ~(uintptr_t)0) {
- error_report("%s: requires more virtual address space "
- "than the host can provide (0x%" PRIx64 ")",
- image_name, (uint64_t)guest_hiaddr + 1 - guest_base);
- exit(EXIT_FAILURE);
+ /* Add any LO_COMMPAGE or NULL page. */
+ if (LO_COMMPAGE != -1) {
+ ga->bounds[n][0] = 0;
+ ga->bounds[n][1] = LO_COMMPAGE + TARGET_PAGE_SIZE - 1;
+ n++;
+ } else if (!try_identity) {
+ ga->bounds[n][0] = 0;
+ ga->bounds[n][1] = TARGET_PAGE_SIZE - 1;
+ n++;
+ }
+
+ /* Add the guest image for ET_EXEC. */
+ if (guest_loaddr) {
+ ga->bounds[n][0] = guest_loaddr;
+ ga->bounds[n][1] = guest_hiaddr;
+ n++;
}
-#endif
}
/*
- * Expand the allocation to the entire reserved_va.
- * Exclude the mmap_min_addr hole.
+ * Temporarily disable
+ * "comparison is always false due to limited range of data type"
+     * due to comparison between unsigned and (possibly) 0.
*/
- if (reserved_va) {
- guest_loaddr = (guest_base >= mmap_min_addr ? 0
- : mmap_min_addr - guest_base);
- guest_hiaddr = reserved_va;
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wtype-limits"
+
+ /* Add any HI_COMMPAGE not covered by reserved_va. */
+ if (reserved_va < HI_COMMPAGE) {
+ ga->bounds[n][0] = HI_COMMPAGE & qemu_host_page_mask;
+ ga->bounds[n][1] = HI_COMMPAGE + TARGET_PAGE_SIZE - 1;
+ n++;
}
- /* Reserve the address space for the binary, or reserved_va. */
- test = g2h_untagged(guest_loaddr);
- addr = mmap(test, guest_hiaddr - guest_loaddr + 1, PROT_NONE, flags, -1, 0);
- if (test != addr) {
+#pragma GCC diagnostic pop
+
+ ga->nbounds = n;
+ return true;
+}
+
+static void pgb_fail_in_use(const char *image_name)
+{
+ error_report("%s: requires virtual address space that is in use "
+ "(omit the -B option or choose a different value)",
+ image_name);
+ exit(EXIT_FAILURE);
+}
+
+static void pgb_fixed(const char *image_name, uintptr_t guest_loaddr,
+ uintptr_t guest_hiaddr, uintptr_t align)
+{
+ PGBAddrs ga;
+ uintptr_t brk = (uintptr_t)sbrk(0);
+
+ if (!QEMU_IS_ALIGNED(guest_base, align)) {
+ fprintf(stderr, "Requested guest base %p does not satisfy "
+ "host minimum alignment (0x%" PRIxPTR ")\n",
+ (void *)guest_base, align);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, !guest_base)
+ || !pgb_try_mmap_set(&ga, guest_base, brk)) {
pgb_fail_in_use(image_name);
}
- qemu_log_mask(CPU_LOG_PAGE,
- "%s: base @ %p for %" PRIu64 " bytes\n",
- __func__, addr, (uint64_t)guest_hiaddr - guest_loaddr + 1);
}
/**
- * pgd_find_hole_fallback: potential mmap address
- * @guest_size: size of available space
- * @brk: location of break
- * @align: memory alignment
+ * pgb_find_fallback:
*
- * This is a fallback method for finding a hole in the host address
- * space if we don't have the benefit of being able to access
- * /proc/self/map. It can potentially take a very long time as we can
- * only dumbly iterate up the host address space seeing if the
- * allocation would work.
+ * This is a fallback method for finding holes in the host address space
+ * if we don't have the benefit of being able to access /proc/self/maps.
+ * It can potentially take a very long time as we can only dumbly iterate
+ * up the host address space seeing if the allocation would work.
*/
-static uintptr_t pgd_find_hole_fallback(uintptr_t guest_size, uintptr_t brk,
- long align, uintptr_t offset)
+static uintptr_t pgb_find_fallback(const PGBAddrs *ga, uintptr_t align,
+ uintptr_t brk)
{
- uintptr_t base;
+ /* TODO: come up with a better estimate of how much to skip. */
+ uintptr_t skip = sizeof(uintptr_t) == 4 ? MiB : GiB;
- /* Start (aligned) at the bottom and work our way up */
- base = ROUND_UP(mmap_min_addr, align);
-
- while (true) {
- uintptr_t align_start, end;
- align_start = ROUND_UP(base, align);
- end = align_start + guest_size + offset;
-
- /* if brk is anywhere in the range give ourselves some room to grow. */
- if (align_start <= brk && brk < end) {
- base = brk + (16 * MiB);
- continue;
- } else if (align_start + guest_size < align_start) {
- /* we have run out of space */
+ for (uintptr_t base = skip; ; base += skip) {
+ base = ROUND_UP(base, align);
+ if (pgb_try_mmap_set(ga, base, brk)) {
+ return base;
+ }
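+        /* If the next step would wrap around, we are out of address space. */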
+ if (base >= -skip) {
return -1;
- } else {
- int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE |
- MAP_FIXED_NOREPLACE;
- void * mmap_start = mmap((void *) align_start, guest_size,
- PROT_NONE, flags, -1, 0);
- if (mmap_start != MAP_FAILED) {
- munmap(mmap_start, guest_size);
- if (mmap_start == (void *) align_start) {
- qemu_log_mask(CPU_LOG_PAGE,
- "%s: base @ %p for %" PRIdPTR" bytes\n",
- __func__, mmap_start + offset, guest_size);
- return (uintptr_t) mmap_start + offset;
- }
- }
- base += qemu_host_page_size;
}
}
}
-/* Return value for guest_base, or -1 if no hole found. */
-static uintptr_t pgb_find_hole(uintptr_t guest_loaddr, uintptr_t guest_size,
- long align, uintptr_t offset)
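+/*
+ * Check each range in @ga, offset by @base, against the interval tree
+ * of host mappings in @root. Return 0 if all ranges are free, else
+ * the number of bytes by which to advance the search past the first
+ * conflict found.
+ */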
+static uintptr_t pgb_try_itree(const PGBAddrs *ga, uintptr_t base,
+ IntervalTreeRoot *root)
{
- GSList *maps, *iter;
- uintptr_t this_start, this_end, next_start, brk;
- intptr_t ret = -1;
-
- assert(QEMU_IS_ALIGNED(guest_loaddr, align));
-
- maps = read_self_maps();
-
- /* Read brk after we've read the maps, which will malloc. */
- brk = (uintptr_t)sbrk(0);
+ for (int i = ga->nbounds - 1; i >= 0; --i) {
+ uintptr_t s = base + ga->bounds[i][0];
+ uintptr_t l = base + ga->bounds[i][1];
+ IntervalTreeNode *n;
+
+ if (l < s) {
+ /* Wraparound. Skip to advance S to mmap_min_addr. */
+ return mmap_min_addr - s;
+ }
- if (!maps) {
- return pgd_find_hole_fallback(guest_size, brk, align, offset);
+ n = interval_tree_iter_first(root, s, l);
+ if (n != NULL) {
+ /* Conflict. Skip to advance S to LAST + 1. */
+ return n->last - s + 1;
+ }
}
+ return 0; /* success */
+}
- /* The first hole is before the first map entry. */
- this_start = mmap_min_addr;
-
- for (iter = maps; iter;
- this_start = next_start, iter = g_slist_next(iter)) {
- uintptr_t align_start, hole_size;
-
- this_end = ((MapInfo *)iter->data)->start;
- next_start = ((MapInfo *)iter->data)->end;
- align_start = ROUND_UP(this_start + offset, align);
-
- /* Skip holes that are too small. */
- if (align_start >= this_end) {
- continue;
- }
- hole_size = this_end - align_start;
- if (hole_size < guest_size) {
- continue;
- }
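+/*
+ * Walk upward from mmap_min_addr to find the lowest @base at which
+ * every range in @ga avoids the interval tree, then confirm the
+ * result with a real mmap probe. Return -1 if nothing fits.
+ */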
+static uintptr_t pgb_find_itree(const PGBAddrs *ga, IntervalTreeRoot *root,
+ uintptr_t align, uintptr_t brk)
+{
+ uintptr_t last = mmap_min_addr;
+ uintptr_t base, skip;
- /* If this hole contains brk, give ourselves some room to grow. */
- if (this_start <= brk && brk < this_end) {
- hole_size -= guest_size;
- if (sizeof(uintptr_t) == 8 && hole_size >= 1 * GiB) {
- align_start += 1 * GiB;
- } else if (hole_size >= 16 * MiB) {
- align_start += 16 * MiB;
- } else {
- align_start = (this_end - guest_size) & -align;
- if (align_start < this_start) {
- continue;
- }
- }
+ while (true) {
+ base = ROUND_UP(last, align);
+ if (base < last) {
+ return -1;
}
- /* Record the lowest successful match. */
- if (ret < 0) {
- ret = align_start;
- }
- /* If this hole contains the identity map, select it. */
- if (align_start <= guest_loaddr &&
- guest_loaddr + guest_size <= this_end) {
- ret = 0;
- }
- /* If this hole ends above the identity map, stop looking. */
- if (this_end >= guest_loaddr) {
+ skip = pgb_try_itree(ga, base, root);
+ if (skip == 0) {
break;
}
- }
- free_self_maps(maps);
- if (ret != -1) {
- qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %" PRIxPTR
- " for %" PRIuPTR " bytes\n",
- __func__, ret, guest_size);
+ last = base + skip;
+ if (last < base) {
+ return -1;
+ }
}
- return ret;
+ /*
+ * We've chosen 'base' based on holes in the interval tree,
+ * but we don't yet know if it is a valid host address.
+ * Because it is the first matching hole, if the host addresses
+ * are invalid we know there are no further matches.
+ */
+ return pgb_try_mmap_set(ga, base, brk) ? base : -1;
}
-static void pgb_static(const char *image_name, abi_ulong orig_loaddr,
- abi_ulong orig_hiaddr, long align)
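+/*
+ * Choose guest_base when the user has not selected one: prefer the
+ * identity map (guest_base == 0), else search for a suitable hole.
+ */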
+static void pgb_dynamic(const char *image_name, uintptr_t guest_loaddr,
+ uintptr_t guest_hiaddr, uintptr_t align)
{
- uintptr_t loaddr = orig_loaddr;
- uintptr_t hiaddr = orig_hiaddr;
- uintptr_t offset = 0;
- uintptr_t addr;
+ IntervalTreeRoot *root;
+ uintptr_t brk, ret;
+ PGBAddrs ga;
- if (hiaddr != orig_hiaddr) {
- error_report("%s: requires virtual address space that the "
- "host cannot provide (0x%" PRIx64 ")",
- image_name, (uint64_t)orig_hiaddr + 1);
- exit(EXIT_FAILURE);
- }
+ assert(QEMU_IS_ALIGNED(guest_loaddr, align));
- loaddr &= -align;
- if (HI_COMMPAGE) {
- /*
- * Extend the allocation to include the commpage.
- * For a 64-bit host, this is just 4GiB; for a 32-bit host we
- * need to ensure there is space bellow the guest_base so we
- * can map the commpage in the place needed when the address
- * arithmetic wraps around.
- */
- if (sizeof(uintptr_t) == 8 || loaddr >= 0x80000000u) {
- hiaddr = UINT32_MAX;
- } else {
- offset = -(HI_COMMPAGE & -align);
+ /* Try the identity map first. */
+ if (pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, true)) {
+ brk = (uintptr_t)sbrk(0);
+ if (pgb_try_mmap_set(&ga, 0, brk)) {
+ guest_base = 0;
+ return;
}
- } else if (LO_COMMPAGE != -1) {
- loaddr = MIN(loaddr, LO_COMMPAGE & -align);
- }
-
- addr = pgb_find_hole(loaddr, hiaddr - loaddr + 1, align, offset);
- if (addr == -1) {
- /*
- * If HI_COMMPAGE, there *might* be a non-consecutive allocation
- * that can satisfy both. But as the normal arm32 link base address
- * is ~32k, and we extend down to include the commpage, making the
- * overhead only ~96k, this is unlikely.
- */
- error_report("%s: Unable to allocate %#zx bytes of "
- "virtual address space", image_name,
- (size_t)(hiaddr - loaddr));
- exit(EXIT_FAILURE);
}
- guest_base = addr;
-
- qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %"PRIxPTR" for %" PRIuPTR" bytes\n",
- __func__, addr, hiaddr - loaddr);
-}
-
-static void pgb_dynamic(const char *image_name, long align)
-{
/*
- * The executable is dynamic and does not require a fixed address.
- * All we need is a commpage that satisfies align.
- * If we do not need a commpage, leave guest_base == 0.
+ * Rebuild the address set for non-identity map.
+ * This differs in the mapping of the guest NULL page.
*/
- if (HI_COMMPAGE) {
- uintptr_t addr, commpage;
+ pgb_addr_set(&ga, guest_loaddr, guest_hiaddr, false);
+
+ root = read_self_maps();
- /* 64-bit hosts should have used reserved_va. */
- assert(sizeof(uintptr_t) == 4);
+ /* Read brk after we've read the maps, which will malloc. */
+ brk = (uintptr_t)sbrk(0);
+ if (!root) {
+ ret = pgb_find_fallback(&ga, align, brk);
+ } else {
/*
- * By putting the commpage at the first hole, that puts guest_base
- * just above that, and maximises the positive guest addresses.
+ * Reserve the area close to the host brk.
+ * This will be freed with the rest of the tree.
*/
- commpage = HI_COMMPAGE & -align;
- addr = pgb_find_hole(commpage, -commpage, align, 0);
- assert(addr != -1);
- guest_base = addr;
- }
-}
+ IntervalTreeNode *b = g_new0(IntervalTreeNode, 1);
+ b->start = brk;
+ b->last = brk + 16 * MiB - 1;
+ interval_tree_insert(b, root);
-static void pgb_reserved_va(const char *image_name, abi_ulong guest_loaddr,
- abi_ulong guest_hiaddr, long align)
-{
- int flags = MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE;
- void *addr, *test;
-
- if (guest_hiaddr > reserved_va) {
- error_report("%s: requires more than reserved virtual "
- "address space (0x%" PRIx64 " > 0x%lx)",
- image_name, (uint64_t)guest_hiaddr, reserved_va);
- exit(EXIT_FAILURE);
+ ret = pgb_find_itree(&ga, root, align, brk);
+ free_self_maps(root);
}
- /* Widen the "image" to the entire reserved address space. */
- pgb_static(image_name, 0, reserved_va, align);
+ if (ret == -1) {
+ int w = TARGET_LONG_BITS / 4;
- /* osdep.h defines this as 0 if it's missing */
- flags |= MAP_FIXED_NOREPLACE;
+ error_report("%s: Unable to find a guest_base to satisfy all "
+ "guest address mapping requirements", image_name);
- /* Reserve the memory on the host. */
- assert(guest_base != 0);
- test = g2h_untagged(0);
- addr = mmap(test, reserved_va + 1, PROT_NONE, flags, -1, 0);
- if (addr == MAP_FAILED || addr != test) {
- error_report("Unable to reserve 0x%lx bytes of virtual address "
- "space at %p (%s) for use as guest address space (check your "
- "virtual memory ulimit setting, min_mmap_addr or reserve less "
- "using -R option)", reserved_va + 1, test, strerror(errno));
+ for (int i = 0; i < ga.nbounds; ++i) {
+ error_printf(" %0*" PRIx64 "-%0*" PRIx64 "\n",
+ w, (uint64_t)ga.bounds[i][0],
+ w, (uint64_t)ga.bounds[i][1]);
+ }
exit(EXIT_FAILURE);
}
-
- qemu_log_mask(CPU_LOG_PAGE, "%s: base @ %p for %lu bytes\n",
- __func__, addr, reserved_va + 1);
+ guest_base = ret;
}
void probe_guest_base(const char *image_name, abi_ulong guest_loaddr,
/* In order to use host shmat, we must be able to honor SHMLBA. */
uintptr_t align = MAX(SHMLBA, qemu_host_page_size);
+ /* Sanity check the guest binary. */
+ if (reserved_va) {
+ if (guest_hiaddr > reserved_va) {
+ error_report("%s: requires more than reserved virtual "
+ "address space (0x%" PRIx64 " > 0x%lx)",
+ image_name, (uint64_t)guest_hiaddr, reserved_va);
+ exit(EXIT_FAILURE);
+ }
+ } else {
+ if (guest_hiaddr != (uintptr_t)guest_hiaddr) {
+ error_report("%s: requires more virtual address space "
+ "than the host can provide (0x%" PRIx64 ")",
+ image_name, (uint64_t)guest_hiaddr + 1);
+ exit(EXIT_FAILURE);
+ }
+ }
+
if (have_guest_base) {
- pgb_have_guest_base(image_name, guest_loaddr, guest_hiaddr, align);
- } else if (reserved_va) {
- pgb_reserved_va(image_name, guest_loaddr, guest_hiaddr, align);
- } else if (guest_loaddr) {
- pgb_static(image_name, guest_loaddr, guest_hiaddr, align);
+ pgb_fixed(image_name, guest_loaddr, guest_hiaddr, align);
} else {
- pgb_dynamic(image_name, align);
+ pgb_dynamic(image_name, guest_loaddr, guest_hiaddr, align);
}
/* Reserve and initialize the commpage. */
if (!init_guest_commpage()) {
- /*
- * With have_guest_base, the user has selected the address and
- * we are trying to work with that. Otherwise, we have selected
- * free space and init_guest_commpage must succeeded.
- */
- assert(have_guest_base);
- pgb_fail_in_use(image_name);
+ /* We have already probed for the commpage being free. */
+ g_assert_not_reached();
}
assert(QEMU_IS_ALIGNED(guest_base, align));
}
/* Process NT_GNU_PROPERTY_TYPE_0. */
-static bool parse_elf_properties(int image_fd,
+static bool parse_elf_properties(const ImageSource *src,
struct image_info *info,
const struct elf_phdr *phdr,
- char bprm_buf[BPRM_BUF_SIZE],
Error **errp)
{
union {
return false;
}
- if (phdr->p_offset + n <= BPRM_BUF_SIZE) {
-        memcpy(&note, bprm_buf + phdr->p_offset, n);
- } else {
- ssize_t len = pread(image_fd, ¬e, n, phdr->p_offset);
- if (len != n) {
- error_setg_errno(errp, errno, "Error reading file header");
- return false;
- }
+    if (!imgsrc_read(&note, phdr->p_offset, n, src, errp)) {
+ return false;
}
/*
}
}
-/* Load an ELF image into the address space.
-
- IMAGE_NAME is the filename of the image, to use in error messages.
- IMAGE_FD is the open file descriptor for the image.
-
- BPRM_BUF is a copy of the beginning of the file; this of course
- contains the elf file header at offset 0. It is assumed that this
- buffer is sufficiently aligned to present no problems to the host
- in accessing data at aligned offsets within the buffer.
-
- On return: INFO values will be filled in, as necessary or available. */
+/**
+ * load_elf_image: Load an ELF image into the address space.
+ * @image_name: the filename of the image, to use in error messages.
+ * @src: the ImageSource from which to read.
+ * @info: info collected from the loaded image.
+ * @ehdr: the ELF header, not yet bswapped.
+ * @pinterp_name: record any PT_INTERP string found.
+ *
+ * On return: @info values will be filled in, as necessary or available.
+ */
-static void load_elf_image(const char *image_name, int image_fd,
- struct image_info *info, char **pinterp_name,
- char bprm_buf[BPRM_BUF_SIZE])
+static void load_elf_image(const char *image_name, const ImageSource *src,
+ struct image_info *info, struct elfhdr *ehdr,
+ char **pinterp_name)
{
- struct elfhdr *ehdr = (struct elfhdr *)bprm_buf;
- struct elf_phdr *phdr;
+ g_autofree struct elf_phdr *phdr = NULL;
abi_ulong load_addr, load_bias, loaddr, hiaddr, error;
- int i, retval, prot_exec;
+ int i, prot_exec;
Error *err = NULL;
- /* First of all, some simple consistency checks */
+ /*
+ * First of all, some simple consistency checks.
+     * Note that we rely on the bswapped @ehdr being retained by the
+     * caller, for later use by load_elf_binary and create_elf_tables.
+ */
+ if (!imgsrc_read(ehdr, 0, sizeof(*ehdr), src, &err)) {
+ goto exit_errmsg;
+ }
if (!elf_check_ident(ehdr)) {
error_setg(&err, "Invalid ELF image for this architecture");
goto exit_errmsg;
goto exit_errmsg;
}
- i = ehdr->e_phnum * sizeof(struct elf_phdr);
- if (ehdr->e_phoff + i <= BPRM_BUF_SIZE) {
- phdr = (struct elf_phdr *)(bprm_buf + ehdr->e_phoff);
- } else {
- phdr = (struct elf_phdr *) alloca(i);
- retval = pread(image_fd, phdr, i, ehdr->e_phoff);
- if (retval != i) {
- goto exit_read;
- }
+ phdr = imgsrc_read_alloc(ehdr->e_phoff,
+ ehdr->e_phnum * sizeof(struct elf_phdr),
+ src, &err);
+ if (phdr == NULL) {
+ goto exit_errmsg;
}
bswap_phdr(phdr, ehdr->e_phnum);
goto exit_errmsg;
}
- interp_name = g_malloc(eppnt->p_filesz);
-
- if (eppnt->p_offset + eppnt->p_filesz <= BPRM_BUF_SIZE) {
- memcpy(interp_name, bprm_buf + eppnt->p_offset,
- eppnt->p_filesz);
- } else {
- retval = pread(image_fd, interp_name, eppnt->p_filesz,
- eppnt->p_offset);
- if (retval != eppnt->p_filesz) {
- goto exit_read;
- }
+ interp_name = imgsrc_read_alloc(eppnt->p_offset, eppnt->p_filesz,
+ src, &err);
+ if (interp_name == NULL) {
+ goto exit_errmsg;
}
if (interp_name[eppnt->p_filesz - 1] != 0) {
error_setg(&err, "Invalid PT_INTERP entry");
}
*pinterp_name = g_steal_pointer(&interp_name);
} else if (eppnt->p_type == PT_GNU_PROPERTY) {
- if (!parse_elf_properties(image_fd, info, eppnt, bprm_buf, &err)) {
+ if (!parse_elf_properties(src, info, eppnt, &err)) {
goto exit_errmsg;
}
} else if (eppnt->p_type == PT_GNU_STACK) {
}
}
- if (pinterp_name != NULL) {
- /*
- * This is the main executable.
- *
- * Reserve extra space for brk.
- * We hold on to this space while placing the interpreter
- * and the stack, lest they be placed immediately after
- * the data segment and block allocation from the brk.
- *
- * 16MB is chosen as "large enough" without being so large as
- * to allow the result to not fit with a 32-bit guest on a
- * 32-bit host. However some 64 bit guests (e.g. s390x)
- * attempt to place their heap further ahead and currently
- * nothing stops them smashing into QEMUs address space.
- */
-#if TARGET_LONG_BITS == 64
- info->reserve_brk = 32 * MiB;
-#else
- info->reserve_brk = 16 * MiB;
-#endif
- hiaddr += info->reserve_brk;
+ load_addr = loaddr;
+ if (pinterp_name != NULL) {
if (ehdr->e_type == ET_EXEC) {
/*
* Make sure that the low address does not conflict with
*/
probe_guest_base(image_name, loaddr, hiaddr);
} else {
+ abi_ulong align;
+
/*
* The binary is dynamic, but we still need to
* select guest_base. In this case we pass a size.
*/
probe_guest_base(image_name, 0, hiaddr - loaddr);
+
+ /*
+ * Avoid collision with the loader by providing a different
+ * default load address.
+ */
+ load_addr += elf_et_dyn_base;
+
+ /*
+ * TODO: Better support for mmap alignment is desirable.
+ * Since we do not have complete control over the guest
+ * address space, we prefer the kernel to choose some address
+ * rather than force the use of LOAD_ADDR via MAP_FIXED.
+ * But without MAP_FIXED we cannot guarantee alignment,
+ * only suggest it.
+ */
+ align = pow2ceil(info->alignment);
+ if (align) {
+ load_addr &= -align;
+ }
}
}
/*
* Reserve address space for all of this.
*
- * In the case of ET_EXEC, we supply MAP_FIXED so that we get
- * exactly the address range that is required.
+ * In the case of ET_EXEC, we supply MAP_FIXED_NOREPLACE so that we get
+ * exactly the address range that is required. Without reserved_va,
+ * the guest address space is not isolated. We have attempted to avoid
+ * conflict with the host program itself via probe_guest_base, but using
+ * MAP_FIXED_NOREPLACE instead of MAP_FIXED provides an extra check.
*
* Otherwise this is ET_DYN, and we are searching for a location
* that can hold the memory space required. If the image is
- * pre-linked, LOADDR will be non-zero, and the kernel should
+ * pre-linked, LOAD_ADDR will be non-zero, and the kernel should
* honor that address if it happens to be free.
*
* In both cases, we will overwrite pages in this range with mappings
* from the executable.
*/
- load_addr = target_mmap(loaddr, (size_t)hiaddr - loaddr + 1, PROT_NONE,
+ load_addr = target_mmap(load_addr, (size_t)hiaddr - loaddr + 1, PROT_NONE,
MAP_PRIVATE | MAP_ANON | MAP_NORESERVE |
- (ehdr->e_type == ET_EXEC ? MAP_FIXED : 0),
+ (ehdr->e_type == ET_EXEC ? MAP_FIXED_NOREPLACE : 0),
-1, 0);
if (load_addr == -1) {
goto exit_mmap;
info->end_code = 0;
info->start_data = -1;
info->end_data = 0;
- info->brk = 0;
+ /* Usual start for brk is after all sections of the main executable. */
+ info->brk = TARGET_PAGE_ALIGN(hiaddr + load_bias);
info->elf_flags = ehdr->e_flags;
prot_exec = PROT_EXEC;
for (i = 0; i < ehdr->e_phnum; i++) {
struct elf_phdr *eppnt = phdr + i;
if (eppnt->p_type == PT_LOAD) {
- abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em, vaddr_len;
+ abi_ulong vaddr, vaddr_po, vaddr_ps, vaddr_ef, vaddr_em;
int elf_prot = 0;
if (eppnt->p_flags & PF_R) {
}
vaddr = load_bias + eppnt->p_vaddr;
- vaddr_po = TARGET_ELF_PAGEOFFSET(vaddr);
- vaddr_ps = TARGET_ELF_PAGESTART(vaddr);
+ vaddr_po = vaddr & ~TARGET_PAGE_MASK;
+ vaddr_ps = vaddr & TARGET_PAGE_MASK;
vaddr_ef = vaddr + eppnt->p_filesz;
vaddr_em = vaddr + eppnt->p_memsz;
* but no backing file segment.
*/
if (eppnt->p_filesz != 0) {
- vaddr_len = TARGET_ELF_PAGELENGTH(eppnt->p_filesz + vaddr_po);
- error = target_mmap(vaddr_ps, vaddr_len, elf_prot,
- MAP_PRIVATE | MAP_FIXED,
- image_fd, eppnt->p_offset - vaddr_po);
-
+ error = imgsrc_mmap(vaddr_ps, eppnt->p_filesz + vaddr_po,
+ elf_prot, MAP_PRIVATE | MAP_FIXED,
+ src, eppnt->p_offset - vaddr_po);
if (error == -1) {
goto exit_mmap;
}
+ }
- /*
- * If the load segment requests extra zeros (e.g. bss), map it.
- */
- if (eppnt->p_filesz < eppnt->p_memsz) {
- zero_bss(vaddr_ef, vaddr_em, elf_prot);
- }
- } else if (eppnt->p_memsz != 0) {
- vaddr_len = TARGET_ELF_PAGELENGTH(eppnt->p_memsz + vaddr_po);
- error = target_mmap(vaddr_ps, vaddr_len, elf_prot,
- MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS,
- -1, 0);
-
- if (error == -1) {
- goto exit_mmap;
- }
+ /* If the load segment requests extra zeros (e.g. bss), map it. */
+ if (vaddr_ef < vaddr_em &&
+ !zero_bss(vaddr_ef, vaddr_em, elf_prot, &err)) {
+ goto exit_errmsg;
}
/* Find the full program boundaries. */
info->end_data = vaddr_ef;
}
}
- if (vaddr_em > info->brk) {
- info->brk = vaddr_em;
- }
#ifdef TARGET_MIPS
} else if (eppnt->p_type == PT_MIPS_ABIFLAGS) {
Mips_elf_abiflags_v0 abiflags;
- if (eppnt->p_filesz < sizeof(Mips_elf_abiflags_v0)) {
- error_setg(&err, "Invalid PT_MIPS_ABIFLAGS entry");
+
+ if (!imgsrc_read(&abiflags, eppnt->p_offset, sizeof(abiflags),
+ src, &err)) {
goto exit_errmsg;
}
- if (eppnt->p_offset + eppnt->p_filesz <= BPRM_BUF_SIZE) {
- memcpy(&abiflags, bprm_buf + eppnt->p_offset,
- sizeof(Mips_elf_abiflags_v0));
- } else {
- retval = pread(image_fd, &abiflags, sizeof(Mips_elf_abiflags_v0),
- eppnt->p_offset);
- if (retval != sizeof(Mips_elf_abiflags_v0)) {
- goto exit_read;
- }
- }
bswap_mips_abiflags(&abiflags);
info->fp_abi = abiflags.fp_abi;
#endif
}
if (qemu_log_enabled()) {
- load_symbols(ehdr, image_fd, load_bias);
+ load_symbols(ehdr, src, load_bias);
}
- debuginfo_report_elf(image_name, image_fd, load_bias);
+ debuginfo_report_elf(image_name, src->fd, load_bias);
mmap_unlock();
- close(image_fd);
+ close(src->fd);
return;
- exit_read:
- if (retval >= 0) {
- error_setg(&err, "Incomplete read of file header");
- } else {
- error_setg_errno(&err, errno, "Error reading file header");
- }
- goto exit_errmsg;
exit_mmap:
error_setg_errno(&err, errno, "Error mapping file");
goto exit_errmsg;
static void load_elf_interp(const char *filename, struct image_info *info,
char bprm_buf[BPRM_BUF_SIZE])
{
+ struct elfhdr ehdr;
+ ImageSource src;
int fd, retval;
Error *err = NULL;
exit(-1);
}
- if (retval < BPRM_BUF_SIZE) {
- memset(bprm_buf + retval, 0, BPRM_BUF_SIZE - retval);
+ src.fd = fd;
+ src.cache = bprm_buf;
+ src.cache_size = retval;
+
+ load_elf_image(filename, &src, info, &ehdr, NULL);
+}
+
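+/*
+ * The vdso image, its relocation offsets and trampoline offsets are
+ * provided per target as C data via the generated VDSO_HEADER
+ * (e.g. "vdso.c.inc" for i386, "vdso-le.c.inc"/"vdso-be.c.inc" for arm).
+ */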
+#ifdef VDSO_HEADER
+#include VDSO_HEADER
+#define vdso_image_info() &vdso_image_info
+#else
+#define vdso_image_info() NULL
+#endif
+
+static void load_elf_vdso(struct image_info *info, const VdsoImageInfo *vdso)
+{
+ ImageSource src;
+ struct elfhdr ehdr;
+ abi_ulong load_bias, load_addr;
+
+ src.fd = -1;
+ src.cache = vdso->image;
+ src.cache_size = vdso->image_size;
+
+ load_elf_image("<internal-vdso>", &src, info, &ehdr, NULL);
+ load_addr = info->load_addr;
+ load_bias = info->load_bias;
+
+ /*
+ * We need to relocate the VDSO image. The one built into the kernel
+ * is built for a fixed address. The one built for QEMU is not, since
+ * that requires close control of the guest address space.
+ * We pre-processed the image to locate all of the addresses that need
+ * to be updated.
+ */
+ for (unsigned i = 0, n = vdso->reloc_count; i < n; i++) {
+ abi_ulong *addr = g2h_untagged(load_addr + vdso->relocs[i]);
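+        /* The word is stored target-endian: swap in, add the bias, swap back. */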
+ *addr = tswapal(tswapal(*addr) + load_bias);
}
- load_elf_image(filename, fd, info, NULL, bprm_buf);
+ /* Install signal trampolines, if present. */
+ if (vdso->sigreturn_ofs) {
+ default_sigreturn = load_addr + vdso->sigreturn_ofs;
+ }
+ if (vdso->rt_sigreturn_ofs) {
+ default_rt_sigreturn = load_addr + vdso->rt_sigreturn_ofs;
+ }
+
+ /* Remove write from VDSO segment. */
+ target_mprotect(info->start_data, info->end_data - info->start_data,
+ PROT_READ | PROT_EXEC);
}
static int symfind(const void *s0, const void *s1)
{
- target_ulong addr = *(target_ulong *)s0;
struct elf_sym *sym = (struct elf_sym *)s1;
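+    /* The bsearch key is always passed as uint64_t; narrow to the symbol width. */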
+ __typeof(sym->st_value) addr = *(uint64_t *)s0;
int result = 0;
+
if (addr < sym->st_value) {
result = -1;
} else if (addr >= sym->st_value + sym->st_size) {
return result;
}
-static const char *lookup_symbolxx(struct syminfo *s, target_ulong orig_addr)
+static const char *lookup_symbolxx(struct syminfo *s, uint64_t orig_addr)
{
#if ELF_CLASS == ELFCLASS32
struct elf_sym *syms = s->disas_symtab.elf32;
}
/* Best attempt to load symbols from this ELF object. */
-static void load_symbols(struct elfhdr *hdr, int fd, abi_ulong load_bias)
+static void load_symbols(struct elfhdr *hdr, const ImageSource *src,
+ abi_ulong load_bias)
{
int i, shnum, nsyms, sym_idx = 0, str_idx = 0;
- uint64_t segsz;
- struct elf_shdr *shdr;
+ g_autofree struct elf_shdr *shdr = NULL;
char *strings = NULL;
- struct syminfo *s = NULL;
- struct elf_sym *new_syms, *syms = NULL;
+ struct elf_sym *syms = NULL;
+ struct elf_sym *new_syms;
+ uint64_t segsz;
shnum = hdr->e_shnum;
- i = shnum * sizeof(struct elf_shdr);
- shdr = (struct elf_shdr *)alloca(i);
- if (pread(fd, shdr, i, hdr->e_shoff) != i) {
+ shdr = imgsrc_read_alloc(hdr->e_shoff, shnum * sizeof(struct elf_shdr),
+ src, NULL);
+ if (shdr == NULL) {
return;
}
found:
/* Now know where the strtab and symtab are. Snarf them. */
- s = g_try_new(struct syminfo, 1);
- if (!s) {
- goto give_up;
- }
segsz = shdr[str_idx].sh_size;
- s->disas_strtab = strings = g_try_malloc(segsz);
- if (!strings ||
- pread(fd, strings, segsz, shdr[str_idx].sh_offset) != segsz) {
+ strings = g_try_malloc(segsz);
+ if (!strings) {
goto give_up;
}
-
- segsz = shdr[sym_idx].sh_size;
- syms = g_try_malloc(segsz);
- if (!syms || pread(fd, syms, segsz, shdr[sym_idx].sh_offset) != segsz) {
+ if (!imgsrc_read(strings, shdr[str_idx].sh_offset, segsz, src, NULL)) {
goto give_up;
}
+ segsz = shdr[sym_idx].sh_size;
if (segsz / sizeof(struct elf_sym) > INT_MAX) {
- /* Implausibly large symbol table: give up rather than ploughing
- * on with the number of symbols calculation overflowing
+ /*
+ * Implausibly large symbol table: give up rather than ploughing
+ * on with the number of symbols calculation overflowing.
*/
goto give_up;
}
nsyms = segsz / sizeof(struct elf_sym);
+ syms = g_try_malloc(segsz);
+ if (!syms) {
+ goto give_up;
+ }
+ if (!imgsrc_read(syms, shdr[sym_idx].sh_offset, segsz, src, NULL)) {
+ goto give_up;
+ }
+
for (i = 0; i < nsyms; ) {
bswap_sym(syms + i);
/* Throw away entries which we do not need. */
goto give_up;
}
- /* Attempt to free the storage associated with the local symbols
- that we threw away. Whether or not this has any effect on the
- memory allocation depends on the malloc implementation and how
- many symbols we managed to discard. */
+ /*
+ * Attempt to free the storage associated with the local symbols
+ * that we threw away. Whether or not this has any effect on the
+ * memory allocation depends on the malloc implementation and how
+ * many symbols we managed to discard.
+ */
new_syms = g_try_renew(struct elf_sym, syms, nsyms);
if (new_syms == NULL) {
goto give_up;
qsort(syms, nsyms, sizeof(*syms), symcmp);
- s->disas_num_syms = nsyms;
+ {
+ struct syminfo *s = g_new(struct syminfo, 1);
+
+ s->disas_strtab = strings;
+ s->disas_num_syms = nsyms;
#if ELF_CLASS == ELFCLASS32
- s->disas_symtab.elf32 = syms;
+ s->disas_symtab.elf32 = syms;
#else
- s->disas_symtab.elf64 = syms;
+ s->disas_symtab.elf64 = syms;
#endif
- s->lookup_symbol = lookup_symbolxx;
- s->next = syminfos;
- syminfos = s;
-
+ s->lookup_symbol = lookup_symbolxx;
+ s->next = syminfos;
+ syminfos = s;
+ }
return;
-give_up:
- g_free(s);
+ give_up:
g_free(strings);
g_free(syms);
}
int load_elf_binary(struct linux_binprm *bprm, struct image_info *info)
{
- struct image_info interp_info;
- struct elfhdr elf_ex;
+ /*
+ * We need a copy of the elf header for passing to create_elf_tables.
+ * We will have overwritten the original when we re-use bprm->buf
+ * while loading the interpreter. Allocate the storage for this now
+     * and let load_elf_image do any swapping that may be required.
+ */
+ struct elfhdr ehdr;
+ struct image_info interp_info, vdso_info;
char *elf_interpreter = NULL;
char *scratch;
interp_info.fp_abi = MIPS_ABI_FP_UNKNOWN;
#endif
- info->start_mmap = (abi_ulong)ELF_START_MMAP;
-
- load_elf_image(bprm->filename, bprm->fd, info,
- &elf_interpreter, bprm->buf);
-
- /* ??? We need a copy of the elf header for passing to create_elf_tables.
- If we do nothing, we'll have overwritten this when we re-use bprm->buf
- when we load the interpreter. */
- elf_ex = *(struct elfhdr *)bprm->buf;
+ load_elf_image(bprm->filename, &bprm->src, info, &ehdr, &elf_interpreter);
/* Do this so that we can load the interpreter, if need be. We will
change some of these later */
if (elf_interpreter) {
load_elf_interp(elf_interpreter, &interp_info, bprm->buf);
+ /*
+ * While unusual because of ELF_ET_DYN_BASE, if we are unlucky
+ * with the mappings the interpreter can be loaded above but
+ * near the main executable, which can leave very little room
+ * for the heap.
+         * If the current brk has less than 16MB of room before the
+         * interpreter, start the brk at the end of the interpreter
+         * instead.
+ */
+ if (interp_info.brk > info->brk &&
+ interp_info.load_bias - info->brk < 16 * MiB) {
+ info->brk = interp_info.brk;
+ }
+
/* If the program interpreter is one of these two, then assume
an iBCS2 image. Otherwise assume a native linux image. */
}
/*
- * TODO: load a vdso, which would also contain the signal trampolines.
- * Otherwise, allocate a private page to hold them.
+ * Load a vdso if available, which will amongst other things contain the
+ * signal trampolines. Otherwise, allocate a separate page for them.
*/
- if (TARGET_ARCH_HAS_SIGTRAMP_PAGE) {
+ const VdsoImageInfo *vdso = vdso_image_info();
+ if (vdso) {
+ load_elf_vdso(&vdso_info, vdso);
+ } else if (TARGET_ARCH_HAS_SIGTRAMP_PAGE) {
abi_long tramp_page = target_mmap(0, TARGET_PAGE_SIZE,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANON, -1, 0);
target_mprotect(tramp_page, TARGET_PAGE_SIZE, PROT_READ | PROT_EXEC);
}
- bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &elf_ex,
- info, (elf_interpreter ? &interp_info : NULL));
+ bprm->p = create_elf_tables(bprm->p, bprm->argc, bprm->envc, &ehdr, info,
+ elf_interpreter ? &interp_info : NULL,
+ vdso ? &vdso_info : NULL);
info->start_stack = bprm->p;
/* If we have an interpreter, set that as the program's entry point.
bprm->core_dump = &elf_core_dump;
#endif
- /*
- * If we reserved extra space for brk, release it now.
- * The implementation of do_brk in syscalls.c expects to be able
- * to mmap pages in this space.
- */
- if (info->reserve_brk) {
- abi_ulong start_brk = HOST_PAGE_ALIGN(info->brk);
- abi_ulong end_brk = HOST_PAGE_ALIGN(info->brk + info->reserve_brk);
- target_munmap(start_brk, end_brk - start_brk);
- }
-
return 0;
}
info->notes_size += note_size(&info->notes[i]);
/* read and fill status of all threads */
- cpu_list_lock();
- CPU_FOREACH(cpu) {
- if (cpu == thread_cpu) {
- continue;
+ WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
+ CPU_FOREACH(cpu) {
+ if (cpu == thread_cpu) {
+ continue;
+ }
+ fill_thread_info(info, cpu_env(cpu));
}
- fill_thread_info(info, cpu->env_ptr);
}
- cpu_list_unlock();
return (0);
}