From: pbrook Date: Sun, 30 Jul 2006 19:16:29 +0000 (+0000) Subject: Rewrite Arm host support. X-Git-Tag: release_0_9_0~331 X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=46152182100e68f7f8aa4954af1bf91160bb3d15;p=qemu.git Rewrite Arm host support. git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@2071 c046a42c-6fe2-441c-8c8c-71466251a162 --- diff --git a/arm.ld b/arm.ld index 61f4c3486..e216cbfa7 100644 --- a/arm.ld +++ b/arm.ld @@ -53,6 +53,10 @@ SECTIONS .fini : { *(.fini) } =0x47ff041f .rodata : { *(.rodata) *(.gnu.linkonce.r*) } .rodata1 : { *(.rodata1) } + .ARM.extab : { *(.ARM.extab* .gnu.linkonce.armextab.*) } + __exidx_start = .; + .ARM.exidx : { *(.ARM.exidx* .gnu.linkonce.armexidx.*) } + __exidx_end = .; .reginfo : { *(.reginfo) } /* Adjust the address for the data segment. We want to adjust up to the same address within the page on the next page up. */ @@ -63,7 +67,28 @@ SECTIONS *(.gnu.linkonce.d*) CONSTRUCTORS } + .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) } .data1 : { *(.data1) } + .preinit_array : + { + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP (*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + } + .init_array : + { + PROVIDE_HIDDEN (__init_array_start = .); + KEEP (*(SORT(.init_array.*))) + KEEP (*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + } + .fini_array : + { + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP (*(.fini_array)) + KEEP (*(SORT(.fini_array.*))) + PROVIDE_HIDDEN (__fini_array_end = .); + } .ctors : { *(.ctors) diff --git a/cpu-all.h b/cpu-all.h index 145d84beb..996289eaf 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -992,6 +992,15 @@ static inline int64_t cpu_get_real_ticks (void) return rval.i64; #endif } +#else +/* The host CPU doesn't have an easily accessible cycle counter. + Just return a monotonically increasing vlue. This will be totally wrong, + but hopefully better than nothing. 
*/ +static inline int64_t cpu_get_real_ticks (void) +{ + static int64_t ticks = 0; + return ticks++; +} #endif /* profiling */ diff --git a/disas.c b/disas.c index fd91b9220..27b677792 100644 --- a/disas.c +++ b/disas.c @@ -271,11 +271,9 @@ void disas(FILE *out, void *code, unsigned long size) for (pc = (unsigned long)code; pc < (unsigned long)code + size; pc += count) { fprintf(out, "0x%08lx: ", pc); #ifdef __arm__ - /* since data are included in the code, it is better to + /* since data is included in the code, it is better to display code data too */ - if (is_host) { - fprintf(out, "%08x ", (int)bfd_getl32((const bfd_byte *)pc)); - } + fprintf(out, "%08x ", (int)bfd_getl32((const bfd_byte *)pc)); #endif count = print_insn(pc, &disasm_info); fprintf(out, "\n"); diff --git a/dyngen.c b/dyngen.c index 5fb921e28..2d93283d6 100644 --- a/dyngen.c +++ b/dyngen.c @@ -1255,90 +1255,149 @@ int arm_emit_ldr_info(const char *name, unsigned long start_offset, { uint8_t *p; uint32_t insn; - int offset, min_offset, pc_offset, data_size; + int offset, min_offset, pc_offset, data_size, spare, max_pool; uint8_t data_allocated[1024]; unsigned int data_index; + int type; memset(data_allocated, 0, sizeof(data_allocated)); p = p_start; min_offset = p_end - p_start; + spare = 0x7fffffff; while (p < p_start + min_offset) { insn = get32((uint32_t *)p); + /* TODO: Armv5e ldrd. */ + /* TODO: VFP load. */ if ((insn & 0x0d5f0000) == 0x051f0000) { /* ldr reg, [pc, #im] */ offset = insn & 0xfff; if (!(insn & 0x00800000)) - offset = -offset; + offset = -offset; + max_pool = 4096; + type = 0; + } else if ((insn & 0x0e5f0f00) == 0x0c1f0100) { + /* FPA ldf. 
*/ + offset = (insn & 0xff) << 2; + if (!(insn & 0x00800000)) + offset = -offset; + max_pool = 1024; + type = 1; + } else if ((insn & 0x0fff0000) == 0x028f0000) { + /* Some gcc load a doubleword immediate with + add regN, pc, #imm + ldmia regN, {regN, regM} + Hope and pray the compiler never generates something like + add reg, pc, #imm1; ldr reg, [reg, #-imm2]; */ + int r; + + r = (insn & 0xf00) >> 7; + offset = ((insn & 0xff) >> r) | ((insn & 0xff) << (32 - r)); + max_pool = 1024; + type = 2; + } else { + max_pool = 0; + type = -1; + } + if (type >= 0) { + /* PC-relative load needs fixing up. */ + if (spare > max_pool - offset) + spare = max_pool - offset; if ((offset & 3) !=0) - error("%s:%04x: ldr pc offset must be 32 bit aligned", + error("%s:%04x: pc offset must be 32 bit aligned", + name, start_offset + p - p_start); + if (offset < 0) + error("%s:%04x: Embedded literal value", name, start_offset + p - p_start); pc_offset = p - p_start + offset + 8; if (pc_offset <= (p - p_start) || pc_offset >= (p_end - p_start)) - error("%s:%04x: ldr pc offset must point inside the function code", + error("%s:%04x: pc offset must point inside the function code", name, start_offset + p - p_start); if (pc_offset < min_offset) min_offset = pc_offset; if (outfile) { - /* ldr position */ + /* The instruction position */ fprintf(outfile, " arm_ldr_ptr->ptr = gen_code_ptr + %d;\n", p - p_start); - /* ldr data index */ - data_index = ((p_end - p_start) - pc_offset - 4) >> 2; - fprintf(outfile, " arm_ldr_ptr->data_ptr = arm_data_ptr + %d;\n", + /* The position of the constant pool data. 
*/ + data_index = ((p_end - p_start) - pc_offset) >> 2; + fprintf(outfile, " arm_ldr_ptr->data_ptr = arm_data_ptr - %d;\n", data_index); + fprintf(outfile, " arm_ldr_ptr->type = %d;\n", type); fprintf(outfile, " arm_ldr_ptr++;\n"); - if (data_index >= sizeof(data_allocated)) - error("%s: too many data", name); - if (!data_allocated[data_index]) { - ELF_RELOC *rel; - int i, addend, type; - const char *sym_name, *p; - char relname[1024]; - - data_allocated[data_index] = 1; - - /* data value */ - addend = get32((uint32_t *)(p_start + pc_offset)); - relname[0] = '\0'; - for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) { - if (rel->r_offset == (pc_offset + start_offset)) { - sym_name = get_rel_sym_name(rel); - /* the compiler leave some unnecessary references to the code */ - get_reloc_expr(relname, sizeof(relname), sym_name); - type = ELF32_R_TYPE(rel->r_info); - if (type != R_ARM_ABS32) - error("%s: unsupported data relocation", name); - break; - } - } - fprintf(outfile, " arm_data_ptr[%d] = 0x%x", - data_index, addend); - if (relname[0] != '\0') - fprintf(outfile, " + %s", relname); - fprintf(outfile, ";\n"); - } } } p += 4; } + + /* Copy and relocate the constant pool data. 
*/ data_size = (p_end - p_start) - min_offset; if (data_size > 0 && outfile) { - fprintf(outfile, " arm_data_ptr += %d;\n", data_size >> 2); + spare += min_offset; + fprintf(outfile, " arm_data_ptr -= %d;\n", data_size >> 2); + fprintf(outfile, " arm_pool_ptr -= %d;\n", data_size); + fprintf(outfile, " if (arm_pool_ptr > gen_code_ptr + %d)\n" + " arm_pool_ptr = gen_code_ptr + %d;\n", + spare, spare); + + data_index = 0; + for (pc_offset = min_offset; + pc_offset < p_end - p_start; + pc_offset += 4) { + + ELF_RELOC *rel; + int i, addend, type; + const char *sym_name; + char relname[1024]; + + /* data value */ + addend = get32((uint32_t *)(p_start + pc_offset)); + relname[0] = '\0'; + for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) { + if (rel->r_offset == (pc_offset + start_offset)) { + sym_name = get_rel_sym_name(rel); + /* the compiler leave some unnecessary references to the code */ + get_reloc_expr(relname, sizeof(relname), sym_name); + type = ELF32_R_TYPE(rel->r_info); + if (type != R_ARM_ABS32) + error("%s: unsupported data relocation", name); + break; + } + } + fprintf(outfile, " arm_data_ptr[%d] = 0x%x", + data_index, addend); + if (relname[0] != '\0') + fprintf(outfile, " + %s", relname); + fprintf(outfile, ";\n"); + + data_index++; + } } - /* the last instruction must be a mov pc, lr */ if (p == p_start) goto arm_ret_error; p -= 4; insn = get32((uint32_t *)p); - if ((insn & 0xffff0000) != 0xe91b0000) { + /* The last instruction must be an ldm instruction. 
There are several + forms generated by gcc: + ldmib sp, {..., pc} (implies a sp adjustment of +4) + ldmia sp, {..., pc} + ldmea fp, {..., pc} */ + if ((insn & 0xffff8000) == 0xe99d8000) { + if (outfile) { + fprintf(outfile, + " *(uint32_t *)(gen_code_ptr + %d) = 0xe28dd004;\n", + p - p_start); + } + p += 4; + } else if ((insn & 0xffff8000) != 0xe89d8000 + && (insn & 0xffff8000) != 0xe91b8000) { arm_ret_error: if (!outfile) printf("%s: invalid epilog\n", name); } - return p - p_start; + return p - p_start; } #endif @@ -1537,6 +1596,8 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, } #elif defined(HOST_ARM) { + uint32_t insn; + if ((p_end - p_start) <= 16) error("%s: function too small", name); if (get32((uint32_t *)p_start) != 0xe1a0c00d || @@ -1545,6 +1606,12 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, error("%s: invalid prolog", name); p_start += 12; start_offset += 12; + insn = get32((uint32_t *)p_start); + if ((insn & 0xffffff00) == 0xe24dd000) { + /* Stack adjustment. Assume op uses the frame pointer. */ + p_start -= 4; + start_offset -= 4; + } copy_size = arm_emit_ldr_info(name, start_offset, NULL, p_start, p_end, relocs, nb_relocs); } @@ -2282,7 +2349,37 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, int type; int addend; int reloc_offset; - + uint32_t insn; + + insn = get32((uint32_t *)(p_start + 4)); + /* If prologue ends in sub sp, sp, #const then assume + op has a stack frame and needs the frame pointer. */ + if ((insn & 0xffffff00) == 0xe24dd000) { + int i; + uint32_t opcode; + opcode = 0xe28db000; /* add fp, sp, #0. */ +#if 0 +/* ??? Need to undo the extra stack adjustment at the end of the op. + For now just leave the stack misaligned and hope it doesn't break anything + too important. */ + if ((insn & 4) != 0) { + /* Preserve doubleword stack alignment. 
*/ + fprintf(outfile, + " *(uint32_t *)(gen_code_ptr + 4)= 0x%x;\n", + insn + 4); + opcode -= 4; + } +#endif + insn = get32((uint32_t *)(p_start - 4)); + /* Calculate the size of the saved registers, + excluding pc. */ + for (i = 0; i < 15; i++) { + if (insn & (1 << i)) + opcode += 4; + } + fprintf(outfile, + " *(uint32_t *)gen_code_ptr = 0x%x;\n", opcode); + } arm_emit_ldr_info(name, start_offset, outfile, p_start, p_end, relocs, nb_relocs); @@ -2303,6 +2400,8 @@ void gen_code(const char *name, host_ulong offset, host_ulong size, reloc_offset, name, addend); break; case R_ARM_PC24: + case R_ARM_JUMP24: + case R_ARM_CALL: fprintf(outfile, " arm_reloc_pc24((uint32_t *)(gen_code_ptr + %d), 0x%x, %s);\n", reloc_offset, addend, name); break; @@ -2407,6 +2506,28 @@ int gen_file(FILE *outfile, int out_type) } else { /* generate big code generation switch */ + +#ifdef HOST_ARM + /* We need to know the size of all the ops so we can figure out when + to emit constant pools. This must be consistent with opc.h. */ +fprintf(outfile, +"static const uint32_t arm_opc_size[] = {\n" +" 0,\n" /* end */ +" 0,\n" /* nop */ +" 0,\n" /* nop1 */ +" 0,\n" /* nop2 */ +" 0,\n"); /* nop3 */ + for(i = 0, sym = symtab; i < nb_syms; i++, sym++) { + const char *name; + name = get_sym_name(sym); + if (strstart(name, OP_PREFIX, NULL)) { + fprintf(outfile, " %d,\n", sym->st_size); + } + } +fprintf(outfile, +"};\n"); +#endif + fprintf(outfile, "int dyngen_code(uint8_t *gen_code_buf,\n" " uint16_t *label_offsets, uint16_t *jmp_offsets,\n" @@ -2417,10 +2538,36 @@ fprintf(outfile, " const uint32_t *opparam_ptr;\n"); #ifdef HOST_ARM +/* Arm is tricky because it uses constant pools for loading immediate values. + We assume (and require) each function is code followed by a constant pool. + All the ops are small so this should be ok. For each op we figure + out how much "spare" range we have in the load instructions. 
This allows + us to insert subsequent ops in between the op and the constant pool, + eliminating the need to jump around the pool. + + We currently generate: + + [ For this example we assume merging would move op1_pool out of range. + In practice we should be able to combine many ops before the offset + limits are reached. ] + op1_code; + op2_code; + goto op3; + op2_pool; + op1_pool; +op3: + op3_code; + ret; + op3_pool; + + Ideally we'd put op1_pool before op2_pool, but that requires two passes. + */ fprintf(outfile, " uint8_t *last_gen_code_ptr = gen_code_buf;\n" " LDREntry *arm_ldr_ptr = arm_ldr_table;\n" -" uint32_t *arm_data_ptr = arm_data_table;\n"); +" uint32_t *arm_data_ptr = arm_data_table + ARM_LDR_TABLE_SIZE;\n" +/* Initialise the permissible pool offset to an arbitrarily large value. */ +" uint8_t *arm_pool_ptr = gen_code_buf + 0x1000000;\n"); #endif #ifdef HOST_IA64 { @@ -2489,9 +2636,23 @@ fprintf(outfile, /* Generate prologue, if needed. */ fprintf(outfile, -" for(;;) {\n" -" switch(*opc_ptr++) {\n" -); +" for(;;) {\n"); + +#ifdef HOST_ARM +/* Generate constant pool if needed */ +fprintf(outfile, +" if (gen_code_ptr + arm_opc_size[*opc_ptr] >= arm_pool_ptr) {\n" +" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, " +"arm_ldr_ptr, arm_data_ptr, arm_data_table + ARM_LDR_TABLE_SIZE, 1);\n" +" last_gen_code_ptr = gen_code_ptr;\n" +" arm_ldr_ptr = arm_ldr_table;\n" +" arm_data_ptr = arm_data_table + ARM_LDR_TABLE_SIZE;\n" +" arm_pool_ptr = gen_code_ptr + 0x1000000;\n" +" }\n"); +#endif + +fprintf(outfile, +" switch(*opc_ptr++) {\n"); for(i = 0, sym = symtab; i < nb_syms; i++, sym++) { const char *name; @@ -2525,17 +2686,6 @@ fprintf(outfile, " goto the_end;\n" " }\n"); -#ifdef HOST_ARM -/* generate constant table if needed */ -fprintf(outfile, -" if ((gen_code_ptr - last_gen_code_ptr) >= (MAX_FRAG_SIZE - MAX_OP_SIZE)) {\n" -" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, arm_ldr_ptr, arm_data_table, arm_data_ptr, 1);\n" -" 
last_gen_code_ptr = gen_code_ptr;\n" -" arm_ldr_ptr = arm_ldr_table;\n" -" arm_data_ptr = arm_data_table;\n" -" }\n"); -#endif - fprintf(outfile, " }\n" @@ -2553,7 +2703,10 @@ fprintf(outfile, /* generate some code patching */ #ifdef HOST_ARM -fprintf(outfile, "gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, arm_ldr_ptr, arm_data_table, arm_data_ptr, 0);\n"); +fprintf(outfile, +"if (arm_data_ptr != arm_data_table + ARM_LDR_TABLE_SIZE)\n" +" gen_code_ptr = arm_flush_ldr(gen_code_ptr, arm_ldr_table, " +"arm_ldr_ptr, arm_data_ptr, arm_data_table + ARM_LDR_TABLE_SIZE, 0);\n"); #endif /* flush instruction cache */ fprintf(outfile, "flush_icache_range((unsigned long)gen_code_buf, (unsigned long)gen_code_ptr);\n"); diff --git a/dyngen.h b/dyngen.h index fe0a9364e..2a87c448f 100644 --- a/dyngen.h +++ b/dyngen.h @@ -19,7 +19,7 @@ */ int __op_param1, __op_param2, __op_param3; -#ifdef __sparc__ +#if defined(__sparc__) || defined(__arm__) void __op_gen_label1(){} void __op_gen_label2(){} void __op_gen_label3(){} @@ -145,18 +145,16 @@ void fix_bsr(void *p, int offset) { #ifdef __arm__ -#define MAX_OP_SIZE (128 * 4) /* in bytes */ -/* max size of the code that can be generated without calling arm_flush_ldr */ -#define MAX_FRAG_SIZE (1024 * 4) -//#define MAX_FRAG_SIZE (135 * 4) /* for testing */ +#define ARM_LDR_TABLE_SIZE 1024 typedef struct LDREntry { uint8_t *ptr; uint32_t *data_ptr; + unsigned type:2; } LDREntry; static LDREntry arm_ldr_table[1024]; -static uint32_t arm_data_table[1024]; +static uint32_t arm_data_table[ARM_LDR_TABLE_SIZE]; extern char exec_loop; @@ -175,8 +173,9 @@ static uint8_t *arm_flush_ldr(uint8_t *gen_code_ptr, int offset, data_size, target; uint8_t *data_ptr; uint32_t insn; + uint32_t mask; - data_size = (uint8_t *)data_end - (uint8_t *)data_start; + data_size = (data_end - data_start) << 2; if (gen_jmp) { /* generate branch to skip the data */ @@ -198,17 +197,48 @@ static uint8_t *arm_flush_ldr(uint8_t *gen_code_ptr, offset = ((unsigned 
long)(le->data_ptr) - (unsigned long)data_start) + (unsigned long)data_ptr - (unsigned long)ptr - 8; - insn = *ptr & ~(0xfff | 0x00800000); if (offset < 0) { - offset = - offset; - } else { - insn |= 0x00800000; - } - if (offset > 0xfff) { - fprintf(stderr, "Error ldr offset\n"); + fprintf(stderr, "Negative constant pool offset\n"); abort(); } - insn |= offset; + switch (le->type) { + case 0: /* ldr */ + mask = ~0x00800fff; + if (offset >= 4096) { + fprintf(stderr, "Bad ldr offset\n"); + abort(); + } + break; + case 1: /* ldc */ + mask = ~0x008000ff; + if (offset >= 1024 ) { + fprintf(stderr, "Bad ldc offset\n"); + abort(); + } + break; + case 2: /* add */ + mask = ~0xfff; + if (offset >= 1024 ) { + fprintf(stderr, "Bad add offset\n"); + abort(); + } + break; + default: + fprintf(stderr, "Bad pc relative fixup\n"); + abort(); + } + insn = *ptr & mask; + switch (le->type) { + case 0: /* ldr */ + insn |= offset | 0x00800000; + break; + case 1: /* ldc */ + insn |= (offset >> 2) | 0x00800000; + break; + case 2: /* add */ + insn |= (offset >> 2) | 0xf00; + break; + } *ptr = insn; } return gen_code_ptr; diff --git a/elf.h b/elf.h index 8ceb94976..1825d50e8 100644 --- a/elf.h +++ b/elf.h @@ -502,6 +502,8 @@ typedef struct { #define R_ARM_GOTPC 25 /* 32 bit PC relative offset to GOT */ #define R_ARM_GOT32 26 /* 32 bit GOT entry */ #define R_ARM_PLT32 27 /* 32 bit PLT address */ +#define R_ARM_CALL 28 +#define R_ARM_JUMP24 29 #define R_ARM_GNU_VTENTRY 100 #define R_ARM_GNU_VTINHERIT 101 #define R_ARM_THM_PC11 102 /* thumb unconditional branch */