]> git.proxmox.com Git - qemu.git/commitdiff
new x86 CPU core
authorbellard <bellard@c046a42c-6fe2-441c-8c8c-71466251a162>
Sat, 1 Mar 2003 17:13:26 +0000 (17:13 +0000)
committerbellard <bellard@c046a42c-6fe2-441c-8c8c-71466251a162>
Sat, 1 Mar 2003 17:13:26 +0000 (17:13 +0000)
git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@14 c046a42c-6fe2-441c-8c8c-71466251a162

Makefile
TODO
cpu-i386.h [new file with mode: 0644]
dyngen.c [new file with mode: 0644]
gen-i386.h [new file with mode: 0644]
linux-user/main.c
op-i386.c
ops_template.h [new file with mode: 0644]
tests/Makefile
thunk.h
translate-i386.c [new file with mode: 0644]

index 9f71211333f6451740a1c5f365700341ec082931..397ddf1342a439992c8a978595e24f33c6a46cba 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,9 @@
 ARCH=i386
 #ARCH=ppc
+HOST_CC=gcc
 
 ifeq ($(ARCH),i386)
-CFLAGS=-Wall -O2 -g
+CFLAGS=-Wall -O2 -g -fomit-frame-pointer
 LDFLAGS=-g
 LIBS=
 CC=gcc
@@ -27,38 +28,59 @@ endif
 
 #########################################################
 
-DEFINES+=-D_GNU_SOURCE -DGEMU -DDOSEMU #-DNO_TRACE_MSGS
+DEFINES+=-D_GNU_SOURCE -DGEMU -DDOSEMU -DNO_TRACE_MSGS
+DEFINES+=-DCONFIG_PREFIX=\"/usr/local\"
 LDSCRIPT=$(ARCH).ld
+LIBS+=-ldl
 
 OBJS= i386/fp87.o i386/interp_main.o i386/interp_modrm.o i386/interp_16_32.o \
       i386/interp_32_16.o i386/interp_32_32.o i386/emu-utils.o \
       i386/dis8086.o i386/emu-ldt.o
+OBJS+=translate-i386.o op-i386.o
 OBJS+= elfload.o main.o thunk.o syscall.o
-
 SRCS = $(OBJS:.o=.c)
 
 all: gemu
 
 gemu: $(OBJS)
-       $(CC) -Wl,-T,$(LDSCRIPT) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
+       $(CC) -Wl,-T,$(LDSCRIPT) $(LDFLAGS) -o $@ $^ $(LIBS)
 
 depend: $(SRCS)
        $(CC) -MM $(CFLAGS) $^ 1>.depend
 
+# old i386 emulator
+i386/interp_32_32.o: i386/interp_32_32.c i386/interp_gen.h
+
+i386/interp_gen.h: i386/gencode
+       ./i386/gencode > $@
+
+i386/gencode: i386/gencode.c
+       $(CC) -O2 -Wall -g $< -o $@
+
+# new i386 emulator
+dyngen: dyngen.c
+       $(HOST_CC) -O2 -Wall -g $< -o $@
+
+translate-i386.o: translate-i386.c op-i386.h cpu-i386.h
+
+op-i386.h: op-i386.o dyngen
+       ./dyngen -o $@ $<
+
+op-i386.o: op-i386.c opreg_template.h ops_template.h
+       $(CC) $(CFLAGS) $(DEFINES) -c -o $@ $<
+
 %.o: %.c
        $(CC) $(CFLAGS) $(DEFINES) -c -o $@ $<
 
 clean:
-       rm -f *.o *~ i386/*.o i386/*~ gemu hello test1 test2 TAGS
-
-hello: hello.c
-       $(CC) -nostdlib $(CFLAGS) -static $(LDFLAGS) -o $@ $<
+       rm -f *.o *~ i386/*.o i386/*~ gemu TAGS
 
-test1: test1.c
-       $(CC) $(CFLAGS) -static $(LDFLAGS) -o $@ $<
+# various test targets
+test speed: gemu
+       make -C tests $@
 
-test2: test2.c
-       $(CC) $(CFLAGS) -static $(LDFLAGS) -o $@ $<
+TAGS: 
+       etags *.[ch] i386/*.[ch]
 
 ifneq ($(wildcard .depend),)
 include .depend
diff --git a/TODO b/TODO
index 045f877f469cb0454a36d3b7cdcb44eb53b98106..7ba6ab4a7978ea214a1cb206372da2ab538a281c 100644 (file)
--- a/TODO
+++ b/TODO
@@ -1,2 +1,5 @@
-- swap all elf paramters
+- tests
+- signals
+- threads
 - fix printf for doubles (fp87.c bug ?)
+- make it self runnable (use same trick as ld.so : include its own relocator and libc)
diff --git a/cpu-i386.h b/cpu-i386.h
new file mode 100644 (file)
index 0000000..a857efb
--- /dev/null
@@ -0,0 +1,148 @@
+#ifndef CPU_I386_H
+#define CPU_I386_H
+
+#define R_EAX 0
+#define R_ECX 1
+#define R_EDX 2
+#define R_EBX 3
+#define R_ESP 4
+#define R_EBP 5
+#define R_ESI 6
+#define R_EDI 7
+
+#define R_AL 0
+#define R_CL 1
+#define R_DL 2
+#define R_BL 3
+#define R_AH 4
+#define R_CH 5
+#define R_DH 6
+#define R_BH 7
+
+#define R_ES 0
+#define R_CS 1
+#define R_SS 2
+#define R_DS 3
+#define R_FS 4
+#define R_GS 5
+
+#define CC_C           0x0001
+#define CC_P   0x0004
+#define CC_A   0x0010
+#define CC_Z   0x0040
+#define CC_S    0x0080
+#define CC_O    0x0800
+
+#define TRAP_FLAG              0x0100
+#define INTERRUPT_FLAG         0x0200
+#define DIRECTION_FLAG         0x0400
+#define IOPL_FLAG_MASK         0x3000
+#define NESTED_FLAG            0x4000
+#define BYTE_FL                        0x8000  /* Intel reserved! */
+#define RF_FLAG                        0x10000
+#define VM_FLAG                        0x20000
+/* AC                          0x40000 */
+
+enum {
+    CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
+    CC_OP_EFLAGS,  /* all cc are explicitly computed, CC_SRC = flags */
+    CC_OP_MUL, /* modify all flags, C, O = (CC_SRC != 0) */
+
+    CC_OP_ADDB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+    CC_OP_ADDW,
+    CC_OP_ADDL,
+
+    CC_OP_SUBB, /* modify all flags, CC_DST = res, CC_SRC = src1 */
+    CC_OP_SUBW,
+    CC_OP_SUBL,
+
+    CC_OP_LOGICB, /* modify all flags, CC_DST = res */
+    CC_OP_LOGICW,
+    CC_OP_LOGICL,
+
+    CC_OP_INCB, /* modify all flags except C, CC_DST = res */
+    CC_OP_INCW,
+    CC_OP_INCL,
+
+    CC_OP_DECB, /* modify all flags except C, CC_DST = res */
+    CC_OP_DECW,
+    CC_OP_DECL,
+
+    CC_OP_SHLB, /* modify all flags, CC_DST = res, CC_SRC.lsb = C */
+    CC_OP_SHLW,
+    CC_OP_SHLL,
+
+    CC_OP_NB, /* number of CC_OP_xxx values (must stay last) */
+};
+
+typedef struct CPU86State {
+    /* standard registers */
+    uint32_t regs[8]; /* presumably indexed by R_EAX..R_EDI - TODO confirm */
+    uint32_t pc; /* cs_base + eip value */
+
+    /* eflags handling */
+    uint32_t eflags;
+    uint32_t cc_src;
+    uint32_t cc_dst;
+    uint32_t cc_op; /* one of the CC_OP_xxx values */
+    int32_t df; /* D flag : 1 if D = 0, -1 if D = 1 */
+    
+    /* segments */
+    uint8_t *segs_base[6]; /* presumably indexed by R_ES..R_GS - TODO confirm */
+    uint32_t segs[6];
+
+    /* emulator internal variables */
+    uint32_t t0; /* temporary t0 storage */
+    uint32_t t1; /* temporary t1 storage */
+    uint32_t a0; /* temporary a0 storage (address) */
+} CPU86State;
+
+static inline int ldub(void *ptr)
+{
+    return *(uint8_t *)ptr;
+}
+
+static inline int ldsb(void *ptr)
+{
+    return *(int8_t *)ptr;
+}
+
+static inline int lduw(void *ptr)
+{
+    return *(uint16_t *)ptr;
+}
+
+static inline int ldsw(void *ptr)
+{
+    return *(int16_t *)ptr;
+}
+
+static inline int ldl(void *ptr)
+{
+    return *(uint32_t *)ptr;
+}
+
+
+static inline void stb(void *ptr, int v)
+{
+    *(uint8_t *)ptr = v;
+}
+
+static inline void stw(void *ptr, int v)
+{
+    *(uint16_t *)ptr = v;
+}
+
+static inline void stl(void *ptr, int v)
+{
+    *(uint32_t *)ptr = v;
+}
+
+void port_outb(int addr, int val);
+void port_outw(int addr, int val);
+void port_outl(int addr, int val);
+int port_inb(int addr);
+int port_inw(int addr);
+int port_inl(int addr);
+
+#endif /* CPU_I386_H */
diff --git a/dyngen.c b/dyngen.c
new file mode 100644 (file)
index 0000000..ff10891
--- /dev/null
+++ b/dyngen.c
@@ -0,0 +1,521 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <elf.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "thunk.h"
+/* the names of all dynamically generated functions begin with this prefix */
+#define OP_PREFIX "op"
+
+/* return non-zero if the byte order of the ELF file differs from the host's */
+int elf_must_swap(Elf32_Ehdr *h)
+{
+  union {
+      uint32_t i;
+      uint8_t b[4];
+  } swaptest;
+  swaptest.i = 1; /* b[0] is 0 only when the host stores the MSB first */
+  return (h->e_ident[EI_DATA] == ELFDATA2MSB) != 
+      (swaptest.b[0] == 0);
+}
+  
+void swab16s(uint16_t *p)
+{
+    *p = bswap16(*p);
+}
+
+void swab32s(uint32_t *p)
+{
+    *p = bswap32(*p);
+}
+
+void swab64s(uint64_t *p) /* byte-swap in place the 64 bit value at *p */
+{
+    *p = bswap64(*p); /* p must be 64 bit wide or the result is truncated */
+}
+
+void elf_swap_ehdr(Elf32_Ehdr *h)
+{
+    swab16s(&h->e_type);                       /* Object file type */
+    swab16s(&h->       e_machine);             /* Architecture */
+    swab32s(&h->       e_version);             /* Object file version */
+    swab32s(&h->       e_entry);               /* Entry point virtual address */
+    swab32s(&h->       e_phoff);               /* Program header table file offset */
+    swab32s(&h->       e_shoff);               /* Section header table file offset */
+    swab32s(&h->       e_flags);               /* Processor-specific flags */
+    swab16s(&h->       e_ehsize);              /* ELF header size in bytes */
+    swab16s(&h->       e_phentsize);           /* Program header table entry size */
+    swab16s(&h->       e_phnum);               /* Program header table entry count */
+    swab16s(&h->       e_shentsize);           /* Section header table entry size */
+    swab16s(&h->       e_shnum);               /* Section header table entry count */
+    swab16s(&h->       e_shstrndx);            /* Section header string table index */
+}
+
+void elf_swap_shdr(Elf32_Shdr *h)
+{
+  swab32s(&h-> sh_name);               /* Section name (string tbl index) */
+  swab32s(&h-> sh_type);               /* Section type */
+  swab32s(&h-> sh_flags);              /* Section flags */
+  swab32s(&h-> sh_addr);               /* Section virtual addr at execution */
+  swab32s(&h-> sh_offset);             /* Section file offset */
+  swab32s(&h-> sh_size);               /* Section size in bytes */
+  swab32s(&h-> sh_link);               /* Link to another section */
+  swab32s(&h-> sh_info);               /* Additional section information */
+  swab32s(&h-> sh_addralign);          /* Section alignment */
+  swab32s(&h-> sh_entsize);            /* Entry size if section holds table */
+}
+
+void elf_swap_phdr(Elf32_Phdr *h)
+{
+    swab32s(&h->p_type);                       /* Segment type */
+    swab32s(&h->p_offset);             /* Segment file offset */
+    swab32s(&h->p_vaddr);              /* Segment virtual address */
+    swab32s(&h->p_paddr);              /* Segment physical address */
+    swab32s(&h->p_filesz);             /* Segment size in file */
+    swab32s(&h->p_memsz);              /* Segment size in memory */
+    swab32s(&h->p_flags);              /* Segment flags */
+    swab32s(&h->p_align);              /* Segment alignment */
+}
+
+int do_swap;
+int e_machine;
+
+uint16_t get16(uint16_t *p)
+{
+    uint16_t val;
+    val = *p;
+    if (do_swap)
+        val = bswap16(val);
+    return val;
+}
+
+uint32_t get32(uint32_t *p)
+{
+    uint32_t val;
+    val = *p;
+    if (do_swap)
+        val = bswap32(val);
+    return val;
+}
+
+void put16(uint16_t *p, uint16_t val)
+{
+    if (do_swap)
+        val = bswap16(val);
+    *p = val;
+}
+
+void put32(uint32_t *p, uint32_t val)
+{
+    if (do_swap)
+        val = bswap32(val);
+    *p = val;
+}
+
+void __attribute__((noreturn)) error(const char *fmt, ...)
+{
+    va_list ap;
+    va_start(ap, fmt);
+    fprintf(stderr, "dyngen: ");
+    vfprintf(stderr, fmt, ap);
+    fprintf(stderr, "\n");
+    va_end(ap);
+    exit(1);
+}
+
+/* return the first section whose name in 'shstr' equals 'name', or NULL if none */
+Elf32_Shdr *find_elf_section(Elf32_Shdr *shdr, int shnum, const char *shstr, 
+                             const char *name)
+{
+    int i;
+    const char *shname;
+    Elf32_Shdr *sec;
+
+    for(i = 0; i < shnum; i++) {
+        sec = &shdr[i];
+        if (!sec->sh_name) /* sections with a zero name offset are skipped */
+            continue;
+        shname = shstr + sec->sh_name; /* sh_name is used as an offset into shstr */
+        if (!strcmp(shname, name))
+            return sec;
+    }
+    return NULL;
+}
+
+void *load_data(int fd, long offset, unsigned int size)
+{
+    char *data;
+
+    data = malloc(size);
+    if (!data)
+        return NULL;
+    lseek(fd, offset, SEEK_SET);
+    if (read(fd, data, size) != size) {
+        free(data);
+        return NULL;
+    }
+    return data;
+}
+
+int strstart(const char *str, const char *val, const char **ptr)
+{   /* return 1 if 'str' begins with 'val', else 0 ('*ptr' is then left untouched) */
+    const char *p, *q;
+    p = str;
+    q = val;
+    while (*q != '\0') {
+        if (*p != *q) /* also stops when 'str' is shorter than 'val' */
+            return 0;
+        p++;
+        q++;
+    }
+    if (ptr)
+        *ptr = p; /* first character of 'str' following the 'val' prefix */
+    return 1;
+}
+
+#define MAX_ARGS 3
+
+/* generate op code: write to 'outfile' an inline C function gen_<name>() which
+   copies the code of op 'name' (final 'ret' excluded) and patches its relocations */
+void gen_code(const char *name, unsigned long offset, unsigned long size,
+              FILE *outfile, uint8_t *text, void *relocs, int nb_relocs, int reloc_sh_type,
+              Elf32_Sym *symtab, char *strtab)
+{
+    int copy_size = 0;
+    uint8_t *p_start, *p_end;
+    int nb_args, i;
+    uint8_t args_present[MAX_ARGS];
+    const char *sym_name, *p;
+
+    /* compute exact size excluding return instruction */
+    p_start = text + offset;
+    p_end = p_start + size;
+    switch(e_machine) {
+    case EM_386:
+        {
+            uint8_t *q;
+            q = p_end - 1;
+            /* find ret */
+            while (q > p_start && *q != 0xc3)
+                q--;
+            /* skip double ret */
+            if (q > p_start && q[-1] == 0xc3)
+                q--;
+            if (q == p_start)
+                error("empty code for %s", name);
+            copy_size = q - p_start;
+        }
+        break;
+    case EM_PPC:
+        {
+            uint8_t *q;
+            q = (void *)(p_end - 4);
+            /* find ret */
+            while (q > p_start && get32((uint32_t *)q) != 0x4e800020)
+                q -= 4;
+            /* skip double ret */
+            if (q > p_start && get32((uint32_t *)(q - 4)) == 0x4e800020)
+                q -= 4;
+            if (q == p_start)
+                error("empty code for %s", name);
+            copy_size = q - p_start;
+        }
+        break;
+    default:
+        error("unsupported CPU (%d)", e_machine);
+    }
+
+    /* compute the number of arguments by looking at the relocations */
+    for(i = 0;i < MAX_ARGS; i++)
+        args_present[i] = 0;
+
+    if (reloc_sh_type == SHT_REL) {
+        Elf32_Rel *rel;
+        int n;
+        for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+            if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) {
+                sym_name = strtab + symtab[ELF32_R_SYM(rel->r_info)].st_name;
+                if (strstart(sym_name, "__op_param", &p)) {
+                    n = strtoul(p, NULL, 10);
+                    if (n < 1 || n > MAX_ARGS) /* __op_paramN: N is 1-based */
+                        error("invalid argument number %d in %s", n, name);
+                    args_present[n - 1] = 1;
+                }
+            }
+        }
+    } else {
+        Elf32_Rela *rel;
+        int n;
+        for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+            if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) {
+                sym_name = strtab + symtab[ELF32_R_SYM(rel->r_info)].st_name;
+                if (strstart(sym_name, "__op_param", &p)) {
+                    n = strtoul(p, NULL, 10);
+                    if (n < 1 || n > MAX_ARGS) /* __op_paramN: N is 1-based */
+                        error("invalid argument number %d in %s", n, name);
+                    args_present[n - 1] = 1;
+                }
+            }
+        }
+    }
+
+    nb_args = 0;
+    while (nb_args < MAX_ARGS && args_present[nb_args])
+        nb_args++;
+    for(i = nb_args; i < MAX_ARGS; i++) {
+        if (args_present[i]) /* e.g. param3 used without param2 */
+            error("inconsistent argument numbering in %s", name);
+    }
+
+    /* output C code */
+    fprintf(outfile, "extern void %s();\n", name);
+    fprintf(outfile, "static inline void gen_%s(", name);
+    if (nb_args == 0) {
+        fprintf(outfile, "void");
+    } else {
+        for(i = 0; i < nb_args; i++) {
+            if (i != 0)
+                fprintf(outfile, ", ");
+            fprintf(outfile, "long param%d", i + 1);
+        }
+    }
+    fprintf(outfile, ")\n");
+    fprintf(outfile, "{\n");
+    fprintf(outfile, "    memcpy(gen_code_ptr, &%s, %d);\n", name, copy_size);
+
+    /* patch relocations */
+    switch(e_machine) {
+    case EM_386:
+        {
+            Elf32_Rel *rel;
+            char relname[256]; /* C expression giving the relocation target */
+            int type;
+            long addend;
+            for(i = 0, rel = relocs;i < nb_relocs; i++, rel++) {
+                if (rel->r_offset >= offset && rel->r_offset < offset + copy_size) {
+                    sym_name = strtab + symtab[ELF32_R_SYM(rel->r_info)].st_name;
+                    if (strstart(sym_name, "__op_param", &p)) {
+                        snprintf(relname, sizeof(relname), "param%s", p);
+                    } else {
+                        snprintf(relname, sizeof(relname), "(long)(&%s)", sym_name);
+                    }
+                    type = ELF32_R_TYPE(rel->r_info);
+                    addend = get32((uint32_t *)(text + rel->r_offset)); /* REL: addend is stored in place */
+                    switch(type) {
+                    case R_386_32:
+                        fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %ld) = %s + %ld;\n",
+                                rel->r_offset - offset, relname, addend);
+                        break;
+                    case R_386_PC32:
+                        fprintf(outfile, "    *(uint32_t *)(gen_code_ptr + %ld) = %s - (long)(gen_code_ptr + %ld) + %ld;\n",
+                                rel->r_offset - offset, relname, rel->r_offset - offset, addend);
+                        break;
+                    default:
+                        error("unsupported i386 relocation (%d)", type);
+                    }
+                }
+            }
+        }
+        break;
+    default:
+        error("unsupported CPU for relocations (%d)", e_machine);
+    }
+
+    fprintf(outfile, "    gen_code_ptr += %d;\n", copy_size);
+    fprintf(outfile, "}\n\n");
+}
+
+/* load the ELF relocatable object 'filename' and write one gen_xxx() generator
+   per op symbol of its .text section to 'outfile'; every failure is fatal */
+int load_elf(const char *filename, FILE *outfile)
+{
+    int fd;
+    Elf32_Ehdr ehdr;
+    Elf32_Shdr *sec, *shdr, *symtab_sec, *strtab_sec, *text_sec;
+    int i, j, nb_syms;
+    Elf32_Sym *symtab, *sym;
+    const char *cpu_name;
+    char *shstr, *strtab;
+    uint8_t *text;
+    void *relocs;
+    int nb_relocs, reloc_sh_type;
+
+    fd = open(filename, O_RDONLY);
+    if (fd < 0)
+        error("can't open file '%s'", filename);
+
+    /* Read ELF header.  */
+    if (read(fd, &ehdr, sizeof (ehdr)) != sizeof (ehdr))
+        error("unable to read file header");
+
+    /* Check ELF identification.  */
+    if (ehdr.e_ident[EI_MAG0] != ELFMAG0
+     || ehdr.e_ident[EI_MAG1] != ELFMAG1
+     || ehdr.e_ident[EI_MAG2] != ELFMAG2
+     || ehdr.e_ident[EI_MAG3] != ELFMAG3
+     || ehdr.e_ident[EI_CLASS] != ELFCLASS32
+     || ehdr.e_ident[EI_VERSION] != EV_CURRENT) {
+        error("bad ELF header");
+    }
+
+    do_swap = elf_must_swap(&ehdr);
+    if (do_swap)
+        elf_swap_ehdr(&ehdr);
+    if (ehdr.e_type != ET_REL)
+        error("ELF object file expected");
+    if (ehdr.e_version != EV_CURRENT)
+        error("Invalid ELF version");
+    e_machine = ehdr.e_machine;
+
+    /* read section headers */
+    shdr = load_data(fd, ehdr.e_shoff, ehdr.e_shnum * sizeof(Elf32_Shdr));
+    if (!shdr)
+        error("unable to read section headers");
+    if (do_swap) {
+        for(i = 0; i < ehdr.e_shnum; i++) {
+            elf_swap_shdr(&shdr[i]);
+        }
+    }
+
+    sec = &shdr[ehdr.e_shstrndx];
+    shstr = load_data(fd, sec->sh_offset, sec->sh_size);
+    if (!shstr)
+        error("unable to read section name table");
+    /* text section */
+    text_sec = find_elf_section(shdr, ehdr.e_shnum, shstr, ".text");
+    if (!text_sec)
+        error("could not find .text section");
+    text = load_data(fd, text_sec->sh_offset, text_sec->sh_size);
+    if (!text)
+        error("unable to read .text section");
+    /* find text relocations, if any */
+    nb_relocs = 0;
+    relocs = NULL;
+    reloc_sh_type = 0;
+    for(i = 0; i < ehdr.e_shnum; i++) {
+        sec = &shdr[i];
+        if ((sec->sh_type == SHT_REL || sec->sh_type == SHT_RELA) &&
+            sec->sh_info == (text_sec - shdr)) {
+            reloc_sh_type = sec->sh_type;
+            relocs = load_data(fd, sec->sh_offset, sec->sh_size);
+            if (!relocs)
+                error("unable to read relocations");
+            nb_relocs = sec->sh_size / sec->sh_entsize;
+            if (do_swap) {
+                if (sec->sh_type == SHT_REL) {
+                    Elf32_Rel *rel = relocs;
+                    for(j = 0, rel = relocs; j < nb_relocs; j++, rel++) {
+                        swab32s(&rel->r_offset);
+                        swab32s(&rel->r_info);
+                    }
+                } else {
+                    Elf32_Rela *rel = relocs;
+                    for(j = 0, rel = relocs; j < nb_relocs; j++, rel++) {
+                        swab32s(&rel->r_offset);
+                        swab32s(&rel->r_info);
+                        swab32s((uint32_t *)&rel->r_addend); /* r_addend is signed */
+                    }
+                }
+            }
+            break; /* only the first relocation section of .text is used */
+        }
+    }
+
+    symtab_sec = find_elf_section(shdr, ehdr.e_shnum, shstr, ".symtab");
+    if (!symtab_sec)
+        error("could not find .symtab section");
+    strtab_sec = &shdr[symtab_sec->sh_link]; /* sh_link: section of the symbol names */
+    symtab = load_data(fd, symtab_sec->sh_offset, symtab_sec->sh_size);
+    strtab = load_data(fd, strtab_sec->sh_offset, strtab_sec->sh_size);
+    if (!symtab || !strtab)
+        error("unable to read symbol table");
+    nb_syms = symtab_sec->sh_size / sizeof(Elf32_Sym);
+    if (do_swap) {
+        for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
+            swab32s(&sym->st_name);
+            swab32s(&sym->st_value);
+            swab32s(&sym->st_size);
+            swab16s(&sym->st_shndx);
+        }
+    }
+
+    switch(e_machine) {
+    case EM_386:
+        cpu_name = "i386";
+        break;
+    case EM_PPC:
+        cpu_name = "ppc";
+        break;
+    case EM_MIPS:
+        cpu_name = "mips";
+        break;
+    case EM_ARM:
+        cpu_name = "arm";
+        break;
+    case EM_SPARC:
+        cpu_name = "sparc";
+        break;
+    default:
+        error("unsupported CPU (e_machine=%d)", e_machine);
+    }
+
+    fprintf(outfile, "#include \"gen-%s.h\"\n\n", cpu_name);
+    for(i = 0, sym = symtab; i < nb_syms; i++, sym++) {
+        const char *name;
+        name = strtab + sym->st_name;
+        if (strstart(name, "op_", NULL) ||
+            strstart(name, "op1_", NULL) ||
+            strstart(name, "op2_", NULL) ||
+            strstart(name, "op3_", NULL)) {
+            if (sym->st_shndx != (text_sec - shdr))
+                error("invalid section for opcode (0x%x)", sym->st_shndx);
+            gen_code(name, sym->st_value, sym->st_size, outfile, 
+                     text, relocs, nb_relocs, reloc_sh_type, symtab, strtab);
+        }
+    }
+    close(fd);
+    return 0;
+}
+
+void usage(void)
+{
+    printf("dyngen (c) 2003 Fabrice Bellard\n"
+           "usage: dyngen [-o outfile] objfile\n"
+           "Generate a dynamic code generator from an object file\n");
+    exit(1);
+}
+
+int main(int argc, char **argv)
+{
+    int c;
+    const char *filename, *outfilename;
+    FILE *outfile;
+
+    outfilename = "out.c";
+    for(;;) {
+        c = getopt(argc, argv, "ho:");
+        if (c == -1)
+            break;
+        switch(c) {
+        case 'h':
+            usage();
+            break;
+        case 'o':
+            outfilename = optarg;
+            break;
+        }
+    }
+    if (optind >= argc)
+        usage();
+    filename = argv[optind];
+    outfile = fopen(outfilename, "w");
+    if (!outfile)
+        error("could not open '%s'", outfilename);
+    load_elf(filename, outfile);
+    fclose(outfile);
+    return 0;
+}
diff --git a/gen-i386.h b/gen-i386.h
new file mode 100644 (file)
index 0000000..a5d7f59
--- /dev/null
@@ -0,0 +1,8 @@
+static inline void gen_start(void)
+{
+}
+
+static inline void gen_end(void)
+{
+    *gen_code_ptr++ = 0xc3; /* ret */
+}
index 544953eb2503102e0e4ffb427da583d2189e633e..1d76d4d7cc15ae99703c5ae0b02893385328cefb 100644 (file)
@@ -191,6 +191,41 @@ void INT_handler(int num, void *env)
 }
 
 /***********************************************************/
+/* new CPU core */
+
+void port_outb(int addr, int val)
+{
+    fprintf(stderr, "outb: port=0x%04x, data=%02x\n", addr, val);
+}
+
+void port_outw(int addr, int val)
+{
+    fprintf(stderr, "outw: port=0x%04x, data=%04x\n", addr, val);
+}
+
+void port_outl(int addr, int val)
+{
+    fprintf(stderr, "outl: port=0x%04x, data=%08x\n", addr, val);
+}
+
+int port_inb(int addr)
+{
+    fprintf(stderr, "inb: port=0x%04x\n", addr);
+    return 0;
+}
+
+int port_inw(int addr)
+{
+    fprintf(stderr, "inw: port=0x%04x\n", addr);
+    return 0;
+}
+
+int port_inl(int addr)
+{
+    fprintf(stderr, "inl: port=0x%04x\n", addr);
+    return 0;
+}
+
 
 /* XXX: currently we use LDT entries */
 #define __USER_CS      (0x23|4)
@@ -270,6 +305,7 @@ int main(int argc, char **argv)
     LDT[__USER_DS >> 3].dwSelLimit = 0xfffff;
     LDT[__USER_DS >> 3].lpSelBase = NULL;
     init_npu();
+    build_decode_tables();
 
     for(;;) {
         int err;
index fdd2fa5ade7b8ff56fd70f081aeecb40007527d3..8607cf4552ebf5b99f265eec3f722b2140db3e23 100644 (file)
--- a/op-i386.c
+++ b/op-i386.c
@@ -8,6 +8,8 @@ typedef signed short int16_t;
 typedef signed int int32_t;
 typedef signed long long int64_t;
 
+#define NULL 0
+
 #ifdef __i386__
 register int T0 asm("esi");
 register int T1 asm("ebx");
@@ -74,13 +76,12 @@ extern int __op_param1, __op_param2, __op_param3;
 #include "cpu-i386.h"
 
 typedef struct CCTable {
-    int (*compute_c)(void);  /* return the C flag */
-    int (*compute_z)(void);  /* return the Z flag */
-    int (*compute_s)(void);  /* return the S flag */
-    int (*compute_o)(void);  /* return the O flag */
     int (*compute_all)(void); /* return all the flags */
+    int (*compute_c)(void);  /* return the C flag */
 } CCTable;
 
+extern CCTable cc_table[];
+
 uint8_t parity_table[256] = {
     CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0,
     0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
@@ -116,120 +117,30 @@ uint8_t parity_table[256] = {
     0, CC_P, CC_P, 0, CC_P, 0, 0, CC_P,
 };
 
-static int compute_eflags_all(void)
-{
-    return CC_SRC;
-}
-
-static int compute_eflags_addb(void)
-{
-    int cf, pf, af, zf, sf, of;
-    int src1, src2;
-    src1 = CC_SRC;
-    src2 = CC_DST - CC_SRC;
-    cf = (uint8_t)CC_DST < (uint8_t)src1;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((uint8_t)CC_DST != 0) << 6;
-    sf = CC_DST & 0x80;
-    of = ((src1 ^ src2 ^ -1) & (src1 ^ CC_DST) & 0x80) << 4;
-    return cf | pf | af | zf | sf | of;
-}
-
-static int compute_eflags_subb(void)
-{
-    int cf, pf, af, zf, sf, of;
-    int src1, src2;
-    src1 = CC_SRC;
-    src2 = CC_SRC - CC_DST;
-    cf = (uint8_t)src1 < (uint8_t)src2;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((uint8_t)CC_DST != 0) << 6;
-    sf = CC_DST & 0x80;
-    of = ((src1 ^ src2 ^ -1) & (src1 ^ CC_DST) & 0x80) << 4;
-    return cf | pf | af | zf | sf | of;
-}
-
-static int compute_eflags_logicb(void)
-{
-    cf = 0;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = 0;
-    zf = ((uint8_t)CC_DST != 0) << 6;
-    sf = CC_DST & 0x80;
-    of = 0;
-    return cf | pf | af | zf | sf | of;
-}
-
-static int compute_eflags_incb(void)
-{
-    int cf, pf, af, zf, sf, of;
-    int src2;
-    src1 = CC_DST - 1;
-    src2 = 1;
-    cf = CC_SRC;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((uint8_t)CC_DST != 0) << 6;
-    sf = CC_DST & 0x80;
-    of = ((src1 ^ src2 ^ -1) & (src1 ^ CC_DST) & 0x80) << 4;
-    return cf | pf | af | zf | sf | of;
-}
-
-static int compute_eflags_decb(void)
-{
-    int cf, pf, af, zf, sf, of;
-    int src1, src2;
-    src1 = CC_DST + 1;
-    src2 = 1;
-    cf = (uint8_t)src1 < (uint8_t)src2;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = (CC_DST ^ src1 ^ src2) & 0x10;
-    zf = ((uint8_t)CC_DST != 0) << 6;
-    sf = CC_DST & 0x80;
-    of = ((src1 ^ src2 ^ -1) & (src1 ^ CC_DST) & 0x80) << 4;
-    return cf | pf | af | zf | sf | of;
-}
-
-static int compute_eflags_shlb(void)
-{
-    cf = CC_SRC;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = 0; /* undefined */
-    zf = ((uint8_t)CC_DST != 0) << 6;
-    sf = CC_DST & 0x80;
-    of = 0; /* undefined */
-    return cf | pf | af | zf | sf | of;
-}
+/* modulo 17 table */
+const uint8_t rclw_table[32] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 
+    8, 9,10,11,12,13,14,15,
+   16, 0, 1, 2, 3, 4, 5, 6,
+    7, 8, 9,10,11,12,13,14,
+};
 
-static int compute_eflags_shrb(void)
-{
-    cf = CC_SRC & 1;
-    pf = parity_table[(uint8_t)CC_DST];
-    af = 0; /* undefined */
-    zf = ((uint8_t)CC_DST != 0) << 6;
-    sf = CC_DST & 0x80;
-    of = sf << 4;
-    return cf | pf | af | zf | sf | of;
-}
+/* modulo 9 table */
+const uint8_t rclb_table[32] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 
+    8, 0, 1, 2, 3, 4, 5, 6,
+    7, 8, 0, 1, 2, 3, 4, 5, 
+    6, 7, 8, 0, 1, 2, 3, 4,
+};
 
-static int compute_eflags_mul(void)
+/* n must be a constant to be efficient */
+static inline int lshift(int x, int n)
 {
-    cf = (CC_SRC != 0);
-    pf = 0; /* undefined */
-    af = 0; /* undefined */
-    zf = 0; /* undefined */
-    sf = 0; /* undefined */
-    of = cf << 11;
-    return cf | pf | af | zf | sf | of;
+    if (n >= 0)
+        return x << n;
+    else
+        return x >> (-n);
 }
-    
-CTable cc_table[CC_OP_NB] = {
-    [CC_OP_DYNAMIC] = { NULL, NULL, NULL },
-    [CC_OP_EFLAGS] = { NULL, NULL, NULL },
-    
-};
 
 /* we define the various pieces of code used by the JIT */
 
@@ -365,338 +276,6 @@ void OPPROTO op_testl_T0_T1_cc(void)
     CC_DST = T0 & T1;
 }
 
-/* shifts */
-
-void OPPROTO op_roll_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0x1f;
-    if (count) {
-        CC_SRC = T0;
-        T0 = (T0 << count) | (T0 >> (32 - count));
-        CC_DST = T0;
-        CC_OP = CC_OP_ROLL;
-    }
-}
-
-void OPPROTO op_rolw_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0xf;
-    if (count) {
-        T0 = T0 & 0xffff;
-        CC_SRC = T0;
-        T0 = (T0 << count) | (T0 >> (16 - count));
-        CC_DST = T0;
-        CC_OP = CC_OP_ROLW;
-    }
-}
-
-void OPPROTO op_rolb_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0x7;
-    if (count) {
-        T0 = T0 & 0xff;
-        CC_SRC = T0;
-        T0 = (T0 << count) | (T0 >> (8 - count));
-        CC_DST = T0;
-        CC_OP = CC_OP_ROLB;
-    }
-}
-
-void OPPROTO op_rorl_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0x1f;
-    if (count) {
-        CC_SRC = T0;
-        T0 = (T0 >> count) | (T0 << (32 - count));
-        CC_DST = T0;
-        CC_OP = CC_OP_RORB;
-    }
-}
-
-void OPPROTO op_rorw_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0xf;
-    if (count) {
-        CC_SRC = T0;
-        T0 = (T0 >> count) | (T0 << (16 - count));
-        CC_DST = T0;
-        CC_OP = CC_OP_RORW;
-    }
-}
-
-void OPPROTO op_rorb_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0x7;
-    if (count) {
-        CC_SRC = T0;
-        T0 = (T0 >> count) | (T0 << (8 - count));
-        CC_DST = T0;
-        CC_OP = CC_OP_RORL;
-    }
-}
-
-/* modulo 17 table */
-const uint8_t rclw_table[32] = {
-    0, 1, 2, 3, 4, 5, 6, 7, 
-    8, 9,10,11,12,13,14,15,
-   16, 0, 1, 2, 3, 4, 5, 6,
-    7, 8, 9,10,11,12,13,14,
-};
-
-/* modulo 9 table */
-const uint8_t rclb_table[32] = {
-    0, 1, 2, 3, 4, 5, 6, 7, 
-    8, 0, 1, 2, 3, 4, 5, 6,
-    7, 8, 0, 1, 2, 3, 4, 5, 
-    6, 7, 8, 0, 1, 2, 3, 4,
-};
-
-void helper_rcll_T0_T1_cc(void)
-{
-    int count, res;
-
-    count = T1 & 0x1f;
-    if (count) {
-        CC_SRC = T0;
-        res = (T0 << count) | (cc_table[CC_OP].compute_c() << (count - 1));
-        if (count > 1)
-            res |= T0 >> (33 - count);
-        T0 = res;
-        CC_DST = T0 ^ CC_SRC;    /* O is in bit 31 */
-        CC_SRC >>= (32 - count); /* CC is in bit 0 */
-        CC_OP = CC_OP_RCLL;
-    }
-}
-
-void OPPROTO op_rcll_T0_T1_cc(void)
-{
-    helper_rcll_T0_T1_cc();
-}
-
-void OPPROTO op_rclw_T0_T1_cc(void)
-{
-    int count;
-    count = rclw_table[T1 & 0x1f];
-    if (count) {
-        T0 = T0 & 0xffff;
-        CC_SRC = T0;
-        T0 = (T0 << count) | (cc_table[CC_OP].compute_c() << (count - 1)) |
-            (T0 >> (17 - count));
-        CC_DST = T0 ^ CC_SRC;
-        CC_SRC >>= (16 - count);
-        CC_OP = CC_OP_RCLW;
-    }
-}
-
-void OPPROTO op_rclb_T0_T1_cc(void)
-{
-    int count;
-    count = rclb_table[T1 & 0x1f];
-    if (count) {
-        T0 = T0 & 0xff;
-        CC_SRC = T0;
-        T0 = (T0 << count) | (cc_table[CC_OP].compute_c() << (count - 1)) |
-            (T0 >> (9 - count));
-        CC_DST = T0 ^ CC_SRC;
-        CC_SRC >>= (8 - count);
-        CC_OP = CC_OP_RCLB;
-    }
-}
-
-void OPPROTO op_rcrl_T0_T1_cc(void)
-{
-    int count, res;
-    count = T1 & 0x1f;
-    if (count) {
-        CC_SRC = T0;
-        res = (T0 >> count) | (cc_table[CC_OP].compute_c() << (32 - count));
-        if (count > 1)
-            res |= T0 << (33 - count);
-        T0 = res;
-        CC_DST = T0 ^ CC_SRC;
-        CC_SRC >>= (count - 1);
-        CC_OP = CC_OP_RCLL;
-    }
-}
-
-void OPPROTO op_rcrw_T0_T1_cc(void)
-{
-    int count;
-    count = rclw_table[T1 & 0x1f];
-    if (count) {
-        T0 = T0 & 0xffff;
-        CC_SRC = T0;
-        T0 = (T0 >> count) | (cc_table[CC_OP].compute_c() << (16 - count)) |
-            (T0 << (17 - count));
-        CC_DST = T0 ^ CC_SRC;
-        CC_SRC >>= (count - 1);
-        CC_OP = CC_OP_RCLW;
-    }
-}
-
-void OPPROTO op_rcrb_T0_T1_cc(void)
-{
-    int count;
-    count = rclb_table[T1 & 0x1f];
-    if (count) {
-        T0 = T0 & 0xff;
-        CC_SRC = T0;
-        T0 = (T0 >> count) | (cc_table[CC_OP].compute_c() << (8 - count)) |
-            (T0 << (9 - count));
-        CC_DST = T0 ^ CC_SRC;
-        CC_SRC >>= (count - 1);
-        CC_OP = CC_OP_RCLB;
-    }
-}
-
-void OPPROTO op_shll_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0x1f;
-    if (count == 1) {
-        CC_SRC = T0;
-        T0 = T0 << 1;
-        CC_DST = T0;
-        CC_OP = CC_OP_ADDL;
-    } else if (count) {
-        CC_SRC = T0 >> (32 - count);
-        T0 = T0 << count;
-        CC_DST = T0;
-        CC_OP = CC_OP_SHLL;
-    }
-}
-
-void OPPROTO op_shlw_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0x1f;
-    if (count == 1) {
-        CC_SRC = T0;
-        T0 = T0 << 1;
-        CC_DST = T0;
-        CC_OP = CC_OP_ADDW;
-    } else if (count) {
-        CC_SRC = T0 >> (16 - count);
-        T0 = T0 << count;
-        CC_DST = T0;
-        CC_OP = CC_OP_SHLW;
-    }
-}
-
-void OPPROTO op_shlb_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0x1f;
-    if (count == 1) {
-        CC_SRC = T0;
-        T0 = T0 << 1;
-        CC_DST = T0;
-        CC_OP = CC_OP_ADDB;
-    } else if (count) {
-        CC_SRC = T0 >> (8 - count);
-        T0 = T0 << count;
-        CC_DST = T0;
-        CC_OP = CC_OP_SHLB;
-    }
-}
-
-void OPPROTO op_shrl_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0x1f;
-    if (count == 1) {
-        CC_SRC = T0;
-        T0 = T0 >> 1;
-        CC_DST = T0;
-        CC_OP = CC_OP_SHRL;
-    } else if (count) {
-        CC_SRC = T0 >> (count - 1);
-        T0 = T0 >> count;
-        CC_DST = T0;
-        CC_OP = CC_OP_SHLL;
-    }
-}
-
-void OPPROTO op_shrw_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0x1f;
-    if (count == 1) {
-        T0 = T0 & 0xffff;
-        CC_SRC = T0;
-        T0 = T0 >> 1;
-        CC_DST = T0;
-        CC_OP = CC_OP_SHRW;
-    } else if (count) {
-        T0 = T0 & 0xffff;
-        CC_SRC = T0 >> (count - 1);
-        T0 = T0 >> count;
-        CC_DST = T0;
-        CC_OP = CC_OP_SHLW;
-    }
-}
-
-void OPPROTO op_shrb_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0x1f;
-    if (count == 1) {
-        T0 = T0 & 0xff;
-        CC_SRC = T0;
-        T0 = T0 >> 1;
-        CC_DST = T0;
-        CC_OP = CC_OP_SHRB;
-    } else if (count) {
-        T0 = T0 & 0xff;
-        CC_SRC = T0 >> (count - 1);
-        T0 = T0 >> count;
-        CC_DST = T0;
-        CC_OP = CC_OP_SHLB;
-    }
-}
-
-void OPPROTO op_sarl_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0x1f;
-    if (count) {
-        CC_SRC = (int32_t)T0 >> (count - 1);
-        T0 = (int32_t)T0 >> count;
-        CC_DST = T0;
-        CC_OP = CC_OP_SHLL;
-    }
-}
-
-void OPPROTO op_sarw_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0x1f;
-    if (count) {
-        CC_SRC = (int16_t)T0 >> (count - 1);
-        T0 = (int16_t)T0 >> count;
-        CC_DST = T0;
-        CC_OP = CC_OP_SHLW;
-    }
-}
-
-void OPPROTO op_sarb_T0_T1_cc(void)
-{
-    int count;
-    count = T1 & 0x1f;
-    if (count) {
-        CC_SRC = (int8_t)T0 >> (count - 1);
-        T0 = (int8_t)T0 >> count;
-        CC_DST = T0;
-        CC_OP = CC_OP_SHLB;
-    }
-}
-
 /* multiply/divide */
 void OPPROTO op_mulb_AL_T0(void)
 {
@@ -924,41 +503,6 @@ void OPPROTO op_stl_T0_A0(void)
     stl((uint8_t *)A0, T0);
 }
 
-/* flags */
-
-void OPPROTO op_set_cc_op(void)
-{
-    CC_OP = PARAM1;
-}
-
-void OPPROTO op_movl_eflags_T0(void)
-{
-    CC_SRC = T0;
-    DF = (T0 & DIRECTION_FLAG) ? -1 : 1;
-}
-
-void OPPROTO op_movb_eflags_T0(void)
-{
-    int cc_o;
-    cc_o = cc_table[CC_OP].compute_o();
-    CC_SRC = T0 | (cc_o << 11);
-}
-
-void OPPROTO op_movl_T0_eflags(void)
-{
-    cc_table[CC_OP].compute_eflags();
-}
-
-void OPPROTO op_cld(void)
-{
-    DF = 1;
-}
-
-void OPPROTO op_std(void)
-{
-    DF = -1;
-}
-
 /* jumps */
 
 /* indirect jump */
@@ -972,54 +516,20 @@ void OPPROTO op_jmp_im(void)
     PC = PARAM1;
 }
 
-void OPPROTO op_jne_b(void)
-{
-    if ((uint8_t)CC_DST != 0)
-        PC += PARAM1;
-    else
-        PC += PARAM2;
-    FORCE_RET();
-}
-
-void OPPROTO op_jne_w(void)
-{
-    if ((uint16_t)CC_DST != 0)
-        PC += PARAM1;
-    else
-        PC += PARAM2;
-    FORCE_RET();
-}
-
-void OPPROTO op_jne_l(void)
-{
-    if (CC_DST != 0)
-        PC += PARAM1;
-    else
-        PC += PARAM2;
-    FORCE_RET(); /* generate a return so that gcc does not generate an
-                    early function return */
-}
-
 /* string ops */
 
 #define ldul ldl
 
-#define SUFFIX b
 #define SHIFT 0
-#include "opstring_template.h"
-#undef SUFFIX
+#include "ops_template.h"
 #undef SHIFT
 
-#define SUFFIX w
 #define SHIFT 1
-#include "opstring_template.h"
-#undef SUFFIX
+#include "ops_template.h"
 #undef SHIFT
 
-#define SUFFIX l
 #define SHIFT 2
-#include "opstring_template.h"
-#undef SUFFIX
+#include "ops_template.h"
 #undef SHIFT
 
 /* sign extend */
@@ -1095,3 +605,264 @@ void op_addl_ESP_im(void)
 {
     ESP += PARAM1;
 }
+
+/* flags handling */
+
+/* slow jumps cases (compute x86 flags) */
+/* Each op below evaluates one condition from the flags returned by
+   cc_table[CC_OP] and adds PARAM1 to PC when the condition holds,
+   PARAM2 otherwise. */
+
+/* jump if CC_O is set */
+void OPPROTO op_jo_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    if (eflags & CC_O)
+        PC += PARAM1;
+    else
+        PC += PARAM2;
+}
+
+/* jump if carry: only the carry is computed */
+void OPPROTO op_jb_cc(void)
+{
+    if (cc_table[CC_OP].compute_c())
+        PC += PARAM1;
+    else
+        PC += PARAM2;
+}
+
+/* jump if CC_Z is set */
+void OPPROTO op_jz_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    if (eflags & CC_Z)
+        PC += PARAM1;
+    else
+        PC += PARAM2;
+}
+
+/* jump if CC_Z or CC_C is set (unsigned below or equal) */
+void OPPROTO op_jbe_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    if (eflags & (CC_Z | CC_C))
+        PC += PARAM1;
+    else
+        PC += PARAM2;
+}
+
+/* jump if CC_S is set */
+void OPPROTO op_js_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    if (eflags & CC_S)
+        PC += PARAM1;
+    else
+        PC += PARAM2;
+}
+
+/* jump if CC_P is set */
+void OPPROTO op_jp_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    if (eflags & CC_P)
+        PC += PARAM1;
+    else
+        PC += PARAM2;
+}
+
+/* jump if bit 7 and bit 11 of the flags differ (signed less than):
+   the shift by 4 aligns bit 11 with bit 7 before the xor */
+void OPPROTO op_jl_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    if ((eflags ^ (eflags >> 4)) & 0x80)
+        PC += PARAM1;
+    else
+        PC += PARAM2;
+}
+
+/* jump if bit 7 and bit 11 of the flags differ or CC_Z is set
+   (signed less than or equal) */
+void OPPROTO op_jle_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    if (((eflags ^ (eflags >> 4)) & 0x80) || (eflags & CC_Z))
+        PC += PARAM1;
+    else
+        PC += PARAM2;
+}
+
+/* slow set cases (compute x86 flags) */
+/* Each op below stores in T0 the value of one condition taken from
+   the flags returned by cc_table[CC_OP]. */
+
+/* T0 = bit 11 of the flags */
+void OPPROTO op_seto_T0_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    T0 = (eflags >> 11) & 1;
+}
+
+/* T0 = carry: only the carry is computed */
+void OPPROTO op_setb_T0_cc(void)
+{
+    T0 = cc_table[CC_OP].compute_c();
+}
+
+/* T0 = bit 6 of the flags */
+void OPPROTO op_setz_T0_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    T0 = (eflags >> 6) & 1;
+}
+
+/* T0 = 1 if CC_Z or CC_C is set (unsigned below or equal) */
+void OPPROTO op_setbe_T0_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    T0 = (eflags & (CC_Z | CC_C)) != 0;
+}
+
+/* T0 = bit 7 of the flags */
+void OPPROTO op_sets_T0_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    T0 = (eflags >> 7) & 1;
+}
+
+/* T0 = bit 2 of the flags */
+void OPPROTO op_setp_T0_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    T0 = (eflags >> 2) & 1;
+}
+
+/* T0 = 1 if bit 7 and bit 11 of the flags differ (signed less than) */
+void OPPROTO op_setl_T0_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    T0 = ((eflags ^ (eflags >> 4)) >> 7) & 1;
+}
+
+/* T0 = 1 if bit 7 and bit 11 of the flags differ or CC_Z is set
+   (signed less than or equal) */
+void OPPROTO op_setle_T0_cc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    T0 = (((eflags ^ (eflags >> 4)) & 0x80) || (eflags & CC_Z)) != 0;
+}
+
+/* toggle bit 0 of T0 (presumably used to negate the 0/1 result of a
+   set op - to be confirmed in the translator) */
+void OPPROTO op_xor_T0_1(void)
+{
+    T0 ^= 1;
+}
+
+/* store the constant PARAM1 in CC_OP */
+void OPPROTO op_set_cc_op(void)
+{
+    CC_OP = PARAM1;
+}
+
+/* load the flags from T0: T0 is kept as is in CC_SRC and DF becomes
+   1 when bit 10 of T0 is clear, -1 when it is set. CC_OP is not
+   modified here. */
+void OPPROTO op_movl_eflags_T0(void)
+{
+    CC_SRC = T0;
+    DF = 1 - (2 * ((T0 >> 10) & 1));
+}
+
+/* XXX: compute only O flag */
+/* store T0 in CC_SRC, keeping only the current CC_O bit from the
+   previous flags. CC_OP is not modified here. */
+void OPPROTO op_movb_eflags_T0(void)
+{
+    int of;
+    of = cc_table[CC_OP].compute_all() & CC_O;
+    CC_SRC = T0 | of;
+}
+
+/* T0 = computed flags, plus DF & DIRECTION_FLAG. Since DF is 1 or -1
+   (see op_cld and op_std), this adds the DIRECTION_FLAG bit only when
+   DF == -1, assuming DIRECTION_FLAG does not have bit 0 set. */
+void OPPROTO op_movl_T0_eflags(void)
+{
+    T0 = cc_table[CC_OP].compute_all();
+    T0 |= (DF & DIRECTION_FLAG);
+}
+
+/* clear the direction: DF = 1 */
+void OPPROTO op_cld(void)
+{
+    DF = 1;
+}
+
+/* set the direction: DF = -1 */
+void OPPROTO op_std(void)
+{
+    DF = -1;
+}
+
+/* The three ops below compute all the flags, change CC_C and store
+   the result in CC_SRC. CC_OP is not modified here (presumably the
+   translator switches to CC_OP_EFLAGS - to be confirmed). */
+
+/* clear CC_C */
+void OPPROTO op_clc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    eflags &= ~CC_C;
+    CC_SRC = eflags;
+}
+
+/* set CC_C */
+void OPPROTO op_stc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    eflags |= CC_C;
+    CC_SRC = eflags;
+}
+
+/* complement CC_C */
+void OPPROTO op_cmc(void)
+{
+    int eflags;
+    eflags = cc_table[CC_OP].compute_all();
+    eflags ^= CC_C;
+    CC_SRC = eflags;
+}
+
+/* CC_OP_EFLAGS: the flags are already stored in CC_SRC */
+static int compute_all_eflags(void)
+{
+    return CC_SRC;
+}
+
+/* CC_OP_EFLAGS: carry only */
+static int compute_c_eflags(void)
+{
+    return CC_SRC & CC_C;
+}
+
+/* CC_OP_MUL: the carry is set when CC_SRC is not zero */
+static int compute_c_mul(void)
+{
+    int cf;
+    cf = (CC_SRC != 0);
+    return cf;
+}
+
+/* CC_OP_MUL: bit 0 and bit 11 of the result are both set when CC_SRC
+   is not zero, the other flags are returned as zero */
+static int compute_all_mul(void)
+{
+    int cf, pf, af, zf, sf, of;
+    cf = (CC_SRC != 0);
+    pf = 0; /* undefined */
+    af = 0; /* undefined */
+    zf = 0; /* undefined */
+    sf = 0; /* undefined */
+    of = cf << 11;
+    return cf | pf | af | zf | sf | of;
+}
+    
+/* Flags computation functions indexed by CC_OP: the first entry
+   computes all the flags, the second one only the carry.
+   The B/W/L entries of each operation come from ops_template.h.
+   The DEC entries reuse compute_c_inc*, which returns CC_SRC, the
+   same carry source as compute_all_dec*. */
+CCTable cc_table[CC_OP_NB] = {
+    [CC_OP_DYNAMIC] = { /* should never happen */ },
+
+    [CC_OP_EFLAGS] = { compute_all_eflags, compute_c_eflags },
+
+    [CC_OP_MUL] = { compute_all_mul, compute_c_mul },
+
+    [CC_OP_ADDB] = { compute_all_addb, compute_c_addb },
+    [CC_OP_ADDW] = { compute_all_addw, compute_c_addw  },
+    [CC_OP_ADDL] = { compute_all_addl, compute_c_addl  },
+
+    [CC_OP_SUBB] = { compute_all_subb, compute_c_subb  },
+    [CC_OP_SUBW] = { compute_all_subw, compute_c_subw  },
+    [CC_OP_SUBL] = { compute_all_subl, compute_c_subl  },
+    
+    [CC_OP_LOGICB] = { compute_all_logicb, compute_c_logicb },
+    [CC_OP_LOGICW] = { compute_all_logicw, compute_c_logicw },
+    [CC_OP_LOGICL] = { compute_all_logicl, compute_c_logicl },
+    
+    [CC_OP_INCB] = { compute_all_incb, compute_c_incb },
+    [CC_OP_INCW] = { compute_all_incw, compute_c_incw },
+    [CC_OP_INCL] = { compute_all_incl, compute_c_incl },
+    
+    [CC_OP_DECB] = { compute_all_decb, compute_c_incb },
+    [CC_OP_DECW] = { compute_all_decw, compute_c_incw },
+    [CC_OP_DECL] = { compute_all_decl, compute_c_incl },
+    
+    [CC_OP_SHLB] = { compute_all_shlb, compute_c_shlb },
+    [CC_OP_SHLW] = { compute_all_shlw, compute_c_shlw },
+    [CC_OP_SHLL] = { compute_all_shll, compute_c_shll },
+};
diff --git a/ops_template.h b/ops_template.h
new file mode 100644 (file)
index 0000000..4032472
--- /dev/null
@@ -0,0 +1,628 @@
+
+/* This template expects SHIFT to be defined by the includer as the
+   log2 of the operand size in bytes (0, 1 or 2). Everything else is
+   derived from it: the operand width in bits, the masks, the C types
+   of that width and the b/w/l suffix pasted onto the names. */
+#define DATA_BITS (1 << (3 + SHIFT))
+#define SHIFT_MASK (DATA_BITS - 1)
+#define SIGN_MASK (1 << (DATA_BITS - 1))
+
+#if DATA_BITS == 8
+#define SUFFIX b
+#define DATA_TYPE uint8_t
+#define DATA_STYPE int8_t
+#define DATA_MASK 0xff
+#elif DATA_BITS == 16
+#define SUFFIX w
+#define DATA_TYPE uint16_t
+#define DATA_STYPE int16_t
+#define DATA_MASK 0xffff
+#elif DATA_BITS == 32
+#define SUFFIX l
+#define DATA_TYPE uint32_t
+#define DATA_STYPE int32_t
+#define DATA_MASK 0xffffffff
+#else
+#error unhandled operand size
+#endif
+
+/* dynamic flags computation */
+
+/* Compute all the flags after an addition.
+   CC_SRC holds the first operand and CC_DST the result, so the second
+   operand is recovered as CC_DST - CC_SRC. The flags are returned
+   ORed together at their bit positions. */
+static int glue(compute_all_add, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    int src1, src2;
+    src1 = CC_SRC;
+    src2 = CC_DST - CC_SRC;
+    /* unsigned wrap around: the result is below the first operand */
+    cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = (CC_DST ^ src1 ^ src2) & 0x10;
+    /* ZF is set when the result is zero */
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    /* overflow: operands of the same sign, result of the other sign */
+    of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    return cf | pf | af | zf | sf | of;
+}
+
+/* Carry after an addition: set when the result (CC_DST), truncated to
+   the operand size, is below the first operand (CC_SRC). */
+static int glue(compute_c_add, SUFFIX)(void)
+{
+    int src1, cf;
+    src1 = CC_SRC;
+    cf = (DATA_TYPE)CC_DST < (DATA_TYPE)src1;
+    return cf;
+}
+
+/* Compute all the flags after a subtraction.
+   CC_SRC holds the first operand and CC_DST the result, so the second
+   operand is recovered as CC_SRC - CC_DST. The flags are returned
+   ORed together at their bit positions. */
+static int glue(compute_all_sub, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    int src1, src2;
+    src1 = CC_SRC;
+    src2 = CC_SRC - CC_DST;
+    /* borrow: the first operand is below the second one (unsigned) */
+    cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = (CC_DST ^ src1 ^ src2) & 0x10;
+    /* ZF is set when the result is zero */
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    /* overflow: operands of different signs and result sign different
+       from the sign of the first operand */
+    of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    return cf | pf | af | zf | sf | of;
+}
+
+/* Carry (borrow) after a subtraction.
+   CC_SRC holds the first operand and CC_DST the result, so the second
+   operand is recovered as CC_SRC - CC_DST. The carry is set when the
+   first operand is below the second one (unsigned comparison), which
+   matches the carry returned by compute_all_sub. */
+static int glue(compute_c_sub, SUFFIX)(void)
+{
+    int src1, src2, cf;
+    src1 = CC_SRC;
+    src2 = CC_SRC - CC_DST;
+    cf = (DATA_TYPE)src1 < (DATA_TYPE)src2;
+    return cf;
+}
+
+/* Compute all the flags after a logic operation whose result is in
+   CC_DST: carry, auxiliary carry and overflow are zero; parity, zero
+   and sign are taken from the result. */
+static int glue(compute_all_logic, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    cf = 0;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = 0;
+    /* ZF is set when the result is zero */
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    of = 0;
+    return cf | pf | af | zf | sf | of;
+}
+
+/* Carry after a logic operation: always zero, as in
+   compute_all_logic. */
+static int glue(compute_c_logic, SUFFIX)(void)
+{
+    return 0;
+}
+
+/* Compute all the flags after an increment.
+   CC_DST holds the result, so the operand is CC_DST - 1. The carry is
+   not produced by the operation: it is read back from CC_SRC. */
+static int glue(compute_all_inc, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    int src1, src2;
+    src1 = CC_DST - 1;
+    src2 = 1;
+    cf = CC_SRC;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = (CC_DST ^ src1 ^ src2) & 0x10;
+    /* ZF is set when the result is zero */
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    /* same overflow rule as an addition of 1 */
+    of = lshift((src1 ^ src2 ^ -1) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    return cf | pf | af | zf | sf | of;
+}
+
+/* Carry after an increment: read back from CC_SRC, as in
+   compute_all_inc. */
+static int glue(compute_c_inc, SUFFIX)(void)
+{
+    return CC_SRC;
+}
+
+/* Compute all the flags after a decrement.
+   CC_DST holds the result, so the operand is CC_DST + 1. The carry is
+   not produced by the operation: it is read back from CC_SRC. */
+static int glue(compute_all_dec, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    int src1, src2;
+    src1 = CC_DST + 1;
+    src2 = 1;
+    cf = CC_SRC;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = (CC_DST ^ src1 ^ src2) & 0x10;
+    /* ZF is set when the result is zero */
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    /* same overflow rule as a subtraction of 1: operands of different
+       signs and result sign different from the sign of the operand */
+    of = lshift((src1 ^ src2) & (src1 ^ CC_DST), 12 - DATA_BITS) & CC_O;
+    return cf | pf | af | zf | sf | of;
+}
+
+/* Compute all the flags after a shift (CC_OP_SHLB/W/L).
+   CC_DST holds the result and bit 0 of CC_SRC the last bit shifted
+   out, which becomes the carry. */
+static int glue(compute_all_shl, SUFFIX)(void)
+{
+    int cf, pf, af, zf, sf, of;
+    cf = CC_SRC & 1;
+    pf = parity_table[(uint8_t)CC_DST];
+    af = 0; /* undefined */
+    /* ZF is set when the result is zero */
+    zf = ((DATA_TYPE)CC_DST == 0) << 6;
+    sf = lshift(CC_DST, 8 - DATA_BITS) & 0x80;
+    of = sf << 4; /* only meaningful for shr with count == 1 */
+    return cf | pf | af | zf | sf | of;
+}
+
+/* Carry after a shift: bit 0 of CC_SRC, as in compute_all_shl. */
+static int glue(compute_c_shl, SUFFIX)(void)
+{
+    return CC_SRC & 1;
+}
+
+/* various optimized jumps cases */
+
+/* Jump if below after a subtraction, without computing the flags:
+   the operands are rebuilt from CC_SRC (first operand) and CC_DST
+   (result) and compared as unsigned values. PC += PARAM1 when the
+   condition holds, PC += PARAM2 otherwise. */
+void OPPROTO glue(op_jb_sub, SUFFIX)(void)
+{
+    int src1, src2;
+    src1 = CC_SRC;
+    src2 = CC_SRC - CC_DST;
+
+    if ((DATA_TYPE)src1 < (DATA_TYPE)src2)
+        PC += PARAM1;
+    else
+        PC += PARAM2;
+    FORCE_RET();
+}
+
+/* Jump if zero after a subtraction, without computing the flags:
+   the condition holds when the result (CC_DST), truncated to the
+   operand size, is zero. PC += PARAM1 when the condition holds,
+   PC += PARAM2 otherwise. */
+void OPPROTO glue(op_jz_sub, SUFFIX)(void)
+{
+    if ((DATA_TYPE)CC_DST == 0)
+        PC += PARAM1;
+    else
+        PC += PARAM2;
+    FORCE_RET();
+}
+
+/* Jump if below or equal after a subtraction: unsigned comparison of
+   the operands rebuilt from CC_SRC (first operand) and CC_DST
+   (result). PC += PARAM1 when the condition holds, PC += PARAM2
+   otherwise. */
+void OPPROTO glue(op_jbe_sub, SUFFIX)(void)
+{
+    int src1, src2;
+    src1 = CC_SRC;
+    src2 = CC_SRC - CC_DST;
+
+    if ((DATA_TYPE)src1 <= (DATA_TYPE)src2)
+        PC += PARAM1;
+    else
+        PC += PARAM2;
+    FORCE_RET();
+}
+
+/* Jump if the sign bit of the result (CC_DST) is set. */
+void OPPROTO glue(op_js_sub, SUFFIX)(void)
+{
+    if (CC_DST & SIGN_MASK)
+        PC += PARAM1;
+    else
+        PC += PARAM2;
+    FORCE_RET();
+}
+
+/* Jump if less after a subtraction: signed comparison of the
+   operands rebuilt from CC_SRC and CC_DST. */
+void OPPROTO glue(op_jl_sub, SUFFIX)(void)
+{
+    int src1, src2;
+    src1 = CC_SRC;
+    src2 = CC_SRC - CC_DST;
+
+    if ((DATA_STYPE)src1 < (DATA_STYPE)src2)
+        PC += PARAM1;
+    else
+        PC += PARAM2;
+    FORCE_RET();
+}
+
+/* Jump if less or equal after a subtraction: signed comparison of
+   the operands rebuilt from CC_SRC and CC_DST. */
+void OPPROTO glue(op_jle_sub, SUFFIX)(void)
+{
+    int src1, src2;
+    src1 = CC_SRC;
+    src2 = CC_SRC - CC_DST;
+
+    if ((DATA_STYPE)src1 <= (DATA_STYPE)src2)
+        PC += PARAM1;
+    else
+        PC += PARAM2;
+    FORCE_RET();
+}
+
+/* various optimized set cases */
+
+/* T0 = 1 if below after a subtraction, 0 otherwise: unsigned
+   comparison of the operands rebuilt from CC_SRC (first operand) and
+   CC_DST (result). */
+void OPPROTO glue(op_setb_T0_sub, SUFFIX)(void)
+{
+    int src1, src2;
+    src1 = CC_SRC;
+    src2 = CC_SRC - CC_DST;
+
+    T0 = ((DATA_TYPE)src1 < (DATA_TYPE)src2);
+}
+
+/* T0 = 1 if the result of the subtraction (CC_DST), truncated to the
+   operand size, is zero, 0 otherwise. */
+void OPPROTO glue(op_setz_T0_sub, SUFFIX)(void)
+{
+    T0 = ((DATA_TYPE)CC_DST == 0);
+}
+
+/* T0 = 1 if below or equal after a subtraction: unsigned comparison
+   of the operands rebuilt from CC_SRC (first operand) and CC_DST
+   (result). */
+void OPPROTO glue(op_setbe_T0_sub, SUFFIX)(void)
+{
+    int src1, src2;
+    src1 = CC_SRC;
+    src2 = CC_SRC - CC_DST;
+
+    T0 = ((DATA_TYPE)src1 <= (DATA_TYPE)src2);
+}
+
+/* T0 = sign bit of the result: bit DATA_BITS - 1 of CC_DST */
+void OPPROTO glue(op_sets_T0_sub, SUFFIX)(void)
+{
+    T0 = lshift(CC_DST, -(DATA_BITS - 1)) & 1;
+}
+
+/* T0 = 1 if less after a subtraction: signed comparison of the
+   operands rebuilt from CC_SRC and CC_DST. */
+void OPPROTO glue(op_setl_T0_sub, SUFFIX)(void)
+{
+    int src1, src2;
+    src1 = CC_SRC;
+    src2 = CC_SRC - CC_DST;
+
+    T0 = ((DATA_STYPE)src1 < (DATA_STYPE)src2);
+}
+
+/* T0 = 1 if less or equal after a subtraction: signed comparison of
+   the operands rebuilt from CC_SRC and CC_DST. */
+void OPPROTO glue(op_setle_T0_sub, SUFFIX)(void)
+{
+    int src1, src2;
+    src1 = CC_SRC;
+    src2 = CC_SRC - CC_DST;
+
+    T0 = ((DATA_STYPE)src1 <= (DATA_STYPE)src2);
+}
+
+/* shifts */
+
+/* Rotate T0 left by T1 bits. The count is taken modulo the operand
+   width (SHIFT_MASK), and nothing is modified when it is zero.
+   The flags are switched to CC_OP_EFLAGS: CC_SRC receives the
+   previous flags with CC_O and CC_C replaced. CC_C is bit 0 of the
+   result, CC_O is set when the top bit of the operand changed.
+   NOTE(review): for 8 and 16 bit operands the result keeps bits above
+   DATA_BITS; presumably it is truncated when stored - to be
+   confirmed. */
+void OPPROTO glue(glue(op_rol, SUFFIX), _T0_T1_cc)(void)
+{
+    int count, src;
+    count = T1 & SHIFT_MASK;
+    if (count) {
+        CC_SRC = cc_table[CC_OP].compute_all() & ~(CC_O | CC_C);
+        src = T0;
+        T0 &= DATA_MASK;
+        T0 = (T0 << count) | (T0 >> (DATA_BITS - count));
+        CC_SRC |= (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) | 
+            (T0 & CC_C);
+        CC_OP = CC_OP_EFLAGS;
+    }
+}
+
+/* Rotate T0 right by T1 bits. Same conventions as the left rotate
+   above, except that CC_C is the top bit (DATA_BITS - 1) of the
+   result. */
+void OPPROTO glue(glue(op_ror, SUFFIX), _T0_T1_cc)(void)
+{
+    int count, src;
+    count = T1 & SHIFT_MASK;
+    if (count) {
+        CC_SRC = cc_table[CC_OP].compute_all() & ~(CC_O | CC_C);
+        src = T0;
+        T0 &= DATA_MASK;
+        T0 = (T0 >> count) | (T0 << (DATA_BITS - count));
+        CC_SRC |= (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) | 
+            ((T0 >> (DATA_BITS - 1)) & CC_C);
+        CC_OP = CC_OP_EFLAGS;
+    }
+}
+
+/* Rotate T0 left through the carry by T1 bits.
+   The 5 bit count is reduced modulo DATA_BITS + 1 with the rcl tables
+   for 8 and 16 bit operands, and nothing is modified when it is zero.
+   The flags are switched to CC_OP_EFLAGS: CC_SRC receives the
+   previous flags with CC_O and CC_C replaced. CC_C is the last bit
+   rotated out of the operand, CC_O is set when the top bit of the
+   operand changed. */
+void OPPROTO glue(glue(op_rcl, SUFFIX), _T0_T1_cc)(void)
+{
+    int count, res, eflags;
+    unsigned int src;
+
+    count = T1 & 0x1f;
+#if DATA_BITS == 16
+    count = rclw_table[count];
+#elif DATA_BITS == 8
+    count = rclb_table[count];
+#endif
+    if (count) {
+        eflags = cc_table[CC_OP].compute_all();
+        /* drop the bits above the operand size so that the right
+           shift below only brings back bits of the operand */
+        T0 &= DATA_MASK;
+        src = T0;
+        /* the old carry enters just below the shifted bits */
+        res = (T0 << count) | ((eflags & CC_C) << (count - 1));
+        if (count > 1)
+            res |= T0 >> (DATA_BITS + 1 - count);
+        T0 = res;
+        CC_SRC = (eflags & ~(CC_C | CC_O)) |
+            (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) |
+            ((src >> (DATA_BITS - count)) & CC_C);
+        CC_OP = CC_OP_EFLAGS;
+    }
+}
+
+/* Rotate T0 right through the carry by T1 bits.
+   The 5 bit count is reduced modulo DATA_BITS + 1 with the rcl tables
+   for 8 and 16 bit operands, and nothing is modified when it is zero.
+   The flags are switched to CC_OP_EFLAGS: CC_SRC receives the
+   previous flags with CC_O and CC_C replaced. CC_C is the last bit
+   rotated out of the operand, CC_O is set when the top bit of the
+   operand changed. */
+void OPPROTO glue(glue(op_rcr, SUFFIX), _T0_T1_cc)(void)
+{
+    int count, res, eflags;
+    unsigned int src;
+
+    count = T1 & 0x1f;
+#if DATA_BITS == 16
+    count = rclw_table[count];
+#elif DATA_BITS == 8
+    count = rclb_table[count];
+#endif
+    if (count) {
+        eflags = cc_table[CC_OP].compute_all();
+        /* drop the bits above the operand size so that the right
+           shift below does not bring them into the result */
+        T0 &= DATA_MASK;
+        src = T0;
+        /* the old carry enters just above the shifted bits */
+        res = (T0 >> count) | ((eflags & CC_C) << (DATA_BITS - count));
+        if (count > 1)
+            res |= T0 << (DATA_BITS + 1 - count);
+        T0 = res;
+        CC_SRC = (eflags & ~(CC_C | CC_O)) |
+            (lshift(src ^ T0, 11 - (DATA_BITS - 1)) & CC_O) |
+            ((src >> (count - 1)) & CC_C);
+        CC_OP = CC_OP_EFLAGS;
+    }
+}
+
+/* Shift T0 left by T1 bits (5 bit count). Nothing is modified when
+   the count is zero.
+   A shift by one is handled as the addition T0 + T0, so that all the
+   flags come from the add case. Otherwise the shl flags case is
+   used: CC_DST is the result and bit 0 of CC_SRC the last bit shifted
+   out of the operand. */
+void OPPROTO glue(glue(op_shl, SUFFIX), _T0_T1_cc)(void)
+{
+    int count;
+    count = T1 & 0x1f;
+    if (count == 1) {
+        CC_SRC = T0;
+        T0 = T0 << 1;
+        CC_DST = T0;
+        CC_OP = CC_OP_ADDB + SHIFT;
+    } else if (count) {
+        /* for 8 and 16 bit operands the count can exceed the operand
+           width: the shift amount would be negative, and no operand
+           bit is the last one shifted out, so the carry is cleared */
+        if (count <= DATA_BITS)
+            CC_SRC = T0 >> (DATA_BITS - count);
+        else
+            CC_SRC = 0;
+        T0 = T0 << count;
+        CC_DST = T0;
+        CC_OP = CC_OP_SHLB + SHIFT;
+    }
+}
+
+/* Logical shift of T0 right by T1 bits (5 bit count). Nothing is
+   modified when the count is zero. T0 is first masked to the operand
+   size; bit 0 of CC_SRC is the last bit shifted out and CC_DST the
+   result. The shl flags case is used for the flags. */
+void OPPROTO glue(glue(op_shr, SUFFIX), _T0_T1_cc)(void)
+{
+    int count;
+    count = T1 & 0x1f;
+    if (count) {
+        T0 &= DATA_MASK;
+        CC_SRC = T0 >> (count - 1);
+        T0 = T0 >> count;
+        CC_DST = T0;
+        CC_OP = CC_OP_SHLB + SHIFT;
+    }
+}
+
+/* Arithmetic shift of T0 right by T1 bits (5 bit count). Nothing is
+   modified when the count is zero. T0 is first sign extended from
+   the operand size; bit 0 of CC_SRC is the last bit shifted out and
+   CC_DST the result. The shl flags case is used for the flags. */
+void OPPROTO glue(glue(op_sar, SUFFIX), _T0_T1_cc)(void)
+{
+    int count, src;
+    count = T1 & 0x1f;
+    if (count) {
+        src = (DATA_STYPE)T0;
+        CC_SRC =  src >> (count - 1);
+        T0 = src >> count;
+        CC_DST = T0;
+        CC_OP = CC_OP_SHLB + SHIFT;
+    }
+}
+
+/* string operations */
+/* XXX: maybe use lower level instructions to ease exception handling */
+
+/* In the string ops the pointers advance by DF << SHIFT, that is plus
+   or minus the element size since DF is 1 or -1. */
+
+/* copy one element from [ESI] to [EDI] and advance both pointers */
+void OPPROTO glue(op_movs, SUFFIX)(void)
+{
+    int v;
+    v = glue(ldu, SUFFIX)((void *)ESI);
+    glue(st, SUFFIX)((void *)EDI, v);
+    ESI += (DF << SHIFT);
+    EDI += (DF << SHIFT);
+}
+
+/* repeat the copy above, decrementing ECX, until ECX is zero */
+void OPPROTO glue(op_rep_movs, SUFFIX)(void)
+{
+    int v, inc;
+    inc = (DF << SHIFT);
+    while (ECX != 0) {
+        v = glue(ldu, SUFFIX)((void *)ESI);
+        glue(st, SUFFIX)((void *)EDI, v);
+        ESI += inc;
+        EDI += inc;
+        ECX--;
+    }
+}
+
+/* store EAX (truncated by the store to the element size) at [EDI]
+   and advance EDI */
+void OPPROTO glue(op_stos, SUFFIX)(void)
+{
+    glue(st, SUFFIX)((void *)EDI, EAX);
+    EDI += (DF << SHIFT);
+}
+
+/* repeat the store above, decrementing ECX, until ECX is zero */
+void OPPROTO glue(op_rep_stos, SUFFIX)(void)
+{
+    int inc;
+    inc = (DF << SHIFT);
+    while (ECX != 0) {
+        glue(st, SUFFIX)((void *)EDI, EAX);
+        EDI += inc;
+        ECX--;
+    }
+}
+
+/* load one element from [ESI] into the low part of EAX (the upper
+   bits of EAX are kept for 8 and 16 bit elements) and advance ESI */
+void OPPROTO glue(op_lods, SUFFIX)(void)
+{
+    int v;
+    v = glue(ldu, SUFFIX)((void *)ESI);
+#if SHIFT == 0
+    EAX = (EAX & ~0xff) | v;
+#elif SHIFT == 1
+    EAX = (EAX & ~0xffff) | v;
+#else
+    EAX = v;
+#endif
+    ESI += (DF << SHIFT);
+}
+
+/* don't know if it is used */
+/* repeat the load above, decrementing ECX, until ECX is zero */
+void OPPROTO glue(op_rep_lods, SUFFIX)(void)
+{
+    int v, inc;
+    inc = (DF << SHIFT);
+    while (ECX != 0) {
+        v = glue(ldu, SUFFIX)((void *)ESI);
+#if SHIFT == 0
+        EAX = (EAX & ~0xff) | v;
+#elif SHIFT == 1
+        EAX = (EAX & ~0xffff) | v;
+#else
+        EAX = v;
+#endif
+        ESI += inc;
+        ECX--;
+    }
+}
+
+/* scas: compare the accumulator with the element at [EDI] and
+   advance EDI by DF << SHIFT. The comparison is recorded as the
+   subtraction EAX - element in CC_SRC/CC_DST.
+   NOTE(review): CC_OP is not written here, unlike the rep variants;
+   presumably the translator records it - to be confirmed. */
+void OPPROTO glue(op_scas, SUFFIX)(void)
+{
+    int v;
+
+    /* the memory operand of scas is addressed by EDI, not ESI */
+    v = glue(ldu, SUFFIX)((void *)EDI);
+    EDI += (DF << SHIFT);
+    CC_SRC = EAX;
+    CC_DST = EAX - v;
+}
+
+/* repz scas: compare the accumulator with successive elements at
+   [EDI] while they are equal and ECX is not zero. The last comparison
+   is recorded as a subtraction (CC_SRC, CC_DST, CC_OP). */
+void OPPROTO glue(op_repz_scas, SUFFIX)(void)
+{
+    int v1, v2, inc;
+
+    if (ECX != 0) {
+        /* NOTE: the flags are not modified if ECX == 0 */
+#if SHIFT == 0
+        v1 = EAX & 0xff;
+#elif SHIFT == 1
+        v1 = EAX & 0xffff;
+#else
+        v1 = EAX;
+#endif
+        inc = (DF << SHIFT);
+        do {
+            /* the memory operand of scas is addressed by EDI */
+            v2 = glue(ldu, SUFFIX)((void *)EDI);
+            /* the pointer and the counter are updated for every
+               element compared, including the one ending the loop */
+            EDI += inc;
+            ECX--;
+            if (v1 != v2)
+                break;
+        } while (ECX != 0);
+        CC_SRC = v1;
+        CC_DST = v1 - v2;
+        CC_OP = CC_OP_SUBB + SHIFT;
+    }
+}
+
+/* repnz scas: compare the accumulator with successive elements at
+   [EDI] while they differ and ECX is not zero. The last comparison is
+   recorded as a subtraction (CC_SRC, CC_DST, CC_OP). */
+void OPPROTO glue(op_repnz_scas, SUFFIX)(void)
+{
+    int v1, v2, inc;
+
+    if (ECX != 0) {
+        /* NOTE: the flags are not modified if ECX == 0 */
+#if SHIFT == 0
+        v1 = EAX & 0xff;
+#elif SHIFT == 1
+        v1 = EAX & 0xffff;
+#else
+        v1 = EAX;
+#endif
+        inc = (DF << SHIFT);
+        do {
+            /* the memory operand of scas is addressed by EDI */
+            v2 = glue(ldu, SUFFIX)((void *)EDI);
+            /* the pointer and the counter are updated for every
+               element compared, including the one ending the loop */
+            EDI += inc;
+            ECX--;
+            if (v1 == v2)
+                break;
+        } while (ECX != 0);
+        CC_SRC = v1;
+        CC_DST = v1 - v2;
+        CC_OP = CC_OP_SUBB + SHIFT;
+    }
+}
+
+/* cmps: compare the element at [ESI] with the element at [EDI] and
+   advance both pointers by DF << SHIFT. The comparison is recorded as
+   the subtraction [ESI] - [EDI] in CC_SRC/CC_DST.
+   NOTE(review): CC_OP is not written here, unlike the rep variants;
+   presumably the translator records it - to be confirmed. */
+void OPPROTO glue(op_cmps, SUFFIX)(void)
+{
+    int v1, v2;
+    v1 = glue(ldu, SUFFIX)((void *)ESI);
+    v2 = glue(ldu, SUFFIX)((void *)EDI);
+    ESI += (DF << SHIFT);
+    EDI += (DF << SHIFT);
+    CC_SRC = v1;
+    CC_DST = v1 - v2;
+}
+
+/* repz cmps: compare successive elements at [ESI] and [EDI] while
+   they are equal and ECX is not zero. Nothing is modified when ECX is
+   zero on entry. The last comparison is recorded as a subtraction
+   (CC_SRC, CC_DST, CC_OP). */
+void OPPROTO glue(op_repz_cmps, SUFFIX)(void)
+{
+    int v1, v2, inc;
+    if (ECX != 0) {
+        inc = (DF << SHIFT);
+        do {
+            v1 = glue(ldu, SUFFIX)((void *)ESI);
+            v2 = glue(ldu, SUFFIX)((void *)EDI);
+            /* the pointers and the counter are updated for every
+               element compared, including the one ending the loop */
+            ESI += inc;
+            EDI += inc;
+            ECX--;
+            if (v1 != v2)
+                break;
+        } while (ECX != 0);
+        CC_SRC = v1;
+        CC_DST = v1 - v2;
+        CC_OP = CC_OP_SUBB + SHIFT;
+    }
+}
+
+/* repnz cmps: compare successive elements at [ESI] and [EDI] while
+   they differ and ECX is not zero. Nothing is modified when ECX is
+   zero on entry. The last comparison is recorded as a subtraction
+   (CC_SRC, CC_DST, CC_OP). */
+void OPPROTO glue(op_repnz_cmps, SUFFIX)(void)
+{
+    int v1, v2, inc;
+    if (ECX != 0) {
+        inc = (DF << SHIFT);
+        do {
+            v1 = glue(ldu, SUFFIX)((void *)ESI);
+            v2 = glue(ldu, SUFFIX)((void *)EDI);
+            /* the pointers and the counter are updated for every
+               element compared, including the one ending the loop */
+            ESI += inc;
+            EDI += inc;
+            ECX--;
+            if (v1 == v2)
+                break;
+        } while (ECX != 0);
+        CC_SRC = v1;
+        CC_DST = v1 - v2;
+        CC_OP = CC_OP_SUBB + SHIFT;
+    }
+}
+
+/* outs: write the element at [ESI] to the port given by the low 16
+   bits of EDX and advance ESI by DF << SHIFT */
+void OPPROTO glue(op_outs, SUFFIX)(void)
+{
+    int v, dx;
+    dx = EDX & 0xffff;
+    v = glue(ldu, SUFFIX)((void *)ESI);
+    glue(port_out, SUFFIX)(dx, v);
+    ESI += (DF << SHIFT);
+}
+
+/* repeat the port write above, decrementing ECX, until ECX is zero */
+void OPPROTO glue(op_rep_outs, SUFFIX)(void)
+{
+    int v, dx, inc;
+    inc = (DF << SHIFT);
+    dx = EDX & 0xffff;
+    while (ECX != 0) {
+        v = glue(ldu, SUFFIX)((void *)ESI);
+        glue(port_out, SUFFIX)(dx, v);
+        ESI += inc;
+        ECX--;
+    }
+}
+
+/* ins: read one element from the port given by the low 16 bits of
+   EDX, store it at [EDI] and advance EDI by DF << SHIFT */
+void OPPROTO glue(op_ins, SUFFIX)(void)
+{
+    int v, dx;
+    dx = EDX & 0xffff;
+    v = glue(port_in, SUFFIX)(dx);
+    glue(st, SUFFIX)((void *)EDI, v);
+    EDI += (DF << SHIFT);
+}
+
+/* rep ins: read elements from the port given by the low 16 bits of
+   EDX and store them at [EDI], advancing EDI and decrementing ECX,
+   until ECX is zero */
+void OPPROTO glue(op_rep_ins, SUFFIX)(void)
+{
+    int v, dx, inc;
+    /* the step is constant during the loop: compute it once, as the
+       other rep ops do */
+    inc = (DF << SHIFT);
+    dx = EDX & 0xffff;
+    while (ECX != 0) {
+        v = glue(port_in, SUFFIX)(dx);
+        glue(st, SUFFIX)((void *)EDI, v);
+        EDI += inc;
+        ECX--;
+    }
+}
+
+#undef DATA_BITS
+#undef SHIFT_MASK
+#undef SIGN_MASK
+#undef DATA_TYPE
+#undef DATA_STYPE
+#undef DATA_MASK
+#undef SUFFIX
index c6347edfcc37d37b2cb7703900ada0811ced0d39..2c2b059df4b1a7338e92027b32d62bb8017c5b97 100644 (file)
@@ -2,7 +2,9 @@ CC=gcc
 CFLAGS=-Wall -O2 -g
 LDFLAGS=
 
-TESTS=hello test1 test2 sha1 test-i386 
+TESTS=hello test1 test2 sha1 test-i386
+TESTS+=op-i386.o #op-i386.o op-ppc.o op-arm.o op-mips.o op-sparc.o
+
 GEMU=../gemu
 
 all: $(TESTS)
@@ -25,6 +27,22 @@ test: test-i386
        $(GEMU) test-i386 > test-i386.out
        @if diff -u test-i386.ref test-i386.out ; then echo "Auto Test OK"; fi
 
+# dyngen tests
+op-i386.o: op.c
+       gcc $(CFLAGS) -c -o $@ $<
+
+op-ppc.o: op.c
+       powerpc-linux-gcc $(CFLAGS) -c -o $@ $<
+
+op-arm.o: op.c
+       arm-linux-gcc $(CFLAGS) -c -o $@ $<
+
+op-mips.o: op.c
+       mips-linux-gcc $(CFLAGS) -mno-abicalls -c -o $@ $<
+
+op-sparc.o: op.c
+       sparc-linux-gcc $(CFLAGS) -mflat -c -o $@ $<
+
 # speed test
 sha1: sha1.c
        $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $<
diff --git a/thunk.h b/thunk.h
index 0b83d202af6c1e92c910e8768ace1111b1358cb4..5e5d9dd18df576b75b299d205915014bd4a90e44 100644 (file)
--- a/thunk.h
+++ b/thunk.h
 
 #define bswap_64(x) \
 ({ \
-       __u64 __x = (x); \
-       ((__u64)( \
-               (__u64)(((__u64)(__x) & (__u64)0x00000000000000ffULL) << 56) | \
-               (__u64)(((__u64)(__x) & (__u64)0x000000000000ff00ULL) << 40) | \
-               (__u64)(((__u64)(__x) & (__u64)0x0000000000ff0000ULL) << 24) | \
-               (__u64)(((__u64)(__x) & (__u64)0x00000000ff000000ULL) <<  8) | \
-               (__u64)(((__u64)(__x) & (__u64)0x000000ff00000000ULL) >>  8) | \
-               (__u64)(((__u64)(__x) & (__u64)0x0000ff0000000000ULL) >> 24) | \
-               (__u64)(((__u64)(__x) & (__u64)0x00ff000000000000ULL) >> 40) | \
-               (__u64)(((__u64)(__x) & (__u64)0xff00000000000000ULL) >> 56) )); \
+       uint64_t __x = (x); \
+       ((uint64_t)( \
+               (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000000000ffULL) << 56) | \
+               (uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000000000ff00ULL) << 40) | \
+               (uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000000000ff0000ULL) << 24) | \
+               (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00000000ff000000ULL) <<  8) | \
+               (uint64_t)(((uint64_t)(__x) & (uint64_t)0x000000ff00000000ULL) >>  8) | \
+               (uint64_t)(((uint64_t)(__x) & (uint64_t)0x0000ff0000000000ULL) >> 24) | \
+               (uint64_t)(((uint64_t)(__x) & (uint64_t)0x00ff000000000000ULL) >> 40) | \
+               (uint64_t)(((uint64_t)(__x) & (uint64_t)0xff00000000000000ULL) >> 56) )); \
 })
 
 #endif
@@ -51,7 +51,7 @@
 #define BSWAP_NEEDED
 #endif
 
-/* XXX: auto autoconf */
+/* XXX: autoconf */
 #define TARGET_I386
 #define TARGET_LONG_BITS 32
 
diff --git a/translate-i386.c b/translate-i386.c
new file mode 100644 (file)
index 0000000..0c1b95a
--- /dev/null
@@ -0,0 +1,2133 @@
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <assert.h>
+
+#include "cpu-i386.h"
+
+static uint8_t *gen_code_ptr;
+int __op_param1, __op_param2, __op_param3;
+
+/* Fatal error helper: print the printf-style message on stderr and
+   terminate the process with exit status 1.  XXX: suppress that */
+static void error(const char *fmt, ...)
+{
+    va_list args;
+
+    va_start(args, fmt);
+    (void)vfprintf(stderr, fmt, args);
+    va_end(args);
+
+    exit(1);
+}
+
+#define PREFIX_REPZ 1 /* bit flags OR-ed together by disas_insn(), one per prefix byte; this one: 0xf3 */
+#define PREFIX_REPNZ 2 /* prefix byte 0xf2 */
+#define PREFIX_LOCK 4 /* prefix byte 0xf0 */
+#define PREFIX_CS 8 /* prefix byte 0x2e */
+#define PREFIX_SS 0x10 /* prefix byte 0x36 */
+#define PREFIX_DS 0x20 /* prefix byte 0x3e */
+#define PREFIX_ES 0x40 /* prefix byte 0x26 */
+#define PREFIX_FS 0x80 /* prefix byte 0x64 */
+#define PREFIX_GS 0x100 /* prefix byte 0x65 */
+#define PREFIX_DATA 0x200 /* prefix byte 0x66, toggles dflag */
+#define PREFIX_ADR 0x400 /* prefix byte 0x67, toggles aflag */
+#define PREFIX_FWAIT 0x800 /* byte 0x9b, handled like a prefix */
+
+typedef struct DisasContext { /* state shared by the code generation helpers while one instruction is translated */
+    /* current insn context */
+    int prefix; /* PREFIX_* flags collected by disas_insn() for this insn */
+    int aflag, dflag; /* aflag != 0: 32 bit addressing; dflag != 0: OT_LONG operands, else OT_WORD */
+    uint8_t *pc; /* current pc */
+    uint8_t *runtime_pc; /* current pc in the runtime generated code */
+    int cc_op; /* current CC operation: a CC_OP_* value known at translation time, or CC_OP_DYNAMIC */
+    int f_st; /* NOTE(review): not referenced in the visible code - purpose to be confirmed */
+} DisasContext;
+
+/* i386 arith/logic operations */
+enum { /* values are taken from (opcode >> 3) & 7 or from the GRP1 modrm reg field and index gen_op_arith_T0_T1_cc[] and cc_op_arithb[] */
+    OP_ADDL, 
+    OP_ORL, 
+    OP_ADCL, 
+    OP_SBBL,
+    OP_ANDL, 
+    OP_SUBL, 
+    OP_XORL, 
+    OP_CMPL, /* result is not written back by gen_op() */
+};
+
+/* i386 shift ops */
+enum { /* second index of gen_op_shift_T0_T1_cc[][] */
+    OP_ROL, 
+    OP_ROR, 
+    OP_RCL, 
+    OP_RCR, 
+    OP_SHL, 
+    OP_SHR, 
+    OP_SHL1, /* undocumented */ /* mapped to the same generators as OP_SHL */
+    OP_SAR = 7,
+};
+
+
+static const int fp_ops[8] = { /* placeholder: the initializers below are compiled out, so all eight entries are 0 */
+#if 0
+    OP_FADDQ, OP_FMULQ, OP_CMP, OP_CMP,
+    OP_FSUBQ, OP_FSUBQ, OP_FDIVQ, OP_FDIVQ
+#endif
+};
+
+extern char cc_table, rclw_table, rclb_table;
+extern char helper_rcll_T0_T1_cc;
+extern char __udivdi3, __umoddi3;
+
+#include "op-i386.h"
+
+/* operand size */
+enum { /* used as the first index of the size-dispatched gen_op_* tables */
+    OT_BYTE = 0,
+    OT_WORD,
+    OT_LONG, 
+    OT_QUAD, /* the 3-entry tables in this file have no slot for it */
+};
+
+enum { /* operand "register" numbers; OR_EAX..OR_EDI equal the 3-bit x86 register field, so callers write OR_EAX + rm */
+    /* I386 int registers */
+    OR_EAX,   /* MUST be even numbered */
+    OR_ECX,
+    OR_EDX,
+    OR_EBX,
+    OR_ESP,
+    OR_EBP,
+    OR_ESI,
+    OR_EDI,
+
+    /* I386 float registers */
+    OR_ST0,
+    OR_ST1,
+    OR_ST2,
+    OR_ST3,
+    OR_ST4,
+    OR_ST5,
+    OR_ST6,
+    OR_ST7,
+    OR_TMP0,    /* temporary operand register */ /* as destination: operand already in T0, no register load/store is emitted */
+    OR_TMP1, /* as source: operand already in T1 */
+    OR_A0, /* temporary register used when doing address evaluation */
+    OR_EFLAGS,  /* cpu flags */
+    OR_ITMP0, /* used for byte/word insertion */
+    OR_ITMP1, /* used for byte/word insertion */
+    OR_ITMP2, /* used for byte/word insertion */
+    OR_FTMP0, /* float temporary */
+    OR_DF,    /* D flag, for string ops */
+    OR_ZERO, /* fixed zero register */ /* gen_lea_modrm() uses it to mean "no register" */
+    OR_IM, /* dummy immediate value register */
+    NB_OREGS,
+};
+
+#if 0
+static const double tab_const[7] = {
+    1.0, 
+    3.32192809488736234789, /* log2(10) */
+    M_LOG2E,
+    M_PI,
+    0.30102999566398119521, /* log10(2) */
+    M_LN2,
+    0.0
+};
+#endif
+
+typedef void (GenOpFunc)(void); /* type of the gen_op_* entries stored in the dispatch tables below that take no argument */
+typedef void (GenOpFunc1)(long); /* same, one argument */
+typedef void (GenOpFunc2)(long, long); /* same, two arguments (used for the conditional jump tables) */
+                    
+static GenOpFunc *gen_op_mov_reg_T0[3][8] = { /* [operand size][register]: used to write T0 back into a register after an operation or load */
+    [OT_BYTE] = {
+        gen_op_movb_EAX_T0,
+        gen_op_movb_ECX_T0,
+        gen_op_movb_EDX_T0,
+        gen_op_movb_EBX_T0,
+        gen_op_movh_EAX_T0, /* byte registers 4..7 use the movh variants of EAX..EBX (presumably the high bytes AH/CH/DH/BH) */
+        gen_op_movh_ECX_T0,
+        gen_op_movh_EDX_T0,
+        gen_op_movh_EBX_T0,
+    },
+    [OT_WORD] = {
+        gen_op_movw_EAX_T0,
+        gen_op_movw_ECX_T0,
+        gen_op_movw_EDX_T0,
+        gen_op_movw_EBX_T0,
+        gen_op_movw_ESP_T0,
+        gen_op_movw_EBP_T0,
+        gen_op_movw_ESI_T0,
+        gen_op_movw_EDI_T0,
+    },
+    [OT_LONG] = {
+        gen_op_movl_EAX_T0,
+        gen_op_movl_ECX_T0,
+        gen_op_movl_EDX_T0,
+        gen_op_movl_EBX_T0,
+        gen_op_movl_ESP_T0,
+        gen_op_movl_EBP_T0,
+        gen_op_movl_ESI_T0,
+        gen_op_movl_EDI_T0,
+    },
+};
+
+static GenOpFunc *gen_op_mov_reg_T1[3][8] = { /* [operand size][register]: T1 counterpart of gen_op_mov_reg_T0; not referenced in the visible part of the file */
+    [OT_BYTE] = {
+        gen_op_movb_EAX_T1,
+        gen_op_movb_ECX_T1,
+        gen_op_movb_EDX_T1,
+        gen_op_movb_EBX_T1,
+        gen_op_movh_EAX_T1, /* byte registers 4..7 use the movh variants of EAX..EBX */
+        gen_op_movh_ECX_T1,
+        gen_op_movh_EDX_T1,
+        gen_op_movh_EBX_T1,
+    },
+    [OT_WORD] = {
+        gen_op_movw_EAX_T1,
+        gen_op_movw_ECX_T1,
+        gen_op_movw_EDX_T1,
+        gen_op_movw_EBX_T1,
+        gen_op_movw_ESP_T1,
+        gen_op_movw_EBP_T1,
+        gen_op_movw_ESI_T1,
+        gen_op_movw_EDI_T1,
+    },
+    [OT_LONG] = {
+        gen_op_movl_EAX_T1,
+        gen_op_movl_ECX_T1,
+        gen_op_movl_EDX_T1,
+        gen_op_movl_EBX_T1,
+        gen_op_movl_ESP_T1,
+        gen_op_movl_EBP_T1,
+        gen_op_movl_ESI_T1,
+        gen_op_movl_EDI_T1,
+    },
+};
+
+static GenOpFunc *gen_op_mov_reg_A0[2][8] = { /* [size selector][register]; NOTE(review): row 0 holds the movw and row 1 the movl variants - exact meaning of the first index to be confirmed against callers */
+    [0] = {
+        gen_op_movw_EAX_A0,
+        gen_op_movw_ECX_A0,
+        gen_op_movw_EDX_A0,
+        gen_op_movw_EBX_A0,
+        gen_op_movw_ESP_A0,
+        gen_op_movw_EBP_A0,
+        gen_op_movw_ESI_A0,
+        gen_op_movw_EDI_A0,
+    },
+    [1] = {
+        gen_op_movl_EAX_A0,
+        gen_op_movl_ECX_A0,
+        gen_op_movl_EDX_A0,
+        gen_op_movl_EBX_A0,
+        gen_op_movl_ESP_A0,
+        gen_op_movl_EBP_A0,
+        gen_op_movl_ESI_A0,
+        gen_op_movl_EDI_A0,
+    },
+};
+
+static GenOpFunc *gen_op_mov_TN_reg[3][2][8] = /* [operand size][0 = T0, 1 = T1][register]: used to fetch a register operand into T0 or T1 */
+{
+    [OT_BYTE] = {
+        {
+            gen_op_movl_T0_EAX,
+            gen_op_movl_T0_ECX,
+            gen_op_movl_T0_EDX,
+            gen_op_movl_T0_EBX,
+            gen_op_movh_T0_EAX, /* byte registers 4..7 use the movh variants of EAX..EBX */
+            gen_op_movh_T0_ECX,
+            gen_op_movh_T0_EDX,
+            gen_op_movh_T0_EBX,
+        },
+        {
+            gen_op_movl_T1_EAX,
+            gen_op_movl_T1_ECX,
+            gen_op_movl_T1_EDX,
+            gen_op_movl_T1_EBX,
+            gen_op_movh_T1_EAX,
+            gen_op_movh_T1_ECX,
+            gen_op_movh_T1_EDX,
+            gen_op_movh_T1_EBX,
+        },
+    },
+    [OT_WORD] = { /* same full-width movl generators as the OT_LONG rows */
+        {
+            gen_op_movl_T0_EAX,
+            gen_op_movl_T0_ECX,
+            gen_op_movl_T0_EDX,
+            gen_op_movl_T0_EBX,
+            gen_op_movl_T0_ESP,
+            gen_op_movl_T0_EBP,
+            gen_op_movl_T0_ESI,
+            gen_op_movl_T0_EDI,
+        },
+        {
+            gen_op_movl_T1_EAX,
+            gen_op_movl_T1_ECX,
+            gen_op_movl_T1_EDX,
+            gen_op_movl_T1_EBX,
+            gen_op_movl_T1_ESP,
+            gen_op_movl_T1_EBP,
+            gen_op_movl_T1_ESI,
+            gen_op_movl_T1_EDI,
+        },
+    },
+    [OT_LONG] = {
+        {
+            gen_op_movl_T0_EAX,
+            gen_op_movl_T0_ECX,
+            gen_op_movl_T0_EDX,
+            gen_op_movl_T0_EBX,
+            gen_op_movl_T0_ESP,
+            gen_op_movl_T0_EBP,
+            gen_op_movl_T0_ESI,
+            gen_op_movl_T0_EDI,
+        },
+        {
+            gen_op_movl_T1_EAX,
+            gen_op_movl_T1_ECX,
+            gen_op_movl_T1_EDX,
+            gen_op_movl_T1_EBX,
+            gen_op_movl_T1_ESP,
+            gen_op_movl_T1_EBP,
+            gen_op_movl_T1_ESI,
+            gen_op_movl_T1_EDI,
+        },
+    },
+};
+
+static GenOpFunc *gen_op_movl_A0_reg[8] = { /* [register]: used by gen_lea_modrm() when the address is a plain register */
+    gen_op_movl_A0_EAX,
+    gen_op_movl_A0_ECX,
+    gen_op_movl_A0_EDX,
+    gen_op_movl_A0_EBX,
+    gen_op_movl_A0_ESP,
+    gen_op_movl_A0_EBP,
+    gen_op_movl_A0_ESI,
+    gen_op_movl_A0_EDI,
+};
+
+static GenOpFunc *gen_op_addl_A0_reg_sN[4][8] = { /* [scale][register]: used by gen_lea_modrm() to add (register << scale) to A0 */
+    [0] = {
+        gen_op_addl_A0_EAX,
+        gen_op_addl_A0_ECX,
+        gen_op_addl_A0_EDX,
+        gen_op_addl_A0_EBX,
+        gen_op_addl_A0_ESP,
+        gen_op_addl_A0_EBP,
+        gen_op_addl_A0_ESI,
+        gen_op_addl_A0_EDI,
+    },
+    [1] = {
+        gen_op_addl_A0_EAX_s1,
+        gen_op_addl_A0_ECX_s1,
+        gen_op_addl_A0_EDX_s1,
+        gen_op_addl_A0_EBX_s1,
+        gen_op_addl_A0_ESP_s1,
+        gen_op_addl_A0_EBP_s1,
+        gen_op_addl_A0_ESI_s1,
+        gen_op_addl_A0_EDI_s1,
+    },
+    [2] = {
+        gen_op_addl_A0_EAX_s2,
+        gen_op_addl_A0_ECX_s2,
+        gen_op_addl_A0_EDX_s2,
+        gen_op_addl_A0_EBX_s2,
+        gen_op_addl_A0_ESP_s2,
+        gen_op_addl_A0_EBP_s2,
+        gen_op_addl_A0_ESI_s2,
+        gen_op_addl_A0_EDI_s2,
+    },
+    [3] = {
+        gen_op_addl_A0_EAX_s3,
+        gen_op_addl_A0_ECX_s3,
+        gen_op_addl_A0_EDX_s3,
+        gen_op_addl_A0_EBX_s3,
+        gen_op_addl_A0_ESP_s3,
+        gen_op_addl_A0_EBP_s3,
+        gen_op_addl_A0_ESI_s3,
+        gen_op_addl_A0_EDI_s3,
+    },
+};
+
+static GenOpFunc *gen_op_arith_T0_T1_cc[8] = { /* indexed by OP_ADDL..OP_CMPL; called by gen_op() once both operands are in T0/T1 */
+    gen_op_addl_T0_T1_cc,
+    gen_op_orl_T0_T1_cc,
+    gen_op_adcl_T0_T1_cc,
+    gen_op_sbbl_T0_T1_cc,
+    gen_op_andl_T0_T1_cc,
+    gen_op_subl_T0_T1_cc,
+    gen_op_xorl_T0_T1_cc,
+    gen_op_cmpl_T0_T1_cc,
+};
+
+static const int cc_op_arithb[8] = { /* indexed by OP_ADDL..OP_CMPL: byte-sized CC_OP_* of each operation; gen_op() adds the operand size to it */
+    CC_OP_ADDB,
+    CC_OP_LOGICB,
+    CC_OP_ADDB, /* OP_ADCL shares the ADD flag state */
+    CC_OP_SUBB, /* OP_SBBL shares the SUB flag state */
+    CC_OP_LOGICB,
+    CC_OP_SUBB,
+    CC_OP_LOGICB,
+    CC_OP_SUBB, /* OP_CMPL shares the SUB flag state */
+};
+
+static GenOpFunc *gen_op_shift_T0_T1_cc[3][8] = { /* [operand size][OP_ROL..OP_SAR]: called by gen_shift() with the value in T0 and the count in T1 */
+    [OT_BYTE] = {
+        gen_op_rolb_T0_T1_cc,
+        gen_op_rorb_T0_T1_cc,
+        gen_op_rclb_T0_T1_cc,
+        gen_op_rcrb_T0_T1_cc,
+        gen_op_shlb_T0_T1_cc,
+        gen_op_shrb_T0_T1_cc,
+        gen_op_shlb_T0_T1_cc, /* OP_SHL1: same generator as OP_SHL */
+        gen_op_sarb_T0_T1_cc,
+    },
+    [OT_WORD] = {
+        gen_op_rolw_T0_T1_cc,
+        gen_op_rorw_T0_T1_cc,
+        gen_op_rclw_T0_T1_cc,
+        gen_op_rcrw_T0_T1_cc,
+        gen_op_shlw_T0_T1_cc,
+        gen_op_shrw_T0_T1_cc,
+        gen_op_shlw_T0_T1_cc, /* OP_SHL1: same generator as OP_SHL */
+        gen_op_sarw_T0_T1_cc,
+    },
+    [OT_LONG] = {
+        gen_op_roll_T0_T1_cc,
+        gen_op_rorl_T0_T1_cc,
+        gen_op_rcll_T0_T1_cc,
+        gen_op_rcrl_T0_T1_cc,
+        gen_op_shll_T0_T1_cc,
+        gen_op_shrl_T0_T1_cc,
+        gen_op_shll_T0_T1_cc, /* OP_SHL1: same generator as OP_SHL */
+        gen_op_sarl_T0_T1_cc,
+    },
+};
+
+static GenOpFunc *gen_op_lds_T0_A0[3] = { /* [operand size]: ldsb/ldsw generators; only two initializers, so the OT_LONG slot is NULL and must not be called */
+    gen_op_ldsb_T0_A0,
+    gen_op_ldsw_T0_A0,
+};
+
+static GenOpFunc *gen_op_ldu_T0_A0[3] = { /* [operand size]: ldub/lduw generators; only two initializers, so the OT_LONG slot is NULL and must not be called */
+    gen_op_ldub_T0_A0,
+    gen_op_lduw_T0_A0,
+};
+
+/* sign does not matter */
+static GenOpFunc *gen_op_ld_T0_A0[3] = { /* [operand size]: called after gen_lea_modrm() to load the memory operand into T0 */
+    gen_op_ldub_T0_A0,
+    gen_op_lduw_T0_A0,
+    gen_op_ldl_T0_A0,
+};
+
+static GenOpFunc *gen_op_ld_T1_A0[3] = { /* [operand size]: called after gen_lea_modrm() to load the memory operand into T1 */
+    gen_op_ldub_T1_A0,
+    gen_op_lduw_T1_A0,
+    gen_op_ldl_T1_A0,
+};
+
+static GenOpFunc *gen_op_st_T0_A0[3] = { /* [operand size]: called to store T0 to the memory operand addressed by A0 */
+    gen_op_stb_T0_A0,
+    gen_op_stw_T0_A0,
+    gen_op_stl_T0_A0,
+};
+
+static GenOpFunc *gen_op_movs[6] = { /* entries 0..2: plain b/w/l forms, 3..5: rep forms; not referenced in the visible part of the file */
+    gen_op_movsb,
+    gen_op_movsw,
+    gen_op_movsl,
+    gen_op_rep_movsb,
+    gen_op_rep_movsw,
+    gen_op_rep_movsl,
+};
+
+static GenOpFunc *gen_op_stos[6] = { /* entries 0..2: plain b/w/l forms, 3..5: rep forms; not referenced in the visible part of the file */
+    gen_op_stosb,
+    gen_op_stosw,
+    gen_op_stosl,
+    gen_op_rep_stosb,
+    gen_op_rep_stosw,
+    gen_op_rep_stosl,
+};
+
+static GenOpFunc *gen_op_lods[6] = { /* entries 0..2: plain b/w/l forms, 3..5: rep forms; not referenced in the visible part of the file */
+    gen_op_lodsb,
+    gen_op_lodsw,
+    gen_op_lodsl,
+    gen_op_rep_lodsb,
+    gen_op_rep_lodsw,
+    gen_op_rep_lodsl,
+};
+
+static GenOpFunc *gen_op_scas[9] = { /* entries 0..2: plain b/w/l forms, 3..5: repz forms, 6..8: repnz forms; not referenced in the visible part of the file */
+    gen_op_scasb,
+    gen_op_scasw,
+    gen_op_scasl,
+    gen_op_repz_scasb,
+    gen_op_repz_scasw,
+    gen_op_repz_scasl,
+    gen_op_repnz_scasb,
+    gen_op_repnz_scasw,
+    gen_op_repnz_scasl,
+};
+
+static GenOpFunc *gen_op_cmps[9] = { /* entries 0..2: plain b/w/l forms, 3..5: repz forms, 6..8: repnz forms; not referenced in the visible part of the file */
+    gen_op_cmpsb,
+    gen_op_cmpsw,
+    gen_op_cmpsl,
+    gen_op_repz_cmpsb,
+    gen_op_repz_cmpsw,
+    gen_op_repz_cmpsl,
+    gen_op_repnz_cmpsb,
+    gen_op_repnz_cmpsw,
+    gen_op_repnz_cmpsl,
+};
+
+static GenOpFunc *gen_op_ins[6] = { /* entries 0..2: plain b/w/l forms, 3..5: rep forms; not referenced in the visible part of the file */
+    gen_op_insb,
+    gen_op_insw,
+    gen_op_insl,
+    gen_op_rep_insb,
+    gen_op_rep_insw,
+    gen_op_rep_insl,
+};
+
+
+static GenOpFunc *gen_op_outs[6] = { /* entries 0..2: plain b/w/l forms, 3..5: rep forms; not referenced in the visible part of the file */
+    gen_op_outsb,
+    gen_op_outsw,
+    gen_op_outsl,
+    gen_op_rep_outsb,
+    gen_op_rep_outsw,
+    gen_op_rep_outsl,
+};
+
+enum { /* condition selectors: gen_jcc()/gen_setcc() compute them as (b >> 1) & 7 and use them to index the jcc/setcc tables; bit 0 of b inverts the test */
+    JCC_O,
+    JCC_B,
+    JCC_Z,
+    JCC_BE,
+    JCC_S,
+    JCC_P,
+    JCC_L,
+    JCC_LE,
+};
+
+static GenOpFunc2 *gen_jcc_slow[8] = { /* [JCC_*]: generic jump generators used by gen_jcc() when no specialised one applies */
+    gen_op_jo_cc,
+    gen_op_jb_cc,
+    gen_op_jz_cc,
+    gen_op_jbe_cc,
+    gen_op_js_cc,
+    gen_op_jp_cc,
+    gen_op_jl_cc,
+    gen_op_jle_cc,
+};
+    
+static GenOpFunc2 *gen_jcc_sub[3][8] = { /* [operand size][JCC_*]: jump generators specialised for a preceding SUB/CMP; a NULL entry makes gen_jcc() take the slow path */
+    [OT_BYTE] = {
+        NULL, /* JCC_O: no specialised form */
+        gen_op_jb_subb,
+        gen_op_jz_subb,
+        gen_op_jbe_subb,
+        gen_op_js_subb,
+        NULL, /* JCC_P: no specialised form */
+        gen_op_jl_subb,
+        gen_op_jle_subb,
+    },
+    [OT_WORD] = {
+        NULL,
+        gen_op_jb_subw,
+        gen_op_jz_subw,
+        gen_op_jbe_subw,
+        gen_op_js_subw,
+        NULL,
+        gen_op_jl_subw,
+        gen_op_jle_subw,
+    },
+    [OT_LONG] = {
+        NULL,
+        gen_op_jb_subl,
+        gen_op_jz_subl,
+        gen_op_jbe_subl,
+        gen_op_js_subl,
+        NULL,
+        gen_op_jl_subl,
+        gen_op_jle_subl,
+    },
+};
+
+static GenOpFunc *gen_setcc_slow[8] = { /* [JCC_*]: generic setcc generators used by gen_setcc() when no specialised one applies */
+    gen_op_seto_T0_cc,
+    gen_op_setb_T0_cc,
+    gen_op_setz_T0_cc,
+    gen_op_setbe_T0_cc,
+    gen_op_sets_T0_cc,
+    gen_op_setp_T0_cc,
+    gen_op_setl_T0_cc,
+    gen_op_setle_T0_cc,
+};
+
+static GenOpFunc *gen_setcc_sub[3][8] = { /* [operand size][JCC_*]: setcc generators specialised for a preceding SUB/CMP; a NULL entry makes gen_setcc() take the slow path */
+    [OT_BYTE] = {
+        NULL, /* JCC_O: no specialised form */
+        gen_op_setb_T0_subb,
+        gen_op_setz_T0_subb,
+        gen_op_setbe_T0_subb,
+        gen_op_sets_T0_subb,
+        NULL, /* JCC_P: no specialised form */
+        gen_op_setl_T0_subb,
+        gen_op_setle_T0_subb,
+    },
+    [OT_WORD] = {
+        NULL,
+        gen_op_setb_T0_subw,
+        gen_op_setz_T0_subw,
+        gen_op_setbe_T0_subw,
+        gen_op_sets_T0_subw,
+        NULL,
+        gen_op_setl_T0_subw,
+        gen_op_setle_T0_subw,
+    },
+    [OT_LONG] = {
+        NULL,
+        gen_op_setb_T0_subl,
+        gen_op_setz_T0_subl,
+        gen_op_setbe_T0_subl,
+        gen_op_sets_T0_subl,
+        NULL,
+        gen_op_setl_T0_subl,
+        gen_op_setle_T0_subl,
+    },
+};
+
+/* Emit the two-operand arithmetic/logic operation 'op' (OP_ADDL..OP_CMPL)
+   on operands of size 'ot'.
+   d: destination; a register number, or OR_TMP0 when the caller has
+      already placed the value in T0 (and will store the result itself).
+   s: source; a register number, or OR_TMP1 when the value is already
+      in T1.
+   On return s1->cc_op describes the flag state produced by the op. */
+static void gen_op(DisasContext *s1, int op, int ot, int d, int s)
+{
+    const int dest_is_reg = (d != OR_TMP0);
+    const int uses_carry = (op == OP_ADCL || op == OP_SBBL);
+
+    /* bring both operands into T0 / T1 */
+    if (dest_is_reg) {
+        gen_op_mov_TN_reg[ot][0][d]();
+    }
+    if (s != OR_TMP1) {
+        gen_op_mov_TN_reg[ot][1][s]();
+    }
+
+    /* adc/sbb: emit the statically known cc_op before the operation */
+    if (uses_carry && s1->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s1->cc_op);
+    }
+
+    gen_op_arith_T0_T1_cc[op]();
+
+    /* cmp only sets the flags, everything else writes the result back */
+    if (dest_is_reg && op != OP_CMPL) {
+        gen_op_mov_reg_T0[ot][d]();
+    }
+
+    s1->cc_op = cc_op_arithb[op] + ot;
+}
+
+/* Emit the arithmetic/logic operation 'op' with the immediate 'c' as
+   source operand.
+   The constant is loaded into T1, so gen_op() is told that its source
+   already sits in T1 (OR_TMP1).  Any other source value is treated by
+   gen_op() as a register number and used to index an 8-entry table;
+   OR_TMP0 would both overrun that table and lose the immediate. */
+static void gen_opi(DisasContext *s1, int op, int ot, int d, int c)
+{
+    gen_op1_movl_T1_im(c);
+    gen_op(s1, op, ot, d, OR_TMP1);
+}
+
+/* Emit an increment (c > 0) or a decrement (otherwise) of the operand
+   'd' of size 'ot'.  d is a register number, or OR_TMP0 when the value
+   is already in T0 and the caller stores the result itself.
+   s1->cc_op is updated to the INC/DEC flag state so that a following
+   gen_jcc()/gen_setcc() does not act on the state of an earlier
+   instruction. */
+static void gen_inc(DisasContext *s1, int ot, int d, int c)
+{
+    if (d != OR_TMP0)
+        gen_op_mov_TN_reg[ot][0][d]();
+    /* emit the previous flag state first (presumably because inc/dec
+       leave the carry flag of the preceding operation untouched) */
+    if (s1->cc_op != CC_OP_DYNAMIC)
+        gen_op_set_cc_op(s1->cc_op);
+    if (c > 0) {
+        gen_op_incl_T0_cc();
+        s1->cc_op = CC_OP_INCB + ot;
+    } else {
+        gen_op_decl_T0_cc();
+        s1->cc_op = CC_OP_DECB + ot;
+    }
+    if (d != OR_TMP0)
+        gen_op_mov_reg_T0[ot][d]();
+}
+
+/* Emit the shift/rotate 'op' (OP_ROL..OP_SAR) of operand 'd' by the
+   count in 's', both of size 'ot'.  d may be OR_TMP0 (value already in
+   T0) and s may be OR_TMP1 (count already in T1).  The flag state is
+   left as CC_OP_DYNAMIC afterwards. */
+static void gen_shift(DisasContext *s1, int op, int ot, int d, int s)
+{
+    const int dest_is_reg = (d != OR_TMP0);
+    const int is_rotate = (op == OP_ROL || op == OP_ROR ||
+                           op == OP_RCL || op == OP_RCR);
+
+    if (dest_is_reg) {
+        gen_op_mov_TN_reg[ot][0][d]();
+    }
+    if (s != OR_TMP1) {
+        gen_op_mov_TN_reg[ot][1][s]();
+    }
+
+    /* only C and O are modified, so we must update flags dynamically */
+    if (is_rotate && s1->cc_op != CC_OP_DYNAMIC) {
+        gen_op_set_cc_op(s1->cc_op);
+    }
+    gen_op_shift_T0_T1_cc[ot][op]();
+
+    if (dest_is_reg) {
+        gen_op_mov_reg_T0[ot][d]();
+    }
+    /* cannot predict flags after */
+    s1->cc_op = CC_OP_DYNAMIC;
+}
+
+/* Emit the shift/rotate 'op' of operand 'd' by the constant count 'c':
+   the count is loaded into T1 and the generic gen_shift() path is used
+   (currently not optimized). */
+static void gen_shifti(DisasContext *s1, int op, int ot, int d, int c)
+{
+    const int count_in_t1 = OR_TMP1;
+
+    gen_op1_movl_T1_im(c);
+    gen_shift(s1, op, ot, d, count_in_t1);
+}
+
+/* Decode the memory operand selected by 'modrm' and emit code that
+   computes its effective address into A0.
+   The optional SIB byte and displacement are read from s->pc, which is
+   advanced past them.  Only 32 bit addressing (s->aflag != 0) is
+   handled; otherwise a message is printed and no code is emitted.
+   *reg_ptr receives OR_A0 (0 in the unsupported case) and *offset_ptr
+   the decoded displacement.
+   Must only be called for mod != 3 (memory operands). */
+static void gen_lea_modrm(DisasContext *s, int modrm, int *reg_ptr, int *offset_ptr)
+{
+    int havesib;
+    int havebase;
+    int base, disp;
+    int index = 0;
+    int scale = 0;
+    int reg1, reg2, reg1_scale, opreg;
+    int mod, rm, code;
+
+    mod = (modrm >> 6) & 3;
+    rm = modrm & 7;
+
+    if (s->aflag) {
+
+        havesib = 0;
+        havebase = 1;
+        base = rm;
+        
+        if (base == 4) {
+            /* rm == 4: a SIB byte follows */
+            havesib = 1;
+            code = ldub(s->pc++);
+            scale = (code >> 6) & 3;
+            index = (code >> 3) & 7;
+            base = code & 7;
+        }
+
+        switch (mod) {
+        case 0:
+            if (base == 5) {
+                /* no base register, 32 bit displacement instead */
+                havebase = 0;
+                disp = ldl(s->pc);
+                s->pc += 4;
+            } else {
+                disp = 0;
+            }
+            break;
+        case 1:
+            disp = (int8_t)ldub(s->pc++);
+            break;
+        default:
+        case 2:
+            disp = ldl(s->pc);
+            s->pc += 4;
+            break;
+        }
+
+        /* reg1 is the base if there is one, else the index; reg2 is the
+           index when both are present.  index == 4 means "no index". */
+        reg1 = OR_ZERO;
+        reg2 = OR_ZERO;
+        reg1_scale = 0;
+
+        if (havebase) {
+            reg1 = OR_EAX + base;
+            if (havesib && index != 4)
+                reg2 = index + OR_EAX;
+        } else if (havesib && index != 4) {
+            reg1 = index + OR_EAX;
+            /* the SIB scale applies to the index register only, never
+               to the base */
+            reg1_scale = scale;
+        }
+
+        if (reg2 == OR_ZERO) {
+            /* op: disp + (reg1 << scale) */
+            if (reg1 == OR_ZERO) {
+                gen_op1_movl_A0_im(disp);
+            } else if (reg1_scale == 0 && disp == 0) {
+                gen_op_movl_A0_reg[reg1]();
+            } else {
+                /* A0 must hold the displacement before the register is
+                   added to it */
+                gen_op1_movl_A0_im(disp);
+                gen_op_addl_A0_reg_sN[reg1_scale][reg1]();
+            }
+        } else {
+            /* op: disp + reg1 + (reg2 << scale) */
+            if (disp != 0) {
+                gen_op1_movl_A0_im(disp);
+                gen_op_addl_A0_reg_sN[0][reg1]();
+            } else {
+                gen_op_movl_A0_reg[reg1]();
+            }
+            gen_op_addl_A0_reg_sN[scale][reg2]();
+        }
+        opreg = OR_A0;
+    } else {
+        fprintf(stderr, "16 bit addressing not supported\n");
+        disp = 0;
+        opreg = 0;
+    }
+    *reg_ptr = opreg;
+    *offset_ptr = disp;
+}
+
+/* Emit a load (is_store == 0) or a store (is_store != 0) between the
+   register 'reg' and the register-or-memory operand described by
+   'modrm', for operand size 'ot'.  The value always travels through T0;
+   when reg == OR_TMP0 the transfer between T0 and 'reg' is skipped, so
+   the caller supplies or receives the value in T0. */
+static void gen_ldst_modrm(DisasContext *s, int modrm, int ot, int reg, int is_store)
+{
+    const int rm = modrm & 7;
+    const int is_reg_operand = (((modrm >> 6) & 3) == 3);
+    int opreg, disp;
+
+    /* memory operand: compute its address into A0 first */
+    if (!is_reg_operand) {
+        gen_lea_modrm(s, modrm, &opreg, &disp);
+    }
+
+    if (is_store) {
+        if (reg != OR_TMP0) {
+            gen_op_mov_TN_reg[ot][0][reg]();
+        }
+        if (is_reg_operand) {
+            gen_op_mov_reg_T0[ot][rm]();
+        } else {
+            gen_op_st_T0_A0[ot]();
+        }
+    } else {
+        if (is_reg_operand) {
+            gen_op_mov_TN_reg[ot][0][rm]();
+        } else {
+            gen_op_ld_T0_A0[ot]();
+        }
+        if (reg != OR_TMP0) {
+            gen_op_mov_reg_T0[ot][reg]();
+        }
+    }
+}
+
+/* Fetch an immediate of size 'ot' from the instruction stream at s->pc
+   and advance s->pc past it.  Any size other than OT_BYTE or OT_WORD is
+   read as a 32 bit value. */
+static inline uint32_t insn_get(DisasContext *s, int ot)
+{
+    uint32_t value;
+
+    if (ot == OT_BYTE) {
+        value = ldub(s->pc);
+        s->pc += 1;
+    } else if (ot == OT_WORD) {
+        value = lduw(s->pc);
+        s->pc += 2;
+    } else {
+        value = ldl(s->pc);
+        s->pc += 4;
+    }
+    return value;
+}
+
+/* Emit a conditional jump.
+   b:   low bits of the opcode; bit 0 inverts the condition, bits 1..3
+        select it (JCC_*).
+   val: jump target; s->pc (the address following the instruction) is
+        the other successor.
+   When the flag state is statically known (s->cc_op) a specialised
+   generator is used where one exists; otherwise the generic one is
+   used after the current cc_op has been emitted into the generated
+   code. */
+static void gen_jcc(DisasContext *s, int b, int val)
+{
+    int inv, jcc_op;
+    GenOpFunc2 *func;
+
+    inv = b & 1;
+    jcc_op = (b >> 1) & 7;
+    func = NULL;
+    switch(s->cc_op) {
+        /* we optimize the cmp/jcc case */
+    case CC_OP_SUBB:
+    case CC_OP_SUBW:
+    case CC_OP_SUBL:
+        /* may be NULL: no specialised form for this condition */
+        func = gen_jcc_sub[s->cc_op - CC_OP_SUBB][jcc_op];
+        break;
+        
+        /* some jumps are easy to compute */
+    case CC_OP_ADDB:
+    case CC_OP_ADDW:
+    case CC_OP_ADDL:
+    case CC_OP_LOGICB:
+    case CC_OP_LOGICW:
+    case CC_OP_LOGICL:
+    case CC_OP_INCB:
+    case CC_OP_INCW:
+    case CC_OP_INCL:
+    case CC_OP_DECB:
+    case CC_OP_DECW:
+    case CC_OP_DECL:
+    case CC_OP_SHLB:
+    case CC_OP_SHLW:
+    case CC_OP_SHLL:
+        /* Z and S can reuse the SUB generators; '% 3' recovers the
+           operand size (assumes the CC_OP_* values from CC_OP_ADDB on
+           come in consecutive B/W/L triples - confirm in cpu-i386.h) */
+        if (jcc_op == JCC_Z || jcc_op == JCC_S)
+            func = gen_jcc_sub[(s->cc_op - CC_OP_ADDB) % 3][jcc_op];
+        break;
+    default:
+        break;
+    }
+    if (!func) {
+        /* slow path: the generated code needs the current cc_op, so it
+           must be emitted (gen_op_*), not executed at translation time */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        func = gen_jcc_slow[jcc_op];
+    }
+    if (!inv) {
+        func(val, (long)s->pc);
+    } else {
+        /* inverted condition: swap the two successors */
+        func((long)s->pc, val);
+    }
+}
+
+/* Emit code that sets T0 according to a condition.
+   b: low bits of the opcode; bit 0 inverts the condition, bits 1..3
+      select it (JCC_*).
+   When the flag state is statically known (s->cc_op) a specialised
+   generator is used where one exists; otherwise the generic one is
+   used after the current cc_op has been emitted into the generated
+   code. */
+static void gen_setcc(DisasContext *s, int b)
+{
+    int inv, jcc_op;
+    GenOpFunc *func;
+
+    inv = b & 1;
+    jcc_op = (b >> 1) & 7;
+    func = NULL;
+    switch(s->cc_op) {
+        /* we optimize the cmp/jcc case */
+    case CC_OP_SUBB:
+    case CC_OP_SUBW:
+    case CC_OP_SUBL:
+        /* may be NULL: no specialised form for this condition */
+        func = gen_setcc_sub[s->cc_op - CC_OP_SUBB][jcc_op];
+        break;
+        
+        /* some jumps are easy to compute */
+    case CC_OP_ADDB:
+    case CC_OP_ADDW:
+    case CC_OP_ADDL:
+    case CC_OP_LOGICB:
+    case CC_OP_LOGICW:
+    case CC_OP_LOGICL:
+    case CC_OP_INCB:
+    case CC_OP_INCW:
+    case CC_OP_INCL:
+    case CC_OP_DECB:
+    case CC_OP_DECW:
+    case CC_OP_DECL:
+    case CC_OP_SHLB:
+    case CC_OP_SHLW:
+    case CC_OP_SHLL:
+        /* Z and S can reuse the SUB generators; '% 3' reduces the
+           cc_op offset to the operand size so the 3-row table is not
+           overrun (same indexing as gen_jcc(); assumes consecutive
+           B/W/L triples from CC_OP_ADDB on - confirm in cpu-i386.h) */
+        if (jcc_op == JCC_Z || jcc_op == JCC_S)
+            func = gen_setcc_sub[(s->cc_op - CC_OP_ADDB) % 3][jcc_op];
+        break;
+    default:
+        break;
+    }
+    if (!func) {
+        /* slow path: the generated code needs the current cc_op, so it
+           must be emitted (gen_op_*), not executed at translation time */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            gen_op_set_cc_op(s->cc_op);
+        func = gen_setcc_slow[jcc_op];
+    }
+    func();
+    if (inv) {
+        /* inverted condition: flip the 0/1 result */
+        gen_op_xor_T0_1();
+    }
+}
+
+/* return the size of the instruction. Return -1 if no insn found */
+int disas_insn(DisasContext *s, uint8_t *pc_start)
+{
+    int b, prefixes, aflag, dflag;
+    int shift, ot;
+    int modrm, reg, rm, mod, reg_addr, op, opreg, offset_addr, val;
+
+    s->pc = pc_start;
+    prefixes = 0;
+    aflag = 1;
+    dflag = 1;
+    //    cur_pc = s->pc; /* for insn generation */
+ next_byte:
+    b = ldub(s->pc);
+    if (b < 0)
+        return -1;
+    s->pc++;
+    /* check prefixes */
+    switch (b) {
+    case 0xf3:
+        prefixes |= PREFIX_REPZ;
+        goto next_byte;
+    case 0xf2:
+        prefixes |= PREFIX_REPNZ;
+        goto next_byte;
+    case 0xf0:
+        prefixes |= PREFIX_LOCK;
+        goto next_byte;
+    case 0x2e:
+        prefixes |= PREFIX_CS;
+        goto next_byte;
+    case 0x36:
+        prefixes |= PREFIX_SS;
+        goto next_byte;
+    case 0x3e:
+        prefixes |= PREFIX_DS;
+        goto next_byte;
+    case 0x26:
+        prefixes |= PREFIX_ES;
+        goto next_byte;
+    case 0x64:
+        prefixes |= PREFIX_FS;
+        goto next_byte;
+    case 0x65:
+        prefixes |= PREFIX_GS;
+        goto next_byte;
+    case 0x66:
+        prefixes |= PREFIX_DATA;
+        goto next_byte;
+    case 0x67:
+        prefixes |= PREFIX_ADR;
+        goto next_byte;
+    case 0x9b:
+        prefixes |= PREFIX_FWAIT;
+        goto next_byte;
+    }
+
+    if (prefixes & PREFIX_DATA)
+        dflag ^= 1;
+    if (prefixes & PREFIX_ADR)
+        aflag ^= 1;
+
+    s->prefix = prefixes;
+    s->aflag = aflag;
+    s->dflag = dflag;
+
+    /* now check op code */
+ reswitch:
+    switch(b) {
+    case 0x0f:
+        /**************************/
+        /* extended op code */
+        b = ldub(s->pc++) | 0x100;
+        goto reswitch;
+        
+        /**************************/
+        /* arith & logic */
+    case 0x00 ... 0x05:
+    case 0x08 ... 0x0d:
+    case 0x10 ... 0x15:
+    case 0x18 ... 0x1d:
+    case 0x20 ... 0x25:
+    case 0x28 ... 0x2d:
+    case 0x30 ... 0x35:
+    case 0x38 ... 0x3d:
+        {
+            int op, f, val;
+            op = (b >> 3) & 7;
+            f = (b >> 1) & 3;
+
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag ? OT_LONG : OT_WORD;
+            
+            switch(f) {
+            case 0: /* OP Ev, Gv */
+                modrm = ldub(s->pc++);
+                reg = ((modrm >> 3) & 7) + OR_EAX;
+                mod = (modrm >> 6) & 3;
+                rm = modrm & 7;
+                if (mod != 3) {
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_op_ld_T0_A0[ot]();
+                    opreg = OR_TMP0;
+                } else {
+                    opreg = OR_EAX + rm;
+                }
+                gen_op(s, op, ot, opreg, reg);
+                if (mod != 3 && op != 7) {
+                    gen_op_st_T0_A0[ot]();
+                }
+                break;
+            case 1: /* OP Gv, Ev */
+                modrm = ldub(s->pc++);
+                mod = (modrm >> 6) & 3;
+                reg = ((modrm >> 3) & 7) + OR_EAX;
+                rm = modrm & 7;
+                if (mod != 3) {
+                    gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                    gen_op_ld_T1_A0[ot]();
+                    opreg = OR_TMP1;
+                } else {
+                    opreg = OR_EAX + rm;
+                }
+                gen_op(s, op, ot, reg, opreg);
+                break;
+            case 2: /* OP A, Iv */
+                val = insn_get(s, ot);
+                gen_opi(s, op, ot, OR_EAX, val);
+                break;
+            }
+        }
+        break;
+
+    case 0x80: /* GRP1 */
+    case 0x81:
+    case 0x83:
+        {
+            int val;
+
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag ? OT_LONG : OT_WORD;
+            
+            modrm = ldub(s->pc++);
+            mod = (modrm >> 6) & 3;
+            rm = modrm & 7;
+            op = (modrm >> 3) & 7;
+            
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_op_ld_T0_A0[ot]();
+                opreg = OR_TMP0;
+            } else {
+                opreg = rm + OR_EAX;
+            }
+
+            switch(b) {
+            default:
+            case 0x80:
+            case 0x81:
+                val = insn_get(s, ot);
+                break;
+            case 0x83:
+                val = (int8_t)insn_get(s, OT_BYTE);
+                break;
+            }
+
+            gen_opi(s, op, ot, opreg, val);
+            if (op != 7 && mod != 3) {
+                gen_op_st_T0_A0[ot]();
+            }
+        }
+        break;
+
+        /**************************/
+        /* inc, dec, and other misc arith */
+    case 0x40 ... 0x47: /* inc Gv */
+        ot = dflag ? OT_LONG : OT_WORD;
+        gen_inc(s, ot, OR_EAX + (b & 7), 1);
+        break;
+    case 0x48 ... 0x4f: /* dec Gv */
+        ot = dflag ? OT_LONG : OT_WORD;
+        gen_inc(s, ot, OR_EAX + (b & 7), -1);
+        break;
+    case 0xf6: /* GRP3 */
+    case 0xf7:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+
+        modrm = ldub(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        op = (modrm >> 3) & 7;
+        if (mod != 3) {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_ld_T0_A0[ot]();
+        } else {
+            gen_op_mov_TN_reg[ot][0][rm]();
+        }
+
+        switch(op) {
+        case 0: /* test */
+            val = insn_get(s, ot);
+            gen_op1_movl_T1_im(val);
+            gen_op_testl_T0_T1_cc();
+            s->cc_op = CC_OP_LOGICB + ot;
+            break;
+        case 2: /* not */
+            gen_op_notl_T0();
+            if (mod != 3) {
+                gen_op_st_T0_A0[ot]();
+            } else {
+                gen_op_mov_reg_T0[ot][rm]();
+            }
+            break;
+        case 3: /* neg */
+            gen_op_negl_T0_cc();
+            if (mod != 3) {
+                gen_op_st_T0_A0[ot]();
+            } else {
+                gen_op_mov_reg_T0[ot][rm]();
+            }
+            s->cc_op = CC_OP_SUBB + ot;
+            break;
+        case 4: /* mul */
+            switch(ot) {
+            case OT_BYTE:
+                gen_op_mulb_AL_T0();
+                break;
+            case OT_WORD:
+                gen_op_mulw_AX_T0();
+                break;
+            default:
+            case OT_LONG:
+                gen_op_mull_EAX_T0();
+                break;
+            }
+            break;
+        case 5: /* imul */
+            switch(ot) {
+            case OT_BYTE:
+                gen_op_imulb_AL_T0();
+                break;
+            case OT_WORD:
+                gen_op_imulw_AX_T0();
+                break;
+            default:
+            case OT_LONG:
+                gen_op_imull_EAX_T0();
+                break;
+            }
+            break;
+        case 6: /* div */
+            switch(ot) {
+            case OT_BYTE:
+                gen_op_divb_AL_T0();
+                break;
+            case OT_WORD:
+                gen_op_divw_AX_T0();
+                break;
+            default:
+            case OT_LONG:
+                gen_op_divl_EAX_T0();
+                break;
+            }
+            break;
+        case 7: /* idiv */
+            switch(ot) {
+            case OT_BYTE:
+                gen_op_idivb_AL_T0();
+                break;
+            case OT_WORD:
+                gen_op_idivw_AX_T0();
+                break;
+            default:
+            case OT_LONG:
+                gen_op_idivl_EAX_T0();
+                break;
+            }
+            break;
+        default:
+            error("GRP3: bad instruction");
+            return -1;
+        }
+        break;
+
+    case 0xfe: /* GRP4 */
+    case 0xff: /* GRP5 */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+
+        modrm = ldub(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        op = (modrm >> 3) & 7;
+        if (op >= 2 && b == 0xfe) {
+            error("GRP4: bad instruction");
+            return -1;
+        }
+        if (mod != 3) {
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            gen_op_ld_T0_A0[ot]();
+        } else {
+            gen_op_mov_TN_reg[ot][0][rm]();
+        }
+
+        switch(op) {
+        case 0: /* inc Ev */
+            gen_inc(s, ot, OR_TMP0, 1);
+            if (mod != 3)
+                gen_op_st_T0_A0[ot]();
+            break;
+        case 1: /* dec Ev */
+            gen_inc(s, ot, OR_TMP0, -1);
+            if (mod != 3)
+                gen_op_st_T0_A0[ot]();
+            break;
+        case 2: /* call Ev */
+            gen_op1_movl_T1_im((long)s->pc);
+            gen_op_pushl_T1();
+            gen_op_jmp_T0();
+            break;
+        case 4: /* jmp Ev */
+            gen_op_jmp_T0();
+            break;
+        case 6: /* push Ev */
+            gen_op_pushl_T0();
+            break;
+        default:
+            error("GRP5: bad instruction");
+            return -1;
+        }
+        break;
+
+    case 0x84: /* test Ev, Gv */
+    case 0x85: 
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+
+        modrm = ldub(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        reg = (modrm >> 3) & 7;
+        
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_op_mov_TN_reg[ot][1][reg + OR_EAX]();
+        gen_op_testl_T0_T1_cc();
+        s->cc_op = CC_OP_LOGICB + ot;
+        break;
+        
+    case 0xa8: /* test eAX, Iv */
+    case 0xa9:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        val = insn_get(s, ot);
+
+        gen_op_mov_TN_reg[ot][0][OR_EAX]();
+        gen_op1_movl_T1_im(val);
+        gen_op_testl_T0_T1_cc();
+        s->cc_op = CC_OP_LOGICB + ot;
+        break;
+        
+    case 0x98: /* CWDE/CBW */
+        if (dflag)
+            gen_op_movswl_EAX_AX();
+        else
+            gen_op_movsbw_AX_AL();
+        break;
+    case 0x99: /* CDQ/CWD */
+        if (dflag)
+            gen_op_movslq_EDX_EAX();
+        else
+            gen_op_movswl_DX_AX();
+        break;
+    case 0x1af: /* imul Gv, Ev */
+    case 0x69: /* imul Gv, Ev, I */
+    case 0x6b:
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub(s->pc++);
+        reg = ((modrm >> 3) & 7) + OR_EAX;
+        
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        if (b == 0x69) {
+            val = insn_get(s, ot);
+            gen_op1_movl_T1_im(val);
+        } else if (b == 0x6b) {
+            val = insn_get(s, OT_BYTE);
+            gen_op1_movl_T1_im(val);
+        } else {
+            gen_op_mov_TN_reg[ot][1][reg]();
+        }
+
+        if (ot == OT_LONG) {
+            op_imull_T0_T1();
+        } else {
+            op_imulw_T0_T1();
+        }
+        gen_op_mov_reg_T0[ot][reg]();
+        break;
+        
+        /**************************/
+        /* push/pop */
+    case 0x50 ... 0x57: /* push */
+        gen_op_mov_TN_reg[OT_LONG][0][(b & 7)]();
+        gen_op_pushl_T0();
+        break;
+    case 0x58 ... 0x5f: /* pop */
+        gen_op_popl_T0();
+        gen_op_mov_reg_T0[OT_LONG][reg]();
+        break;
+    case 0x68: /* push Iv */
+    case 0x6a:
+        ot = dflag ? OT_LONG : OT_WORD;
+        if (b == 0x68)
+            val = insn_get(s, ot);
+        else
+            val = (int8_t)insn_get(s, OT_BYTE);
+        gen_op1_movl_T0_im(val);
+        gen_op_pushl_T0();
+        break;
+    case 0x8f: /* pop Ev */
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub(s->pc++);
+        gen_op_popl_T0();
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+        break;
+    case 0xc9: /* leave */
+        gen_op_mov_TN_reg[OT_LONG][0][R_EBP]();
+        gen_op_mov_reg_T0[OT_LONG][R_ESP]();
+        gen_op_popl_T0();
+        gen_op_mov_reg_T0[OT_LONG][R_EBP]();
+        break;
+        /**************************/
+        /* mov */
+    case 0x88:
+    case 0x89: /* mov Ev, Gv (store register into r/m) */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub(s->pc++);
+        reg = (modrm >> 3) & 7;
+        
+        /* generate a generic store */
+        gen_ldst_modrm(s, modrm, ot, OR_EAX + reg, 1);
+        break;
+    case 0xc6:
+    case 0xc7: /* mov Ev, Iv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub(s->pc++);
+        mod = (modrm >> 6) & 3;
+
+        val = insn_get(s, ot);
+        gen_op1_movl_T0_im(val);
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 1);
+        break;
+    case 0x8a:
+    case 0x8b: /* mov Gv, Ev (load register from r/m) */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub(s->pc++);
+        reg = (modrm >> 3) & 7;
+        
+        gen_ldst_modrm(s, modrm, ot, OR_TMP0, 0);
+        gen_op_mov_reg_T0[ot][reg]();
+        break;
+
+    case 0x1b6: /* movzbS Gv, Eb */
+    case 0x1b7: /* movzwS Gv, Ew */
+    case 0x1be: /* movsbS Gv, Eb */
+    case 0x1bf: /* movswS Gv, Ew */
+        {
+            int d_ot;
+            /* d_ot is the size of destination */
+            d_ot = dflag + OT_WORD;
+            /* ot is the size of source */
+            ot = (b & 1) + OT_BYTE;
+            modrm = ldub(s->pc++);
+            reg = ((modrm >> 3) & 7) + OR_EAX;
+            mod = (modrm >> 6) & 3;
+            rm = modrm & 7;
+            
+            if (mod == 3) {
+                gen_op_mov_TN_reg[ot][0][rm]();
+                switch(ot | (b & 8)) {
+                case OT_BYTE:
+                    gen_op_movzbl_T0_T0();
+                    break;
+                case OT_BYTE | 8:
+                    gen_op_movsbl_T0_T0();
+                    break;
+                case OT_WORD:
+                    gen_op_movzwl_T0_T0();
+                    break;
+                default:
+                case OT_WORD | 8:
+                    gen_op_movswl_T0_T0();
+                    break;
+                }
+                gen_op_mov_reg_T0[d_ot][reg]();
+            } else {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                if (b & 8) {
+                    gen_op_lds_T0_A0[ot]();
+                } else {
+                    gen_op_ldu_T0_A0[ot]();
+                }
+                gen_op_mov_reg_T0[d_ot][reg]();
+            }
+        }
+        break;
+
+    case 0x8d: /* lea */
+        ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub(s->pc++);
+        reg = (modrm >> 3) & 7;
+
+        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_op_mov_reg_A0[ot - OT_WORD][reg]();
+        break;
+        
+    case 0xa0: /* mov EAX, Ov */
+    case 0xa1:
+    case 0xa2: /* mov Ov, EAX */
+    case 0xa3:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (s->aflag)
+            offset_addr = insn_get(s, OT_LONG);
+        else
+            offset_addr = insn_get(s, OT_WORD);
+            
+        if ((b & 2) == 0) {
+            gen_op_ld_T0_A0[ot]();
+            gen_op_mov_reg_T0[ot][R_EAX]();
+        } else {
+            gen_op_mov_TN_reg[ot][0][R_EAX]();
+            gen_op_st_T0_A0[ot]();
+        }
+        break;
+
+    case 0xb0 ... 0xb7: /* mov R, Ib */
+        val = insn_get(s, OT_BYTE);
+        gen_op1_movl_T0_im(val);
+        gen_op_mov_reg_T0[OT_BYTE][b & 7]();
+        break;
+    case 0xb8 ... 0xbf: /* mov R, Iv */
+        ot = dflag ? OT_LONG : OT_WORD;
+        val = insn_get(s, ot);
+        reg = OR_EAX + (b & 7);
+        gen_op1_movl_T0_im(val);
+        gen_op_mov_reg_T0[ot][reg]();
+        break;
+
+    case 0x91 ... 0x97: /* xchg R, EAX */
+        ot = dflag ? OT_LONG : OT_WORD;
+        reg = b & 7;
+        gen_op_mov_TN_reg[ot][0][reg]();
+        gen_op_mov_TN_reg[ot][1][R_EAX]();
+        gen_op_mov_reg_T0[ot][R_EAX]();
+        gen_op_mov_reg_T1[ot][reg]();
+        break;
+    case 0x86:
+    case 0x87: /* xchg Ev, Gv */
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        modrm = ldub(s->pc++);
+        reg = (modrm >> 3) & 7;
+
+        gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+        gen_op_mov_TN_reg[ot][0][reg]();
+        gen_op_ld_T1_A0[ot]();
+        gen_op_st_T0_A0[ot]();
+        gen_op_mov_reg_T1[ot][reg]();
+        break;
+        
+        /************************/
+        /* shifts */
+    case 0xc0:
+    case 0xc1:
+        /* shift Ev,Ib */
+        shift = 2;
+    grp2:
+        {
+            if ((b & 1) == 0)
+                ot = OT_BYTE;
+            else
+                ot = dflag ? OT_LONG : OT_WORD;
+            
+            modrm = ldub(s->pc++);
+            mod = (modrm >> 6) & 3;
+            rm = modrm & 7;
+            op = (modrm >> 3) & 7;
+            
+            if (mod != 3) {
+                gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+                gen_op_ld_T0_A0[ot]();
+                opreg = OR_TMP0;
+            } else {
+                opreg = rm + OR_EAX;
+            }
+
+            /* simpler op */
+            if (shift == 0) {
+                gen_shift(s, op, ot, opreg, OR_ECX);
+            } else {
+                if (shift == 2) {
+                    shift = ldub(s->pc++);
+                }
+                gen_shifti(s, op, ot, opreg, shift);
+            }
+
+            if (mod != 3) {
+                gen_op_st_T0_A0[ot]();
+            }
+        }
+        break;
+    case 0xd0:
+    case 0xd1:
+        /* shift Ev,1 */
+        shift = 1;
+        goto grp2;
+    case 0xd2:
+    case 0xd3:
+        /* shift Ev,cl */
+        shift = 0;
+        goto grp2;
+
+        /************************/
+        /* floats */
+#if 0        
+    case 0xd8 ... 0xdf: 
+        modrm = ldub(s->pc++);
+        mod = (modrm >> 6) & 3;
+        rm = modrm & 7;
+        op = ((b & 7) << 3) | ((modrm >> 3) & 7);
+        
+        if (mod != 3) {
+            /* memory op */
+            gen_lea_modrm(s, modrm, &reg_addr, &offset_addr);
+            switch(op) {
+            case 0x00 ... 0x07: /* fxxxs */
+            case 0x10 ... 0x17: /* fixxxl */
+            case 0x20 ... 0x27: /* fxxxl */
+            case 0x30 ... 0x37: /* fixxx */
+                {
+                    int op1, swap;
+                    op1 = fp_ops[op & 7];
+
+                    swap = 0;
+                    if ((op & 7) == 5 || (op & 7) == 7)
+                        swap = 1;
+
+                    switch(op >> 4) {
+                    case 0:
+                        ot = OT_LONG;
+                        is_int = 0;
+                        break;
+                    case 1:
+                        ot = OT_LONG;
+                        is_int = 1;
+                        break;
+                    case 2:
+                        ot = OT_QUAD;
+                        is_int = 0;
+                        break;
+                    case 3:
+                    default:
+                        ot = OT_WORD;
+                        is_int = 1;
+                        break;
+                    }
+                    
+                    /* if integer, needs to convert to float */
+                    if (is_int) {
+                        /* XXX: potential loss of precision if large integer */
+                        gen_ld(OP_LDUB + ot, OR_TMP0, reg_addr, offset_addr);
+                        gen_insn2(OP_I2FL, OR_FTMP0, OR_TMP0);
+                    } else {
+                        gen_ld(OP_LDUB + ot, OR_FTMP0, reg_addr, offset_addr);
+                    }
+                    if (ot != OT_QUAD)
+                        op1 += OP_FADDL - OP_FADDQ;
+
+                    if (!swap)
+                        gen_insn3(op1, OR_ST0, OR_ST0, OR_FTMP0);
+                    else
+                        gen_insn3(op1, OR_ST0, OR_FTMP0, OR_ST0);
+                        
+                    if ((op & 7) == 3) {
+                        /* fcomp needs pop */
+                        gen_insn0(OP_FPOP);
+                    }
+                }
+                break;
+            case 0x08: /* flds */
+            case 0x0a: /* fsts */
+            case 0x0b: /* fstps */
+            case 0x18: /* fildl */
+            case 0x1a: /* fistl */
+            case 0x1b: /* fistpl */
+            case 0x28: /* fldl */
+            case 0x2a: /* fstl */
+            case 0x2b: /* fstpl */
+            case 0x38: /* filds */
+            case 0x3a: /* fists */
+            case 0x3b: /* fistps */
+                
+                switch(op >> 4) {
+                case 0:
+                    ot = OT_LONG;
+                    is_int = 0;
+                    break;
+                case 1:
+                    ot = OT_LONG;
+                    is_int = 1;
+                    break;
+                case 2:
+                    ot = OT_QUAD;
+                    is_int = 0;
+                    break;
+                case 3:
+                default:
+                    ot = OT_WORD;
+                    is_int = 1;
+                    break;
+                }
+
+                switch(op & 7) {
+                case 0:
+                    gen_insn0(OP_FPUSH);
+                    if (is_int) {
+                        /* XXX: potential loss of precision */
+                        gen_ld(OP_LDUB + ot, OR_TMP0, reg_addr, offset_addr);
+                        gen_insn2(OP_I2FL, OR_ST0, OR_TMP0);
+                    } else {
+                        gen_ld(OP_LDUB + ot, OR_ST0, reg_addr, offset_addr);
+                    }
+                    break;
+                default:
+                    if (is_int) {
+                        gen_insn2(OP_F2IL, OR_TMP0, OR_ST0);
+                        gen_st(OP_STB + ot, OR_TMP0, reg_addr, offset_addr);
+                    } else {
+                        gen_st(OP_STB + ot, OR_ST0, reg_addr, offset_addr);
+                    }
+                    if ((op & 7) == 3)
+                        gen_insn0(OP_FPOP);
+                    break;
+                }
+                break;
+            case 0x2f: /* fnstsw mem */
+                gen_insn3(OP_FNSTS, OR_TMP0, OR_ZERO, OR_ZERO);
+                gen_st(OP_STW, OR_TMP0, reg_addr, offset_addr);
+                break;
+
+            case 0x3c: /* fbld */
+            case 0x3e: /* fbstp */
+                error("float BCD not hanlded");
+                return -1;
+            case 0x3d: /* fildll */
+                gen_insn0(OP_FPUSH);
+                gen_ld(OP_LDQ, OR_TMP0, reg_addr, offset_addr);
+                gen_insn2(OP_I2FQ, OR_ST0, OR_TMP0);
+                break;
+            case 0x3f: /* fistpll */
+                gen_insn2(OP_F2IQ, OR_TMP0, OR_ST0);
+                gen_st(OP_STQ, OR_TMP0, reg_addr, offset_addr);
+                gen_insn0(OP_FPOP);
+                break;
+            default:
+                error("unhandled memory FP\n");
+                return -1;
+            }
+        } else {
+            /* register float ops */
+            opreg = rm + OR_ST0;
+
+            switch(op) {
+            case 0x08: /* fld sti */
+                gen_insn0(OP_FPUSH);
+                gen_mov(OR_ST0, OR_ST0 + ((rm + 1) & 7));
+                break;
+            case 0x09: /* fxchg sti */
+                gen_mov(OR_TMP0, OR_ST0);
+                gen_mov(OR_ST0, opreg);
+                gen_mov(opreg, OR_TMP0);
+                break;
+            case 0x0a: /* grp d9/2 */
+                switch(rm) {
+                case 0: /* fnop */
+                    gen_insn0(OP_NOP);
+                    break;
+                default:
+                    error("unhandled FP GRP d9/2\n");
+                    return -1;
+                }
+                break;
+            case 0x0c: /* grp d9/4 */
+                switch(rm) {
+                case 0: /* fchs */
+                    gen_insn3(OP_FSUBQ, OR_ST0, OR_ZERO, OR_ST0);
+                    break;
+                case 1: /* fabs */
+                    gen_insn2(OP_FABSQ, OR_ST0, OR_ST0);
+                    break;
+                case 4: /* ftst */
+                    gen_insn3(OP_CMP, OR_ZERO, OR_ST0, OR_ZERO);
+                    break;
+                case 5: /* fxam */
+                    gen_insn3(OP_FSPECIAL, OR_ZERO, OR_ST0, OR_ZERO);
+                    break;
+                default:
+                    return -1;
+                }
+                break;
+            case 0x0d: /* grp d9/5 */
+                {
+                    if (rm == 7) {
+                        error("bad GRP d9/5");
+                        return -1;
+                    }
+                    /* XXX: needs constant load or symbol table */
+                    gen_insn0(OP_FPUSH);
+                    gen_ld(OP_LDQ, OR_ST0, OR_ZERO, 
+                               (rm * 8) + FLOAT_CONST_ADDR);
+                }
+                break;
+            case 0x0e: /* grp d9/6 */
+                switch(rm) {
+                case 0: /* f2xm1 */
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST0, OR_ZERO);
+                    break;
+                case 1: /* fyl2x */
+                    gen_insn3(OP_FSPECIAL, OR_ST1, OR_ST0, OR_ST1);
+                    gen_insn0(OP_FPOP);
+                    break;
+                case 2: /* fptan */
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST0, OR_ZERO);
+                    gen_insn0(OP_FPUSH);
+                    /* load one */
+                    gen_ld(OP_LDQ, OR_ST0, OR_ZERO, 
+                               (0 * 8) + FLOAT_CONST_ADDR);
+                    break;
+                case 3: /* fpatan */
+                    gen_insn3(OP_FSPECIAL, OR_ST1, OR_ST0, OR_ST1);
+                    gen_insn0(OP_FPOP);
+                    break;
+                case 4: /* fxtract */
+                    gen_insn0(OP_FPUSH);
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST1, OR_ZERO);
+                    gen_insn3(OP_FSPECIAL, OR_ST1, OR_ST1, OR_ZERO);
+                    break;
+                case 5: /* fprem1 */
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST0, OR_ST1);
+                    break;
+                case 6: /* fdecstp */
+                    gen_insn0(OP_FPUSH);
+                    break;
+                default:
+                case 7: /* fincstp */
+                    gen_insn0(OP_FPOP);
+                    break;
+                }
+                break;
+            case 0x0f: /* grp d9/7 */
+                switch(rm) {
+                case 0: /* fprem */
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST0, OR_ST1);
+                    break;
+                case 1: /* fyl2xp1 */
+                    gen_insn3(OP_FSPECIAL, OR_ST1, OR_ST0, OR_ST1);
+                    gen_insn0(OP_FPOP);
+                    break;
+                case 3: /* fsincos */
+                    gen_insn0(OP_FPUSH);
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST1, OR_ZERO);
+                    gen_insn3(OP_FSPECIAL, OR_ST1, OR_ST1, OR_ZERO);
+                    break;
+                case 5: /* fscale */
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST0, OR_ST1);
+                    break;
+                case 2: /* fsqrt */
+                case 4: /* frndint */
+                case 6: /* fsin */
+                default:
+                case 7: /* fcos */
+                    gen_insn3(OP_FSPECIAL, OR_ST0, OR_ST0, OR_ZERO);
+                    break;
+                }
+                break;
+            case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */
+            case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */
+            case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */
+                {
+                    int op1, swap;
+                    
+                    op1 = fp_ops[op & 7];
+                    swap = 0;
+                    if ((op & 7) == 5 || (op & 7) == 7)
+                        swap = 1;
+                    if (op >= 0x20) {
+                        if (swap) 
+                            gen_insn3(op1, opreg, OR_ST0, opreg);
+                        else
+                            gen_insn3(op1, opreg, opreg, OR_ST0);
+                    } else {
+                        if (swap)
+                            gen_insn3(op1, OR_ST0, opreg, OR_ST0);
+                        else
+                            gen_insn3(op1, OR_ST0, OR_ST0, opreg);
+                    }
+                    if (op >= 0x30)
+                        gen_insn0(OP_FPOP);
+                }
+                break;
+            case 0x02: /* fcom */
+                gen_insn3(OP_CMP, OR_ZERO, OR_ST0, opreg);
+                break;
+            case 0x03: /* fcomp */
+                gen_insn3(OP_CMP, OR_ZERO, OR_ST0, opreg);
+                gen_insn0(OP_FPOP);
+                break;
+            case 0x15: /* da/5 */
+                switch(rm) {
+                case 1: /* fucompp */
+                    gen_insn3(OP_CMP, OR_ZERO, OR_ST0, opreg);
+                    gen_insn0(OP_FPOP);
+                    gen_insn0(OP_FPOP);
+                    break;
+                default:
+                    return -1;
+                }
+                break;
+            case 0x2a: /* fst sti */
+                gen_mov(opreg, OR_ST0);
+                break;
+            case 0x2b: /* fstp sti */
+                gen_mov(opreg, OR_ST0);
+                gen_insn0(OP_FPOP);
+                break;
+            case 0x33: /* de/3 */
+                switch(rm) {
+                case 1: /* fcompp */
+                    gen_insn3(OP_CMP, OR_ZERO, OR_ST0, opreg);
+                    gen_insn0(OP_FPOP);
+                    gen_insn0(OP_FPOP);
+                    break;
+                default:
+                    return -1;
+                }
+                break;
+            case 0x3c: /* df/4 */
+                switch(rm) {
+                case 0:
+                    gen_insn3(OP_FNSTS, OR_EAX, OR_ZERO, OR_ZERO);
+                    break;
+                default:
+                    return -1;
+                }
+                break;
+            default:
+                error("unhandled FP\n");
+                return -1;
+            }
+        }
+        break;
+#endif
+        /************************/
+        /* string ops */
+    case 0xa4: /* movsS */
+    case 0xa5:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (prefixes & PREFIX_REPZ) {
+            gen_op_movs[3 + ot]();
+        } else {
+            gen_op_movs[ot]();
+        }
+        break;
+        
+    case 0xaa: /* stosS */
+    case 0xab:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (prefixes & PREFIX_REPZ) {
+            gen_op_stos[3 + ot]();
+        } else {
+            gen_op_stos[ot]();
+        }
+        break;
+    case 0xac: /* lodsS */
+    case 0xad:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (prefixes & PREFIX_REPZ) {
+            gen_op_lods[3 + ot]();
+        } else {
+            gen_op_lods[ot]();
+        }
+        break;
+    case 0xae: /* scasS */
+    case 0xaf:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (prefixes & PREFIX_REPNZ) {
+            gen_op_scas[6 + ot]();
+        } else if (prefixes & PREFIX_REPZ) {
+            gen_op_scas[3 + ot]();
+        } else {
+            gen_op_scas[ot]();
+        }
+        break;
+
+    case 0xa6: /* cmpsS */
+    case 0xa7:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (prefixes & PREFIX_REPNZ) {
+            gen_op_cmps[6 + ot]();
+        } else if (prefixes & PREFIX_REPZ) {
+            gen_op_cmps[3 + ot]();
+        } else {
+            gen_op_cmps[ot]();
+        }
+        break;
+        
+    case 0x6c: /* insS */
+    case 0x6d:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (prefixes & PREFIX_REPZ) {
+            gen_op_ins[3 + ot]();
+        } else {
+            gen_op_ins[ot]();
+        }
+        break;
+    case 0x6e: /* outsS */
+    case 0x6f:
+        if ((b & 1) == 0)
+            ot = OT_BYTE;
+        else
+            ot = dflag ? OT_LONG : OT_WORD;
+        if (prefixes & PREFIX_REPZ) {
+            gen_op_outs[3 + ot]();
+        } else {
+            gen_op_outs[ot]();
+        }
+        break;
+
+        /************************/
+        /* control */
+    case 0xc2: /* ret im */
+        /* XXX: handle stack pop ? */
+        val = ldsw(s->pc);
+        s->pc += 2;
+        gen_op_popl_T0();
+        gen_op_addl_ESP_im(val);
+        gen_op_jmp_T0();
+        break;
+    case 0xc3: /* ret */
+        gen_op_popl_T0();
+        gen_op_jmp_T0();
+        break;
+    case 0xe8: /* call */
+        val = insn_get(s, OT_LONG);
+        val += (long)s->pc;
+        gen_op1_movl_T1_im((long)s->pc);
+        gen_op_pushl_T1();
+        gen_op_jmp_im(val);
+        break;
+    case 0xe9: /* jmp */
+        val = insn_get(s, OT_LONG);
+        val += (long)s->pc;
+        gen_op_jmp_im(val);
+        break;
+    case 0xeb: /* jmp Jb */
+        val = (int8_t)insn_get(s, OT_BYTE);
+        val += (long)s->pc;
+        gen_op_jmp_im(val);
+        break;
+    case 0x70 ... 0x7f: /* jcc Jb */
+        val = (int8_t)insn_get(s, OT_BYTE);
+        val += (long)s->pc;
+        goto do_jcc;
+    case 0x180 ... 0x18f: /* jcc Jv */
+        if (dflag) {
+            val = insn_get(s, OT_LONG);
+        } else {
+            val = (int16_t)insn_get(s, OT_WORD); 
+        }
+        val += (long)s->pc; /* XXX: fix 16 bit wrap */
+    do_jcc:
+        gen_jcc(s, b, val);
+        break;
+
+    case 0x190 ... 0x19f:
+        modrm = ldub(s->pc++);
+        gen_setcc(s, b);
+        gen_ldst_modrm(s, modrm, OT_BYTE, OR_TMP0, 1);
+        break;
+
+        /************************/
+        /* flags */
+    case 0x9c: /* pushf */
+        gen_op_movl_T0_eflags();
+        gen_op_pushl_T0();
+        break;
+    case 0x9d: /* popf */
+        gen_op_popl_T0();
+        gen_op_movl_eflags_T0();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0x9e: /* sahf */
+        gen_op_mov_TN_reg[OT_BYTE][0][R_AH]();
+        if (s->cc_op != CC_OP_DYNAMIC)
+            op_set_cc_op(s->cc_op);
+        gen_op_movb_eflags_T0();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0x9f: /* lahf */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            op_set_cc_op(s->cc_op);
+        gen_op_movl_T0_eflags();
+        gen_op_mov_reg_T0[OT_BYTE][R_AH]();
+        break;
+    case 0xf5: /* cmc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            op_set_cc_op(s->cc_op);
+        gen_op_cmc();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xf8: /* clc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            op_set_cc_op(s->cc_op);
+        gen_op_clc();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xf9: /* stc */
+        if (s->cc_op != CC_OP_DYNAMIC)
+            op_set_cc_op(s->cc_op);
+        gen_op_stc();
+        s->cc_op = CC_OP_EFLAGS;
+        break;
+    case 0xfc: /* cld */
+        gen_op_cld();
+        break;
+    case 0xfd: /* std */
+        gen_op_std();
+        break;
+
+        /************************/
+        /* misc */
+    case 0x90: /* nop */
+        break;
+
+#if 0        
+    case 0x1a2: /* cpuid */
+        gen_insn0(OP_ASM);
+        break;
+#endif
+    default:
+        error("unknown opcode %x", b);
+        return -1;
+    }
+    return (long)s->pc;
+}
+