]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | //===-- X86Disassembler.h - Disassembler for x86 and x86_64 -----*- C++ -*-===// |
2 | // | |
3 | // The LLVM Compiler Infrastructure | |
4 | // | |
5 | // This file is distributed under the University of Illinois Open Source | |
6 | // License. See LICENSE.TXT for details. | |
7 | // | |
8 | //===----------------------------------------------------------------------===// | |
9 | // | |
10 | // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and | |
11 | // 64-bit X86 instruction sets. The main decode sequence for an assembly | |
12 | // instruction in this disassembler is: | |
13 | // | |
14 | // 1. Read the prefix bytes and determine the attributes of the instruction. | |
15 | // These attributes, recorded in enum attributeBits | |
16 | // (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM | |
17 | // provides a mapping from bitmasks to contexts, which are represented by | |
18 | // enum InstructionContext (ibid.). | |
19 | // | |
20 | // 2. Read the opcode, and determine what kind of opcode it is. The | |
21 | // disassembler distinguishes four kinds of opcodes, which are enumerated in | |
22 | // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte | |
23 | // (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a | |
24 | // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context. | |
25 | // | |
26 | // 3. Depending on the opcode type, look in one of four ClassDecision structures | |
27 | // (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which | |
28 | // OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode to get | |
29 | // a ModRMDecision (ibid.). | |
30 | // | |
31 | // 4. Some instructions, such as escape opcodes or extended opcodes, or even | |
32 | // instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the | |
33 | // ModR/M byte to complete decode. The ModRMDecision's type is an entry from | |
34 | // ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the | |
35 | // ModR/M byte is required and how to interpret it. | |
36 | // | |
37 | // 5. After resolving the ModRMDecision, the disassembler has a unique ID | |
38 | // of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in | |
39 | // INSTRUCTIONS_SYM yields the name of the instruction and the encodings and | |
40 | // meanings of its operands. | |
41 | // | |
42 | // 6. For each operand, its encoding is an entry from OperandEncoding | |
43 | // (X86DisassemblerDecoderCommon.h) and its type is an entry from | |
44 | // OperandType (ibid.). The encoding indicates how to read it from the | |
45 | // instruction; the type indicates how to interpret the value once it has | |
46 | // been read. For example, a register operand could be stored in the R/M | |
47 | // field of the ModR/M byte, the REG field of the ModR/M byte, or added to | |
48 | // the main opcode. This is orthogonal to its meaning (a GPR or an XMM | |
49 | // register, for instance). Given this information, the operands can be | |
50 | // extracted and interpreted. | |
51 | // | |
52 | // 7. As the last step, the disassembler translates the instruction information | |
53 | // and operands into a format understandable by the client - in this case, an | |
54 | // MCInst for use by the MC infrastructure. | |
55 | // | |
56 | // The disassembler is broken broadly into two parts: the table emitter that | |
57 | // emits the instruction decode tables discussed above during compilation, and | |
58 | // the disassembler itself. The table emitter is documented in more detail in | |
59 | // utils/TableGen/X86DisassemblerEmitter.h. | |
60 | // | |
61 | // X86Disassembler.h contains the public interface for the disassembler, | |
62 | // adhering to the MCDisassembler interface. | |
63 | // X86Disassembler.cpp contains the code responsible for step 7, and for | |
64 | // invoking the decoder to execute steps 1-6. | |
65 | // X86DisassemblerDecoderCommon.h contains the definitions needed by both the | |
66 | // table emitter and the disassembler. | |
67 | // X86DisassemblerDecoder.h contains the public interface of the decoder, | |
68 | // factored out into C for possible use by other projects. | |
69 | // X86DisassemblerDecoder.c contains the source code of the decoder, which is | |
70 | // responsible for steps 1-6. | |
71 | // | |
72 | //===----------------------------------------------------------------------===// | |
73 | ||
74 | #ifndef X86DISASSEMBLER_H | |
75 | #define X86DISASSEMBLER_H | |
76 | ||
77 | #define INSTRUCTION_SPECIFIER_FIELDS \ | |
78 | uint16_t operands; | |
79 | ||
80 | #define INSTRUCTION_IDS \ | |
81 | uint16_t instructionIDs; | |
82 | ||
83 | #include "X86DisassemblerDecoderCommon.h" | |
84 | ||
85 | #undef INSTRUCTION_SPECIFIER_FIELDS | |
86 | #undef INSTRUCTION_IDS | |
87 | ||
88 | #include "llvm/MC/MCDisassembler.h" | |
89 | ||
90 | namespace llvm { | |
91 | ||
92 | class MCInst; | |
93 | class MCInstrInfo; | |
94 | class MCSubtargetInfo; | |
95 | class MemoryObject; | |
96 | class raw_ostream; | |
97 | ||
223e47cc LB |
98 | namespace X86Disassembler { |
99 | ||
100 | /// X86GenericDisassembler - Generic disassembler for all X86 platforms. | |
101 | /// A platform-specific disassembler should only need to subclass this class | |
102 | /// and pass a different DisassemblerMode value to the constructor. | |
103 | class X86GenericDisassembler : public MCDisassembler { | |
104 | const MCInstrInfo *MII; | |
105 | public: | |
106 | /// Constructor - Initializes the disassembler. | |
107 | /// | |
108 | /// @param mode - The X86 architecture mode to decode for. | |
109 | X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode, | |
110 | const MCInstrInfo *MII); | |
111 | private: | |
112 | ~X86GenericDisassembler(); | |
113 | public: | |
114 | ||
115 | /// getInstruction - See MCDisassembler. | |
116 | DecodeStatus getInstruction(MCInst &instr, | |
117 | uint64_t &size, | |
118 | const MemoryObject &region, | |
119 | uint64_t address, | |
120 | raw_ostream &vStream, | |
121 | raw_ostream &cStream) const; | |
122 | ||
223e47cc LB |
123 | private: |
124 | DisassemblerMode fMode; | |
125 | }; | |
126 | ||
127 | } // namespace X86Disassembler | |
128 | ||
129 | } // namespace llvm | |
130 | ||
131 | #endif |