]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | #===- disassembler.py - Python LLVM Bindings -----------------*- python -*--===# |
2 | # | |
3 | # The LLVM Compiler Infrastructure | |
4 | # | |
5 | # This file is distributed under the University of Illinois Open Source | |
6 | # License. See LICENSE.TXT for details. | |
7 | # | |
8 | #===------------------------------------------------------------------------===# | |
9 | ||
10 | from ctypes import CFUNCTYPE | |
11 | from ctypes import POINTER | |
12 | from ctypes import addressof | |
223e47cc LB |
13 | from ctypes import c_byte |
14 | from ctypes import c_char_p | |
15 | from ctypes import c_int | |
16 | from ctypes import c_size_t | |
17 | from ctypes import c_ubyte | |
18 | from ctypes import c_uint64 | |
19 | from ctypes import c_void_p | |
20 | from ctypes import cast | |
21 | ||
22 | from .common import LLVMObject | |
23 | from .common import c_object_p | |
24 | from .common import get_library | |
25 | ||
# Only the Disassembler class is exported by ``from ... import *``.
__all__ = ['Disassembler']

# Library object returned by get_library(); every LLVM-C entry point used in
# this module is looked up as an attribute of it.
lib = get_library()

# Maps a callback name to its ctypes function prototype. It starts out empty
# and is filled in further down in this module.
callbacks = {}

# Constants for set_options
Option_UseMarkup = 1
35 | ||
1a4d82fc JJ |
36 | |
37 | ||
_initialized = False
_targets = ['AArch64', 'ARM', 'Hexagon', 'MSP430', 'Mips', 'NVPTX', 'PowerPC',
            'R600', 'Sparc', 'SystemZ', 'X86', 'XCore']


def _ensure_initialized():
    """Call every available per-target LLVM initializer, first call only.

    For each name in ``_targets`` this looks up the ``TargetInfo``,
    ``TargetMC`` and ``Disassembler`` initializers on ``lib`` and invokes the
    ones that exist. Later calls return immediately.
    """
    global _initialized
    if _initialized:
        return

    # Ideally this would call LLVMInitializeAll{TargetInfo,TargetMC,
    # Disassembler}s, but those are only static inline functions in the
    # llvm-c headers and therefore are not exported as symbols by the shared
    # library. Until that changes, initialize each target individually.
    components = ("TargetInfo", "TargetMC", "Disassembler")
    for target_name in _targets:
        for component in components:
            symbol = "LLVMInitialize" + target_name + component
            try:
                init_fn = getattr(lib, symbol)
            except AttributeError:
                # The library does not provide this initializer; skip it.
                continue
            init_fn()

    # Only reached when every initializer that was found ran without raising.
    _initialized = True
57 | ||
58 | ||
223e47cc LB |
class Disassembler(LLVMObject):
    """Represents a disassembler instance.

    Disassembler instances are tied to a specific "triple," which must be
    defined at creation time.

    Disassembler instances can disassemble instructions from multiple sources.
    """

    # Size, in bytes, of the buffer handed to LLVMDisasmInstruction to receive
    # the textual form of one instruction.
    _OUT_STR_SIZE = 255

    def __init__(self, triple):
        """Create a new disassembler instance.

        The triple argument is the triple to create the disassembler for. This
        is something like 'i386-apple-darwin9'. Both byte strings and text
        strings are accepted.

        Raises Exception if LLVMCreateDisasm returns a null pointer for the
        triple.
        """
        _ensure_initialized()

        # c_char_p only accepts byte strings. On Python 3 a text str must be
        # encoded first; on Python 2 str already is bytes, so this is a no-op.
        triple_bytes = triple
        if isinstance(triple, str) and not isinstance(triple, bytes):
            triple_bytes = triple.encode('utf-8')

        # No disassembler info, tag type 0 and null callbacks: only plain
        # textual disassembly is requested.
        ptr = lib.LLVMCreateDisasm(c_char_p(triple_bytes), c_void_p(None),
                                   c_int(0), callbacks['op_info'](0),
                                   callbacks['symbol_lookup'](0))
        if not ptr:
            raise Exception('Could not obtain disassembler for triple: %s' %
                            triple)

        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisasmDispose)

    @staticmethod
    def _coerce_source(source):
        """Return source in a form that c_char_p accepts.

        c_char_p rejects bytearray objects, so those are copied into an
        immutable byte string. Anything else is passed through unchanged.
        """
        if isinstance(source, bytearray):
            return bytes(source)
        return source

    def get_instruction(self, source, pc=0):
        """Obtain the next instruction from an input source.

        The input source should be a byte string (str on Python 2, bytes on
        Python 3) or a bytearray.

        This function will start reading bytes from the beginning of the
        source.

        The pc argument specifies the address that the first byte is at.

        This returns a 2-tuple of:

          long number of bytes read. 0 if no instruction was read.
          byte string representation of instruction. This will be the
            assembly that represents the instruction.
        """
        source = self._coerce_source(source)
        buf = cast(c_char_p(source), POINTER(c_ubyte))
        out_str = cast((c_byte * self._OUT_STR_SIZE)(), c_char_p)

        result = lib.LLVMDisasmInstruction(self, buf, c_uint64(len(source)),
                                           c_uint64(pc), out_str,
                                           self._OUT_STR_SIZE)

        return (result, out_str.value)

    def get_instructions(self, source, pc=0):
        """Obtain multiple instructions from an input source.

        This is like get_instruction() except it is a generator for all
        instructions within the source. It starts at the beginning of the
        source and reads instructions until no more can be read.

        This generator returns 3-tuple of:

          long address of instruction.
          long size of instruction, in bytes.
          byte string representation of instruction.
        """
        source = self._coerce_source(source)
        source_bytes = c_char_p(source)
        out_str = cast((c_byte * self._OUT_STR_SIZE)(), c_char_p)

        # View the whole input as one ctypes array so that its base address
        # can be taken and advanced by the number of bytes consumed so far.
        buf = cast(source_bytes, POINTER(c_ubyte * len(source))).contents
        offset = 0
        address = pc
        end_address = pc + len(source)
        while address < end_address:
            b = cast(addressof(buf) + offset, POINTER(c_ubyte))
            result = lib.LLVMDisasmInstruction(self, b,
                    c_uint64(len(source) - offset), c_uint64(address),
                    out_str, self._OUT_STR_SIZE)

            # A size of 0 means no instruction could be read here; stop.
            if result == 0:
                break

            yield (address, result, out_str.value)

            address += result
            offset += result

    def set_options(self, options):
        """Set options on this disassembler.

        The options argument is an integer built from the Option_* constants
        of this module, such as Option_UseMarkup.

        Raises Exception if LLVMSetDisasmOptions returns 0.
        """
        if not lib.LLVMSetDisasmOptions(self, options):
            raise Exception('Unable to set all disassembler options in %i' % options)
147 | ||
223e47cc LB |
148 | |
def register_library(library):
    """Declare ctypes prototypes for the disassembler functions of library.

    Sets ``argtypes`` (and ``restype`` where one is declared) on
    LLVMCreateDisasm, LLVMDisasmDispose, LLVMDisasmInstruction and
    LLVMSetDisasmOptions.
    """
    create_disasm = library.LLVMCreateDisasm
    create_disasm.argtypes = [
        c_char_p,
        c_void_p,
        c_int,
        callbacks['op_info'],
        callbacks['symbol_lookup'],
    ]
    create_disasm.restype = c_object_p

    # Only the argument types are declared for the disposer.
    library.LLVMDisasmDispose.argtypes = [Disassembler]

    disasm_instruction = library.LLVMDisasmInstruction
    disasm_instruction.argtypes = [
        Disassembler,
        POINTER(c_ubyte),
        c_uint64,
        c_uint64,
        c_char_p,
        c_size_t,
    ]
    disasm_instruction.restype = c_size_t

    set_disasm_options = library.LLVMSetDisasmOptions
    set_disasm_options.argtypes = [Disassembler, c_uint64]
    set_disasm_options.restype = c_int
162 | ||
163 | ||
223e47cc LB |
# ctypes prototypes for the two callback arguments of LLVMCreateDisasm. They
# must be in ``callbacks`` before register_library() runs, because it reads
# both entries to build the argument list of LLVMCreateDisasm.
callbacks.update({
    'op_info': CFUNCTYPE(
        c_int,
        c_void_p, c_uint64, c_uint64, c_uint64, c_int, c_void_p,
    ),
    'symbol_lookup': CFUNCTYPE(
        c_char_p,
        c_void_p, c_uint64, POINTER(c_uint64), c_uint64, POINTER(c_char_p),
    ),
})

register_library(lib)