]>
Commit | Line | Data |
---|---|---|
223e47cc LB |
1 | #===- object.py - Python Object Bindings --------------------*- python -*--===# |
2 | # | |
3 | # The LLVM Compiler Infrastructure | |
4 | # | |
5 | # This file is distributed under the University of Illinois Open Source | |
6 | # License. See LICENSE.TXT for details. | |
7 | # | |
8 | #===------------------------------------------------------------------------===# | |
9 | ||
10 | r""" | |
11 | Object File Interface | |
12 | ===================== | |
13 | ||
14 | This module provides an interface for reading information from object files | |
15 | (e.g. binary executables and libraries). | |
16 | ||
17 | Using this module, you can obtain information about an object file's sections, | |
18 | symbols, and relocations. These are represented by the classes ObjectFile, | |
19 | Section, Symbol, and Relocation, respectively. | |
20 | ||
21 | Usage | |
22 | ----- | |
23 | ||
24 | The only way to use this module is to start by creating an ObjectFile. You can | |
25 | create an ObjectFile by loading a file (specified by its path) or by creating an | |
26 | llvm.core.MemoryBuffer and loading that. | |
27 | ||
28 | Once you have an object file, you can inspect its sections and symbols directly | |
29 | by calling get_sections() and get_symbols() respectively. To inspect | |
30 | relocations, call get_relocations() on a Section instance. | |
31 | ||
32 | Iterator Interface | |
33 | ------------------ | |
34 | ||
35 | The LLVM bindings expose iteration over sections, symbols, and relocations in a | |
36 | way that only allows one instance to be operated on at a single time. This is | |
37 | slightly annoying from a Python perspective, as it isn't very Pythonic to have | |
38 | objects that "expire" but are still active from a dynamic language. | |
39 | ||
40 | To aid working around this limitation, each Section, Symbol, and Relocation | |
41 | instance caches its properties after first access. So, if the underlying | |
42 | iterator is advanced, the properties can still be obtained provided they have | |
43 | already been retrieved. | |
44 | ||
45 | In addition, we also provide a "cache" method on each class to cache all | |
46 | available data. You can call this on each obtained instance. Or, you can pass | |
47 | cache=True to the appropriate get_XXX() method to have this done for you. | |
48 | ||
49 | Here are some examples on how to perform iteration: | |
50 | ||
51 | obj = ObjectFile(filename='/bin/ls') | |
52 | ||
53 | # This is OK. Each Section is only accessed inside its own iteration slot. | |
54 | section_names = [] | |
55 | for section in obj.get_sections(): | |
56 | section_names.append(section.name) | |
57 | ||
58 | # This is NOT OK. You perform a lookup after the object has expired. | |
59 | symbols = list(obj.get_symbols()) | |
60 | for symbol in symbols: | |
61 | print symbol.name # This raises because the object has expired. | |
62 | ||
63 | # In this example, we mix a working and failing scenario. | |
64 | symbols = [] | |
65 | for symbol in obj.get_symbols(): | |
66 | symbols.append(symbol) | |
67 | print symbol.name | |
68 | ||
69 | for symbol in symbols: | |
70 | print symbol.name # OK | |
71 | print symbol.address # NOT OK. We didn't look up this property before. | |
72 | ||
73 | # Cache everything up front. | |
74 | symbols = list(obj.get_symbols(cache=True)) | |
75 | for symbol in symbols: | |
76 | print symbol.name # OK | |
77 | ||
78 | """ | |
79 | ||
80 | from ctypes import c_char_p | |
1a4d82fc JJ |
81 | from ctypes import c_char |
82 | from ctypes import POINTER | |
223e47cc | 83 | from ctypes import c_uint64 |
1a4d82fc | 84 | from ctypes import string_at |
223e47cc LB |
85 | |
86 | from .common import CachedProperty | |
87 | from .common import LLVMObject | |
88 | from .common import c_object_p | |
89 | from .common import get_library | |
90 | from .core import MemoryBuffer | |
91 | ||
# Public names exported by a star-import of this module.
__all__ = ["lib", "ObjectFile", "Relocation", "Section", "Symbol"]
99 | ||
class ObjectFile(LLVMObject):
    """Represents an object/binary file."""

    def __init__(self, filename=None, contents=None):
        """Construct an instance from a filename or a MemoryBuffer.

        filename is a path; it is handed to MemoryBuffer(filename=...) to
        load the file.
        contents, when given, must be a llvm.core.MemoryBuffer instance.

        If both arguments are supplied, filename takes precedence and the
        buffer loaded from it replaces contents.

        Raises TypeError if contents is not a MemoryBuffer, and Exception if
        neither filename nor contents is provided.
        """
        # Validate explicitly rather than with ``assert``: asserts are
        # stripped under ``python -O`` and a truthiness test would let falsy
        # non-buffer values (e.g. '') slip through to the C call.
        if contents is not None and not isinstance(contents, MemoryBuffer):
            raise TypeError('contents must be a MemoryBuffer instance.')

        if filename is not None:
            contents = MemoryBuffer(filename=filename)

        if contents is None:
            raise Exception('No input found.')

        ptr = lib.LLVMCreateObjectFile(contents)
        LLVMObject.__init__(self, ptr, disposer=lib.LLVMDisposeObjectFile)
        # NOTE(review): presumably ties the buffer's lifetime to this object
        # so it is not released independently -- confirm in LLVMObject.
        self.take_ownership(contents)

    def get_sections(self, cache=False):
        """Obtain the sections in this object file.

        This is a generator for llvm.object.Section instances. If cache is
        true, Section.cache() is called on each instance before it is
        yielded.

        Sections are exposed as limited-use objects. See the module's
        documentation on iterators for more.
        """
        sections = lib.LLVMGetSections(self)
        last = None
        try:
            while not lib.LLVMIsSectionIteratorAtEnd(self, sections):
                last = Section(sections)
                if cache:
                    last.cache()

                yield last

                # The shared native iterator now points at the next section,
                # so the instance just handed out must stop using it.
                lib.LLVMMoveToNextSection(sections)
                last.expire()
        finally:
            # Runs on exhaustion and also when the generator is closed or
            # abandoned early, so the native iterator is always released and
            # the last instance can never touch it afterwards.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSectionIterator(sections)

    def get_symbols(self, cache=False):
        """Obtain the symbols in this object file.

        This is a generator for llvm.object.Symbol instances. If cache is
        true, Symbol.cache() is called on each instance before it is yielded.

        Each Symbol instance is a limited-use object. See this module's
        documentation on iterators for more.
        """
        symbols = lib.LLVMGetSymbols(self)
        last = None
        try:
            while not lib.LLVMIsSymbolIteratorAtEnd(self, symbols):
                last = Symbol(symbols, self)
                if cache:
                    last.cache()

                yield last

                # The shared native iterator now points at the next symbol,
                # so the instance just handed out must stop using it.
                lib.LLVMMoveToNextSymbol(symbols)
                last.expire()
        finally:
            # Runs on exhaustion and also when the generator is closed or
            # abandoned early, so the native iterator is always released.
            if last is not None:
                last.expire()

            lib.LLVMDisposeSymbolIterator(symbols)
178 | ||
class Section(LLVMObject):
    """Represents a section in an object file."""

    def __init__(self, ptr):
        """Construct a new section instance.

        Section instances can currently only be created from an ObjectFile
        instance. Therefore, this constructor should not be used outside of
        this module.
        """
        LLVMObject.__init__(self, ptr)

        self.expired = False

    @CachedProperty
    def name(self):
        """Obtain the string name of the section.

        This is typically something like '.dynsym' or '.rodata'.

        Raises Exception if the instance has expired.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionName(self)

    @CachedProperty
    def size(self):
        """The size of the section, in bytes.

        Raises Exception if the instance has expired.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionSize(self)

    @CachedProperty
    def contents(self):
        """The raw contents of the section.

        Returns a byte string of length ``size``, or None when
        LLVMGetSectionContents returns a null pointer.

        Raises Exception if the instance has expired.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        siz = self.size

        # The returned pointer is not NUL-terminated data, so the length has
        # to be passed explicitly when copying it out.
        r = lib.LLVMGetSectionContents(self)
        if r:
            return string_at(r, siz)
        return None

    @CachedProperty
    def address(self):
        """The address of this section.

        Raises Exception if the instance has expired.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        return lib.LLVMGetSectionAddress(self)

    def has_symbol(self, symbol):
        """Returns whether a Symbol instance is present in this Section."""
        if self.expired:
            raise Exception('Section instance has expired.')

        assert isinstance(symbol, Symbol)
        return lib.LLVMGetSectionContainsSymbol(self, symbol)

    def get_relocations(self, cache=False):
        """Obtain the relocations in this Section.

        This is a generator for llvm.object.Relocation instances. If cache is
        true, Relocation.cache() is called on each instance before it is
        yielded.

        Each instance is a limited-use object. See this module's
        documentation on iterators for more.

        Raises Exception (on first iteration) if this Section has expired.
        """
        if self.expired:
            raise Exception('Section instance has expired.')

        relocations = lib.LLVMGetRelocations(self)
        last = None
        try:
            while not lib.LLVMIsRelocationIteratorAtEnd(self, relocations):
                last = Relocation(relocations)
                if cache:
                    last.cache()

                yield last

                # The shared native iterator now points at the next
                # relocation, so the instance just handed out must stop
                # using it.
                lib.LLVMMoveToNextRelocation(relocations)
                last.expire()
        finally:
            # Runs on exhaustion and also when the generator is closed or
            # abandoned early, so the native iterator is always released.
            if last is not None:
                last.expire()

            lib.LLVMDisposeRelocationIterator(relocations)

    def cache(self):
        """Cache properties of this Section.

        This can be called as a workaround to the single active Section
        limitation. When called, the properties of the Section are fetched so
        they are still available after the Section has been marked inactive.
        """
        getattr(self, 'name')
        getattr(self, 'size')
        getattr(self, 'contents')
        getattr(self, 'address')

    def expire(self):
        """Expire the section.

        This is called internally by the section iterator.
        """
        self.expired = True
289 | ||
class Symbol(LLVMObject):
    """Represents a symbol in an object file."""

    def __init__(self, ptr, object_file=None):
        """Construct a new symbol instance.

        ptr is the native symbol iterator this instance reads from.
        object_file is the ObjectFile the symbol belongs to. It may be
        omitted when the owner is not known to the caller; in that case the
        ``section`` property is unavailable.
        """
        assert isinstance(ptr, c_object_p)
        assert object_file is None or isinstance(object_file, ObjectFile)

        LLVMObject.__init__(self, ptr)

        self.expired = False
        self._object_file = object_file

    @CachedProperty
    def name(self):
        """The str name of the symbol.

        This is often a function or variable name. Keep in mind that name
        mangling could be in effect.

        Raises Exception if the instance has expired.
        """
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolName(self)

    @CachedProperty
    def address(self):
        """The address of this symbol.

        Raises Exception if the instance has expired.
        """
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolAddress(self)

    @CachedProperty
    def size(self):
        """The size of the symbol, in bytes.

        Raises Exception if the instance has expired.
        """
        if self.expired:
            raise Exception('Symbol instance has expired.')

        return lib.LLVMGetSymbolSize(self)

    @CachedProperty
    def section(self):
        """The Section to which this Symbol belongs.

        The returned Section instance does not expire, unlike Sections that are
        commonly obtained through iteration.

        A new section iterator is obtained the first time this is accessed on
        an instance, so calling this on a number of Symbol instances could be
        expensive.

        Raises Exception if the instance has expired or was created without
        an owning ObjectFile.
        """
        # Like every other property, this reads through the native symbol
        # iterator, which is no longer valid for this symbol once expired.
        if self.expired:
            raise Exception('Symbol instance has expired.')

        if self._object_file is None:
            raise Exception('Symbol is not associated with an ObjectFile.')

        sections = lib.LLVMGetSections(self._object_file)
        lib.LLVMMoveToContainingSection(sections, self)

        return Section(sections)

    def cache(self):
        """Cache all cacheable properties.

        Fetches name, address and size; ``section`` is not fetched here.
        """
        getattr(self, 'name')
        getattr(self, 'address')
        getattr(self, 'size')

    def expire(self):
        """Mark the object as expired to prevent future API accesses.

        This is called internally by this module and it is unlikely that
        external callers have a legitimate reason for using it.
        """
        self.expired = True
357 | ||
class Relocation(LLVMObject):
    """Represents a relocation definition."""

    def __init__(self, ptr):
        """Create a new relocation instance.

        Relocations are created from objects derived from Section instances.
        Therefore, this constructor should not be called outside of this
        module. See Section.get_relocations() for the proper method to obtain
        a Relocation instance.
        """
        assert isinstance(ptr, c_object_p)

        LLVMObject.__init__(self, ptr)

        self.expired = False

    @CachedProperty
    def address(self):
        """The address of this relocation.

        Raises Exception if the instance has expired.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationAddress(self)

    @CachedProperty
    def offset(self):
        """The offset of this relocation.

        Raises Exception if the instance has expired.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationOffset(self)

    @CachedProperty
    def symbol(self):
        """The Symbol corresponding to this Relocation.

        Raises Exception if the instance has expired.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        ptr = lib.LLVMGetRelocationSymbol(self)
        # NOTE(review): the Symbol is built from the pointer alone because no
        # owning ObjectFile is available here -- confirm Symbol's constructor
        # accepts being called without one.
        return Symbol(ptr)

    @CachedProperty
    def type_number(self):
        """The relocation type, as a number.

        Raises Exception if the instance has expired.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationType(self)

    @CachedProperty
    def type_name(self):
        """The relocation type's name, as a str.

        Raises Exception if the instance has expired.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationTypeName(self)

    @CachedProperty
    def value_string(self):
        """The value string reported by LLVMGetRelocationValueString.

        Raises Exception if the instance has expired.
        """
        if self.expired:
            raise Exception('Relocation instance has expired.')

        return lib.LLVMGetRelocationValueString(self)

    def expire(self):
        """Expire this instance, making future API accesses fail."""
        self.expired = True

    def cache(self):
        """Cache all cacheable properties on this instance."""
        getattr(self, 'address')
        getattr(self, 'offset')
        getattr(self, 'symbol')
        # The property is named ``type_number``; this class defines no
        # ``type`` attribute.
        getattr(self, 'type_number')
        getattr(self, 'type_name')
        getattr(self, 'value_string')
434 | ||
def register_library(library):
    """Attach ctypes prototypes for the Object.h functions to ``library``.

    For every function listed below, ``argtypes`` is assigned; ``restype`` is
    assigned only for functions that declare one here, and is otherwise left
    at whatever the library object already has.
    """
    # Marker for "do not touch restype". None cannot be used for this because
    # ctypes gives a restype of None its own meaning.
    keep_restype = object()

    # (function name, argtypes, restype), in registration order.
    prototypes = (
        # Object files.
        ('LLVMCreateObjectFile', [MemoryBuffer], c_object_p),
        ('LLVMDisposeObjectFile', [ObjectFile], keep_restype),

        # Section iteration.
        ('LLVMGetSections', [ObjectFile], c_object_p),
        ('LLVMDisposeSectionIterator', [c_object_p], keep_restype),
        ('LLVMIsSectionIteratorAtEnd', [ObjectFile, c_object_p], bool),
        ('LLVMMoveToNextSection', [c_object_p], keep_restype),
        ('LLVMMoveToContainingSection', [c_object_p, c_object_p],
         keep_restype),

        # Symbol iteration.
        ('LLVMGetSymbols', [ObjectFile], c_object_p),
        ('LLVMDisposeSymbolIterator', [c_object_p], keep_restype),
        ('LLVMIsSymbolIteratorAtEnd', [ObjectFile, c_object_p], bool),
        ('LLVMMoveToNextSymbol', [c_object_p], keep_restype),

        # Section accessors.
        ('LLVMGetSectionName', [c_object_p], c_char_p),
        ('LLVMGetSectionSize', [c_object_p], c_uint64),
        # Can't use c_char_p here as it isn't a NUL-terminated string.
        ('LLVMGetSectionContents', [c_object_p], POINTER(c_char)),
        ('LLVMGetSectionAddress', [c_object_p], c_uint64),
        ('LLVMGetSectionContainsSymbol', [c_object_p, c_object_p], bool),

        # Relocation iteration.
        ('LLVMGetRelocations', [c_object_p], c_object_p),
        ('LLVMDisposeRelocationIterator', [c_object_p], keep_restype),
        ('LLVMIsRelocationIteratorAtEnd', [c_object_p, c_object_p], bool),
        ('LLVMMoveToNextRelocation', [c_object_p], keep_restype),

        # Symbol accessors.
        ('LLVMGetSymbolName', [Symbol], c_char_p),
        ('LLVMGetSymbolAddress', [Symbol], c_uint64),
        ('LLVMGetSymbolSize', [Symbol], c_uint64),

        # Relocation accessors.
        ('LLVMGetRelocationAddress', [c_object_p], c_uint64),
        ('LLVMGetRelocationOffset', [c_object_p], c_uint64),
        ('LLVMGetRelocationSymbol', [c_object_p], c_object_p),
        ('LLVMGetRelocationType', [c_object_p], c_uint64),
        ('LLVMGetRelocationTypeName', [c_object_p], c_char_p),
        ('LLVMGetRelocationValueString', [c_object_p], c_char_p),
    )

    for func_name, arg_types, result_type in prototypes:
        func = getattr(library, func_name)
        func.argtypes = arg_types
        if result_type is not keep_restype:
            func.restype = result_type
518 | ||
# Obtain the LLVM library handle once, at import time, and attach the
# prototypes declared in register_library() to it. The classes in this module
# reach the C API through this module-level ``lib``.
lib = get_library()
register_library(lib)