]>
Commit | Line | Data |
---|---|---|
f035d41b XL |
1 | //! The Mach-o, mostly zero-copy, binary format parser and raw struct definitions |
2 | use alloc::vec::Vec; | |
3 | use core::fmt; | |
4 | ||
5 | use log::debug; | |
6 | ||
7 | use scroll::ctx::SizeWith; | |
8 | use scroll::{Pread, BE}; | |
9 | ||
10 | use crate::container; | |
11 | use crate::error; | |
12 | ||
13 | pub mod bind_opcodes; | |
14 | pub mod constants; | |
15 | pub mod exports; | |
16 | pub mod fat; | |
17 | pub mod header; | |
18 | pub mod imports; | |
19 | pub mod load_command; | |
20 | pub mod relocation; | |
21 | pub mod segment; | |
22 | pub mod symbols; | |
23 | ||
24 | pub use self::constants::cputype; | |
25 | ||
26 | /// Returns a big endian magical number | |
27 | pub fn peek(bytes: &[u8], offset: usize) -> error::Result<u32> { | |
28 | Ok(bytes.pread_with::<u32>(offset, scroll::BE)?) | |
29 | } | |
30 | ||
31 | /// Parses a magic number, and an accompanying mach-o binary parsing context, according to the magic number. | |
32 | pub fn parse_magic_and_ctx( | |
33 | bytes: &[u8], | |
34 | offset: usize, | |
35 | ) -> error::Result<(u32, Option<container::Ctx>)> { | |
36 | use crate::container::Container; | |
37 | use crate::mach::header::*; | |
38 | let magic = bytes.pread_with::<u32>(offset, BE)?; | |
39 | let ctx = match magic { | |
40 | MH_CIGAM_64 | MH_CIGAM | MH_MAGIC_64 | MH_MAGIC => { | |
41 | let is_lsb = magic == MH_CIGAM || magic == MH_CIGAM_64; | |
42 | let le = scroll::Endian::from(is_lsb); | |
43 | let container = if magic == MH_MAGIC_64 || magic == MH_CIGAM_64 { | |
44 | Container::Big | |
45 | } else { | |
46 | Container::Little | |
47 | }; | |
48 | Some(container::Ctx::new(container, le)) | |
49 | } | |
50 | _ => None, | |
51 | }; | |
52 | Ok((magic, ctx)) | |
53 | } | |
54 | ||
/// A cross-platform, zero-copy, endian-aware, 32/64 bit Mach-o binary parser
pub struct MachO<'a> {
    /// The mach-o header
    pub header: header::Header,
    /// The load commands tell the kernel and dynamic linker how to use/interpret this binary
    pub load_commands: Vec<load_command::LoadCommand>,
    /// The load command "segments" - typically the pieces of the binary that are loaded into memory
    pub segments: segment::Segments<'a>,
    /// The "Nlist" style symbols in this binary - strippable
    pub symbols: Option<symbols::Symbols<'a>>,
    /// The dylibs this library depends on; index 0 is the binary itself
    /// (its `LC_ID_DYLIB` name if present, otherwise the placeholder `"self"`)
    pub libs: Vec<&'a str>,
    /// The entry point (as a virtual memory address), 0 if none
    pub entry: u64,
    /// Whether `entry` refers to an older `LC_UNIXTHREAD` instead of the newer `LC_MAIN` entrypoint
    pub old_style_entry: bool,
    /// The name of the dylib, if any
    pub name: Option<&'a str>,
    /// Are we a little-endian binary?
    pub little_endian: bool,
    /// Are we a 64-bit binary
    pub is_64: bool,
    // The byte slice this binary was parsed from; kept so relocations can be read lazily
    data: &'a [u8],
    // The container/endianness context derived from the magic number
    ctx: container::Ctx,
    // Built from the dyld info load command, if one was present; backs `exports()`
    export_trie: Option<exports::ExportTrie<'a>>,
    // Built from the dyld info load command, if one was present; backs `imports()`
    bind_interpreter: Option<imports::BindInterpreter<'a>>,
}
82 | ||
83 | impl<'a> fmt::Debug for MachO<'a> { | |
84 | fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { | |
85 | fmt.debug_struct("MachO") | |
86 | .field("header", &self.header) | |
87 | .field("load_commands", &self.load_commands) | |
88 | .field("segments", &self.segments) | |
89 | .field("entry", &self.entry) | |
90 | .field("old_style_entry", &self.old_style_entry) | |
91 | .field("libs", &self.libs) | |
92 | .field("name", &self.name) | |
93 | .field("little_endian", &self.little_endian) | |
94 | .field("is_64", &self.is_64) | |
95 | .field("symbols()", &self.symbols().collect::<Vec<_>>()) | |
96 | .field("exports()", &self.exports()) | |
97 | .field("imports()", &self.imports()) | |
98 | .finish() | |
99 | } | |
100 | } | |
101 | ||
102 | impl<'a> MachO<'a> { | |
103 | /// Is this a relocatable object file? | |
104 | pub fn is_object_file(&self) -> bool { | |
105 | self.header.filetype == header::MH_OBJECT | |
106 | } | |
107 | /// Return an iterator over all the symbols in this binary | |
108 | pub fn symbols(&self) -> symbols::SymbolIterator<'a> { | |
109 | if let Some(ref symbols) = self.symbols { | |
110 | symbols.into_iter() | |
111 | } else { | |
112 | symbols::SymbolIterator::default() | |
113 | } | |
114 | } | |
115 | /// Return a vector of the relocations in this binary | |
116 | pub fn relocations( | |
117 | &self, | |
118 | ) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>> { | |
119 | debug!("Iterating relocations"); | |
120 | let mut relocs = Vec::new(); | |
121 | for (_i, segment) in (&self.segments).into_iter().enumerate() { | |
122 | for (j, section) in segment.into_iter().enumerate() { | |
123 | let (section, _data) = section?; | |
124 | if section.nreloc > 0 { | |
125 | relocs.push((j, section.iter_relocations(self.data, self.ctx), section)); | |
126 | } | |
127 | } | |
128 | } | |
129 | Ok(relocs) | |
130 | } | |
131 | /// Return the exported symbols in this binary (if any) | |
132 | pub fn exports(&self) -> error::Result<Vec<exports::Export>> { | |
133 | if let Some(ref trie) = self.export_trie { | |
134 | trie.exports(self.libs.as_slice()) | |
135 | } else { | |
136 | Ok(vec![]) | |
137 | } | |
138 | } | |
139 | /// Return the imported symbols in this binary that dyld knows about (if any) | |
140 | pub fn imports(&self) -> error::Result<Vec<imports::Import>> { | |
141 | if let Some(ref interpreter) = self.bind_interpreter { | |
142 | interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx) | |
143 | } else { | |
144 | Ok(vec![]) | |
145 | } | |
146 | } | |
147 | /// Parses the Mach-o binary from `bytes` at `offset` | |
148 | pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<MachO<'a>> { | |
149 | let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?; | |
150 | let ctx = if let Some(ctx) = maybe_ctx { | |
151 | ctx | |
152 | } else { | |
153 | return Err(error::Error::BadMagic(u64::from(magic))); | |
154 | }; | |
155 | debug!("Ctx: {:?}", ctx); | |
156 | let offset = &mut offset; | |
157 | let header: header::Header = bytes.pread_with(*offset, ctx)?; | |
158 | debug!("Mach-o header: {:?}", header); | |
159 | let little_endian = ctx.le.is_little(); | |
160 | let is_64 = ctx.container.is_big(); | |
161 | *offset += header::Header::size_with(&ctx.container); | |
162 | let ncmds = header.ncmds; | |
163 | let mut cmds: Vec<load_command::LoadCommand> = Vec::with_capacity(ncmds); | |
164 | let mut symbols = None; | |
165 | let mut libs = vec!["self"]; | |
166 | let mut export_trie = None; | |
167 | let mut bind_interpreter = None; | |
168 | let mut unixthread_entry_address = None; | |
169 | let mut main_entry_offset = None; | |
170 | let mut name = None; | |
171 | let mut segments = segment::Segments::new(ctx); | |
172 | for i in 0..ncmds { | |
173 | let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?; | |
174 | debug!("{} - {:?}", i, cmd); | |
175 | match cmd.command { | |
176 | load_command::CommandVariant::Segment32(command) => { | |
177 | // FIXME: we may want to be less strict about failure here, and just return an empty segment to allow parsing to continue? | |
178 | segments.push(segment::Segment::from_32(bytes, &command, cmd.offset, ctx)?) | |
179 | } | |
180 | load_command::CommandVariant::Segment64(command) => { | |
181 | segments.push(segment::Segment::from_64(bytes, &command, cmd.offset, ctx)?) | |
182 | } | |
183 | load_command::CommandVariant::Symtab(command) => { | |
184 | symbols = Some(symbols::Symbols::parse(bytes, &command, ctx)?); | |
185 | } | |
186 | load_command::CommandVariant::LoadDylib(command) | |
187 | | load_command::CommandVariant::LoadUpwardDylib(command) | |
188 | | load_command::CommandVariant::ReexportDylib(command) | |
189 | | load_command::CommandVariant::LoadWeakDylib(command) | |
190 | | load_command::CommandVariant::LazyLoadDylib(command) => { | |
191 | let lib = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?; | |
192 | libs.push(lib); | |
193 | } | |
194 | load_command::CommandVariant::DyldInfo(command) | |
195 | | load_command::CommandVariant::DyldInfoOnly(command) => { | |
196 | export_trie = Some(exports::ExportTrie::new(bytes, &command)); | |
197 | bind_interpreter = Some(imports::BindInterpreter::new(bytes, &command)); | |
198 | } | |
199 | load_command::CommandVariant::Unixthread(command) => { | |
200 | // dyld cares only about the first LC_UNIXTHREAD | |
201 | if unixthread_entry_address.is_none() { | |
202 | unixthread_entry_address = | |
203 | Some(command.instruction_pointer(header.cputype)?); | |
204 | } | |
205 | } | |
206 | load_command::CommandVariant::Main(command) => { | |
207 | // dyld cares only about the first LC_MAIN | |
208 | if main_entry_offset.is_none() { | |
209 | main_entry_offset = Some(command.entryoff); | |
210 | } | |
211 | } | |
212 | load_command::CommandVariant::IdDylib(command) => { | |
213 | let id = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?; | |
214 | libs[0] = id; | |
215 | name = Some(id); | |
216 | } | |
217 | _ => (), | |
218 | } | |
219 | cmds.push(cmd) | |
220 | } | |
221 | ||
222 | // dyld prefers LC_MAIN over LC_UNIXTHREAD | |
223 | // choose the same way here | |
224 | let (entry, old_style_entry) = if let Some(offset) = main_entry_offset { | |
225 | // map the entrypoint offset to a virtual memory address | |
226 | let base_address = segments | |
227 | .iter() | |
228 | .filter(|s| &s.segname[0..7] == b"__TEXT\0") | |
229 | .map(|s| s.vmaddr - s.fileoff) | |
230 | .next() | |
231 | .ok_or_else(|| { | |
232 | error::Error::Malformed(format!( | |
233 | "image specifies LC_MAIN offset {} but has no __TEXT segment", | |
234 | offset | |
235 | )) | |
236 | })?; | |
237 | ||
238 | (base_address + offset, false) | |
239 | } else if let Some(address) = unixthread_entry_address { | |
240 | (address, true) | |
241 | } else { | |
242 | (0, false) | |
243 | }; | |
244 | ||
245 | Ok(MachO { | |
246 | header, | |
247 | load_commands: cmds, | |
248 | segments, | |
249 | symbols, | |
250 | libs, | |
251 | export_trie, | |
252 | bind_interpreter, | |
253 | entry, | |
254 | old_style_entry, | |
255 | name, | |
256 | ctx, | |
257 | is_64, | |
258 | little_endian, | |
259 | data: bytes, | |
260 | }) | |
261 | } | |
262 | } | |
263 | ||
/// A Mach-o multi architecture (Fat) binary container
pub struct MultiArch<'a> {
    // The bytes of the whole fat container
    data: &'a [u8],
    // Offset in `data` at which the array of fat arch headers begins
    start: usize,
    /// The number of architectures, as declared by the fat header
    pub narches: usize,
}
270 | ||
/// Iterator over the fat architecture headers in a `MultiArch` container
pub struct FatArchIterator<'a> {
    // Index of the next arch header to yield
    index: usize,
    // The bytes of the whole fat container
    data: &'a [u8],
    // Total number of arch headers to yield
    narches: usize,
    // Offset in `data` of the first arch header
    start: usize,
}
278 | ||
279 | impl<'a> Iterator for FatArchIterator<'a> { | |
280 | type Item = error::Result<fat::FatArch>; | |
281 | fn next(&mut self) -> Option<Self::Item> { | |
282 | if self.index >= self.narches { | |
283 | None | |
284 | } else { | |
285 | let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start; | |
286 | let arch = self | |
287 | .data | |
288 | .pread_with::<fat::FatArch>(offset, scroll::BE) | |
289 | .map_err(core::convert::Into::into); | |
290 | self.index += 1; | |
291 | Some(arch) | |
292 | } | |
293 | } | |
294 | } | |
295 | ||
/// Iterator over every `MachO` binary contained in this `MultiArch` container
pub struct MachOIterator<'a> {
    // Index of the next architecture to parse
    index: usize,
    // The bytes of the whole fat container
    data: &'a [u8],
    // Total number of architectures to yield
    narches: usize,
    // Offset in `data` of the first arch header
    start: usize,
}
303 | ||
304 | impl<'a> Iterator for MachOIterator<'a> { | |
305 | type Item = error::Result<MachO<'a>>; | |
306 | fn next(&mut self) -> Option<Self::Item> { | |
307 | if self.index >= self.narches { | |
308 | None | |
309 | } else { | |
310 | let index = self.index; | |
311 | let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start; | |
312 | self.index += 1; | |
313 | match self.data.pread_with::<fat::FatArch>(offset, scroll::BE) { | |
314 | Ok(arch) => { | |
315 | let bytes = arch.slice(self.data); | |
316 | let binary = MachO::parse(bytes, 0); | |
317 | Some(binary) | |
318 | } | |
319 | Err(e) => Some(Err(e.into())), | |
320 | } | |
321 | } | |
322 | } | |
323 | } | |
324 | ||
325 | impl<'a, 'b> IntoIterator for &'b MultiArch<'a> { | |
326 | type Item = error::Result<MachO<'a>>; | |
327 | type IntoIter = MachOIterator<'a>; | |
328 | fn into_iter(self) -> Self::IntoIter { | |
329 | MachOIterator { | |
330 | index: 0, | |
331 | data: self.data, | |
332 | narches: self.narches, | |
333 | start: self.start, | |
334 | } | |
335 | } | |
336 | } | |
337 | ||
338 | impl<'a> MultiArch<'a> { | |
339 | /// Lazily construct `Self` | |
340 | pub fn new(bytes: &'a [u8]) -> error::Result<Self> { | |
341 | let header = fat::FatHeader::parse(bytes)?; | |
342 | Ok(MultiArch { | |
343 | data: bytes, | |
344 | start: fat::SIZEOF_FAT_HEADER, | |
345 | narches: header.nfat_arch as usize, | |
346 | }) | |
347 | } | |
348 | /// Iterate every fat arch header | |
349 | pub fn iter_arches(&self) -> FatArchIterator { | |
350 | FatArchIterator { | |
351 | index: 0, | |
352 | data: self.data, | |
353 | narches: self.narches, | |
354 | start: self.start, | |
355 | } | |
356 | } | |
357 | /// Return all the architectures in this binary | |
358 | pub fn arches(&self) -> error::Result<Vec<fat::FatArch>> { | |
359 | let mut arches = Vec::with_capacity(self.narches); | |
360 | for arch in self.iter_arches() { | |
361 | arches.push(arch?); | |
362 | } | |
363 | Ok(arches) | |
364 | } | |
365 | /// Try to get the Mach-o binary at `index` | |
366 | pub fn get(&self, index: usize) -> error::Result<MachO<'a>> { | |
367 | if index >= self.narches { | |
368 | return Err(error::Error::Malformed(format!( | |
369 | "Requested the {}-th binary, but there are only {} architectures in this container", | |
370 | index, self.narches | |
371 | ))); | |
372 | } | |
373 | let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start; | |
374 | let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE)?; | |
375 | let bytes = arch.slice(self.data); | |
376 | Ok(MachO::parse(bytes, 0)?) | |
377 | } | |
378 | ||
379 | pub fn find<F: Fn(error::Result<fat::FatArch>) -> bool>( | |
380 | &'a self, | |
381 | f: F, | |
382 | ) -> Option<error::Result<MachO<'a>>> { | |
383 | for (i, arch) in self.iter_arches().enumerate() { | |
384 | if f(arch) { | |
385 | return Some(self.get(i)); | |
386 | } | |
387 | } | |
388 | None | |
389 | } | |
390 | /// Try and find the `cputype` in `Self`, if there is one | |
391 | pub fn find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>> { | |
392 | for arch in self.iter_arches() { | |
393 | let arch = arch?; | |
394 | if arch.cputype == cputype { | |
395 | return Ok(Some(arch)); | |
396 | } | |
397 | } | |
398 | Ok(None) | |
399 | } | |
400 | } | |
401 | ||
402 | impl<'a> fmt::Debug for MultiArch<'a> { | |
403 | fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { | |
404 | fmt.debug_struct("MultiArch") | |
405 | .field("arches", &self.arches().unwrap()) | |
406 | .field("data", &self.data.len()) | |
407 | .finish() | |
408 | } | |
409 | } | |
410 | ||
#[derive(Debug)]
// NOTE(review): presumably silenced because `MachO` has many more fields than `MultiArch` and
// boxing the variant was not wanted — confirm the rationale
#[allow(clippy::large_enum_variant)]
/// Either a collection of multiple architectures, or a single mach-o binary
pub enum Mach<'a> {
    /// A "fat" multi-architecture binary container
    Fat(MultiArch<'a>),
    /// A regular Mach-o binary
    Binary(MachO<'a>),
}
420 | ||
421 | impl<'a> Mach<'a> { | |
422 | /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary | |
423 | pub fn parse(bytes: &'a [u8]) -> error::Result<Self> { | |
424 | let size = bytes.len(); | |
425 | if size < 4 { | |
426 | let error = error::Error::Malformed("size is smaller than a magical number".into()); | |
427 | return Err(error); | |
428 | } | |
429 | let magic = peek(&bytes, 0)?; | |
430 | match magic { | |
431 | fat::FAT_MAGIC => { | |
432 | let multi = MultiArch::new(bytes)?; | |
433 | Ok(Mach::Fat(multi)) | |
434 | } | |
435 | // we might be a regular binary | |
436 | _ => { | |
437 | let binary = MachO::parse(bytes, 0)?; | |
438 | Ok(Mach::Binary(binary)) | |
439 | } | |
440 | } | |
441 | } | |
442 | } |