]> git.proxmox.com Git - rustc.git/blame - vendor/goblin/src/mach/mod.rs
New upstream version 1.48.0+dfsg1
[rustc.git] / vendor / goblin / src / mach / mod.rs
CommitLineData
f035d41b
XL
1//! The Mach-o, mostly zero-copy, binary format parser and raw struct definitions
2use alloc::vec::Vec;
3use core::fmt;
4
5use log::debug;
6
7use scroll::ctx::SizeWith;
8use scroll::{Pread, BE};
9
10use crate::container;
11use crate::error;
12
13pub mod bind_opcodes;
14pub mod constants;
15pub mod exports;
16pub mod fat;
17pub mod header;
18pub mod imports;
19pub mod load_command;
20pub mod relocation;
21pub mod segment;
22pub mod symbols;
23
24pub use self::constants::cputype;
25
26/// Returns a big endian magical number
27pub fn peek(bytes: &[u8], offset: usize) -> error::Result<u32> {
28 Ok(bytes.pread_with::<u32>(offset, scroll::BE)?)
29}
30
31/// Parses a magic number, and an accompanying mach-o binary parsing context, according to the magic number.
32pub fn parse_magic_and_ctx(
33 bytes: &[u8],
34 offset: usize,
35) -> error::Result<(u32, Option<container::Ctx>)> {
36 use crate::container::Container;
37 use crate::mach::header::*;
38 let magic = bytes.pread_with::<u32>(offset, BE)?;
39 let ctx = match magic {
40 MH_CIGAM_64 | MH_CIGAM | MH_MAGIC_64 | MH_MAGIC => {
41 let is_lsb = magic == MH_CIGAM || magic == MH_CIGAM_64;
42 let le = scroll::Endian::from(is_lsb);
43 let container = if magic == MH_MAGIC_64 || magic == MH_CIGAM_64 {
44 Container::Big
45 } else {
46 Container::Little
47 };
48 Some(container::Ctx::new(container, le))
49 }
50 _ => None,
51 };
52 Ok((magic, ctx))
53}
54
55/// A cross-platform, zero-copy, endian-aware, 32/64 bit Mach-o binary parser
56pub struct MachO<'a> {
57 /// The mach-o header
58 pub header: header::Header,
59 /// The load commands tell the kernel and dynamic linker how to use/interpret this binary
60 pub load_commands: Vec<load_command::LoadCommand>,
61 /// The load command "segments" - typically the pieces of the binary that are loaded into memory
62 pub segments: segment::Segments<'a>,
63 /// The "Nlist" style symbols in this binary - strippable
64 pub symbols: Option<symbols::Symbols<'a>>,
65 /// The dylibs this library depends on
66 pub libs: Vec<&'a str>,
67 /// The entry point (as a virtual memory address), 0 if none
68 pub entry: u64,
69 /// Whether `entry` refers to an older `LC_UNIXTHREAD` instead of the newer `LC_MAIN` entrypoint
70 pub old_style_entry: bool,
71 /// The name of the dylib, if any
72 pub name: Option<&'a str>,
73 /// Are we a little-endian binary?
74 pub little_endian: bool,
75 /// Are we a 64-bit binary
76 pub is_64: bool,
77 data: &'a [u8],
78 ctx: container::Ctx,
79 export_trie: Option<exports::ExportTrie<'a>>,
80 bind_interpreter: Option<imports::BindInterpreter<'a>>,
81}
82
83impl<'a> fmt::Debug for MachO<'a> {
84 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
85 fmt.debug_struct("MachO")
86 .field("header", &self.header)
87 .field("load_commands", &self.load_commands)
88 .field("segments", &self.segments)
89 .field("entry", &self.entry)
90 .field("old_style_entry", &self.old_style_entry)
91 .field("libs", &self.libs)
92 .field("name", &self.name)
93 .field("little_endian", &self.little_endian)
94 .field("is_64", &self.is_64)
95 .field("symbols()", &self.symbols().collect::<Vec<_>>())
96 .field("exports()", &self.exports())
97 .field("imports()", &self.imports())
98 .finish()
99 }
100}
101
102impl<'a> MachO<'a> {
103 /// Is this a relocatable object file?
104 pub fn is_object_file(&self) -> bool {
105 self.header.filetype == header::MH_OBJECT
106 }
107 /// Return an iterator over all the symbols in this binary
108 pub fn symbols(&self) -> symbols::SymbolIterator<'a> {
109 if let Some(ref symbols) = self.symbols {
110 symbols.into_iter()
111 } else {
112 symbols::SymbolIterator::default()
113 }
114 }
115 /// Return a vector of the relocations in this binary
116 pub fn relocations(
117 &self,
118 ) -> error::Result<Vec<(usize, segment::RelocationIterator, segment::Section)>> {
119 debug!("Iterating relocations");
120 let mut relocs = Vec::new();
121 for (_i, segment) in (&self.segments).into_iter().enumerate() {
122 for (j, section) in segment.into_iter().enumerate() {
123 let (section, _data) = section?;
124 if section.nreloc > 0 {
125 relocs.push((j, section.iter_relocations(self.data, self.ctx), section));
126 }
127 }
128 }
129 Ok(relocs)
130 }
131 /// Return the exported symbols in this binary (if any)
132 pub fn exports(&self) -> error::Result<Vec<exports::Export>> {
133 if let Some(ref trie) = self.export_trie {
134 trie.exports(self.libs.as_slice())
135 } else {
136 Ok(vec![])
137 }
138 }
139 /// Return the imported symbols in this binary that dyld knows about (if any)
140 pub fn imports(&self) -> error::Result<Vec<imports::Import>> {
141 if let Some(ref interpreter) = self.bind_interpreter {
142 interpreter.imports(self.libs.as_slice(), self.segments.as_slice(), self.ctx)
143 } else {
144 Ok(vec![])
145 }
146 }
147 /// Parses the Mach-o binary from `bytes` at `offset`
148 pub fn parse(bytes: &'a [u8], mut offset: usize) -> error::Result<MachO<'a>> {
149 let (magic, maybe_ctx) = parse_magic_and_ctx(bytes, offset)?;
150 let ctx = if let Some(ctx) = maybe_ctx {
151 ctx
152 } else {
153 return Err(error::Error::BadMagic(u64::from(magic)));
154 };
155 debug!("Ctx: {:?}", ctx);
156 let offset = &mut offset;
157 let header: header::Header = bytes.pread_with(*offset, ctx)?;
158 debug!("Mach-o header: {:?}", header);
159 let little_endian = ctx.le.is_little();
160 let is_64 = ctx.container.is_big();
161 *offset += header::Header::size_with(&ctx.container);
162 let ncmds = header.ncmds;
163 let mut cmds: Vec<load_command::LoadCommand> = Vec::with_capacity(ncmds);
164 let mut symbols = None;
165 let mut libs = vec!["self"];
166 let mut export_trie = None;
167 let mut bind_interpreter = None;
168 let mut unixthread_entry_address = None;
169 let mut main_entry_offset = None;
170 let mut name = None;
171 let mut segments = segment::Segments::new(ctx);
172 for i in 0..ncmds {
173 let cmd = load_command::LoadCommand::parse(bytes, offset, ctx.le)?;
174 debug!("{} - {:?}", i, cmd);
175 match cmd.command {
176 load_command::CommandVariant::Segment32(command) => {
177 // FIXME: we may want to be less strict about failure here, and just return an empty segment to allow parsing to continue?
178 segments.push(segment::Segment::from_32(bytes, &command, cmd.offset, ctx)?)
179 }
180 load_command::CommandVariant::Segment64(command) => {
181 segments.push(segment::Segment::from_64(bytes, &command, cmd.offset, ctx)?)
182 }
183 load_command::CommandVariant::Symtab(command) => {
184 symbols = Some(symbols::Symbols::parse(bytes, &command, ctx)?);
185 }
186 load_command::CommandVariant::LoadDylib(command)
187 | load_command::CommandVariant::LoadUpwardDylib(command)
188 | load_command::CommandVariant::ReexportDylib(command)
189 | load_command::CommandVariant::LoadWeakDylib(command)
190 | load_command::CommandVariant::LazyLoadDylib(command) => {
191 let lib = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?;
192 libs.push(lib);
193 }
194 load_command::CommandVariant::DyldInfo(command)
195 | load_command::CommandVariant::DyldInfoOnly(command) => {
196 export_trie = Some(exports::ExportTrie::new(bytes, &command));
197 bind_interpreter = Some(imports::BindInterpreter::new(bytes, &command));
198 }
199 load_command::CommandVariant::Unixthread(command) => {
200 // dyld cares only about the first LC_UNIXTHREAD
201 if unixthread_entry_address.is_none() {
202 unixthread_entry_address =
203 Some(command.instruction_pointer(header.cputype)?);
204 }
205 }
206 load_command::CommandVariant::Main(command) => {
207 // dyld cares only about the first LC_MAIN
208 if main_entry_offset.is_none() {
209 main_entry_offset = Some(command.entryoff);
210 }
211 }
212 load_command::CommandVariant::IdDylib(command) => {
213 let id = bytes.pread::<&str>(cmd.offset + command.dylib.name as usize)?;
214 libs[0] = id;
215 name = Some(id);
216 }
217 _ => (),
218 }
219 cmds.push(cmd)
220 }
221
222 // dyld prefers LC_MAIN over LC_UNIXTHREAD
223 // choose the same way here
224 let (entry, old_style_entry) = if let Some(offset) = main_entry_offset {
225 // map the entrypoint offset to a virtual memory address
226 let base_address = segments
227 .iter()
228 .filter(|s| &s.segname[0..7] == b"__TEXT\0")
229 .map(|s| s.vmaddr - s.fileoff)
230 .next()
231 .ok_or_else(|| {
232 error::Error::Malformed(format!(
233 "image specifies LC_MAIN offset {} but has no __TEXT segment",
234 offset
235 ))
236 })?;
237
238 (base_address + offset, false)
239 } else if let Some(address) = unixthread_entry_address {
240 (address, true)
241 } else {
242 (0, false)
243 };
244
245 Ok(MachO {
246 header,
247 load_commands: cmds,
248 segments,
249 symbols,
250 libs,
251 export_trie,
252 bind_interpreter,
253 entry,
254 old_style_entry,
255 name,
256 ctx,
257 is_64,
258 little_endian,
259 data: bytes,
260 })
261 }
262}
263
/// A Mach-o multi architecture (Fat) binary container.
///
/// Only the fat header is read up front; the per-architecture headers and
/// binaries are parsed on demand.
pub struct MultiArch<'a> {
    // The bytes of the whole fat container.
    data: &'a [u8],
    // Offset in `data` of the first fat arch header.
    start: usize,
    /// Number of architectures, as declared by the fat header.
    pub narches: usize,
}
270
/// Iterator over the fat architecture headers in a `MultiArch` container
pub struct FatArchIterator<'a> {
    // Index of the next arch header to yield.
    index: usize,
    // The bytes of the whole fat container.
    data: &'a [u8],
    // Total number of arch headers to yield.
    narches: usize,
    // Offset in `data` of the first arch header.
    start: usize,
}
278
279impl<'a> Iterator for FatArchIterator<'a> {
280 type Item = error::Result<fat::FatArch>;
281 fn next(&mut self) -> Option<Self::Item> {
282 if self.index >= self.narches {
283 None
284 } else {
285 let offset = (self.index * fat::SIZEOF_FAT_ARCH) + self.start;
286 let arch = self
287 .data
288 .pread_with::<fat::FatArch>(offset, scroll::BE)
289 .map_err(core::convert::Into::into);
290 self.index += 1;
291 Some(arch)
292 }
293 }
294}
295
/// Iterator over every `MachO` binary contained in this `MultiArch` container
pub struct MachOIterator<'a> {
    // Index of the next architecture to parse.
    index: usize,
    // The bytes of the whole fat container.
    data: &'a [u8],
    // Total number of architectures to yield.
    narches: usize,
    // Offset in `data` of the first arch header.
    start: usize,
}
303
304impl<'a> Iterator for MachOIterator<'a> {
305 type Item = error::Result<MachO<'a>>;
306 fn next(&mut self) -> Option<Self::Item> {
307 if self.index >= self.narches {
308 None
309 } else {
310 let index = self.index;
311 let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
312 self.index += 1;
313 match self.data.pread_with::<fat::FatArch>(offset, scroll::BE) {
314 Ok(arch) => {
315 let bytes = arch.slice(self.data);
316 let binary = MachO::parse(bytes, 0);
317 Some(binary)
318 }
319 Err(e) => Some(Err(e.into())),
320 }
321 }
322 }
323}
324
325impl<'a, 'b> IntoIterator for &'b MultiArch<'a> {
326 type Item = error::Result<MachO<'a>>;
327 type IntoIter = MachOIterator<'a>;
328 fn into_iter(self) -> Self::IntoIter {
329 MachOIterator {
330 index: 0,
331 data: self.data,
332 narches: self.narches,
333 start: self.start,
334 }
335 }
336}
337
338impl<'a> MultiArch<'a> {
339 /// Lazily construct `Self`
340 pub fn new(bytes: &'a [u8]) -> error::Result<Self> {
341 let header = fat::FatHeader::parse(bytes)?;
342 Ok(MultiArch {
343 data: bytes,
344 start: fat::SIZEOF_FAT_HEADER,
345 narches: header.nfat_arch as usize,
346 })
347 }
348 /// Iterate every fat arch header
349 pub fn iter_arches(&self) -> FatArchIterator {
350 FatArchIterator {
351 index: 0,
352 data: self.data,
353 narches: self.narches,
354 start: self.start,
355 }
356 }
357 /// Return all the architectures in this binary
358 pub fn arches(&self) -> error::Result<Vec<fat::FatArch>> {
359 let mut arches = Vec::with_capacity(self.narches);
360 for arch in self.iter_arches() {
361 arches.push(arch?);
362 }
363 Ok(arches)
364 }
365 /// Try to get the Mach-o binary at `index`
366 pub fn get(&self, index: usize) -> error::Result<MachO<'a>> {
367 if index >= self.narches {
368 return Err(error::Error::Malformed(format!(
369 "Requested the {}-th binary, but there are only {} architectures in this container",
370 index, self.narches
371 )));
372 }
373 let offset = (index * fat::SIZEOF_FAT_ARCH) + self.start;
374 let arch = self.data.pread_with::<fat::FatArch>(offset, scroll::BE)?;
375 let bytes = arch.slice(self.data);
376 Ok(MachO::parse(bytes, 0)?)
377 }
378
379 pub fn find<F: Fn(error::Result<fat::FatArch>) -> bool>(
380 &'a self,
381 f: F,
382 ) -> Option<error::Result<MachO<'a>>> {
383 for (i, arch) in self.iter_arches().enumerate() {
384 if f(arch) {
385 return Some(self.get(i));
386 }
387 }
388 None
389 }
390 /// Try and find the `cputype` in `Self`, if there is one
391 pub fn find_cputype(&self, cputype: u32) -> error::Result<Option<fat::FatArch>> {
392 for arch in self.iter_arches() {
393 let arch = arch?;
394 if arch.cputype == cputype {
395 return Ok(Some(arch));
396 }
397 }
398 Ok(None)
399 }
400}
401
402impl<'a> fmt::Debug for MultiArch<'a> {
403 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
404 fmt.debug_struct("MultiArch")
405 .field("arches", &self.arches().unwrap())
406 .field("data", &self.data.len())
407 .finish()
408 }
409}
410
411#[derive(Debug)]
412#[allow(clippy::large_enum_variant)]
413/// Either a collection of multiple architectures, or a single mach-o binary
414pub enum Mach<'a> {
415 /// A "fat" multi-architecture binary container
416 Fat(MultiArch<'a>),
417 /// A regular Mach-o binary
418 Binary(MachO<'a>),
419}
420
421impl<'a> Mach<'a> {
422 /// Parse from `bytes` either a multi-arch binary or a regular mach-o binary
423 pub fn parse(bytes: &'a [u8]) -> error::Result<Self> {
424 let size = bytes.len();
425 if size < 4 {
426 let error = error::Error::Malformed("size is smaller than a magical number".into());
427 return Err(error);
428 }
429 let magic = peek(&bytes, 0)?;
430 match magic {
431 fat::FAT_MAGIC => {
432 let multi = MultiArch::new(bytes)?;
433 Ok(Mach::Fat(multi))
434 }
435 // we might be a regular binary
436 _ => {
437 let binary = MachO::parse(bytes, 0)?;
438 Ok(Mach::Binary(binary))
439 }
440 }
441 }
442}