]>
Commit | Line | Data |
---|---|---|
17df50a5 XL |
1 | //! Support for archive files. |
2 | ||
3 | use core::convert::TryInto; | |
4 | ||
5 | use crate::archive; | |
6 | use crate::read::{self, Error, ReadError, ReadRef}; | |
7 | ||
/// The flavour of an archive, as identified by its special members.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
pub enum ArchiveKind {
    /// No special member was found that identifies the archive format.
    Unknown,
    /// GNU (or System V) archive.
    Gnu,
    /// GNU (or System V) archive whose symbol table is 64-bit.
    Gnu64,
    /// BSD archive.
    Bsd,
    /// BSD archive whose symbol table is 64-bit.
    ///
    /// This is used for Darwin.
    Bsd64,
    /// Windows COFF archive.
    Coff,
}
27 | ||
28 | /// A partially parsed archive file. | |
29 | #[derive(Debug)] | |
30 | pub struct ArchiveFile<'data, R: ReadRef<'data> = &'data [u8]> { | |
31 | data: R, | |
32 | len: u64, | |
33 | offset: u64, | |
34 | kind: ArchiveKind, | |
35 | symbols: (u64, u64), | |
36 | names: &'data [u8], | |
37 | } | |
38 | ||
39 | impl<'data, R: ReadRef<'data>> ArchiveFile<'data, R> { | |
40 | /// Parse the archive header and special members. | |
41 | pub fn parse(data: R) -> read::Result<Self> { | |
42 | let len = data.len().read_error("Unknown archive length")?; | |
43 | let mut tail = 0; | |
44 | let magic = data | |
45 | .read_bytes(&mut tail, archive::MAGIC.len() as u64) | |
46 | .read_error("Invalid archive size")?; | |
47 | if magic != &archive::MAGIC[..] { | |
48 | return Err(Error("Unsupported archive identifier")); | |
49 | } | |
50 | ||
51 | let mut file = ArchiveFile { | |
52 | data, | |
53 | offset: tail, | |
54 | len, | |
55 | kind: ArchiveKind::Unknown, | |
56 | symbols: (0, 0), | |
57 | names: &[], | |
58 | }; | |
59 | ||
60 | // The first few members may be special, so parse them. | |
61 | // GNU has: | |
136023e0 | 62 | // - "/" or "/SYM64/": symbol table (optional) |
17df50a5 XL |
63 | // - "//": names table (optional) |
64 | // COFF has: | |
65 | // - "/": first linker member | |
66 | // - "/": second linker member | |
67 | // - "//": names table | |
68 | // BSD has: | |
69 | // - "__.SYMDEF" or "__.SYMDEF SORTED": symbol table (optional) | |
136023e0 XL |
70 | // BSD 64-bit has: |
71 | // - "__.SYMDEF_64" or "__.SYMDEF_64 SORTED": symbol table (optional) | |
72 | // BSD may use the extended name for the symbol table. This is handled | |
73 | // by `ArchiveMember::parse`. | |
17df50a5 XL |
74 | if tail < len { |
75 | let member = ArchiveMember::parse(data, &mut tail, &[])?; | |
76 | if member.name == b"/" { | |
77 | // GNU symbol table (unless we later determine this is COFF). | |
78 | file.kind = ArchiveKind::Gnu; | |
79 | file.symbols = member.file_range(); | |
80 | file.offset = tail; | |
81 | ||
82 | if tail < len { | |
83 | let member = ArchiveMember::parse(data, &mut tail, &[])?; | |
84 | if member.name == b"/" { | |
85 | // COFF linker member. | |
86 | file.kind = ArchiveKind::Coff; | |
87 | file.symbols = member.file_range(); | |
88 | file.offset = tail; | |
89 | ||
90 | if tail < len { | |
91 | let member = ArchiveMember::parse(data, &mut tail, &[])?; | |
92 | if member.name == b"//" { | |
93 | // COFF names table. | |
94 | file.names = member.data(data)?; | |
95 | file.offset = tail; | |
96 | } | |
97 | } | |
98 | } else if member.name == b"//" { | |
99 | // GNU names table. | |
100 | file.names = member.data(data)?; | |
101 | file.offset = tail; | |
102 | } | |
103 | } | |
136023e0 XL |
104 | } else if member.name == b"/SYM64/" { |
105 | // GNU 64-bit symbol table. | |
106 | file.kind = ArchiveKind::Gnu64; | |
107 | file.symbols = member.file_range(); | |
108 | file.offset = tail; | |
109 | ||
110 | if tail < len { | |
111 | let member = ArchiveMember::parse(data, &mut tail, &[])?; | |
112 | if member.name == b"//" { | |
113 | // GNU names table. | |
114 | file.names = member.data(data)?; | |
115 | file.offset = tail; | |
116 | } | |
117 | } | |
17df50a5 XL |
118 | } else if member.name == b"//" { |
119 | // GNU names table. | |
120 | file.kind = ArchiveKind::Gnu; | |
121 | file.names = member.data(data)?; | |
122 | file.offset = tail; | |
123 | } else if member.name == b"__.SYMDEF" || member.name == b"__.SYMDEF SORTED" { | |
124 | // BSD symbol table. | |
125 | file.kind = ArchiveKind::Bsd; | |
126 | file.symbols = member.file_range(); | |
127 | file.offset = tail; | |
136023e0 XL |
128 | } else if member.name == b"__.SYMDEF_64" || member.name == b"__.SYMDEF_64 SORTED" { |
129 | // BSD 64-bit symbol table. | |
130 | file.kind = ArchiveKind::Bsd64; | |
131 | file.symbols = member.file_range(); | |
132 | file.offset = tail; | |
17df50a5 XL |
133 | } else { |
134 | // TODO: This could still be a BSD file. We leave this as unknown for now. | |
135 | } | |
136 | } | |
137 | Ok(file) | |
138 | } | |
139 | ||
140 | /// Return the archive format. | |
141 | #[inline] | |
142 | pub fn kind(&self) -> ArchiveKind { | |
143 | self.kind | |
144 | } | |
145 | ||
146 | /// Iterate over the members of the archive. | |
147 | /// | |
148 | /// This does not return special members. | |
149 | #[inline] | |
150 | pub fn members(&self) -> ArchiveMemberIterator<'data, R> { | |
151 | ArchiveMemberIterator { | |
152 | data: self.data, | |
153 | offset: self.offset, | |
154 | len: self.len, | |
155 | names: self.names, | |
156 | } | |
157 | } | |
158 | } | |
159 | ||
160 | /// An iterator over the members of an archive. | |
161 | #[derive(Debug)] | |
162 | pub struct ArchiveMemberIterator<'data, R: ReadRef<'data> = &'data [u8]> { | |
163 | data: R, | |
164 | offset: u64, | |
165 | len: u64, | |
166 | names: &'data [u8], | |
167 | } | |
168 | ||
169 | impl<'data, R: ReadRef<'data>> Iterator for ArchiveMemberIterator<'data, R> { | |
170 | type Item = read::Result<ArchiveMember<'data>>; | |
171 | ||
172 | fn next(&mut self) -> Option<Self::Item> { | |
173 | if self.offset >= self.len { | |
174 | return None; | |
175 | } | |
176 | let member = ArchiveMember::parse(self.data, &mut self.offset, self.names); | |
177 | if member.is_err() { | |
178 | self.offset = self.len; | |
179 | } | |
180 | Some(member) | |
181 | } | |
182 | } | |
183 | ||
184 | /// A partially parsed archive member. | |
185 | #[derive(Debug)] | |
186 | pub struct ArchiveMember<'data> { | |
187 | header: &'data archive::Header, | |
188 | name: &'data [u8], | |
189 | offset: u64, | |
190 | size: u64, | |
191 | } | |
192 | ||
193 | impl<'data> ArchiveMember<'data> { | |
194 | /// Parse the archive member header, name, and file data. | |
195 | /// | |
196 | /// This reads the extended name (if any) and adjusts the file size. | |
197 | fn parse<R: ReadRef<'data>>( | |
198 | data: R, | |
199 | offset: &mut u64, | |
200 | names: &'data [u8], | |
201 | ) -> read::Result<Self> { | |
202 | let header = data | |
203 | .read::<archive::Header>(offset) | |
204 | .read_error("Invalid archive member header")?; | |
205 | if header.terminator != archive::TERMINATOR { | |
206 | return Err(Error("Invalid archive terminator")); | |
207 | } | |
208 | ||
209 | let mut file_offset = *offset; | |
210 | let mut file_size = | |
211 | parse_u64_digits(&header.size, 10).read_error("Invalid archive member size")?; | |
212 | *offset = offset | |
213 | .checked_add(file_size) | |
214 | .read_error("Archive member size is too large")?; | |
215 | // Entries are padded to an even number of bytes. | |
216 | if (file_size & 1) != 0 { | |
217 | *offset = offset.saturating_add(1); | |
218 | } | |
219 | ||
220 | let name = if header.name[0] == b'/' && (header.name[1] as char).is_digit(10) { | |
221 | // Read file name from the names table. | |
222 | parse_sysv_extended_name(&header.name[1..], names) | |
223 | .read_error("Invalid archive extended name offset")? | |
224 | } else if &header.name[..3] == b"#1/" && (header.name[3] as char).is_digit(10) { | |
225 | // Read file name from the start of the file data. | |
226 | parse_bsd_extended_name(&header.name[3..], data, &mut file_offset, &mut file_size) | |
227 | .read_error("Invalid archive extended name length")? | |
228 | } else if header.name[0] == b'/' { | |
229 | let name_len = memchr::memchr(b' ', &header.name).unwrap_or(header.name.len()); | |
230 | &header.name[..name_len] | |
231 | } else { | |
232 | let name_len = memchr::memchr(b'/', &header.name) | |
233 | .or_else(|| memchr::memchr(b' ', &header.name)) | |
234 | .unwrap_or(header.name.len()); | |
235 | &header.name[..name_len] | |
236 | }; | |
237 | ||
238 | Ok(ArchiveMember { | |
239 | header, | |
240 | name, | |
241 | offset: file_offset, | |
242 | size: file_size, | |
243 | }) | |
244 | } | |
245 | ||
246 | /// Return the raw header. | |
247 | #[inline] | |
248 | pub fn header(&self) -> &'data archive::Header { | |
249 | self.header | |
250 | } | |
251 | ||
252 | /// Return the parsed file name. | |
253 | /// | |
254 | /// This may be an extended file name. | |
255 | #[inline] | |
256 | pub fn name(&self) -> &'data [u8] { | |
257 | self.name | |
258 | } | |
259 | ||
260 | /// Parse the file modification timestamp from the header. | |
261 | #[inline] | |
262 | pub fn date(&self) -> Option<u64> { | |
263 | parse_u64_digits(&self.header.date, 10) | |
264 | } | |
265 | ||
266 | /// Parse the user ID from the header. | |
267 | #[inline] | |
268 | pub fn uid(&self) -> Option<u64> { | |
269 | parse_u64_digits(&self.header.uid, 10) | |
270 | } | |
271 | ||
272 | /// Parse the group ID from the header. | |
273 | #[inline] | |
274 | pub fn gid(&self) -> Option<u64> { | |
275 | parse_u64_digits(&self.header.gid, 10) | |
276 | } | |
277 | ||
278 | /// Parse the file mode from the header. | |
279 | #[inline] | |
280 | pub fn mode(&self) -> Option<u64> { | |
281 | parse_u64_digits(&self.header.mode, 8) | |
282 | } | |
283 | ||
284 | /// Return the offset and size of the file data. | |
285 | pub fn file_range(&self) -> (u64, u64) { | |
286 | (self.offset, self.size) | |
287 | } | |
288 | ||
289 | /// Return the file data. | |
290 | #[inline] | |
291 | pub fn data<R: ReadRef<'data>>(&self, data: R) -> read::Result<&'data [u8]> { | |
292 | data.read_bytes_at(self.offset, self.size) | |
293 | .read_error("Archive member size is too large") | |
294 | } | |
295 | } | |
296 | ||
// Parses an unsigned number in `radix` from a space-padded field.
//
// Parsing stops at the first space and ignores everything from there on.
// Returns `None` if the field starts with a space, if a byte before the first
// space is not a digit in `radix`, or if the value does not fit in a `u64`.
fn parse_u64_digits(digits: &[u8], radix: u32) -> Option<u64> {
    if digits.first() == Some(&b' ') {
        return None;
    }
    digits
        .iter()
        .take_while(|&&byte| byte != b' ')
        .try_fold(0u64, |value, &byte| {
            let digit = char::from(byte).to_digit(radix)?;
            value
                .checked_mul(u64::from(radix))?
                .checked_add(u64::from(digit))
        })
}
315 | ||
316 | fn parse_sysv_extended_name<'data>(digits: &[u8], names: &'data [u8]) -> Result<&'data [u8], ()> { | |
317 | let offset = parse_u64_digits(digits, 10).ok_or(())?; | |
318 | let offset = offset.try_into().map_err(|_| ())?; | |
319 | let name_data = names.get(offset..).ok_or(())?; | |
320 | let name = match memchr::memchr2(b'/', b'\0', name_data) { | |
321 | Some(len) => &name_data[..len], | |
322 | None => name_data, | |
323 | }; | |
324 | Ok(name) | |
325 | } | |
326 | ||
327 | /// Modifies `data` to start after the extended name. | |
328 | fn parse_bsd_extended_name<'data, R: ReadRef<'data>>( | |
329 | digits: &[u8], | |
330 | data: R, | |
331 | offset: &mut u64, | |
332 | size: &mut u64, | |
333 | ) -> Result<&'data [u8], ()> { | |
334 | let len = parse_u64_digits(digits, 10).ok_or(())?; | |
335 | *size = size.checked_sub(len).ok_or(())?; | |
336 | let name_data = data.read_bytes(offset, len)?; | |
337 | let name = match memchr::memchr(b'\0', name_data) { | |
338 | Some(len) => &name_data[..len], | |
339 | None => name_data, | |
340 | }; | |
341 | Ok(name) | |
342 | } | |
343 | ||
#[cfg(test)]
mod tests {
    use super::*;

    // Every member header in the fixtures below is exactly 60 bytes:
    // name (16), date (12), uid (6), gid (6), mode (8), size (10), terminator (2).
    // Fields are left-aligned and padded with spaces, so the column padding in
    // the byte strings is significant. A trailing `\` continues the literal on
    // the next line and skips that line's leading indentation.

    /// Parse `data` and check the detected archive kind.
    fn assert_kind(data: &[u8], expected: ArchiveKind) {
        let archive = ArchiveFile::parse(data).unwrap();
        assert_eq!(archive.kind(), expected);
    }

    #[test]
    fn kind() {
        // Magic only, no members.
        assert_kind(b"!<arch>\n", ArchiveKind::Unknown);

        // GNU symbol table only.
        assert_kind(
            b"\
            !<arch>\n\
            /                                               4         `\n\
            0000",
            ArchiveKind::Gnu,
        );

        // GNU names table only.
        assert_kind(
            b"\
            !<arch>\n\
            //                                              4         `\n\
            0000",
            ArchiveKind::Gnu,
        );

        // GNU symbol table followed by names table.
        assert_kind(
            b"\
            !<arch>\n\
            /                                               4         `\n\
            0000\
            //                                              4         `\n\
            0000",
            ArchiveKind::Gnu,
        );

        // GNU 64-bit symbol table only.
        assert_kind(
            b"\
            !<arch>\n\
            /SYM64/                                         4         `\n\
            0000",
            ArchiveKind::Gnu64,
        );

        // GNU 64-bit symbol table followed by names table.
        assert_kind(
            b"\
            !<arch>\n\
            /SYM64/                                         4         `\n\
            0000\
            //                                              4         `\n\
            0000",
            ArchiveKind::Gnu64,
        );

        // BSD symbol table with a short name.
        assert_kind(
            b"\
            !<arch>\n\
            __.SYMDEF                                       4         `\n\
            0000",
            ArchiveKind::Bsd,
        );

        // BSD symbol table with an extended name.
        assert_kind(
            b"\
            !<arch>\n\
            #1/9                                            13        `\n\
            __.SYMDEF0000",
            ArchiveKind::Bsd,
        );

        // BSD sorted symbol table with an extended name.
        assert_kind(
            b"\
            !<arch>\n\
            #1/16                                           20        `\n\
            __.SYMDEF SORTED0000",
            ArchiveKind::Bsd,
        );

        // BSD 64-bit symbol table with a short name.
        assert_kind(
            b"\
            !<arch>\n\
            __.SYMDEF_64                                    4         `\n\
            0000",
            ArchiveKind::Bsd64,
        );

        // BSD 64-bit symbol table with an extended name.
        assert_kind(
            b"\
            !<arch>\n\
            #1/12                                           16        `\n\
            __.SYMDEF_640000",
            ArchiveKind::Bsd64,
        );

        // BSD 64-bit sorted symbol table with an extended name.
        assert_kind(
            b"\
            !<arch>\n\
            #1/19                                           23        `\n\
            __.SYMDEF_64 SORTED0000",
            ArchiveKind::Bsd64,
        );

        // COFF: two linker members followed by the names table.
        assert_kind(
            b"\
            !<arch>\n\
            /                                               4         `\n\
            0000\
            /                                               4         `\n\
            0000\
            //                                              4         `\n\
            0000",
            ArchiveKind::Coff,
        );
    }

    #[test]
    fn gnu_names() {
        let data = b"\
            !<arch>\n\
            //                                              18        `\n\
            0123456789abcdef/\n\
            s p a c e/      0           0     0     644     4         `\n\
            0000\
            0123456789abcde/0           0     0     644     3         `\n\
            odd\n\
            /0              0           0     0     644     4         `\n\
            even";
        let data = &data[..];
        let archive = ArchiveFile::parse(data).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Gnu);
        let mut members = archive.members();

        // Short name containing spaces, terminated by '/'.
        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"s p a c e");
        assert_eq!(member.data(data).unwrap(), &b"0000"[..]);

        // Longest name that still fits in the header; odd-sized data is padded.
        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"0123456789abcde");
        assert_eq!(member.data(data).unwrap(), &b"odd"[..]);

        // Extended name looked up at offset 0 of the names table.
        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"0123456789abcdef");
        assert_eq!(member.data(data).unwrap(), &b"even"[..]);

        assert!(members.next().is_none());
    }

    #[test]
    fn bsd_names() {
        let data = b"\
            !<arch>\n\
            0123456789abcde 0           0     0     644     3         `\n\
            odd\n\
            #1/16           0           0     0     644     20        `\n\
            0123456789abcdefeven";
        let data = &data[..];
        let archive = ArchiveFile::parse(data).unwrap();
        assert_eq!(archive.kind(), ArchiveKind::Unknown);
        let mut members = archive.members();

        // Short name terminated by a space; odd-sized data is padded.
        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"0123456789abcde");
        assert_eq!(member.data(data).unwrap(), &b"odd"[..]);

        // Extended name stored in the first 16 bytes of the member data.
        let member = members.next().unwrap().unwrap();
        assert_eq!(member.name(), b"0123456789abcdef");
        assert_eq!(member.data(data).unwrap(), &b"even"[..]);

        assert!(members.next().is_none());
    }
}