]>
Commit | Line | Data |
---|---|---|
6cd4f635 WB |
1 | //! The `pxar` decoder state machine. |
2 | //! | |
3 | //! This is the implementation used by both the synchronous and async pxar wrappers. | |
4 | ||
e5a2495e WB |
5 | #![deny(missing_docs)] |
6 | ||
6cd4f635 WB |
7 | use std::ffi::OsString; |
8 | use std::io; | |
9 | use std::mem::{self, size_of, size_of_val, MaybeUninit}; | |
10 | use std::os::unix::ffi::{OsStrExt, OsStringExt}; | |
11 | use std::path::{Path, PathBuf}; | |
12 | use std::pin::Pin; | |
13 | use std::task::{Context, Poll}; | |
14 | ||
15 | //use std::os::unix::fs::FileExt; | |
16 | ||
17 | use endian_trait::Endian; | |
18 | ||
19 | use crate::format::{self, Header}; | |
20 | use crate::poll_fn::poll_fn; | |
21 | use crate::util::{self, io_err_other}; | |
22 | use crate::{Entry, EntryKind, Metadata}; | |
23 | ||
24 | pub mod aio; | |
25 | pub mod sync; | |
26 | ||
27 | #[doc(inline)] | |
28 | pub use sync::Decoder; | |
29 | ||
/// Size in bytes of the shared scratch buffer.
const SCRATCH_BUFFER_LEN: usize = 4096;

/// To skip through non-seekable files.
///
/// Data read into this buffer is only ever discarded. The buffer is zero-initialized so that
/// handing it out as `&mut [u8]` never exposes uninitialized bytes to a reader implementation.
static mut SCRATCH_BUFFER: MaybeUninit<[u8; SCRATCH_BUFFER_LEN]> =
    MaybeUninit::new([0u8; SCRATCH_BUFFER_LEN]);

/// Returns the shared scratch buffer as a byte slice of `SCRATCH_BUFFER_LEN` bytes.
///
/// NOTE(review): every call returns a mutable slice over the same static memory. This is only
/// tolerable as long as callers treat the contents as garbage and never rely on them; decoders
/// running on several threads still write to it concurrently - consider a per-decoder buffer.
fn scratch_buffer() -> &'static mut [u8] {
    // SAFETY: `MaybeUninit<[u8; N]>` has the same layout as `[u8; N]`, the static is fully
    // initialized by its initializer, and it lives for the whole program. The pointer is taken
    // with `addr_of_mut!` so no intermediate reference to the `static mut` is created.
    unsafe {
        let start = std::ptr::addr_of_mut!(SCRATCH_BUFFER).cast::<u8>();
        std::slice::from_raw_parts_mut(start, SCRATCH_BUFFER_LEN)
    }
}
36 | ||
/// Sequential read interface used by the decoder's state machine.
///
/// Iterating through an archive only requires the equivalent of a `poll_read()`, which is what
/// [`poll_seq_read`](SeqRead::poll_seq_read) provides.
///
/// Readers which additionally know their current position (for example types supporting `Seek`
/// or `AsyncSeek`) can override [`poll_position`](SeqRead::poll_position). In that case the
/// starting position of each entry becomes available (accessible via `Entry::offset()`), which
/// allows jumping between entries.
pub trait SeqRead {
    /// Read sequentially into `buf`; this is basically an `AsyncRead` equivalent.
    ///
    /// Resolves to the number of bytes read.
    fn poll_seq_read(
        self: Pin<&mut Self>,
        cx: &mut Context,
        buf: &mut [u8],
    ) -> Poll<io::Result<usize>>;

    /// Report the current position within the input, if the reader can tell.
    ///
    /// The decoder uses this to take note of offsets within the file for later. Readers which
    /// support neither seeking nor positional reading can rely on this default implementation,
    /// which immediately resolves to `None`.
    fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
        let unsupported = None;
        Poll::Ready(unsupported)
    }
}
59 | ||
60 | /// Allow using trait objects for generics taking a `SeqRead`: | |
61 | impl<'a> SeqRead for &mut (dyn SeqRead + 'a) { | |
62 | fn poll_seq_read( | |
63 | self: Pin<&mut Self>, | |
64 | cx: &mut Context, | |
65 | buf: &mut [u8], | |
66 | ) -> Poll<io::Result<usize>> { | |
67 | unsafe { | |
68 | self.map_unchecked_mut(|this| &mut **this) | |
69 | .poll_seq_read(cx, buf) | |
70 | } | |
71 | } | |
72 | ||
73 | fn poll_position(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<io::Result<u64>>> { | |
74 | unsafe { self.map_unchecked_mut(|this| &mut **this).poll_position(cx) } | |
75 | } | |
76 | } | |
77 | ||
951620f1 | 78 | /// awaitable version of `poll_position`. |
710e6c8b | 79 | async fn seq_read_position<T: SeqRead + ?Sized>(input: &mut T) -> Option<io::Result<u64>> { |
951620f1 WB |
80 | poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *input).poll_position(cx) }).await |
81 | } | |
6cd4f635 | 82 | |
951620f1 WB |
83 | /// awaitable version of `poll_seq_read`. |
84 | pub(crate) async fn seq_read<T: SeqRead + ?Sized>( | |
85 | input: &mut T, | |
86 | buf: &mut [u8], | |
87 | ) -> io::Result<usize> { | |
88 | poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *input).poll_seq_read(cx, buf) }).await | |
89 | } | |
6cd4f635 | 90 | |
951620f1 WB |
91 | /// `read_exact` - since that's what we _actually_ want most of the time, but with EOF handling |
92 | async fn seq_read_exact_or_eof<T>(input: &mut T, mut buf: &mut [u8]) -> io::Result<Option<()>> | |
93 | where | |
94 | T: SeqRead + ?Sized, | |
95 | { | |
96 | let mut eof_ok = true; | |
97 | while !buf.is_empty() { | |
98 | match seq_read(&mut *input, buf).await? { | |
99 | 0 if eof_ok => return Ok(None), | |
100 | 0 => io_bail!("unexpected EOF"), | |
101 | got => buf = &mut buf[got..], | |
6cd4f635 | 102 | } |
951620f1 | 103 | eof_ok = false; |
6cd4f635 | 104 | } |
951620f1 WB |
105 | Ok(Some(())) |
106 | } | |
6cd4f635 | 107 | |
951620f1 WB |
108 | /// `read_exact` - since that's what we _actually_ want most of the time. |
109 | async fn seq_read_exact<T: SeqRead + ?Sized>(input: &mut T, buf: &mut [u8]) -> io::Result<()> { | |
110 | match seq_read_exact_or_eof(input, buf).await? { | |
111 | Some(()) => Ok(()), | |
a191c6a7 | 112 | None => io_bail!("unexpected EOF"), |
6cd4f635 | 113 | } |
951620f1 | 114 | } |
6cd4f635 | 115 | |
951620f1 WB |
116 | /// Helper to read into an allocated byte vector. |
117 | async fn seq_read_exact_data<T>(input: &mut T, size: usize) -> io::Result<Vec<u8>> | |
118 | where | |
119 | T: SeqRead + ?Sized, | |
120 | { | |
81d50029 | 121 | let mut data = unsafe { util::vec_new_uninitialized(size) }; |
951620f1 WB |
122 | seq_read_exact(input, &mut data[..]).await?; |
123 | Ok(data) | |
124 | } | |
6cd4f635 | 125 | |
951620f1 WB |
126 | /// `seq_read_entry` with EOF handling |
127 | async fn seq_read_entry_or_eof<T, E>(input: &mut T) -> io::Result<Option<E>> | |
128 | where | |
129 | T: SeqRead + ?Sized, | |
130 | E: Endian, | |
131 | { | |
132 | let mut data = MaybeUninit::<E>::uninit(); | |
133 | let buf = | |
134 | unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<E>()) }; | |
135 | if seq_read_exact_or_eof(input, buf).await?.is_none() { | |
136 | return Ok(None); | |
6cd4f635 | 137 | } |
951620f1 WB |
138 | Ok(Some(unsafe { data.assume_init().from_le() })) |
139 | } | |
6cd4f635 | 140 | |
951620f1 WB |
141 | /// Helper to read into an `Endian`-implementing `struct`. |
142 | async fn seq_read_entry<T: SeqRead + ?Sized, E: Endian>(input: &mut T) -> io::Result<E> { | |
143 | seq_read_entry_or_eof(input) | |
144 | .await? | |
a191c6a7 | 145 | .ok_or_else(|| io_format_err!("unexpected EOF")) |
6cd4f635 WB |
146 | } |
147 | ||
148 | /// The decoder state machine implementation. | |
149 | /// | |
150 | /// We use `async fn` to implement the decoder state machine so that we can easily plug in both | |
151 | /// synchronous or `async` I/O objects in as input. | |
5cf335be | 152 | pub(crate) struct DecoderImpl<T> { |
06070d26 | 153 | pub(crate) input: T, |
fd99ae79 | 154 | current_header: Header, |
6cd4f635 | 155 | entry: Entry, |
fd99ae79 | 156 | path_lengths: Vec<usize>, |
6cd4f635 WB |
157 | state: State, |
158 | with_goodbye_tables: bool, | |
318462ea WB |
159 | |
160 | /// The random access code uses decoders for sub-ranges which may not end in a `PAYLOAD` for | |
161 | /// entries like FIFOs or sockets, so there we explicitly allow an item to terminate with EOF. | |
162 | eof_after_entry: bool, | |
6cd4f635 WB |
163 | } |
164 | ||
/// Position of the decoder within the archive structure.
enum State {
    /// Nothing has been read yet.
    Begin,

    /// An entry has been completed; the next item still has to be read.
    Default,

    /// Inside the payload of a file.
    InPayload {
        /// Number of payload bytes which have already been consumed.
        offset: u64,
    },

    /// file entries with no data (fifo, socket)
    InSpecialFile,

    /// The current header belongs to a goodbye table whose content has not been skipped yet.
    InGoodbyeTable,

    /// The current header is the next `FILENAME` or `GOODBYE` item of a directory.
    InDirectory,

    /// The end of the archive has been reached.
    Eof,
}
179 | ||
/// Control flow while parsing items.
///
/// An entry is parsed by walking through all of its attribute items until the end of the entry
/// is reached, at which point parsing stops.
/// Note that for a directory this means we stopped at the beginning of its contents.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(crate) enum ItemResult {
    /// An "attribute" item has been parsed, parsing should continue with the next item.
    Attribute,

    /// An entry has been finished (`SYMLINK`, `HARDLINK`, ...) or the contents of a directory
    /// have just been entered (`FILENAME`, `GOODBYE`).
    ///
    /// We stop moving forward at this point.
    Entry,
}
196 | ||
0a00a48c WB |
197 | impl<I: SeqRead> DecoderImpl<I> { |
198 | pub async fn new(input: I) -> io::Result<Self> { | |
318462ea | 199 | Self::new_full(input, "/".into(), false).await |
6cd4f635 WB |
200 | } |
201 | ||
1568a754 DM |
202 | pub(crate) fn input(&self) -> &I { |
203 | &self.input | |
204 | } | |
205 | ||
318462ea WB |
206 | pub(crate) async fn new_full( |
207 | input: I, | |
208 | path: PathBuf, | |
209 | eof_after_entry: bool, | |
210 | ) -> io::Result<Self> { | |
6cd4f635 WB |
211 | let this = DecoderImpl { |
212 | input, | |
213 | current_header: unsafe { mem::zeroed() }, | |
214 | entry: Entry { | |
215 | path, | |
27631c16 | 216 | kind: EntryKind::GoodbyeTable, |
6cd4f635 | 217 | metadata: Metadata::default(), |
6cd4f635 WB |
218 | }, |
219 | path_lengths: Vec::new(), | |
220 | state: State::Begin, | |
221 | with_goodbye_tables: false, | |
318462ea | 222 | eof_after_entry, |
6cd4f635 WB |
223 | }; |
224 | ||
225 | // this.read_next_entry().await?; | |
226 | ||
227 | Ok(this) | |
228 | } | |
229 | ||
230 | /// Get the next file entry, recursing into directories. | |
231 | pub async fn next(&mut self) -> Option<io::Result<Entry>> { | |
232 | self.next_do().await.transpose() | |
233 | } | |
234 | ||
fd99ae79 | 235 | async fn next_do(&mut self) -> io::Result<Option<Entry>> { |
6cd4f635 WB |
236 | loop { |
237 | match self.state { | |
238 | State::Eof => return Ok(None), | |
239 | State::Begin => return self.read_next_entry().await.map(Some), | |
240 | State::Default => { | |
241 | // we completely finished an entry, so now we're going "up" in the directory | |
242 | // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE: | |
243 | self.read_next_item().await?; | |
244 | } | |
8eb622dd | 245 | State::InPayload { offset } => { |
6cd4f635 | 246 | // We need to skip the current payload first. |
6100072b | 247 | self.skip_entry(offset).await?; |
6cd4f635 WB |
248 | self.read_next_item().await?; |
249 | } | |
f7b824c3 WB |
250 | State::InGoodbyeTable => { |
251 | self.skip_entry(0).await?; | |
4c42ef2e WB |
252 | if self.path_lengths.pop().is_none() { |
253 | // The root directory has an entry containing '1'. | |
254 | io_bail!("unexpected EOF in goodbye table"); | |
f7b824c3 | 255 | } |
4c42ef2e WB |
256 | |
257 | if self.path_lengths.is_empty() { | |
258 | // we are at the end of the archive now | |
259 | self.state = State::Eof; | |
260 | return Ok(None); | |
261 | } | |
262 | ||
263 | // We left the directory, now keep going in our parent. | |
264 | self.state = State::Default; | |
265 | continue; | |
f7b824c3 | 266 | } |
2287d8b2 WB |
267 | State::InSpecialFile => { |
268 | self.entry.clear_data(); | |
269 | self.state = State::InDirectory; | |
270 | self.entry.kind = EntryKind::Directory; | |
271 | } | |
6cd4f635 WB |
272 | State::InDirectory => { |
273 | // We're at the next FILENAME or GOODBYE item. | |
274 | } | |
275 | } | |
276 | ||
277 | match self.current_header.htype { | |
278 | format::PXAR_FILENAME => return self.handle_file_entry().await, | |
279 | format::PXAR_GOODBYE => { | |
f7b824c3 WB |
280 | self.state = State::InGoodbyeTable; |
281 | ||
6cd4f635 | 282 | if self.with_goodbye_tables { |
af356979 WB |
283 | self.entry.clear_data(); |
284 | return Ok(Some(Entry { | |
285 | path: PathBuf::new(), | |
286 | metadata: Metadata::default(), | |
287 | kind: EntryKind::GoodbyeTable, | |
288 | })); | |
6cd4f635 | 289 | } else { |
f7b824c3 WB |
290 | // go up to goodbye table handling |
291 | continue; | |
6cd4f635 WB |
292 | } |
293 | } | |
4a13b8a3 FG |
294 | _ => io_bail!( |
295 | "expected filename or directory-goodbye pxar entry, got: {}", | |
296 | self.current_header, | |
6cd4f635 WB |
297 | ), |
298 | } | |
299 | } | |
300 | } | |
301 | ||
3a11ff3e WB |
302 | pub fn content_size(&self) -> Option<u64> { |
303 | if let State::InPayload { .. } = self.state { | |
304 | Some(self.current_header.content_size()) | |
305 | } else { | |
306 | None | |
307 | } | |
308 | } | |
309 | ||
1b25fc08 | 310 | pub fn content_reader(&mut self) -> Option<Contents<I>> { |
6100072b WB |
311 | if let State::InPayload { offset } = &mut self.state { |
312 | Some(Contents::new( | |
313 | &mut self.input, | |
314 | offset, | |
315 | self.current_header.content_size(), | |
316 | )) | |
317 | } else { | |
318 | None | |
319 | } | |
320 | } | |
321 | ||
6cd4f635 WB |
322 | async fn handle_file_entry(&mut self) -> io::Result<Option<Entry>> { |
323 | let mut data = self.read_entry_as_bytes().await?; | |
324 | ||
325 | // filenames are zero terminated! | |
326 | if data.pop() != Some(0) { | |
327 | io_bail!("illegal path found (missing terminating zero)"); | |
328 | } | |
f3ac1c51 WB |
329 | |
330 | crate::util::validate_filename(&data)?; | |
6cd4f635 WB |
331 | |
332 | let path = PathBuf::from(OsString::from_vec(data)); | |
333 | self.set_path(&path)?; | |
334 | self.read_next_entry().await.map(Some) | |
335 | } | |
336 | ||
337 | fn reset_path(&mut self) -> io::Result<()> { | |
338 | let path_len = *self | |
339 | .path_lengths | |
340 | .last() | |
341 | .ok_or_else(|| io_format_err!("internal decoder error: path underrun"))?; | |
342 | let mut path = mem::replace(&mut self.entry.path, PathBuf::new()) | |
343 | .into_os_string() | |
344 | .into_vec(); | |
345 | path.truncate(path_len); | |
346 | self.entry.path = PathBuf::from(OsString::from_vec(path)); | |
347 | Ok(()) | |
348 | } | |
349 | ||
350 | fn set_path(&mut self, path: &Path) -> io::Result<()> { | |
351 | self.reset_path()?; | |
352 | self.entry.path.push(path); | |
353 | Ok(()) | |
354 | } | |
355 | ||
356 | async fn read_next_entry_or_eof(&mut self) -> io::Result<Option<Entry>> { | |
357 | self.state = State::Default; | |
358 | self.entry.clear_data(); | |
359 | ||
2ab25a17 WB |
360 | let header: Header = match seq_read_entry_or_eof(&mut self.input).await? { |
361 | None => return Ok(None), | |
362 | Some(header) => header, | |
363 | }; | |
364 | ||
ec0761f9 FG |
365 | header.check_header_size()?; |
366 | ||
2ab25a17 WB |
367 | if header.htype == format::PXAR_HARDLINK { |
368 | // The only "dangling" header without an 'Entry' in front of it because it does not | |
369 | // carry its own metadata. | |
370 | self.current_header = header; | |
371 | ||
372 | // Hardlinks have no metadata and no additional items. | |
373 | self.entry.metadata = Metadata::default(); | |
374 | self.entry.kind = EntryKind::Hardlink(self.read_hardlink().await?); | |
375 | ||
376 | Ok(Some(self.entry.take())) | |
644e844d WB |
377 | } else if header.htype == format::PXAR_ENTRY || header.htype == format::PXAR_ENTRY_V1 { |
378 | if header.htype == format::PXAR_ENTRY { | |
379 | self.entry.metadata = Metadata { | |
380 | stat: seq_read_entry(&mut self.input).await?, | |
381 | ..Default::default() | |
382 | }; | |
383 | } else if header.htype == format::PXAR_ENTRY_V1 { | |
2ea8aff2 | 384 | let stat: format::Stat_V1 = seq_read_entry(&mut self.input).await?; |
644e844d WB |
385 | |
386 | self.entry.metadata = Metadata { | |
387 | stat: stat.into(), | |
388 | ..Default::default() | |
389 | }; | |
390 | } else { | |
391 | unreachable!(); | |
392 | } | |
6cd4f635 | 393 | |
2ab25a17 WB |
394 | self.current_header = unsafe { mem::zeroed() }; |
395 | ||
318462ea WB |
396 | loop { |
397 | match self.read_next_item_or_eof().await? { | |
398 | Some(ItemResult::Entry) => break, | |
399 | Some(ItemResult::Attribute) => continue, | |
a6e6873f WB |
400 | None if self.eof_after_entry => { |
401 | // Single FIFOs and sockets (as received from the Accessor) won't reach a | |
402 | // FILENAME/GOODBYE entry: | |
403 | if self.entry.metadata.is_fifo() { | |
404 | self.entry.kind = EntryKind::Fifo; | |
405 | } else if self.entry.metadata.is_socket() { | |
406 | self.entry.kind = EntryKind::Socket; | |
407 | } else { | |
408 | self.entry.kind = EntryKind::Directory; | |
409 | } | |
410 | break; | |
411 | } | |
318462ea WB |
412 | None => io_bail!("unexpected EOF in entry"), |
413 | } | |
414 | } | |
2ab25a17 WB |
415 | |
416 | if self.entry.is_dir() { | |
417 | self.path_lengths | |
418 | .push(self.entry.path.as_os_str().as_bytes().len()); | |
6cd4f635 | 419 | } |
6cd4f635 | 420 | |
2ab25a17 WB |
421 | Ok(Some(self.entry.take())) |
422 | } else { | |
710e6c8b | 423 | io_bail!("expected pxar entry of type 'Entry', got: {}", header,); |
6cd4f635 | 424 | } |
6cd4f635 WB |
425 | } |
426 | ||
427 | async fn read_next_entry(&mut self) -> io::Result<Entry> { | |
428 | self.read_next_entry_or_eof() | |
429 | .await? | |
430 | .ok_or_else(|| io_format_err!("unexpected EOF")) | |
431 | } | |
432 | ||
318462ea WB |
433 | async fn read_next_item(&mut self) -> io::Result<ItemResult> { |
434 | match self.read_next_item_or_eof().await? { | |
435 | Some(item) => Ok(item), | |
436 | None => io_bail!("unexpected EOF"), | |
437 | } | |
438 | } | |
439 | ||
440 | // NOTE: The random accessor will decode FIFOs and Sockets in a decoder instance with a ranged | |
441 | // reader so there is no PAYLOAD or GOODBYE TABLE to "end" an entry. | |
442 | // | |
06070d26 WB |
443 | // NOTE: This behavior method is also recreated in the accessor's `get_decoder_at_filename` |
444 | // function! Keep in mind when changing! | |
318462ea WB |
445 | async fn read_next_item_or_eof(&mut self) -> io::Result<Option<ItemResult>> { |
446 | match self.read_next_header_or_eof().await? { | |
447 | Some(()) => self.read_current_item().await.map(Some), | |
448 | None => Ok(None), | |
449 | } | |
6cd4f635 WB |
450 | } |
451 | ||
318462ea | 452 | async fn read_next_header_or_eof(&mut self) -> io::Result<Option<()>> { |
6cd4f635 WB |
453 | let dest = unsafe { |
454 | std::slice::from_raw_parts_mut( | |
455 | &mut self.current_header as *mut Header as *mut u8, | |
456 | size_of_val(&self.current_header), | |
457 | ) | |
458 | }; | |
ec0761f9 | 459 | |
318462ea WB |
460 | match seq_read_exact_or_eof(&mut self.input, dest).await? { |
461 | Some(()) => { | |
462 | self.current_header.check_header_size()?; | |
463 | Ok(Some(())) | |
464 | } | |
465 | None => Ok(None), | |
466 | } | |
6cd4f635 WB |
467 | } |
468 | ||
469 | /// Read the next item, the header is already loaded. | |
fd99ae79 | 470 | async fn read_current_item(&mut self) -> io::Result<ItemResult> { |
6cd4f635 WB |
471 | match self.current_header.htype { |
472 | format::PXAR_XATTR => { | |
473 | let xattr = self.read_xattr().await?; | |
474 | self.entry.metadata.xattrs.push(xattr); | |
475 | } | |
476 | format::PXAR_ACL_USER => { | |
477 | let entry = self.read_acl_user().await?; | |
478 | self.entry.metadata.acl.users.push(entry); | |
479 | } | |
480 | format::PXAR_ACL_GROUP => { | |
481 | let entry = self.read_acl_group().await?; | |
482 | self.entry.metadata.acl.groups.push(entry); | |
483 | } | |
484 | format::PXAR_ACL_GROUP_OBJ => { | |
485 | if self.entry.metadata.acl.group_obj.is_some() { | |
486 | io_bail!("multiple acl group object entries detected"); | |
487 | } | |
488 | let entry = self.read_acl_group_object().await?; | |
489 | self.entry.metadata.acl.group_obj = Some(entry); | |
490 | } | |
491 | format::PXAR_ACL_DEFAULT => { | |
492 | if self.entry.metadata.acl.default.is_some() { | |
493 | io_bail!("multiple acl default entries detected"); | |
494 | } | |
495 | let entry = self.read_acl_default().await?; | |
496 | self.entry.metadata.acl.default = Some(entry); | |
497 | } | |
498 | format::PXAR_ACL_DEFAULT_USER => { | |
499 | let entry = self.read_acl_user().await?; | |
500 | self.entry.metadata.acl.default_users.push(entry); | |
501 | } | |
502 | format::PXAR_ACL_DEFAULT_GROUP => { | |
503 | let entry = self.read_acl_group().await?; | |
504 | self.entry.metadata.acl.default_groups.push(entry); | |
505 | } | |
506 | format::PXAR_FCAPS => { | |
507 | if self.entry.metadata.fcaps.is_some() { | |
508 | io_bail!("multiple file capability entries detected"); | |
509 | } | |
510 | let entry = self.read_fcaps().await?; | |
511 | self.entry.metadata.fcaps = Some(entry); | |
512 | } | |
513 | format::PXAR_QUOTA_PROJID => { | |
514 | if self.entry.metadata.quota_project_id.is_some() { | |
515 | io_bail!("multiple quota project id entries detected"); | |
516 | } | |
517 | let entry = self.read_quota_project_id().await?; | |
518 | self.entry.metadata.quota_project_id = Some(entry); | |
519 | } | |
520 | format::PXAR_SYMLINK => { | |
521 | self.entry.kind = EntryKind::Symlink(self.read_symlink().await?); | |
522 | return Ok(ItemResult::Entry); | |
523 | } | |
2ab25a17 | 524 | format::PXAR_HARDLINK => io_bail!("encountered unexpected hardlink entry"), |
6cd4f635 WB |
525 | format::PXAR_DEVICE => { |
526 | self.entry.kind = EntryKind::Device(self.read_device().await?); | |
527 | return Ok(ItemResult::Entry); | |
528 | } | |
529 | format::PXAR_PAYLOAD => { | |
951620f1 | 530 | let offset = seq_read_position(&mut self.input).await.transpose()?; |
6cd4f635 WB |
531 | self.entry.kind = EntryKind::File { |
532 | size: self.current_header.content_size(), | |
c76d3f98 | 533 | offset, |
6cd4f635 | 534 | }; |
6100072b | 535 | self.state = State::InPayload { offset: 0 }; |
6cd4f635 WB |
536 | return Ok(ItemResult::Entry); |
537 | } | |
538 | format::PXAR_FILENAME | format::PXAR_GOODBYE => { | |
2287d8b2 WB |
539 | if self.entry.metadata.is_fifo() { |
540 | self.state = State::InSpecialFile; | |
541 | self.entry.kind = EntryKind::Fifo; | |
2287d8b2 WB |
542 | } else if self.entry.metadata.is_socket() { |
543 | self.state = State::InSpecialFile; | |
544 | self.entry.kind = EntryKind::Socket; | |
2287d8b2 WB |
545 | } else { |
546 | // As a shortcut this is copy-pasted to `next_do`'s `InSpecialFile` case. | |
547 | // Keep in mind when editing this! | |
548 | self.state = State::InDirectory; | |
549 | self.entry.kind = EntryKind::Directory; | |
2287d8b2 | 550 | } |
d4a04d53 | 551 | return Ok(ItemResult::Entry); |
6cd4f635 | 552 | } |
4a13b8a3 | 553 | _ => io_bail!("unexpected entry type: {}", self.current_header), |
6cd4f635 WB |
554 | } |
555 | ||
556 | Ok(ItemResult::Attribute) | |
557 | } | |
558 | ||
559 | // | |
560 | // Local read helpers. | |
561 | // | |
562 | // These utilize additional information and hence are not part of the `dyn SeqRead` impl. | |
563 | // | |
564 | ||
6100072b WB |
565 | async fn skip_entry(&mut self, offset: u64) -> io::Result<()> { |
566 | let mut len = self.current_header.content_size() - offset; | |
6cd4f635 WB |
567 | let scratch = scratch_buffer(); |
568 | while len >= (scratch.len() as u64) { | |
951620f1 | 569 | seq_read_exact(&mut self.input, scratch).await?; |
6cd4f635 WB |
570 | len -= scratch.len() as u64; |
571 | } | |
572 | let len = len as usize; | |
573 | if len > 0 { | |
951620f1 | 574 | seq_read_exact(&mut self.input, &mut scratch[..len]).await?; |
6cd4f635 WB |
575 | } |
576 | Ok(()) | |
577 | } | |
578 | ||
579 | async fn read_entry_as_bytes(&mut self) -> io::Result<Vec<u8>> { | |
580 | let size = usize::try_from(self.current_header.content_size()).map_err(io_err_other)?; | |
951620f1 | 581 | let data = seq_read_exact_data(&mut self.input, size).await?; |
6cd4f635 WB |
582 | Ok(data) |
583 | } | |
584 | ||
585 | /// Helper to read a struct entry while checking its size. | |
0a00a48c | 586 | async fn read_simple_entry<T: Endian + 'static>( |
6cd4f635 WB |
587 | &mut self, |
588 | what: &'static str, | |
0a00a48c WB |
589 | ) -> io::Result<T> { |
590 | if self.current_header.content_size() != (size_of::<T>() as u64) { | |
6cd4f635 WB |
591 | io_bail!( |
592 | "bad {} size: {} (expected {})", | |
593 | what, | |
594 | self.current_header.content_size(), | |
0a00a48c | 595 | size_of::<T>(), |
6cd4f635 WB |
596 | ); |
597 | } | |
951620f1 | 598 | seq_read_entry(&mut self.input).await |
6cd4f635 WB |
599 | } |
600 | ||
601 | // | |
602 | // Read functions for PXAR components. | |
603 | // | |
604 | ||
605 | async fn read_xattr(&mut self) -> io::Result<format::XAttr> { | |
606 | let data = self.read_entry_as_bytes().await?; | |
607 | ||
608 | let name_len = data | |
609 | .iter() | |
610 | .position(|c| *c == 0) | |
611 | .ok_or_else(|| io_format_err!("missing value separator in xattr"))?; | |
612 | ||
613 | Ok(format::XAttr { data, name_len }) | |
614 | } | |
615 | ||
616 | async fn read_symlink(&mut self) -> io::Result<format::Symlink> { | |
617 | let data = self.read_entry_as_bytes().await?; | |
618 | Ok(format::Symlink { data }) | |
619 | } | |
620 | ||
621 | async fn read_hardlink(&mut self) -> io::Result<format::Hardlink> { | |
28be6927 WB |
622 | let content_size = |
623 | usize::try_from(self.current_header.content_size()).map_err(io_err_other)?; | |
624 | ||
625 | if content_size <= size_of::<u64>() { | |
626 | io_bail!("bad hardlink entry (too small)"); | |
627 | } | |
628 | let data_size = content_size - size_of::<u64>(); | |
629 | ||
630 | let offset: u64 = seq_read_entry(&mut self.input).await?; | |
631 | let data = seq_read_exact_data(&mut self.input, data_size).await?; | |
632 | ||
b0752929 | 633 | Ok(format::Hardlink { offset, data }) |
6cd4f635 WB |
634 | } |
635 | ||
636 | async fn read_device(&mut self) -> io::Result<format::Device> { | |
637 | self.read_simple_entry("device").await | |
638 | } | |
639 | ||
640 | async fn read_fcaps(&mut self) -> io::Result<format::FCaps> { | |
641 | let data = self.read_entry_as_bytes().await?; | |
642 | Ok(format::FCaps { data }) | |
643 | } | |
644 | ||
645 | async fn read_acl_user(&mut self) -> io::Result<format::acl::User> { | |
646 | self.read_simple_entry("acl user").await | |
647 | } | |
648 | ||
649 | async fn read_acl_group(&mut self) -> io::Result<format::acl::Group> { | |
650 | self.read_simple_entry("acl group").await | |
651 | } | |
652 | ||
653 | async fn read_acl_group_object(&mut self) -> io::Result<format::acl::GroupObject> { | |
654 | self.read_simple_entry("acl group object").await | |
655 | } | |
656 | ||
657 | async fn read_acl_default(&mut self) -> io::Result<format::acl::Default> { | |
658 | self.read_simple_entry("acl default").await | |
659 | } | |
660 | ||
661 | async fn read_quota_project_id(&mut self) -> io::Result<format::QuotaProjectId> { | |
662 | self.read_simple_entry("quota project id").await | |
663 | } | |
664 | } | |
6100072b | 665 | |
e5a2495e | 666 | /// Reader for file contents inside a pxar archive. |
6e91d157 WB |
667 | pub struct Contents<'a, T: SeqRead> { |
668 | input: &'a mut T, | |
6100072b WB |
669 | at: &'a mut u64, |
670 | len: u64, | |
671 | } | |
672 | ||
6e91d157 | 673 | impl<'a, T: SeqRead> Contents<'a, T> { |
e5a2495e | 674 | fn new(input: &'a mut T, at: &'a mut u64, len: u64) -> Self { |
6100072b WB |
675 | Self { input, at, len } |
676 | } | |
677 | ||
678 | #[inline] | |
679 | fn remaining(&self) -> u64 { | |
680 | self.len - *self.at | |
681 | } | |
682 | } | |
683 | ||
6e91d157 | 684 | impl<'a, T: SeqRead> SeqRead for Contents<'a, T> { |
6100072b WB |
685 | fn poll_seq_read( |
686 | mut self: Pin<&mut Self>, | |
687 | cx: &mut Context, | |
688 | buf: &mut [u8], | |
689 | ) -> Poll<io::Result<usize>> { | |
690 | let max_read = (buf.len() as u64).min(self.remaining()) as usize; | |
691 | if max_read == 0 { | |
8eb622dd | 692 | return Poll::Ready(Ok(0)); |
6100072b WB |
693 | } |
694 | ||
695 | let buf = &mut buf[..max_read]; | |
8eb622dd | 696 | let got = ready!(unsafe { Pin::new_unchecked(&mut *self.input) }.poll_seq_read(cx, buf))?; |
6100072b WB |
697 | *self.at += got as u64; |
698 | Poll::Ready(Ok(got)) | |
699 | } | |
700 | ||
701 | fn poll_position(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<io::Result<u64>>> { | |
8eb622dd | 702 | unsafe { Pin::new_unchecked(&mut *self.input) }.poll_position(cx) |
6100072b WB |
703 | } |
704 | } |