]> git.proxmox.com Git - pxar.git/blame - src/decoder.rs
fix hardlink format
[pxar.git] / src / decoder.rs
CommitLineData
6cd4f635
WB
1//! The `pxar` decoder state machine.
2//!
3//! This is the implementation used by both the synchronous and async pxar wrappers.
4
5use std::convert::TryFrom;
6use std::ffi::OsString;
7use std::io;
8use std::mem::{self, size_of, size_of_val, MaybeUninit};
9use std::os::unix::ffi::{OsStrExt, OsStringExt};
10use std::path::{Path, PathBuf};
11use std::pin::Pin;
12use std::task::{Context, Poll};
13
14//use std::os::unix::fs::FileExt;
15
16use endian_trait::Endian;
17
18use crate::format::{self, Header};
19use crate::poll_fn::poll_fn;
20use crate::util::{self, io_err_other};
21use crate::{Entry, EntryKind, Metadata};
22
23pub mod aio;
24pub mod sync;
25
26#[doc(inline)]
27pub use sync::Decoder;
28
/// Shared scratch space used as a throw-away read target when skipping through non-seekable
/// files.
static mut SCRATCH_BUFFER: MaybeUninit<[u8; 4096]> = MaybeUninit::uninit();

/// Borrow the global scratch buffer as a mutable byte slice.
///
/// NOTE(review): every call returns a `&'static mut` to the same static, so two results that are
/// alive at the same time alias each other. This presumably relies on the contents never being
/// interpreted - confirm that no caller depends on what ends up in the buffer.
fn scratch_buffer() -> &'static mut [u8] {
    // The array reference unsizes to a slice at the return site.
    let whole: &'static mut [u8; 4096] = unsafe { &mut *SCRATCH_BUFFER.as_mut_ptr() };
    whole
}
35
/// Minimal sequential input abstraction driving the decoder's state machine.
///
/// Walking through a directory only requires something equivalent to `poll_read()`.
///
/// In addition there is `poll_position()`, which types supporting `Seek` or `AsyncSeek` can
/// provide. When it is available, the starting position of each entry is recorded (and exposed
/// via `Entry::offset()`), which allows jumping between entries.
pub trait SeqRead {
    /// Read the next bytes of the input into `buf`.
    ///
    /// Reading sequentially is what we mostly do, so this is basically the `AsyncRead`
    /// equivalent.
    fn poll_seq_read(
        self: Pin<&mut Self>,
        cx: &mut Context,
        buf: &mut [u8],
    ) -> Poll<io::Result<usize>>;

    /// Report the current offset within the input so it can be noted down for later use.
    ///
    /// Readers which support neither seeking nor positional reads can keep this default, which
    /// always yields `None`.
    fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
        let unsupported = None;
        Poll::Ready(unsupported)
    }
}
58
59/// Allow using trait objects for generics taking a `SeqRead`:
60impl<'a> SeqRead for &mut (dyn SeqRead + 'a) {
61 fn poll_seq_read(
62 self: Pin<&mut Self>,
63 cx: &mut Context,
64 buf: &mut [u8],
65 ) -> Poll<io::Result<usize>> {
66 unsafe {
67 self.map_unchecked_mut(|this| &mut **this)
68 .poll_seq_read(cx, buf)
69 }
70 }
71
72 fn poll_position(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
73 unsafe { self.map_unchecked_mut(|this| &mut **this).poll_position(cx) }
74 }
75}
76
951620f1
WB
77/// awaitable version of `poll_position`.
78pub(crate) async fn seq_read_position<T: SeqRead + ?Sized>(
79 input: &mut T,
80) -> Option<io::Result<u64>> {
81 poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *input).poll_position(cx) }).await
82}
6cd4f635 83
951620f1
WB
84/// awaitable version of `poll_seq_read`.
85pub(crate) async fn seq_read<T: SeqRead + ?Sized>(
86 input: &mut T,
87 buf: &mut [u8],
88) -> io::Result<usize> {
89 poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *input).poll_seq_read(cx, buf) }).await
90}
6cd4f635 91
951620f1
WB
92/// `read_exact` - since that's what we _actually_ want most of the time, but with EOF handling
93async fn seq_read_exact_or_eof<T>(input: &mut T, mut buf: &mut [u8]) -> io::Result<Option<()>>
94where
95 T: SeqRead + ?Sized,
96{
97 let mut eof_ok = true;
98 while !buf.is_empty() {
99 match seq_read(&mut *input, buf).await? {
100 0 if eof_ok => return Ok(None),
101 0 => io_bail!("unexpected EOF"),
102 got => buf = &mut buf[got..],
6cd4f635 103 }
951620f1 104 eof_ok = false;
6cd4f635 105 }
951620f1
WB
106 Ok(Some(()))
107}
6cd4f635 108
951620f1
WB
109/// `read_exact` - since that's what we _actually_ want most of the time.
110async fn seq_read_exact<T: SeqRead + ?Sized>(input: &mut T, buf: &mut [u8]) -> io::Result<()> {
111 match seq_read_exact_or_eof(input, buf).await? {
112 Some(()) => Ok(()),
113 None => io_bail!("unexpected eof"),
6cd4f635 114 }
951620f1 115}
6cd4f635 116
951620f1
WB
117/// Helper to read into an allocated byte vector.
118async fn seq_read_exact_data<T>(input: &mut T, size: usize) -> io::Result<Vec<u8>>
119where
120 T: SeqRead + ?Sized,
121{
122 let mut data = util::vec_new(size);
123 seq_read_exact(input, &mut data[..]).await?;
124 Ok(data)
125}
6cd4f635 126
951620f1
WB
127/// `seq_read_entry` with EOF handling
128async fn seq_read_entry_or_eof<T, E>(input: &mut T) -> io::Result<Option<E>>
129where
130 T: SeqRead + ?Sized,
131 E: Endian,
132{
133 let mut data = MaybeUninit::<E>::uninit();
134 let buf =
135 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<E>()) };
136 if seq_read_exact_or_eof(input, buf).await?.is_none() {
137 return Ok(None);
6cd4f635 138 }
951620f1
WB
139 Ok(Some(unsafe { data.assume_init().from_le() }))
140}
6cd4f635 141
951620f1
WB
142/// Helper to read into an `Endian`-implementing `struct`.
143async fn seq_read_entry<T: SeqRead + ?Sized, E: Endian>(input: &mut T) -> io::Result<E> {
144 seq_read_entry_or_eof(input)
145 .await?
146 .ok_or_else(|| io_format_err!("unexepcted EOF"))
6cd4f635
WB
147}
148
149/// The decoder state machine implementation.
150///
151/// We use `async fn` to implement the decoder state machine so that we can easily plug in both
152/// synchronous or `async` I/O objects in as input.
5cf335be 153pub(crate) struct DecoderImpl<T> {
6cd4f635
WB
154 input: T,
155 current_header: Header,
156 entry: Entry,
157 path_lengths: Vec<usize>,
158 state: State,
159 with_goodbye_tables: bool,
160}
161
/// Position of the decoder within the archive structure.
enum State {
    /// Nothing has been read yet.
    Begin,

    /// An entry has been finished completely; the next item still needs to be read.
    Default,

    /// We are at a file's payload.
    InPayload {
        /// Number of payload bytes which have already been consumed.
        offset: u64,
    },

    /// file entries with no data (fifo, socket)
    InSpecialFile,

    /// The current header is a goodbye item whose contents still need to be skipped.
    InGoodbyeTable,

    /// We're at the next `FILENAME` or `GOODBYE` item of a directory.
    InDirectory,

    /// The end of the archive has been reached.
    Eof,
}
176
/// Tells the item parsing loop whether to keep going.
///
/// An entry usually consists of several attribute items which we go through one by one. Parsing
/// stops once the end of the entry is reached.
/// For directories this means we stop at the beginning of their contents.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ItemResult {
    /// An "attribute" item was parsed, more items of the same entry follow.
    Attribute,

    /// An entry was finished (`SYMLINK`, `HARDLINK`, ...), or we just entered the contents of a
    /// directory (`FILENAME`, `GOODBYE`).
    ///
    /// Parsing does not move forward beyond this point.
    Entry,
}
193
0a00a48c
WB
194impl<I: SeqRead> DecoderImpl<I> {
195 pub async fn new(input: I) -> io::Result<Self> {
6cd4f635
WB
196 Self::new_full(input, "/".into()).await
197 }
198
0a00a48c 199 pub(crate) async fn new_full(input: I, path: PathBuf) -> io::Result<Self> {
6cd4f635
WB
200 let this = DecoderImpl {
201 input,
202 current_header: unsafe { mem::zeroed() },
203 entry: Entry {
204 path,
27631c16 205 kind: EntryKind::GoodbyeTable,
6cd4f635 206 metadata: Metadata::default(),
6cd4f635
WB
207 },
208 path_lengths: Vec::new(),
209 state: State::Begin,
210 with_goodbye_tables: false,
211 };
212
213 // this.read_next_entry().await?;
214
215 Ok(this)
216 }
217
218 /// Get the next file entry, recursing into directories.
219 pub async fn next(&mut self) -> Option<io::Result<Entry>> {
220 self.next_do().await.transpose()
221 }
222
223 pub(crate) async fn next_do(&mut self) -> io::Result<Option<Entry>> {
224 loop {
225 match self.state {
226 State::Eof => return Ok(None),
227 State::Begin => return self.read_next_entry().await.map(Some),
228 State::Default => {
229 // we completely finished an entry, so now we're going "up" in the directory
230 // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE:
231 self.read_next_item().await?;
232 }
8eb622dd 233 State::InPayload { offset } => {
6cd4f635 234 // We need to skip the current payload first.
6100072b 235 self.skip_entry(offset).await?;
6cd4f635
WB
236 self.read_next_item().await?;
237 }
f7b824c3
WB
238 State::InGoodbyeTable => {
239 self.skip_entry(0).await?;
4c42ef2e
WB
240 if self.path_lengths.pop().is_none() {
241 // The root directory has an entry containing '1'.
242 io_bail!("unexpected EOF in goodbye table");
f7b824c3 243 }
4c42ef2e
WB
244
245 if self.path_lengths.is_empty() {
246 // we are at the end of the archive now
247 self.state = State::Eof;
248 return Ok(None);
249 }
250
251 // We left the directory, now keep going in our parent.
252 self.state = State::Default;
253 continue;
f7b824c3 254 }
2287d8b2
WB
255 State::InSpecialFile => {
256 self.entry.clear_data();
257 self.state = State::InDirectory;
258 self.entry.kind = EntryKind::Directory;
259 }
6cd4f635
WB
260 State::InDirectory => {
261 // We're at the next FILENAME or GOODBYE item.
262 }
263 }
264
265 match self.current_header.htype {
266 format::PXAR_FILENAME => return self.handle_file_entry().await,
267 format::PXAR_GOODBYE => {
f7b824c3
WB
268 self.state = State::InGoodbyeTable;
269
6cd4f635 270 if self.with_goodbye_tables {
af356979
WB
271 self.entry.clear_data();
272 return Ok(Some(Entry {
273 path: PathBuf::new(),
274 metadata: Metadata::default(),
275 kind: EntryKind::GoodbyeTable,
276 }));
6cd4f635 277 } else {
f7b824c3
WB
278 // go up to goodbye table handling
279 continue;
6cd4f635
WB
280 }
281 }
282 h => io_bail!(
283 "expected filename or directory-goodbye pxar entry, got: {:x}",
284 h
285 ),
286 }
287 }
288 }
289
3a11ff3e
WB
290 pub fn content_size(&self) -> Option<u64> {
291 if let State::InPayload { .. } = self.state {
292 Some(self.current_header.content_size())
293 } else {
294 None
295 }
296 }
297
6e91d157 298 pub fn content_reader<'a>(&'a mut self) -> Option<Contents<'a, I>> {
6100072b
WB
299 if let State::InPayload { offset } = &mut self.state {
300 Some(Contents::new(
301 &mut self.input,
302 offset,
303 self.current_header.content_size(),
304 ))
305 } else {
306 None
307 }
308 }
309
6cd4f635
WB
310 async fn handle_file_entry(&mut self) -> io::Result<Option<Entry>> {
311 let mut data = self.read_entry_as_bytes().await?;
312
313 // filenames are zero terminated!
314 if data.pop() != Some(0) {
315 io_bail!("illegal path found (missing terminating zero)");
316 }
317 if data.is_empty() {
318 io_bail!("illegal path found (empty)");
319 }
320
321 let path = PathBuf::from(OsString::from_vec(data));
322 self.set_path(&path)?;
323 self.read_next_entry().await.map(Some)
324 }
325
326 fn reset_path(&mut self) -> io::Result<()> {
327 let path_len = *self
328 .path_lengths
329 .last()
330 .ok_or_else(|| io_format_err!("internal decoder error: path underrun"))?;
331 let mut path = mem::replace(&mut self.entry.path, PathBuf::new())
332 .into_os_string()
333 .into_vec();
334 path.truncate(path_len);
335 self.entry.path = PathBuf::from(OsString::from_vec(path));
336 Ok(())
337 }
338
339 fn set_path(&mut self, path: &Path) -> io::Result<()> {
340 self.reset_path()?;
341 self.entry.path.push(path);
342 Ok(())
343 }
344
345 async fn read_next_entry_or_eof(&mut self) -> io::Result<Option<Entry>> {
346 self.state = State::Default;
347 self.entry.clear_data();
348
2ab25a17
WB
349 let header: Header = match seq_read_entry_or_eof(&mut self.input).await? {
350 None => return Ok(None),
351 Some(header) => header,
352 };
353
354 if header.htype == format::PXAR_HARDLINK {
355 // The only "dangling" header without an 'Entry' in front of it because it does not
356 // carry its own metadata.
357 self.current_header = header;
358
359 // Hardlinks have no metadata and no additional items.
360 self.entry.metadata = Metadata::default();
361 self.entry.kind = EntryKind::Hardlink(self.read_hardlink().await?);
362
363 Ok(Some(self.entry.take()))
364 } else if header.htype == format::PXAR_ENTRY {
365 self.entry.metadata = Metadata {
366 stat: seq_read_entry(&mut self.input).await?,
367 ..Default::default()
368 };
6cd4f635 369
2ab25a17
WB
370 self.current_header = unsafe { mem::zeroed() };
371
372 while self.read_next_item().await? != ItemResult::Entry {}
373
374 if self.entry.is_dir() {
375 self.path_lengths
376 .push(self.entry.path.as_os_str().as_bytes().len());
6cd4f635 377 }
6cd4f635 378
2ab25a17
WB
379 Ok(Some(self.entry.take()))
380 } else {
6cd4f635
WB
381 io_bail!(
382 "expected pxar entry of type 'Entry', got: {:x}",
2ab25a17 383 header.htype
6cd4f635
WB
384 );
385 }
6cd4f635
WB
386 }
387
388 async fn read_next_entry(&mut self) -> io::Result<Entry> {
389 self.read_next_entry_or_eof()
390 .await?
391 .ok_or_else(|| io_format_err!("unexpected EOF"))
392 }
393
394 async fn read_next_item(&mut self) -> io::Result<ItemResult> {
395 self.read_next_header().await?;
396 self.read_current_item().await
397 }
398
399 async fn read_next_header(&mut self) -> io::Result<()> {
400 let dest = unsafe {
401 std::slice::from_raw_parts_mut(
402 &mut self.current_header as *mut Header as *mut u8,
403 size_of_val(&self.current_header),
404 )
405 };
951620f1 406 seq_read_exact(&mut self.input, dest).await?;
6cd4f635
WB
407 Ok(())
408 }
409
410 /// Read the next item, the header is already loaded.
411 async fn read_current_item(&mut self) -> io::Result<ItemResult> {
412 match self.current_header.htype {
413 format::PXAR_XATTR => {
414 let xattr = self.read_xattr().await?;
415 self.entry.metadata.xattrs.push(xattr);
416 }
417 format::PXAR_ACL_USER => {
418 let entry = self.read_acl_user().await?;
419 self.entry.metadata.acl.users.push(entry);
420 }
421 format::PXAR_ACL_GROUP => {
422 let entry = self.read_acl_group().await?;
423 self.entry.metadata.acl.groups.push(entry);
424 }
425 format::PXAR_ACL_GROUP_OBJ => {
426 if self.entry.metadata.acl.group_obj.is_some() {
427 io_bail!("multiple acl group object entries detected");
428 }
429 let entry = self.read_acl_group_object().await?;
430 self.entry.metadata.acl.group_obj = Some(entry);
431 }
432 format::PXAR_ACL_DEFAULT => {
433 if self.entry.metadata.acl.default.is_some() {
434 io_bail!("multiple acl default entries detected");
435 }
436 let entry = self.read_acl_default().await?;
437 self.entry.metadata.acl.default = Some(entry);
438 }
439 format::PXAR_ACL_DEFAULT_USER => {
440 let entry = self.read_acl_user().await?;
441 self.entry.metadata.acl.default_users.push(entry);
442 }
443 format::PXAR_ACL_DEFAULT_GROUP => {
444 let entry = self.read_acl_group().await?;
445 self.entry.metadata.acl.default_groups.push(entry);
446 }
447 format::PXAR_FCAPS => {
448 if self.entry.metadata.fcaps.is_some() {
449 io_bail!("multiple file capability entries detected");
450 }
451 let entry = self.read_fcaps().await?;
452 self.entry.metadata.fcaps = Some(entry);
453 }
454 format::PXAR_QUOTA_PROJID => {
455 if self.entry.metadata.quota_project_id.is_some() {
456 io_bail!("multiple quota project id entries detected");
457 }
458 let entry = self.read_quota_project_id().await?;
459 self.entry.metadata.quota_project_id = Some(entry);
460 }
461 format::PXAR_SYMLINK => {
462 self.entry.kind = EntryKind::Symlink(self.read_symlink().await?);
463 return Ok(ItemResult::Entry);
464 }
2ab25a17 465 format::PXAR_HARDLINK => io_bail!("encountered unexpected hardlink entry"),
6cd4f635
WB
466 format::PXAR_DEVICE => {
467 self.entry.kind = EntryKind::Device(self.read_device().await?);
468 return Ok(ItemResult::Entry);
469 }
470 format::PXAR_PAYLOAD => {
951620f1 471 let offset = seq_read_position(&mut self.input).await.transpose()?;
6cd4f635
WB
472 self.entry.kind = EntryKind::File {
473 size: self.current_header.content_size(),
c76d3f98 474 offset,
6cd4f635 475 };
6100072b 476 self.state = State::InPayload { offset: 0 };
6cd4f635
WB
477 return Ok(ItemResult::Entry);
478 }
479 format::PXAR_FILENAME | format::PXAR_GOODBYE => {
2287d8b2
WB
480 if self.entry.metadata.is_fifo() {
481 self.state = State::InSpecialFile;
482 self.entry.kind = EntryKind::Fifo;
483 return Ok(ItemResult::Entry);
484 } else if self.entry.metadata.is_socket() {
485 self.state = State::InSpecialFile;
486 self.entry.kind = EntryKind::Socket;
487 return Ok(ItemResult::Entry);
488 } else {
489 // As a shortcut this is copy-pasted to `next_do`'s `InSpecialFile` case.
490 // Keep in mind when editing this!
491 self.state = State::InDirectory;
492 self.entry.kind = EntryKind::Directory;
493 return Ok(ItemResult::Entry);
494 }
6cd4f635
WB
495 }
496 _ => io_bail!("unexpected entry type: {:x}", self.current_header.htype),
497 }
498
499 Ok(ItemResult::Attribute)
500 }
501
502 //
503 // Local read helpers.
504 //
505 // These utilize additional information and hence are not part of the `dyn SeqRead` impl.
506 //
507
6100072b
WB
508 async fn skip_entry(&mut self, offset: u64) -> io::Result<()> {
509 let mut len = self.current_header.content_size() - offset;
6cd4f635
WB
510 let scratch = scratch_buffer();
511 while len >= (scratch.len() as u64) {
951620f1 512 seq_read_exact(&mut self.input, scratch).await?;
6cd4f635
WB
513 len -= scratch.len() as u64;
514 }
515 let len = len as usize;
516 if len > 0 {
951620f1 517 seq_read_exact(&mut self.input, &mut scratch[..len]).await?;
6cd4f635
WB
518 }
519 Ok(())
520 }
521
522 async fn read_entry_as_bytes(&mut self) -> io::Result<Vec<u8>> {
523 let size = usize::try_from(self.current_header.content_size()).map_err(io_err_other)?;
951620f1 524 let data = seq_read_exact_data(&mut self.input, size).await?;
6cd4f635
WB
525 Ok(data)
526 }
527
528 /// Helper to read a struct entry while checking its size.
0a00a48c 529 async fn read_simple_entry<T: Endian + 'static>(
6cd4f635
WB
530 &mut self,
531 what: &'static str,
0a00a48c
WB
532 ) -> io::Result<T> {
533 if self.current_header.content_size() != (size_of::<T>() as u64) {
6cd4f635
WB
534 io_bail!(
535 "bad {} size: {} (expected {})",
536 what,
537 self.current_header.content_size(),
0a00a48c 538 size_of::<T>(),
6cd4f635
WB
539 );
540 }
951620f1 541 seq_read_entry(&mut self.input).await
6cd4f635
WB
542 }
543
544 //
545 // Read functions for PXAR components.
546 //
547
548 async fn read_xattr(&mut self) -> io::Result<format::XAttr> {
549 let data = self.read_entry_as_bytes().await?;
550
551 let name_len = data
552 .iter()
553 .position(|c| *c == 0)
554 .ok_or_else(|| io_format_err!("missing value separator in xattr"))?;
555
556 Ok(format::XAttr { data, name_len })
557 }
558
559 async fn read_symlink(&mut self) -> io::Result<format::Symlink> {
560 let data = self.read_entry_as_bytes().await?;
561 Ok(format::Symlink { data })
562 }
563
564 async fn read_hardlink(&mut self) -> io::Result<format::Hardlink> {
565 let data = self.read_entry_as_bytes().await?;
566 Ok(format::Hardlink { data })
567 }
568
569 async fn read_device(&mut self) -> io::Result<format::Device> {
570 self.read_simple_entry("device").await
571 }
572
573 async fn read_fcaps(&mut self) -> io::Result<format::FCaps> {
574 let data = self.read_entry_as_bytes().await?;
575 Ok(format::FCaps { data })
576 }
577
578 async fn read_acl_user(&mut self) -> io::Result<format::acl::User> {
579 self.read_simple_entry("acl user").await
580 }
581
582 async fn read_acl_group(&mut self) -> io::Result<format::acl::Group> {
583 self.read_simple_entry("acl group").await
584 }
585
586 async fn read_acl_group_object(&mut self) -> io::Result<format::acl::GroupObject> {
587 self.read_simple_entry("acl group object").await
588 }
589
590 async fn read_acl_default(&mut self) -> io::Result<format::acl::Default> {
591 self.read_simple_entry("acl default").await
592 }
593
594 async fn read_quota_project_id(&mut self) -> io::Result<format::QuotaProjectId> {
595 self.read_simple_entry("quota project id").await
596 }
597}
6100072b 598
6e91d157
WB
599pub struct Contents<'a, T: SeqRead> {
600 input: &'a mut T,
6100072b
WB
601 at: &'a mut u64,
602 len: u64,
603}
604
6e91d157
WB
605impl<'a, T: SeqRead> Contents<'a, T> {
606 pub fn new(input: &'a mut T, at: &'a mut u64, len: u64) -> Self {
6100072b
WB
607 Self { input, at, len }
608 }
609
610 #[inline]
611 fn remaining(&self) -> u64 {
612 self.len - *self.at
613 }
614}
615
6e91d157 616impl<'a, T: SeqRead> SeqRead for Contents<'a, T> {
6100072b
WB
617 fn poll_seq_read(
618 mut self: Pin<&mut Self>,
619 cx: &mut Context,
620 buf: &mut [u8],
621 ) -> Poll<io::Result<usize>> {
622 let max_read = (buf.len() as u64).min(self.remaining()) as usize;
623 if max_read == 0 {
8eb622dd 624 return Poll::Ready(Ok(0));
6100072b
WB
625 }
626
627 let buf = &mut buf[..max_read];
8eb622dd 628 let got = ready!(unsafe { Pin::new_unchecked(&mut *self.input) }.poll_seq_read(cx, buf))?;
6100072b
WB
629 *self.at += got as u64;
630 Poll::Ready(Ok(got))
631 }
632
633 fn poll_position(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
8eb622dd 634 unsafe { Pin::new_unchecked(&mut *self.input) }.poll_position(cx)
6100072b
WB
635 }
636}