1 //! The `pxar` decoder state machine.
3 //! This is the implementation used by both the synchronous and async pxar wrappers.
5 use std
::convert
::TryFrom
;
6 use std
::ffi
::OsString
;
8 use std
::mem
::{self, size_of, size_of_val, MaybeUninit}
;
9 use std
::os
::unix
::ffi
::{OsStrExt, OsStringExt}
;
10 use std
::path
::{Path, PathBuf}
;
12 use std
::task
::{Context, Poll}
;
14 //use std::os::unix::fs::FileExt;
16 use endian_trait
::Endian
;
18 use crate::format
::{self, Header}
;
19 use crate::poll_fn
::poll_fn
;
20 use crate::util
::{self, io_err_other}
;
21 use crate::{Entry, EntryKind, Metadata}
;
27 pub use sync
::Decoder
;
/// Scratch space used to skip through non-seekable files.
///
/// The contents are never meaningful: data that has to be skipped is read into this buffer and
/// then thrown away. The buffer is zero-initialized so that handing it out as `&mut [u8]` never
/// exposes uninitialized memory.
static mut SCRATCH_BUFFER: [u8; 4096] = [0u8; 4096];

/// Returns the shared scratch buffer as a mutable byte slice.
///
/// NOTE(review): every call hands out a reference to the same static storage, so two results
/// that are alive at the same time alias each other. Callers must only use the slice as a
/// write-only dump for discarded data and must not rely on its contents.
fn scratch_buffer() -> &'static mut [u8] {
    // SAFETY: `SCRATCH_BUFFER` is a fully initialized byte array, so every bit pattern stored in
    // it is a valid `u8`. `addr_of_mut!` takes the address without creating an intermediate
    // reference to the `static mut`.
    unsafe { &mut *std::ptr::addr_of_mut!(SCRATCH_BUFFER) }
}
36 /// Sequential read interface used by the decoder's state machine.
38 /// To simply iterate through a directory we just need the equivalent of `poll_read()`.
40 /// Currently we also have a `poll_position()` method which can be added for types supporting
41 /// `Seek` or `AsyncSeek`. In this case the starting position of each entry becomes available
42 /// (accessible via the `Entry::offset()`), to allow jumping between entries.
44 /// Mostly we want to read sequentially, so this is basically an `AsyncRead` equivalent.
49 ) -> Poll
<io
::Result
<usize>>;
51 /// While going through the data we may want to take notes about some offsets within the file
52 /// for later. If the reader does not support seeking or positional reading, this can just
54 fn poll_position(self: Pin
<&mut Self>, _cx
: &mut Context
) -> Poll
<Option
<io
::Result
<u64>>> {
59 /// Allow using trait objects for generics taking a `SeqRead`:
// Both methods forward to the `dyn SeqRead` behind the mutable reference.
60 impl<'a
> SeqRead
for &mut (dyn SeqRead
+ 'a
) {
65 ) -> Poll
<io
::Result
<usize>> {
// Re-project the pin onto the referenced trait object and delegate the read to it.
67 self.map_unchecked_mut(|this
| &mut **this
)
68 .poll_seq_read(cx
, buf
)
// Position queries are delegated to the referenced trait object as well.
72 fn poll_position(self: Pin
<&mut Self>, cx
: &mut Context
) -> Poll
<Option
<io
::Result
<u64>>> {
// SAFETY (review): the closure only reborrows through the stored `&mut`, it does not move the
// pointee. This presumably relies on the referenced object not being moved while the
// projected pin is in use - confirm.
73 unsafe { self.map_unchecked_mut(|this| &mut **this).poll_position(cx) }
77 /// We do not want to bother with actual polling, so we implement `async fn` variants of the above
80 /// The reason why this is not an internal `SeqReadExt` trait like `AsyncReadExt` is simply that
81 /// we'd then need to define all the `Future` types they return manually and explicitly. Since we
82 /// have no use for them, all we want is the ability to use `async fn`...
84 /// The downside is that we need some `(&mut self.input as &mut dyn SeqRead)` casts in the
85 /// decoder's code, but that's fine.
86 impl<'a
> dyn SeqRead
+ 'a
{
87 /// awaitable version of `poll_position`.
88 async
fn position(&mut self) -> Option
<io
::Result
<u64>> {
89 poll_fn(|cx
| unsafe { Pin::new_unchecked(&mut *self).poll_position(cx) }
).await
92 /// awaitable version of `poll_seq_read`.
93 async
fn seq_read(&mut self, buf
: &mut [u8]) -> io
::Result
<usize> {
94 poll_fn(|cx
| unsafe { Pin::new_unchecked(&mut *self).poll_seq_read(cx, buf) }
).await
97 /// `read_exact` - since that's what we _actually_ want most of the time, but with EOF handling
98 async
fn seq_read_exact_or_eof(&mut self, mut buf
: &mut [u8]) -> io
::Result
<Option
<()>> {
99 let mut eof_ok
= true;
100 while !buf
.is_empty() {
101 match self.seq_read(buf
).await?
{
102 0 if eof_ok
=> break,
103 0 => io_bail
!("unexpected EOF"),
104 got
=> buf
= &mut buf
[got
..],
111 /// `read_exact` - since that's what we _actually_ want most of the time.
112 async
fn seq_read_exact(&mut self, buf
: &mut [u8]) -> io
::Result
<()> {
113 match self.seq_read_exact_or_eof(buf
).await?
{
115 None
=> io_bail
!("unexpected eof"),
119 /// Helper to read into an allocated byte vector.
120 async
fn seq_read_exact_data(&mut self, size
: usize) -> io
::Result
<Vec
<u8>> {
121 let mut data
= util
::vec_new(size
);
122 self.seq_read_exact(&mut data
[..]).await?
;
126 /// `seq_read_entry` with EOF handling
127 async
fn seq_read_entry_or_eof
<T
: Endian
>(&mut self) -> io
::Result
<Option
<T
>> {
128 let mut data
= MaybeUninit
::<T
>::uninit();
130 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<T>()) }
;
131 if self.seq_read_exact_or_eof(buf
).await?
.is_none() {
134 Ok(Some(unsafe { data.assume_init().from_le() }
))
137 /// Helper to read into an `Endian`-implementing `struct`.
138 async
fn seq_read_entry
<T
: Endian
>(&mut self) -> io
::Result
<T
> {
139 self.seq_read_entry_or_eof()
141 .ok_or_else(|| io_format_err
!("unexepcted EOF"))
145 /// The decoder state machine implementation.
147 /// We use `async fn` to implement the decoder state machine so that we can easily plug in both
148 /// synchronous and `async` I/O objects as input.
149 pub struct DecoderImpl
<T
> {
151 current_header
: Header
,
153 path_lengths
: Vec
<usize>,
155 with_goodbye_tables
: bool
,
166 /// Control flow while parsing items.
168 /// When parsing an entry, we usually go through all of its attribute items. Once we reach the end
169 /// of the entry we stop.
170 /// Note that if we're in a directory, we stopped at the beginning of its contents.
171 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
173 /// We parsed an "attribute" item and should continue parsing.
176 /// We finished an entry (`SYMLINK`, `HARDLINK`, ...) or just entered the contents of a
177 /// directory (`FILENAME`, `GOODBYE`).
179 /// We stop moving forward at this point.
183 impl<T
: SeqRead
> DecoderImpl
<T
> {
184 pub async
fn new(input
: T
) -> io
::Result
<Self> {
185 Self::new_full(input
, "/".into()).await
/// Crate-internal constructor: builds the decoder from `input` and the initial `path`.
188 pub(crate) async
fn new_full(mut input
: T
, path
: PathBuf
) -> io
::Result
<Self> {
// The input is queried through its `dyn SeqRead` interface; the result is bound to `offset`.
189 let offset
= (&mut input
as &mut dyn SeqRead
)
193 let this
= DecoderImpl
{
// Placeholder header value.
// NOTE(review): assumes an all-zero `Header` is a valid value - confirm.
195 current_header
: unsafe { mem::zeroed() }
,
198 kind
: EntryKind
::EndOfDirectory
,
199 metadata
: Metadata
::default(),
// No directory levels are recorded yet and goodbye tables are not reported by default.
202 path_lengths
: Vec
::new(),
204 with_goodbye_tables
: false,
207 // this.read_next_entry().await?;
212 /// Get the next file entry, recursing into directories.
213 pub async
fn next(&mut self) -> Option
<io
::Result
<Entry
>> {
214 self.next_do().await
.transpose()
/// Worker behind `next()`: advances the state machine and returns the next entry, or
/// `Ok(None)` once the `Eof` state has been reached.
217 pub(crate) async
fn next_do(&mut self) -> io
::Result
<Option
<Entry
>> {
// Nothing is left to read once the end has been reached.
220 State
::Eof
=> return Ok(None
),
// In the initial state the next thing to read is an entry.
221 State
::Begin
=> return self.read_next_entry().await
.map(Some
),
223 // we completely finished an entry, so now we're going "up" in the directory
224 // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE:
225 self.read_next_item().await?
;
227 State
::InPayload
=> {
228 // We need to skip the current payload first.
229 self.skip_entry().await?
;
230 self.read_next_item().await?
;
232 State
::InDirectory
=> {
233 // We're at the next FILENAME or GOODBYE item.
// Dispatch on the type of the header that was loaded last.
237 match self.current_header
.htype
{
238 format
::PXAR_FILENAME
=> return self.handle_file_entry().await
,
239 format
::PXAR_GOODBYE
=> {
// With goodbye tables enabled the GOODBYE item is surfaced as an `EndOfDirectory` entry.
// Its content is left unread, hence the switch to `InPayload`, which skips it on the next
// call.
240 if self.with_goodbye_tables
{
241 self.entry
.kind
= EntryKind
::EndOfDirectory
;
242 let offset
= (&mut self.input
as &mut dyn SeqRead
)
246 self.entry
.offset
= offset
;
247 self.state
= State
::InPayload
;
248 return Ok(Some(self.entry
.take()));
// Goodbye tables are not reported: skip the item's content.
251 self.skip_entry().await?
;
// Each finished directory removes one recorded path length.
252 if self.path_lengths
.pop().is_some() {
253 self.state
= State
::Default
;
257 self.state
= State
::Eof
;
263 "expected filename or directory-goodbye pxar entry, got: {:x}",
/// Handles a `PXAR_FILENAME` item: reads the file name, updates the current path with it and
/// then reads the entry that follows.
270 async
fn handle_file_entry(&mut self) -> io
::Result
<Option
<Entry
>> {
// The item's content is the raw, zero-terminated file name.
271 let mut data
= self.read_entry_as_bytes().await?
;
273 // filenames are zero terminated!
// Strip the terminator; any other last byte (or no byte at all) is rejected.
274 if data
.pop() != Some(0) {
275 io_bail
!("illegal path found (missing terminating zero)");
278 io_bail
!("illegal path found (empty)");
// The remaining bytes are taken verbatim as an OS string.
// NOTE(review): no check for `/` or `..` components is visible here - confirm whether file
// names are validated elsewhere.
281 let path
= PathBuf
::from(OsString
::from_vec(data
));
282 self.set_path(&path
)?
;
283 self.read_next_entry().await
.map(Some
)
/// Shortens the current entry's path to `path_len` bytes.
286 fn reset_path(&mut self) -> io
::Result
<()> {
// A missing value is reported as an internal "path underrun" error.
290 .ok_or_else(|| io_format_err
!("internal decoder error: path underrun"))?
;
// Take the path out of the entry, leaving an empty `PathBuf` behind while it is edited.
291 let mut path
= mem
::replace(&mut self.entry
.path
, PathBuf
::new())
// Cut the bytes down to `path_len` and store the result back as the entry's path.
294 path
.truncate(path_len
);
295 self.entry
.path
= PathBuf
::from(OsString
::from_vec(path
));
/// Appends `path` to the current entry's path.
299 fn set_path(&mut self, path
: &Path
) -> io
::Result
<()> {
// `PathBuf::push` replaces the whole path when `path` is absolute.
301 self.entry
.path
.push(path
);
/// Reads the next entry together with its attribute items.
///
/// Returns `Ok(None)` when the input reports EOF where the entry should start.
305 async
fn read_next_entry_or_eof(&mut self) -> io
::Result
<Option
<Entry
>> {
// Start from a clean slate: default state and no data left over from the previous entry.
306 self.state
= State
::Default
;
307 self.entry
.clear_data();
// Local helper type so that the item header and its content can be read in one go.
311 struct WithHeader
<U
: Endian
> {
316 let entry
: WithHeader
<format
::Entry
> = {
317 let input
: &mut dyn SeqRead
= &mut self.input
;
// EOF at this point is not an error, it is passed on to the caller as `None`.
318 match input
.seq_read_entry_or_eof().await?
{
319 None
=> return Ok(None
),
320 Some(entry
) => entry
,
// Whatever was read here has to be a `PXAR_ENTRY` item.
324 if entry
.header
.htype
!= format
::PXAR_ENTRY
{
326 "expected pxar entry of type 'Entry', got: {:x}",
// Reset the current header to a placeholder value.
// NOTE(review): assumes an all-zero `Header` is a valid value - confirm.
331 self.current_header
= unsafe { mem::zeroed() }
;
332 self.entry
.metadata
= Metadata
{
// Consume items until `read_next_item` reports that the entry is complete.
337 while self.read_next_item().await?
!= ItemResult
::Entry {}
// For directories the byte length of the current path is recorded.
339 if self.entry
.is_dir() {
341 .push(self.entry
.path
.as_os_str().as_bytes().len());
344 Ok(Some(self.entry
.take()))
347 async
fn read_next_entry(&mut self) -> io
::Result
<Entry
> {
348 self.read_next_entry_or_eof()
350 .ok_or_else(|| io_format_err
!("unexpected EOF"))
353 async
fn read_next_item(&mut self) -> io
::Result
<ItemResult
> {
354 self.read_next_header().await?
;
355 self.read_current_item().await
/// Reads the next item header from the input straight into `self.current_header`.
358 async
fn read_next_header(&mut self) -> io
::Result
<()> {
// View `current_header` as a mutable byte slice covering exactly the size of the value.
// NOTE(review): assumes every byte pattern is a valid `Header` - confirm.
360 std
::slice
::from_raw_parts_mut(
361 &mut self.current_header
as *mut Header
as *mut u8,
362 size_of_val(&self.current_header
),
// Fill the header bytes completely from the input.
365 (&mut self.input
as &mut dyn SeqRead
)
366 .seq_read_exact(dest
)
371 /// Read the next item, the header is already loaded.
372 async
fn read_current_item(&mut self) -> io
::Result
<ItemResult
> {
373 match self.current_header
.htype
{
374 format
::PXAR_XATTR
=> {
375 let xattr
= self.read_xattr().await?
;
376 self.entry
.metadata
.xattrs
.push(xattr
);
378 format
::PXAR_ACL_USER
=> {
379 let entry
= self.read_acl_user().await?
;
380 self.entry
.metadata
.acl
.users
.push(entry
);
382 format
::PXAR_ACL_GROUP
=> {
383 let entry
= self.read_acl_group().await?
;
384 self.entry
.metadata
.acl
.groups
.push(entry
);
386 format
::PXAR_ACL_GROUP_OBJ
=> {
387 if self.entry
.metadata
.acl
.group_obj
.is_some() {
388 io_bail
!("multiple acl group object entries detected");
390 let entry
= self.read_acl_group_object().await?
;
391 self.entry
.metadata
.acl
.group_obj
= Some(entry
);
393 format
::PXAR_ACL_DEFAULT
=> {
394 if self.entry
.metadata
.acl
.default.is_some() {
395 io_bail
!("multiple acl default entries detected");
397 let entry
= self.read_acl_default().await?
;
398 self.entry
.metadata
.acl
.default = Some(entry
);
400 format
::PXAR_ACL_DEFAULT_USER
=> {
401 let entry
= self.read_acl_user().await?
;
402 self.entry
.metadata
.acl
.default_users
.push(entry
);
404 format
::PXAR_ACL_DEFAULT_GROUP
=> {
405 let entry
= self.read_acl_group().await?
;
406 self.entry
.metadata
.acl
.default_groups
.push(entry
);
408 format
::PXAR_FCAPS
=> {
409 if self.entry
.metadata
.fcaps
.is_some() {
410 io_bail
!("multiple file capability entries detected");
412 let entry
= self.read_fcaps().await?
;
413 self.entry
.metadata
.fcaps
= Some(entry
);
415 format
::PXAR_QUOTA_PROJID
=> {
416 if self.entry
.metadata
.quota_project_id
.is_some() {
417 io_bail
!("multiple quota project id entries detected");
419 let entry
= self.read_quota_project_id().await?
;
420 self.entry
.metadata
.quota_project_id
= Some(entry
);
422 format
::PXAR_SYMLINK
=> {
423 self.entry
.kind
= EntryKind
::Symlink(self.read_symlink().await?
);
424 return Ok(ItemResult
::Entry
);
426 format
::PXAR_HARDLINK
=> {
427 self.entry
.kind
= EntryKind
::Hardlink(self.read_hardlink().await?
);
428 return Ok(ItemResult
::Entry
);
430 format
::PXAR_DEVICE
=> {
431 self.entry
.kind
= EntryKind
::Device(self.read_device().await?
);
432 return Ok(ItemResult
::Entry
);
434 format
::PXAR_PAYLOAD
=> {
435 self.entry
.kind
= EntryKind
::File
{
436 size
: self.current_header
.content_size(),
438 self.state
= State
::InPayload
;
439 return Ok(ItemResult
::Entry
);
441 format
::PXAR_FILENAME
| format
::PXAR_GOODBYE
=> {
442 self.state
= State
::InDirectory
;
443 self.entry
.kind
= EntryKind
::Directory
;
444 return Ok(ItemResult
::Entry
);
446 _
=> io_bail
!("unexpected entry type: {:x}", self.current_header
.htype
),
449 Ok(ItemResult
::Attribute
)
453 // Local read helpers.
455 // These utilize additional information and hence are not part of the `dyn SeqRead` impl.
/// Skips the content of the current item by reading it into the scratch buffer.
458 async
fn skip_entry(&mut self) -> io
::Result
<()> {
// Number of content bytes announced by the current header.
459 let mut len
= self.current_header
.content_size();
460 let scratch
= scratch_buffer();
// Consume whole scratch-buffer sized chunks first; the data read is simply discarded.
461 while len
>= (scratch
.len() as u64) {
462 (&mut self.input
as &mut dyn SeqRead
)
463 .seq_read_exact(scratch
)
465 len
-= scratch
.len() as u64;
// What is left is smaller than the scratch buffer, so the conversion to `usize` cannot
// truncate.
467 let len
= len
as usize;
// Read the remaining bytes into the front of the scratch buffer.
469 (&mut self.input
as &mut dyn SeqRead
)
470 .seq_read_exact(&mut scratch
[..len
])
476 async
fn read_entry_as_bytes(&mut self) -> io
::Result
<Vec
<u8>> {
477 let size
= usize::try_from(self.current_header
.content_size()).map_err(io_err_other
)?
;
478 let data
= (&mut self.input
as &mut dyn SeqRead
)
479 .seq_read_exact_data(size
)
484 /// Helper to read a struct entry while checking its size.
485 async
fn read_simple_entry
<U
: Endian
+ '
static>(
489 if self.current_header
.content_size() != (size_of
::<T
>() as u64) {
491 "bad {} size: {} (expected {})",
493 self.current_header
.content_size(),
497 (&mut self.input
as &mut dyn SeqRead
).seq_read_entry().await
501 // Read functions for PXAR components.
504 async
fn read_xattr(&mut self) -> io
::Result
<format
::XAttr
> {
505 let data
= self.read_entry_as_bytes().await?
;
509 .position(|c
| *c
== 0)
510 .ok_or_else(|| io_format_err
!("missing value separator in xattr"))?
;
512 Ok(format
::XAttr { data, name_len }
)
515 async
fn read_symlink(&mut self) -> io
::Result
<format
::Symlink
> {
516 let data
= self.read_entry_as_bytes().await?
;
517 Ok(format
::Symlink { data }
)
520 async
fn read_hardlink(&mut self) -> io
::Result
<format
::Hardlink
> {
521 let data
= self.read_entry_as_bytes().await?
;
522 Ok(format
::Hardlink { data }
)
525 async
fn read_device(&mut self) -> io
::Result
<format
::Device
> {
526 self.read_simple_entry("device").await
529 async
fn read_fcaps(&mut self) -> io
::Result
<format
::FCaps
> {
530 let data
= self.read_entry_as_bytes().await?
;
531 Ok(format
::FCaps { data }
)
534 async
fn read_acl_user(&mut self) -> io
::Result
<format
::acl
::User
> {
535 self.read_simple_entry("acl user").await
538 async
fn read_acl_group(&mut self) -> io
::Result
<format
::acl
::Group
> {
539 self.read_simple_entry("acl group").await
542 async
fn read_acl_group_object(&mut self) -> io
::Result
<format
::acl
::GroupObject
> {
543 self.read_simple_entry("acl group object").await
546 async
fn read_acl_default(&mut self) -> io
::Result
<format
::acl
::Default
> {
547 self.read_simple_entry("acl default").await
550 async
fn read_quota_project_id(&mut self) -> io
::Result
<format
::QuotaProjectId
> {
551 self.read_simple_entry("quota project id").await