]> git.proxmox.com Git - pxar.git/commitdiff
import
author Wolfgang Bumiller <w.bumiller@proxmox.com>
Thu, 23 Jan 2020 10:18:04 +0000 (11:18 +0100)
committer Wolfgang Bumiller <w.bumiller@proxmox.com>
Tue, 18 Feb 2020 13:08:02 +0000 (14:08 +0100)
Signed-off-by: Wolfgang Bumiller <w.bumiller@proxmox.com>
19 files changed:
.gitignore [new file with mode: 0644]
Cargo.toml [new file with mode: 0644]
examples/apxar.rs [new file with mode: 0644]
examples/randaccess.rs [new file with mode: 0644]
rust-toolchain [new file with mode: 0644]
rustfmt.toml [new file with mode: 0644]
src/accessor.rs [new file with mode: 0644]
src/accessor/aio.rs [new file with mode: 0644]
src/accessor/sync.rs [new file with mode: 0644]
src/bin/pxar.rs [new file with mode: 0644]
src/decoder.rs [new file with mode: 0644]
src/decoder/aio.rs [new file with mode: 0644]
src/decoder/sync.rs [new file with mode: 0644]
src/format.rs [new file with mode: 0644]
src/format/acl.rs [new file with mode: 0644]
src/lib.rs [new file with mode: 0644]
src/macros.rs [new file with mode: 0644]
src/poll_fn.rs [new file with mode: 0644]
src/util.rs [new file with mode: 0644]

diff --git a/.gitignore b/.gitignore
new file mode 100644 (file)
index 0000000..14568f4
--- /dev/null
@@ -0,0 +1,4 @@
+/target
+**/*.rs.bk
+Cargo.lock
+test.pxar
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644 (file)
index 0000000..eb1c2eb
--- /dev/null
@@ -0,0 +1,32 @@
+[package]
+name = "pxar"
+version = "0.1.0"
+authors = ["Wolfgang Bumiller <w.bumiller@proxmox.com>"]
+edition = "2018"
+
+[[example]]
+name = "apxar"
+path = "examples/apxar.rs"
+required-features = [ "async-example" ]
+
+[dependencies]
+bitflags = "1.2.1"
+endian_trait = { version = "0.6", features = ["arrays"] }
+failure = "0.1"
+siphasher = "0.3"
+
+futures = { version = "0.3.1", optional = true }
+tokio = { version = "0.2.10", optional = true, default-features = false }
+
+[features]
+default = [ "futures-io", "tokio-io" ]
+futures-io = [ "futures" ]
+tokio-io = [ "tokio" ]
+async-example = [
+    "futures-io",
+    "tokio-io",
+    "tokio/fs",
+    "tokio/rt-threaded",
+    "tokio/io-driver",
+    "tokio/macros",
+]
diff --git a/examples/apxar.rs b/examples/apxar.rs
new file mode 100644 (file)
index 0000000..03c7590
--- /dev/null
@@ -0,0 +1,32 @@
+use pxar::decoder::aio::Decoder;
+
+#[tokio::main]
+async fn main() {
+    let mut args = std::env::args_os().skip(1);
+
+    let file = args.next().expect("expected a file name");
+    let file = tokio::fs::File::open(file)
+        .await
+        .expect("failed to open file");
+
+    let mut reader = Decoder::from_tokio(file)
+        .await
+        .expect("failed to open pxar archive contents");
+    let mut i = 0;
+    while let Some(entry) = reader.next().await {
+        println!("{:#?}", entry.expect("failed to parse entry").path());
+        i += 1;
+        if i == 2 {
+            break;
+        }
+    }
+
+    // Use a Stream for the remaining entries:
+    use futures::stream::StreamExt;
+
+    let mut stream = reader.into_stream();
+
+    while let Some(entry) = stream.next().await {
+        println!("{:#?}", entry.expect("failed to parse entry").path());
+    }
+}
diff --git a/examples/randaccess.rs b/examples/randaccess.rs
new file mode 100644 (file)
index 0000000..f228155
--- /dev/null
@@ -0,0 +1,62 @@
+use pxar::accessor::Accessor;
+
+fn main() {
+    let mut args = std::env::args_os().skip(1);
+
+    let file = args.next().expect("expected a file name");
+
+    let mut accessor = Accessor::open(file).expect("failed to open file");
+    let mut dir = accessor
+        .open_root()
+        .expect("failed to open archive root directory");
+    for i in dir.decode_full().expect("failed to access root directory") {
+        println!("{:#?}", i.expect("failed to parse entry").path());
+    }
+
+    let da = dir
+        .lookup("da")
+        .expect("error looking up da/")
+        .expect("failed to lookup da/");
+    dir.lookup("db").expect("failed to lookup db");
+    dir.lookup("root1.txt").expect("failed to lookup root1.txt");
+    dir.lookup("root2.txt").expect("failed to lookup root2.txt");
+
+    println!("{:?}", da.entry());
+    let da = da.enter_directory().expect("failed to enter /da directory");
+    for i in da.decode_full().expect("failed to access /da directory") {
+        println!(
+            " ==> {:#?}",
+            i.expect("failed to parse /da file entry").path()
+        );
+    }
+
+    for i in dir.read_dir() {
+        let i = i.expect("failed to read directory entry");
+        println!("read_dir => {:?}", i.file_name());
+    }
+
+    //    let file = tokio::fs::File::open(file)
+    //        .await
+    //        .expect("failed to open file");
+    //
+    //    let mut reader = Accessor::from_tokio(file)
+    //        .await
+    //        .expect("failed to open pxar archive contents");
+    //    let mut i = 0;
+    //    while let Some(entry) = reader.next().await {
+    //        println!("{:#?}", entry.expect("failed to parse entry").path());
+    //        i += 1;
+    //        if i == 2 {
+    //            break;
+    //        }
+    //    }
+    //
+    //    // Use a Stream for the remaining entries:
+    //    use futures::stream::StreamExt;
+    //
+    //    let mut stream = reader.into_stream();
+    //
+    //    while let Some(entry) = stream.next().await {
+    //        println!("{:#?}", entry.expect("failed to parse entry").path());
+    //    }
+}
diff --git a/rust-toolchain b/rust-toolchain
new file mode 100644 (file)
index 0000000..bf867e0
--- /dev/null
@@ -0,0 +1 @@
+nightly
diff --git a/rustfmt.toml b/rustfmt.toml
new file mode 100644 (file)
index 0000000..32a9786
--- /dev/null
@@ -0,0 +1 @@
+edition = "2018"
diff --git a/src/accessor.rs b/src/accessor.rs
new file mode 100644 (file)
index 0000000..6655fa3
--- /dev/null
@@ -0,0 +1,443 @@
+//! Random access for PXAR files.
+
+use std::ffi::OsString;
+use std::io;
+use std::mem::{size_of, size_of_val, MaybeUninit};
+use std::ops::Range;
+use std::os::unix::ffi::{OsStrExt, OsStringExt};
+use std::path::{Path, PathBuf};
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+use endian_trait::Endian;
+
+use crate::decoder::{self, DecoderImpl};
+use crate::format::{self, GoodbyeItem};
+use crate::poll_fn::poll_fn;
+use crate::util;
+use crate::Entry;
+
+pub mod aio;
+pub mod sync;
+
+#[doc(inline)]
+pub use sync::Accessor;
+
+/// Random access read implementation.
+///
+/// Reads take a shared (pinned) reference and an explicit `offset`, so no read position is kept
+/// in the trait's interface itself.
+pub trait ReadAt {
+    /// Poll-based positional read into `buf`, starting at `offset`.
+    ///
+    /// The `usize` in the result is presumably the number of bytes read; the helpers in this
+    /// file treat a result of `0` as end of file.
+    fn poll_read_at(
+        self: Pin<&Self>,
+        cx: &mut Context,
+        buf: &mut [u8],
+        offset: u64,
+    ) -> Poll<io::Result<usize>>;
+}
+
+/// We do not want to bother with actual polling, so we implement `async fn` variants of the above
+/// on `dyn ReadAt`.
+///
+/// The reason why this is not an internal `ReadAtExt` trait like `AsyncReadExt` is simply that
+/// we'd then need to define all the `Future` types they return manually and explicitly. Since we
+/// have no use for them, all we want is the ability to use `async fn`...
+///
+/// The downside is that we need some `(&mut self.input as &mut dyn ReadAt)` casts in the
+/// decoder's code, but that's fine.
+impl<'a> dyn ReadAt + 'a {
+    /// awaitable version of `poll_read_at`.
+    ///
+    /// Resolves to whatever `poll_read_at` reports once it is ready.
+    async fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result<usize> {
+        // NOTE(review): `Pin::new_unchecked` relies on `self` not being moved while pinned here;
+        // confirm this holds for all implementors.
+        poll_fn(|cx| unsafe { Pin::new_unchecked(self).poll_read_at(cx, buf, offset) }).await
+    }
+
+    /// `read_exact_at` - since that's what we _actually_ want most of the time.
+    ///
+    /// Keeps calling `read_at` until `buf` is completely filled. A read returning `0` bytes
+    /// before that bails out with an "unexpected EOF" error.
+    async fn read_exact_at(&self, mut buf: &mut [u8], mut offset: u64) -> io::Result<()> {
+        while !buf.is_empty() {
+            match self.read_at(buf, offset).await? {
+                0 => io_bail!("unexpected EOF"),
+                got => {
+                    // Advance both the destination slice and the read position by what we got.
+                    buf = &mut buf[got..];
+                    offset += got as u64;
+                }
+            }
+        }
+        Ok(())
+    }
+
+    /// Helper to read into an `Endian`-implementing `struct`.
+    ///
+    /// Reads exactly `size_of::<T>()` bytes at `offset` and converts the value via `from_le()`.
+    async fn read_entry_at<T: Endian>(&self, offset: u64) -> io::Result<T> {
+        let mut data = MaybeUninit::<T>::uninit();
+        // View the uninitialized value as raw bytes so it can be filled by `read_exact_at`.
+        // NOTE(review): this assumes every bit pattern is a valid `T` - confirm for all users.
+        let buf =
+            unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<T>()) };
+        self.read_exact_at(buf, offset).await?;
+        // Only reached if all bytes of `data` were written by the read above.
+        Ok(unsafe { data.assume_init().from_le() })
+    }
+
+    /// Helper to read into an allocated byte vector.
+    ///
+    /// Presumably `util::vec_new(size)` provides a vector of `size` bytes, which is then filled
+    /// completely from `offset` on.
+    async fn read_exact_data_at(&self, size: usize, offset: u64) -> io::Result<Vec<u8>> {
+        let mut data = util::vec_new(size);
+        self.read_exact_at(&mut data[..], offset).await?;
+        Ok(data)
+    }
+}
+
+/// The random access state machine implementation.
+pub struct AccessorImpl<T> {
+    input: T,
+    size: u64,
+}
+
+impl<T: ReadAt> AccessorImpl<T> {
+    pub async fn new(input: T, size: u64) -> io::Result<Self> {
+        if size < (size_of::<GoodbyeItem>() as u64) {
+            io_bail!("too small to contain a pxar archive");
+        }
+        Ok(Self { input, size })
+    }
+
+    pub async fn open_root<'a>(&'a self) -> io::Result<DirectoryImpl<'a>> {
+        DirectoryImpl::open_at_end(&self.input, self.size, "/".into()).await
+    }
+}
+
+/// The directory random-access state machine implementation.
+pub struct DirectoryImpl<'a> {
+    // The input all positional reads go through.
+    input: &'a dyn ReadAt,
+    // Start of the directory: `goodbye_ofs` minus the tail item's `offset`.
+    entry_ofs: u64,
+    // Start of the goodbye table: the directory's end offset minus the tail item's `size`.
+    goodbye_ofs: u64,
+    // Distance from `entry_ofs` to the directory's end offset.
+    size: u64,
+    // The goodbye table items as loaded by `load_table()`.
+    table: Box<[GoodbyeItem]>,
+    // Path of this directory; used as the base path when decoding its entries.
+    path: PathBuf,
+}
+
+impl<'a> DirectoryImpl<'a> {
+    /// Open a directory ending at the specified position.
+    pub(crate) async fn open_at_end(
+        input: &'a dyn ReadAt,
+        end_offset: u64,
+        path: PathBuf,
+    ) -> io::Result<DirectoryImpl<'a>> {
+        let tail = Self::read_tail_entry(input, end_offset).await?;
+
+        if end_offset < tail.size {
+            io_bail!("goodbye tail size out of range");
+        }
+
+        let goodbye_ofs = end_offset - tail.size;
+
+        if goodbye_ofs < tail.offset {
+            io_bail!("goodbye offset out of range");
+        }
+
+        let entry_ofs = goodbye_ofs - tail.offset;
+        let size = end_offset - entry_ofs;
+
+        let mut this = Self {
+            input,
+            entry_ofs,
+            goodbye_ofs,
+            size,
+            table: Box::new([]),
+            path,
+        };
+
+        // sanity check:
+        if this.table_size() % (size_of::<GoodbyeItem>() as u64) != 0 {
+            io_bail!("invalid goodbye table size: {}", this.table_size());
+        }
+
+        this.table = this.load_table().await?;
+
+        Ok(this)
+    }
+
+    /// Load the entire goodbye table:
+    async fn load_table(&self) -> io::Result<Box<[GoodbyeItem]>> {
+        let len = self.len();
+        let mut data = Vec::with_capacity(self.len());
+        unsafe {
+            data.set_len(len);
+            let slice = std::slice::from_raw_parts_mut(
+                data.as_mut_ptr() as *mut u8,
+                len * size_of_val(&data[0]),
+            );
+            self.input.read_exact_at(slice, self.table_offset()).await?;
+            drop(slice);
+        }
+        Ok(data.into_boxed_slice())
+    }
+
+    /// Offset right behind this directory: its start offset plus its size.
+    #[inline]
+    fn end_offset(&self) -> u64 {
+        self.entry_ofs + self.size
+    }
+
+    /// Size in bytes of the goodbye table's items, i.e. the span from `goodbye_ofs` to the end
+    /// of the directory without the leading `format::Header`.
+    ///
+    /// NOTE(review): the subtraction underflows if that span is smaller than a header.
+    #[inline]
+    fn table_size(&self) -> u64 {
+        (self.end_offset() - self.goodbye_ofs) - (size_of::<format::Header>() as u64)
+    }
+
+    /// Offset of the first goodbye table item, right behind the table's `format::Header`.
+    #[inline]
+    fn table_offset(&self) -> u64 {
+        self.goodbye_ofs + (size_of::<format::Header>() as u64)
+    }
+
+    /// Length *excluding* the tail marker!
+    ///
+    /// Computed as the number of `GoodbyeItem`s fitting into `table_size()`, minus one.
+    ///
+    /// NOTE(review): the `- 1` underflows if `table_size()` holds no item at all.
+    #[inline]
+    fn len(&self) -> usize {
+        (self.table_size() / (size_of::<GoodbyeItem>() as u64)) as usize - 1
+    }
+
+    /// Read the goodbye tail and perform some sanity checks.
+    async fn read_tail_entry(input: &'a dyn ReadAt, end_offset: u64) -> io::Result<GoodbyeItem> {
+        if end_offset < (size_of::<GoodbyeItem>() as u64) {
+            io_bail!("goodbye tail does not fit");
+        }
+
+        let tail_offset = end_offset - (size_of::<GoodbyeItem>() as u64);
+        let tail: GoodbyeItem = input.read_entry_at(tail_offset).await?;
+
+        if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER {
+            io_bail!("no goodbye tail marker found");
+        }
+
+        Ok(tail)
+    }
+
+    /// Get a decoder for the directory contents.
+    pub(crate) async fn decode_full(&self) -> io::Result<DecoderImpl<SeqReadAtAdapter<'a>>> {
+        let (dir, decoder) = self
+            .decode_one_entry(self.entry_ofs..(self.entry_ofs + self.size), None)
+            .await?;
+        if !dir.is_dir() {
+            io_bail!("directory does not seem to be a directory");
+        }
+        Ok(decoder)
+    }
+
+    async fn get_decoder(
+        &self,
+        entry_range: Range<u64>,
+        file_name: Option<&Path>,
+    ) -> io::Result<DecoderImpl<SeqReadAtAdapter<'a>>> {
+        Ok(DecoderImpl::new_full(
+            SeqReadAtAdapter::new(self.input, entry_range),
+            match file_name {
+                None => self.path.clone(),
+                Some(file) => self.path.join(file),
+            },
+        )
+        .await?)
+    }
+
+    /// Create a decoder for `entry_range` and decode its first entry.
+    ///
+    /// Returns the entry together with the decoder, so the caller can keep decoding behind it.
+    /// A decoder which yields no entry at all is reported as an error.
+    async fn decode_one_entry(
+        &self,
+        entry_range: Range<u64>,
+        file_name: Option<&Path>,
+    ) -> io::Result<(Entry, DecoderImpl<SeqReadAtAdapter<'a>>)> {
+        let mut decoder = self.get_decoder(entry_range, file_name).await?;
+        // First `?`: no entry available at all, second `?`: the entry failed to decode.
+        let entry = decoder
+            .next()
+            .await
+            .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
+        Ok((entry, decoder))
+    }
+
+    /// Search the goodbye table for an item with the given file name `hash`.
+    ///
+    /// Returns whatever index `format::search_binary_tree_array_by` finds, if any.
+    fn lookup_hash_position(&self, hash: u64) -> Option<usize> {
+        format::search_binary_tree_array_by(&self.table, |i| hash.cmp(&i.hash))
+    }
+
+    /// Lookup a directory entry.
+    pub async fn lookup(&'a self, path: &Path) -> io::Result<Option<FileEntryImpl<'a>>> {
+        let hash = format::hash_filename(path.as_os_str().as_bytes());
+        let index = match self.lookup_hash_position(hash) {
+            Some(index) => index,
+            None => return Ok(None),
+        };
+
+        // Lookup FILENAME, if it doesn't match increase index, once found, use the GoodbyeItem's
+        // offset+size as well as the file's Entry to return a DirEntry::Dir or Dir::Entry.
+
+        while index < self.table.len() && self.table[index].hash == hash {
+            let cursor = self.get_cursor(index).await?;
+            if cursor.file_name == path {
+                return Ok(Some(cursor.get_entry().await?));
+            }
+        }
+
+        Ok(None)
+    }
+
+    async fn get_cursor(&'a self, index: usize) -> io::Result<DirEntryImpl<'a>> {
+        let entry = &self.table[index];
+        let file_goodbye_ofs = entry.offset;
+        if self.goodbye_ofs < file_goodbye_ofs {
+            io_bail!("invalid file offset");
+        }
+
+        let file_ofs = self.goodbye_ofs - file_goodbye_ofs;
+        let (file_name, entry_ofs) = self.read_filename_entry(file_ofs).await?;
+
+        Ok(DirEntryImpl {
+            dir: self,
+            file_name,
+            entry_range: Range {
+                start: entry_ofs,
+                end: file_ofs + entry.size,
+            },
+        })
+    }
+
+    async fn read_filename_entry(&self, file_ofs: u64) -> io::Result<(PathBuf, u64)> {
+        let head: format::Header = self.input.read_entry_at(file_ofs).await?;
+        if head.htype != format::PXAR_FILENAME {
+            io_bail!("expected PXAR_FILENAME header, found: {:x}", head.htype);
+        }
+
+        let mut path = self
+            .input
+            .read_exact_data_at(
+                head.content_size() as usize,
+                file_ofs + (size_of_val(&head) as u64),
+            )
+            .await?;
+
+        if path.pop() != Some(0) {
+            io_bail!("invalid file name (missing terminating zero)");
+        }
+
+        if path.is_empty() {
+            io_bail!("invalid empty file name");
+        }
+
+        let file_name = PathBuf::from(OsString::from_vec(path));
+        format::check_file_name(&file_name)?;
+
+        Ok((file_name, file_ofs + head.full_size()))
+    }
+
+    /// Get a cursor-based iterator over this directory, starting at the first table slot.
+    pub fn read_dir(&'a self) -> ReadDirImpl<'a> {
+        ReadDirImpl::new(self, 0)
+    }
+}
+
+/// A file entry retrieved from a Directory.
+pub struct FileEntryImpl<'a> {
+    // The directory this entry was found in.
+    parent: &'a DirectoryImpl<'a>,
+    // The decoded entry itself.
+    entry: Entry,
+    // The decoder positioned behind the entry; only kept for directories.
+    decoder: Option<DecoderImpl<SeqReadAtAdapter<'a>>>,
+    // End of the entry's range within the input.
+    end_offset: u64,
+}
+
+impl<'a> FileEntryImpl<'a> {
+    /// Open this entry as a directory.
+    ///
+    /// Fails if the entry is not a directory. Otherwise the directory is opened from its end
+    /// offset, using the parent's input and this entry's path.
+    pub async fn enter_directory(&self) -> io::Result<DirectoryImpl<'a>> {
+        if !self.entry.is_dir() {
+            io_bail!("enter_directory() on a non-directory");
+        }
+
+        DirectoryImpl::open_at_end(self.parent.input, self.end_offset, self.entry.path.clone())
+            .await
+    }
+
+    /// Consume the file entry, keeping only the decoded `Entry`.
+    #[inline]
+    pub fn into_entry(self) -> Entry {
+        self.entry
+    }
+
+    /// Access the decoded `Entry`.
+    #[inline]
+    pub fn entry(&self) -> &Entry {
+        &self.entry
+    }
+}
+
+/// An iterator over the contents of a directory.
+pub struct ReadDirImpl<'a> {
+    dir: &'a DirectoryImpl<'a>,
+    at: usize,
+}
+
+impl<'a> ReadDirImpl<'a> {
+    pub fn new(dir: &'a DirectoryImpl<'a>, at: usize) -> Self {
+        Self { dir, at }
+    }
+
+    pub async fn next(&mut self) -> io::Result<Option<DirEntryImpl<'a>>> {
+        if self.at == self.dir.table.len() {
+            Ok(None)
+        } else {
+            let cursor = self.dir.get_cursor(self.at).await?;
+            self.at += 1;
+            Ok(Some(cursor))
+        }
+    }
+}
+
+/// A cursor pointing to a file in a directory.
+///
+/// At this point only the file name has been read and we remembered the position for finding the
+/// actual data. This can be upgraded into a FileEntryImpl.
+pub struct DirEntryImpl<'a> {
+    dir: &'a DirectoryImpl<'a>,
+    file_name: PathBuf,
+    entry_range: Range<u64>,
+}
+
+impl<'a> DirEntryImpl<'a> {
+    pub fn file_name(&self) -> &Path {
+        &self.file_name
+    }
+
+    pub async fn get_entry(&self) -> io::Result<FileEntryImpl<'a>> {
+        let end_offset = self.entry_range.end;
+        let (entry, decoder) = self
+            .dir
+            .decode_one_entry(self.entry_range.clone(), Some(&self.file_name))
+            .await?;
+        let decoder = if entry.is_dir() { Some(decoder) } else { None };
+
+        Ok(FileEntryImpl {
+            parent: self.dir,
+            entry,
+            decoder,
+            end_offset,
+        })
+    }
+}
+
+#[doc(hidden)]
+pub struct SeqReadAtAdapter<'a> {
+    input: &'a dyn ReadAt,
+    range: Range<u64>,
+}
+
+impl<'a> SeqReadAtAdapter<'a> {
+    pub fn new(input: &'a dyn ReadAt, range: Range<u64>) -> Self {
+        Self { input, range }
+    }
+
+    #[inline]
+    fn remaining(&self) -> usize {
+        (self.range.end - self.range.start) as usize
+    }
+}
+
+/// Sequential reads are served by positional reads at the current start of the range.
+impl<'a> decoder::SeqRead for SeqReadAtAdapter<'a> {
+    fn poll_seq_read(
+        self: Pin<&mut Self>,
+        cx: &mut Context,
+        buf: &mut [u8],
+    ) -> Poll<io::Result<usize>> {
+        // Never read past the end of our range.
+        let len = buf.len().min(self.remaining());
+        let buf = &mut buf[..len];
+
+        let this = self.get_mut();
+
+        // NOTE(review): `Pin::new_unchecked` relies on the input not being moved while pinned
+        // here; confirm this holds for all `ReadAt` implementors.
+        let got = ready!(unsafe {
+            Pin::new_unchecked(this.input).poll_read_at(cx, buf, this.range.start)
+        })?;
+        // Advance our position by what was actually read.
+        this.range.start += got as u64;
+        Poll::Ready(Ok(got))
+    }
+
+    // The current position is simply the start of the remaining range.
+    fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
+        Poll::Ready(Some(Ok(self.range.start)))
+    }
+}
diff --git a/src/accessor/aio.rs b/src/accessor/aio.rs
new file mode 100644 (file)
index 0000000..b75d365
--- /dev/null
@@ -0,0 +1,3 @@
+//! Asynchronous `pxar` random-access handling.
+//!
+//! Currently neither tokio nor futures have an `AsyncFileExt` variant.
diff --git a/src/accessor/sync.rs b/src/accessor/sync.rs
new file mode 100644 (file)
index 0000000..7be5a9a
--- /dev/null
@@ -0,0 +1,183 @@
+//! Blocking `pxar` random access handling.
+
+use std::io;
+use std::os::unix::fs::FileExt;
+use std::path::Path;
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+use crate::accessor::{self, ReadAt};
+use crate::decoder::Decoder;
+use crate::util::poll_result_once;
+use crate::Entry;
+
+/// Blocking `pxar` random-access decoder.
+///
+/// This is the blocking I/O version of the `pxar` accessor. This will *not* work with an
+/// asynchronous I/O object. I/O must always return `Poll::Ready`.
+///
+/// Attempting to use a `Waker` from this context *will* `panic!`
+///
+/// NOTE(review): the `accessor::aio` module does not provide an asynchronous `Accessor` yet, so
+/// there is currently no alternative for asynchronous I/O.
+#[repr(transparent)]
+pub struct Accessor<T> {
+    inner: accessor::AccessorImpl<T>,
+}
+
+impl<T: FileExt> Accessor<T> {
+    /// Decode a `pxar` archive from a standard file implementing `FileExt`.
+    ///
+    /// `size` is the size of the archive provided by `input`.
+    #[inline]
+    pub fn from_file_and_size(input: T, size: u64) -> io::Result<Accessor<FileReader<T>>> {
+        Accessor::new(FileReader::new(input), size)
+    }
+}
+
+impl Accessor<FileReader<std::fs::File>> {
+    /// Decode a `pxar` archive from a regular `std::fs::File` input.
+    ///
+    /// The archive size is taken from the file's metadata.
+    #[inline]
+    pub fn from_file(input: std::fs::File) -> io::Result<Self> {
+        let size = input.metadata()?.len();
+        Accessor::from_file_and_size(input, size)
+    }
+
+    /// Convenience shortcut for `File::open` followed by `Accessor::from_file`.
+    pub fn open<P: AsRef<Path>>(path: P) -> io::Result<Self> {
+        Self::from_file(std::fs::File::open(path.as_ref())?)
+    }
+}
+
+impl<T: ReadAt> Accessor<T> {
+    /// Create a *blocking* random-access decoder from an input implementing our internal read
+    /// interface.
+    ///
+    /// Note that the `input`'s `ReadAt` implementation must always return `Poll::Ready` and is
+    /// not allowed to use the `Waker`, as this will cause a `panic!`.
+    pub fn new(input: T, size: u64) -> io::Result<Self> {
+        Ok(Self {
+            inner: poll_result_once(accessor::AccessorImpl::new(input, size))?,
+        })
+    }
+
+    /// Open a directory handle to the root of the pxar archive.
+    pub fn open_root<'a>(&'a self) -> io::Result<Directory<'a>> {
+        Ok(Directory::new(poll_result_once(self.inner.open_root())?))
+    }
+}
+
+/// Adapter for FileExt readers.
+///
+/// Implements `ReadAt` by forwarding to the blocking `FileExt::read_at`.
+pub struct FileReader<T> {
+    inner: T,
+}
+
+impl<T: FileExt> FileReader<T> {
+    /// Wrap a `FileExt` implementing reader.
+    pub fn new(inner: T) -> Self {
+        Self { inner }
+    }
+}
+
+impl<T: FileExt> ReadAt for FileReader<T> {
+    /// Performs the read right away and always returns `Poll::Ready`; the context is unused.
+    fn poll_read_at(
+        self: Pin<&Self>,
+        _cx: &mut Context,
+        buf: &mut [u8],
+        offset: u64,
+    ) -> Poll<io::Result<usize>> {
+        Poll::Ready(self.get_ref().inner.read_at(buf, offset))
+    }
+}
+
+/// Blocking Directory variant:
+#[repr(transparent)]
+pub struct Directory<'a> {
+    inner: accessor::DirectoryImpl<'a>,
+}
+
+impl<'a> Directory<'a> {
+    fn new(inner: accessor::DirectoryImpl<'a>) -> Self {
+        Self { inner }
+    }
+
+    /// Get a decoder for the directory contents.
+    pub fn decode_full(&self) -> io::Result<Decoder<accessor::SeqReadAtAdapter<'a>>> {
+        Ok(Decoder::from_impl(poll_result_once(
+            self.inner.decode_full(),
+        )?))
+    }
+
+    /// Lookup an entry in a directory.
+    pub fn lookup<P: AsRef<Path>>(&'a self, path: P) -> io::Result<Option<FileEntry<'a>>> {
+        if let Some(file_entry) = poll_result_once(self.inner.lookup(path.as_ref()))? {
+            Ok(Some(FileEntry { inner: file_entry }))
+        } else {
+            Ok(None)
+        }
+    }
+
+    /// Get an iterator over the directory's contents.
+    pub fn read_dir(&'a self) -> ReadDir<'a> {
+        ReadDir {
+            inner: self.inner.read_dir(),
+        }
+    }
+}
+
+/// A file entry retrieved from a `Directory` via the `lookup` method.
+#[repr(transparent)]
+pub struct FileEntry<'a> {
+    inner: accessor::FileEntryImpl<'a>,
+}
+
+impl<'a> FileEntry<'a> {
+    pub fn enter_directory(&self) -> io::Result<Directory<'a>> {
+        Ok(Directory::new(poll_result_once(
+            self.inner.enter_directory(),
+        )?))
+    }
+
+    #[inline]
+    pub fn into_entry(self) -> Entry {
+        self.inner.into_entry()
+    }
+
+    #[inline]
+    pub fn entry(&self) -> &Entry {
+        &self.inner.entry()
+    }
+}
+
+/// An iterator over the contents of a `Directory`.
+#[repr(transparent)]
+pub struct ReadDir<'a> {
+    inner: accessor::ReadDirImpl<'a>,
+}
+
+impl<'a> Iterator for ReadDir<'a> {
+    type Item = io::Result<DirEntry<'a>>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match poll_result_once(self.inner.next()) {
+            Ok(Some(inner)) => Some(Ok(DirEntry { inner })),
+            Ok(None) => None,
+            Err(err) => Some(Err(err)),
+        }
+    }
+}
+
+impl<'a> std::iter::FusedIterator for ReadDir<'a> {}
+
+/// A directory entry. When iterating through the contents of a directory we first get access to
+/// the file name. The remaining information can be decoded afterwards.
+#[repr(transparent)]
+pub struct DirEntry<'a> {
+    inner: accessor::DirEntryImpl<'a>,
+}
+
+impl<'a> DirEntry<'a> {
+    pub fn file_name(&self) -> &Path {
+        self.inner.file_name()
+    }
+
+    pub fn get_entry(&self) -> io::Result<FileEntry<'a>> {
+        poll_result_once(self.inner.get_entry()).map(|inner| FileEntry { inner })
+    }
+}
diff --git a/src/bin/pxar.rs b/src/bin/pxar.rs
new file mode 100644 (file)
index 0000000..9c269a7
--- /dev/null
@@ -0,0 +1,13 @@
+use pxar::decoder::Decoder;
+
+fn main() {
+    let mut args = std::env::args_os().skip(1);
+
+    let file = args.next().expect("expected a file name");
+    let file = std::fs::File::open(file).expect("failed to open file");
+
+    let reader = Decoder::from_std(file).expect("failed to open pxar archive contents");
+    for entry in reader {
+        println!("{:#?}", entry.expect("failed to parse entry").path());
+    }
+}
diff --git a/src/decoder.rs b/src/decoder.rs
new file mode 100644 (file)
index 0000000..b7f143e
--- /dev/null
@@ -0,0 +1,553 @@
+//! The `pxar` decoder state machine.
+//!
+//! This is the implementation used by both the synchronous and async pxar wrappers.
+
+use std::convert::TryFrom;
+use std::ffi::OsString;
+use std::io;
+use std::mem::{self, size_of, size_of_val, MaybeUninit};
+use std::os::unix::ffi::{OsStrExt, OsStringExt};
+use std::path::{Path, PathBuf};
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+//use std::os::unix::fs::FileExt;
+
+use endian_trait::Endian;
+
+use crate::format::{self, Header};
+use crate::poll_fn::poll_fn;
+use crate::util::{self, io_err_other};
+use crate::{Entry, EntryKind, Metadata};
+
+pub mod aio;
+pub mod sync;
+
+#[doc(inline)]
+pub use sync::Decoder;
+
+/// To skip through non-seekable files.
+static mut SCRATCH_BUFFER: MaybeUninit<[u8; 4096]> = MaybeUninit::uninit();
+
+fn scratch_buffer() -> &'static mut [u8] {
+    unsafe { &mut (*SCRATCH_BUFFER.as_mut_ptr())[..] }
+}
+
+/// Sequential read interface used by the decoder's state machine.
+///
+/// To simply iterate through a directory we just need the equivalent of `poll_read()`.
+///
+/// Currently we also have a `poll_position()` method which can be added for types supporting
+/// `Seek` or `AsyncSeek`. In this case the starting position of each entry becomes available
+/// (accessible via the `Entry::offset()`), to allow jumping between entries.
+pub trait SeqRead {
+    /// Mostly we want to read sequentially, so this is basically an `AsyncRead` equivalent.
+    ///
+    /// Yields the number of bytes placed into `buf`. The read helpers built on top of this trait
+    /// treat a result of `0` as the end of the input.
+    fn poll_seq_read(
+        self: Pin<&mut Self>,
+        cx: &mut Context,
+        buf: &mut [u8],
+    ) -> Poll<io::Result<usize>>;
+
+    /// While going through the data we may want to take notes about some offsets within the file
+    /// for later. If the reader does not support seeking or positional reading, this can just
+    /// return `None`.
+    ///
+    /// The default implementation always reports `None`.
+    fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
+        Poll::Ready(None)
+    }
+}
+
+/// Allow using trait objects for generics taking a `SeqRead`:
+impl<'a> SeqRead for &mut (dyn SeqRead + 'a) {
+    /// Forwards to `poll_seq_read` of the referenced trait object.
+    fn poll_seq_read(
+        self: Pin<&mut Self>,
+        cx: &mut Context,
+        buf: &mut [u8],
+    ) -> Poll<io::Result<usize>> {
+        // NOTE(review): this re-pins the referenced object without proof that it actually is
+        // pinned - presumably acceptable since nothing is moved out of it here; confirm.
+        unsafe {
+            self.map_unchecked_mut(|this| &mut **this)
+                .poll_seq_read(cx, buf)
+        }
+    }
+
+    /// Forwards to `poll_position` of the referenced trait object.
+    fn poll_position(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
+        // NOTE(review): same pinning assumption as in `poll_seq_read` above.
+        unsafe { self.map_unchecked_mut(|this| &mut **this).poll_position(cx) }
+    }
+}
+
+/// We do not want to bother with actual polling, so we implement `async fn` variants of the above
+/// on `dyn SeqRead`.
+///
+/// The reason why this is not an internal `SeqReadExt` trait like `AsyncReadExt` is simply that
+/// we'd then need to define all the `Future` types they return manually and explicitly. Since we
+/// have no use for them, all we want is the ability to use `async fn`...
+///
+/// The downside is that we need some `(&mut self.input as &mut dyn SeqRead)` casts in the
+/// decoder's code, but that's fine.
+impl<'a> dyn SeqRead + 'a {
+    /// awaitable version of `poll_position`.
+    async fn position(&mut self) -> Option<io::Result<u64>> {
+        poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *self).poll_position(cx) }).await
+    }
+
+    /// awaitable version of `poll_seq_read`.
+    async fn seq_read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *self).poll_seq_read(cx, buf) }).await
+    }
+
+    /// `read_exact` - since that's what we _actually_ want most of the time, but with EOF handling
+    async fn seq_read_exact_or_eof(&mut self, mut buf: &mut [u8]) -> io::Result<Option<()>> {
+        let mut eof_ok = true;
+        while !buf.is_empty() {
+            match self.seq_read(buf).await? {
+                0 if eof_ok => break,
+                0 => io_bail!("unexpected EOF"),
+                got => buf = &mut buf[got..],
+            }
+            eof_ok = false;
+        }
+        Ok(Some(()))
+    }
+
+    /// `read_exact` - since that's what we _actually_ want most of the time.
+    async fn seq_read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> {
+        match self.seq_read_exact_or_eof(buf).await? {
+            Some(()) => Ok(()),
+            None => io_bail!("unexpected eof"),
+        }
+    }
+
+    /// Helper to read into an allocated byte vector.
+    async fn seq_read_exact_data(&mut self, size: usize) -> io::Result<Vec<u8>> {
+        let mut data = util::vec_new(size);
+        self.seq_read_exact(&mut data[..]).await?;
+        Ok(data)
+    }
+
+    /// `seq_read_entry` with EOF handling
+    async fn seq_read_entry_or_eof<T: Endian>(&mut self) -> io::Result<Option<T>> {
+        let mut data = MaybeUninit::<T>::uninit();
+        let buf =
+            unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<T>()) };
+        if self.seq_read_exact_or_eof(buf).await?.is_none() {
+            return Ok(None);
+        }
+        Ok(Some(unsafe { data.assume_init().from_le() }))
+    }
+
+    /// Helper to read into an `Endian`-implementing `struct`.
+    async fn seq_read_entry<T: Endian>(&mut self) -> io::Result<T> {
+        self.seq_read_entry_or_eof()
+            .await?
+            .ok_or_else(|| io_format_err!("unexepcted EOF"))
+    }
+}
+
+/// The decoder state machine implementation.
+///
+/// We use `async fn` to implement the decoder state machine so that we can easily plug in both
+/// synchronous or `async` I/O objects in as input.
+pub struct DecoderImpl<T> {
+    // The reader the archive is decoded from.
+    input: T,
+    // The most recently read item header.
+    current_header: Header,
+    // The entry currently being assembled; handed out to the caller via `take()`.
+    entry: Entry,
+    // Byte lengths of the entry path, pushed whenever a directory entry has been read. Used to
+    // cut the path back before the next file name gets appended.
+    path_lengths: Vec<usize>,
+    // Where we are within the archive, see `State`.
+    state: State,
+    // If set, GOODBYE items are returned as `EndOfDirectory` entries instead of being skipped.
+    with_goodbye_tables: bool,
+}
+
+/// Position of the decoder within the archive between two calls to `next_do`.
+enum State {
+    /// Nothing has been read yet, the next step is to read the first entry.
+    Begin,
+    /// An entry has been finished completely, the next item header needs to be read.
+    Default,
+    /// The content of the current item has not been consumed yet and needs to be skipped first.
+    InPayload,
+    /// The current header already is the next FILENAME or GOODBYE item.
+    InDirectory,
+    /// The end has been reached, no further entries are returned.
+    Eof,
+}
+
+/// Control flow while parsing items.
+///
+/// When parsing an entry, we usually go through all of its attribute items. Once we reach the end
+/// of the entry we stop.
+/// Note that for a directory this means that we stop at the beginning of its contents, with the
+/// header of its first FILENAME or GOODBYE item already loaded.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+enum ItemResult {
+    /// We parsed an "attribute" item and should continue parsing.
+    Attribute,
+
+    /// We finished an entry (`SYMLINK`, `HARDLINK`, ...) or just entered the contents of a
+    /// directory (`FILENAME`, `GOODBYE`).
+    ///
+    /// We stop moving forward at this point.
+    Entry,
+}
+
+impl<T: SeqRead> DecoderImpl<T> {
+    /// Create a decoder reading from `input`, using `/` as the path of the first entry.
+    pub async fn new(input: T) -> io::Result<Self> {
+        Self::new_full(input, "/".into()).await
+    }
+
+    pub(crate) async fn new_full(mut input: T, path: PathBuf) -> io::Result<Self> {
+        let offset = (&mut input as &mut dyn SeqRead)
+            .position()
+            .await
+            .transpose()?;
+        let this = DecoderImpl {
+            input,
+            current_header: unsafe { mem::zeroed() },
+            entry: Entry {
+                path,
+                kind: EntryKind::EndOfDirectory,
+                metadata: Metadata::default(),
+                offset,
+            },
+            path_lengths: Vec::new(),
+            state: State::Begin,
+            with_goodbye_tables: false,
+        };
+
+        // this.read_next_entry().await?;
+
+        Ok(this)
+    }
+
+    /// Get the next file entry, recursing into directories.
+    ///
+    /// This is `next_do` with its result transposed into an `Option<io::Result<_>>`.
+    pub async fn next(&mut self) -> Option<io::Result<Entry>> {
+        self.next_do().await.transpose()
+    }
+
+    /// Advance the state machine until the next entry can be returned.
+    ///
+    /// Returns `Ok(None)` in the `Eof` state, which is entered when a GOODBYE item is found
+    /// while no directory is left on the `path_lengths` stack.
+    pub(crate) async fn next_do(&mut self) -> io::Result<Option<Entry>> {
+        loop {
+            // First step: make sure `current_header` is the next FILENAME or GOODBYE item.
+            match self.state {
+                State::Eof => return Ok(None),
+                State::Begin => return self.read_next_entry().await.map(Some),
+                State::Default => {
+                    // we completely finished an entry, so now we're going "up" in the directory
+                    // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE:
+                    self.read_next_item().await?;
+                }
+                State::InPayload => {
+                    // We need to skip the current payload first.
+                    self.skip_entry().await?;
+                    self.read_next_item().await?;
+                }
+                State::InDirectory => {
+                    // We're at the next FILENAME or GOODBYE item.
+                }
+            }
+
+            // Second step: act on that item.
+            match self.current_header.htype {
+                format::PXAR_FILENAME => return self.handle_file_entry().await,
+                format::PXAR_GOODBYE => {
+                    if self.with_goodbye_tables {
+                        // Report the goodbye table as an entry of its own; its content is
+                        // skipped on the next call due to the `InPayload` state.
+                        self.entry.kind = EntryKind::EndOfDirectory;
+                        let offset = (&mut self.input as &mut dyn SeqRead)
+                            .position()
+                            .await
+                            .transpose()?;
+                        self.entry.offset = offset;
+                        self.state = State::InPayload;
+                        return Ok(Some(self.entry.take()));
+                    }
+
+                    self.skip_entry().await?;
+                    // Leaving a directory: drop its recorded path length.
+                    if self.path_lengths.pop().is_some() {
+                        self.state = State::Default;
+                        // and move on:
+                        continue;
+                    } else {
+                        self.state = State::Eof;
+                        // early out:
+                        return Ok(None);
+                    }
+                }
+                h => io_bail!(
+                    "expected filename or directory-goodbye pxar entry, got: {:x}",
+                    h
+                ),
+            }
+        }
+    }
+
+    async fn handle_file_entry(&mut self) -> io::Result<Option<Entry>> {
+        let mut data = self.read_entry_as_bytes().await?;
+
+        // filenames are zero terminated!
+        if data.pop() != Some(0) {
+            io_bail!("illegal path found (missing terminating zero)");
+        }
+        if data.is_empty() {
+            io_bail!("illegal path found (empty)");
+        }
+
+        let path = PathBuf::from(OsString::from_vec(data));
+        self.set_path(&path)?;
+        self.read_next_entry().await.map(Some)
+    }
+
+    fn reset_path(&mut self) -> io::Result<()> {
+        let path_len = *self
+            .path_lengths
+            .last()
+            .ok_or_else(|| io_format_err!("internal decoder error: path underrun"))?;
+        let mut path = mem::replace(&mut self.entry.path, PathBuf::new())
+            .into_os_string()
+            .into_vec();
+        path.truncate(path_len);
+        self.entry.path = PathBuf::from(OsString::from_vec(path));
+        Ok(())
+    }
+
+    /// Replace the last component of the current entry path: the path is first cut back to the
+    /// innermost directory via `reset_path`, then `path` is appended.
+    fn set_path(&mut self, path: &Path) -> io::Result<()> {
+        self.reset_path()?;
+        self.entry.path.push(path);
+        Ok(())
+    }
+
+    /// Read a complete entry: the ENTRY item carrying the stat data, followed by all items up to
+    /// and including the one which determines the kind of the entry.
+    ///
+    /// Returns `Ok(None)` if `seq_read_entry_or_eof` reports EOF instead of an ENTRY item.
+    async fn read_next_entry_or_eof(&mut self) -> io::Result<Option<Entry>> {
+        self.state = State::Default;
+        self.entry.clear_data();
+
+        // The item header immediately followed by the item's data, to read both in one go.
+        #[derive(Endian)]
+        #[repr(C)]
+        struct WithHeader<U: Endian> {
+            header: Header,
+            data: U,
+        }
+
+        let entry: WithHeader<format::Entry> = {
+            let input: &mut dyn SeqRead = &mut self.input;
+            match input.seq_read_entry_or_eof().await? {
+                None => return Ok(None),
+                Some(entry) => entry,
+            }
+        };
+
+        if entry.header.htype != format::PXAR_ENTRY {
+            io_bail!(
+                "expected pxar entry of type 'Entry', got: {:x}",
+                entry.header.htype
+            );
+        }
+
+        self.current_header = unsafe { mem::zeroed() };
+        self.entry.metadata = Metadata {
+            stat: entry.data,
+            ..Default::default()
+        };
+
+        // Consume all attribute items; the loop ends at the item finishing the entry.
+        while self.read_next_item().await? != ItemResult::Entry {}
+
+        if self.entry.is_dir() {
+            // Remember how long the directory's own path is, so that the names of its contents
+            // can be appended to (and removed from) it.
+            self.path_lengths
+                .push(self.entry.path.as_os_str().as_bytes().len());
+        }
+
+        Ok(Some(self.entry.take()))
+    }
+
+    /// Like `read_next_entry_or_eof`, but an EOF is turned into an error.
+    async fn read_next_entry(&mut self) -> io::Result<Entry> {
+        self.read_next_entry_or_eof()
+            .await?
+            .ok_or_else(|| io_format_err!("unexpected EOF"))
+    }
+
+    /// Read the next item header and then process the item it announces.
+    async fn read_next_item(&mut self) -> io::Result<ItemResult> {
+        self.read_next_header().await?;
+        self.read_current_item().await
+    }
+
+    async fn read_next_header(&mut self) -> io::Result<()> {
+        let dest = unsafe {
+            std::slice::from_raw_parts_mut(
+                &mut self.current_header as *mut Header as *mut u8,
+                size_of_val(&self.current_header),
+            )
+        };
+        (&mut self.input as &mut dyn SeqRead)
+            .seq_read_exact(dest)
+            .await?;
+        Ok(())
+    }
+
+    /// Read the next item, the header is already loaded.
+    ///
+    /// Attribute items (xattrs, ACLs, fcaps, quota project id) are stored in the metadata of the
+    /// current entry and yield `ItemResult::Attribute`. Items which determine the kind of the
+    /// entry yield `ItemResult::Entry`. Attributes which may only appear once per entry cause an
+    /// error when encountered a second time.
+    async fn read_current_item(&mut self) -> io::Result<ItemResult> {
+        match self.current_header.htype {
+            format::PXAR_XATTR => {
+                let xattr = self.read_xattr().await?;
+                self.entry.metadata.xattrs.push(xattr);
+            }
+            format::PXAR_ACL_USER => {
+                let entry = self.read_acl_user().await?;
+                self.entry.metadata.acl.users.push(entry);
+            }
+            format::PXAR_ACL_GROUP => {
+                let entry = self.read_acl_group().await?;
+                self.entry.metadata.acl.groups.push(entry);
+            }
+            format::PXAR_ACL_GROUP_OBJ => {
+                if self.entry.metadata.acl.group_obj.is_some() {
+                    io_bail!("multiple acl group object entries detected");
+                }
+                let entry = self.read_acl_group_object().await?;
+                self.entry.metadata.acl.group_obj = Some(entry);
+            }
+            format::PXAR_ACL_DEFAULT => {
+                if self.entry.metadata.acl.default.is_some() {
+                    io_bail!("multiple acl default entries detected");
+                }
+                let entry = self.read_acl_default().await?;
+                self.entry.metadata.acl.default = Some(entry);
+            }
+            format::PXAR_ACL_DEFAULT_USER => {
+                let entry = self.read_acl_user().await?;
+                self.entry.metadata.acl.default_users.push(entry);
+            }
+            format::PXAR_ACL_DEFAULT_GROUP => {
+                let entry = self.read_acl_group().await?;
+                self.entry.metadata.acl.default_groups.push(entry);
+            }
+            format::PXAR_FCAPS => {
+                if self.entry.metadata.fcaps.is_some() {
+                    io_bail!("multiple file capability entries detected");
+                }
+                let entry = self.read_fcaps().await?;
+                self.entry.metadata.fcaps = Some(entry);
+            }
+            format::PXAR_QUOTA_PROJID => {
+                if self.entry.metadata.quota_project_id.is_some() {
+                    io_bail!("multiple quota project id entries detected");
+                }
+                let entry = self.read_quota_project_id().await?;
+                self.entry.metadata.quota_project_id = Some(entry);
+            }
+            format::PXAR_SYMLINK => {
+                self.entry.kind = EntryKind::Symlink(self.read_symlink().await?);
+                return Ok(ItemResult::Entry);
+            }
+            format::PXAR_HARDLINK => {
+                self.entry.kind = EntryKind::Hardlink(self.read_hardlink().await?);
+                return Ok(ItemResult::Entry);
+            }
+            format::PXAR_DEVICE => {
+                self.entry.kind = EntryKind::Device(self.read_device().await?);
+                return Ok(ItemResult::Entry);
+            }
+            format::PXAR_PAYLOAD => {
+                // The payload itself is not read here, only its size is recorded. The
+                // `InPayload` state makes sure it gets skipped before the next item is read.
+                self.entry.kind = EntryKind::File {
+                    size: self.current_header.content_size(),
+                };
+                self.state = State::InPayload;
+                return Ok(ItemResult::Entry);
+            }
+            format::PXAR_FILENAME | format::PXAR_GOODBYE => {
+                // These items already belong to the contents of a directory, so the entry we
+                // have been reading is a directory. The item's content is left unread.
+                self.state = State::InDirectory;
+                self.entry.kind = EntryKind::Directory;
+                return Ok(ItemResult::Entry);
+            }
+            _ => io_bail!("unexpected entry type: {:x}", self.current_header.htype),
+        }
+
+        Ok(ItemResult::Attribute)
+    }
+
+    //
+    // Local read helpers.
+    //
+    // These utilize additional information and hence are not part of the `dyn SeqRead` impl.
+    //
+
+    async fn skip_entry(&mut self) -> io::Result<()> {
+        let mut len = self.current_header.content_size();
+        let scratch = scratch_buffer();
+        while len >= (scratch.len() as u64) {
+            (&mut self.input as &mut dyn SeqRead)
+                .seq_read_exact(scratch)
+                .await?;
+            len -= scratch.len() as u64;
+        }
+        let len = len as usize;
+        if len > 0 {
+            (&mut self.input as &mut dyn SeqRead)
+                .seq_read_exact(&mut scratch[..len])
+                .await?;
+        }
+        Ok(())
+    }
+
+    async fn read_entry_as_bytes(&mut self) -> io::Result<Vec<u8>> {
+        let size = usize::try_from(self.current_header.content_size()).map_err(io_err_other)?;
+        let data = (&mut self.input as &mut dyn SeqRead)
+            .seq_read_exact_data(size)
+            .await?;
+        Ok(data)
+    }
+
+    /// Helper to read a struct entry while checking its size.
+    async fn read_simple_entry<U: Endian + 'static>(
+        &mut self,
+        what: &'static str,
+    ) -> io::Result<U> {
+        if self.current_header.content_size() != (size_of::<T>() as u64) {
+            io_bail!(
+                "bad {} size: {} (expected {})",
+                what,
+                self.current_header.content_size(),
+                size_of::<T>(),
+            );
+        }
+        (&mut self.input as &mut dyn SeqRead).seq_read_entry().await
+    }
+
+    //
+    // Read functions for PXAR components.
+    //
+
+    async fn read_xattr(&mut self) -> io::Result<format::XAttr> {
+        let data = self.read_entry_as_bytes().await?;
+
+        let name_len = data
+            .iter()
+            .position(|c| *c == 0)
+            .ok_or_else(|| io_format_err!("missing value separator in xattr"))?;
+
+        Ok(format::XAttr { data, name_len })
+    }
+
+    /// Read the content of the current item as the raw data of a symlink.
+    async fn read_symlink(&mut self) -> io::Result<format::Symlink> {
+        let data = self.read_entry_as_bytes().await?;
+        Ok(format::Symlink { data })
+    }
+
+    /// Read the content of the current item as the raw data of a hardlink.
+    async fn read_hardlink(&mut self) -> io::Result<format::Hardlink> {
+        let data = self.read_entry_as_bytes().await?;
+        Ok(format::Hardlink { data })
+    }
+
+    /// Read the fixed-size device item via `read_simple_entry`.
+    async fn read_device(&mut self) -> io::Result<format::Device> {
+        self.read_simple_entry("device").await
+    }
+
+    /// Read the content of the current item as raw file capability data.
+    async fn read_fcaps(&mut self) -> io::Result<format::FCaps> {
+        let data = self.read_entry_as_bytes().await?;
+        Ok(format::FCaps { data })
+    }
+
+    /// Read the fixed-size ACL user item via `read_simple_entry`.
+    async fn read_acl_user(&mut self) -> io::Result<format::acl::User> {
+        self.read_simple_entry("acl user").await
+    }
+
+    /// Read the fixed-size ACL group item via `read_simple_entry`.
+    async fn read_acl_group(&mut self) -> io::Result<format::acl::Group> {
+        self.read_simple_entry("acl group").await
+    }
+
+    /// Read the fixed-size ACL group object item via `read_simple_entry`.
+    async fn read_acl_group_object(&mut self) -> io::Result<format::acl::GroupObject> {
+        self.read_simple_entry("acl group object").await
+    }
+
+    /// Read the fixed-size ACL default item via `read_simple_entry`.
+    async fn read_acl_default(&mut self) -> io::Result<format::acl::Default> {
+        self.read_simple_entry("acl default").await
+    }
+
+    /// Read the fixed-size quota project id item via `read_simple_entry`.
+    async fn read_quota_project_id(&mut self) -> io::Result<format::QuotaProjectId> {
+        self.read_simple_entry("quota project id").await
+    }
+}
diff --git a/src/decoder/aio.rs b/src/decoder/aio.rs
new file mode 100644 (file)
index 0000000..8cf7aa8
--- /dev/null
@@ -0,0 +1,169 @@
+//! Asynchronous `pxar` format handling.
+
+use std::io;
+
+use crate::decoder::{self, SeqRead};
+use crate::Entry;
+
+/// Asynchronous `pxar` decoder.
+///
+/// This is the `async` version of the `pxar` decoder.
+#[repr(transparent)]
+pub struct Decoder<T> {
+    // The shared decoder state machine doing the actual work.
+    inner: decoder::DecoderImpl<T>,
+}
+
+#[cfg(feature = "futures-io")]
+impl<T: futures::io::AsyncRead> Decoder<T> {
+    /// Decode a `pxar` archive from a `futures::io::AsyncRead` input.
+    ///
+    /// The input gets wrapped into a `FuturesReader`, which is also the type the returned
+    /// decoder is parameterized with.
+    #[inline]
+    pub async fn from_futures(input: T) -> io::Result<Decoder<FuturesReader<T>>> {
+        Decoder::new(FuturesReader::new(input)).await
+    }
+}
+
+#[cfg(feature = "tokio-io")]
+impl<T: tokio::io::AsyncRead> Decoder<T> {
+    /// Decode a `pxar` archive from a `tokio::io::AsyncRead` input.
+    ///
+    /// The input gets wrapped into a `TokioReader`, which is also the type the returned decoder
+    /// is parameterized with.
+    #[inline]
+    pub async fn from_tokio(input: T) -> io::Result<Decoder<TokioReader<T>>> {
+        Decoder::new(TokioReader::new(input)).await
+    }
+}
+
+impl<T: SeqRead> Decoder<T> {
+    /// Create an async decoder from an input implementing our internal read interface.
+    pub async fn new(input: T) -> io::Result<Self> {
+        Ok(Self {
+            inner: decoder::DecoderImpl::new(input).await?,
+        })
+    }
+
+    /// Get the next entry of the archive, or `None` once the decoder is done.
+    ///
+    /// This forwards to the state machine's `next_do()`.
+    pub async fn next(&mut self) -> Option<io::Result<Entry>> {
+        self.inner.next_do().await.transpose()
+    }
+
+    /// Turn this decoder into a `Stream`.
+    #[cfg(feature = "futures-io")]
+    pub fn into_stream(self) -> DecoderStream<T> {
+        DecoderStream::new(self)
+    }
+}
+
+#[cfg(feature = "futures-io")]
+mod stream {
+    use std::future::Future;
+    use std::io;
+    use std::pin::Pin;
+    use std::task::{Context, Poll};
+
+    use super::{Entry, SeqRead};
+
+    /// A wrapper for the async decoder implementing `futures::stream::Stream`.
+    ///
+    /// As long as streams are poll-based this wrapper is required to turn `async fn next()` into
+    /// `Stream`'s `poll_next()` interface.
+    ///
+    /// NOTE(review): the stored future borrows `inner` with its lifetime erased, making this a
+    /// self-referential struct. Nothing visible here prevents the struct from being moved while
+    /// a future is stored, and `inner` is declared (and hence dropped) before `future` - both
+    /// should be double-checked.
+    pub struct DecoderStream<T> {
+        inner: super::Decoder<T>,
+        // The currently running `inner.next()` call, if any.
+        future: Option<Pin<Box<dyn Future<Output = Option<io::Result<Entry>>>>>>,
+    }
+
+    impl<T> DecoderStream<T> {
+        /// Wrap a decoder; no future is created before the stream is polled.
+        pub fn new(inner: super::Decoder<T>) -> Self {
+            Self {
+                inner,
+                future: None,
+            }
+        }
+    }
+
+    impl<T: SeqRead> futures::stream::Stream for DecoderStream<T> {
+        type Item = io::Result<Entry>;
+
+        fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
+            let this = unsafe { self.get_unchecked_mut() };
+            loop {
+                // Poll the running future if there is one. It is put back while still pending
+                // and dropped once it has produced its result.
+                if let Some(mut fut) = this.future.take() {
+                    match fut.as_mut().poll(cx) {
+                        Poll::Ready(res) => return Poll::Ready(res),
+                        Poll::Pending => {
+                            this.future = Some(fut);
+                            return Poll::Pending;
+                        }
+                    }
+                }
+                // No future is running: start the next `next()` call and loop around to poll it.
+                unsafe {
+                    let fut: Box<dyn Future<Output = _>> = Box::new(this.inner.next());
+                    // Discard the lifetime:
+                    let fut: *mut (dyn Future<Output = Option<io::Result<Entry>>> + 'static) =
+                        core::mem::transmute(Box::into_raw(fut));
+                    let fut = Box::from_raw(fut);
+                    this.future = Some(Pin::new_unchecked(fut));
+                }
+            }
+        }
+    }
+}
+
+#[cfg(feature = "futures-io")]
+pub use stream::DecoderStream;
+
+/// Generate a read adapter for an `AsyncRead`-like trait.
+///
+/// For the given `cfg` condition this creates a private module containing a wrapper struct
+/// `$name<T>` around a `T: $trait`, whose `SeqRead::poll_seq_read` forwards to the wrapped
+/// object's `poll_read`, and re-exports the struct. `poll_position` is not overridden, so the
+/// trait's default implementation is used.
+macro_rules! async_io_impl {
+    (
+        #[cfg( $($attr:tt)+ )]
+        mod $mod:ident {
+            $(#[$docs:meta])*
+            $name:ident : $trait:path ;
+        }
+    ) => {
+        #[cfg( $($attr)+ )]
+        mod $mod {
+            use std::io;
+            use std::pin::Pin;
+            use std::task::{Context, Poll};
+
+            $(#[$docs])*
+            pub struct $name<T> {
+                inner: T,
+            }
+
+            impl<T: $trait> $name<T> {
+                pub fn new(inner: T) -> Self {
+                    Self { inner }
+                }
+            }
+
+            impl<T: $trait> crate::decoder::SeqRead for $name<T> {
+                fn poll_seq_read(
+                    self: Pin<&mut Self>,
+                    cx: &mut Context,
+                    buf: &mut [u8],
+                ) -> Poll<io::Result<usize>> {
+                    // Pin projection onto the wrapped reader.
+                    unsafe {
+                        self.map_unchecked_mut(|this| &mut this.inner)
+                            .poll_read(cx, buf)
+                    }
+                }
+            }
+        }
+        #[cfg( $($attr)+ )]
+        pub use $mod::$name;
+    }
+}
+
+async_io_impl! {
+    #[cfg(feature = "futures-io")]
+    mod fut {
+        /// Read adapter for `futures::io::AsyncRead`.
+        FuturesReader : futures::io::AsyncRead;
+    }
+}
+
+async_io_impl! {
+    #[cfg(feature = "tokio-io")]
+    mod tok {
+        /// Read adapter for `tokio::io::AsyncRead`.
+        TokioReader : tokio::io::AsyncRead;
+    }
+}
diff --git a/src/decoder/sync.rs b/src/decoder/sync.rs
new file mode 100644 (file)
index 0000000..9311f21
--- /dev/null
@@ -0,0 +1,82 @@
+//! Blocking `pxar` format handling.
+
+use std::io;
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+use crate::decoder::{self, SeqRead};
+use crate::util::poll_result_once;
+use crate::Entry;
+
+/// Blocking `pxar` decoder.
+///
+/// This is the blocking I/O version of the `pxar` decoder. This will *not* work with an
+/// asynchronous I/O object. I/O must always return `Poll::Ready`.
+///
+/// Attempting to use a `Waker` from this context *will* `panic!`
+///
+/// If you need to use asynchronous I/O, use `aio::Decoder`.
+#[repr(transparent)]
+pub struct Decoder<T> {
+    // The shared decoder state machine doing the actual work.
+    inner: decoder::DecoderImpl<T>,
+}
+
+impl<T: io::Read> Decoder<T> {
+    /// Decode a `pxar` archive from a regular `std::io::Read` input.
+    ///
+    /// The input gets wrapped into a `StandardReader`, which is also the type the returned
+    /// decoder is parameterized with.
+    #[inline]
+    pub fn from_std(input: T) -> io::Result<Decoder<StandardReader<T>>> {
+        Decoder::new(StandardReader::new(input))
+    }
+}
+
+impl<T: SeqRead> Decoder<T> {
+    /// Create a *blocking* decoder from an input implementing our internal read interface.
+    ///
+    /// Note that the `input`'s `SeqRead` implementation must always return `Poll::Ready` and is
+    /// not allowed to use the `Waker`, as this will cause a `panic!`.
+    pub fn new(input: T) -> io::Result<Self> {
+        Ok(Self {
+            inner: poll_result_once(decoder::DecoderImpl::new(input))?,
+        })
+    }
+
+    /// Internal helper for `Accessor`. In this case we have the low-level state machine, and the
+    /// layer "above" the `Accessor` propagates the actual type (sync vs async).
+    pub(crate) fn from_impl(inner: decoder::DecoderImpl<T>) -> Self {
+        Self { inner }
+    }
+
+    /// Get the next entry of the archive, or `None` once the decoder is done.
+    ///
+    /// This runs the state machine's `next_do()` through `poll_result_once`.
+    pub fn next(&mut self) -> Option<io::Result<Entry>> {
+        poll_result_once(self.inner.next_do()).transpose()
+    }
+}
+
+impl<T: SeqRead> Iterator for Decoder<T> {
+    type Item = io::Result<Entry>;
+
+    /// Same as the inherent `Decoder::next`.
+    fn next(&mut self) -> Option<Self::Item> {
+        Decoder::next(self)
+    }
+}
+
+/// Pxar decoder read adapter for `std::io::Read`.
+pub struct StandardReader<T> {
+    inner: T,
+}
+
+impl<T: io::Read> StandardReader<T> {
+    /// Wrap a `std::io::Read` object.
+    pub fn new(inner: T) -> Self {
+        Self { inner }
+    }
+}
+
+impl<T: io::Read> SeqRead for StandardReader<T> {
+    /// Performs the blocking `read` call right away, so the result is always `Poll::Ready`. The
+    /// task context is not used.
+    fn poll_seq_read(
+        self: Pin<&mut Self>,
+        _cx: &mut Context,
+        buf: &mut [u8],
+    ) -> Poll<io::Result<usize>> {
+        // We only call `read` on the wrapped object, it is not moved out of the pin.
+        Poll::Ready(unsafe { self.get_unchecked_mut() }.inner.read(buf))
+    }
+}
diff --git a/src/format.rs b/src/format.rs
new file mode 100644 (file)
index 0000000..1a67659
--- /dev/null
@@ -0,0 +1,233 @@
+//! *pxar* binary format definition
+//!
+//! Please note that all values are stored in little endian ordering.
+//!
+//! The Archive contains a list of items. Each item starts with a `Header`, followed by the
+//! item data.
+
+use std::cmp::Ordering;
+use std::io;
+use std::mem::size_of;
+use std::path::Path;
+
+use endian_trait::Endian;
+use siphasher::sip::SipHasher24;
+
+pub mod acl;
+
+pub const PXAR_ENTRY: u64 = 0x1396fabcea5bbb51;
+pub const PXAR_FILENAME: u64 = 0x6dbb6ebcb3161f0b;
+pub const PXAR_SYMLINK: u64 = 0x664a6fb6830e0d6c;
+pub const PXAR_DEVICE: u64 = 0xac3dace369dfe643;
+pub const PXAR_XATTR: u64 = 0xb8157091f80bc486;
+pub const PXAR_ACL_USER: u64 = 0x297dc88b2ef12faf;
+pub const PXAR_ACL_GROUP: u64 = 0x36f2acb56cb3dd0b;
+pub const PXAR_ACL_GROUP_OBJ: u64 = 0x23047110441f38f3;
+pub const PXAR_ACL_DEFAULT: u64 = 0xfe3eeda6823c8cd0;
+pub const PXAR_ACL_DEFAULT_USER: u64 = 0xbdf03df9bd010a91;
+pub const PXAR_ACL_DEFAULT_GROUP: u64 = 0xa0cb1168782d1f51;
+pub const PXAR_FCAPS: u64 = 0xf7267db0afed0629;
+pub const PXAR_QUOTA_PROJID: u64 = 0x161baf2d8772a72b;
+
+/// Marks item as hardlink
+/// compute_goodbye_hash(b"__PROXMOX_FORMAT_HARDLINK__");
+pub const PXAR_HARDLINK: u64 = 0x2c5e06f634f65b86;
+/// Marks the beginning of the payload (actual content) of regular files
+pub const PXAR_PAYLOAD: u64 = 0x8b9e1d93d6dcffc9;
+/// Marks item as entry of goodbye table
+pub const PXAR_GOODBYE: u64 = 0xdfd35c5e8327c403;
+/// The end marker used in the GOODBYE object
+pub const PXAR_GOODBYE_TAIL_MARKER: u64 = 0x57446fa533702943;
+
+/// The header every item in the archive starts with.
+#[derive(Debug, Endian)]
+#[repr(C)]
+pub struct Header {
+    /// The item type (see `PXAR_` constants).
+    pub htype: u64,
+    /// The size of the item, including the size of `Header`.
+    full_size: u64,
+}
+
+impl Header {
+    /// The size of the whole item, header included.
+    #[inline]
+    pub fn full_size(&self) -> u64 {
+        self.full_size
+    }
+
+    /// The size of the item's data, i.e. `full_size` minus the size of the header.
+    ///
+    /// NOTE(review): the subtraction underflows if `full_size` is smaller than the header itself
+    /// - headers coming from untrusted input presumably need to be validated before this is
+    /// called.
+    #[inline]
+    pub fn content_size(&self) -> u64 {
+        self.full_size() - (size_of::<Self>() as u64)
+    }
+}
+
+/// The fixed-size data of an ENTRY item. The decoder stores it as the `stat` member of an
+/// entry's metadata.
+#[derive(Clone, Debug, Default, Endian)]
+#[repr(C)]
+pub struct Entry {
+    pub mode: u64,
+    pub flags: u64,
+    pub uid: u32,
+    pub gid: u32,
+    pub mtime: u64,
+}
+
+/// A file name as raw bytes.
+// NOTE(review): presumably the content of a FILENAME item - confirm whether `name` is meant to
+// include the terminating zero.
+#[derive(Clone, Debug)]
+pub struct Filename {
+    pub name: Vec<u8>,
+}
+
+/// The raw content of a SYMLINK item, exactly as read from the archive.
+#[derive(Clone, Debug)]
+pub struct Symlink {
+    pub data: Vec<u8>,
+}
+
+/// The raw content of a HARDLINK item, exactly as read from the archive.
+#[derive(Clone, Debug)]
+pub struct Hardlink {
+    pub data: Vec<u8>,
+}
+
+#[derive(Clone, Debug, Eq)]
+#[repr(C)]
+pub struct XAttr {
+    pub(crate) data: Vec<u8>,
+    pub(crate) name_len: usize,
+}
+
+impl XAttr {
+    pub fn new<N: AsRef<[u8]>, V: AsRef<[u8]>>(name: N, value: V) -> Self {
+        let name = name.as_ref();
+        let value = value.as_ref();
+        let mut data = Vec::with_capacity(name.len() + value.len() + 1);
+        data.extend(name);
+        data.push(0);
+        data.extend(value);
+        Self {
+            data,
+            name_len: name.len(),
+        }
+    }
+
+    pub fn name(&self) -> &[u8] {
+        &self.data[..self.name_len]
+    }
+
+    pub fn value(&self) -> &[u8] {
+        &self.data[(self.name_len + 1)..]
+    }
+}
+
+impl Ord for XAttr {
+    fn cmp(&self, other: &XAttr) -> Ordering {
+        self.name().cmp(&other.name())
+    }
+}
+
+impl PartialOrd for XAttr {
+    fn partial_cmp(&self, other: &XAttr) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl PartialEq for XAttr {
+    fn eq(&self, other: &XAttr) -> bool {
+        self.name() == other.name()
+    }
+}
+
+/// The fixed-size content of a DEVICE item: a major and a minor number.
+#[derive(Clone, Debug, Endian)]
+#[repr(C)]
+pub struct Device {
+    pub major: u64,
+    pub minor: u64,
+}
+
+/// The raw content of an FCAPS (file capabilities) item, exactly as read from the archive.
+#[derive(Clone, Debug)]
+#[repr(C)]
+pub struct FCaps {
+    pub data: Vec<u8>,
+}
+
+/// The fixed-size content of a QUOTA_PROJID item.
+#[derive(Clone, Debug, Endian)]
+#[repr(C)]
+pub struct QuotaProjectId {
+    pub projid: u64,
+}
+
+/// One element of the table stored in a GOODBYE item.
+#[derive(Debug, Endian)]
+#[repr(C)]
+pub struct GoodbyeItem {
+    /// SipHash24 of the directory item name. The last GOODBYE item uses the special hash value
+    /// `PXAR_GOODBYE_TAIL_MARKER`.
+    pub hash: u64,
+
+    /// The offset from the start of the GOODBYE object to the start of the matching directory item
+    /// (point to a FILENAME). The last GOODBYE item points to the start of the matching ENTRY
+    /// object.
+    pub offset: u64,
+
+    /// The overall size of the directory item. This includes the FILENAME header. In other words,
+    /// `goodbye_start - offset + size` points to the end of the directory.
+    ///
+    /// The last GOODBYE item repeats the size of the GOODBYE item.
+    pub size: u64,
+}
+
+impl GoodbyeItem {
+    /// Create an item for the file `name`; the hash is computed via `hash_filename`, `offset`
+    /// and `size` are stored as given.
+    pub fn new(name: &[u8], offset: u64, size: u64) -> Self {
+        let hash = hash_filename(name);
+        Self { hash, offset, size }
+    }
+}
+
+/// Compute the hash of a file name as stored in `GoodbyeItem::hash`: a SipHash24 with a fixed
+/// pair of keys, fed with the raw bytes of the name.
+pub fn hash_filename(name: &[u8]) -> u64 {
+    use std::hash::Hasher;
+    // The keys are fixed so that the same name always produces the same hash.
+    let mut hasher = SipHasher24::new_with_keys(0x8574442b0f1d84b3, 0x2736ed30d1c22ec1);
+    hasher.write(name);
+    hasher.finish()
+}
+
+/*
+pub fn search_binary_tree_array<F, T>(table: &[T], key: &T) -> Option<usize>
+where
+    T: Ord,
+    F: FnMut(&T) -> std::cmp::Ordering,
+{
+    search_binary_tree_array_by(table, |elem| key.cmp(elem))
+}
+*/
+
+pub fn search_binary_tree_array_by<F, T>(table: &[T], mut f: F) -> Option<usize>
+where
+    F: FnMut(&T) -> Ordering,
+{
+    let mut i = 0;
+
+    while !table.is_empty() {
+        match f(&table[i]) {
+            Ordering::Equal => return Some(i),
+            Ordering::Less => i = 2 * i + 1,
+            Ordering::Greater => i = 2 * i + 2,
+        }
+        if i >= table.len() {
+            break;
+        }
+    }
+
+    None
+}
+
+pub fn path_is_legal_component(path: &Path) -> bool {
+    let mut components = path.components();
+    match components.next() {
+        Some(std::path::Component::Normal(_)) => (),
+        _ => return false,
+    }
+    components.next().is_none()
+}
+
+pub fn check_file_name(path: &Path) -> io::Result<()> {
+    if !path_is_legal_component(path) {
+        io_bail!("invalid file name in archive: {:?}", path);
+    } else {
+        Ok(())
+    }
+}
diff --git a/src/format/acl.rs b/src/format/acl.rs
new file mode 100644 (file)
index 0000000..457630e
--- /dev/null
@@ -0,0 +1,94 @@
+//! ACL related data
+
+use std::cmp::Ordering;
+
+use endian_trait::Endian;
+
+bitflags::bitflags! {
+    /// ACL permission bits.
+    ///
+    /// The three flags can be combined; they are stored together in a single `u64`.
+    #[derive(Endian)]
+    pub struct Permissions: u64 {
+        /// Read permission bit.
+        const PXAR_ACL_PERMISSION_READ = 4;
+        /// Write permission bit.
+        const PXAR_ACL_PERMISSION_WRITE = 2;
+        /// Execute permission bit.
+        const PXAR_ACL_PERMISSION_EXECUTE = 1;
+    }
+}
+
+/// A user ACL entry: a user id together with its permission bits.
+///
+/// Equality and ordering are implemented by hand below and look at `uid` first, then at
+/// `permissions`.
+// NOTE(review): `#[repr(C)]` + `Endian` suggest this mirrors the serialized layout - confirm.
+#[derive(Clone, Debug, Endian, Eq)]
+#[repr(C)]
+pub struct User {
+    /// Numeric user id this entry applies to.
+    pub uid: u64,
+    /// Permission bits of this entry.
+    pub permissions: Permissions,
+    //pub name: Vec<u64>, not impl for now
+}
+
+// TODO: once `name` is implemented as well, sort by uid, then by name and last by permissions.
+impl Ord for User {
+    /// Order entries by `uid`; entries with equal uids are ordered by their permissions.
+    fn cmp(&self, other: &User) -> Ordering {
+        self.uid
+            .cmp(&other.uid)
+            .then_with(|| self.permissions.cmp(&other.permissions))
+    }
+}
+
+impl PartialOrd for User {
+    /// Always `Some`: the ordering of `User` is total and delegates to `Ord::cmp`.
+    fn partial_cmp(&self, other: &User) -> Option<Ordering> {
+        let ordering = Ord::cmp(self, other);
+        Some(ordering)
+    }
+}
+
+impl PartialEq for User {
+    /// Two entries are equal when both their uid and their permissions match.
+    fn eq(&self, other: &User) -> bool {
+        let same_uid = self.uid == other.uid;
+        let same_permissions = self.permissions == other.permissions;
+        same_uid && same_permissions
+    }
+}
+
+/// A group ACL entry: a group id together with its permission bits.
+///
+/// Equality and ordering are implemented by hand below and look at `gid` first, then at
+/// `permissions`.
+// NOTE(review): `#[repr(C)]` + `Endian` suggest this mirrors the serialized layout - confirm.
+#[derive(Clone, Debug, Endian, Eq)]
+#[repr(C)]
+pub struct Group {
+    /// Numeric group id this entry applies to.
+    pub gid: u64,
+    /// Permission bits of this entry.
+    pub permissions: Permissions,
+    //pub name: Vec<u64>, not impl for now
+}
+
+// TODO: once `name` is implemented as well, sort by gid, then by name and last by permissions.
+impl Ord for Group {
+    /// Order entries by `gid`; entries with equal gids are ordered by their permissions.
+    fn cmp(&self, other: &Group) -> Ordering {
+        self.gid
+            .cmp(&other.gid)
+            .then_with(|| self.permissions.cmp(&other.permissions))
+    }
+}
+
+impl PartialOrd for Group {
+    /// Always `Some`: the ordering of `Group` is total and delegates to `Ord::cmp`.
+    fn partial_cmp(&self, other: &Group) -> Option<Ordering> {
+        let ordering = Ord::cmp(self, other);
+        Some(ordering)
+    }
+}
+
+impl PartialEq for Group {
+    /// Two entries are equal when both their gid and their permissions match.
+    fn eq(&self, other: &Group) -> bool {
+        let same_gid = self.gid == other.gid;
+        let same_permissions = self.permissions == other.permissions;
+        same_gid && same_permissions
+    }
+}
+
+/// Group object ACL entry, consisting only of permission bits.
+// NOTE(review): presumably the permissions of the owning group (ACL_GROUP_OBJ) - confirm.
+#[derive(Clone, Debug, Endian)]
+#[repr(C)]
+pub struct GroupObject {
+    /// Permission bits of this entry.
+    pub permissions: Permissions,
+}
+
+/// Default ACL permissions: one set of permission bits each for the user object, the group
+/// object, others and the mask.
+///
+/// Note that this type shares its name with the `Default` trait from the standard prelude.
+// NOTE(review): presumably describes the default ACL of a directory - confirm.
+#[derive(Clone, Debug, Endian)]
+#[repr(C)]
+pub struct Default {
+    /// Permission bits for the user object.
+    pub user_obj_permissions: Permissions,
+    /// Permission bits for the group object.
+    pub group_obj_permissions: Permissions,
+    /// Permission bits for others.
+    pub other_permissions: Permissions,
+    /// Permission bits of the mask.
+    pub mask_permissions: Permissions,
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644 (file)
index 0000000..24b4955
--- /dev/null
@@ -0,0 +1,218 @@
+//! Proxmox backup archive format handling.
+//!
+//! This implements a reader and writer for the proxmox archive format (.pxar).
+
+use std::ffi::OsStr;
+use std::mem;
+use std::os::unix::ffi::OsStrExt;
+use std::path::{Path, PathBuf};
+
+#[macro_use]
+mod macros;
+
+pub mod format;
+
+pub(crate) mod util;
+
+mod poll_fn;
+
+pub mod accessor;
+pub mod decoder;
+
+/// File metadata found in pxar archives.
+///
+/// This includes the usual data you'd get from `stat()` as well as ACLs, extended attributes, file
+/// capabilities and more.
+///
+/// The derived `Default` produces metadata with no extended attributes, an empty `Acl`, and
+/// neither file capabilities nor a quota project id.
+#[derive(Clone, Debug, Default)]
+pub struct Metadata {
+    /// Data typically found in a `stat()` call.
+    pub stat: format::Entry,
+
+    /// Extended attributes.
+    pub xattrs: Vec<format::XAttr>,
+
+    /// ACLs.
+    pub acl: Acl,
+
+    /// File capabilities, if any.
+    pub fcaps: Option<format::FCaps>,
+
+    /// Quota project id, if any.
+    pub quota_project_id: Option<format::QuotaProjectId>,
+}
+
+/// ACL entries of a pxar archive.
+///
+/// This contains all the various ACL entry types supported by the pxar archive format.
+///
+/// The derived `Default` has all lists empty and both optional entries set to `None`.
+#[derive(Clone, Debug, Default)]
+pub struct Acl {
+    /// User ACL list.
+    pub users: Vec<format::acl::User>,
+
+    /// Group ACL list.
+    pub groups: Vec<format::acl::Group>,
+
+    /// Group object ACL entry, if any.
+    pub group_obj: Option<format::acl::GroupObject>,
+
+    /// Default permissions, if any.
+    pub default: Option<format::acl::Default>,
+
+    /// Default user permissions.
+    pub default_users: Vec<format::acl::User>,
+
+    /// Default group permissions.
+    pub default_groups: Vec<format::acl::Group>,
+}
+
+/// Pxar archive entry kind.
+///
+/// Identifies whether the entry is a file, symlink, directory, etc.
+#[derive(Clone, Debug)]
+pub enum EntryKind {
+    /// Symbolic links.
+    Symlink(format::Symlink),
+
+    /// Hard links, relative to the root of the current archive.
+    Hardlink(format::Hardlink),
+
+    /// Device node.
+    Device(format::Device),
+
+    /// Regular file.
+    ///
+    /// NOTE(review): `size` is presumably the length of the file contents in bytes - confirm.
+    File { size: u64 },
+
+    /// Directory entry. When iterating through an archive, the contents follow next.
+    Directory,
+
+    /// End of a directory. This is for internal use to remember the goodbye-table of a directory
+    /// entry. Will not occur during normal iteration.
+    ///
+    /// `Entry` also uses this variant as the placeholder kind of a cleared or freshly created
+    /// internal entry.
+    EndOfDirectory,
+}
+
+/// A pxar archive entry. This contains the current path, file metadata and entry type specific
+/// information.
+#[derive(Clone, Debug)]
+pub struct Entry {
+    /// Full path of this entry within the current pxar directory structure.
+    path: PathBuf,
+    /// Metadata of this entry.
+    metadata: Metadata,
+    /// What kind of entry this is, including kind specific data.
+    kind: EntryKind,
+    /// Start offset of this entry, if known; exposed through `Entry::offset`.
+    offset: Option<u64>,
+}
+
+/// General accessors.
+impl Entry {
+    /// Reset metadata, kind and offset to their placeholder values; the path is kept.
+    fn clear_data(&mut self) {
+        self.offset = None;
+        self.kind = EntryKind::EndOfDirectory;
+        self.metadata = Metadata::default();
+    }
+
+    /// Placeholder entry: empty path, default metadata, `EndOfDirectory` kind and no offset.
+    fn internal_default() -> Self {
+        Self {
+            offset: None,
+            kind: EntryKind::EndOfDirectory,
+            metadata: Metadata::default(),
+            path: PathBuf::new(),
+        }
+    }
+
+    /// Move the current contents out and return them, leaving behind a placeholder entry that
+    /// still carries a copy of the path.
+    fn take(&mut self) -> Self {
+        let placeholder = Self {
+            path: self.path.clone(),
+            ..Self::internal_default()
+        };
+        mem::replace(self, placeholder)
+    }
+
+    /// If the underlying I/O implementation supports seeking, this will be filled with the start
+    /// offset of this entry, allowing one to jump back to this entry later on.
+    #[inline]
+    pub fn offset(&self) -> Option<u64> {
+        self.offset
+    }
+
+    /// Get the full path of this file within the current pxar directory structure.
+    #[inline]
+    pub fn path(&self) -> &Path {
+        self.path.as_path()
+    }
+
+    /// Convenience method to get just the file name portion of the current path.
+    ///
+    /// Yields an empty string if the path has no file name portion.
+    #[inline]
+    pub fn file_name(&self) -> &OsStr {
+        match self.path.file_name() {
+            Some(name) => name,
+            None => OsStr::new(""),
+        }
+    }
+
+    /// Get the file metadata.
+    #[inline]
+    pub fn metadata(&self) -> &Metadata {
+        &self.metadata
+    }
+
+    /// Get the value of the symbolic link if it is one.
+    pub fn get_symlink(&self) -> Option<&OsStr> {
+        if let EntryKind::Symlink(link) = &self.kind {
+            Some(OsStr::from_bytes(&link.data))
+        } else {
+            None
+        }
+    }
+
+    /// Get the value of the hard link if it is one.
+    pub fn get_hardlink(&self) -> Option<&OsStr> {
+        if let EntryKind::Hardlink(link) = &self.kind {
+            Some(OsStr::from_bytes(&link.data))
+        } else {
+            None
+        }
+    }
+
+    /// Get a copy of the device node information if this is a device node.
+    pub fn get_device(&self) -> Option<format::Device> {
+        if let EntryKind::Device(dev) = &self.kind {
+            Some(dev.clone())
+        } else {
+            None
+        }
+    }
+}
+
+/// Convenience helpers.
+impl Entry {
+    /// Check whether this is a directory.
+    pub fn is_dir(&self) -> bool {
+        if let EntryKind::Directory = self.kind {
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Check whether this is a symbolic link.
+    pub fn is_symlink(&self) -> bool {
+        if let EntryKind::Symlink(_) = self.kind {
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Check whether this is a hard link.
+    pub fn is_hardlink(&self) -> bool {
+        if let EntryKind::Hardlink(_) = self.kind {
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Check whether this is a device node.
+    pub fn is_device(&self) -> bool {
+        if let EntryKind::Device(_) = self.kind {
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Check whether this is a regular file.
+    pub fn is_regular_file(&self) -> bool {
+        if let EntryKind::File { .. } = self.kind {
+            true
+        } else {
+            false
+        }
+    }
+}
diff --git a/src/macros.rs b/src/macros.rs
new file mode 100644 (file)
index 0000000..31df1be
--- /dev/null
@@ -0,0 +1,23 @@
+/// Like failure's `format_err` but producing a `std::io::Error`.
+///
+/// Takes the same arguments as `format!`; the formatted message becomes the payload of an error
+/// of kind `std::io::ErrorKind::Other`.
+macro_rules! io_format_err {
+    ($($msg:tt)+) => {
+        ::std::io::Error::new(::std::io::ErrorKind::Other, format!($($msg)+))
+    };
+}
+
+/// Like failure's `bail` but producing a `std::io::Error`.
+///
+/// Returns early from the enclosing function with `Err(io_format_err!(...))`, so it can only be
+/// used where such an `Err` is a valid return value.
+macro_rules! io_bail {
+    ($($msg:tt)+) => {{
+        return Err(io_format_err!($($msg)+));
+    }};
+}
+
+/// Extract the value of a `Poll::Ready`, returning `Poll::Pending` from the enclosing function
+/// otherwise.
+///
+/// Provided locally because our dependency on `futures` is optional.
+macro_rules! ready {
+    ($expr:expr) => {{
+        match $expr {
+            std::task::Poll::Ready(r) => r,
+            std::task::Poll::Pending => return std::task::Poll::Pending,
+        }
+    }};
+}
diff --git a/src/poll_fn.rs b/src/poll_fn.rs
new file mode 100644 (file)
index 0000000..f193c6e
--- /dev/null
@@ -0,0 +1,39 @@
+//! `poll_fn` reimplementation as it is otherwise the only thing we need from the futures crate.
+//!
+//! Our `futures` crate dependency is optional.
+
+use std::future::Future;
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+/// Future returned by `poll_fn`, driving a polling closure.
+pub struct PollFn<F> {
+    /// The wrapped closure. `poll` resets this to `None` once the closure has returned
+    /// `Poll::Ready`, and panics when polled again afterwards.
+    func: Option<F>,
+}
+
+/// Wrap a polling closure into a future.
+///
+/// Every `poll` of the returned `PollFn` calls `func` with the current task context and
+/// forwards its result.
+pub fn poll_fn<F, R>(func: F) -> PollFn<F>
+where
+    F: FnMut(&mut Context) -> Poll<R>,
+{
+    let func = Some(func);
+    PollFn { func }
+}
+
+impl<F, R> Future for PollFn<F>
+where
+    F: FnMut(&mut Context) -> Poll<R>,
+{
+    type Output = R;
+
+    /// Call the wrapped closure once and forward its result.
+    ///
+    /// # Panics
+    ///
+    /// Panics when polled again after the closure has returned `Poll::Ready`.
+    fn poll(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Self::Output> {
+        // SAFETY: nothing is moved out of the pinned value; the closure is only called through
+        // a mutable reference and the `Option` holding it is overwritten in place.
+        let this = unsafe { self.get_unchecked_mut() };
+
+        let func = match this.func.as_mut() {
+            Some(func) => func,
+            None => panic!("poll() after Ready"),
+        };
+
+        let res = func(cx);
+        if let Poll::Ready(_) = &res {
+            // The closure is done; drop it so a later poll is caught above.
+            this.func = None;
+        }
+        res
+    }
+}
diff --git a/src/util.rs b/src/util.rs
new file mode 100644 (file)
index 0000000..f78e32c
--- /dev/null
@@ -0,0 +1,115 @@
+#![allow(dead_code)]
+
+use std::future::Future;
+use std::io;
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+// File system magic numbers, taken from /usr/include/linux/magic.h
+// and from casync's util.h.
+//
+// NOTE(review): presumably compared against the `f_type` reported by statfs(2) - confirm
+// against the callers of `is_virtual_file_system`.
+#[rustfmt::skip]
+#[allow(clippy::unreadable_literal)]
+mod consts {
+    pub const BINFMTFS_MAGIC      : i64 = 0x42494e4d;
+    pub const CGROUP2_SUPER_MAGIC : i64 = 0x63677270;
+    pub const CGROUP_SUPER_MAGIC  : i64 = 0x0027e0eb;
+    pub const CONFIGFS_MAGIC      : i64 = 0x62656570;
+    pub const DEBUGFS_MAGIC       : i64 = 0x64626720;
+    pub const DEVPTS_SUPER_MAGIC  : i64 = 0x00001cd1;
+    pub const EFIVARFS_MAGIC      : i64 = 0xde5e81e4;
+    pub const FUSE_CTL_SUPER_MAGIC: i64 = 0x65735543;
+    pub const HUGETLBFS_MAGIC     : i64 = 0x958458f6;
+    pub const MQUEUE_MAGIC        : i64 = 0x19800202;
+    pub const NFSD_MAGIC          : i64 = 0x6e667364;
+    pub const PROC_SUPER_MAGIC    : i64 = 0x00009fa0;
+    pub const PSTOREFS_MAGIC      : i64 = 0x6165676C;
+    pub const RPCAUTH_GSSMAGIC    : i64 = 0x67596969;
+    pub const SECURITYFS_MAGIC    : i64 = 0x73636673;
+    pub const SELINUX_MAGIC       : i64 = 0xf97cff8c;
+    pub const SMACK_MAGIC         : i64 = 0x43415d53;
+    // The remaining constants are not part of the list checked by `is_virtual_file_system`.
+    pub const RAMFS_MAGIC         : i64 = 0x858458f6;
+    pub const TMPFS_MAGIC         : i64 = 0x01021994;
+    pub const SYSFS_MAGIC         : i64 = 0x62656572;
+    pub const MSDOS_SUPER_MAGIC   : i64 = 0x00004d44;
+    pub const BTRFS_SUPER_MAGIC   : i64 = 0x9123683E;
+    pub const FUSE_SUPER_MAGIC    : i64 = 0x65735546;
+    pub const EXT4_SUPER_MAGIC    : i64 = 0x0000EF53;
+    pub const XFS_SUPER_MAGIC     : i64 = 0x58465342;
+    pub const ZFS_SUPER_MAGIC     : i64 = 0x2FC12FC1;
+}
+
+/// Check whether `magic` is one of the file system magic numbers this crate treats as a
+/// virtual file system.
+pub fn is_virtual_file_system(magic: i64) -> bool {
+    const VIRTUAL_FS_MAGICS: &[i64] = &[
+        consts::BINFMTFS_MAGIC,
+        consts::CGROUP2_SUPER_MAGIC,
+        consts::CGROUP_SUPER_MAGIC,
+        consts::CONFIGFS_MAGIC,
+        consts::DEBUGFS_MAGIC,
+        consts::DEVPTS_SUPER_MAGIC,
+        consts::EFIVARFS_MAGIC,
+        consts::FUSE_CTL_SUPER_MAGIC,
+        consts::HUGETLBFS_MAGIC,
+        consts::MQUEUE_MAGIC,
+        consts::NFSD_MAGIC,
+        consts::PROC_SUPER_MAGIC,
+        consts::PSTOREFS_MAGIC,
+        consts::RPCAUTH_GSSMAGIC,
+        consts::SECURITYFS_MAGIC,
+        consts::SELINUX_MAGIC,
+        consts::SMACK_MAGIC,
+        consts::SYSFS_MAGIC,
+    ];
+
+    VIRTUAL_FS_MAGICS.contains(&magic)
+}
+
+/// Helper function to extract file names from binary archive.
+///
+/// Copies `buffer` into an `OsString`. If the buffer ends in a zero byte, that single byte is
+/// left out; any other zero bytes are kept.
+pub fn read_os_string(buffer: &[u8]) -> std::ffi::OsString {
+    use std::os::unix::ffi::OsStrExt;
+
+    let name = match buffer.split_last() {
+        Some((&0, without_terminator)) => without_terminator,
+        _ => buffer,
+    };
+
+    std::ffi::OsStr::from_bytes(name).to_os_string()
+}
+
+/// Create a byte vector of length `size` with every byte set to zero.
+///
+/// Typically used as a buffer that is filled by a subsequent read.
+///
+/// The buffer is zero-initialized on purpose: exposing uninitialized bytes through a
+/// `Vec<u8>` (via `with_capacity` + `set_len`) is undefined behavior as soon as anything reads
+/// them, and a reader filling the buffer is not guaranteed to write before it reads.
+#[inline]
+pub fn vec_new(size: usize) -> Vec<u8> {
+    vec![0u8; size]
+}
+
+/// Turn any displayable error into an `io::Error` of kind `Other`.
+///
+/// Only the `Display` output of `err` is kept, as the message of the new error.
+pub fn io_err_other<E: std::fmt::Display>(err: E) -> io::Error {
+    let message = err.to_string();
+    io::Error::new(io::ErrorKind::Other, message)
+}
+
+/// Poll `fut` exactly once from a synchronous context and return its result.
+///
+/// The future is polled with a placeholder waker built from `WAKER_VTABLE`, whose clone and
+/// wake functions panic. The future is therefore expected to complete on its first poll
+/// without touching the waker.
+///
+/// # Errors
+///
+/// Returns the future's own error, or an `io::Error` of kind `Other` if the future was not
+/// ready on the first poll.
+pub fn poll_result_once<T, R>(mut fut: T) -> io::Result<R>
+where
+    T: Future<Output = io::Result<R>>,
+{
+    let raw_waker = std::task::RawWaker::new(std::ptr::null(), &WAKER_VTABLE);
+    // SAFETY: none of the vtable functions ever looks at the (null) data pointer.
+    let waker = unsafe { std::task::Waker::from_raw(raw_waker) };
+    let mut cx = Context::from_waker(&waker);
+
+    // SAFETY: `fut` is owned by this function and is not moved again before it is dropped at
+    // the end of the function.
+    let pinned = unsafe { Pin::new_unchecked(&mut fut) };
+
+    match pinned.poll(&mut cx) {
+        Poll::Ready(result) => result,
+        Poll::Pending => Err(io_err_other("got Poll::Pending synchronous context")),
+    }
+}
+
+/// Vtable of the placeholder waker used by `poll_result_once`.
+///
+/// The entries are, in order: clone, wake, wake-by-ref and drop. Cloning and waking panic,
+/// dropping does nothing.
+const WAKER_VTABLE: std::task::RawWakerVTable =
+    std::task::RawWakerVTable::new(forbid_clone, forbid_wake, forbid_wake, ignore_drop);
+
+/// Clone entry of `WAKER_VTABLE`: the placeholder waker must never be cloned.
+///
+/// # Panics
+///
+/// Always panics.
+unsafe fn forbid_clone(_data: *const ()) -> std::task::RawWaker {
+    panic!("tried to clone waker for synchronous task")
+}
+
+/// Wake and wake-by-ref entry of `WAKER_VTABLE`: the placeholder waker must never be woken.
+///
+/// # Panics
+///
+/// Always panics.
+unsafe fn forbid_wake(_data: *const ()) {
+    panic!("tried to wake synchronous task")
+}
+
+/// Drop entry of `WAKER_VTABLE`: the placeholder waker owns no data, so there is nothing to
+/// release.
+unsafe fn ignore_drop(_data: *const ()) {}