+++ /dev/null
-//! Random access for PXAR files.
-
-use std::ffi::{OsStr, OsString};
-use std::io;
-use std::mem::{self, size_of, size_of_val, MaybeUninit};
-use std::ops::Range;
-use std::os::unix::ffi::{OsStrExt, OsStringExt};
-use std::path::{Path, PathBuf};
-use std::pin::Pin;
-use std::sync::Arc;
-use std::task::{Context, Poll};
-
-use endian_trait::Endian;
-
-use crate::binary_tree_array;
-use crate::decoder::{self, DecoderImpl};
-use crate::format::{self, GoodbyeItem};
-use crate::poll_fn::poll_fn;
-use crate::util;
-use crate::{Entry, EntryKind};
-
-pub mod aio;
-pub mod cache;
-pub mod sync;
-
-#[doc(inline)]
-pub use sync::{Accessor, DirEntry, Directory, FileEntry, ReadDir};
-
-use cache::Cache;
-
-/// Random access read implementation.
-pub trait ReadAt {
- fn poll_read_at(
- self: Pin<&Self>,
- cx: &mut Context,
- buf: &mut [u8],
- offset: u64,
- ) -> Poll<io::Result<usize>>;
-}
-
-/// awaitable version of `poll_read_at`.
-async fn read_at<T>(input: &T, buf: &mut [u8], offset: u64) -> io::Result<usize>
-where
- T: ReadAt + ?Sized,
-{
- poll_fn(|cx| unsafe { Pin::new_unchecked(input).poll_read_at(cx, buf, offset) }).await
-}
-
-/// `read_exact_at` - since that's what we _actually_ want most of the time.
-async fn read_exact_at<T>(input: &T, mut buf: &mut [u8], mut offset: u64) -> io::Result<()>
-where
- T: ReadAt + ?Sized,
-{
- while !buf.is_empty() {
- match read_at(input, buf, offset).await? {
- 0 => io_bail!("unexpected EOF"),
- got => {
- buf = &mut buf[got..];
- offset += got as u64;
- }
- }
- }
- Ok(())
-}
-
-/// Helper to read into an `Endian`-implementing `struct`.
-async fn read_entry_at<T, E: Endian>(input: &T, offset: u64) -> io::Result<E>
-where
- T: ReadAt + ?Sized,
-{
- let mut data = MaybeUninit::<E>::uninit();
- let buf =
- unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<E>()) };
- read_exact_at(input, buf, offset).await?;
- Ok(unsafe { data.assume_init().from_le() })
-}
-
-/// Helper to read into an allocated byte vector.
-async fn read_exact_data_at<T>(input: &T, size: usize, offset: u64) -> io::Result<Vec<u8>>
-where
- T: ReadAt + ?Sized,
-{
- let mut data = util::vec_new(size);
- read_exact_at(input, &mut data[..], offset).await?;
- Ok(data)
-}
-
-/// Allow using trait objects for `T: ReadAt`
-impl<'a> ReadAt for &(dyn ReadAt + 'a) {
- fn poll_read_at(
- self: Pin<&Self>,
- cx: &mut Context,
- buf: &mut [u8],
- offset: u64,
- ) -> Poll<io::Result<usize>> {
- unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) }
- }
-}
-
-/// Convenience impl for `Arc<dyn ReadAt + Send + Sync + 'static>`. Since `ReadAt` only requires
-/// immutable `&self`, this adds some convenience by allowing to just `Arc` any `'static` type that
-/// implemments `ReadAt` for type monomorphization.
-impl ReadAt for Arc<dyn ReadAt + Send + Sync + 'static> {
- fn poll_read_at(
- self: Pin<&Self>,
- cx: &mut Context,
- buf: &mut [u8],
- offset: u64,
- ) -> Poll<io::Result<usize>> {
- unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) }
- }
-}
-
-#[derive(Clone)]
-struct Caches {
- /// The goodbye table cache maps goodbye table offsets to cache entries.
- gbt_cache: Option<Arc<dyn Cache<u64, [GoodbyeItem]> + Send + Sync>>,
-}
-
-impl Default for Caches {
- fn default() -> Self {
- Self { gbt_cache: None }
- }
-}
-
-/// The random access state machine implementation.
-pub(crate) struct AccessorImpl<T> {
- input: T,
- size: u64,
- caches: Arc<Caches>,
-}
-
-impl<T: ReadAt> AccessorImpl<T> {
- pub async fn new(input: T, size: u64) -> io::Result<Self> {
- if size < (size_of::<GoodbyeItem>() as u64) {
- io_bail!("too small to contain a pxar archive");
- }
-
- Ok(Self {
- input,
- size,
- caches: Arc::new(Caches::default()),
- })
- }
-
- pub fn size(&self) -> u64 {
- self.size
- }
-
- pub async fn open_root_ref<'a>(&'a self) -> io::Result<DirectoryImpl<&'a dyn ReadAt>> {
- DirectoryImpl::open_at_end(
- &self.input as &dyn ReadAt,
- self.size,
- "/".into(),
- Arc::clone(&self.caches),
- )
- .await
- }
-
- pub fn set_goodbye_table_cache(
- &mut self,
- cache: Option<Arc<dyn Cache<u64, [GoodbyeItem]> + Send + Sync>>,
- ) {
- let new_caches = Arc::new(Caches {
- gbt_cache: cache,
- ..*self.caches
- });
- self.caches = new_caches;
- }
-}
-
-async fn get_decoder<T: ReadAt>(
- input: T,
- entry_range: Range<u64>,
- path: PathBuf,
-) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
- Ok(DecoderImpl::new_full(SeqReadAtAdapter::new(input, entry_range), path).await?)
-}
-
-impl<T: Clone + ReadAt> AccessorImpl<T> {
- pub async fn open_root(&self) -> io::Result<DirectoryImpl<T>> {
- DirectoryImpl::open_at_end(
- self.input.clone(),
- self.size,
- "/".into(),
- Arc::clone(&self.caches),
- )
- .await
- }
-
- /// Allow opening a directory at a specified offset.
- pub async unsafe fn open_dir_at_end(&self, offset: u64) -> io::Result<DirectoryImpl<T>> {
- DirectoryImpl::open_at_end(
- self.input.clone(),
- offset,
- "/".into(),
- Arc::clone(&self.caches),
- )
- .await
- }
-
- /// Allow opening a regular file from a specified range.
- pub async unsafe fn open_file_at_range(
- &self,
- range: Range<u64>,
- ) -> io::Result<FileEntryImpl<T>> {
- let mut decoder = get_decoder(self.input.clone(), range.clone(), PathBuf::new()).await?;
- let entry = decoder
- .next()
- .await
- .ok_or_else(|| io_format_err!("unexpected EOF while decoding file entry"))??;
- Ok(FileEntryImpl {
- input: self.input.clone(),
- entry,
- entry_range: range,
- caches: Arc::clone(&self.caches),
- })
- }
-
- /// Allow opening arbitrary contents from a specific range.
- pub unsafe fn open_contents_at_range(&self, range: Range<u64>) -> FileContentsImpl<T> {
- FileContentsImpl::new(self.input.clone(), range)
- }
-}
-
-/// The directory random-access state machine implementation.
-pub(crate) struct DirectoryImpl<T> {
- input: T,
- entry_ofs: u64,
- goodbye_ofs: u64,
- size: u64,
- table: Arc<[GoodbyeItem]>,
- path: PathBuf,
- caches: Arc<Caches>,
-}
-
-impl<T: Clone + ReadAt> DirectoryImpl<T> {
- /// Open a directory ending at the specified position.
- async fn open_at_end(
- input: T,
- end_offset: u64,
- path: PathBuf,
- caches: Arc<Caches>,
- ) -> io::Result<DirectoryImpl<T>> {
- let tail = Self::read_tail_entry(&input, end_offset).await?;
-
- if end_offset < tail.size {
- io_bail!("goodbye tail size out of range");
- }
-
- let goodbye_ofs = end_offset - tail.size;
-
- if goodbye_ofs < tail.offset {
- io_bail!("goodbye offset out of range");
- }
-
- let entry_ofs = goodbye_ofs - tail.offset;
- let size = end_offset - entry_ofs;
-
- let table: Option<Arc<[GoodbyeItem]>> = caches
- .gbt_cache
- .as_ref()
- .and_then(|cache| cache.fetch(goodbye_ofs));
-
- let mut this = Self {
- input,
- entry_ofs,
- goodbye_ofs,
- size,
- table: table.as_ref().map_or_else(|| Arc::new([]), Arc::clone),
- path,
- caches,
- };
-
- // sanity check:
- if this.table_size() % (size_of::<GoodbyeItem>() as u64) != 0 {
- io_bail!("invalid goodbye table size: {}", this.table_size());
- }
-
- if table.is_none() {
- this.table = this.load_table().await?;
- if let Some(ref cache) = this.caches.gbt_cache {
- cache.insert(goodbye_ofs, Arc::clone(&this.table));
- }
- }
-
- Ok(this)
- }
-
- /// Load the entire goodbye table:
- async fn load_table(&self) -> io::Result<Arc<[GoodbyeItem]>> {
- let len = self.len();
- let mut data = Vec::with_capacity(self.len());
- unsafe {
- data.set_len(len);
- let slice = std::slice::from_raw_parts_mut(
- data.as_mut_ptr() as *mut u8,
- len * size_of::<GoodbyeItem>(),
- );
- read_exact_at(&self.input, slice, self.table_offset()).await?;
- drop(slice);
- }
- Ok(Arc::from(data))
- }
-
- #[inline]
- fn end_offset(&self) -> u64 {
- self.entry_ofs + self.size
- }
-
- #[inline]
- fn entry_range(&self) -> Range<u64> {
- self.entry_ofs..self.end_offset()
- }
-
- #[inline]
- fn table_size(&self) -> u64 {
- (self.end_offset() - self.goodbye_ofs) - (size_of::<format::Header>() as u64)
- }
-
- #[inline]
- fn table_offset(&self) -> u64 {
- self.goodbye_ofs + (size_of::<format::Header>() as u64)
- }
-
- /// Length *excluding* the tail marker!
- #[inline]
- fn len(&self) -> usize {
- (self.table_size() / (size_of::<GoodbyeItem>() as u64)) as usize - 1
- }
-
- /// Read the goodbye tail and perform some sanity checks.
- async fn read_tail_entry(input: &T, end_offset: u64) -> io::Result<GoodbyeItem> {
- if end_offset < (size_of::<GoodbyeItem>() as u64) {
- io_bail!("goodbye tail does not fit");
- }
-
- let tail_offset = end_offset - (size_of::<GoodbyeItem>() as u64);
- let tail: GoodbyeItem = read_entry_at(input, tail_offset).await?;
-
- if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER {
- io_bail!("no goodbye tail marker found");
- }
-
- Ok(tail)
- }
-
- /// Get a decoder for the directory contents.
- pub(crate) async fn decode_full(&self) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
- let (dir, decoder) = self.decode_one_entry(self.entry_range(), None).await?;
- if !dir.is_dir() {
- io_bail!("directory does not seem to be a directory");
- }
- Ok(decoder)
- }
-
- async fn get_decoder(
- &self,
- entry_range: Range<u64>,
- file_name: Option<&Path>,
- ) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
- get_decoder(
- self.input.clone(),
- entry_range,
- match file_name {
- None => self.path.clone(),
- Some(file) => self.path.join(file),
- },
- )
- .await
- }
-
- async fn decode_one_entry(
- &self,
- entry_range: Range<u64>,
- file_name: Option<&Path>,
- ) -> io::Result<(Entry, DecoderImpl<SeqReadAtAdapter<T>>)> {
- let mut decoder = self.get_decoder(entry_range, file_name).await?;
- let entry = decoder
- .next()
- .await
- .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
- Ok((entry, decoder))
- }
-
- fn lookup_hash_position(&self, hash: u64, start: usize, skip: usize) -> Option<usize> {
- binary_tree_array::search_by(&self.table, start, skip, |i| hash.cmp(&i.hash))
- }
-
- pub async fn lookup_self(&self) -> io::Result<FileEntryImpl<T>> {
- let (entry, _decoder) = self.decode_one_entry(self.entry_range(), None).await?;
- Ok(FileEntryImpl {
- input: self.input.clone(),
- entry,
- entry_range: self.entry_range(),
- caches: Arc::clone(&self.caches),
- })
- }
-
- /// Lookup a directory entry.
- pub async fn lookup(&self, path: &Path) -> io::Result<Option<FileEntryImpl<T>>> {
- let mut cur: Option<FileEntryImpl<T>> = None;
-
- let mut first = true;
- for component in path.components() {
- use std::path::Component;
-
- let first = mem::replace(&mut first, false);
-
- let component = match component {
- Component::Normal(path) => path,
- Component::ParentDir => io_bail!("cannot enter parent directory in archive"),
- Component::RootDir | Component::CurDir if first => {
- cur = Some(self.lookup_self().await?);
- continue;
- }
- Component::CurDir => continue,
- _ => io_bail!("invalid component in path"),
- };
-
- let next = match cur {
- Some(entry) => {
- entry
- .enter_directory()
- .await?
- .lookup_component(component)
- .await?
- }
- None => self.lookup_component(component).await?,
- };
-
- if next.is_none() {
- return Ok(None);
- }
-
- cur = next;
- }
-
- Ok(cur)
- }
-
- /// Lookup a single directory entry component (does not handle multiple components in path)
- pub async fn lookup_component(&self, path: &OsStr) -> io::Result<Option<FileEntryImpl<T>>> {
- let hash = format::hash_filename(path.as_bytes());
- let first_index = match self.lookup_hash_position(hash, 0, 0) {
- Some(index) => index,
- None => return Ok(None),
- };
-
- // Lookup FILENAME, if the hash matches but the filename doesn't, check for a duplicate
- // hash once found, use the GoodbyeItem's offset+size as well as the file's Entry to return
- // a DirEntry::Dir or Dir::Entry.
- //
- let mut dup = 0;
- loop {
- let index = match self.lookup_hash_position(hash, first_index, dup) {
- Some(index) => index,
- None => return Ok(None),
- };
-
- let cursor = self.get_cursor(index).await?;
- if cursor.file_name == path {
- return Ok(Some(cursor.decode_entry().await?));
- }
-
- dup += 1;
- }
- }
-
- async fn get_cursor<'a>(&'a self, index: usize) -> io::Result<DirEntryImpl<'a, T>> {
- let entry = &self.table[index];
- let file_goodbye_ofs = entry.offset;
- if self.goodbye_ofs < file_goodbye_ofs {
- io_bail!("invalid file offset");
- }
-
- let file_ofs = self.goodbye_ofs - file_goodbye_ofs;
- let (file_name, entry_ofs) = self.read_filename_entry(file_ofs).await?;
-
- let entry_range = Range {
- start: entry_ofs,
- end: file_ofs + entry.size,
- };
- if entry_range.end < entry_range.start {
- io_bail!(
- "bad file: invalid entry ranges for {:?}: \
- start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}",
- file_name,
- entry_ofs,
- file_ofs,
- entry.size,
- );
- }
-
- Ok(DirEntryImpl {
- dir: self,
- file_name,
- entry_range,
- caches: Arc::clone(&self.caches),
- })
- }
-
- async fn read_filename_entry(&self, file_ofs: u64) -> io::Result<(PathBuf, u64)> {
- let head: format::Header = read_entry_at(&self.input, file_ofs).await?;
- if head.htype != format::PXAR_FILENAME {
- io_bail!("expected PXAR_FILENAME header, found: {:x}", head.htype);
- }
-
- let mut path = read_exact_data_at(
- &self.input,
- head.content_size() as usize,
- file_ofs + (size_of_val(&head) as u64),
- )
- .await?;
-
- if path.pop() != Some(0) {
- io_bail!("invalid file name (missing terminating zero)");
- }
-
- if path.is_empty() {
- io_bail!("invalid empty file name");
- }
-
- let file_name = PathBuf::from(OsString::from_vec(path));
- format::check_file_name(&file_name)?;
-
- Ok((file_name, file_ofs + head.full_size()))
- }
-
- pub fn read_dir(&self) -> ReadDirImpl<T> {
- ReadDirImpl::new(self, 0)
- }
-
- pub fn entry_count(&self) -> usize {
- self.table.len()
- }
-}
-
-/// A file entry retrieved from a Directory.
-#[derive(Clone)]
-pub(crate) struct FileEntryImpl<T: Clone + ReadAt> {
- input: T,
- entry: Entry,
- entry_range: Range<u64>,
- caches: Arc<Caches>,
-}
-
-impl<T: Clone + ReadAt> FileEntryImpl<T> {
- pub async fn enter_directory(&self) -> io::Result<DirectoryImpl<T>> {
- if !self.entry.is_dir() {
- io_bail!("enter_directory() on a non-directory");
- }
-
- DirectoryImpl::open_at_end(
- self.input.clone(),
- self.entry_range.end,
- self.entry.path.clone(),
- Arc::clone(&self.caches),
- )
- .await
- }
-
- /// For use with unsafe accessor methods.
- pub fn content_range(&self) -> io::Result<Option<Range<u64>>> {
- match self.entry.kind {
- EntryKind::File { offset: None, .. } => {
- io_bail!("cannot open file, reader provided no offset")
- }
- EntryKind::File {
- size,
- offset: Some(offset),
- } => Ok(Some(offset..(offset + size))),
- _ => Ok(None),
- }
- }
-
- pub async fn contents(&self) -> io::Result<FileContentsImpl<T>> {
- match self.content_range()? {
- Some(range) => Ok(FileContentsImpl::new(self.input.clone(), range)),
- None => io_bail!("not a file"),
- }
- }
-
- #[inline]
- pub fn into_entry(self) -> Entry {
- self.entry
- }
-
- #[inline]
- pub fn entry(&self) -> &Entry {
- &self.entry
- }
-
- /// Exposed for raw by-offset access methods (use with `open_dir_at_end`).
- #[inline]
- pub fn entry_range(&self) -> Range<u64> {
- self.entry_range.clone()
- }
-}
-
-/// An iterator over the contents of a directory.
-pub(crate) struct ReadDirImpl<'a, T> {
- dir: &'a DirectoryImpl<T>,
- at: usize,
-}
-
-impl<'a, T: Clone + ReadAt> ReadDirImpl<'a, T> {
- fn new(dir: &'a DirectoryImpl<T>, at: usize) -> Self {
- Self { dir, at }
- }
-
- /// Get the next entry.
- pub async fn next(&mut self) -> io::Result<Option<DirEntryImpl<'a, T>>> {
- if self.at == self.dir.table.len() {
- Ok(None)
- } else {
- let cursor = self.dir.get_cursor(self.at).await?;
- self.at += 1;
- Ok(Some(cursor))
- }
- }
-
- /// Efficient alternative to `Iterator::skip`.
- #[inline]
- pub fn skip(self, n: usize) -> Self {
- Self {
- at: (self.at + n).min(self.dir.table.len()),
- dir: self.dir,
- }
- }
-
- /// Efficient alternative to `Iterator::count`.
- #[inline]
- pub fn count(self) -> usize {
- self.dir.table.len()
- }
-}
-
-/// A cursor pointing to a file in a directory.
-///
-/// At this point only the file name has been read and we remembered the position for finding the
-/// actual data. This can be upgraded into a FileEntryImpl.
-pub(crate) struct DirEntryImpl<'a, T: Clone + ReadAt> {
- dir: &'a DirectoryImpl<T>,
- file_name: PathBuf,
- entry_range: Range<u64>,
- caches: Arc<Caches>,
-}
-
-impl<'a, T: Clone + ReadAt> DirEntryImpl<'a, T> {
- pub fn file_name(&self) -> &Path {
- &self.file_name
- }
-
- async fn decode_entry(&self) -> io::Result<FileEntryImpl<T>> {
- let (entry, _decoder) = self
- .dir
- .decode_one_entry(self.entry_range.clone(), Some(&self.file_name))
- .await?;
-
- Ok(FileEntryImpl {
- input: self.dir.input.clone(),
- entry,
- entry_range: self.entry_range(),
- caches: Arc::clone(&self.caches),
- })
- }
-
- /// Exposed for raw by-offset access methods.
- #[inline]
- pub fn entry_range(&self) -> Range<u64> {
- self.entry_range.clone()
- }
-}
-
-/// A reader for file contents.
-pub(crate) struct FileContentsImpl<T> {
- input: T,
-
- /// Absolute offset inside the `input`.
- range: Range<u64>,
-}
-
-impl<T: Clone + ReadAt> FileContentsImpl<T> {
- pub fn new(input: T, range: Range<u64>) -> Self {
- Self { input, range }
- }
-
- #[inline]
- pub fn file_size(&self) -> u64 {
- self.range.end - self.range.start
- }
-
- async fn read_at(&self, mut buf: &mut [u8], offset: u64) -> io::Result<usize> {
- let size = self.file_size();
- if offset >= size {
- return Ok(0);
- }
- let remaining = size - offset;
-
- if remaining < buf.len() as u64 {
- buf = &mut buf[..(remaining as usize)];
- }
-
- read_at(&self.input, buf, self.range.start + offset).await
- }
-}
-
-impl<T: Clone + ReadAt> ReadAt for FileContentsImpl<T> {
- fn poll_read_at(
- self: Pin<&Self>,
- cx: &mut Context,
- mut buf: &mut [u8],
- offset: u64,
- ) -> Poll<io::Result<usize>> {
- let size = self.file_size();
- if offset >= size {
- return Poll::Ready(Ok(0));
- }
- let remaining = size - offset;
-
- if remaining < buf.len() as u64 {
- buf = &mut buf[..(remaining as usize)];
- }
-
- let offset = self.range.start + offset;
- unsafe { self.map_unchecked(|this| &this.input) }.poll_read_at(cx, buf, offset)
- }
-}
-
-#[doc(hidden)]
-pub struct SeqReadAtAdapter<T> {
- input: T,
- range: Range<u64>,
-}
-
-impl<T: ReadAt> SeqReadAtAdapter<T> {
- pub fn new(input: T, range: Range<u64>) -> Self {
- if range.end < range.start {
- panic!("BAD SEQ READ AT ADAPTER");
- }
- Self { input, range }
- }
-
- #[inline]
- fn remaining(&self) -> usize {
- (self.range.end - self.range.start) as usize
- }
-}
-
-impl<T: ReadAt> decoder::SeqRead for SeqReadAtAdapter<T> {
- fn poll_seq_read(
- self: Pin<&mut Self>,
- cx: &mut Context,
- buf: &mut [u8],
- ) -> Poll<io::Result<usize>> {
- let len = buf.len().min(self.remaining());
- let buf = &mut buf[..len];
-
- let this = unsafe { self.get_unchecked_mut() };
-
- let got = ready!(unsafe {
- Pin::new_unchecked(&this.input).poll_read_at(cx, buf, this.range.start)
- })?;
- this.range.start += got as u64;
- Poll::Ready(Ok(got))
- }
-
- fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
- Poll::Ready(Some(Ok(self.range.start)))
- }
-}
--- /dev/null
+//! Random access for PXAR files.
+
+use std::ffi::{OsStr, OsString};
+use std::io;
+use std::mem::{self, size_of, size_of_val, MaybeUninit};
+use std::ops::Range;
+use std::os::unix::ffi::{OsStrExt, OsStringExt};
+use std::path::{Path, PathBuf};
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::{Context, Poll};
+
+use endian_trait::Endian;
+
+use crate::binary_tree_array;
+use crate::decoder::{self, DecoderImpl};
+use crate::format::{self, GoodbyeItem};
+use crate::poll_fn::poll_fn;
+use crate::util;
+use crate::{Entry, EntryKind};
+
+pub mod aio;
+pub mod cache;
+pub mod sync;
+
+#[doc(inline)]
+pub use sync::{Accessor, DirEntry, Directory, FileEntry, ReadDir};
+
+use cache::Cache;
+
+/// Random access read implementation.
+///
+/// Reads are addressed by an explicit byte `offset` supplied by the caller, and the receiver is
+/// only ever borrowed immutably (`Pin<&Self>`).
+pub trait ReadAt {
+ /// Poll a read of up to `buf.len()` bytes starting at byte `offset`.
+ ///
+ /// On completion the result is the number of bytes placed into `buf`. Note that
+ /// `read_exact_at` in this module treats a completed read of `0` bytes as an unexpected EOF.
+ fn poll_read_at(
+ self: Pin<&Self>,
+ cx: &mut Context,
+ buf: &mut [u8],
+ offset: u64,
+ ) -> Poll<io::Result<usize>>;
+}
+
+/// Awaitable wrapper around [`ReadAt::poll_read_at`].
+///
+/// Resolves to whatever byte count `poll_read_at` reports once it is ready; no attempt is made
+/// to fill `buf` completely.
+async fn read_at<T>(input: &T, buf: &mut [u8], offset: u64) -> io::Result<usize>
+where
+    T: ReadAt + ?Sized,
+{
+    // NOTE(review): pinning a plain shared reference presumably relies on `ReadAt`
+    // implementations not needing a stable address beyond this borrow -- confirm.
+    let pinned = unsafe { Pin::new_unchecked(input) };
+    poll_fn(|cx| pinned.poll_read_at(cx, buf, offset)).await
+}
+
+/// Fill `buf` completely, starting at `offset`.
+///
+/// Issues `read_at` calls until every byte of `buf` has been written. A read that yields `0`
+/// bytes while data is still missing is reported as an "unexpected EOF" error.
+async fn read_exact_at<T>(input: &T, mut buf: &mut [u8], mut offset: u64) -> io::Result<()>
+where
+    T: ReadAt + ?Sized,
+{
+    loop {
+        if buf.is_empty() {
+            return Ok(());
+        }
+
+        let got = read_at(input, buf, offset).await?;
+        if got == 0 {
+            io_bail!("unexpected EOF");
+        }
+
+        // Advance past the bytes we just received.
+        buf = &mut buf[got..];
+        offset += got as u64;
+    }
+}
+
+/// Helper to read into an `Endian`-implementing `struct`.
+///
+/// Reads exactly `size_of::<E>()` bytes at `offset` directly into the storage of an `E` and
+/// returns the value after converting it from little endian via `from_le()`.
+///
+/// Fails with the underlying I/O error, including "unexpected EOF" when the input is too short.
+async fn read_entry_at<T, E: Endian>(input: &T, offset: u64) -> io::Result<E>
+where
+    T: ReadAt + ?Sized,
+{
+    // Start zero-filled so the byte view handed to the reader never exposes uninitialized
+    // memory.
+    let mut data = MaybeUninit::<E>::zeroed();
+
+    // SAFETY: the pointer comes from `data`, is valid for `size_of::<E>()` bytes, the bytes are
+    // initialized (zeroed) and `data` is not accessed otherwise while `buf` is in use.
+    let buf =
+        unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<E>()) };
+    read_exact_at(input, buf, offset).await?;
+
+    // SAFETY: every byte of `data` has been overwritten by the read above.
+    // NOTE(review): this assumes any bit pattern is a valid `E` (plain-old-data) -- confirm for
+    // every type this helper is instantiated with.
+    Ok(unsafe { data.assume_init().from_le() })
+}
+
+/// Read exactly `size` bytes starting at `offset` into a freshly allocated vector.
+///
+/// The buffer comes from `util::vec_new(size)` and is filled via `read_exact_at`, so a short
+/// input results in an error rather than a truncated vector.
+async fn read_exact_data_at<T>(input: &T, size: usize, offset: u64) -> io::Result<Vec<u8>>
+where
+    T: ReadAt + ?Sized,
+{
+    let mut buffer = util::vec_new(size);
+    read_exact_at(input, buffer.as_mut_slice(), offset).await?;
+    Ok(buffer)
+}
+
+/// Allow using trait objects for `T: ReadAt`
+///
+/// The implementation simply forwards to the referenced trait object.
+impl<'a> ReadAt for &(dyn ReadAt + 'a) {
+ fn poll_read_at(
+ self: Pin<&Self>,
+ cx: &mut Context,
+ buf: &mut [u8],
+ offset: u64,
+ ) -> Poll<io::Result<usize>> {
+ // `&**self` goes from `Pin<&&dyn ReadAt>` to the inner `&dyn ReadAt`.
+ // NOTE(review): re-pinning the inner reference presumably relies on the referent not
+ // being moved while this shared borrow exists -- confirm.
+ unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) }
+ }
+}
+
+/// Convenience impl for `Arc<dyn ReadAt + Send + Sync + 'static>`. Since `ReadAt` only requires
+/// immutable `&self`, this adds some convenience by allowing to just `Arc` any `'static` type that
+/// implements `ReadAt` for type monomorphization.
+impl ReadAt for Arc<dyn ReadAt + Send + Sync + 'static> {
+ fn poll_read_at(
+ self: Pin<&Self>,
+ cx: &mut Context,
+ buf: &mut [u8],
+ offset: u64,
+ ) -> Poll<io::Result<usize>> {
+ // `&**self` goes from `Pin<&Arc<dyn ReadAt ...>>` to the trait object inside the `Arc`.
+ // NOTE(review): re-pinning presumably relies on the `Arc` keeping its contents at a
+ // fixed address -- confirm.
+ unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) }
+ }
+}
+
+/// Optional caches shared between an accessor and everything opened through it.
+///
+/// `Default` yields a value with no cache configured.
+#[derive(Clone, Default)]
+struct Caches {
+    /// The goodbye table cache maps goodbye table offsets to cache entries.
+    gbt_cache: Option<Arc<dyn Cache<u64, [GoodbyeItem]> + Send + Sync>>,
+}
+
+/// The random access state machine implementation.
+pub(crate) struct AccessorImpl<T> {
+ /// Backing reader all archive data is fetched from.
+ input: T,
+ /// Size passed to `new`; used as the end offset when opening the root directory.
+ size: u64,
+ /// Cache handles handed to every directory and file entry opened through this accessor.
+ caches: Arc<Caches>,
+}
+
+impl<T: ReadAt> AccessorImpl<T> {
+    /// Create an accessor over `input`, which is `size` bytes long.
+    ///
+    /// Fails if `size` is smaller than a single `GoodbyeItem`, since such an input cannot hold
+    /// a goodbye tail. No data is read at this point.
+    pub async fn new(input: T, size: u64) -> io::Result<Self> {
+        let min_size = size_of::<GoodbyeItem>() as u64;
+        if size < min_size {
+            io_bail!("too small to contain a pxar archive");
+        }
+
+        let caches = Arc::new(Caches::default());
+        Ok(Self {
+            input,
+            size,
+            caches,
+        })
+    }
+
+    /// The archive size this accessor was created with.
+    pub fn size(&self) -> u64 {
+        self.size
+    }
+
+    /// Open the root directory, borrowing the input as a `&dyn ReadAt` instead of cloning it.
+    ///
+    /// The root directory is taken to end at the very end of the archive (`self.size`).
+    pub async fn open_root_ref<'a>(&'a self) -> io::Result<DirectoryImpl<&'a dyn ReadAt>> {
+        let input = &self.input as &dyn ReadAt;
+        let caches = Arc::clone(&self.caches);
+        DirectoryImpl::open_at_end(input, self.size, PathBuf::from("/"), caches).await
+    }
+
+    /// Replace the goodbye table cache (`None` disables caching).
+    ///
+    /// A new `Caches` value is installed; directories opened earlier keep the `Arc` they were
+    /// created with.
+    pub fn set_goodbye_table_cache(
+        &mut self,
+        cache: Option<Arc<dyn Cache<u64, [GoodbyeItem]> + Send + Sync>>,
+    ) {
+        // Start from a copy of the current set so only the goodbye table cache changes.
+        let mut caches = Caches::clone(&self.caches);
+        caches.gbt_cache = cache;
+        self.caches = Arc::new(caches);
+    }
+}
+
+/// Create a sequential decoder reading `entry_range` of `input`.
+///
+/// `path` is handed to `DecoderImpl::new_full` together with a `SeqReadAtAdapter` restricted
+/// to the given range.
+async fn get_decoder<T: ReadAt>(
+    input: T,
+    entry_range: Range<u64>,
+    path: PathBuf,
+) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
+    let reader = SeqReadAtAdapter::new(input, entry_range);
+    let decoder = DecoderImpl::new_full(reader, path).await?;
+    Ok(decoder)
+}
+
+impl<T: Clone + ReadAt> AccessorImpl<T> {
+    /// Open the root directory using a clone of the input.
+    ///
+    /// The root directory is taken to end at the very end of the archive (`self.size`).
+    pub async fn open_root(&self) -> io::Result<DirectoryImpl<T>> {
+        let input = self.input.clone();
+        let caches = Arc::clone(&self.caches);
+        DirectoryImpl::open_at_end(input, self.size, PathBuf::from("/"), caches).await
+    }
+
+    /// Allow opening a directory at a specified offset.
+    ///
+    /// `offset` is used as the directory's *end* offset. The resulting directory always gets
+    /// the path `/`.
+    ///
+    /// # Safety
+    ///
+    /// NOTE(review): presumably `unsafe` because `offset` is taken on trust as the end of a
+    /// directory; the only validation visible here is the goodbye tail check performed while
+    /// opening -- confirm the intended contract.
+    pub async unsafe fn open_dir_at_end(&self, offset: u64) -> io::Result<DirectoryImpl<T>> {
+        let input = self.input.clone();
+        let caches = Arc::clone(&self.caches);
+        DirectoryImpl::open_at_end(input, offset, PathBuf::from("/"), caches).await
+    }
+
+    /// Allow opening a regular file from a specified range.
+    ///
+    /// Decodes the first entry found in `range` (with an empty path) and wraps it into a
+    /// `FileEntryImpl` covering that range.
+    ///
+    /// # Safety
+    ///
+    /// NOTE(review): presumably `unsafe` because `range` is taken on trust as the exact range
+    /// of an entry -- confirm the intended contract.
+    pub async unsafe fn open_file_at_range(
+        &self,
+        range: Range<u64>,
+    ) -> io::Result<FileEntryImpl<T>> {
+        let mut decoder = get_decoder(self.input.clone(), range.clone(), PathBuf::new()).await?;
+
+        let first = decoder.next().await;
+        let entry =
+            first.ok_or_else(|| io_format_err!("unexpected EOF while decoding file entry"))??;
+
+        Ok(FileEntryImpl {
+            input: self.input.clone(),
+            entry,
+            entry_range: range,
+            caches: Arc::clone(&self.caches),
+        })
+    }
+
+    /// Allow opening arbitrary contents from a specific range.
+    ///
+    /// # Safety
+    ///
+    /// NOTE(review): `range` is passed on unchecked; presumably the caller has to make sure it
+    /// denotes file contents inside the archive -- confirm the intended contract.
+    pub unsafe fn open_contents_at_range(&self, range: Range<u64>) -> FileContentsImpl<T> {
+        let input = self.input.clone();
+        FileContentsImpl::new(input, range)
+    }
+}
+
+/// The directory random-access state machine implementation.
+pub(crate) struct DirectoryImpl<T> {
+ /// Backing reader.
+ input: T,
+ /// Start offset of the directory's entry (`goodbye_ofs` minus the tail item's `offset`).
+ entry_ofs: u64,
+ /// Offset where the goodbye block starts (end offset minus the tail item's `size`).
+ goodbye_ofs: u64,
+ /// Distance from `entry_ofs` to the directory's end offset.
+ size: u64,
+ /// The goodbye table, either taken from the cache or loaded by `load_table`.
+ table: Arc<[GoodbyeItem]>,
+ /// Path of this directory; used as the base path for decoders created from it.
+ path: PathBuf,
+ /// Shared cache handles, passed on to entries looked up in this directory.
+ caches: Arc<Caches>,
+}
+
+impl<T: Clone + ReadAt> DirectoryImpl<T> {
+    /// Open a directory ending at the specified position.
+    ///
+    /// Reads the goodbye tail item located directly before `end_offset`, derives the start of
+    /// the goodbye block and of the directory entry from it, and then obtains the goodbye
+    /// table either from the configured cache or by loading it from `input` (inserting it into
+    /// the cache afterwards).
+    ///
+    /// # Errors
+    ///
+    /// Fails on I/O errors and when the tail item is inconsistent: sizes or offsets pointing
+    /// before the start of the input, a goodbye block too small to hold its header plus the
+    /// tail item, or a table size that is not a multiple of the item size.
+    async fn open_at_end(
+        input: T,
+        end_offset: u64,
+        path: PathBuf,
+        caches: Arc<Caches>,
+    ) -> io::Result<DirectoryImpl<T>> {
+        let tail = Self::read_tail_entry(&input, end_offset).await?;
+
+        // `tail.size` spans the goodbye header, the table items and the tail item itself.
+        // Anything smaller would make `table_size()` and `len()` underflow further down.
+        let min_goodbye_size = (size_of::<format::Header>() + size_of::<GoodbyeItem>()) as u64;
+        if tail.size < min_goodbye_size {
+            io_bail!("goodbye tail size too small: {}", tail.size);
+        }
+
+        if end_offset < tail.size {
+            io_bail!("goodbye tail size out of range");
+        }
+
+        let goodbye_ofs = end_offset - tail.size;
+
+        if goodbye_ofs < tail.offset {
+            io_bail!("goodbye offset out of range");
+        }
+
+        let entry_ofs = goodbye_ofs - tail.offset;
+        let size = end_offset - entry_ofs;
+
+        let table: Option<Arc<[GoodbyeItem]>> = caches
+            .gbt_cache
+            .as_ref()
+            .and_then(|cache| cache.fetch(goodbye_ofs));
+
+        // Without a cached table an empty placeholder is used until `load_table` ran.
+        let mut this = Self {
+            input,
+            entry_ofs,
+            goodbye_ofs,
+            size,
+            table: table.as_ref().map_or_else(|| Arc::new([]), Arc::clone),
+            path,
+            caches,
+        };
+
+        // sanity check:
+        if this.table_size() % (size_of::<GoodbyeItem>() as u64) != 0 {
+            io_bail!("invalid goodbye table size: {}", this.table_size());
+        }
+
+        if table.is_none() {
+            this.table = this.load_table().await?;
+            if let Some(ref cache) = this.caches.gbt_cache {
+                cache.insert(goodbye_ofs, Arc::clone(&this.table));
+            }
+        }
+
+        Ok(this)
+    }
+
+    /// Load the entire goodbye table (excluding the tail marker) from the input.
+    ///
+    /// Reads `len()` items starting at `table_offset()` and converts each of them from little
+    /// endian, matching what `read_entry_at` does for single items.
+    ///
+    /// # Errors
+    ///
+    /// Fails with the underlying I/O error, including "unexpected EOF" on short input.
+    async fn load_table(&self) -> io::Result<Arc<[GoodbyeItem]>> {
+        let len = self.len();
+        let mut data: Vec<GoodbyeItem> = Vec::with_capacity(len);
+
+        // SAFETY: the allocation has room for `len` items. It is zero-filled first so the byte
+        // view handed to the reader never refers to uninitialized memory. `data` itself is not
+        // touched again until the read is done.
+        let bytes = unsafe {
+            std::ptr::write_bytes(data.as_mut_ptr(), 0, len);
+            std::slice::from_raw_parts_mut(
+                data.as_mut_ptr() as *mut u8,
+                len * size_of::<GoodbyeItem>(),
+            )
+        };
+        read_exact_at(&self.input, bytes, self.table_offset()).await?;
+
+        // SAFETY: all `len` items are initialized (zeroed, then overwritten by the read).
+        // NOTE(review): this assumes any bit pattern is a valid `GoodbyeItem` -- confirm.
+        unsafe { data.set_len(len) };
+
+        // The on-disk items are little endian; convert them like single-item reads do.
+        let data: Vec<GoodbyeItem> = data.into_iter().map(|item| item.from_le()).collect();
+        Ok(Arc::from(data))
+    }
+
+    /// Offset directly behind this directory (`entry_ofs + size`).
+    #[inline]
+    fn end_offset(&self) -> u64 {
+        let start = self.entry_ofs;
+        start + self.size
+    }
+
+    /// Byte range covered by this directory, from `entry_ofs` up to `end_offset()`.
+    #[inline]
+    fn entry_range(&self) -> Range<u64> {
+        Range {
+            start: self.entry_ofs,
+            end: self.end_offset(),
+        }
+    }
+
+    /// Size in bytes of the goodbye block without its header.
+    ///
+    /// The subtraction underflows if the goodbye block is smaller than a header.
+    #[inline]
+    fn table_size(&self) -> u64 {
+        let header_len = size_of::<format::Header>() as u64;
+        let goodbye_len = self.end_offset() - self.goodbye_ofs;
+        goodbye_len - header_len
+    }
+
+    /// Offset of the first goodbye table item, i.e. directly behind the goodbye header.
+    #[inline]
+    fn table_offset(&self) -> u64 {
+        let header_len = size_of::<format::Header>() as u64;
+        self.goodbye_ofs + header_len
+    }
+
+    /// Number of goodbye table items, *not* counting the tail marker.
+    ///
+    /// The subtraction underflows if `table_size()` does not cover at least one item.
+    #[inline]
+    fn len(&self) -> usize {
+        let item_len = size_of::<GoodbyeItem>() as u64;
+        let items = (self.table_size() / item_len) as usize;
+        items - 1
+    }
+
+    /// Read the goodbye tail item located directly before `end_offset`.
+    ///
+    /// Fails if a `GoodbyeItem` does not fit before `end_offset`, if reading fails, or if the
+    /// item's hash is not `PXAR_GOODBYE_TAIL_MARKER`.
+    async fn read_tail_entry(input: &T, end_offset: u64) -> io::Result<GoodbyeItem> {
+        let item_len = size_of::<GoodbyeItem>() as u64;
+        let tail_offset = match end_offset.checked_sub(item_len) {
+            Some(ofs) => ofs,
+            None => io_bail!("goodbye tail does not fit"),
+        };
+
+        let tail: GoodbyeItem = read_entry_at(input, tail_offset).await?;
+
+        if tail.hash == format::PXAR_GOODBYE_TAIL_MARKER {
+            Ok(tail)
+        } else {
+            io_bail!("no goodbye tail marker found")
+        }
+    }
+
+    /// Get a decoder for the directory contents.
+    ///
+    /// The first entry of the directory's range is decoded and must be a directory itself; the
+    /// decoder is returned positioned behind that entry.
+    pub(crate) async fn decode_full(&self) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
+        let range = self.entry_range();
+        let (entry, decoder) = self.decode_one_entry(range, None).await?;
+        if entry.is_dir() {
+            Ok(decoder)
+        } else {
+            io_bail!("directory does not seem to be a directory")
+        }
+    }
+
+    /// Create a decoder for `entry_range`, reading from a clone of this directory's input.
+    ///
+    /// The decoder's path is this directory's path, extended by `file_name` when one is given.
+    async fn get_decoder(
+        &self,
+        entry_range: Range<u64>,
+        file_name: Option<&Path>,
+    ) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
+        let input = self.input.clone();
+        let full_path = file_name.map_or_else(|| self.path.clone(), |name| self.path.join(name));
+        get_decoder(input, entry_range, full_path).await
+    }
+
+    /// Decode the first entry found in `entry_range`.
+    ///
+    /// Returns the entry together with the decoder it came from. An empty range is reported as
+    /// an "unexpected EOF" format error.
+    async fn decode_one_entry(
+        &self,
+        entry_range: Range<u64>,
+        file_name: Option<&Path>,
+    ) -> io::Result<(Entry, DecoderImpl<SeqReadAtAdapter<T>>)> {
+        let mut decoder = self.get_decoder(entry_range, file_name).await?;
+
+        let first = decoder.next().await;
+        let entry = first
+            .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
+
+        Ok((entry, decoder))
+    }
+
+    /// Search the goodbye table for `hash` via `binary_tree_array::search_by`.
+    ///
+    /// `start` and `skip` are passed through unchanged; `lookup_component` uses them to step
+    /// through items sharing the same hash.
+    fn lookup_hash_position(&self, hash: u64, start: usize, skip: usize) -> Option<usize> {
+        let by_hash = |item: &GoodbyeItem| hash.cmp(&item.hash);
+        binary_tree_array::search_by(&self.table, start, skip, by_hash)
+    }
+
+    /// Get the file entry describing this directory itself.
+    ///
+    /// Decodes the first entry of the directory's own range.
+    pub async fn lookup_self(&self) -> io::Result<FileEntryImpl<T>> {
+        let entry_range = self.entry_range();
+        let (entry, _decoder) = self.decode_one_entry(entry_range.clone(), None).await?;
+
+        Ok(FileEntryImpl {
+            input: self.input.clone(),
+            entry,
+            entry_range,
+            caches: Arc::clone(&self.caches),
+        })
+    }
+
+    /// Lookup a directory entry.
+    ///
+    /// Walks `path` component by component, entering each intermediate entry as a directory.
+    /// A leading `/` or `.` refers to this directory itself. `..` and any other special
+    /// component are rejected with an error.
+    ///
+    /// Returns `Ok(None)` if a component does not exist, or if `path` has no components.
+    pub async fn lookup(&self, path: &Path) -> io::Result<Option<FileEntryImpl<T>>> {
+        use std::path::Component;
+
+        let mut current: Option<FileEntryImpl<T>> = None;
+        let mut at_start = true;
+
+        for component in path.components() {
+            // True only while handling the very first component.
+            let is_first = mem::replace(&mut at_start, false);
+
+            let name = match component {
+                Component::Normal(name) => name,
+                Component::ParentDir => io_bail!("cannot enter parent directory in archive"),
+                Component::RootDir | Component::CurDir if is_first => {
+                    current = Some(self.lookup_self().await?);
+                    continue;
+                }
+                Component::CurDir => continue,
+                _ => io_bail!("invalid component in path"),
+            };
+
+            let found = match current.take() {
+                Some(parent) => {
+                    let dir = parent.enter_directory().await?;
+                    dir.lookup_component(name).await?
+                }
+                None => self.lookup_component(name).await?,
+            };
+
+            match found {
+                Some(entry) => current = Some(entry),
+                None => return Ok(None),
+            }
+        }
+
+        Ok(current)
+    }
+
+    /// Lookup a single directory entry component (does not handle multiple components in path)
+    ///
+    /// Returns `Ok(None)` when no goodbye table item with a matching hash carries the
+    /// requested file name.
+    pub async fn lookup_component(&self, path: &OsStr) -> io::Result<Option<FileEntryImpl<T>>> {
+        let hash = format::hash_filename(path.as_bytes());
+
+        let first_index = match self.lookup_hash_position(hash, 0, 0) {
+            Some(index) => index,
+            None => return Ok(None),
+        };
+
+        // Different names can share a hash: compare the stored file name of each candidate and
+        // move on to the next item with the same hash until the name matches or no further
+        // candidate exists.
+        let mut dup = 0;
+        while let Some(index) = self.lookup_hash_position(hash, first_index, dup) {
+            let cursor = self.get_cursor(index).await?;
+            if cursor.file_name == path {
+                let entry = cursor.decode_entry().await?;
+                return Ok(Some(entry));
+            }
+
+            dup += 1;
+        }
+
+        Ok(None)
+    }
+
+    /// Create a cursor for the goodbye table item at `index`.
+    ///
+    /// The item's `offset` is interpreted relative to the start of the goodbye block, counting
+    /// backwards. The file name stored there is read, and the entry range runs from behind the
+    /// file name up to `file_ofs + size`.
+    ///
+    /// # Errors
+    ///
+    /// Fails on I/O errors, on an out-of-range `index`, and when the item's offset or size
+    /// describe a range that is out of bounds, reversed or overflowing.
+    async fn get_cursor<'a>(&'a self, index: usize) -> io::Result<DirEntryImpl<'a, T>> {
+        let entry = match self.table.get(index) {
+            Some(entry) => entry,
+            None => io_bail!("goodbye table index out of range: {}", index),
+        };
+
+        let file_ofs = match self.goodbye_ofs.checked_sub(entry.offset) {
+            Some(ofs) => ofs,
+            None => io_bail!("invalid file offset"),
+        };
+
+        let (file_name, entry_ofs) = self.read_filename_entry(file_ofs).await?;
+
+        // The table contents are untrusted: reject ends that overflow or lie before the start.
+        let entry_range = match file_ofs.checked_add(entry.size) {
+            Some(end) if end >= entry_ofs => entry_ofs..end,
+            _ => io_bail!(
+                "bad file: invalid entry ranges for {:?}: \
+                 start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}",
+                file_name,
+                entry_ofs,
+                file_ofs,
+                entry.size,
+            ),
+        };
+
+        Ok(DirEntryImpl {
+            dir: self,
+            file_name,
+            entry_range,
+            caches: Arc::clone(&self.caches),
+        })
+    }
+
+    /// Read the `PXAR_FILENAME` item located at `file_ofs`.
+    ///
+    /// Returns the file name together with the offset directly behind the file name item.
+    ///
+    /// Fails if the header found there is not a `PXAR_FILENAME` header, if the name lacks its
+    /// terminating zero byte, is empty, or is rejected by `format::check_file_name`.
+    async fn read_filename_entry(&self, file_ofs: u64) -> io::Result<(PathBuf, u64)> {
+        let head: format::Header = read_entry_at(&self.input, file_ofs).await?;
+        if head.htype != format::PXAR_FILENAME {
+            io_bail!("expected PXAR_FILENAME header, found: {:x}", head.htype);
+        }
+
+        // The name bytes follow directly behind the header.
+        let name_len = head.content_size() as usize;
+        let name_ofs = file_ofs + (size_of_val(&head) as u64);
+        let mut name = read_exact_data_at(&self.input, name_len, name_ofs).await?;
+
+        match name.pop() {
+            Some(0) => {}
+            _ => io_bail!("invalid file name (missing terminating zero)"),
+        }
+
+        if name.is_empty() {
+            io_bail!("invalid empty file name");
+        }
+
+        let file_name = PathBuf::from(OsString::from_vec(name));
+        format::check_file_name(&file_name)?;
+
+        let entry_ofs = file_ofs + head.full_size();
+        Ok((file_name, entry_ofs))
+    }
+
+    /// Iterate over this directory's entries, starting at the first table item.
+    pub fn read_dir(&self) -> ReadDirImpl<T> {
+        ReadDirImpl { dir: self, at: 0 }
+    }
+
+ /// Number of items in this directory's table, i.e. how many entries `read_dir` can yield.
+ pub fn entry_count(&self) -> usize {
+ self.table.len()
+ }
+}
+
+/// A file entry retrieved from a Directory.
+///
+/// Holds its own clone of the input, so it does not borrow the directory it was found in.
+#[derive(Clone)]
+pub(crate) struct FileEntryImpl<T: Clone + ReadAt> {
+ /// Reader this entry was decoded from.
+ input: T,
+ /// The decoded entry (path, kind and metadata).
+ entry: Entry,
+ /// Byte range of the entry inside `input`; its end is what `enter_directory` opens at.
+ entry_range: Range<u64>,
+ /// Shared caches, handed on when entering a directory.
+ caches: Arc<Caches>,
+}
+
+impl<T: Clone + ReadAt> FileEntryImpl<T> {
+    /// Open this entry as a directory.
+    ///
+    /// Returns an error if the entry is not a directory.
+    pub async fn enter_directory(&self) -> io::Result<DirectoryImpl<T>> {
+        if !self.entry.is_dir() {
+            io_bail!("enter_directory() on a non-directory");
+        }
+
+        DirectoryImpl::open_at_end(
+            self.input.clone(),
+            self.entry_range.end,
+            self.entry.path.clone(),
+            Arc::clone(&self.caches),
+        )
+        .await
+    }
+
+    /// For use with unsafe accessor methods.
+    ///
+    /// Returns the byte range of a regular file's contents, or `Ok(None)` if this entry is not
+    /// a regular file. Errors if the reader provided no payload offset, or if `offset + size`
+    /// does not fit into a `u64` (which can only happen with a corrupt archive).
+    pub fn content_range(&self) -> io::Result<Option<Range<u64>>> {
+        match self.entry.kind {
+            EntryKind::File { offset: None, .. } => {
+                io_bail!("cannot open file, reader provided no offset")
+            }
+            EntryKind::File {
+                size,
+                offset: Some(offset),
+            } => match offset.checked_add(size) {
+                // `size` is read from the archive, so guard the addition against overflow
+                // instead of producing a wrapped (reversed) range.
+                Some(end) => Ok(Some(offset..end)),
+                None => io_bail!("invalid file size, content range overflows"),
+            },
+            _ => Ok(None),
+        }
+    }
+
+    /// Get a reader for this file's contents.
+    ///
+    /// Returns an error if this entry is not a regular file.
+    pub async fn contents(&self) -> io::Result<FileContentsImpl<T>> {
+        match self.content_range()? {
+            Some(range) => Ok(FileContentsImpl::new(self.input.clone(), range)),
+            None => io_bail!("not a file"),
+        }
+    }
+
+    /// Consume this handle and return the decoded entry.
+    #[inline]
+    pub fn into_entry(self) -> Entry {
+        self.entry
+    }
+
+    /// Borrow the decoded entry.
+    #[inline]
+    pub fn entry(&self) -> &Entry {
+        &self.entry
+    }
+
+    /// Exposed for raw by-offset access methods (use with `open_dir_at_end`).
+    #[inline]
+    pub fn entry_range(&self) -> Range<u64> {
+        self.entry_range.clone()
+    }
+}
+
+/// An iterator over the contents of a directory.
+///
+/// Not an `Iterator` implementation: entries are fetched with the async `next()` method.
+pub(crate) struct ReadDirImpl<'a, T> {
+ /// Directory being listed.
+ dir: &'a DirectoryImpl<T>,
+ /// Index into the directory's table of the next entry to return.
+ at: usize,
+}
+
+impl<'a, T: Clone + ReadAt> ReadDirImpl<'a, T> {
+    /// Create an iterator over `dir` which starts at table index `at`.
+    fn new(dir: &'a DirectoryImpl<T>, at: usize) -> Self {
+        Self { dir, at }
+    }
+
+    /// Get the next entry.
+    ///
+    /// Returns `Ok(None)` once all table items have been visited.
+    pub async fn next(&mut self) -> io::Result<Option<DirEntryImpl<'a, T>>> {
+        // `>=` rather than `==`: a start index past the end must not index out of bounds.
+        if self.at >= self.dir.table.len() {
+            return Ok(None);
+        }
+
+        let cursor = self.dir.get_cursor(self.at).await?;
+        // Only advance after the entry was read successfully.
+        self.at += 1;
+        Ok(Some(cursor))
+    }
+
+    /// Efficient alternative to `Iterator::skip`.
+    ///
+    /// Skipping past the end is fine, the iterator is then simply exhausted.
+    #[inline]
+    pub fn skip(self, n: usize) -> Self {
+        Self {
+            // Saturate so that a huge `n` cannot overflow and wrap back to an early index.
+            at: self.at.saturating_add(n).min(self.dir.table.len()),
+            dir: self.dir,
+        }
+    }
+
+    /// Efficient alternative to `Iterator::count`.
+    ///
+    /// Like `Iterator::count`, this is the number of entries which have not been visited yet,
+    /// so entries already consumed via `next()` or `skip()` are not counted.
+    #[inline]
+    pub fn count(self) -> usize {
+        self.dir.table.len().saturating_sub(self.at)
+    }
+}
+
+/// A cursor pointing to a file in a directory.
+///
+/// At this point only the file name has been read and we remembered the position for finding the
+/// actual data. This can be upgraded into a FileEntryImpl.
+pub(crate) struct DirEntryImpl<'a, T: Clone + ReadAt> {
+ /// Directory this cursor was created from.
+ dir: &'a DirectoryImpl<T>,
+ /// Name read from the entry's FILENAME item.
+ file_name: PathBuf,
+ /// Byte range from right behind the FILENAME item up to the end of the entry.
+ entry_range: Range<u64>,
+ /// Shared caches, handed on to the `FileEntryImpl` created by `decode_entry`.
+ caches: Arc<Caches>,
+}
+
+impl<'a, T: Clone + ReadAt> DirEntryImpl<'a, T> {
+    /// Name of the entry this cursor points to.
+    pub fn file_name(&self) -> &Path {
+        self.file_name.as_path()
+    }
+
+    /// Decode the entry behind this cursor, upgrading it into a `FileEntryImpl`.
+    async fn decode_entry(&self) -> io::Result<FileEntryImpl<T>> {
+        let entry_range = self.entry_range.clone();
+
+        // The decoder returned alongside the entry is not needed any further.
+        let (entry, _decoder) = self
+            .dir
+            .decode_one_entry(entry_range.clone(), Some(&self.file_name))
+            .await?;
+
+        Ok(FileEntryImpl {
+            input: self.dir.input.clone(),
+            entry,
+            entry_range,
+            caches: Arc::clone(&self.caches),
+        })
+    }
+
+    /// Exposed for raw by-offset access methods.
+    #[inline]
+    pub fn entry_range(&self) -> Range<u64> {
+        Range {
+            start: self.entry_range.start,
+            end: self.entry_range.end,
+        }
+    }
+}
+
+/// A reader for file contents.
+///
+/// Reads are relative to the start of `range` and never go past its end.
+pub(crate) struct FileContentsImpl<T> {
+ /// Reader containing the file's bytes.
+ input: T,
+
+ /// Absolute offset inside the `input`.
+ range: Range<u64>,
+}
+
+impl<T: Clone + ReadAt> FileContentsImpl<T> {
+    /// Create a reader for the bytes of `input` covered by `range`.
+    pub fn new(input: T, range: Range<u64>) -> Self {
+        Self { input, range }
+    }
+
+    /// Length of the contents in bytes.
+    #[inline]
+    pub fn file_size(&self) -> u64 {
+        self.range.end - self.range.start
+    }
+
+    /// Read at `offset` (relative to the start of the contents) into `buf`.
+    ///
+    /// Returns `Ok(0)` when `offset` is at or behind the end of the contents.
+    async fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result<usize> {
+        let size = self.file_size();
+        if offset >= size {
+            return Ok(0);
+        }
+
+        // Clamp the request to what is left, so we never read past the end of the contents.
+        // The result is at most `buf.len()`, so the cast back to `usize` is lossless.
+        let wanted = (buf.len() as u64).min(size - offset) as usize;
+
+        read_at(&self.input, &mut buf[..wanted], self.range.start + offset).await
+    }
+}
+
+impl<T: Clone + ReadAt> ReadAt for FileContentsImpl<T> {
+    /// Positional read within the file contents.
+    ///
+    /// `offset` is relative to the start of the contents; a read at or behind their end
+    /// yields `0`, and a read reaching past the end is shortened.
+    fn poll_read_at(
+        self: Pin<&Self>,
+        cx: &mut Context,
+        buf: &mut [u8],
+        offset: u64,
+    ) -> Poll<io::Result<usize>> {
+        let size = self.file_size();
+        if offset >= size {
+            return Poll::Ready(Ok(0));
+        }
+
+        // At most `buf.len()`, so converting back to `usize` cannot truncate.
+        let wanted = (buf.len() as u64).min(size - offset) as usize;
+        let abs_offset = self.range.start + offset;
+
+        // Project the pin from `self` onto the `input` field.
+        // NOTE(review): relies on `input` being structurally pinned -- confirm.
+        let input = unsafe { self.map_unchecked(|this| &this.input) };
+        input.poll_read_at(cx, &mut buf[..wanted], abs_offset)
+    }
+}
+
+/// Adapter implementing the decoder's `SeqRead` on top of a `ReadAt` input, limited to `range`.
+#[doc(hidden)]
+pub struct SeqReadAtAdapter<T> {
+ /// Underlying random access reader.
+ input: T,
+ /// Bytes still to be read; `start` advances with every successful read.
+ range: Range<u64>,
+}
+
+impl<T: ReadAt> SeqReadAtAdapter<T> {
+    /// Create an adapter which sequentially reads the bytes of `input` covered by `range`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `range.end < range.start`.
+    pub fn new(input: T, range: Range<u64>) -> Self {
+        if range.end < range.start {
+            panic!("BAD SEQ READ AT ADAPTER");
+        }
+        Self { input, range }
+    }
+
+    /// Number of bytes left in the range, saturated at `usize::MAX`.
+    #[inline]
+    fn remaining(&self) -> usize {
+        let left = self.range.end - self.range.start;
+        // A plain `as usize` truncates where `usize` is narrower than `u64`: with exactly
+        // 4 GiB left on a 32 bit target it would yield 0 and look like EOF. The value is only
+        // used as an upper bound for a read, so clamping is sufficient.
+        left.min(usize::MAX as u64) as usize
+    }
+}
+
+impl<T: ReadAt> decoder::SeqRead for SeqReadAtAdapter<T> {
+    /// Read the next bytes of the range and advance the position by the amount read.
+    fn poll_seq_read(
+        self: Pin<&mut Self>,
+        cx: &mut Context,
+        buf: &mut [u8],
+    ) -> Poll<io::Result<usize>> {
+        // NOTE(review): assumes nothing is moved out of `this` below -- confirm.
+        let this = unsafe { self.get_unchecked_mut() };
+
+        // Never hand out more than what is left in the range.
+        let len = buf.len().min(this.remaining());
+        let buf = &mut buf[..len];
+
+        let polled =
+            unsafe { Pin::new_unchecked(&this.input).poll_read_at(cx, buf, this.range.start) };
+        match polled {
+            Poll::Pending => Poll::Pending,
+            Poll::Ready(Err(err)) => Poll::Ready(Err(err)),
+            Poll::Ready(Ok(got)) => {
+                this.range.start += got as u64;
+                Poll::Ready(Ok(got))
+            }
+        }
+    }
+
+    /// The current position is the (advancing) start of the remaining range.
+    fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
+        let position = self.range.start;
+        Poll::Ready(Some(Ok(position)))
+    }
+}
+++ /dev/null
-//! The `pxar` decoder state machine.
-//!
-//! This is the implementation used by both the synchronous and async pxar wrappers.
-
-use std::convert::TryFrom;
-use std::ffi::OsString;
-use std::io;
-use std::mem::{self, size_of, size_of_val, MaybeUninit};
-use std::os::unix::ffi::{OsStrExt, OsStringExt};
-use std::path::{Path, PathBuf};
-use std::pin::Pin;
-use std::task::{Context, Poll};
-
-//use std::os::unix::fs::FileExt;
-
-use endian_trait::Endian;
-
-use crate::format::{self, Header};
-use crate::poll_fn::poll_fn;
-use crate::util::{self, io_err_other};
-use crate::{Entry, EntryKind, Metadata};
-
-pub mod aio;
-pub mod sync;
-
-#[doc(inline)]
-pub use sync::Decoder;
-
-/// To skip through non-seekable files.
-static mut SCRATCH_BUFFER: MaybeUninit<[u8; 4096]> = MaybeUninit::uninit();
-
-fn scratch_buffer() -> &'static mut [u8] {
- unsafe { &mut (*SCRATCH_BUFFER.as_mut_ptr())[..] }
-}
-
-/// Sequential read interface used by the decoder's state machine.
-///
-/// To simply iterate through a directory we just need the equivalent of `poll_read()`.
-///
-/// Currently we also have a `poll_position()` method which can be added for types supporting
-/// `Seek` or `AsyncSeek`. In this case the starting position of each entry becomes available
-/// (accessible via the `Entry::offset()`), to allow jumping between entries.
-pub trait SeqRead {
- /// Mostly we want to read sequentially, so this is basically an `AsyncRead` equivalent.
- fn poll_seq_read(
- self: Pin<&mut Self>,
- cx: &mut Context,
- buf: &mut [u8],
- ) -> Poll<io::Result<usize>>;
-
- /// While going through the data we may want to take notes about some offsets within the file
- /// for later. If the reader does not support seeking or positional reading, this can just
- /// return `None`.
- fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
- Poll::Ready(None)
- }
-}
-
-/// Allow using trait objects for generics taking a `SeqRead`:
-impl<'a> SeqRead for &mut (dyn SeqRead + 'a) {
- fn poll_seq_read(
- self: Pin<&mut Self>,
- cx: &mut Context,
- buf: &mut [u8],
- ) -> Poll<io::Result<usize>> {
- unsafe {
- self.map_unchecked_mut(|this| &mut **this)
- .poll_seq_read(cx, buf)
- }
- }
-
- fn poll_position(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
- unsafe { self.map_unchecked_mut(|this| &mut **this).poll_position(cx) }
- }
-}
-
-/// awaitable version of `poll_position`.
-pub(crate) async fn seq_read_position<T: SeqRead + ?Sized>(
- input: &mut T,
-) -> Option<io::Result<u64>> {
- poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *input).poll_position(cx) }).await
-}
-
-/// awaitable version of `poll_seq_read`.
-pub(crate) async fn seq_read<T: SeqRead + ?Sized>(
- input: &mut T,
- buf: &mut [u8],
-) -> io::Result<usize> {
- poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *input).poll_seq_read(cx, buf) }).await
-}
-
-/// `read_exact` - since that's what we _actually_ want most of the time, but with EOF handling
-async fn seq_read_exact_or_eof<T>(input: &mut T, mut buf: &mut [u8]) -> io::Result<Option<()>>
-where
- T: SeqRead + ?Sized,
-{
- let mut eof_ok = true;
- while !buf.is_empty() {
- match seq_read(&mut *input, buf).await? {
- 0 if eof_ok => return Ok(None),
- 0 => io_bail!("unexpected EOF"),
- got => buf = &mut buf[got..],
- }
- eof_ok = false;
- }
- Ok(Some(()))
-}
-
-/// `read_exact` - since that's what we _actually_ want most of the time.
-async fn seq_read_exact<T: SeqRead + ?Sized>(input: &mut T, buf: &mut [u8]) -> io::Result<()> {
- match seq_read_exact_or_eof(input, buf).await? {
- Some(()) => Ok(()),
- None => io_bail!("unexpected eof"),
- }
-}
-
-/// Helper to read into an allocated byte vector.
-async fn seq_read_exact_data<T>(input: &mut T, size: usize) -> io::Result<Vec<u8>>
-where
- T: SeqRead + ?Sized,
-{
- let mut data = util::vec_new(size);
- seq_read_exact(input, &mut data[..]).await?;
- Ok(data)
-}
-
-/// `seq_read_entry` with EOF handling
-async fn seq_read_entry_or_eof<T, E>(input: &mut T) -> io::Result<Option<E>>
-where
- T: SeqRead + ?Sized,
- E: Endian,
-{
- let mut data = MaybeUninit::<E>::uninit();
- let buf =
- unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<E>()) };
- if seq_read_exact_or_eof(input, buf).await?.is_none() {
- return Ok(None);
- }
- Ok(Some(unsafe { data.assume_init().from_le() }))
-}
-
-/// Helper to read into an `Endian`-implementing `struct`.
-async fn seq_read_entry<T: SeqRead + ?Sized, E: Endian>(input: &mut T) -> io::Result<E> {
- seq_read_entry_or_eof(input)
- .await?
- .ok_or_else(|| io_format_err!("unexepcted EOF"))
-}
-
-/// The decoder state machine implementation.
-///
-/// We use `async fn` to implement the decoder state machine so that we can easily plug in both
-/// synchronous or `async` I/O objects in as input.
-pub(crate) struct DecoderImpl<T> {
- input: T,
- current_header: Header,
- entry: Entry,
- path_lengths: Vec<usize>,
- state: State,
- with_goodbye_tables: bool,
-}
-
-enum State {
- Begin,
- Default,
- InPayload {
- offset: u64,
- },
-
- /// file entries with no data (fifo, socket)
- InSpecialFile,
-
- InGoodbyeTable,
- InDirectory,
- Eof,
-}
-
-/// Control flow while parsing items.
-///
-/// When parsing an entry, we usually go through all of its attribute items. Once we reach the end
-/// of the entry we stop.
-/// Note that if we're in a directory, we stopped at the beginning of its contents.
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-enum ItemResult {
- /// We parsed an "attribute" item and should continue parsing.
- Attribute,
-
- /// We finished an entry (`SYMLINK`, `HARDLINK`, ...) or just entered the contents of a
- /// directory (`FILENAME`, `GOODBYE`).
- ///
- /// We stop moving forward at this point.
- Entry,
-}
-
-impl<I: SeqRead> DecoderImpl<I> {
- pub async fn new(input: I) -> io::Result<Self> {
- Self::new_full(input, "/".into()).await
- }
-
- pub(crate) async fn new_full(input: I, path: PathBuf) -> io::Result<Self> {
- let this = DecoderImpl {
- input,
- current_header: unsafe { mem::zeroed() },
- entry: Entry {
- path,
- kind: EntryKind::GoodbyeTable,
- metadata: Metadata::default(),
- },
- path_lengths: Vec::new(),
- state: State::Begin,
- with_goodbye_tables: false,
- };
-
- // this.read_next_entry().await?;
-
- Ok(this)
- }
-
- /// Get the next file entry, recursing into directories.
- pub async fn next(&mut self) -> Option<io::Result<Entry>> {
- self.next_do().await.transpose()
- }
-
- pub(crate) async fn next_do(&mut self) -> io::Result<Option<Entry>> {
- loop {
- match self.state {
- State::Eof => return Ok(None),
- State::Begin => return self.read_next_entry().await.map(Some),
- State::Default => {
- // we completely finished an entry, so now we're going "up" in the directory
- // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE:
- self.read_next_item().await?;
- }
- State::InPayload { offset } => {
- // We need to skip the current payload first.
- self.skip_entry(offset).await?;
- self.read_next_item().await?;
- }
- State::InGoodbyeTable => {
- self.skip_entry(0).await?;
- if self.path_lengths.pop().is_none() {
- // The root directory has an entry containing '1'.
- io_bail!("unexpected EOF in goodbye table");
- }
-
- if self.path_lengths.is_empty() {
- // we are at the end of the archive now
- self.state = State::Eof;
- return Ok(None);
- }
-
- // We left the directory, now keep going in our parent.
- self.state = State::Default;
- continue;
- }
- State::InSpecialFile => {
- self.entry.clear_data();
- self.state = State::InDirectory;
- self.entry.kind = EntryKind::Directory;
- }
- State::InDirectory => {
- // We're at the next FILENAME or GOODBYE item.
- }
- }
-
- match self.current_header.htype {
- format::PXAR_FILENAME => return self.handle_file_entry().await,
- format::PXAR_GOODBYE => {
- self.state = State::InGoodbyeTable;
-
- if self.with_goodbye_tables {
- self.entry.clear_data();
- return Ok(Some(Entry {
- path: PathBuf::new(),
- metadata: Metadata::default(),
- kind: EntryKind::GoodbyeTable,
- }));
- } else {
- // go up to goodbye table handling
- continue;
- }
- }
- h => io_bail!(
- "expected filename or directory-goodbye pxar entry, got: {:x}",
- h
- ),
- }
- }
- }
-
- pub fn content_size(&self) -> Option<u64> {
- if let State::InPayload { .. } = self.state {
- Some(self.current_header.content_size())
- } else {
- None
- }
- }
-
- pub fn content_reader<'a>(&'a mut self) -> Option<Contents<'a, I>> {
- if let State::InPayload { offset } = &mut self.state {
- Some(Contents::new(
- &mut self.input,
- offset,
- self.current_header.content_size(),
- ))
- } else {
- None
- }
- }
-
- async fn handle_file_entry(&mut self) -> io::Result<Option<Entry>> {
- let mut data = self.read_entry_as_bytes().await?;
-
- // filenames are zero terminated!
- if data.pop() != Some(0) {
- io_bail!("illegal path found (missing terminating zero)");
- }
- if data.is_empty() {
- io_bail!("illegal path found (empty)");
- }
-
- let path = PathBuf::from(OsString::from_vec(data));
- self.set_path(&path)?;
- self.read_next_entry().await.map(Some)
- }
-
- fn reset_path(&mut self) -> io::Result<()> {
- let path_len = *self
- .path_lengths
- .last()
- .ok_or_else(|| io_format_err!("internal decoder error: path underrun"))?;
- let mut path = mem::replace(&mut self.entry.path, PathBuf::new())
- .into_os_string()
- .into_vec();
- path.truncate(path_len);
- self.entry.path = PathBuf::from(OsString::from_vec(path));
- Ok(())
- }
-
- fn set_path(&mut self, path: &Path) -> io::Result<()> {
- self.reset_path()?;
- self.entry.path.push(path);
- Ok(())
- }
-
- async fn read_next_entry_or_eof(&mut self) -> io::Result<Option<Entry>> {
- self.state = State::Default;
- self.entry.clear_data();
-
- let header: Header = match seq_read_entry_or_eof(&mut self.input).await? {
- None => return Ok(None),
- Some(header) => header,
- };
-
- if header.htype == format::PXAR_HARDLINK {
- // The only "dangling" header without an 'Entry' in front of it because it does not
- // carry its own metadata.
- self.current_header = header;
-
- // Hardlinks have no metadata and no additional items.
- self.entry.metadata = Metadata::default();
- self.entry.kind = EntryKind::Hardlink(self.read_hardlink().await?);
-
- Ok(Some(self.entry.take()))
- } else if header.htype == format::PXAR_ENTRY {
- self.entry.metadata = Metadata {
- stat: seq_read_entry(&mut self.input).await?,
- ..Default::default()
- };
-
- self.current_header = unsafe { mem::zeroed() };
-
- while self.read_next_item().await? != ItemResult::Entry {}
-
- if self.entry.is_dir() {
- self.path_lengths
- .push(self.entry.path.as_os_str().as_bytes().len());
- }
-
- Ok(Some(self.entry.take()))
- } else {
- io_bail!(
- "expected pxar entry of type 'Entry', got: {:x}",
- header.htype
- );
- }
- }
-
- async fn read_next_entry(&mut self) -> io::Result<Entry> {
- self.read_next_entry_or_eof()
- .await?
- .ok_or_else(|| io_format_err!("unexpected EOF"))
- }
-
- async fn read_next_item(&mut self) -> io::Result<ItemResult> {
- self.read_next_header().await?;
- self.read_current_item().await
- }
-
- async fn read_next_header(&mut self) -> io::Result<()> {
- let dest = unsafe {
- std::slice::from_raw_parts_mut(
- &mut self.current_header as *mut Header as *mut u8,
- size_of_val(&self.current_header),
- )
- };
- seq_read_exact(&mut self.input, dest).await?;
- Ok(())
- }
-
- /// Read the next item, the header is already loaded.
- async fn read_current_item(&mut self) -> io::Result<ItemResult> {
- match self.current_header.htype {
- format::PXAR_XATTR => {
- let xattr = self.read_xattr().await?;
- self.entry.metadata.xattrs.push(xattr);
- }
- format::PXAR_ACL_USER => {
- let entry = self.read_acl_user().await?;
- self.entry.metadata.acl.users.push(entry);
- }
- format::PXAR_ACL_GROUP => {
- let entry = self.read_acl_group().await?;
- self.entry.metadata.acl.groups.push(entry);
- }
- format::PXAR_ACL_GROUP_OBJ => {
- if self.entry.metadata.acl.group_obj.is_some() {
- io_bail!("multiple acl group object entries detected");
- }
- let entry = self.read_acl_group_object().await?;
- self.entry.metadata.acl.group_obj = Some(entry);
- }
- format::PXAR_ACL_DEFAULT => {
- if self.entry.metadata.acl.default.is_some() {
- io_bail!("multiple acl default entries detected");
- }
- let entry = self.read_acl_default().await?;
- self.entry.metadata.acl.default = Some(entry);
- }
- format::PXAR_ACL_DEFAULT_USER => {
- let entry = self.read_acl_user().await?;
- self.entry.metadata.acl.default_users.push(entry);
- }
- format::PXAR_ACL_DEFAULT_GROUP => {
- let entry = self.read_acl_group().await?;
- self.entry.metadata.acl.default_groups.push(entry);
- }
- format::PXAR_FCAPS => {
- if self.entry.metadata.fcaps.is_some() {
- io_bail!("multiple file capability entries detected");
- }
- let entry = self.read_fcaps().await?;
- self.entry.metadata.fcaps = Some(entry);
- }
- format::PXAR_QUOTA_PROJID => {
- if self.entry.metadata.quota_project_id.is_some() {
- io_bail!("multiple quota project id entries detected");
- }
- let entry = self.read_quota_project_id().await?;
- self.entry.metadata.quota_project_id = Some(entry);
- }
- format::PXAR_SYMLINK => {
- self.entry.kind = EntryKind::Symlink(self.read_symlink().await?);
- return Ok(ItemResult::Entry);
- }
- format::PXAR_HARDLINK => io_bail!("encountered unexpected hardlink entry"),
- format::PXAR_DEVICE => {
- self.entry.kind = EntryKind::Device(self.read_device().await?);
- return Ok(ItemResult::Entry);
- }
- format::PXAR_PAYLOAD => {
- let offset = seq_read_position(&mut self.input).await.transpose()?;
- self.entry.kind = EntryKind::File {
- size: self.current_header.content_size(),
- offset,
- };
- self.state = State::InPayload { offset: 0 };
- return Ok(ItemResult::Entry);
- }
- format::PXAR_FILENAME | format::PXAR_GOODBYE => {
- if self.entry.metadata.is_fifo() {
- self.state = State::InSpecialFile;
- self.entry.kind = EntryKind::Fifo;
- return Ok(ItemResult::Entry);
- } else if self.entry.metadata.is_socket() {
- self.state = State::InSpecialFile;
- self.entry.kind = EntryKind::Socket;
- return Ok(ItemResult::Entry);
- } else {
- // As a shortcut this is copy-pasted to `next_do`'s `InSpecialFile` case.
- // Keep in mind when editing this!
- self.state = State::InDirectory;
- self.entry.kind = EntryKind::Directory;
- return Ok(ItemResult::Entry);
- }
- }
- _ => io_bail!("unexpected entry type: {:x}", self.current_header.htype),
- }
-
- Ok(ItemResult::Attribute)
- }
-
- //
- // Local read helpers.
- //
- // These utilize additional information and hence are not part of the `dyn SeqRead` impl.
- //
-
- async fn skip_entry(&mut self, offset: u64) -> io::Result<()> {
- let mut len = self.current_header.content_size() - offset;
- let scratch = scratch_buffer();
- while len >= (scratch.len() as u64) {
- seq_read_exact(&mut self.input, scratch).await?;
- len -= scratch.len() as u64;
- }
- let len = len as usize;
- if len > 0 {
- seq_read_exact(&mut self.input, &mut scratch[..len]).await?;
- }
- Ok(())
- }
-
- async fn read_entry_as_bytes(&mut self) -> io::Result<Vec<u8>> {
- let size = usize::try_from(self.current_header.content_size()).map_err(io_err_other)?;
- let data = seq_read_exact_data(&mut self.input, size).await?;
- Ok(data)
- }
-
- /// Helper to read a struct entry while checking its size.
- async fn read_simple_entry<T: Endian + 'static>(
- &mut self,
- what: &'static str,
- ) -> io::Result<T> {
- if self.current_header.content_size() != (size_of::<T>() as u64) {
- io_bail!(
- "bad {} size: {} (expected {})",
- what,
- self.current_header.content_size(),
- size_of::<T>(),
- );
- }
- seq_read_entry(&mut self.input).await
- }
-
- //
- // Read functions for PXAR components.
- //
-
- async fn read_xattr(&mut self) -> io::Result<format::XAttr> {
- let data = self.read_entry_as_bytes().await?;
-
- let name_len = data
- .iter()
- .position(|c| *c == 0)
- .ok_or_else(|| io_format_err!("missing value separator in xattr"))?;
-
- Ok(format::XAttr { data, name_len })
- }
-
- async fn read_symlink(&mut self) -> io::Result<format::Symlink> {
- let data = self.read_entry_as_bytes().await?;
- Ok(format::Symlink { data })
- }
-
- async fn read_hardlink(&mut self) -> io::Result<format::Hardlink> {
- let offset: u64 = self.read_simple_entry("hardlink offset").await?;
- let size =
- usize::try_from(self.current_header.content_size()).map_err(io_err_other)?
- - size_of::<u64>();
- let data = seq_read_exact_data(&mut self.input, size).await?;
- Ok(format::Hardlink { offset, data })
- }
-
- async fn read_device(&mut self) -> io::Result<format::Device> {
- self.read_simple_entry("device").await
- }
-
- async fn read_fcaps(&mut self) -> io::Result<format::FCaps> {
- let data = self.read_entry_as_bytes().await?;
- Ok(format::FCaps { data })
- }
-
- async fn read_acl_user(&mut self) -> io::Result<format::acl::User> {
- self.read_simple_entry("acl user").await
- }
-
- async fn read_acl_group(&mut self) -> io::Result<format::acl::Group> {
- self.read_simple_entry("acl group").await
- }
-
- async fn read_acl_group_object(&mut self) -> io::Result<format::acl::GroupObject> {
- self.read_simple_entry("acl group object").await
- }
-
- async fn read_acl_default(&mut self) -> io::Result<format::acl::Default> {
- self.read_simple_entry("acl default").await
- }
-
- async fn read_quota_project_id(&mut self) -> io::Result<format::QuotaProjectId> {
- self.read_simple_entry("quota project id").await
- }
-}
-
-pub struct Contents<'a, T: SeqRead> {
- input: &'a mut T,
- at: &'a mut u64,
- len: u64,
-}
-
-impl<'a, T: SeqRead> Contents<'a, T> {
- pub fn new(input: &'a mut T, at: &'a mut u64, len: u64) -> Self {
- Self { input, at, len }
- }
-
- #[inline]
- fn remaining(&self) -> u64 {
- self.len - *self.at
- }
-}
-
-impl<'a, T: SeqRead> SeqRead for Contents<'a, T> {
- fn poll_seq_read(
- mut self: Pin<&mut Self>,
- cx: &mut Context,
- buf: &mut [u8],
- ) -> Poll<io::Result<usize>> {
- let max_read = (buf.len() as u64).min(self.remaining()) as usize;
- if max_read == 0 {
- return Poll::Ready(Ok(0));
- }
-
- let buf = &mut buf[..max_read];
- let got = ready!(unsafe { Pin::new_unchecked(&mut *self.input) }.poll_seq_read(cx, buf))?;
- *self.at += got as u64;
- Poll::Ready(Ok(got))
- }
-
- fn poll_position(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
- unsafe { Pin::new_unchecked(&mut *self.input) }.poll_position(cx)
- }
-}
--- /dev/null
+//! The `pxar` decoder state machine.
+//!
+//! This is the implementation used by both the synchronous and async pxar wrappers.
+
+use std::convert::TryFrom;
+use std::ffi::OsString;
+use std::io;
+use std::mem::{self, size_of, size_of_val, MaybeUninit};
+use std::os::unix::ffi::{OsStrExt, OsStringExt};
+use std::path::{Path, PathBuf};
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+//use std::os::unix::fs::FileExt;
+
+use endian_trait::Endian;
+
+use crate::format::{self, Header};
+use crate::poll_fn::poll_fn;
+use crate::util::{self, io_err_other};
+use crate::{Entry, EntryKind, Metadata};
+
+pub mod aio;
+pub mod sync;
+
+#[doc(inline)]
+pub use sync::Decoder;
+
+/// To skip through non-seekable files.
+///
+/// Zero-initialized instead of `MaybeUninit`, so that the `&mut [u8]` handed out by
+/// `scratch_buffer()` never refers to uninitialized memory.
+static mut SCRATCH_BUFFER: [u8; 4096] = [0u8; 4096];
+
+/// Borrow the shared scratch buffer as a dump target for bytes which are read and discarded.
+///
+/// NOTE(review): every call returns a mutable reference to the same static. Callers must not
+/// rely on its contents and must not use it from several threads at once -- confirm call sites.
+fn scratch_buffer() -> &'static mut [u8] {
+    // SAFETY (partial): the buffer is always initialized. Exclusive access is NOT enforced
+    // here, see the note above.
+    unsafe { &mut SCRATCH_BUFFER[..] }
+}
+
+/// Sequential read interface used by the decoder's state machine.
+///
+/// To simply iterate through a directory we just need the equivalent of `poll_read()`.
+///
+/// Additionally there is an optional `poll_position()` method which can be implemented for types
+/// supporting `Seek` or `AsyncSeek`. In this case the starting position of each entry becomes
+/// available (accessible via the `Entry::offset()`), to allow jumping between entries.
+pub trait SeqRead {
+ /// Mostly we want to read sequentially, so this is basically an `AsyncRead` equivalent.
+ ///
+ /// Yields the number of bytes read into `buf`; the helpers in this module treat `0` as EOF.
+ fn poll_seq_read(
+ self: Pin<&mut Self>,
+ cx: &mut Context,
+ buf: &mut [u8],
+ ) -> Poll<io::Result<usize>>;
+
+ /// While going through the data we may want to take notes about some offsets within the file
+ /// for later. If the reader does not support seeking or positional reading, this can just
+ /// return `None`, which is what the default implementation does.
+ fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
+ Poll::Ready(None)
+ }
+}
+
+/// Allow using trait objects for generics taking a `SeqRead`:
+///
+/// Both methods simply forward to the referenced trait object.
+impl<'a> SeqRead for &mut (dyn SeqRead + 'a) {
+ fn poll_seq_read(
+ self: Pin<&mut Self>,
+ cx: &mut Context,
+ buf: &mut [u8],
+ ) -> Poll<io::Result<usize>> {
+ // Re-borrow through the pinned `&mut` to reach the trait object behind it.
+ // NOTE(review): treats the referenced object as pinned along with the reference -- confirm.
+ unsafe {
+ self.map_unchecked_mut(|this| &mut **this)
+ .poll_seq_read(cx, buf)
+ }
+ }
+
+ fn poll_position(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
+ // Same projection as in `poll_seq_read`.
+ unsafe { self.map_unchecked_mut(|this| &mut **this).poll_position(cx) }
+ }
+}
+
+/// Awaitable version of `poll_position`.
+///
+/// Resolves to whatever `poll_position` reports, i.e. `None` for inputs without a position.
+pub(crate) async fn seq_read_position<T: SeqRead + ?Sized>(
+ input: &mut T,
+) -> Option<io::Result<u64>> {
+ // NOTE(review): pins `input` in place for the call; assumes it is not moved between polls.
+ poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *input).poll_position(cx) }).await
+}
+
+/// Awaitable version of `poll_seq_read`.
+///
+/// Performs a single read, so fewer bytes than `buf.len()` may be returned.
+pub(crate) async fn seq_read<T: SeqRead + ?Sized>(
+ input: &mut T,
+ buf: &mut [u8],
+) -> io::Result<usize> {
+ // NOTE(review): pins `input` in place for the call; assumes it is not moved between polls.
+ poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *input).poll_seq_read(cx, buf) }).await
+}
+
+/// `read_exact` which reports a clean EOF instead of failing on it.
+///
+/// Returns `Ok(None)` if the input is at EOF before a single byte was read, `Ok(Some(()))` once
+/// `buf` has been filled, and an "unexpected EOF" error if the input ends part-way through.
+async fn seq_read_exact_or_eof<T>(input: &mut T, buf: &mut [u8]) -> io::Result<Option<()>>
+where
+    T: SeqRead + ?Sized,
+{
+    let mut filled = 0;
+    while filled != buf.len() {
+        let got = seq_read(&mut *input, &mut buf[filled..]).await?;
+        if got == 0 {
+            // EOF is only acceptable as long as nothing has been read yet.
+            if filled == 0 {
+                return Ok(None);
+            }
+            io_bail!("unexpected EOF");
+        }
+        filled += got;
+    }
+    Ok(Some(()))
+}
+
+/// `read_exact` - since that's what we _actually_ want most of the time.
+async fn seq_read_exact<T: SeqRead + ?Sized>(input: &mut T, buf: &mut [u8]) -> io::Result<()> {
+ match seq_read_exact_or_eof(input, buf).await? {
+ Some(()) => Ok(()),
+ None => io_bail!("unexpected eof"),
+ }
+}
+
+/// Read exactly `size` bytes into a newly allocated byte vector.
+async fn seq_read_exact_data<T>(input: &mut T, size: usize) -> io::Result<Vec<u8>>
+where
+    T: SeqRead + ?Sized,
+{
+    let mut buffer = util::vec_new(size);
+    seq_read_exact(input, buffer.as_mut_slice()).await?;
+    Ok(buffer)
+}
+
+/// `seq_read_entry` with EOF handling
+///
+/// Reads `size_of::<E>()` bytes into an `E` and converts it via `from_le()`. Returns `Ok(None)`
+/// if the input was already at EOF; an EOF in the middle of the value is an error.
+async fn seq_read_entry_or_eof<T, E>(input: &mut T) -> io::Result<Option<E>>
+where
+ T: SeqRead + ?Sized,
+ E: Endian,
+{
+ let mut data = MaybeUninit::<E>::uninit();
+ // View the not yet initialized value as raw bytes, so it can be filled by the reader.
+ // NOTE(review): assumes every byte pattern is a valid `E` (plain data struct) -- confirm.
+ let buf =
+ unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<E>()) };
+ if seq_read_exact_or_eof(input, buf).await?.is_none() {
+ return Ok(None);
+ }
+ // Reaching this point means all `size_of::<E>()` bytes have been written by the read above.
+ Ok(Some(unsafe { data.assume_init().from_le() }))
+}
+
+/// Helper to read into an `Endian`-implementing `struct`.
+///
+/// Unlike `seq_read_entry_or_eof`, an EOF before the first byte is an error here as well.
+async fn seq_read_entry<T: SeqRead + ?Sized, E: Endian>(input: &mut T) -> io::Result<E> {
+    seq_read_entry_or_eof(input)
+        .await?
+        .ok_or_else(|| io_format_err!("unexpected EOF"))
+}
+
+/// The decoder state machine implementation.
+///
+/// We use `async fn` to implement the decoder state machine so that we can easily plug in both
+/// synchronous or `async` I/O objects in as input.
+pub(crate) struct DecoderImpl<T> {
+ /// The reader the archive is decoded from.
+ input: T,
+ /// Header of the item the decoder is currently positioned at (zeroed until one was read).
+ current_header: Header,
+ /// The entry currently being assembled.
+ entry: Entry,
+ /// Path lengths in bytes which `reset_path` truncates the current path back to.
+ path_lengths: Vec<usize>,
+ /// Where in the archive structure the decoder currently is, see `State`.
+ state: State,
+ /// Whether `next_do` yields an entry of kind `GoodbyeTable` for each goodbye table.
+ with_goodbye_tables: bool,
+}
+
+/// Position of the decoder within the archive structure, as used by `next_do`.
+enum State {
+ /// Nothing has been read yet; the next step reads the first entry.
+ Begin,
+ /// An entry was completely finished; the next item's header still needs to be read.
+ Default,
+ /// Inside a payload; presumably `offset` is how much of it was consumed already, the rest
+ /// is skipped before moving on.
+ InPayload {
+ offset: u64,
+ },
+
+ /// file entries with no data (fifo, socket)
+ InSpecialFile,
+
+ /// At a goodbye table, whose content is skipped when moving on.
+ InGoodbyeTable,
+ /// In a directory, positioned at its next FILENAME or GOODBYE item.
+ InDirectory,
+ /// The end of the archive was reached; no further entries are returned.
+ Eof,
+}
+
+/// Control flow while parsing items.
+///
+/// When parsing an entry, we usually go through all of its attribute items. Once we reach the end
+/// of the entry we stop.
+/// Note that for a directory this means we stop at the beginning of its contents.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+enum ItemResult {
+ /// We parsed an "attribute" item and should continue parsing.
+ Attribute,
+
+ /// We finished an entry (`SYMLINK`, `HARDLINK`, ...) or just entered the contents of a
+ /// directory (`FILENAME`, `GOODBYE`).
+ ///
+ /// We stop moving forward at this point.
+ Entry,
+}
+
+impl<I: SeqRead> DecoderImpl<I> {
+    /// Create a decoder whose entry paths start at `/`.
+    pub async fn new(input: I) -> io::Result<Self> {
+        let root = PathBuf::from("/");
+        Self::new_full(input, root).await
+    }
+
+    /// Create a decoder whose entry paths start at `path`.
+    ///
+    /// Nothing is read from `input` yet: the state starts out as `Begin` and the first entry
+    /// is read by the first call to `next_do`.
+    pub(crate) async fn new_full(input: I, path: PathBuf) -> io::Result<Self> {
+        // Placeholder entry which already carries the starting path.
+        let entry = Entry {
+            path,
+            kind: EntryKind::GoodbyeTable,
+            metadata: Metadata::default(),
+        };
+
+        Ok(Self {
+            input,
+            // NOTE(review): assumes an all-zero `Header` is a valid value -- confirm.
+            current_header: unsafe { mem::zeroed() },
+            entry,
+            path_lengths: Vec::new(),
+            state: State::Begin,
+            with_goodbye_tables: false,
+        })
+    }
+
+    /// Get the next file entry, recursing into directories.
+    ///
+    /// Iterator-style wrapper around `next_do`: `None` marks the end of the archive.
+    pub async fn next(&mut self) -> Option<io::Result<Entry>> {
+        match self.next_do().await {
+            Ok(Some(entry)) => Some(Ok(entry)),
+            Ok(None) => None,
+            Err(err) => Some(Err(err)),
+        }
+    }
+
+ /// Advance the state machine until the next entry is available.
+ ///
+ /// Returns `Ok(None)` at the end of the archive. Each loop iteration first brings the decoder
+ /// to the next item header according to the current state, and then dispatches on that
+ /// header, which has to be a FILENAME (next entry) or a GOODBYE (end of a directory).
+ pub(crate) async fn next_do(&mut self) -> io::Result<Option<Entry>> {
+ loop {
+ match self.state {
+ State::Eof => return Ok(None),
+ State::Begin => return self.read_next_entry().await.map(Some),
+ State::Default => {
+ // we completely finished an entry, so now we're going "up" in the directory
+ // hierarchy and parse the next PXAR_FILENAME or the PXAR_GOODBYE:
+ self.read_next_item().await?;
+ }
+ State::InPayload { offset } => {
+ // We need to skip the current payload first.
+ self.skip_entry(offset).await?;
+ self.read_next_item().await?;
+ }
+ State::InGoodbyeTable => {
+ self.skip_entry(0).await?;
+ if self.path_lengths.pop().is_none() {
+ // The root directory has an entry containing '1'.
+ io_bail!("unexpected EOF in goodbye table");
+ }
+
+ if self.path_lengths.is_empty() {
+ // we are at the end of the archive now
+ self.state = State::Eof;
+ return Ok(None);
+ }
+
+ // We left the directory, now keep going in our parent.
+ self.state = State::Default;
+ continue;
+ }
+ State::InSpecialFile => {
+ self.entry.clear_data();
+ self.state = State::InDirectory;
+ self.entry.kind = EntryKind::Directory;
+ }
+ State::InDirectory => {
+ // We're at the next FILENAME or GOODBYE item.
+ }
+ }
+
+ // At this point `current_header` is the header of the item to handle next.
+ match self.current_header.htype {
+ format::PXAR_FILENAME => return self.handle_file_entry().await,
+ format::PXAR_GOODBYE => {
+ self.state = State::InGoodbyeTable;
+
+ if self.with_goodbye_tables {
+ self.entry.clear_data();
+ return Ok(Some(Entry {
+ path: PathBuf::new(),
+ metadata: Metadata::default(),
+ kind: EntryKind::GoodbyeTable,
+ }));
+ } else {
+ // go up to goodbye table handling
+ continue;
+ }
+ }
+ h => io_bail!(
+ "expected filename or directory-goodbye pxar entry, got: {:x}",
+ h
+ ),
+ }
+ }
+ }
+
+ /// Content size of the current header while the decoder is in the `InPayload` state, `None`
+ /// in every other state.
+ pub fn content_size(&self) -> Option<u64> {
+     match self.state {
+         State::InPayload { .. } => Some(self.current_header.content_size()),
+         _ => None,
+     }
+ }
+
+ /// Get a reader for the current payload, or `None` when the decoder is not in the `InPayload`
+ /// state.
+ ///
+ /// The reader shares the payload offset stored in the decoder state, so bytes consumed through
+ /// it are accounted for when the rest of the payload is skipped later on.
+ pub fn content_reader<'a>(&'a mut self) -> Option<Contents<'a, I>> {
+     match &mut self.state {
+         State::InPayload { offset } => {
+             let len = self.current_header.content_size();
+             Some(Contents::new(&mut self.input, offset, len))
+         }
+         _ => None,
+     }
+ }
+
+ /// Handle a `PXAR_FILENAME` item: read the file name, update the current path and parse the
+ /// entry following it.
+ ///
+ /// The name comes straight from the archive, so it is checked to be a single regular path
+ /// component before it is used: `PathBuf::push` (see `set_path`) replaces the whole path when
+ /// given an absolute name, and `/`, `.` or `..` would make the reported path leave the
+ /// directory which is currently being decoded.
+ ///
+ /// # Errors
+ ///
+ /// Fails if reading fails, if the name is not zero terminated, is empty, contains a slash or
+ /// is `.` or `..`, or if parsing the following entry fails.
+ async fn handle_file_entry(&mut self) -> io::Result<Option<Entry>> {
+     let mut data = self.read_entry_as_bytes().await?;
+
+     // filenames are zero terminated!
+     if data.pop() != Some(0) {
+         io_bail!("illegal path found (missing terminating zero)");
+     }
+     if data.is_empty() {
+         io_bail!("illegal path found (empty)");
+     }
+     if data.contains(&b'/') {
+         io_bail!("illegal path found (contains slashes)");
+     }
+     if data == b"." || data == b".." {
+         io_bail!("illegal path found (dot or dot-dot component)");
+     }
+
+     let path = PathBuf::from(OsString::from_vec(data));
+     self.set_path(&path)?;
+     self.read_next_entry().await.map(Some)
+ }
+
+ /// Cut the current entry path back to the length recorded last in `path_lengths`.
+ ///
+ /// Fails with an internal decoder error if no length is recorded.
+ fn reset_path(&mut self) -> io::Result<()> {
+     let keep = match self.path_lengths.last() {
+         Some(&len) => len,
+         None => return Err(io_format_err!("internal decoder error: path underrun")),
+     };
+
+     // Work on the raw bytes so the path can be truncated at a byte offset.
+     let mut raw = mem::take(&mut self.entry.path)
+         .into_os_string()
+         .into_vec();
+     raw.truncate(keep);
+     self.entry.path = PathBuf::from(OsString::from_vec(raw));
+     Ok(())
+ }
+
+ /// Reset the entry path via `reset_path`, then push `path` onto it.
+ fn set_path(&mut self, path: &Path) -> io::Result<()> {
+     self.reset_path().map(|()| self.entry.path.push(path))
+ }
+
+ /// Read the next complete entry, or return `Ok(None)` if the input ends before a header.
+ ///
+ /// Accepts either a `PXAR_HARDLINK` header or a `PXAR_ENTRY` header followed by its items;
+ /// any other header type is an error.
+ async fn read_next_entry_or_eof(&mut self) -> io::Result<Option<Entry>> {
+     self.state = State::Default;
+     self.entry.clear_data();
+
+     let header: Header = match seq_read_entry_or_eof(&mut self.input).await? {
+         Some(header) => header,
+         None => return Ok(None),
+     };
+
+     match header.htype {
+         format::PXAR_HARDLINK => {
+             // The only "dangling" header without an 'Entry' in front of it, since it does
+             // not carry its own metadata.
+             self.current_header = header;
+
+             // Hardlinks have neither metadata nor additional items.
+             self.entry.metadata = Metadata::default();
+             let link = self.read_hardlink().await?;
+             self.entry.kind = EntryKind::Hardlink(link);
+         }
+         format::PXAR_ENTRY => {
+             let stat = seq_read_entry(&mut self.input).await?;
+             self.entry.metadata = Metadata {
+                 stat,
+                 ..Default::default()
+             };
+
+             self.current_header = unsafe { mem::zeroed() };
+
+             // Consume attribute items until the entry is complete.
+             loop {
+                 if self.read_next_item().await? == ItemResult::Entry {
+                     break;
+                 }
+             }
+
+             if self.entry.is_dir() {
+                 // Remember where this directory's path ends for `reset_path`.
+                 let dir_len = self.entry.path.as_os_str().as_bytes().len();
+                 self.path_lengths.push(dir_len);
+             }
+         }
+         other => io_bail!("expected pxar entry of type 'Entry', got: {:x}", other),
+     }
+
+     Ok(Some(self.entry.take()))
+ }
+
+ async fn read_next_entry(&mut self) -> io::Result<Entry> {
+ self.read_next_entry_or_eof()
+ .await?
+ .ok_or_else(|| io_format_err!("unexpected EOF"))
+ }
+
+ /// Load the next header, then parse the item it introduces.
+ async fn read_next_item(&mut self) -> io::Result<ItemResult> {
+     self.read_next_header().await?;
+     let outcome = self.read_current_item().await?;
+     Ok(outcome)
+ }
+
+ /// Read the next item header from the input into `current_header`.
+ ///
+ /// The header is decoded with `seq_read_entry`, the same endian-aware helper family which
+ /// `read_next_entry_or_eof` uses for headers, instead of copying the raw bytes over the
+ /// struct. This keeps both code paths consistent and avoids the unsafe byte-slice view.
+ /// On a read error `current_header` is left untouched.
+ async fn read_next_header(&mut self) -> io::Result<()> {
+     self.current_header = seq_read_entry(&mut self.input).await?;
+     Ok(())
+ }
+
+ /// Parse the item whose header is already loaded in `current_header`.
+ ///
+ /// Attribute items (xattrs, ACLs, fcaps, quota project id) are stored in the entry's metadata
+ /// and yield `ItemResult::Attribute`. Items which complete the entry, or which show that a
+ /// directory's contents begin, set the entry kind and yield `ItemResult::Entry`. Duplicates of
+ /// single-valued attributes, a hardlink item and unknown item types are errors.
+ async fn read_current_item(&mut self) -> io::Result<ItemResult> {
+     let outcome = match self.current_header.htype {
+         format::PXAR_XATTR => {
+             let xattr = self.read_xattr().await?;
+             self.entry.metadata.xattrs.push(xattr);
+             ItemResult::Attribute
+         }
+         format::PXAR_ACL_USER => {
+             let user = self.read_acl_user().await?;
+             self.entry.metadata.acl.users.push(user);
+             ItemResult::Attribute
+         }
+         format::PXAR_ACL_GROUP => {
+             let group = self.read_acl_group().await?;
+             self.entry.metadata.acl.groups.push(group);
+             ItemResult::Attribute
+         }
+         format::PXAR_ACL_GROUP_OBJ => {
+             if self.entry.metadata.acl.group_obj.is_some() {
+                 io_bail!("multiple acl group object entries detected");
+             }
+             let group_obj = self.read_acl_group_object().await?;
+             self.entry.metadata.acl.group_obj = Some(group_obj);
+             ItemResult::Attribute
+         }
+         format::PXAR_ACL_DEFAULT => {
+             if self.entry.metadata.acl.default.is_some() {
+                 io_bail!("multiple acl default entries detected");
+             }
+             let default = self.read_acl_default().await?;
+             self.entry.metadata.acl.default = Some(default);
+             ItemResult::Attribute
+         }
+         format::PXAR_ACL_DEFAULT_USER => {
+             let user = self.read_acl_user().await?;
+             self.entry.metadata.acl.default_users.push(user);
+             ItemResult::Attribute
+         }
+         format::PXAR_ACL_DEFAULT_GROUP => {
+             let group = self.read_acl_group().await?;
+             self.entry.metadata.acl.default_groups.push(group);
+             ItemResult::Attribute
+         }
+         format::PXAR_FCAPS => {
+             if self.entry.metadata.fcaps.is_some() {
+                 io_bail!("multiple file capability entries detected");
+             }
+             let fcaps = self.read_fcaps().await?;
+             self.entry.metadata.fcaps = Some(fcaps);
+             ItemResult::Attribute
+         }
+         format::PXAR_QUOTA_PROJID => {
+             if self.entry.metadata.quota_project_id.is_some() {
+                 io_bail!("multiple quota project id entries detected");
+             }
+             let project_id = self.read_quota_project_id().await?;
+             self.entry.metadata.quota_project_id = Some(project_id);
+             ItemResult::Attribute
+         }
+         format::PXAR_SYMLINK => {
+             let link = self.read_symlink().await?;
+             self.entry.kind = EntryKind::Symlink(link);
+             ItemResult::Entry
+         }
+         format::PXAR_HARDLINK => io_bail!("encountered unexpected hardlink entry"),
+         format::PXAR_DEVICE => {
+             let device = self.read_device().await?;
+             self.entry.kind = EntryKind::Device(device);
+             ItemResult::Entry
+         }
+         format::PXAR_PAYLOAD => {
+             let offset = seq_read_position(&mut self.input).await.transpose()?;
+             self.entry.kind = EntryKind::File {
+                 size: self.current_header.content_size(),
+                 offset,
+             };
+             // Nothing of the payload has been consumed yet.
+             self.state = State::InPayload { offset: 0 };
+             ItemResult::Entry
+         }
+         format::PXAR_FILENAME | format::PXAR_GOODBYE => {
+             // The directory case is copy-pasted to `next_do`'s `InSpecialFile` case as a
+             // shortcut. Keep that in mind when editing this!
+             let (state, kind) = if self.entry.metadata.is_fifo() {
+                 (State::InSpecialFile, EntryKind::Fifo)
+             } else if self.entry.metadata.is_socket() {
+                 (State::InSpecialFile, EntryKind::Socket)
+             } else {
+                 (State::InDirectory, EntryKind::Directory)
+             };
+             self.state = state;
+             self.entry.kind = kind;
+             ItemResult::Entry
+         }
+         _ => io_bail!("unexpected entry type: {:x}", self.current_header.htype),
+     };
+
+     Ok(outcome)
+ }
+
+ //
+ // Local read helpers.
+ //
+ // These utilize additional information and hence are not part of the `dyn SeqRead` impl.
+ //
+
+ /// Read and discard what is left of the current item's content.
+ ///
+ /// `offset` is the number of content bytes which were consumed already. The data is read into
+ /// the scratch buffer in pieces of at most the buffer's length.
+ async fn skip_entry(&mut self, offset: u64) -> io::Result<()> {
+     let mut remaining = self.current_header.content_size() - offset;
+     let scratch = scratch_buffer();
+     let block = scratch.len() as u64;
+
+     while remaining > 0 {
+         // Full blocks first, the final iteration takes whatever is left.
+         let step = remaining.min(block);
+         seq_read_exact(&mut self.input, &mut scratch[..step as usize]).await?;
+         remaining -= step;
+     }
+
+     Ok(())
+ }
+
+ /// Read the whole content of the current item into a freshly allocated byte vector.
+ ///
+ /// Fails if the content size does not fit into a `usize`.
+ async fn read_entry_as_bytes(&mut self) -> io::Result<Vec<u8>> {
+     let len = self.current_header.content_size();
+     let len = usize::try_from(len).map_err(io_err_other)?;
+     Ok(seq_read_exact_data(&mut self.input, len).await?)
+ }
+
+ /// Read a fixed-size struct item, verifying first that the content size of the current header
+ /// is exactly `size_of::<T>()`.
+ ///
+ /// `what` names the item in the error message on a size mismatch.
+ async fn read_simple_entry<T: Endian + 'static>(
+     &mut self,
+     what: &'static str,
+ ) -> io::Result<T> {
+     let expected = size_of::<T>();
+     let actual = self.current_header.content_size();
+     if actual != expected as u64 {
+         io_bail!("bad {} size: {} (expected {})", what, actual, expected);
+     }
+     seq_read_entry(&mut self.input).await
+ }
+
+ //
+ // Read functions for PXAR components.
+ //
+
+ /// Read a `PXAR_XATTR` item.
+ ///
+ /// `name_len` is the position of the first zero byte in the data; it is an error if there is
+ /// none.
+ async fn read_xattr(&mut self) -> io::Result<format::XAttr> {
+     let data = self.read_entry_as_bytes().await?;
+
+     match data.iter().position(|byte| *byte == 0) {
+         Some(name_len) => Ok(format::XAttr { data, name_len }),
+         None => Err(io_format_err!("missing value separator in xattr")),
+     }
+ }
+
+ /// Read a `PXAR_SYMLINK` item; the raw content bytes are stored as they are.
+ async fn read_symlink(&mut self) -> io::Result<format::Symlink> {
+     self.read_entry_as_bytes()
+         .await
+         .map(|data| format::Symlink { data })
+ }
+
+ /// Read a `PXAR_HARDLINK` item: a `u64` offset followed by the remaining content bytes.
+ ///
+ /// The offset must not be read via `read_simple_entry`: that helper requires the content size
+ /// to be exactly `size_of::<u64>()`, which rejects every hardlink carrying any data after the
+ /// offset. Instead the size is checked to be large enough for the offset (which also rules out
+ /// an underflow when computing the data length) and both parts are read directly.
+ ///
+ /// # Errors
+ ///
+ /// Fails if the content size does not fit into a `usize`, if it is smaller than the offset
+ /// field, or if reading fails.
+ async fn read_hardlink(&mut self) -> io::Result<format::Hardlink> {
+     let content_size =
+         usize::try_from(self.current_header.content_size()).map_err(io_err_other)?;
+     if content_size < size_of::<u64>() {
+         io_bail!("bad hardlink entry (too small)");
+     }
+     let data_size = content_size - size_of::<u64>();
+
+     let offset: u64 = seq_read_entry(&mut self.input).await?;
+     let data = seq_read_exact_data(&mut self.input, data_size).await?;
+     Ok(format::Hardlink { offset, data })
+ }
+
+ /// Read a `PXAR_DEVICE` item as a size-checked struct.
+ async fn read_device(&mut self) -> io::Result<format::Device> {
+     self.read_simple_entry::<format::Device>("device").await
+ }
+
+ /// Read a `PXAR_FCAPS` item; the raw content bytes are stored as they are.
+ async fn read_fcaps(&mut self) -> io::Result<format::FCaps> {
+     self.read_entry_as_bytes()
+         .await
+         .map(|data| format::FCaps { data })
+ }
+
+ /// Read an ACL user item (used for both `PXAR_ACL_USER` and `PXAR_ACL_DEFAULT_USER`) as a
+ /// size-checked struct.
+ async fn read_acl_user(&mut self) -> io::Result<format::acl::User> {
+     self.read_simple_entry::<format::acl::User>("acl user").await
+ }
+
+ /// Read an ACL group item (used for both `PXAR_ACL_GROUP` and `PXAR_ACL_DEFAULT_GROUP`) as a
+ /// size-checked struct.
+ async fn read_acl_group(&mut self) -> io::Result<format::acl::Group> {
+     self.read_simple_entry::<format::acl::Group>("acl group")
+         .await
+ }
+
+ /// Read a `PXAR_ACL_GROUP_OBJ` item as a size-checked struct.
+ async fn read_acl_group_object(&mut self) -> io::Result<format::acl::GroupObject> {
+     self.read_simple_entry::<format::acl::GroupObject>("acl group object")
+         .await
+ }
+
+ /// Read a `PXAR_ACL_DEFAULT` item as a size-checked struct.
+ async fn read_acl_default(&mut self) -> io::Result<format::acl::Default> {
+     self.read_simple_entry::<format::acl::Default>("acl default")
+         .await
+ }
+
+ /// Read a `PXAR_QUOTA_PROJID` item as a size-checked struct.
+ async fn read_quota_project_id(&mut self) -> io::Result<format::QuotaProjectId> {
+     self.read_simple_entry::<format::QuotaProjectId>("quota project id")
+         .await
+ }
+}
+
+/// Reader for the payload the decoder is currently positioned in.
+///
+/// Reads are forwarded to the wrapped input and limited to the bytes left between `at` and
+/// `len`.
+pub struct Contents<'a, T: SeqRead> {
+    /// The input the payload is read from.
+    input: &'a mut T,
+
+    /// Number of payload bytes consumed so far; advanced by every successful read.
+    at: &'a mut u64,
+
+    /// Total length of the payload.
+    len: u64,
+}
+
+impl<'a, T: SeqRead> Contents<'a, T> {
+    /// Create a payload reader over `input`, with `at` bytes of `len` already consumed.
+    pub fn new(input: &'a mut T, at: &'a mut u64, len: u64) -> Self {
+        Contents { input, at, len }
+    }
+
+    /// Number of payload bytes which have not been read yet.
+    #[inline]
+    fn remaining(&self) -> u64 {
+        let consumed = *self.at;
+        self.len - consumed
+    }
+}
+
+impl<'a, T: SeqRead> SeqRead for Contents<'a, T> {
+    /// Read from the underlying input, never going beyond the end of the payload.
+    ///
+    /// Reports `Ok(0)` once the payload is exhausted (or when `buf` is empty).
+    fn poll_seq_read(
+        self: Pin<&mut Self>,
+        cx: &mut Context,
+        buf: &mut [u8],
+    ) -> Poll<io::Result<usize>> {
+        let this = self.get_mut();
+
+        let limit = this.remaining().min(buf.len() as u64) as usize;
+        if limit == 0 {
+            return Poll::Ready(Ok(0));
+        }
+
+        // NOTE(review): pins the borrowed input without proof that it is never moved
+        // afterwards - confirm this is fine for all `SeqRead` implementors.
+        let input = unsafe { Pin::new_unchecked(&mut *this.input) };
+        match input.poll_seq_read(cx, &mut buf[..limit]) {
+            Poll::Ready(Ok(got)) => {
+                *this.at += got as u64;
+                Poll::Ready(Ok(got))
+            }
+            Poll::Ready(Err(err)) => Poll::Ready(Err(err)),
+            Poll::Pending => Poll::Pending,
+        }
+    }
+
+    /// Forward the position query to the underlying input unchanged.
+    fn poll_position(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
+        let this = self.get_mut();
+        unsafe { Pin::new_unchecked(&mut *this.input) }.poll_position(cx)
+    }
+}
+++ /dev/null
-//! The `pxar` encoder state machine.
-//!
-//! This is the implementation used by both the synchronous and async pxar wrappers.
-
-use std::io;
-use std::mem::{forget, size_of, size_of_val, take};
-use std::os::unix::ffi::OsStrExt;
-use std::path::Path;
-use std::pin::Pin;
-use std::task::{Context, Poll};
-
-use endian_trait::Endian;
-
-use crate::binary_tree_array;
-use crate::decoder::{self, SeqRead};
-use crate::format::{self, GoodbyeItem};
-use crate::poll_fn::poll_fn;
-use crate::Metadata;
-
-pub mod aio;
-pub mod sync;
-
-#[doc(inline)]
-pub use sync::Encoder;
-
-/// Sequential write interface used by the encoder's state machine.
-///
-/// This is our internal writer trait which is available for `std::io::Write` types in the
-/// synchronous wrapper and for both `tokio` and `future` `AsyncWrite` types in the asynchronous
-/// wrapper.
-pub trait SeqWrite {
- fn poll_seq_write(
- self: Pin<&mut Self>,
- cx: &mut Context,
- buf: &[u8],
- ) -> Poll<io::Result<usize>>;
-
- fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>>;
-
- fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>>;
-
- /// While writing to a pxar archive we need to remember how much dat we've written to track some
- /// offsets. Particularly items like the goodbye table need to be able to compute offsets to
- /// further back in the archive.
- fn poll_position(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<u64>>;
-
- /// To avoid recursively borrowing each time we nest into a subdirectory we add this helper.
- /// Otherwise starting a subdirectory will get a trait object pointing to `T`, nesting another
- /// subdirectory in that would have a trait object pointing to the trait object, and so on.
- fn as_trait_object(&mut self) -> &mut dyn SeqWrite
- where
- Self: Sized,
- {
- self as &mut dyn SeqWrite
- }
-}
-
-/// Allow using trait objects for generics taking a `SeqWrite`.
-impl<'a> SeqWrite for &mut (dyn SeqWrite + 'a) {
- fn poll_seq_write(
- self: Pin<&mut Self>,
- cx: &mut Context,
- buf: &[u8],
- ) -> Poll<io::Result<usize>> {
- unsafe {
- self.map_unchecked_mut(|this| &mut **this)
- .poll_seq_write(cx, buf)
- }
- }
-
- fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
- unsafe { self.map_unchecked_mut(|this| &mut **this).poll_flush(cx) }
- }
-
- fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
- unsafe { self.map_unchecked_mut(|this| &mut **this).poll_close(cx) }
- }
-
- fn poll_position(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<u64>> {
- unsafe { self.map_unchecked_mut(|this| &mut **this).poll_position(cx) }
- }
-
- fn as_trait_object(&mut self) -> &mut dyn SeqWrite
- where
- Self: Sized,
- {
- &mut **self
- }
-}
-
-/// awaitable version of `poll_position`.
-async fn seq_write_position<T: SeqWrite + ?Sized>(output: &mut T) -> io::Result<u64> {
- poll_fn(move |cx| unsafe { Pin::new_unchecked(&mut *output).poll_position(cx) }).await
-}
-
-/// awaitable verison of `poll_seq_write`.
-async fn seq_write<T: SeqWrite + ?Sized>(output: &mut T, buf: &[u8]) -> io::Result<usize> {
- poll_fn(|cx| unsafe { Pin::new_unchecked(&mut *output).poll_seq_write(cx, buf) }).await
-}
-
-/// Write the entire contents of a buffer, handling short writes.
-async fn seq_write_all<T: SeqWrite + ?Sized>(output: &mut T, mut buf: &[u8]) -> io::Result<()> {
- while !buf.is_empty() {
- let got = seq_write(&mut *output, buf).await?;
- buf = &buf[got..];
- }
- Ok(())
-}
-
-/// Write an endian-swappable struct.
-async fn seq_write_struct<E: Endian, T>(output: &mut T, data: E) -> io::Result<()>
-where
- T: SeqWrite + ?Sized,
-{
- let data = data.to_le();
- seq_write_all(output, unsafe {
- std::slice::from_raw_parts(&data as *const E as *const u8, size_of_val(&data))
- })
- .await
-}
-
-/// Write a pxar entry.
-async fn seq_write_pxar_entry<T>(output: &mut T, htype: u64, data: &[u8]) -> io::Result<()>
-where
- T: SeqWrite + ?Sized,
-{
- seq_write_struct(
- &mut *output,
- format::Header::with_content_size(htype, data.len() as u64),
- )
- .await?;
- seq_write_all(output, data).await
-}
-
-/// Write a pxar entry terminated by an additional zero which is not contained in the provided
-/// data buffer.
-async fn seq_write_pxar_entry_zero<T>(output: &mut T, htype: u64, data: &[u8]) -> io::Result<()>
-where
- T: SeqWrite + ?Sized,
-{
- seq_write_struct(
- &mut *output,
- format::Header::with_content_size(htype, 1 + data.len() as u64),
- )
- .await?;
- seq_write_all(&mut *output, data).await?;
- seq_write_all(output, &[0u8]).await
-}
-
-/// Write a pxar entry consiting of an endian-swappable struct.
-async fn seq_write_pxar_struct_entry<E, T>(output: &mut T, htype: u64, data: E) -> io::Result<()>
-where
- T: SeqWrite + ?Sized,
- E: Endian,
-{
- let data = data.to_le();
- seq_write_pxar_entry(output, htype, unsafe {
- std::slice::from_raw_parts(&data as *const E as *const u8, size_of_val(&data))
- })
- .await
-}
-
-/// Error conditions caused by wrong usage of this crate.
-#[derive(Clone, Copy, Debug, Eq, PartialEq)]
-pub enum EncodeError {
- /// The user dropped a `File` without without finishing writing all of its contents.
- ///
- /// This is required because the payload lengths is written out at the begining and decoding
- /// requires there to follow the right amount of data.
- IncompleteFile,
-
- /// The user dropped a directory without finalizing it.
- ///
- /// Finalizing is required to build the goodbye table at the end of a directory.
- IncompleteDirectory,
-}
-
-#[derive(Default)]
-struct EncoderState {
- /// Goodbye items for this directory, excluding the tail.
- items: Vec<GoodbyeItem>,
-
- /// User caused error conditions.
- encode_error: Option<EncodeError>,
-
- /// Offset of this directory's ENTRY.
- entry_offset: u64,
-
- /// Offset to this directory's first FILENAME.
- files_offset: u64,
-
- /// If this is a subdirectory, this points to the this directory's FILENAME.
- file_offset: Option<u64>,
-
- /// If this is a subdirectory, this contains this directory's hash for the goodbye item.
- file_hash: u64,
-}
-
-impl EncoderState {
- fn merge_error(&mut self, error: Option<EncodeError>) {
- // one error is enough:
- if self.encode_error.is_none() {
- self.encode_error = error;
- }
- }
-
- fn add_error(&mut self, error: EncodeError) {
- self.merge_error(Some(error));
- }
-}
-
-/// The encoder state machine implementation for a directory.
-///
-/// We use `async fn` to implement the encoder state machine so that we can easily plug in both
-/// synchronous or `async` I/O objects in as output.
-pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> {
- output: T,
- state: EncoderState,
- parent: Option<&'a mut EncoderState>,
- finished: bool,
-}
-
-impl<'a, T: SeqWrite + 'a> Drop for EncoderImpl<'a, T> {
- fn drop(&mut self) {
- if let Some(ref mut parent) = self.parent {
- // propagate errors:
- parent.merge_error(self.state.encode_error);
- if !self.finished {
- parent.add_error(EncodeError::IncompleteDirectory);
- }
- } else if !self.finished {
- // FIXME: how do we deal with this?
- // eprintln!("Encoder dropped without finishing!");
- }
- }
-}
-
-impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
- pub async fn new(output: T, metadata: &Metadata) -> io::Result<EncoderImpl<'a, T>> {
- if !metadata.is_dir() {
- io_bail!("directory metadata must contain the directory mode flag");
- }
- let mut this = Self {
- output,
- state: EncoderState::default(),
- parent: None,
- finished: false,
- };
-
- this.encode_metadata(metadata).await?;
- this.state.files_offset = seq_write_position(&mut this.output).await?;
-
- Ok(this)
- }
-
- fn check(&self) -> io::Result<()> {
- match self.state.encode_error {
- Some(EncodeError::IncompleteFile) => io_bail!("incomplete file"),
- Some(EncodeError::IncompleteDirectory) => io_bail!("directory not finalized"),
- None => Ok(()),
- }
- }
-
- pub async fn create_file<'b>(
- &'b mut self,
- metadata: &Metadata,
- file_name: &Path,
- file_size: u64,
- ) -> io::Result<FileImpl<'b>>
- where
- 'a: 'b,
- {
- self.create_file_do(metadata, file_name.as_os_str().as_bytes(), file_size)
- .await
- }
-
- async fn create_file_do<'b>(
- &'b mut self,
- metadata: &Metadata,
- file_name: &[u8],
- file_size: u64,
- ) -> io::Result<FileImpl<'b>>
- where
- 'a: 'b,
- {
- self.check()?;
-
- let file_offset = seq_write_position(&mut self.output).await?;
- self.start_file_do(Some(metadata), file_name).await?;
-
- seq_write_struct(
- &mut self.output,
- format::Header::with_content_size(format::PXAR_PAYLOAD, file_size),
- )
- .await?;
-
- let payload_data_offset = seq_write_position(&mut self.output).await?;
-
- let meta_size = payload_data_offset - file_offset;
-
- Ok(FileImpl {
- output: &mut self.output,
- goodbye_item: GoodbyeItem {
- hash: format::hash_filename(file_name),
- offset: file_offset,
- size: file_size + meta_size,
- },
- remaining_size: file_size,
- parent: &mut self.state,
- })
- }
-
- pub async fn add_file(
- &mut self,
- metadata: &Metadata,
- file_name: &Path,
- file_size: u64,
- content: &mut dyn SeqRead,
- ) -> io::Result<()> {
- let mut file = self.create_file(metadata, file_name, file_size).await?;
- let mut buf = crate::util::vec_new(4096);
- loop {
- let got = decoder::seq_read(&mut *content, &mut buf).await?;
- if got == 0 {
- break;
- } else {
- file.write_all(&buf[..got]).await?;
- }
- }
- Ok(())
- }
-
- pub async fn add_symlink(
- &mut self,
- metadata: &Metadata,
- file_name: &Path,
- target: &Path,
- ) -> io::Result<()> {
- self.add_file_entry(
- Some(metadata),
- file_name,
- Some((format::PXAR_SYMLINK, target.as_os_str().as_bytes())),
- )
- .await
- }
-
- pub async fn add_hardlink(
- &mut self,
- file_name: &Path,
- target: &Path,
- offset: u64,
- ) -> io::Result<()> {
- let hardlink = format::Hardlink {
- offset,
- data: target.as_os_str().as_bytes().to_vec(),
- };
- let hardlink = unsafe {
- std::slice::from_raw_parts(
- &hardlink as *const format::Hardlink as *const u8,
- size_of::<format::Hardlink>(),
- )
- };
- self.add_file_entry(
- None,
- file_name,
- Some((format::PXAR_HARDLINK, hardlink)),
- )
- .await
- }
-
- pub async fn add_device(
- &mut self,
- metadata: &Metadata,
- file_name: &Path,
- device: format::Device,
- ) -> io::Result<()> {
- if !metadata.is_device() {
- io_bail!("entry added via add_device must have a device mode in its metadata");
- }
-
- let device = device.to_le();
- let device = unsafe {
- std::slice::from_raw_parts(
- &device as *const format::Device as *const u8,
- size_of::<format::Device>(),
- )
- };
- self.add_file_entry(
- Some(metadata),
- file_name,
- Some((format::PXAR_DEVICE, device)),
- )
- .await
- }
-
- pub async fn add_fifo(&mut self, metadata: &Metadata, file_name: &Path) -> io::Result<()> {
- if !metadata.is_fifo() {
- io_bail!("entry added via add_device must be of type fifo in its metadata");
- }
-
- self.add_file_entry(Some(metadata), file_name, None).await
- }
-
- pub async fn add_socket(&mut self, metadata: &Metadata, file_name: &Path) -> io::Result<()> {
- if !metadata.is_socket() {
- io_bail!("entry added via add_device must be of type socket in its metadata");
- }
-
- self.add_file_entry(Some(metadata), file_name, None).await
- }
-
- async fn add_file_entry(
- &mut self,
- metadata: Option<&Metadata>,
- file_name: &Path,
- entry_htype_data: Option<(u64, &[u8])>,
- ) -> io::Result<()> {
- self.check()?;
-
- let file_offset = seq_write_position(&mut self.output).await?;
-
- let file_name = file_name.as_os_str().as_bytes();
-
- self.start_file_do(metadata, file_name).await?;
- if let Some((htype, entry_data)) = entry_htype_data {
- seq_write_pxar_entry_zero(&mut self.output, htype, entry_data).await?;
- }
-
- let end_offset = seq_write_position(&mut self.output).await?;
-
- self.state.items.push(GoodbyeItem {
- hash: format::hash_filename(file_name),
- offset: file_offset,
- size: end_offset - file_offset,
- });
-
- Ok(())
- }
-
- /// Helper
- #[inline]
- async fn position(&mut self) -> io::Result<u64> {
- seq_write_position(&mut self.output).await
- }
-
- pub async fn create_directory<'b>(
- &'b mut self,
- file_name: &Path,
- metadata: &Metadata,
- ) -> io::Result<EncoderImpl<'b, &'b mut dyn SeqWrite>>
- where
- 'a: 'b,
- {
- self.check()?;
-
- if !metadata.is_dir() {
- io_bail!("directory metadata must contain the directory mode flag");
- }
-
- let file_name = file_name.as_os_str().as_bytes();
- let file_hash = format::hash_filename(file_name);
-
- let file_offset = self.position().await?;
- self.encode_filename(file_name).await?;
-
- let entry_offset = self.position().await?;
- self.encode_metadata(&metadata).await?;
-
- let files_offset = self.position().await?;
-
- Ok(EncoderImpl {
- output: self.output.as_trait_object(),
- state: EncoderState {
- entry_offset,
- files_offset,
- file_offset: Some(file_offset),
- file_hash: file_hash,
- ..Default::default()
- },
- parent: Some(&mut self.state),
- finished: false,
- })
- }
-
- async fn start_file_do(
- &mut self,
- metadata: Option<&Metadata>,
- file_name: &[u8],
- ) -> io::Result<()> {
- self.encode_filename(file_name).await?;
- if let Some(metadata) = metadata {
- self.encode_metadata(&metadata).await?;
- }
- Ok(())
- }
-
- async fn encode_metadata(&mut self, metadata: &Metadata) -> io::Result<()> {
- seq_write_pxar_struct_entry(&mut self.output, format::PXAR_ENTRY, metadata.stat.clone())
- .await?;
-
- for xattr in &metadata.xattrs {
- self.write_xattr(xattr).await?;
- }
-
- self.write_acls(&metadata.acl).await?;
-
- if let Some(fcaps) = &metadata.fcaps {
- self.write_file_capabilities(fcaps).await?;
- }
-
- if let Some(qpid) = &metadata.quota_project_id {
- self.write_quota_project_id(qpid).await?;
- }
-
- Ok(())
- }
-
- async fn write_xattr(&mut self, xattr: &format::XAttr) -> io::Result<()> {
- seq_write_pxar_entry(&mut self.output, format::PXAR_XATTR, &xattr.data).await
- }
-
- async fn write_acls(&mut self, acl: &crate::Acl) -> io::Result<()> {
- for acl in &acl.users {
- seq_write_pxar_struct_entry(&mut self.output, format::PXAR_ACL_USER, acl.clone())
- .await?;
- }
-
- for acl in &acl.groups {
- seq_write_pxar_struct_entry(&mut self.output, format::PXAR_ACL_GROUP, acl.clone())
- .await?;
- }
-
- if let Some(acl) = &acl.group_obj {
- seq_write_pxar_struct_entry(&mut self.output, format::PXAR_ACL_GROUP_OBJ, acl.clone())
- .await?;
- }
-
- if let Some(acl) = &acl.default {
- seq_write_pxar_struct_entry(&mut self.output, format::PXAR_ACL_DEFAULT, acl.clone())
- .await?;
- }
-
- for acl in &acl.default_users {
- seq_write_pxar_struct_entry(
- &mut self.output,
- format::PXAR_ACL_DEFAULT_USER,
- acl.clone(),
- )
- .await?;
- }
-
- for acl in &acl.default_groups {
- seq_write_pxar_struct_entry(
- &mut self.output,
- format::PXAR_ACL_DEFAULT_GROUP,
- acl.clone(),
- )
- .await?;
- }
-
- Ok(())
- }
-
- async fn write_file_capabilities(&mut self, fcaps: &format::FCaps) -> io::Result<()> {
- seq_write_pxar_entry(&mut self.output, format::PXAR_FCAPS, &fcaps.data).await
- }
-
- async fn write_quota_project_id(
- &mut self,
- quota_project_id: &format::QuotaProjectId,
- ) -> io::Result<()> {
- seq_write_pxar_struct_entry(
- &mut self.output,
- format::PXAR_QUOTA_PROJID,
- quota_project_id.clone(),
- )
- .await
- }
-
- async fn encode_filename(&mut self, file_name: &[u8]) -> io::Result<()> {
- seq_write_pxar_entry_zero(&mut self.output, format::PXAR_FILENAME, file_name).await
- }
-
- pub async fn finish(mut self) -> io::Result<()> {
- let tail_bytes = self.finish_goodbye_table().await?;
- seq_write_pxar_entry(&mut self.output, format::PXAR_GOODBYE, &tail_bytes).await?;
- if let Some(parent) = &mut self.parent {
- let file_offset = self
- .state
- .file_offset
- .expect("internal error: parent set but no file_offset?");
-
- let end_offset = seq_write_position(&mut self.output).await?;
-
- parent.items.push(GoodbyeItem {
- hash: self.state.file_hash,
- offset: file_offset,
- size: end_offset - file_offset,
- });
- }
- self.finished = true;
- Ok(())
- }
-
- async fn finish_goodbye_table(&mut self) -> io::Result<Vec<u8>> {
- let goodbye_offset = seq_write_position(&mut self.output).await?;
-
- // "take" out the tail (to not leave an array of endian-swapped structs in `self`)
- let mut tail = take(&mut self.state.items);
- let tail_size = (tail.len() + 1) * size_of::<GoodbyeItem>();
- let goodbye_size = tail_size as u64 + size_of::<format::Header>() as u64;
-
- // sort, then create a BST
- tail.sort_unstable_by(|a, b| a.hash.cmp(&b.hash));
-
- let mut bst = Vec::with_capacity(tail.len() + 1);
- unsafe {
- bst.set_len(tail.len());
- }
- binary_tree_array::copy(tail.len(), |src, dest| {
- let mut item = tail[src].clone();
- // fixup the goodbye table offsets to be relative and with the right endianess
- item.offset = goodbye_offset - item.offset;
- unsafe {
- std::ptr::write(&mut bst[dest], item.to_le());
- }
- });
- drop(tail);
-
- bst.push(
- GoodbyeItem {
- hash: format::PXAR_GOODBYE_TAIL_MARKER,
- offset: goodbye_offset - self.state.entry_offset,
- size: goodbye_size,
- }
- .to_le(),
- );
-
- // turn this into a byte vector since after endian-swapping we can no longer guarantee that
- // the items make sense:
- let data = bst.as_mut_ptr() as *mut u8;
- let capacity = bst.capacity() * size_of::<GoodbyeItem>();
- forget(bst);
- Ok(unsafe { Vec::from_raw_parts(data, tail_size, capacity) })
- }
-}
-
-/// Writer for a file object in a directory.
-pub struct FileImpl<'a> {
- output: &'a mut dyn SeqWrite,
-
- /// This file's `GoodbyeItem`. FIXME: We currently don't touch this, can we just push it
- /// directly instead of on Drop of FileImpl?
- goodbye_item: GoodbyeItem,
-
- /// While writing data to this file, this is how much space we still have left, this must reach
- /// exactly zero.
- remaining_size: u64,
-
- /// The directory containing this file. This is where we propagate the `IncompleteFile` error
- /// to, and where we insert our `GoodbyeItem`.
- parent: &'a mut EncoderState,
-}
-
-impl<'a> Drop for FileImpl<'a> {
- fn drop(&mut self) {
- if self.remaining_size != 0 {
- self.parent.add_error(EncodeError::IncompleteFile);
- }
-
- self.parent.items.push(self.goodbye_item.clone());
- }
-}
-
-impl<'a> FileImpl<'a> {
- fn check_remaining(&self, size: usize) -> io::Result<()> {
- if size as u64 > self.remaining_size {
- io_bail!("attempted to write more than previously allocated");
- } else {
- Ok(())
- }
- }
-
- /// Poll write interface to more easily connect to tokio/futures.
- #[cfg(any(feature = "tokio-io", feature = "futures-io"))]
- pub fn poll_write(
- self: Pin<&mut Self>,
- cx: &mut Context,
- data: &[u8],
- ) -> Poll<io::Result<usize>> {
- let this = self.get_mut();
- this.check_remaining(data.len())?;
- let output = unsafe { Pin::new_unchecked(&mut *this.output) };
- match output.poll_seq_write(cx, data) {
- Poll::Ready(Ok(put)) => {
- this.remaining_size -= put as u64;
- Poll::Ready(Ok(put))
- }
- other => other,
- }
- }
-
- /// Poll flush interface to more easily connect to tokio/futures.
- #[cfg(any(feature = "tokio-io", feature = "futures-io"))]
- pub fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
- unsafe {
- self.map_unchecked_mut(|this| &mut this.output)
- .poll_flush(cx)
- }
- }
-
- /// Poll close/shutdown interface to more easily connect to tokio/futures.
- #[cfg(any(feature = "tokio-io", feature = "futures-io"))]
- pub fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
- unsafe {
- self.map_unchecked_mut(|this| &mut this.output)
- .poll_close(cx)
- }
- }
-
- /// Write file data for the current file entry in a pxar archive.
- ///
- /// This forwards to the output's `SeqWrite::poll_seq_write` and may write fewer bytes than
- /// requested. Check the return value for how many. There's also a `write_all` method available
- /// for convenience.
- pub async fn write(&mut self, data: &[u8]) -> io::Result<usize> {
- self.check_remaining(data.len())?;
- let put = seq_write(&mut self.output, data).await?;
- self.remaining_size -= put as u64;
- Ok(put)
- }
-
- /// Completely write file data for the current file entry in a pxar archive.
- pub async fn write_all(&mut self, data: &[u8]) -> io::Result<()> {
- self.check_remaining(data.len())?;
- seq_write_all(&mut self.output, data).await?;
- self.remaining_size -= data.len() as u64;
- Ok(())
- }
-}
-
-#[cfg(feature = "tokio-io")]
-impl<'a> tokio::io::AsyncWrite for FileImpl<'a> {
- fn poll_write(self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<io::Result<usize>> {
- FileImpl::poll_write(self, cx, buf)
- }
-
- fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
- FileImpl::poll_flush(self, cx)
- }
-
- fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
- FileImpl::poll_close(self, cx)
- }
-}
-
-#[cfg(feature = "futures-io")]
-impl<'a> futures::io::AsyncWrite for FileImpl<'a> {
- fn poll_write(self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<io::Result<usize>> {
- FileImpl::poll_write(self, cx, buf)
- }
-
- fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
- FileImpl::poll_flush(self, cx)
- }
-
- fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
- FileImpl::poll_close(self, cx)
- }
-}
--- /dev/null
+//! The `pxar` encoder state machine.
+//!
+//! This is the implementation used by both the synchronous and async pxar wrappers.
+
+use std::io;
+use std::mem::{forget, size_of, size_of_val, take};
+use std::os::unix::ffi::OsStrExt;
+use std::path::Path;
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
+use endian_trait::Endian;
+
+use crate::binary_tree_array;
+use crate::decoder::{self, SeqRead};
+use crate::format::{self, GoodbyeItem};
+use crate::poll_fn::poll_fn;
+use crate::Metadata;
+
+pub mod aio;
+pub mod sync;
+
+#[doc(inline)]
+pub use sync::Encoder;
+
+/// Sequential write interface used by the encoder's state machine.
+///
+/// This is our internal writer trait which is available for `std::io::Write` types in the
+/// synchronous wrapper and for both `tokio` and `futures` `AsyncWrite` types in the asynchronous
+/// wrapper.
+pub trait SeqWrite {
+ /// Poll-based write of `buf` to the output.
+ ///
+ /// On success this yields the number of bytes taken from `buf`, which may be fewer than
+ /// `buf.len()`.
+ fn poll_seq_write(
+ self: Pin<&mut Self>,
+ cx: &mut Context,
+ buf: &[u8],
+ ) -> Poll<io::Result<usize>>;
+
+ /// Poll-based flush of the output.
+ fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>>;
+
+ /// Poll-based close/shutdown of the output.
+ fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>>;
+
+ /// While writing to a pxar archive we need to remember how much data we've written to track
+ /// some offsets. Particularly items like the goodbye table need to be able to compute
+ /// offsets to further back in the archive.
+ fn poll_position(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<u64>>;
+
+ /// To avoid recursively borrowing each time we nest into a subdirectory we add this helper.
+ /// Otherwise starting a subdirectory will get a trait object pointing to `T`, nesting another
+ /// subdirectory in that would have a trait object pointing to the trait object, and so on.
+ fn as_trait_object(&mut self) -> &mut dyn SeqWrite
+ where
+ Self: Sized,
+ {
+ self as &mut dyn SeqWrite
+ }
+}
+
+/// Allow using trait objects for generics taking a `SeqWrite`.
+///
+/// Every poll method forwards to the `dyn SeqWrite` behind the reference.
+impl<'a> SeqWrite for &mut (dyn SeqWrite + 'a) {
+ fn poll_seq_write(
+ self: Pin<&mut Self>,
+ cx: &mut Context,
+ buf: &[u8],
+ ) -> Poll<io::Result<usize>> {
+ // Project the pin from the reference onto the writer it points to; the closure only
+ // reborrows the writer and never moves it.
+ // NOTE(review): this treats the referenced writer as pinned although only the reference
+ // was pinned - confirm callers never move the writer while it is in use.
+ unsafe {
+ self.map_unchecked_mut(|this| &mut **this)
+ .poll_seq_write(cx, buf)
+ }
+ }
+
+ fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
+ // same pin projection as in `poll_seq_write`
+ unsafe { self.map_unchecked_mut(|this| &mut **this).poll_flush(cx) }
+ }
+
+ fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
+ // same pin projection as in `poll_seq_write`
+ unsafe { self.map_unchecked_mut(|this| &mut **this).poll_close(cx) }
+ }
+
+ fn poll_position(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<u64>> {
+ // same pin projection as in `poll_seq_write`
+ unsafe { self.map_unchecked_mut(|this| &mut **this).poll_position(cx) }
+ }
+
+ /// Hands out the trait object we already point to instead of wrapping it another time.
+ fn as_trait_object(&mut self) -> &mut dyn SeqWrite
+ where
+ Self: Sized,
+ {
+ &mut **self
+ }
+}
+
+/// Awaitable wrapper around `SeqWrite::poll_position`.
+async fn seq_write_position<T: SeqWrite + ?Sized>(output: &mut T) -> io::Result<u64> {
+    let query = poll_fn(move |cx| {
+        // the output is only reborrowed for the duration of this poll, never moved
+        let pinned = unsafe { Pin::new_unchecked(&mut *output) };
+        pinned.poll_position(cx)
+    });
+    query.await
+}
+
+/// Awaitable wrapper around `SeqWrite::poll_seq_write`.
+///
+/// Resolves to the number of bytes of `buf` the output accepted.
+async fn seq_write<T: SeqWrite + ?Sized>(output: &mut T, buf: &[u8]) -> io::Result<usize> {
+    let write = poll_fn(|cx| {
+        // the output is only reborrowed for the duration of this poll, never moved
+        let pinned = unsafe { Pin::new_unchecked(&mut *output) };
+        pinned.poll_seq_write(cx, buf)
+    });
+    write.await
+}
+
+/// Write the entire contents of a buffer, handling short writes.
+///
+/// Keeps calling `seq_write` with the not yet written remainder of `buf` until nothing is left.
+///
+/// # Errors
+///
+/// Any error of the underlying writer is passed through. If the writer reports that it wrote
+/// zero bytes while data is still pending, an error of kind `io::ErrorKind::WriteZero` is
+/// returned instead of retrying forever.
+async fn seq_write_all<T: SeqWrite + ?Sized>(output: &mut T, mut buf: &[u8]) -> io::Result<()> {
+    while !buf.is_empty() {
+        match seq_write(&mut *output, buf).await? {
+            // no progress: bail out rather than spinning on a writer which accepts nothing
+            0 => {
+                return Err(io::Error::new(
+                    io::ErrorKind::WriteZero,
+                    "failed to write whole buffer",
+                ));
+            }
+            got => buf = &buf[got..],
+        }
+    }
+    Ok(())
+}
+
+/// Write an endian-swappable struct in its little endian representation.
+async fn seq_write_struct<E: Endian, T>(output: &mut T, data: E) -> io::Result<()>
+where
+    T: SeqWrite + ?Sized,
+{
+    let le_data = data.to_le();
+    // view the converted struct as the plain bytes it occupies in memory
+    let bytes = unsafe {
+        std::slice::from_raw_parts(&le_data as *const E as *const u8, size_of_val(&le_data))
+    };
+    seq_write_all(output, bytes).await
+}
+
+/// Write a pxar entry: a header of type `htype` sized for `data`, followed by `data` itself.
+async fn seq_write_pxar_entry<T>(output: &mut T, htype: u64, data: &[u8]) -> io::Result<()>
+where
+    T: SeqWrite + ?Sized,
+{
+    let header = format::Header::with_content_size(htype, data.len() as u64);
+    seq_write_struct(&mut *output, header).await?;
+    seq_write_all(output, data).await
+}
+
+/// Write a pxar entry whose content is `data` followed by one zero byte.
+///
+/// The terminating zero is not part of `data`, but it is accounted for in the header's size.
+async fn seq_write_pxar_entry_zero<T>(output: &mut T, htype: u64, data: &[u8]) -> io::Result<()>
+where
+    T: SeqWrite + ?Sized,
+{
+    let content_size = 1 + data.len() as u64;
+    let header = format::Header::with_content_size(htype, content_size);
+    seq_write_struct(&mut *output, header).await?;
+    seq_write_all(&mut *output, data).await?;
+    seq_write_all(output, &[0u8]).await
+}
+
+/// Write a pxar entry consisting of an endian-swappable struct.
+///
+/// The struct is converted to little endian and its in-memory bytes become the entry content.
+async fn seq_write_pxar_struct_entry<E, T>(output: &mut T, htype: u64, data: E) -> io::Result<()>
+where
+    T: SeqWrite + ?Sized,
+    E: Endian,
+{
+    let le_data = data.to_le();
+    // view the converted struct as the plain bytes it occupies in memory
+    let bytes = unsafe {
+        std::slice::from_raw_parts(&le_data as *const E as *const u8, size_of_val(&le_data))
+    };
+    seq_write_pxar_entry(output, htype, bytes).await
+}
+
+/// Error conditions caused by wrong usage of this crate.
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
+pub enum EncodeError {
+ /// The user dropped a `File` without finishing writing all of its contents.
+ ///
+ /// Writing everything is required because the payload length is written out at the beginning
+ /// and decoding requires the right amount of data to follow.
+ IncompleteFile,
+
+ /// The user dropped a directory without finalizing it.
+ ///
+ /// Finalizing is required to build the goodbye table at the end of a directory.
+ IncompleteDirectory,
+}
+
+/// Per-directory bookkeeping of the encoder: collected goodbye items, recorded usage errors and
+/// the offsets needed to build the goodbye table.
+#[derive(Default)]
+struct EncoderState {
+ /// Goodbye items for this directory, excluding the tail.
+ items: Vec<GoodbyeItem>,
+
+ /// User caused error conditions.
+ encode_error: Option<EncodeError>,
+
+ /// Offset of this directory's ENTRY.
+ entry_offset: u64,
+
+ /// Offset to this directory's first FILENAME.
+ files_offset: u64,
+
+ /// If this is a subdirectory, this points to this directory's FILENAME.
+ file_offset: Option<u64>,
+
+ /// If this is a subdirectory, this contains this directory's hash for the goodbye item.
+ file_hash: u64,
+}
+
+impl EncoderState {
+    /// Take over `error` unless an error has already been recorded; the first one wins.
+    fn merge_error(&mut self, error: Option<EncodeError>) {
+        self.encode_error = self.encode_error.or(error);
+    }
+
+    /// Record `error` unless an error has already been recorded.
+    fn add_error(&mut self, error: EncodeError) {
+        if self.encode_error.is_none() {
+            self.encode_error = Some(error);
+        }
+    }
+}
+
+/// The encoder state machine implementation for a directory.
+///
+/// We use `async fn` to implement the encoder state machine so that we can easily plug in both
+/// synchronous or `async` I/O objects in as output.
+pub(crate) struct EncoderImpl<'a, T: SeqWrite + 'a> {
+ /// The writer the archive data goes to.
+ output: T,
+ /// Bookkeeping for the directory this encoder writes.
+ state: EncoderState,
+ /// State of the containing directory's encoder; `None` for an encoder created via `new`.
+ parent: Option<&'a mut EncoderState>,
+ /// Set at the end of a successful `finish`; `Drop` reports an unfinished subdirectory to
+ /// `parent`.
+ finished: bool,
+}
+
+impl<'a, T: SeqWrite + 'a> Drop for EncoderImpl<'a, T> {
+    /// Hand recorded errors up to the parent directory and flag directories which were dropped
+    /// without being finished.
+    fn drop(&mut self) {
+        let unfinished = !self.finished;
+        match self.parent.as_mut() {
+            Some(parent) => {
+                // propagate errors:
+                parent.merge_error(self.state.encode_error);
+                if unfinished {
+                    parent.add_error(EncodeError::IncompleteDirectory);
+                }
+            }
+            None => {
+                // FIXME: how do we deal with an unfinished top level encoder?
+                // eprintln!("Encoder dropped without finishing!");
+            }
+        }
+    }
+}
+
+impl<'a, T: SeqWrite + 'a> EncoderImpl<'a, T> {
+    /// Create a top level encoder writing to `output`.
+    ///
+    /// The directory's `metadata` is written immediately and must describe a directory. The
+    /// position reached afterwards is remembered as the offset of the first file entry.
+    pub async fn new(output: T, metadata: &Metadata) -> io::Result<EncoderImpl<'a, T>> {
+        if !metadata.is_dir() {
+            io_bail!("directory metadata must contain the directory mode flag");
+        }
+
+        let mut encoder = EncoderImpl {
+            output,
+            state: EncoderState::default(),
+            parent: None,
+            finished: false,
+        };
+
+        encoder.encode_metadata(metadata).await?;
+        let files_offset = seq_write_position(&mut encoder.output).await?;
+        encoder.state.files_offset = files_offset;
+
+        Ok(encoder)
+    }
+
+ fn check(&self) -> io::Result<()> {
+ match self.state.encode_error {
+ Some(EncodeError::IncompleteFile) => io_bail!("incomplete file"),
+ Some(EncodeError::IncompleteDirectory) => io_bail!("directory not finalized"),
+ None => Ok(()),
+ }
+ }
+
+    /// Start a file entry named `file_name` with a payload of exactly `file_size` bytes.
+    ///
+    /// The payload has to be written through the returned `FileImpl`.
+    pub async fn create_file<'b>(
+        &'b mut self,
+        metadata: &Metadata,
+        file_name: &Path,
+        file_size: u64,
+    ) -> io::Result<FileImpl<'b>>
+    where
+        'a: 'b,
+    {
+        let name_bytes = file_name.as_os_str().as_bytes();
+        self.create_file_do(metadata, name_bytes, file_size).await
+    }
+
+    /// Write the file name, the metadata and the payload header for a new file and hand out the
+    /// writer for its payload.
+    ///
+    /// The goodbye item prepared here spans everything from the file name up to the end of the
+    /// announced payload.
+    async fn create_file_do<'b>(
+        &'b mut self,
+        metadata: &Metadata,
+        file_name: &[u8],
+        file_size: u64,
+    ) -> io::Result<FileImpl<'b>>
+    where
+        'a: 'b,
+    {
+        self.check()?;
+
+        let file_offset = seq_write_position(&mut self.output).await?;
+        self.start_file_do(Some(metadata), file_name).await?;
+
+        let payload_header = format::Header::with_content_size(format::PXAR_PAYLOAD, file_size);
+        seq_write_struct(&mut self.output, payload_header).await?;
+
+        // everything written so far for this entry counts towards its size
+        let payload_data_offset = seq_write_position(&mut self.output).await?;
+        let meta_size = payload_data_offset - file_offset;
+
+        let goodbye_item = GoodbyeItem {
+            hash: format::hash_filename(file_name),
+            offset: file_offset,
+            size: file_size + meta_size,
+        };
+
+        Ok(FileImpl {
+            output: &mut self.output,
+            goodbye_item,
+            remaining_size: file_size,
+            parent: &mut self.state,
+        })
+    }
+
+    /// Add a complete file by copying its payload from `content` until that reports end of
+    /// input.
+    ///
+    /// If `content` provides fewer than `file_size` bytes, dropping the file writer records an
+    /// `IncompleteFile` error on this directory; providing more makes the write fail.
+    pub async fn add_file(
+        &mut self,
+        metadata: &Metadata,
+        file_name: &Path,
+        file_size: u64,
+        content: &mut dyn SeqRead,
+    ) -> io::Result<()> {
+        let mut file = self.create_file(metadata, file_name, file_size).await?;
+        let mut buf = crate::util::vec_new(4096);
+        loop {
+            match decoder::seq_read(&mut *content, &mut buf).await? {
+                0 => return Ok(()),
+                got => file.write_all(&buf[..got]).await?,
+            }
+        }
+    }
+
+    /// Add a symbolic link named `file_name` pointing to `target`.
+    pub async fn add_symlink(
+        &mut self,
+        metadata: &Metadata,
+        file_name: &Path,
+        target: &Path,
+    ) -> io::Result<()> {
+        let target_bytes = target.as_os_str().as_bytes();
+        let entry = (format::PXAR_SYMLINK, target_bytes);
+        self.add_file_entry(Some(metadata), file_name, Some(entry))
+            .await
+    }
+
+    /// Add a hardlink named `file_name` referring to `target`.
+    ///
+    /// The entry content is the `offset` as 8 little endian bytes followed by the bytes of the
+    /// `target` path; `add_file_entry` appends the terminating zero byte.
+    ///
+    /// The in-memory `format::Hardlink` struct must not be dumped as raw bytes: it contains a
+    /// `Vec<u8>`, so that would write the vector's pointer, capacity and length into the archive
+    /// instead of the path.
+    ///
+    /// NOTE(review): the layout (offset, path, zero terminator) should be confirmed against the
+    /// decoder's handling of `PXAR_HARDLINK`.
+    pub async fn add_hardlink(
+        &mut self,
+        file_name: &Path,
+        target: &Path,
+        offset: u64,
+    ) -> io::Result<()> {
+        let target = target.as_os_str().as_bytes();
+
+        let mut payload = Vec::with_capacity(size_of::<u64>() + target.len());
+        payload.extend_from_slice(&offset.to_le_bytes());
+        payload.extend_from_slice(target);
+
+        self.add_file_entry(
+            None,
+            file_name,
+            Some((format::PXAR_HARDLINK, payload.as_slice())),
+        )
+        .await
+    }
+
+    /// Add a device node named `file_name`; `metadata` must carry a device mode.
+    ///
+    /// The `device` numbers are written in little endian representation.
+    ///
+    /// NOTE(review): `add_file_entry` writes the entry via `seq_write_pxar_entry_zero`, so a
+    /// zero byte follows the device struct - confirm this is what the decoder expects.
+    pub async fn add_device(
+        &mut self,
+        metadata: &Metadata,
+        file_name: &Path,
+        device: format::Device,
+    ) -> io::Result<()> {
+        if !metadata.is_device() {
+            io_bail!("entry added via add_device must have a device mode in its metadata");
+        }
+
+        let le_device = device.to_le();
+        // view the converted struct as the plain bytes it occupies in memory
+        let raw_bytes = unsafe {
+            std::slice::from_raw_parts(
+                &le_device as *const format::Device as *const u8,
+                size_of_val(&le_device),
+            )
+        };
+        let entry = Some((format::PXAR_DEVICE, raw_bytes));
+        self.add_file_entry(Some(metadata), file_name, entry).await
+    }
+
+    /// Add a named pipe (FIFO) called `file_name`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `metadata` does not describe a FIFO, or if writing the entry fails.
+    pub async fn add_fifo(&mut self, metadata: &Metadata, file_name: &Path) -> io::Result<()> {
+        if !metadata.is_fifo() {
+            // name the method which was actually misused
+            io_bail!("entry added via add_fifo must be of type fifo in its metadata");
+        }
+
+        self.add_file_entry(Some(metadata), file_name, None).await
+    }
+
+    /// Add a named socket called `file_name`.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `metadata` does not describe a socket, or if writing the entry fails.
+    pub async fn add_socket(&mut self, metadata: &Metadata, file_name: &Path) -> io::Result<()> {
+        if !metadata.is_socket() {
+            // name the method which was actually misused
+            io_bail!("entry added via add_socket must be of type socket in its metadata");
+        }
+
+        self.add_file_entry(Some(metadata), file_name, None).await
+    }
+
+    /// Write a payload-less entry: the file name, the optional metadata and, if given, one
+    /// additional zero terminated entry of type `htype`.
+    ///
+    /// Afterwards a goodbye item covering everything written here is queued for this directory.
+    async fn add_file_entry(
+        &mut self,
+        metadata: Option<&Metadata>,
+        file_name: &Path,
+        entry_htype_data: Option<(u64, &[u8])>,
+    ) -> io::Result<()> {
+        self.check()?;
+
+        let start = seq_write_position(&mut self.output).await?;
+        let name = file_name.as_os_str().as_bytes();
+
+        self.start_file_do(metadata, name).await?;
+        match entry_htype_data {
+            Some((htype, entry_data)) => {
+                seq_write_pxar_entry_zero(&mut self.output, htype, entry_data).await?
+            }
+            None => (),
+        }
+
+        let end = seq_write_position(&mut self.output).await?;
+
+        let item = GoodbyeItem {
+            hash: format::hash_filename(name),
+            offset: start,
+            size: end - start,
+        };
+        self.state.items.push(item);
+
+        Ok(())
+    }
+
+ /// Helper: query the output's current position via `seq_write_position`.
+ #[inline]
+ async fn position(&mut self) -> io::Result<u64> {
+ seq_write_position(&mut self.output).await
+ }
+
+    /// Start a subdirectory named `file_name` and return the encoder for its contents.
+    ///
+    /// The name and metadata are written right away. The returned encoder writes to the same
+    /// output through a trait object and reports errors back to this directory's state.
+    pub async fn create_directory<'b>(
+        &'b mut self,
+        file_name: &Path,
+        metadata: &Metadata,
+    ) -> io::Result<EncoderImpl<'b, &'b mut dyn SeqWrite>>
+    where
+        'a: 'b,
+    {
+        self.check()?;
+
+        if !metadata.is_dir() {
+            io_bail!("directory metadata must contain the directory mode flag");
+        }
+
+        let name = file_name.as_os_str().as_bytes();
+        let file_hash = format::hash_filename(name);
+
+        // remember where the FILENAME, the ENTRY and the first child start
+        let file_offset = self.position().await?;
+        self.encode_filename(name).await?;
+
+        let entry_offset = self.position().await?;
+        self.encode_metadata(metadata).await?;
+
+        let files_offset = self.position().await?;
+
+        let state = EncoderState {
+            entry_offset,
+            files_offset,
+            file_offset: Some(file_offset),
+            file_hash,
+            ..Default::default()
+        };
+
+        Ok(EncoderImpl {
+            output: self.output.as_trait_object(),
+            state,
+            parent: Some(&mut self.state),
+            finished: false,
+        })
+    }
+
+    /// Write the FILENAME entry and, if provided, the metadata entries for a new item.
+    async fn start_file_do(
+        &mut self,
+        metadata: Option<&Metadata>,
+        file_name: &[u8],
+    ) -> io::Result<()> {
+        self.encode_filename(file_name).await?;
+        match metadata {
+            Some(metadata) => self.encode_metadata(metadata).await,
+            None => Ok(()),
+        }
+    }
+
+    /// Write all metadata entries: the stat ENTRY first, then the extended attributes, the
+    /// ACLs, the file capabilities and the quota project id, as far as they are present.
+    async fn encode_metadata(&mut self, metadata: &Metadata) -> io::Result<()> {
+        let stat = metadata.stat.clone();
+        seq_write_pxar_struct_entry(&mut self.output, format::PXAR_ENTRY, stat).await?;
+
+        for xattr in &metadata.xattrs {
+            self.write_xattr(xattr).await?;
+        }
+
+        self.write_acls(&metadata.acl).await?;
+
+        if let Some(fcaps) = metadata.fcaps.as_ref() {
+            self.write_file_capabilities(fcaps).await?;
+        }
+
+        if let Some(qpid) = metadata.quota_project_id.as_ref() {
+            self.write_quota_project_id(qpid).await?;
+        }
+
+        Ok(())
+    }
+
+    /// Write a single extended attribute as a `PXAR_XATTR` entry.
+    async fn write_xattr(&mut self, xattr: &format::XAttr) -> io::Result<()> {
+        let payload: &[u8] = &xattr.data;
+        seq_write_pxar_entry(&mut self.output, format::PXAR_XATTR, payload).await
+    }
+
+    /// Write the ACL entries in a fixed order: users, groups, the group object, the default
+    /// entry, default users and finally default groups.
+    async fn write_acls(&mut self, acl: &crate::Acl) -> io::Result<()> {
+        for user in &acl.users {
+            let entry = user.clone();
+            seq_write_pxar_struct_entry(&mut self.output, format::PXAR_ACL_USER, entry).await?;
+        }
+
+        for group in &acl.groups {
+            let entry = group.clone();
+            seq_write_pxar_struct_entry(&mut self.output, format::PXAR_ACL_GROUP, entry).await?;
+        }
+
+        if let Some(group_obj) = &acl.group_obj {
+            let entry = group_obj.clone();
+            seq_write_pxar_struct_entry(&mut self.output, format::PXAR_ACL_GROUP_OBJ, entry)
+                .await?;
+        }
+
+        if let Some(default) = &acl.default {
+            let entry = default.clone();
+            seq_write_pxar_struct_entry(&mut self.output, format::PXAR_ACL_DEFAULT, entry)
+                .await?;
+        }
+
+        for user in &acl.default_users {
+            let entry = user.clone();
+            seq_write_pxar_struct_entry(&mut self.output, format::PXAR_ACL_DEFAULT_USER, entry)
+                .await?;
+        }
+
+        for group in &acl.default_groups {
+            let entry = group.clone();
+            seq_write_pxar_struct_entry(&mut self.output, format::PXAR_ACL_DEFAULT_GROUP, entry)
+                .await?;
+        }
+
+        Ok(())
+    }
+
+    /// Write the file capabilities as a `PXAR_FCAPS` entry.
+    async fn write_file_capabilities(&mut self, fcaps: &format::FCaps) -> io::Result<()> {
+        let payload: &[u8] = &fcaps.data;
+        seq_write_pxar_entry(&mut self.output, format::PXAR_FCAPS, payload).await
+    }
+
+    /// Write the quota project id as a `PXAR_QUOTA_PROJID` entry.
+    async fn write_quota_project_id(
+        &mut self,
+        quota_project_id: &format::QuotaProjectId,
+    ) -> io::Result<()> {
+        let projid = quota_project_id.clone();
+        seq_write_pxar_struct_entry(&mut self.output, format::PXAR_QUOTA_PROJID, projid).await
+    }
+
+    /// Write `file_name` as a zero terminated `PXAR_FILENAME` entry.
+    async fn encode_filename(&mut self, file_name: &[u8]) -> io::Result<()> {
+        let htype = format::PXAR_FILENAME;
+        seq_write_pxar_entry_zero(&mut self.output, htype, file_name).await
+    }
+
+    /// Finalize this directory by writing its goodbye table.
+    ///
+    /// For a subdirectory, a goodbye item spanning the whole directory is then queued in the
+    /// parent's state. Only after all of this succeeded is the encoder marked as finished.
+    pub async fn finish(mut self) -> io::Result<()> {
+        let goodbye_content = self.finish_goodbye_table().await?;
+        seq_write_pxar_entry(&mut self.output, format::PXAR_GOODBYE, &goodbye_content).await?;
+
+        if let Some(parent) = self.parent.as_mut() {
+            let file_offset = self
+                .state
+                .file_offset
+                .expect("internal error: parent set but no file_offset?");
+
+            let end_offset = seq_write_position(&mut self.output).await?;
+
+            let item = GoodbyeItem {
+                hash: self.state.file_hash,
+                offset: file_offset,
+                size: end_offset - file_offset,
+            };
+            parent.items.push(item);
+        }
+
+        self.finished = true;
+        Ok(())
+    }
+
+    /// Build the content of this directory's GOODBYE entry.
+    ///
+    /// The collected items are taken out of the state, sorted by hash and laid out via
+    /// `binary_tree_array::copy`, with their offsets made relative to the start of the goodbye
+    /// table. A tail item carrying `PXAR_GOODBYE_TAIL_MARKER`, the distance back to this
+    /// directory's ENTRY and the size of the whole GOODBYE entry (header included) is appended.
+    ///
+    /// Returns the little endian serialization of all items, tail included.
+    ///
+    /// This deliberately avoids `unsafe`: the table is fully initialized before it is indexed
+    /// and the bytes are copied into a real `Vec<u8>` rather than re-labelling the allocation of
+    /// a `Vec<GoodbyeItem>`, which would later be freed with the wrong alignment.
+    async fn finish_goodbye_table(&mut self) -> io::Result<Vec<u8>> {
+        let goodbye_offset = seq_write_position(&mut self.output).await?;
+
+        // "take" out the items so the state does not keep a stale list around
+        let mut tail = take(&mut self.state.items);
+        let tail_size = (tail.len() + 1) * size_of::<GoodbyeItem>();
+        let goodbye_size = tail_size as u64 + size_of::<format::Header>() as u64;
+
+        // sort, then create a BST
+        tail.sort_unstable_by_key(|item| item.hash);
+
+        let placeholder = GoodbyeItem {
+            hash: 0,
+            offset: 0,
+            size: 0,
+        };
+        let mut bst = vec![placeholder; tail.len()];
+        binary_tree_array::copy(tail.len(), |src, dest| {
+            let mut item = tail[src].clone();
+            // fixup the goodbye table offsets to be relative
+            item.offset = goodbye_offset - item.offset;
+            bst[dest] = item;
+        });
+        drop(tail);
+
+        bst.push(GoodbyeItem {
+            hash: format::PXAR_GOODBYE_TAIL_MARKER,
+            offset: goodbye_offset - self.state.entry_offset,
+            size: goodbye_size,
+        });
+
+        // serialize field by field in declaration order; `GoodbyeItem` consists of exactly
+        // these three `u64` values
+        let mut data = Vec::with_capacity(tail_size);
+        for item in &bst {
+            data.extend_from_slice(&item.hash.to_le_bytes());
+            data.extend_from_slice(&item.offset.to_le_bytes());
+            data.extend_from_slice(&item.size.to_le_bytes());
+        }
+        debug_assert_eq!(data.len(), tail_size);
+
+        Ok(data)
+    }
+}
+
+/// Writer for a file object in a directory.
+pub struct FileImpl<'a> {
+ /// The writer the payload data is forwarded to.
+ output: &'a mut dyn SeqWrite,
+
+ /// This file's `GoodbyeItem`. FIXME: We currently don't touch this, can we just push it
+ /// directly instead of on Drop of FileImpl?
+ goodbye_item: GoodbyeItem,
+
+ /// While writing data to this file, this is how much space we still have left, this must reach
+ /// exactly zero.
+ remaining_size: u64,
+
+ /// The directory containing this file. This is where we propagate the `IncompleteFile` error
+ /// to, and where we insert our `GoodbyeItem`.
+ parent: &'a mut EncoderState,
+}
+
+impl<'a> Drop for FileImpl<'a> {
+    /// Report an incompletely written payload to the parent directory and queue this file's
+    /// goodbye item there.
+    fn drop(&mut self) {
+        let complete = self.remaining_size == 0;
+        if !complete {
+            self.parent.add_error(EncodeError::IncompleteFile);
+        }
+
+        let item = self.goodbye_item.clone();
+        self.parent.items.push(item);
+    }
+}
+
+impl<'a> FileImpl<'a> {
+    /// Make sure that `size` more bytes still fit into the announced payload size.
+    fn check_remaining(&self, size: usize) -> io::Result<()> {
+        let requested = size as u64;
+        if requested <= self.remaining_size {
+            return Ok(());
+        }
+
+        io_bail!("attempted to write more than previously allocated")
+    }
+
+ /// Poll write interface to more easily connect to tokio/futures.
+ ///
+ /// Fails if `data` is longer than the remaining payload size. After a successful write the
+ /// number of bytes written is subtracted from the remaining size.
+ #[cfg(any(feature = "tokio-io", feature = "futures-io"))]
+ pub fn poll_write(
+ self: Pin<&mut Self>,
+ cx: &mut Context,
+ data: &[u8],
+ ) -> Poll<io::Result<usize>> {
+ let this = self.get_mut();
+ this.check_remaining(data.len())?;
+ // re-pin the output for the forwarded call; it is only reborrowed, not moved
+ let output = unsafe { Pin::new_unchecked(&mut *this.output) };
+ match output.poll_seq_write(cx, data) {
+ Poll::Ready(Ok(put)) => {
+ this.remaining_size -= put as u64;
+ Poll::Ready(Ok(put))
+ }
+ // pending and errors are passed through unchanged
+ other => other,
+ }
+ }
+
+ /// Poll flush interface to more easily connect to tokio/futures.
+ ///
+ /// Forwards to the output's `poll_flush`.
+ #[cfg(any(feature = "tokio-io", feature = "futures-io"))]
+ pub fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
+ // project the pin onto the `output` field, which is not moved by this
+ unsafe {
+ self.map_unchecked_mut(|this| &mut this.output)
+ .poll_flush(cx)
+ }
+ }
+
+ /// Poll close/shutdown interface to more easily connect to tokio/futures.
+ ///
+ /// Forwards to the output's `poll_close`.
+ #[cfg(any(feature = "tokio-io", feature = "futures-io"))]
+ pub fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
+ // project the pin onto the `output` field, which is not moved by this
+ unsafe {
+ self.map_unchecked_mut(|this| &mut this.output)
+ .poll_close(cx)
+ }
+ }
+
+    /// Write file data for the current file entry in a pxar archive.
+    ///
+    /// The data is handed to the output's `SeqWrite::poll_seq_write`, which may accept fewer
+    /// bytes than requested; the return value tells how many were written. Use `write_all` to
+    /// write a complete buffer.
+    pub async fn write(&mut self, data: &[u8]) -> io::Result<usize> {
+        self.check_remaining(data.len())?;
+
+        let written = seq_write(&mut self.output, data).await?;
+        self.remaining_size -= written as u64;
+
+        Ok(written)
+    }
+
+    /// Completely write file data for the current file entry in a pxar archive.
+    ///
+    /// The remaining size is only reduced once all of `data` has been written.
+    pub async fn write_all(&mut self, data: &[u8]) -> io::Result<()> {
+        let len = data.len();
+        self.check_remaining(len)?;
+
+        seq_write_all(&mut self.output, data).await?;
+        self.remaining_size -= len as u64;
+
+        Ok(())
+    }
+}
+
+/// `tokio` `AsyncWrite` support; every method forwards to the inherent `FileImpl` poll method,
+/// with `poll_shutdown` mapping to `FileImpl::poll_close`.
+#[cfg(feature = "tokio-io")]
+impl<'a> tokio::io::AsyncWrite for FileImpl<'a> {
+ fn poll_write(self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<io::Result<usize>> {
+ FileImpl::poll_write(self, cx, buf)
+ }
+
+ fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
+ FileImpl::poll_flush(self, cx)
+ }
+
+ fn poll_shutdown(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
+ FileImpl::poll_close(self, cx)
+ }
+}
+
+/// `futures` `AsyncWrite` support; every method forwards to the inherent `FileImpl` poll method
+/// of the same name.
+#[cfg(feature = "futures-io")]
+impl<'a> futures::io::AsyncWrite for FileImpl<'a> {
+ fn poll_write(self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<io::Result<usize>> {
+ FileImpl::poll_write(self, cx, buf)
+ }
+
+ fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
+ FileImpl::poll_flush(self, cx)
+ }
+
+ fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll<io::Result<()>> {
+ FileImpl::poll_close(self, cx)
+ }
+}
+++ /dev/null
-//! *pxar* binary format definition
-//!
-//! Please note the all values are stored in little endian ordering.
-//!
-//! The Archive contains a list of items. Each item starts with a `Header`, followed by the
-//! item data.
-
-use std::cmp::Ordering;
-use std::ffi::{CStr, OsStr};
-use std::io;
-use std::mem::size_of;
-use std::os::unix::ffi::OsStrExt;
-use std::path::Path;
-
-use endian_trait::Endian;
-use siphasher::sip::SipHasher24;
-
-pub mod acl;
-
-/// While these constants correspond to `libc::S_` constants, we need these to be fixed for the
-/// format itself, so we redefine them here.
-///
-/// Additionally this gets rid of a bunch of casts between u32 and u64.
-///
-/// You can usually find the values for these in `/usr/include/linux/stat.h`.
-#[rustfmt::skip]
-pub mod mode {
- pub const IFMT : u64 = 0o0170000;
-
- pub const IFSOCK : u64 = 0o0140000;
- pub const IFLNK : u64 = 0o0120000;
- pub const IFREG : u64 = 0o0100000;
- pub const IFBLK : u64 = 0o0060000;
- pub const IFDIR : u64 = 0o0040000;
- pub const IFCHR : u64 = 0o0020000;
- pub const IFIFO : u64 = 0o0010000;
-
- pub const ISUID : u64 = 0o0004000;
- pub const ISGID : u64 = 0o0002000;
- pub const ISVTX : u64 = 0o0001000;
-}
-
-pub const PXAR_ENTRY: u64 = 0x1396fabcea5bbb51;
-pub const PXAR_FILENAME: u64 = 0x6dbb6ebcb3161f0b;
-pub const PXAR_SYMLINK: u64 = 0x664a6fb6830e0d6c;
-pub const PXAR_DEVICE: u64 = 0xac3dace369dfe643;
-pub const PXAR_XATTR: u64 = 0xb8157091f80bc486;
-pub const PXAR_ACL_USER: u64 = 0x297dc88b2ef12faf;
-pub const PXAR_ACL_GROUP: u64 = 0x36f2acb56cb3dd0b;
-pub const PXAR_ACL_GROUP_OBJ: u64 = 0x23047110441f38f3;
-pub const PXAR_ACL_DEFAULT: u64 = 0xfe3eeda6823c8cd0;
-pub const PXAR_ACL_DEFAULT_USER: u64 = 0xbdf03df9bd010a91;
-pub const PXAR_ACL_DEFAULT_GROUP: u64 = 0xa0cb1168782d1f51;
-pub const PXAR_FCAPS: u64 = 0xf7267db0afed0629;
-pub const PXAR_QUOTA_PROJID: u64 = 0x161baf2d8772a72b;
-
-/// Marks item as hardlink
-/// compute_goodbye_hash(b"__PROXMOX_FORMAT_HARDLINK__");
-pub const PXAR_HARDLINK: u64 = 0x2c5e06f634f65b86;
-/// Marks the beginnig of the payload (actual content) of regular files
-pub const PXAR_PAYLOAD: u64 = 0x8b9e1d93d6dcffc9;
-/// Marks item as entry of goodbye table
-pub const PXAR_GOODBYE: u64 = 0xdfd35c5e8327c403;
-/// The end marker used in the GOODBYE object
-pub const PXAR_GOODBYE_TAIL_MARKER: u64 = 0x57446fa533702943;
-
-#[derive(Debug, Endian)]
-#[repr(C)]
-pub struct Header {
- /// The item type (see `PXAR_` constants).
- pub htype: u64,
- /// The size of the item, including the size of `Header`.
- full_size: u64,
-}
-
-impl Header {
- #[inline]
- pub fn with_full_size(htype: u64, full_size: u64) -> Self {
- Self { htype, full_size }
- }
-
- #[inline]
- pub fn with_content_size(htype: u64, content_size: u64) -> Self {
- Self::with_full_size(htype, content_size + size_of::<Header>() as u64)
- }
-
- #[inline]
- pub fn full_size(&self) -> u64 {
- self.full_size
- }
-
- #[inline]
- pub fn content_size(&self) -> u64 {
- self.full_size() - (size_of::<Self>() as u64)
- }
-}
-
-#[derive(Clone, Debug, Default, Endian)]
-#[repr(C)]
-pub struct Entry {
- pub mode: u64,
- pub flags: u64,
- pub uid: u32,
- pub gid: u32,
- pub mtime: u64,
-}
-
-/// Builder pattern methods.
-impl Entry {
- pub const fn mode(self, mode: u64) -> Self {
- Self { mode, ..self }
- }
-
- pub const fn flags(self, flags: u64) -> Self {
- Self { flags, ..self }
- }
-
- pub const fn uid(self, uid: u32) -> Self {
- Self { uid, ..self }
- }
-
- pub const fn gid(self, gid: u32) -> Self {
- Self { gid, ..self }
- }
-
- pub const fn mtime(self, mtime: u64) -> Self {
- Self { mtime, ..self }
- }
-
- pub const fn set_dir(self) -> Self {
- let mode = self.mode;
- self.mode((mode & !mode::IFMT) | mode::IFDIR)
- }
-
- pub const fn set_regular_file(self) -> Self {
- let mode = self.mode;
- self.mode((mode & !mode::IFMT) | mode::IFREG)
- }
-
- pub const fn set_symlink(self) -> Self {
- let mode = self.mode;
- self.mode((mode & !mode::IFMT) | mode::IFLNK)
- }
-
- pub const fn set_blockdev(self) -> Self {
- let mode = self.mode;
- self.mode((mode & !mode::IFMT) | mode::IFBLK)
- }
-
- pub const fn set_chardev(self) -> Self {
- let mode = self.mode;
- self.mode((mode & !mode::IFMT) | mode::IFCHR)
- }
-
- pub const fn set_fifo(self) -> Self {
- let mode = self.mode;
- self.mode((mode & !mode::IFMT) | mode::IFIFO)
- }
-}
-
-/// Convenience accessor methods.
-impl Entry {
- /// Get the mtime as duration since the epoch.
- pub fn mtime_as_duration(&self) -> std::time::Duration {
- std::time::Duration::from_nanos(self.mtime)
- }
-
- /// Get the file type portion of the mode bitfield.
- pub fn get_file_bits(&self) -> u64 {
- self.mode & mode::IFMT
- }
-
- /// Get the permission portion of the mode bitfield.
- pub fn get_permission_bits(&self) -> u64 {
- self.mode & !mode::IFMT
- }
-}
-
-/// Convenience methods.
-impl Entry {
- /// Get the file type (`mode & mode::IFMT`).
- pub fn file_type(&self) -> u64 {
- self.mode & mode::IFMT
- }
-
- /// Get the file mode bits (`mode & !mode::IFMT`).
- pub fn file_mode(&self) -> u64 {
- self.mode & !mode::IFMT
- }
-
- /// Check whether this is a directory.
- pub fn is_dir(&self) -> bool {
- (self.mode & mode::IFMT) == mode::IFDIR
- }
-
- /// Check whether this is a symbolic link.
- pub fn is_symlink(&self) -> bool {
- (self.mode & mode::IFMT) == mode::IFLNK
- }
-
- /// Check whether this is a device node.
- pub fn is_device(&self) -> bool {
- let fmt = self.mode & mode::IFMT;
- fmt == mode::IFCHR || fmt == mode::IFBLK
- }
-
- /// Check whether this is a block device node.
- pub fn is_blockdev(&self) -> bool {
- let fmt = self.mode & mode::IFMT;
- fmt == mode::IFBLK
- }
-
- /// Check whether this is a character device node.
- pub fn is_chardev(&self) -> bool {
- let fmt = self.mode & mode::IFMT;
- fmt == mode::IFCHR
- }
-
- /// Check whether this is a regular file.
- pub fn is_regular_file(&self) -> bool {
- (self.mode & mode::IFMT) == mode::IFREG
- }
-
- /// Check whether this is a named pipe (FIFO).
- pub fn is_fifo(&self) -> bool {
- (self.mode & mode::IFMT) == mode::IFIFO
- }
-
- /// Check whether this is a named socket.
- pub fn is_socket(&self) -> bool {
- (self.mode & mode::IFMT) == mode::IFSOCK
- }
-}
-
-impl From<&std::fs::Metadata> for Entry {
- fn from(meta: &std::fs::Metadata) -> Entry {
- #[cfg(unix)]
- use std::os::unix::fs::MetadataExt;
-
- let this = Entry::default();
-
- #[cfg(unix)]
- let this = this
- .uid(meta.uid())
- .gid(meta.gid())
- .mode(meta.mode() as u64)
- .mtime(meta.mtime() as u64);
-
- let file_type = meta.file_type();
- let mode = this.mode;
- let this = if file_type.is_dir() {
- this.mode(mode | mode::IFDIR)
- } else if file_type.is_symlink() {
- this.mode(mode | mode::IFLNK)
- } else {
- this.mode(mode | mode::IFREG)
- };
-
- this
- }
-}
-
-#[derive(Clone, Debug)]
-pub struct Filename {
- pub name: Vec<u8>,
-}
-
-#[derive(Clone, Debug)]
-pub struct Symlink {
- pub data: Vec<u8>,
-}
-
-impl Symlink {
- pub fn as_os_str(&self) -> &OsStr {
- self.as_ref()
- }
-}
-
-impl AsRef<[u8]> for Symlink {
- fn as_ref(&self) -> &[u8] {
- &self.data
- }
-}
-
-impl AsRef<OsStr> for Symlink {
- fn as_ref(&self) -> &OsStr {
- OsStr::from_bytes(&self.data[..self.data.len().max(1) - 1])
- }
-}
-
-#[derive(Clone, Debug)]
-pub struct Hardlink {
- pub offset: u64,
- pub data: Vec<u8>,
-}
-
-impl Hardlink {
- pub fn as_os_str(&self) -> &OsStr {
- self.as_ref()
- }
-}
-
-impl AsRef<[u8]> for Hardlink {
- fn as_ref(&self) -> &[u8] {
- &self.data
- }
-}
-
-impl AsRef<OsStr> for Hardlink {
- fn as_ref(&self) -> &OsStr {
- OsStr::from_bytes(&self.data[..self.data.len().max(1) - 1])
- }
-}
-
-#[derive(Clone, Debug, Eq)]
-#[repr(C)]
-pub struct XAttr {
- pub(crate) data: Vec<u8>,
- pub(crate) name_len: usize,
-}
-
-impl XAttr {
- pub fn new<N: AsRef<[u8]>, V: AsRef<[u8]>>(name: N, value: V) -> Self {
- let name = name.as_ref();
- let value = value.as_ref();
- let mut data = Vec::with_capacity(name.len() + value.len() + 1);
- data.extend(name);
- data.push(0);
- data.extend(value);
- Self {
- data,
- name_len: name.len(),
- }
- }
-
- pub fn name(&self) -> &CStr {
- unsafe { CStr::from_bytes_with_nul_unchecked(&self.data[..self.name_len + 1]) }
- }
-
- pub fn value(&self) -> &[u8] {
- &self.data[(self.name_len + 1)..]
- }
-}
-
-impl Ord for XAttr {
- fn cmp(&self, other: &XAttr) -> Ordering {
- self.name().cmp(&other.name())
- }
-}
-
-impl PartialOrd for XAttr {
- fn partial_cmp(&self, other: &XAttr) -> Option<Ordering> {
- Some(self.cmp(other))
- }
-}
-
-impl PartialEq for XAttr {
- fn eq(&self, other: &XAttr) -> bool {
- self.name() == other.name()
- }
-}
-
-#[derive(Clone, Debug, Endian)]
-#[repr(C)]
-pub struct Device {
- pub major: u64,
- pub minor: u64,
-}
-
-#[cfg(target_os = "linux")]
-impl Device {
- /// Get a `dev_t` value for this device.
- #[rustfmt::skip]
- pub fn to_dev_t(&self) -> u64 {
- // see bits/sysmacros.h
- ((self.major & 0x0000_0fff) << 8) |
- ((self.major & 0xffff_f000) << 32) |
- (self.minor & 0x0000_00ff) |
- ((self.minor & 0xffff_ff00) << 12)
- }
-
- /// Get a `Device` from a `dev_t` value.
- #[rustfmt::skip]
- pub fn from_dev_t(dev: u64) -> Self {
- // see to_dev_t
- Self {
- major: (dev >> 8) & 0x0000_0fff |
- (dev >> 32) & 0xffff_f000,
- minor: dev & 0x0000_00ff |
- (dev >> 12) & 0xffff_ff00,
- }
- }
-}
-
-#[cfg(all(test, target_os = "linux"))]
-#[test]
-fn test_linux_devices() {
- let c_dev = unsafe { ::libc::makedev(0xabcd_1234, 0xdcba_5678) };
- let dev = Device::from_dev_t(c_dev);
- assert_eq!(dev.to_dev_t(), c_dev);
-}
-
-#[derive(Clone, Debug)]
-#[repr(C)]
-pub struct FCaps {
- pub data: Vec<u8>,
-}
-
-#[derive(Clone, Copy, Debug, Endian)]
-#[repr(C)]
-pub struct QuotaProjectId {
- pub projid: u64,
-}
-
-#[derive(Clone, Debug, Endian)]
-#[repr(C)]
-pub struct GoodbyeItem {
- /// SipHash24 of the directory item name. The last GOODBYE item uses the special hash value
- /// `PXAR_GOODBYE_TAIL_MARKER`.
- pub hash: u64,
-
- /// The offset from the start of the GOODBYE object to the start of the matching directory item
- /// (point to a FILENAME). The last GOODBYE item points to the start of the matching ENTRY
- /// object.
- pub offset: u64,
-
- /// The overall size of the directory item. This includes the FILENAME header. In other words,
- /// `goodbye_start - offset + size` points to the end of the directory.
- ///
- /// The last GOODBYE item repeats the size of the GOODBYE item.
- pub size: u64,
-}
-
-impl GoodbyeItem {
- pub fn new(name: &[u8], offset: u64, size: u64) -> Self {
- let hash = hash_filename(name);
- Self { hash, offset, size }
- }
-}
-
-pub fn hash_filename(name: &[u8]) -> u64 {
- use std::hash::Hasher;
- let mut hasher = SipHasher24::new_with_keys(0x8574442b0f1d84b3, 0x2736ed30d1c22ec1);
- hasher.write(name);
- hasher.finish()
-}
-
-pub fn path_is_legal_component(path: &Path) -> bool {
- let mut components = path.components();
- match components.next() {
- Some(std::path::Component::Normal(_)) => (),
- _ => return false,
- }
- components.next().is_none()
-}
-
-pub fn check_file_name(path: &Path) -> io::Result<()> {
- if !path_is_legal_component(path) {
- io_bail!("invalid file name in archive: {:?}", path);
- } else {
- Ok(())
- }
-}
--- /dev/null
+//! *pxar* binary format definition
+//!
+//! Please note that all values are stored in little endian ordering.
+//!
+//! The Archive contains a list of items. Each item starts with a `Header`, followed by the
+//! item data.
+
+use std::cmp::Ordering;
+use std::ffi::{CStr, OsStr};
+use std::io;
+use std::mem::size_of;
+use std::os::unix::ffi::OsStrExt;
+use std::path::Path;
+
+use endian_trait::Endian;
+use siphasher::sip::SipHasher24;
+
+pub mod acl;
+
+/// While these constants correspond to `libc::S_` constants, we need these to be fixed for the
+/// format itself, so we redefine them here.
+///
+/// Additionally this gets rid of a bunch of casts between u32 and u64.
+///
+/// You can usually find the values for these in `/usr/include/linux/stat.h`.
+#[rustfmt::skip]
+pub mod mode {
+ /// Bit mask selecting the file type portion of a mode value.
+ pub const IFMT : u64 = 0o0170000;
+
+ /// File type: socket.
+ pub const IFSOCK : u64 = 0o0140000;
+ /// File type: symbolic link.
+ pub const IFLNK : u64 = 0o0120000;
+ /// File type: regular file.
+ pub const IFREG : u64 = 0o0100000;
+ /// File type: block device.
+ pub const IFBLK : u64 = 0o0060000;
+ /// File type: directory.
+ pub const IFDIR : u64 = 0o0040000;
+ /// File type: character device.
+ pub const IFCHR : u64 = 0o0020000;
+ /// File type: named pipe (FIFO).
+ pub const IFIFO : u64 = 0o0010000;
+
+ /// Counterpart of `libc::S_ISUID` (set-user-ID bit).
+ pub const ISUID : u64 = 0o0004000;
+ /// Counterpart of `libc::S_ISGID` (set-group-ID bit).
+ pub const ISGID : u64 = 0o0002000;
+ /// Counterpart of `libc::S_ISVTX` (sticky bit).
+ pub const ISVTX : u64 = 0o0001000;
+}
+
+// Item type tags: the `Header` documentation refers to the `PXAR_` constants as the possible
+// values of its `htype` field.
+pub const PXAR_ENTRY: u64 = 0x1396fabcea5bbb51;
+pub const PXAR_FILENAME: u64 = 0x6dbb6ebcb3161f0b;
+pub const PXAR_SYMLINK: u64 = 0x664a6fb6830e0d6c;
+pub const PXAR_DEVICE: u64 = 0xac3dace369dfe643;
+pub const PXAR_XATTR: u64 = 0xb8157091f80bc486;
+pub const PXAR_ACL_USER: u64 = 0x297dc88b2ef12faf;
+pub const PXAR_ACL_GROUP: u64 = 0x36f2acb56cb3dd0b;
+pub const PXAR_ACL_GROUP_OBJ: u64 = 0x23047110441f38f3;
+pub const PXAR_ACL_DEFAULT: u64 = 0xfe3eeda6823c8cd0;
+pub const PXAR_ACL_DEFAULT_USER: u64 = 0xbdf03df9bd010a91;
+pub const PXAR_ACL_DEFAULT_GROUP: u64 = 0xa0cb1168782d1f51;
+pub const PXAR_FCAPS: u64 = 0xf7267db0afed0629;
+pub const PXAR_QUOTA_PROJID: u64 = 0x161baf2d8772a72b;
+
+/// Marks item as hardlink
+/// compute_goodbye_hash(b"__PROXMOX_FORMAT_HARDLINK__");
+pub const PXAR_HARDLINK: u64 = 0x2c5e06f634f65b86;
+/// Marks the beginning of the payload (actual content) of regular files
+pub const PXAR_PAYLOAD: u64 = 0x8b9e1d93d6dcffc9;
+/// Marks item as entry of goodbye table
+pub const PXAR_GOODBYE: u64 = 0xdfd35c5e8327c403;
+/// The end marker used in the GOODBYE object
+pub const PXAR_GOODBYE_TAIL_MARKER: u64 = 0x57446fa533702943;
+
+/// Header preceding the data of every item in the archive.
+///
+/// `full_size` is private; use `Header::with_full_size` / `Header::with_content_size` to build
+/// a header and `Header::full_size` / `Header::content_size` to read the sizes back.
+#[derive(Debug, Endian)]
+#[repr(C)]
+pub struct Header {
+ /// The item type (see `PXAR_` constants).
+ pub htype: u64,
+ /// The size of the item, including the size of `Header`.
+ full_size: u64,
+}
+
+impl Header {
+    /// Create a header for an item of type `htype` whose total size, *including* this header,
+    /// is `full_size` bytes.
+    #[inline]
+    pub fn with_full_size(htype: u64, full_size: u64) -> Self {
+        Self { htype, full_size }
+    }
+
+    /// Create a header for an item of type `htype` whose content (everything after the header)
+    /// is `content_size` bytes long.
+    #[inline]
+    pub fn with_content_size(htype: u64, content_size: u64) -> Self {
+        Self::with_full_size(htype, content_size + size_of::<Header>() as u64)
+    }
+
+    /// The size of the whole item in bytes, header included.
+    #[inline]
+    pub fn full_size(&self) -> u64 {
+        self.full_size
+    }
+
+    /// The size of the item's content in bytes, i.e. the full size minus the header size.
+    ///
+    /// A header read from a corrupt archive may claim a full size smaller than the header
+    /// itself. The subtraction saturates at `0` in that case instead of panicking (debug
+    /// builds) or wrapping around to an enormous bogus size (release builds).
+    #[inline]
+    pub fn content_size(&self) -> u64 {
+        self.full_size().saturating_sub(size_of::<Self>() as u64)
+    }
+}
+
+/// Metadata record of an archive entry.
+#[derive(Clone, Debug, Default, Endian)]
+#[repr(C)]
+pub struct Entry {
+ /// File type (`mode::IFMT` portion) and permission bits.
+ pub mode: u64,
+ /// Flag bits; their meaning is not defined in this part of the file.
+ pub flags: u64,
+ /// Owning user id.
+ pub uid: u32,
+ /// Owning group id.
+ pub gid: u32,
+ /// Modification time; `mtime_as_duration` interprets it as nanoseconds since the epoch.
+ pub mtime: u64,
+}
+
+/// Builder pattern methods.
+///
+/// Every method consumes the entry and hands back the updated value, so calls can be chained.
+impl Entry {
+    /// Replace the whole `mode` field.
+    pub const fn mode(self, mode: u64) -> Self {
+        Entry { mode, ..self }
+    }
+
+    /// Replace the `flags` field.
+    pub const fn flags(self, flags: u64) -> Self {
+        Entry { flags, ..self }
+    }
+
+    /// Replace the owning user id.
+    pub const fn uid(self, uid: u32) -> Self {
+        Entry { uid, ..self }
+    }
+
+    /// Replace the owning group id.
+    pub const fn gid(self, gid: u32) -> Self {
+        Entry { gid, ..self }
+    }
+
+    /// Replace the modification time.
+    pub const fn mtime(self, mtime: u64) -> Self {
+        Entry { mtime, ..self }
+    }
+
+    /// Set the file type to directory, keeping all non-type mode bits.
+    pub const fn set_dir(self) -> Self {
+        let without_type = self.mode & !mode::IFMT;
+        self.mode(without_type | mode::IFDIR)
+    }
+
+    /// Set the file type to regular file, keeping all non-type mode bits.
+    pub const fn set_regular_file(self) -> Self {
+        let without_type = self.mode & !mode::IFMT;
+        self.mode(without_type | mode::IFREG)
+    }
+
+    /// Set the file type to symbolic link, keeping all non-type mode bits.
+    pub const fn set_symlink(self) -> Self {
+        let without_type = self.mode & !mode::IFMT;
+        self.mode(without_type | mode::IFLNK)
+    }
+
+    /// Set the file type to block device, keeping all non-type mode bits.
+    pub const fn set_blockdev(self) -> Self {
+        let without_type = self.mode & !mode::IFMT;
+        self.mode(without_type | mode::IFBLK)
+    }
+
+    /// Set the file type to character device, keeping all non-type mode bits.
+    pub const fn set_chardev(self) -> Self {
+        let without_type = self.mode & !mode::IFMT;
+        self.mode(without_type | mode::IFCHR)
+    }
+
+    /// Set the file type to FIFO, keeping all non-type mode bits.
+    pub const fn set_fifo(self) -> Self {
+        let without_type = self.mode & !mode::IFMT;
+        self.mode(without_type | mode::IFIFO)
+    }
+}
+
+/// Convenience accessor methods.
+impl Entry {
+    /// Get the mtime as duration since the epoch (the field is read as nanoseconds).
+    pub fn mtime_as_duration(&self) -> std::time::Duration {
+        use std::time::Duration;
+
+        Duration::from_nanos(self.mtime)
+    }
+
+    /// Get the file type portion of the mode bitfield.
+    pub fn get_file_bits(&self) -> u64 {
+        mode::IFMT & self.mode
+    }
+
+    /// Get the permission portion of the mode bitfield (everything outside `mode::IFMT`).
+    pub fn get_permission_bits(&self) -> u64 {
+        !mode::IFMT & self.mode
+    }
+}
+
+/// Convenience methods.
+impl Entry {
+    /// Get the file type (`mode & mode::IFMT`).
+    pub fn file_type(&self) -> u64 {
+        mode::IFMT & self.mode
+    }
+
+    /// Get the file mode bits (`mode & !mode::IFMT`).
+    pub fn file_mode(&self) -> u64 {
+        !mode::IFMT & self.mode
+    }
+
+    /// Check whether this is a directory.
+    pub fn is_dir(&self) -> bool {
+        self.file_type() == mode::IFDIR
+    }
+
+    /// Check whether this is a symbolic link.
+    pub fn is_symlink(&self) -> bool {
+        self.file_type() == mode::IFLNK
+    }
+
+    /// Check whether this is a device node (character or block).
+    pub fn is_device(&self) -> bool {
+        self.is_chardev() || self.is_blockdev()
+    }
+
+    /// Check whether this is a block device node.
+    pub fn is_blockdev(&self) -> bool {
+        self.file_type() == mode::IFBLK
+    }
+
+    /// Check whether this is a character device node.
+    pub fn is_chardev(&self) -> bool {
+        self.file_type() == mode::IFCHR
+    }
+
+    /// Check whether this is a regular file.
+    pub fn is_regular_file(&self) -> bool {
+        self.file_type() == mode::IFREG
+    }
+
+    /// Check whether this is a named pipe (FIFO).
+    pub fn is_fifo(&self) -> bool {
+        self.file_type() == mode::IFIFO
+    }
+
+    /// Check whether this is a named socket.
+    pub fn is_socket(&self) -> bool {
+        self.file_type() == mode::IFSOCK
+    }
+}
+
+impl From<&std::fs::Metadata> for Entry {
+    /// Build an `Entry` from file system metadata.
+    ///
+    /// On unix the uid, gid, mode and mtime are taken from `meta`:
+    ///
+    /// * `mtime` is stored in nanoseconds since the epoch, which is the unit
+    ///   `Entry::mtime_as_duration` reads it in. Time stamps before the epoch cannot be
+    ///   represented in the unsigned field and are clamped to `0`.
+    /// * The raw unix mode already contains the file type bits, so it is stored unchanged.
+    ///   This keeps device nodes, FIFOs and sockets intact.
+    ///
+    /// Only when the mode carries no file type bits at all (e.g. on non-unix targets, where
+    /// the entry starts out with its default mode) is the type derived from
+    /// `meta.file_type()`: directory, symlink, or regular file for everything else.
+    fn from(meta: &std::fs::Metadata) -> Entry {
+        #[cfg(unix)]
+        use std::os::unix::fs::MetadataExt;
+
+        let this = Entry::default();
+
+        #[cfg(unix)]
+        let this = {
+            // `mtime()` is whole seconds and `mtime_nsec()` the sub-second remainder; combine
+            // them without overflowing on extreme time stamps.
+            let nanos = meta
+                .mtime()
+                .saturating_mul(1_000_000_000)
+                .saturating_add(meta.mtime_nsec());
+            this.uid(meta.uid())
+                .gid(meta.gid())
+                .mode(u64::from(meta.mode()))
+                .mtime(nanos.max(0) as u64)
+        };
+
+        // Never OR a second file type into a mode that already has one: the type values are
+        // not independent bits (`IFCHR | IFREG` would read back as `IFLNK`).
+        if (this.mode & mode::IFMT) != 0 {
+            return this;
+        }
+
+        let file_type = meta.file_type();
+        let type_bits = if file_type.is_dir() {
+            mode::IFDIR
+        } else if file_type.is_symlink() {
+            mode::IFLNK
+        } else {
+            mode::IFREG
+        };
+
+        let mode = this.mode;
+        this.mode(mode | type_bits)
+    }
+}
+
+/// A file name, kept as raw bytes.
+#[derive(Clone, Debug)]
+pub struct Filename {
+ /// The name bytes. NOTE(review): whether a trailing NUL byte is included is not visible
+ /// here; confirm against the code that fills this field.
+ pub name: Vec<u8>,
+}
+
+/// Target of a symbolic link, kept as raw bytes.
+#[derive(Clone, Debug)]
+pub struct Symlink {
+    /// The raw bytes. The `OsStr` view drops the final byte, so this presumably ends in a
+    /// NUL terminator (TODO confirm against the code that fills it).
+    pub data: Vec<u8>,
+}
+
+impl Symlink {
+    /// View the link target as an `OsStr`; this is `data` without its last byte.
+    pub fn as_os_str(&self) -> &OsStr {
+        AsRef::<OsStr>::as_ref(self)
+    }
+}
+
+impl AsRef<[u8]> for Symlink {
+    /// The complete raw data, last byte included.
+    fn as_ref(&self) -> &[u8] {
+        self.data.as_slice()
+    }
+}
+
+impl AsRef<OsStr> for Symlink {
+    /// The data without its last byte; empty data yields an empty string.
+    fn as_ref(&self) -> &OsStr {
+        let end = self.data.len().saturating_sub(1);
+        OsStr::from_bytes(&self.data[..end])
+    }
+}
+
+/// A hard link entry: an offset plus the link's raw byte data.
+#[derive(Clone, Debug)]
+pub struct Hardlink {
+    /// NOTE(review): presumably the archive offset of the entry this link refers to; the
+    /// exact reference point is not visible here, confirm against the encoder/decoder.
+    pub offset: u64,
+    /// The raw bytes. The `OsStr` view drops the final byte, so this presumably ends in a
+    /// NUL terminator (TODO confirm against the code that fills it).
+    pub data: Vec<u8>,
+}
+
+impl Hardlink {
+    /// View the link data as an `OsStr`; this is `data` without its last byte.
+    pub fn as_os_str(&self) -> &OsStr {
+        AsRef::<OsStr>::as_ref(self)
+    }
+}
+
+impl AsRef<[u8]> for Hardlink {
+    /// The complete raw data, last byte included.
+    fn as_ref(&self) -> &[u8] {
+        self.data.as_slice()
+    }
+}
+
+impl AsRef<OsStr> for Hardlink {
+    /// The data without its last byte; empty data yields an empty string.
+    fn as_ref(&self) -> &OsStr {
+        let end = self.data.len().saturating_sub(1);
+        OsStr::from_bytes(&self.data[..end])
+    }
+}
+
+/// An extended attribute, stored as `name`, a NUL byte, then `value` in a single buffer.
+///
+/// Ordering and equality consider the attribute *name* only; the value is ignored.
+#[derive(Clone, Debug, Eq)]
+#[repr(C)]
+pub struct XAttr {
+    /// `name` + `\0` + `value`.
+    pub(crate) data: Vec<u8>,
+    /// Length of the name, not counting the NUL separator.
+    pub(crate) name_len: usize,
+}
+
+impl XAttr {
+    /// Create an attribute from a name and a value.
+    pub fn new<N: AsRef<[u8]>, V: AsRef<[u8]>>(name: N, value: V) -> Self {
+        let (name, value) = (name.as_ref(), value.as_ref());
+        let name_len = name.len();
+
+        let mut data = Vec::with_capacity(name_len + value.len() + 1);
+        data.extend_from_slice(name);
+        data.push(0);
+        data.extend_from_slice(value);
+
+        Self { data, name_len }
+    }
+
+    /// The attribute name as a C string (including the NUL separator as terminator).
+    pub fn name(&self) -> &CStr {
+        let with_nul = &self.data[..=self.name_len];
+        // SAFETY: `new` places a NUL byte directly after the name, so the slice ends in NUL.
+        // NOTE(review): nothing here rejects a name containing interior NUL bytes, and the
+        // fields are `pub(crate)`; other constructors in the crate must uphold the same layout.
+        unsafe { CStr::from_bytes_with_nul_unchecked(with_nul) }
+    }
+
+    /// The attribute value: everything after the NUL separator.
+    pub fn value(&self) -> &[u8] {
+        let start = self.name_len + 1;
+        &self.data[start..]
+    }
+}
+
+impl Ord for XAttr {
+    /// Order attributes by name.
+    fn cmp(&self, other: &XAttr) -> Ordering {
+        Ord::cmp(self.name(), other.name())
+    }
+}
+
+impl PartialOrd for XAttr {
+    fn partial_cmp(&self, other: &XAttr) -> Option<Ordering> {
+        Some(Ord::cmp(self, other))
+    }
+}
+
+impl PartialEq for XAttr {
+    /// Two attributes are equal when their names are equal.
+    fn eq(&self, other: &XAttr) -> bool {
+        self.name().eq(other.name())
+    }
+}
+
+/// Device numbers of a device node, split into major and minor part.
+#[derive(Clone, Debug, Endian)]
+#[repr(C)]
+pub struct Device {
+ /// Major device number.
+ pub major: u64,
+ /// Minor device number.
+ pub minor: u64,
+}
+
+#[cfg(target_os = "linux")]
+impl Device {
+    /// Get a `dev_t` value for this device.
+    ///
+    /// Packs major and minor number with the bit layout from bits/sysmacros.h: the low 12
+    /// major bits go to bits 8..20, the remaining major bits to bit 44 and up, the low 8 minor
+    /// bits stay at the bottom and the remaining minor bits start at bit 20.
+    #[rustfmt::skip]
+    pub fn to_dev_t(&self) -> u64 {
+        let major_low  = (self.major & 0x0000_0fff) << 8;
+        let major_high = (self.major & 0xffff_f000) << 32;
+        let minor_low  =  self.minor & 0x0000_00ff;
+        let minor_high = (self.minor & 0xffff_ff00) << 12;
+
+        major_low | major_high | minor_low | minor_high
+    }
+
+    /// Get a `Device` from a `dev_t` value.
+    ///
+    /// This is the inverse of `to_dev_t`.
+    #[rustfmt::skip]
+    pub fn from_dev_t(dev: u64) -> Self {
+        let major = ((dev >>  8) & 0x0000_0fff) | ((dev >> 32) & 0xffff_f000);
+        let minor = ( dev        & 0x0000_00ff) | ((dev >> 12) & 0xffff_ff00);
+
+        Self { major, minor }
+    }
+}
+
+/// Round-trip check: a `dev_t` built by libc's `makedev` must survive `from_dev_t` followed by
+/// `to_dev_t` unchanged.
+#[cfg(all(test, target_os = "linux"))]
+#[test]
+fn test_linux_devices() {
+    let expected = unsafe { ::libc::makedev(0xabcd_1234, 0xdcba_5678) };
+    let round_tripped = Device::from_dev_t(expected).to_dev_t();
+    assert_eq!(round_tripped, expected);
+}
+
+/// Payload of an `FCAPS` item (see `PXAR_FCAPS`), presumably file capabilities, kept as raw
+/// bytes.
+#[derive(Clone, Debug)]
+#[repr(C)]
+pub struct FCaps {
+ /// The raw bytes; their encoding is not interpreted in this part of the file.
+ pub data: Vec<u8>,
+}
+
+/// Quota project id of an entry (see `PXAR_QUOTA_PROJID`).
+#[derive(Clone, Copy, Debug, Endian)]
+#[repr(C)]
+pub struct QuotaProjectId {
+ /// The project id value.
+ pub projid: u64,
+}
+
+/// One entry of a GOODBYE object (the goodbye table, see `PXAR_GOODBYE`).
+///
+/// Regular entries describe one directory item each, identified by the hash of its name; the
+/// last entry is a tail marker with special field values, as described on the fields below.
+#[derive(Clone, Debug, Endian)]
+#[repr(C)]
+pub struct GoodbyeItem {
+ /// SipHash24 of the directory item name. The last GOODBYE item uses the special hash value
+ /// `PXAR_GOODBYE_TAIL_MARKER`.
+ pub hash: u64,
+
+ /// The offset from the start of the GOODBYE object to the start of the matching directory item
+ /// (point to a FILENAME). The last GOODBYE item points to the start of the matching ENTRY
+ /// object.
+ pub offset: u64,
+
+ /// The overall size of the directory item. This includes the FILENAME header. In other words,
+ /// `goodbye_start - offset + size` points to the end of the directory.
+ ///
+ /// The last GOODBYE item repeats the size of the GOODBYE item.
+ pub size: u64,
+}
+
+impl GoodbyeItem {
+    /// Create the goodbye table entry for the directory item called `name`.
+    ///
+    /// The name is stored as its `hash_filename` hash; `offset` and `size` are stored as given.
+    pub fn new(name: &[u8], offset: u64, size: u64) -> Self {
+        Self {
+            hash: hash_filename(name),
+            offset,
+            size,
+        }
+    }
+}
+
+/// Compute the SipHash24 of a file name, using the fixed key pair hard-coded below.
+///
+/// This is the value stored in `GoodbyeItem::hash`.
+pub fn hash_filename(name: &[u8]) -> u64 {
+    use std::hash::Hasher;
+
+    const KEY_0: u64 = 0x8574442b0f1d84b3;
+    const KEY_1: u64 = 0x2736ed30d1c22ec1;
+
+    let mut state = SipHasher24::new_with_keys(KEY_0, KEY_1);
+    Hasher::write(&mut state, name);
+    Hasher::finish(&state)
+}
+
+/// Check that `path` consists of exactly one normal path component.
+///
+/// Empty paths, `.`, `..`, absolute paths and paths with more than one component are rejected.
+pub fn path_is_legal_component(path: &Path) -> bool {
+    use std::path::Component;
+
+    let mut parts = path.components();
+    let starts_with_normal = match parts.next() {
+        Some(Component::Normal(_)) => true,
+        _ => false,
+    };
+
+    // Only look for a second component when the first one was acceptable.
+    starts_with_normal && parts.next().is_none()
+}
+
+/// Verify that `path` is usable as a file name inside an archive.
+///
+/// Succeeds when `path_is_legal_component` accepts the path, otherwise bails out through
+/// `io_bail!` with a message naming the offending path.
+pub fn check_file_name(path: &Path) -> io::Result<()> {
+    if path_is_legal_component(path) {
+        Ok(())
+    } else {
+        io_bail!("invalid file name in archive: {:?}", path);
+    }
+}