1 //! Random access for PXAR files.
3 use std
::ffi
::{OsStr, OsString}
;
5 use std
::mem
::{self, size_of, size_of_val, MaybeUninit}
;
7 use std
::os
::unix
::ffi
::{OsStrExt, OsStringExt}
;
8 use std
::path
::{Path, PathBuf}
;
11 use std
::task
::{Context, Poll}
;
13 use endian_trait
::Endian
;
15 use crate::binary_tree_array
;
16 use crate::decoder
::{self, DecoderImpl}
;
17 use crate::format
::{self, GoodbyeItem}
;
18 use crate::poll_fn
::poll_fn
;
20 use crate::{Entry, EntryKind}
;
27 pub use sync
::{Accessor, DirEntry, Directory, FileEntry, ReadDir}
;
31 /// Range information used for unsafe raw random access:
32 #[derive(Clone, Debug)]
33 pub struct EntryRangeInfo
{
34 pub filename_header_offset
: Option
<u64>,
35 pub entry_range
: Range
<u64>,
39 pub fn toplevel(entry_range
: Range
<u64>) -> Self {
41 filename_header_offset
: None
,
47 /// Random access read implementation.
54 ) -> Poll
<io
::Result
<usize>>;
57 /// Awaitable version of `poll_read_at`.
58 async
fn read_at
<T
>(input
: &T
, buf
: &mut [u8], offset
: u64) -> io
::Result
<usize>
62 poll_fn(|cx
| unsafe { Pin::new_unchecked(input).poll_read_at(cx, buf, offset) }
).await
65 /// `read_exact_at` - since that's what we _actually_ want most of the time.
66 async
fn read_exact_at
<T
>(input
: &T
, mut buf
: &mut [u8], mut offset
: u64) -> io
::Result
<()>
70 while !buf
.is_empty() {
71 match read_at(input
, buf
, offset
).await?
{
72 0 => io_bail
!("unexpected EOF"),
74 buf
= &mut buf
[got
..];
82 /// Helper to read into an `Endian`-implementing `struct`.
83 async
fn read_entry_at
<T
, E
: Endian
>(input
: &T
, offset
: u64) -> io
::Result
<E
>
87 let mut data
= MaybeUninit
::<E
>::uninit();
89 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<E>()) }
;
90 read_exact_at(input
, buf
, offset
).await?
;
91 Ok(unsafe { data.assume_init().from_le() }
)
94 /// Helper to read into an allocated byte vector.
95 async
fn read_exact_data_at
<T
>(input
: &T
, size
: usize, offset
: u64) -> io
::Result
<Vec
<u8>>
99 let mut data
= util
::vec_new(size
);
100 read_exact_at(input
, &mut data
[..], offset
).await?
;
104 /// Allow using trait objects for `T: ReadAt`
105 impl<'a
> ReadAt
for &(dyn ReadAt
+ 'a
) {
111 ) -> Poll
<io
::Result
<usize>> {
112 unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) }
116 /// Convenience impl for `Arc<dyn ReadAt + Send + Sync + 'static>`. Since `ReadAt` only requires
117 /// immutable `&self`, this adds some convenience by allowing to just `Arc` any `'static` type that
118 /// implements `ReadAt` for type monomorphization.
119 impl ReadAt
for Arc
<dyn ReadAt
+ Send
+ Sync
+ '
static> {
125 ) -> Poll
<io
::Result
<usize>> {
126 unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) }
132 /// The goodbye table cache maps goodbye table offsets to cache entries.
133 gbt_cache
: Option
<Arc
<dyn Cache
<u64, [GoodbyeItem
]> + Send
+ Sync
>>,
136 impl Default
for Caches
{
137 fn default() -> Self {
138 Self { gbt_cache: None }
142 /// The random access state machine implementation.
143 pub(crate) struct AccessorImpl
<T
> {
149 impl<T
: ReadAt
> AccessorImpl
<T
> {
150 pub async
fn new(input
: T
, size
: u64) -> io
::Result
<Self> {
151 if size
< (size_of
::<GoodbyeItem
>() as u64) {
152 io_bail
!("too small to contain a pxar archive");
158 caches
: Arc
::new(Caches
::default()),
162 pub fn size(&self) -> u64 {
166 pub async
fn open_root_ref
<'a
>(&'a
self) -> io
::Result
<DirectoryImpl
<&'a
dyn ReadAt
>> {
167 DirectoryImpl
::open_at_end(
168 &self.input
as &dyn ReadAt
,
171 Arc
::clone(&self.caches
),
176 pub fn set_goodbye_table_cache(
178 cache
: Option
<Arc
<dyn Cache
<u64, [GoodbyeItem
]> + Send
+ Sync
>>,
180 let new_caches
= Arc
::new(Caches
{
184 self.caches
= new_caches
;
188 async
fn get_decoder
<T
: ReadAt
>(
190 entry_range
: Range
<u64>,
192 ) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
193 Ok(DecoderImpl
::new_full(SeqReadAtAdapter
::new(input
, entry_range
), path
).await?
)
196 // NOTE: This performs the Decoder::read_next_item() behavior! Keep in mind when changing!
197 async
fn get_decoder_at_filename
<T
: ReadAt
>(
199 entry_range
: Range
<u64>,
201 ) -> io
::Result
<(DecoderImpl
<SeqReadAtAdapter
<T
>>, u64)> {
202 let mut decoder
= get_decoder(input
, entry_range
, path
).await?
;
203 decoder
.path_lengths
.push(0);
204 decoder
.read_next_header().await?
;
205 if decoder
.current_header
.htype
!= format
::PXAR_FILENAME
{
207 "expected filename entry, got {}",
208 decoder
.current_header
,
211 if decoder
.read_current_item().await?
!= decoder
::ItemResult
::Entry
{
212 // impossible, since we checked the header type above for a "proper" error message
213 io_bail
!("unexpected decoder state");
215 let entry_offset
= decoder
::seq_read_position(&mut decoder
.input
)
218 .ok_or_else(|| io_format_err
!("reader provided no offset"))?
;
219 Ok((decoder
, entry_offset
))
222 impl<T
: Clone
+ ReadAt
> AccessorImpl
<T
> {
223 pub async
fn open_root(&self) -> io
::Result
<DirectoryImpl
<T
>> {
224 DirectoryImpl
::open_at_end(
228 Arc
::clone(&self.caches
),
233 /// Allow opening a directory at a specified offset.
234 pub async
unsafe fn open_dir_at_end(&self, offset
: u64) -> io
::Result
<DirectoryImpl
<T
>> {
235 DirectoryImpl
::open_at_end(
239 Arc
::clone(&self.caches
),
244 /// Allow opening a regular file from a specified range.
245 pub async
unsafe fn open_file_at_range(
247 entry_range_info
: &EntryRangeInfo
,
248 ) -> io
::Result
<FileEntryImpl
<T
>> {
249 let mut decoder
= get_decoder(
251 entry_range_info
.entry_range
.clone(),
258 .ok_or_else(|| io_format_err
!("unexpected EOF while decoding file entry"))??
;
260 input
: self.input
.clone(),
262 entry_range_info
: entry_range_info
.clone(),
263 caches
: Arc
::clone(&self.caches
),
267 /// Allow opening arbitrary contents from a specific range.
268 pub unsafe fn open_contents_at_range(&self, range
: Range
<u64>) -> FileContentsImpl
<T
> {
269 FileContentsImpl
::new(self.input
.clone(), range
)
272 /// Following a hardlink breaks a couple of conventions we otherwise have, particularly we will
273 /// never know the actual length of the target entry until we're done decoding it, so this
274 /// needs to happen at the accessor level, rather than a "sub-entry-reader".
275 pub async
fn follow_hardlink(&self, entry
: &FileEntryImpl
<T
>) -> io
::Result
<FileEntryImpl
<T
>> {
276 let link_offset
= match entry
.entry
.kind() {
277 EntryKind
::Hardlink(link
) => link
.offset
,
278 _
=> io_bail
!("cannot resolve a non-hardlink"),
281 let entry_file_offset
= entry
283 .filename_header_offset
284 .ok_or_else(|| io_format_err
!("cannot follow hardlink without a file entry header"))?
;
286 if link_offset
> entry_file_offset
{
287 io_bail
!("invalid offset in hardlink");
290 let link_offset
= entry_file_offset
- link_offset
;
292 let (mut decoder
, entry_offset
) =
293 get_decoder_at_filename(self.input
.clone(), link_offset
..self.size
, PathBuf
::new())
299 .ok_or_else(|| io_format_err
!("unexpected EOF while following a hardlink"))??
;
301 EntryKind
::File { offset: None, .. }
=> {
302 io_bail
!("failed to follow hardlink, reader provided no offsets");
305 offset
: Some(offset
),
308 let meta_size
= offset
- link_offset
;
309 let entry_end
= link_offset
+ meta_size
+ size
;
311 input
: self.input
.clone(),
313 entry_range_info
: EntryRangeInfo
{
314 filename_header_offset
: Some(link_offset
),
315 entry_range
: entry_offset
..entry_end
,
317 caches
: Arc
::clone(&self.caches
),
320 _
=> io_bail
!("hardlink does not point to a regular file"),
325 /// The directory random-access state machine implementation.
326 pub(crate) struct DirectoryImpl
<T
> {
331 table
: Arc
<[GoodbyeItem
]>,
336 impl<T
: Clone
+ ReadAt
> DirectoryImpl
<T
> {
337 /// Open a directory ending at the specified position.
338 async
fn open_at_end(
343 ) -> io
::Result
<DirectoryImpl
<T
>> {
344 let tail
= Self::read_tail_entry(&input
, end_offset
).await?
;
346 if end_offset
< tail
.size
{
347 io_bail
!("goodbye tail size out of range");
350 let goodbye_ofs
= end_offset
- tail
.size
;
352 if goodbye_ofs
< tail
.offset
{
353 io_bail
!("goodbye offset out of range");
356 let entry_ofs
= goodbye_ofs
- tail
.offset
;
357 let size
= end_offset
- entry_ofs
;
359 let table
: Option
<Arc
<[GoodbyeItem
]>> = caches
362 .and_then(|cache
| cache
.fetch(goodbye_ofs
));
364 let mut this
= Self {
369 table
: table
.as_ref().map_or_else(|| Arc
::new([]), Arc
::clone
),
375 if this
.table_size() % (size_of
::<GoodbyeItem
>() as u64) != 0 {
376 io_bail
!("invalid goodbye table size: {}", this
.table_size());
380 this
.table
= this
.load_table().await?
;
381 if let Some(ref cache
) = this
.caches
.gbt_cache
{
382 cache
.insert(goodbye_ofs
, Arc
::clone(&this
.table
));
389 /// Load the entire goodbye table:
390 async
fn load_table(&self) -> io
::Result
<Arc
<[GoodbyeItem
]>> {
391 let len
= self.len();
392 let mut data
= Vec
::with_capacity(self.len());
395 let slice
= std
::slice
::from_raw_parts_mut(
396 data
.as_mut_ptr() as *mut u8,
397 len
* size_of
::<GoodbyeItem
>(),
399 read_exact_at(&self.input
, slice
, self.table_offset()).await?
;
406 fn end_offset(&self) -> u64 {
407 self.entry_ofs
+ self.size
411 fn entry_range(&self) -> Range
<u64> {
412 self.entry_ofs
..self.end_offset()
416 fn table_size(&self) -> u64 {
417 (self.end_offset() - self.goodbye_ofs
) - (size_of
::<format
::Header
>() as u64)
421 fn table_offset(&self) -> u64 {
422 self.goodbye_ofs
+ (size_of
::<format
::Header
>() as u64)
425 /// Length *excluding* the tail marker!
427 fn len(&self) -> usize {
428 (self.table_size() / (size_of
::<GoodbyeItem
>() as u64)) as usize - 1
431 /// Read the goodbye tail and perform some sanity checks.
432 async
fn read_tail_entry(input
: &T
, end_offset
: u64) -> io
::Result
<GoodbyeItem
> {
433 if end_offset
< (size_of
::<GoodbyeItem
>() as u64) {
434 io_bail
!("goodbye tail does not fit");
437 let tail_offset
= end_offset
- (size_of
::<GoodbyeItem
>() as u64);
438 let tail
: GoodbyeItem
= read_entry_at(input
, tail_offset
).await?
;
440 if tail
.hash
!= format
::PXAR_GOODBYE_TAIL_MARKER
{
441 io_bail
!("no goodbye tail marker found");
447 /// Get a decoder for the directory contents.
448 pub(crate) async
fn decode_full(&self) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
449 let (dir
, decoder
) = self.decode_one_entry(self.entry_range(), None
).await?
;
451 io_bail
!("directory does not seem to be a directory");
456 async
fn get_decoder(
458 entry_range
: Range
<u64>,
459 file_name
: Option
<&Path
>,
460 ) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
465 None
=> self.path
.clone(),
466 Some(file
) => self.path
.join(file
),
472 async
fn decode_one_entry(
474 entry_range
: Range
<u64>,
475 file_name
: Option
<&Path
>,
476 ) -> io
::Result
<(Entry
, DecoderImpl
<SeqReadAtAdapter
<T
>>)> {
477 let mut decoder
= self.get_decoder(entry_range
, file_name
).await?
;
481 .ok_or_else(|| io_format_err
!("unexpected EOF while decoding directory entry"))??
;
485 fn lookup_hash_position(&self, hash
: u64, start
: usize, skip
: usize) -> Option
<usize> {
486 binary_tree_array
::search_by(&self.table
, start
, skip
, |i
| hash
.cmp(&i
.hash
))
489 pub async
fn lookup_self(&self) -> io
::Result
<FileEntryImpl
<T
>> {
490 let (entry
, _decoder
) = self.decode_one_entry(self.entry_range(), None
).await?
;
492 input
: self.input
.clone(),
494 entry_range_info
: EntryRangeInfo
{
495 filename_header_offset
: None
,
496 entry_range
: self.entry_range(),
498 caches
: Arc
::clone(&self.caches
),
502 /// Lookup a directory entry.
503 pub async
fn lookup(&self, path
: &Path
) -> io
::Result
<Option
<FileEntryImpl
<T
>>> {
504 let mut cur
: Option
<FileEntryImpl
<T
>> = None
;
506 let mut first
= true;
507 for component
in path
.components() {
508 use std
::path
::Component
;
510 let first
= mem
::replace(&mut first
, false);
512 let component
= match component
{
513 Component
::Normal(path
) => path
,
514 Component
::ParentDir
=> io_bail
!("cannot enter parent directory in archive"),
515 Component
::RootDir
| Component
::CurDir
if first
=> {
516 cur
= Some(self.lookup_self().await?
);
519 Component
::CurDir
=> continue,
520 _
=> io_bail
!("invalid component in path"),
523 let next
= match cur
{
528 .lookup_component(component
)
531 None
=> self.lookup_component(component
).await?
,
544 /// Lookup a single directory entry component (does not handle multiple components in path)
545 pub async
fn lookup_component(&self, path
: &OsStr
) -> io
::Result
<Option
<FileEntryImpl
<T
>>> {
546 let hash
= format
::hash_filename(path
.as_bytes());
547 let first_index
= match self.lookup_hash_position(hash
, 0, 0) {
548 Some(index
) => index
,
549 None
=> return Ok(None
),
552 // Lookup FILENAME: if the hash matches but the filename doesn't, check for a duplicate
553 // hash. Once found, use the GoodbyeItem's offset+size as well as the file's Entry to return
554 // a DirEntry::Dir or Dir::Entry.
558 let index
= match self.lookup_hash_position(hash
, first_index
, dup
) {
559 Some(index
) => index
,
560 None
=> return Ok(None
),
563 let cursor
= self.get_cursor(index
).await?
;
564 if cursor
.file_name
== path
{
565 return Ok(Some(cursor
.decode_entry().await?
));
572 async
fn get_cursor
<'a
>(&'a
self, index
: usize) -> io
::Result
<DirEntryImpl
<'a
, T
>> {
573 let entry
= &self.table
[index
];
574 let file_goodbye_ofs
= entry
.offset
;
575 if self.goodbye_ofs
< file_goodbye_ofs
{
576 io_bail
!("invalid file offset");
579 let file_ofs
= self.goodbye_ofs
- file_goodbye_ofs
;
580 let (file_name
, entry_ofs
) = self.read_filename_entry(file_ofs
).await?
;
582 let entry_range
= Range
{
584 end
: file_ofs
+ entry
.size
,
586 if entry_range
.end
< entry_range
.start
{
588 "bad file: invalid entry ranges for {:?}: \
589 start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}",
600 entry_range_info
: EntryRangeInfo
{
601 filename_header_offset
: Some(file_ofs
),
604 caches
: Arc
::clone(&self.caches
),
608 async
fn read_filename_entry(&self, file_ofs
: u64) -> io
::Result
<(PathBuf
, u64)> {
609 let head
: format
::Header
= read_entry_at(&self.input
, file_ofs
).await?
;
610 if head
.htype
!= format
::PXAR_FILENAME
{
611 io_bail
!("expected PXAR_FILENAME header, found: {}", head
);
614 let mut path
= read_exact_data_at(
616 head
.content_size() as usize,
617 file_ofs
+ (size_of_val(&head
) as u64),
621 if path
.pop() != Some(0) {
622 io_bail
!("invalid file name (missing terminating zero)");
625 crate::util
::validate_filename(&path
)?
;
627 let file_name
= PathBuf
::from(OsString
::from_vec(path
));
628 format
::check_file_name(&file_name
)?
;
630 Ok((file_name
, file_ofs
+ head
.full_size()))
633 pub fn read_dir(&self) -> ReadDirImpl
<T
> {
634 ReadDirImpl
::new(self, 0)
637 pub fn entry_count(&self) -> usize {
642 /// A file entry retrieved from a Directory.
644 pub(crate) struct FileEntryImpl
<T
: Clone
+ ReadAt
> {
647 entry_range_info
: EntryRangeInfo
,
651 impl<T
: Clone
+ ReadAt
> FileEntryImpl
<T
> {
652 pub async
fn enter_directory(&self) -> io
::Result
<DirectoryImpl
<T
>> {
653 if !self.entry
.is_dir() {
654 io_bail
!("enter_directory() on a non-directory");
657 DirectoryImpl
::open_at_end(
659 self.entry_range_info
.entry_range
.end
,
660 self.entry
.path
.clone(),
661 Arc
::clone(&self.caches
),
666 /// For use with unsafe accessor methods.
667 pub fn content_range(&self) -> io
::Result
<Option
<Range
<u64>>> {
668 match self.entry
.kind
{
669 EntryKind
::File { offset: None, .. }
=> {
670 io_bail
!("cannot open file, reader provided no offset")
674 offset
: Some(offset
),
675 } => Ok(Some(offset
..(offset
+ size
))),
680 pub async
fn contents(&self) -> io
::Result
<FileContentsImpl
<T
>> {
681 match self.content_range()?
{
682 Some(range
) => Ok(FileContentsImpl
::new(self.input
.clone(), range
)),
683 None
=> io_bail
!("not a file"),
688 pub fn into_entry(self) -> Entry
{
693 pub fn entry(&self) -> &Entry
{
697 /// Exposed for raw by-offset access methods (use with `open_dir_at_end`).
699 pub fn entry_range_info(&self) -> &EntryRangeInfo
{
700 &self.entry_range_info
704 /// An iterator over the contents of a directory.
705 pub(crate) struct ReadDirImpl
<'a
, T
> {
706 dir
: &'a DirectoryImpl
<T
>,
710 impl<'a
, T
: Clone
+ ReadAt
> ReadDirImpl
<'a
, T
> {
711 fn new(dir
: &'a DirectoryImpl
<T
>, at
: usize) -> Self {
715 /// Get the next entry.
716 pub async
fn next(&mut self) -> io
::Result
<Option
<DirEntryImpl
<'a
, T
>>> {
717 if self.at
== self.dir
.table
.len() {
720 let cursor
= self.dir
.get_cursor(self.at
).await?
;
726 /// Efficient alternative to `Iterator::skip`.
728 pub fn skip(self, n
: usize) -> Self {
730 at
: (self.at
+ n
).min(self.dir
.table
.len()),
735 /// Efficient alternative to `Iterator::count`.
737 pub fn count(self) -> usize {
742 /// A cursor pointing to a file in a directory.
744 /// At this point only the file name has been read and we remembered the position for finding the
745 /// actual data. This can be upgraded into a FileEntryImpl.
746 pub(crate) struct DirEntryImpl
<'a
, T
: Clone
+ ReadAt
> {
747 dir
: &'a DirectoryImpl
<T
>,
749 entry_range_info
: EntryRangeInfo
,
753 impl<'a
, T
: Clone
+ ReadAt
> DirEntryImpl
<'a
, T
> {
754 pub fn file_name(&self) -> &Path
{
758 async
fn decode_entry(&self) -> io
::Result
<FileEntryImpl
<T
>> {
759 let (entry
, _decoder
) = self
762 self.entry_range_info
.entry_range
.clone(),
763 Some(&self.file_name
),
768 input
: self.dir
.input
.clone(),
770 entry_range_info
: self.entry_range_info
.clone(),
771 caches
: Arc
::clone(&self.caches
),
775 /// Exposed for raw by-offset access methods.
777 pub fn entry_range_info(&self) -> &EntryRangeInfo
{
778 &self.entry_range_info
782 /// A reader for file contents.
783 pub(crate) struct FileContentsImpl
<T
> {
786 /// Absolute offset inside the `input`.
790 impl<T
: Clone
+ ReadAt
> FileContentsImpl
<T
> {
791 pub fn new(input
: T
, range
: Range
<u64>) -> Self {
792 Self { input, range }
796 pub fn file_size(&self) -> u64 {
797 self.range
.end
- self.range
.start
800 async
fn read_at(&self, mut buf
: &mut [u8], offset
: u64) -> io
::Result
<usize> {
801 let size
= self.file_size();
805 let remaining
= size
- offset
;
807 if remaining
< buf
.len() as u64 {
808 buf
= &mut buf
[..(remaining
as usize)];
811 read_at(&self.input
, buf
, self.range
.start
+ offset
).await
815 impl<T
: Clone
+ ReadAt
> ReadAt
for FileContentsImpl
<T
> {
821 ) -> Poll
<io
::Result
<usize>> {
822 let size
= self.file_size();
824 return Poll
::Ready(Ok(0));
826 let remaining
= size
- offset
;
828 if remaining
< buf
.len() as u64 {
829 buf
= &mut buf
[..(remaining
as usize)];
832 let offset
= self.range
.start
+ offset
;
833 unsafe { self.map_unchecked(|this| &this.input) }
.poll_read_at(cx
, buf
, offset
)
838 pub struct SeqReadAtAdapter
<T
> {
843 impl<T
: ReadAt
> SeqReadAtAdapter
<T
> {
844 pub fn new(input
: T
, range
: Range
<u64>) -> Self {
845 if range
.end
< range
.start
{
846 panic
!("BAD SEQ READ AT ADAPTER");
848 Self { input, range }
852 fn remaining(&self) -> usize {
853 (self.range
.end
- self.range
.start
) as usize
857 impl<T
: ReadAt
> decoder
::SeqRead
for SeqReadAtAdapter
<T
> {
859 self: Pin
<&mut Self>,
862 ) -> Poll
<io
::Result
<usize>> {
863 let len
= buf
.len().min(self.remaining());
864 let buf
= &mut buf
[..len
];
866 let this
= unsafe { self.get_unchecked_mut() }
;
868 let got
= ready
!(unsafe {
869 Pin
::new_unchecked(&this
.input
).poll_read_at(cx
, buf
, this
.range
.start
)
871 this
.range
.start
+= got
as u64;
875 fn poll_position(self: Pin
<&mut Self>, _cx
: &mut Context
) -> Poll
<Option
<io
::Result
<u64>>> {
876 Poll
::Ready(Some(Ok(self.range
.start
)))