1 //! Random access for PXAR files.
3 use std
::ffi
::{OsStr, OsString}
;
5 use std
::mem
::{self, size_of, size_of_val, MaybeUninit}
;
7 use std
::os
::unix
::ffi
::{OsStrExt, OsStringExt}
;
8 use std
::path
::{Path, PathBuf}
;
11 use std
::task
::{Context, Poll}
;
13 use endian_trait
::Endian
;
15 use crate::binary_tree_array
;
16 use crate::decoder
::{self, DecoderImpl}
;
17 use crate::format
::{self, GoodbyeItem}
;
18 use crate::poll_fn
::poll_fn
;
20 use crate::{Entry, EntryKind}
;
27 pub use sync
::{Accessor, DirEntry, Directory, FileEntry, ReadDir}
;
31 /// Random access read implementation.
38 ) -> Poll
<io
::Result
<usize>>;
41 /// awaitable version of `poll_read_at`.
42 async
fn read_at
<T
>(input
: &T
, buf
: &mut [u8], offset
: u64) -> io
::Result
<usize>
46 poll_fn(|cx
| unsafe { Pin::new_unchecked(input).poll_read_at(cx, buf, offset) }
).await
49 /// `read_exact_at` - since that's what we _actually_ want most of the time.
50 async
fn read_exact_at
<T
>(input
: &T
, mut buf
: &mut [u8], mut offset
: u64) -> io
::Result
<()>
54 while !buf
.is_empty() {
55 match read_at(input
, buf
, offset
).await?
{
56 0 => io_bail
!("unexpected EOF"),
58 buf
= &mut buf
[got
..];
66 /// Helper to read into an `Endian`-implementing `struct`.
67 async
fn read_entry_at
<T
, E
: Endian
>(input
: &T
, offset
: u64) -> io
::Result
<E
>
71 let mut data
= MaybeUninit
::<E
>::uninit();
73 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<E>()) }
;
74 read_exact_at(input
, buf
, offset
).await?
;
75 Ok(unsafe { data.assume_init().from_le() }
)
78 /// Helper to read into an allocated byte vector.
79 async
fn read_exact_data_at
<T
>(input
: &T
, size
: usize, offset
: u64) -> io
::Result
<Vec
<u8>>
83 let mut data
= util
::vec_new(size
);
84 read_exact_at(input
, &mut data
[..], offset
).await?
;
88 /// Allow using trait objects for `T: ReadAt`
89 impl<'a
> ReadAt
for &(dyn ReadAt
+ 'a
) {
95 ) -> Poll
<io
::Result
<usize>> {
96 unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) }
100 /// Convenience impl for `Arc<dyn ReadAt + Send + Sync + 'static>`. Since `ReadAt` only requires
101 /// immutable `&self`, this adds some convenience by allowing to just `Arc` any `'static` type that
102 /// implements `ReadAt` for type monomorphization.
103 impl ReadAt
for Arc
<dyn ReadAt
+ Send
+ Sync
+ '
static> {
109 ) -> Poll
<io
::Result
<usize>> {
110 unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) }
116 /// The goodbye table cache maps goodbye table offsets to cache entries.
117 gbt_cache
: Option
<Arc
<dyn Cache
<u64, [GoodbyeItem
]> + Send
+ Sync
>>,
/// The default `Caches` has no goodbye table cache configured (`gbt_cache` is `None`).
120 impl Default
for Caches
{
121 fn default() -> Self {
122 Self { gbt_cache: None }
126 /// The random access state machine implementation.
127 pub(crate) struct AccessorImpl
<T
> {
133 impl<T
: ReadAt
> AccessorImpl
<T
> {
134 pub async
fn new(input
: T
, size
: u64) -> io
::Result
<Self> {
135 if size
< (size_of
::<GoodbyeItem
>() as u64) {
136 io_bail
!("too small to contain a pxar archive");
142 caches
: Arc
::new(Caches
::default()),
146 pub fn size(&self) -> u64 {
150 pub async
fn open_root_ref
<'a
>(&'a
self) -> io
::Result
<DirectoryImpl
<&'a
dyn ReadAt
>> {
151 DirectoryImpl
::open_at_end(
152 &self.input
as &dyn ReadAt
,
155 Arc
::clone(&self.caches
),
160 pub fn set_goodbye_table_cache(
162 cache
: Option
<Arc
<dyn Cache
<u64, [GoodbyeItem
]> + Send
+ Sync
>>,
164 let new_caches
= Arc
::new(Caches
{
168 self.caches
= new_caches
;
172 async
fn get_decoder
<T
: ReadAt
>(
174 entry_range
: Range
<u64>,
176 ) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
177 Ok(DecoderImpl
::new_full(SeqReadAtAdapter
::new(input
, entry_range
), path
).await?
)
180 impl<T
: Clone
+ ReadAt
> AccessorImpl
<T
> {
181 pub async
fn open_root(&self) -> io
::Result
<DirectoryImpl
<T
>> {
182 DirectoryImpl
::open_at_end(
186 Arc
::clone(&self.caches
),
191 /// Allow opening a directory at a specified offset.
192 pub async
unsafe fn open_dir_at_end(&self, offset
: u64) -> io
::Result
<DirectoryImpl
<T
>> {
193 DirectoryImpl
::open_at_end(
197 Arc
::clone(&self.caches
),
202 /// Allow opening a regular file from a specified range.
203 pub async
unsafe fn open_file_at_range(
206 ) -> io
::Result
<FileEntryImpl
<T
>> {
207 let mut decoder
= get_decoder(self.input
.clone(), range
.clone(), PathBuf
::new()).await?
;
211 .ok_or_else(|| io_format_err
!("unexpected EOF while decoding file entry"))??
;
213 input
: self.input
.clone(),
216 caches
: Arc
::clone(&self.caches
),
220 /// Allow opening arbitrary contents from a specific range.
///
/// Builds a `FileContentsImpl` from a clone of this accessor's input and the given `range`;
/// no I/O or validation happens here.
///
/// # Safety
///
/// NOTE(review): the body itself performs no unsafe operation; presumably the caller must
/// ensure that `range` really denotes file contents inside the archive - confirm and document.
221 pub unsafe fn open_contents_at_range(&self, range
: Range
<u64>) -> FileContentsImpl
<T
> {
222 FileContentsImpl
::new(self.input
.clone(), range
)
225 /// Following a hardlink breaks a couple of conventions we otherwise have, particularly we will
226 /// never know the actual length of the target entry until we're done decoding it, so this
227 /// needs to happen at the accessor level, rather than a "sub-entry-reader".
228 pub async
fn follow_hardlink(&self, link
: &format
::Hardlink
) -> io
::Result
<FileEntryImpl
<T
>> {
229 let mut decoder
= get_decoder(
231 link
.offset
..self.size
,
232 PathBuf
::from(link
.as_os_str()),
238 .ok_or_else(|| io_format_err
!("unexpected EOF while following a hardlink"))??
;
240 EntryKind
::File { offset: None, .. }
=> {
241 io_bail
!("failed to follow hardlink, reader provided no offsets");
244 offset
: Some(offset
),
247 let meta_size
= offset
- link
.offset
;
248 let entry_end
= link
.offset
+ meta_size
+ size
;
250 input
: self.input
.clone(),
252 entry_range
: link
.offset
..entry_end
,
253 caches
: Arc
::clone(&self.caches
),
256 _
=> io_bail
!("hardlink does not point to a regular file"),
261 /// The directory random-access state machine implementation.
262 pub(crate) struct DirectoryImpl
<T
> {
267 table
: Arc
<[GoodbyeItem
]>,
272 impl<T
: Clone
+ ReadAt
> DirectoryImpl
<T
> {
273 /// Open a directory ending at the specified position.
274 async
fn open_at_end(
279 ) -> io
::Result
<DirectoryImpl
<T
>> {
280 let tail
= Self::read_tail_entry(&input
, end_offset
).await?
;
282 if end_offset
< tail
.size
{
283 io_bail
!("goodbye tail size out of range");
286 let goodbye_ofs
= end_offset
- tail
.size
;
288 if goodbye_ofs
< tail
.offset
{
289 io_bail
!("goodbye offset out of range");
292 let entry_ofs
= goodbye_ofs
- tail
.offset
;
293 let size
= end_offset
- entry_ofs
;
295 let table
: Option
<Arc
<[GoodbyeItem
]>> = caches
298 .and_then(|cache
| cache
.fetch(goodbye_ofs
));
300 let mut this
= Self {
305 table
: table
.as_ref().map_or_else(|| Arc
::new([]), Arc
::clone
),
311 if this
.table_size() % (size_of
::<GoodbyeItem
>() as u64) != 0 {
312 io_bail
!("invalid goodbye table size: {}", this
.table_size());
316 this
.table
= this
.load_table().await?
;
317 if let Some(ref cache
) = this
.caches
.gbt_cache
{
318 cache
.insert(goodbye_ofs
, Arc
::clone(&this
.table
));
325 /// Load the entire goodbye table:
326 async
fn load_table(&self) -> io
::Result
<Arc
<[GoodbyeItem
]>> {
327 let len
= self.len();
328 let mut data
= Vec
::with_capacity(self.len());
331 let slice
= std
::slice
::from_raw_parts_mut(
332 data
.as_mut_ptr() as *mut u8,
333 len
* size_of
::<GoodbyeItem
>(),
335 read_exact_at(&self.input
, slice
, self.table_offset()).await?
;
/// Offset at which this directory ends, computed as `entry_ofs + size`.
342 fn end_offset(&self) -> u64 {
343 self.entry_ofs
+ self.size
/// Half-open byte range `entry_ofs..end_offset()` covered by this directory.
347 fn entry_range(&self) -> Range
<u64> {
348 self.entry_ofs
..self.end_offset()
/// Number of bytes between `goodbye_ofs` and `end_offset()`, minus the size of one
/// `format::Header`.
///
/// NOTE(review): both subtractions are plain `u64` arithmetic; confirm that callers guarantee
/// the span is at least one header long, otherwise this underflows.
352 fn table_size(&self) -> u64 {
353 (self.end_offset() - self.goodbye_ofs
) - (size_of
::<format
::Header
>() as u64)
/// Offset of the first byte following the `format::Header` that starts at `goodbye_ofs`.
///
/// `load_table` reads the goodbye items starting from this offset.
357 fn table_offset(&self) -> u64 {
358 self.goodbye_ofs
+ (size_of
::<format
::Header
>() as u64)
361 /// Length *excluding* the tail marker!
///
/// Computed as `table_size() / size_of::<GoodbyeItem>() - 1`.
///
/// NOTE(review): the `- 1` underflows when `table_size()` is smaller than one `GoodbyeItem`;
/// confirm that every caller has ruled out an empty table before calling this.
363 fn len(&self) -> usize {
364 (self.table_size() / (size_of
::<GoodbyeItem
>() as u64)) as usize - 1
367 /// Read the goodbye tail and perform some sanity checks.
368 async
fn read_tail_entry(input
: &T
, end_offset
: u64) -> io
::Result
<GoodbyeItem
> {
369 if end_offset
< (size_of
::<GoodbyeItem
>() as u64) {
370 io_bail
!("goodbye tail does not fit");
373 let tail_offset
= end_offset
- (size_of
::<GoodbyeItem
>() as u64);
374 let tail
: GoodbyeItem
= read_entry_at(input
, tail_offset
).await?
;
376 if tail
.hash
!= format
::PXAR_GOODBYE_TAIL_MARKER
{
377 io_bail
!("no goodbye tail marker found");
383 /// Get a decoder for the directory contents.
384 pub(crate) async
fn decode_full(&self) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
385 let (dir
, decoder
) = self.decode_one_entry(self.entry_range(), None
).await?
;
387 io_bail
!("directory does not seem to be a directory");
392 async
fn get_decoder(
394 entry_range
: Range
<u64>,
395 file_name
: Option
<&Path
>,
396 ) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
401 None
=> self.path
.clone(),
402 Some(file
) => self.path
.join(file
),
408 async
fn decode_one_entry(
410 entry_range
: Range
<u64>,
411 file_name
: Option
<&Path
>,
412 ) -> io
::Result
<(Entry
, DecoderImpl
<SeqReadAtAdapter
<T
>>)> {
413 let mut decoder
= self.get_decoder(entry_range
, file_name
).await?
;
417 .ok_or_else(|| io_format_err
!("unexpected EOF while decoding directory entry"))??
;
/// Search the loaded goodbye table for an item whose `hash` field equals `hash`.
///
/// Delegates to `binary_tree_array::search_by`, forwarding `start` and `skip` unchanged and
/// ordering by `hash.cmp(&item.hash)`. Returns whatever index that search yields.
///
/// NOTE(review): presumably `skip` selects the n-th item with the same hash (it is fed a
/// duplicate counter by `lookup_component`) - confirm against `search_by`.
421 fn lookup_hash_position(&self, hash
: u64, start
: usize, skip
: usize) -> Option
<usize> {
422 binary_tree_array
::search_by(&self.table
, start
, skip
, |i
| hash
.cmp(&i
.hash
))
425 pub async
fn lookup_self(&self) -> io
::Result
<FileEntryImpl
<T
>> {
426 let (entry
, _decoder
) = self.decode_one_entry(self.entry_range(), None
).await?
;
428 input
: self.input
.clone(),
430 entry_range
: self.entry_range(),
431 caches
: Arc
::clone(&self.caches
),
435 /// Lookup a directory entry.
436 pub async
fn lookup(&self, path
: &Path
) -> io
::Result
<Option
<FileEntryImpl
<T
>>> {
437 let mut cur
: Option
<FileEntryImpl
<T
>> = None
;
439 let mut first
= true;
440 for component
in path
.components() {
441 use std
::path
::Component
;
443 let first
= mem
::replace(&mut first
, false);
445 let component
= match component
{
446 Component
::Normal(path
) => path
,
447 Component
::ParentDir
=> io_bail
!("cannot enter parent directory in archive"),
448 Component
::RootDir
| Component
::CurDir
if first
=> {
449 cur
= Some(self.lookup_self().await?
);
452 Component
::CurDir
=> continue,
453 _
=> io_bail
!("invalid component in path"),
456 let next
= match cur
{
461 .lookup_component(component
)
464 None
=> self.lookup_component(component
).await?
,
477 /// Lookup a single directory entry component (does not handle multiple components in path)
478 pub async
fn lookup_component(&self, path
: &OsStr
) -> io
::Result
<Option
<FileEntryImpl
<T
>>> {
479 let hash
= format
::hash_filename(path
.as_bytes());
480 let first_index
= match self.lookup_hash_position(hash
, 0, 0) {
481 Some(index
) => index
,
482 None
=> return Ok(None
),
485 // Look up FILENAME: if the hash matches but the filename doesn't, check for a duplicate
486 // hash. Once found, use the GoodbyeItem's offset+size as well as the file's Entry to return
487 // a DirEntry::Dir or Dir::Entry.
491 let index
= match self.lookup_hash_position(hash
, first_index
, dup
) {
492 Some(index
) => index
,
493 None
=> return Ok(None
),
496 let cursor
= self.get_cursor(index
).await?
;
497 if cursor
.file_name
== path
{
498 return Ok(Some(cursor
.decode_entry().await?
));
505 async
fn get_cursor
<'a
>(&'a
self, index
: usize) -> io
::Result
<DirEntryImpl
<'a
, T
>> {
506 let entry
= &self.table
[index
];
507 let file_goodbye_ofs
= entry
.offset
;
508 if self.goodbye_ofs
< file_goodbye_ofs
{
509 io_bail
!("invalid file offset");
512 let file_ofs
= self.goodbye_ofs
- file_goodbye_ofs
;
513 let (file_name
, entry_ofs
) = self.read_filename_entry(file_ofs
).await?
;
515 let entry_range
= Range
{
517 end
: file_ofs
+ entry
.size
,
519 if entry_range
.end
< entry_range
.start
{
521 "bad file: invalid entry ranges for {:?}: \
522 start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}",
534 caches
: Arc
::clone(&self.caches
),
538 async
fn read_filename_entry(&self, file_ofs
: u64) -> io
::Result
<(PathBuf
, u64)> {
539 let head
: format
::Header
= read_entry_at(&self.input
, file_ofs
).await?
;
540 if head
.htype
!= format
::PXAR_FILENAME
{
541 io_bail
!("expected PXAR_FILENAME header, found: {:x}", head
.htype
);
544 let mut path
= read_exact_data_at(
546 head
.content_size() as usize,
547 file_ofs
+ (size_of_val(&head
) as u64),
551 if path
.pop() != Some(0) {
552 io_bail
!("invalid file name (missing terminating zero)");
556 io_bail
!("invalid empty file name");
559 let file_name
= PathBuf
::from(OsString
::from_vec(path
));
560 format
::check_file_name(&file_name
)?
;
562 Ok((file_name
, file_ofs
+ head
.full_size()))
/// Create a `ReadDirImpl` over this directory, positioned at table index 0.
565 pub fn read_dir(&self) -> ReadDirImpl
<T
> {
566 ReadDirImpl
::new(self, 0)
569 pub fn entry_count(&self) -> usize {
574 /// A file entry retrieved from a Directory.
576 pub(crate) struct FileEntryImpl
<T
: Clone
+ ReadAt
> {
579 entry_range
: Range
<u64>,
583 impl<T
: Clone
+ ReadAt
> FileEntryImpl
<T
> {
584 pub async
fn enter_directory(&self) -> io
::Result
<DirectoryImpl
<T
>> {
585 if !self.entry
.is_dir() {
586 io_bail
!("enter_directory() on a non-directory");
589 DirectoryImpl
::open_at_end(
591 self.entry_range
.end
,
592 self.entry
.path
.clone(),
593 Arc
::clone(&self.caches
),
598 /// For use with unsafe accessor methods.
599 pub fn content_range(&self) -> io
::Result
<Option
<Range
<u64>>> {
600 match self.entry
.kind
{
601 EntryKind
::File { offset: None, .. }
=> {
602 io_bail
!("cannot open file, reader provided no offset")
606 offset
: Some(offset
),
607 } => Ok(Some(offset
..(offset
+ size
))),
/// Open this entry's contents for reading.
///
/// Uses `content_range()` to locate the data and wraps a clone of the input in a
/// `FileContentsImpl` restricted to that range.
///
/// # Errors
///
/// Propagates any error from `content_range()`, and fails with "not a file" when
/// `content_range()` reports no range.
612 pub async
fn contents(&self) -> io
::Result
<FileContentsImpl
<T
>> {
613 match self.content_range()?
{
614 Some(range
) => Ok(FileContentsImpl
::new(self.input
.clone(), range
)),
615 None
=> io_bail
!("not a file"),
620 pub fn into_entry(self) -> Entry
{
625 pub fn entry(&self) -> &Entry
{
629 /// Exposed for raw by-offset access methods (use with `open_dir_at_end`).
631 pub fn entry_range(&self) -> Range
<u64> {
632 self.entry_range
.clone()
636 /// An iterator over the contents of a directory.
637 pub(crate) struct ReadDirImpl
<'a
, T
> {
638 dir
: &'a DirectoryImpl
<T
>,
642 impl<'a
, T
: Clone
+ ReadAt
> ReadDirImpl
<'a
, T
> {
643 fn new(dir
: &'a DirectoryImpl
<T
>, at
: usize) -> Self {
647 /// Get the next entry.
648 pub async
fn next(&mut self) -> io
::Result
<Option
<DirEntryImpl
<'a
, T
>>> {
649 if self.at
== self.dir
.table
.len() {
652 let cursor
= self.dir
.get_cursor(self.at
).await?
;
658 /// Efficient alternative to `Iterator::skip`.
660 pub fn skip(self, n
: usize) -> Self {
662 at
: (self.at
+ n
).min(self.dir
.table
.len()),
667 /// Efficient alternative to `Iterator::count`.
669 pub fn count(self) -> usize {
674 /// A cursor pointing to a file in a directory.
676 /// At this point only the file name has been read and we remembered the position for finding the
677 /// actual data. This can be upgraded into a FileEntryImpl.
678 pub(crate) struct DirEntryImpl
<'a
, T
: Clone
+ ReadAt
> {
679 dir
: &'a DirectoryImpl
<T
>,
681 entry_range
: Range
<u64>,
685 impl<'a
, T
: Clone
+ ReadAt
> DirEntryImpl
<'a
, T
> {
686 pub fn file_name(&self) -> &Path
{
690 async
fn decode_entry(&self) -> io
::Result
<FileEntryImpl
<T
>> {
691 let (entry
, _decoder
) = self
693 .decode_one_entry(self.entry_range
.clone(), Some(&self.file_name
))
697 input
: self.dir
.input
.clone(),
699 entry_range
: self.entry_range(),
700 caches
: Arc
::clone(&self.caches
),
704 /// Exposed for raw by-offset access methods.
706 pub fn entry_range(&self) -> Range
<u64> {
707 self.entry_range
.clone()
711 /// A reader for file contents.
712 pub(crate) struct FileContentsImpl
<T
> {
715 /// Absolute offset inside the `input`.
719 impl<T
: Clone
+ ReadAt
> FileContentsImpl
<T
> {
/// Create a reader for the bytes of `input` that lie within `range`.
///
/// Both values are stored as given; no validation or I/O is performed here.
720 pub fn new(input
: T
, range
: Range
<u64>) -> Self {
721 Self { input, range }
/// Length in bytes of the content range (`range.end - range.start`).
725 pub fn file_size(&self) -> u64 {
726 self.range
.end
- self.range
.start
729 async
fn read_at(&self, mut buf
: &mut [u8], offset
: u64) -> io
::Result
<usize> {
730 let size
= self.file_size();
734 let remaining
= size
- offset
;
736 if remaining
< buf
.len() as u64 {
737 buf
= &mut buf
[..(remaining
as usize)];
740 read_at(&self.input
, buf
, self.range
.start
+ offset
).await
744 impl<T
: Clone
+ ReadAt
> ReadAt
for FileContentsImpl
<T
> {
750 ) -> Poll
<io
::Result
<usize>> {
751 let size
= self.file_size();
753 return Poll
::Ready(Ok(0));
755 let remaining
= size
- offset
;
757 if remaining
< buf
.len() as u64 {
758 buf
= &mut buf
[..(remaining
as usize)];
761 let offset
= self.range
.start
+ offset
;
762 unsafe { self.map_unchecked(|this| &this.input) }
.poll_read_at(cx
, buf
, offset
)
767 pub struct SeqReadAtAdapter
<T
> {
772 impl<T
: ReadAt
> SeqReadAtAdapter
<T
> {
/// Create an adapter over `input` that is limited to `range`.
///
/// # Panics
///
/// Panics with "BAD SEQ READ AT ADAPTER" if `range.end < range.start`.
773 pub fn new(input
: T
, range
: Range
<u64>) -> Self {
774 if range
.end
< range
.start
{
775 panic
!("BAD SEQ READ AT ADAPTER");
777 Self { input, range }
/// Number of bytes left in the range (`range.end - range.start`), as a `usize`.
///
/// NOTE(review): the `as usize` cast truncates on targets where `usize` is narrower than
/// `u64`; confirm this is acceptable for the supported platforms.
781 fn remaining(&self) -> usize {
782 (self.range
.end
- self.range
.start
) as usize
786 impl<T
: ReadAt
> decoder
::SeqRead
for SeqReadAtAdapter
<T
> {
788 self: Pin
<&mut Self>,
791 ) -> Poll
<io
::Result
<usize>> {
792 let len
= buf
.len().min(self.remaining());
793 let buf
= &mut buf
[..len
];
795 let this
= unsafe { self.get_unchecked_mut() }
;
797 let got
= ready
!(unsafe {
798 Pin
::new_unchecked(&this
.input
).poll_read_at(cx
, buf
, this
.range
.start
)
800 this
.range
.start
+= got
as u64;
/// Report the current position, which is always immediately ready as `range.start`.
///
/// The context is unused because no waiting is ever required.
804 fn poll_position(self: Pin
<&mut Self>, _cx
: &mut Context
) -> Poll
<Option
<io
::Result
<u64>>> {
805 Poll
::Ready(Some(Ok(self.range
.start
)))