1 //! Random access for PXAR files.
3 use std
::ffi
::{OsStr, OsString}
;
5 use std
::mem
::{self, size_of, size_of_val, MaybeUninit}
;
7 use std
::os
::unix
::ffi
::{OsStrExt, OsStringExt}
;
8 use std
::path
::{Path, PathBuf}
;
11 use std
::task
::{Context, Poll}
;
13 use endian_trait
::Endian
;
15 use crate::binary_tree_array
;
16 use crate::decoder
::{self, DecoderImpl}
;
17 use crate::format
::{self, GoodbyeItem}
;
18 use crate::poll_fn
::poll_fn
;
20 use crate::{Entry, EntryKind}
;
27 pub use sync
::{Accessor, DirEntry, Directory, FileEntry, ReadDir}
;
31 /// Random access read implementation.
38 ) -> Poll
<io
::Result
<usize>>;
41 /// awaitable version of `poll_read_at`.
42 async
fn read_at
<T
>(input
: &T
, buf
: &mut [u8], offset
: u64) -> io
::Result
<usize>
46 poll_fn(|cx
| unsafe { Pin::new_unchecked(input).poll_read_at(cx, buf, offset) }
).await
49 /// `read_exact_at` - since that's what we _actually_ want most of the time.
50 async
fn read_exact_at
<T
>(input
: &T
, mut buf
: &mut [u8], mut offset
: u64) -> io
::Result
<()>
54 while !buf
.is_empty() {
55 match read_at(input
, buf
, offset
).await?
{
56 0 => io_bail
!("unexpected EOF"),
58 buf
= &mut buf
[got
..];
66 /// Helper to read into an `Endian`-implementing `struct`.
67 async
fn read_entry_at
<T
, E
: Endian
>(input
: &T
, offset
: u64) -> io
::Result
<E
>
71 let mut data
= MaybeUninit
::<E
>::uninit();
73 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<E>()) }
;
74 read_exact_at(input
, buf
, offset
).await?
;
75 Ok(unsafe { data.assume_init().from_le() }
)
78 /// Helper to read into an allocated byte vector.
79 async
fn read_exact_data_at
<T
>(input
: &T
, size
: usize, offset
: u64) -> io
::Result
<Vec
<u8>>
83 let mut data
= util
::vec_new(size
);
84 read_exact_at(input
, &mut data
[..], offset
).await?
;
88 /// Allow using trait objects for `T: ReadAt`
89 impl<'a
> ReadAt
for &(dyn ReadAt
+ 'a
) {
95 ) -> Poll
<io
::Result
<usize>> {
96 unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) }
100 /// Convenience impl for `Arc<dyn ReadAt + Send + Sync + 'static>`. Since `ReadAt` only requires
101 /// immutable `&self`, this adds some convenience by allowing to just `Arc` any `'static` type that
102 /// implements `ReadAt` for type monomorphization.
103 impl ReadAt
for Arc
<dyn ReadAt
+ Send
+ Sync
+ '
static> {
109 ) -> Poll
<io
::Result
<usize>> {
110 unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) }
116 /// The goodbye table cache maps goodbye table offsets to cache entries.
117 gbt_cache
: Option
<Arc
<dyn Cache
<u64, [GoodbyeItem
]> + Send
+ Sync
>>,
120 impl Default
for Caches
{
121 fn default() -> Self {
122 Self { gbt_cache: None }
126 /// The random access state machine implementation.
127 pub(crate) struct AccessorImpl
<T
> {
133 impl<T
: ReadAt
> AccessorImpl
<T
> {
134 pub async
fn new(input
: T
, size
: u64) -> io
::Result
<Self> {
135 if size
< (size_of
::<GoodbyeItem
>() as u64) {
136 io_bail
!("too small to contain a pxar archive");
142 caches
: Arc
::new(Caches
::default()),
146 pub fn size(&self) -> u64 {
150 pub async
fn open_root_ref
<'a
>(&'a
self) -> io
::Result
<DirectoryImpl
<&'a
dyn ReadAt
>> {
151 DirectoryImpl
::open_at_end(
152 &self.input
as &dyn ReadAt
,
155 Arc
::clone(&self.caches
),
160 pub fn set_goodbye_table_cache(
162 cache
: Option
<Arc
<dyn Cache
<u64, [GoodbyeItem
]> + Send
+ Sync
>>,
164 let new_caches
= Arc
::new(Caches
{
168 self.caches
= new_caches
;
172 async
fn get_decoder
<T
: ReadAt
>(
174 entry_range
: Range
<u64>,
176 ) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
177 Ok(DecoderImpl
::new_full(SeqReadAtAdapter
::new(input
, entry_range
), path
).await?
)
180 impl<T
: Clone
+ ReadAt
> AccessorImpl
<T
> {
181 pub async
fn open_root(&self) -> io
::Result
<DirectoryImpl
<T
>> {
182 DirectoryImpl
::open_at_end(
186 Arc
::clone(&self.caches
),
191 /// Allow opening a directory at a specified offset.
192 pub async
unsafe fn open_dir_at_end(&self, offset
: u64) -> io
::Result
<DirectoryImpl
<T
>> {
193 DirectoryImpl
::open_at_end(
197 Arc
::clone(&self.caches
),
202 /// Allow opening a regular file from a specified range.
203 pub async
unsafe fn open_file_at_range(
206 ) -> io
::Result
<FileEntryImpl
<T
>> {
207 let mut decoder
= get_decoder(self.input
.clone(), range
.clone(), PathBuf
::new()).await?
;
211 .ok_or_else(|| io_format_err
!("unexpected EOF while decoding file entry"))??
;
213 input
: self.input
.clone(),
216 caches
: Arc
::clone(&self.caches
),
220 /// Allow opening arbitrary contents from a specific range.
221 pub unsafe fn open_contents_at_range(&self, range
: Range
<u64>) -> FileContentsImpl
<T
> {
222 FileContentsImpl
::new(self.input
.clone(), range
)
226 /// The directory random-access state machine implementation.
227 pub(crate) struct DirectoryImpl
<T
> {
232 table
: Arc
<[GoodbyeItem
]>,
237 impl<T
: Clone
+ ReadAt
> DirectoryImpl
<T
> {
238 /// Open a directory ending at the specified position.
239 async
fn open_at_end(
244 ) -> io
::Result
<DirectoryImpl
<T
>> {
245 let tail
= Self::read_tail_entry(&input
, end_offset
).await?
;
247 if end_offset
< tail
.size
{
248 io_bail
!("goodbye tail size out of range");
251 let goodbye_ofs
= end_offset
- tail
.size
;
253 if goodbye_ofs
< tail
.offset
{
254 io_bail
!("goodbye offset out of range");
257 let entry_ofs
= goodbye_ofs
- tail
.offset
;
258 let size
= end_offset
- entry_ofs
;
260 let table
: Option
<Arc
<[GoodbyeItem
]>> = caches
263 .and_then(|cache
| cache
.fetch(goodbye_ofs
));
265 let mut this
= Self {
270 table
: table
.as_ref().map_or_else(|| Arc
::new([]), Arc
::clone
),
276 if this
.table_size() % (size_of
::<GoodbyeItem
>() as u64) != 0 {
277 io_bail
!("invalid goodbye table size: {}", this
.table_size());
281 this
.table
= this
.load_table().await?
;
282 if let Some(ref cache
) = this
.caches
.gbt_cache
{
283 cache
.insert(goodbye_ofs
, Arc
::clone(&this
.table
));
290 /// Load the entire goodbye table:
291 async
fn load_table(&self) -> io
::Result
<Arc
<[GoodbyeItem
]>> {
292 let len
= self.len();
293 let mut data
= Vec
::with_capacity(self.len());
296 let slice
= std
::slice
::from_raw_parts_mut(
297 data
.as_mut_ptr() as *mut u8,
298 len
* size_of
::<GoodbyeItem
>(),
300 read_exact_at(&self.input
, slice
, self.table_offset()).await?
;
307 fn end_offset(&self) -> u64 {
308 self.entry_ofs
+ self.size
312 fn entry_range(&self) -> Range
<u64> {
313 self.entry_ofs
..self.end_offset()
317 fn table_size(&self) -> u64 {
318 (self.end_offset() - self.goodbye_ofs
) - (size_of
::<format
::Header
>() as u64)
322 fn table_offset(&self) -> u64 {
323 self.goodbye_ofs
+ (size_of
::<format
::Header
>() as u64)
326 /// Length *excluding* the tail marker!
328 fn len(&self) -> usize {
329 (self.table_size() / (size_of
::<GoodbyeItem
>() as u64)) as usize - 1
332 /// Read the goodbye tail and perform some sanity checks.
333 async
fn read_tail_entry(input
: &T
, end_offset
: u64) -> io
::Result
<GoodbyeItem
> {
334 if end_offset
< (size_of
::<GoodbyeItem
>() as u64) {
335 io_bail
!("goodbye tail does not fit");
338 let tail_offset
= end_offset
- (size_of
::<GoodbyeItem
>() as u64);
339 let tail
: GoodbyeItem
= read_entry_at(input
, tail_offset
).await?
;
341 if tail
.hash
!= format
::PXAR_GOODBYE_TAIL_MARKER
{
342 io_bail
!("no goodbye tail marker found");
348 /// Get a decoder for the directory contents.
349 pub(crate) async
fn decode_full(&self) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
350 let (dir
, decoder
) = self.decode_one_entry(self.entry_range(), None
).await?
;
352 io_bail
!("directory does not seem to be a directory");
357 async
fn get_decoder(
359 entry_range
: Range
<u64>,
360 file_name
: Option
<&Path
>,
361 ) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
366 None
=> self.path
.clone(),
367 Some(file
) => self.path
.join(file
),
373 async
fn decode_one_entry(
375 entry_range
: Range
<u64>,
376 file_name
: Option
<&Path
>,
377 ) -> io
::Result
<(Entry
, DecoderImpl
<SeqReadAtAdapter
<T
>>)> {
378 let mut decoder
= self.get_decoder(entry_range
, file_name
).await?
;
382 .ok_or_else(|| io_format_err
!("unexpected EOF while decoding directory entry"))??
;
386 fn lookup_hash_position(&self, hash
: u64, start
: usize, skip
: usize) -> Option
<usize> {
387 binary_tree_array
::search_by(&self.table
, start
, skip
, |i
| hash
.cmp(&i
.hash
))
390 pub async
fn lookup_self(&self) -> io
::Result
<FileEntryImpl
<T
>> {
391 let (entry
, _decoder
) = self.decode_one_entry(self.entry_range(), None
).await?
;
393 input
: self.input
.clone(),
395 entry_range
: self.entry_range(),
396 caches
: Arc
::clone(&self.caches
),
400 /// Lookup a directory entry.
401 pub async
fn lookup(&self, path
: &Path
) -> io
::Result
<Option
<FileEntryImpl
<T
>>> {
402 let mut cur
: Option
<FileEntryImpl
<T
>> = None
;
404 let mut first
= true;
405 for component
in path
.components() {
406 use std
::path
::Component
;
408 let first
= mem
::replace(&mut first
, false);
410 let component
= match component
{
411 Component
::Normal(path
) => path
,
412 Component
::ParentDir
=> io_bail
!("cannot enter parent directory in archive"),
413 Component
::RootDir
| Component
::CurDir
if first
=> {
414 cur
= Some(self.lookup_self().await?
);
417 Component
::CurDir
=> continue,
418 _
=> io_bail
!("invalid component in path"),
421 let next
= match cur
{
426 .lookup_component(component
)
429 None
=> self.lookup_component(component
).await?
,
442 /// Lookup a single directory entry component (does not handle multiple components in path)
443 pub async
fn lookup_component(&self, path
: &OsStr
) -> io
::Result
<Option
<FileEntryImpl
<T
>>> {
444 let hash
= format
::hash_filename(path
.as_bytes());
445 let first_index
= match self.lookup_hash_position(hash
, 0, 0) {
446 Some(index
) => index
,
447 None
=> return Ok(None
),
450 // Lookup FILENAME, if the hash matches but the filename doesn't, check for a duplicate
451 // hash once found, use the GoodbyeItem's offset+size as well as the file's Entry to return
452 // a DirEntry::Dir or Dir::Entry.
456 let index
= match self.lookup_hash_position(hash
, first_index
, dup
) {
457 Some(index
) => index
,
458 None
=> return Ok(None
),
461 let cursor
= self.get_cursor(index
).await?
;
462 if cursor
.file_name
== path
{
463 return Ok(Some(cursor
.decode_entry().await?
));
470 async
fn get_cursor
<'a
>(&'a
self, index
: usize) -> io
::Result
<DirEntryImpl
<'a
, T
>> {
471 let entry
= &self.table
[index
];
472 let file_goodbye_ofs
= entry
.offset
;
473 if self.goodbye_ofs
< file_goodbye_ofs
{
474 io_bail
!("invalid file offset");
477 let file_ofs
= self.goodbye_ofs
- file_goodbye_ofs
;
478 let (file_name
, entry_ofs
) = self.read_filename_entry(file_ofs
).await?
;
480 let entry_range
= Range
{
482 end
: file_ofs
+ entry
.size
,
484 if entry_range
.end
< entry_range
.start
{
486 "bad file: invalid entry ranges for {:?}: \
487 start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}",
499 caches
: Arc
::clone(&self.caches
),
503 async
fn read_filename_entry(&self, file_ofs
: u64) -> io
::Result
<(PathBuf
, u64)> {
504 let head
: format
::Header
= read_entry_at(&self.input
, file_ofs
).await?
;
505 if head
.htype
!= format
::PXAR_FILENAME
{
506 io_bail
!("expected PXAR_FILENAME header, found: {:x}", head
.htype
);
509 let mut path
= read_exact_data_at(
511 head
.content_size() as usize,
512 file_ofs
+ (size_of_val(&head
) as u64),
516 if path
.pop() != Some(0) {
517 io_bail
!("invalid file name (missing terminating zero)");
521 io_bail
!("invalid empty file name");
524 let file_name
= PathBuf
::from(OsString
::from_vec(path
));
525 format
::check_file_name(&file_name
)?
;
527 Ok((file_name
, file_ofs
+ head
.full_size()))
530 pub fn read_dir(&self) -> ReadDirImpl
<T
> {
531 ReadDirImpl
::new(self, 0)
534 pub fn entry_count(&self) -> usize {
539 /// A file entry retrieved from a Directory.
541 pub(crate) struct FileEntryImpl
<T
: Clone
+ ReadAt
> {
544 entry_range
: Range
<u64>,
548 impl<T
: Clone
+ ReadAt
> FileEntryImpl
<T
> {
549 pub async
fn enter_directory(&self) -> io
::Result
<DirectoryImpl
<T
>> {
550 if !self.entry
.is_dir() {
551 io_bail
!("enter_directory() on a non-directory");
554 DirectoryImpl
::open_at_end(
556 self.entry_range
.end
,
557 self.entry
.path
.clone(),
558 Arc
::clone(&self.caches
),
563 /// For use with unsafe accessor methods.
564 pub fn content_range(&self) -> io
::Result
<Option
<Range
<u64>>> {
565 match self.entry
.kind
{
566 EntryKind
::File { offset: None, .. }
=> {
567 io_bail
!("cannot open file, reader provided no offset")
571 offset
: Some(offset
),
572 } => Ok(Some(offset
..(offset
+ size
))),
577 pub async
fn contents(&self) -> io
::Result
<FileContentsImpl
<T
>> {
578 match self.content_range()?
{
579 Some(range
) => Ok(FileContentsImpl
::new(self.input
.clone(), range
)),
580 None
=> io_bail
!("not a file"),
585 pub fn into_entry(self) -> Entry
{
590 pub fn entry(&self) -> &Entry
{
594 /// Exposed for raw by-offset access methods (use with `open_dir_at_end`).
596 pub fn entry_range(&self) -> Range
<u64> {
597 self.entry_range
.clone()
601 /// An iterator over the contents of a directory.
602 pub(crate) struct ReadDirImpl
<'a
, T
> {
603 dir
: &'a DirectoryImpl
<T
>,
607 impl<'a
, T
: Clone
+ ReadAt
> ReadDirImpl
<'a
, T
> {
608 fn new(dir
: &'a DirectoryImpl
<T
>, at
: usize) -> Self {
612 /// Get the next entry.
613 pub async
fn next(&mut self) -> io
::Result
<Option
<DirEntryImpl
<'a
, T
>>> {
614 if self.at
== self.dir
.table
.len() {
617 let cursor
= self.dir
.get_cursor(self.at
).await?
;
623 /// Efficient alternative to `Iterator::skip`.
625 pub fn skip(self, n
: usize) -> Self {
627 at
: (self.at
+ n
).min(self.dir
.table
.len()),
632 /// Efficient alternative to `Iterator::count`.
634 pub fn count(self) -> usize {
639 /// A cursor pointing to a file in a directory.
641 /// At this point only the file name has been read and we remembered the position for finding the
642 /// actual data. This can be upgraded into a FileEntryImpl.
643 pub(crate) struct DirEntryImpl
<'a
, T
: Clone
+ ReadAt
> {
644 dir
: &'a DirectoryImpl
<T
>,
646 entry_range
: Range
<u64>,
650 impl<'a
, T
: Clone
+ ReadAt
> DirEntryImpl
<'a
, T
> {
651 pub fn file_name(&self) -> &Path
{
655 async
fn decode_entry(&self) -> io
::Result
<FileEntryImpl
<T
>> {
656 let (entry
, _decoder
) = self
658 .decode_one_entry(self.entry_range
.clone(), Some(&self.file_name
))
662 input
: self.dir
.input
.clone(),
664 entry_range
: self.entry_range(),
665 caches
: Arc
::clone(&self.caches
),
669 /// Exposed for raw by-offset access methods.
671 pub fn entry_range(&self) -> Range
<u64> {
672 self.entry_range
.clone()
676 /// A reader for file contents.
677 pub(crate) struct FileContentsImpl
<T
> {
680 /// Absolute offset inside the `input`.
684 impl<T
: Clone
+ ReadAt
> FileContentsImpl
<T
> {
685 pub fn new(input
: T
, range
: Range
<u64>) -> Self {
686 Self { input, range }
690 pub fn file_size(&self) -> u64 {
691 self.range
.end
- self.range
.start
694 async
fn read_at(&self, mut buf
: &mut [u8], offset
: u64) -> io
::Result
<usize> {
695 let size
= self.file_size();
699 let remaining
= size
- offset
;
701 if remaining
< buf
.len() as u64 {
702 buf
= &mut buf
[..(remaining
as usize)];
705 read_at(&self.input
, buf
, self.range
.start
+ offset
).await
709 impl<T
: Clone
+ ReadAt
> ReadAt
for FileContentsImpl
<T
> {
715 ) -> Poll
<io
::Result
<usize>> {
716 let size
= self.file_size();
718 return Poll
::Ready(Ok(0));
720 let remaining
= size
- offset
;
722 if remaining
< buf
.len() as u64 {
723 buf
= &mut buf
[..(remaining
as usize)];
726 let offset
= self.range
.start
+ offset
;
727 unsafe { self.map_unchecked(|this| &this.input) }
.poll_read_at(cx
, buf
, offset
)
732 pub struct SeqReadAtAdapter
<T
> {
737 impl<T
: ReadAt
> SeqReadAtAdapter
<T
> {
738 pub fn new(input
: T
, range
: Range
<u64>) -> Self {
739 if range
.end
< range
.start
{
740 panic
!("BAD SEQ READ AT ADAPTER");
742 Self { input, range }
746 fn remaining(&self) -> usize {
747 (self.range
.end
- self.range
.start
) as usize
751 impl<T
: ReadAt
> decoder
::SeqRead
for SeqReadAtAdapter
<T
> {
753 self: Pin
<&mut Self>,
756 ) -> Poll
<io
::Result
<usize>> {
757 let len
= buf
.len().min(self.remaining());
758 let buf
= &mut buf
[..len
];
760 let this
= unsafe { self.get_unchecked_mut() }
;
762 let got
= ready
!(unsafe {
763 Pin
::new_unchecked(&this
.input
).poll_read_at(cx
, buf
, this
.range
.start
)
765 this
.range
.start
+= got
as u64;
769 fn poll_position(self: Pin
<&mut Self>, _cx
: &mut Context
) -> Poll
<Option
<io
::Result
<u64>>> {
770 Poll
::Ready(Some(Ok(self.range
.start
)))