1 //! Random access for PXAR files.
3 use std
::ffi
::{OsStr, OsString}
;
5 use std
::mem
::{self, size_of, size_of_val, MaybeUninit}
;
7 use std
::os
::unix
::ffi
::{OsStrExt, OsStringExt}
;
8 use std
::path
::{Path, PathBuf}
;
11 use std
::task
::{Context, Poll}
;
13 use endian_trait
::Endian
;
15 use crate::binary_tree_array
;
16 use crate::decoder
::{self, DecoderImpl}
;
17 use crate::format
::{self, GoodbyeItem}
;
18 use crate::poll_fn
::poll_fn
;
20 use crate::{Entry, EntryKind}
;
28 pub use sync
::{Accessor, DirEntry, Directory, FileEntry, ReadDir}
;
32 /// Random access read implementation.
39 ) -> Poll
<io
::Result
<usize>>;
42 /// We do not want to bother with actual polling, so we implement `async fn` variants of the above
45 /// The reason why this is not an internal `ReadAtExt` trait like `AsyncReadExt` is simply that
46 /// we'd then need to define all the `Future` types they return manually and explicitly. Since we
47 /// have no use for them, all we want is the ability to use `async fn`...
49 /// The downside is that we need some `(&self.input as &dyn ReadAt)` casts in this module's
50 /// code, but that's fine.
51 impl<'a
> dyn ReadAt
+ 'a
{
52 /// awaitable version of `poll_read_at`.
53 async
fn read_at(&self, buf
: &mut [u8], offset
: u64) -> io
::Result
<usize> {
54 poll_fn(|cx
| unsafe { Pin::new_unchecked(self).poll_read_at(cx, buf, offset) }
).await
57 /// `read_exact_at` - since that's what we _actually_ want most of the time.
///
/// Keeps calling `read_at` while `buf` is not empty.
///
/// # Errors
///
/// Errors from `read_at` are propagated with `?`; a read returning `0` while `buf` is still
/// non-empty takes the `io_bail!("unexpected EOF")` arm.
58 async
fn read_exact_at(&self, mut buf
: &mut [u8], mut offset
: u64) -> io
::Result
<()> {
// Loop for as long as there is unfilled space left in `buf`.
59 while !buf
.is_empty() {
60 match self.read_at(buf
, offset
).await?
{
// Zero bytes read although more were requested.
61 0 => io_bail
!("unexpected EOF"),
// Shrink `buf` to the part after the first `got` bytes.
63 buf
= &mut buf
[got
..];
71 /// Helper to read into an `Endian`-implementing `struct`.
///
/// Fills a `MaybeUninit<T>` through a byte view of `size_of::<T>()` bytes via
/// `read_exact_at(.., offset)`, then returns the value after `from_le()`.
///
/// NOTE(review): `assume_init()` on bytes read from the input is only sound if every bit
/// pattern is a valid `T`; the `Endian` bound alone does not obviously guarantee that —
/// confirm for all callers.
72 async
fn read_entry_at
<T
: Endian
>(&self, offset
: u64) -> io
::Result
<T
> {
// Uninitialized storage for the value; it is written through the raw byte slice below.
73 let mut data
= MaybeUninit
::<T
>::uninit();
// Byte view over `data`, exactly `size_of::<T>()` bytes long.
75 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<T>()) }
;
76 self.read_exact_at(buf
, offset
).await?
;
// Only reached when `read_exact_at` returned `Ok`.
77 Ok(unsafe { data.assume_init().from_le() }
)
80 /// Helper to read into an allocated byte vector.
81 async
fn read_exact_data_at(&self, size
: usize, offset
: u64) -> io
::Result
<Vec
<u8>> {
82 let mut data
= util
::vec_new(size
);
83 self.read_exact_at(&mut data
[..], offset
).await?
;
88 /// Allow using trait objects for `T: ReadAt`
89 impl<'a
> ReadAt
for &(dyn ReadAt
+ 'a
) {
95 ) -> Poll
<io
::Result
<usize>> {
97 Pin
::new_unchecked(&**self).poll_read_at(cx
, buf
, offset
)
104 /// The goodbye table cache maps goodbye table offsets to cache entries.
105 gbt_cache
: Option
<Arc
<dyn Cache
<u64, [GoodbyeItem
]> + Send
+ Sync
>>,
108 impl Default
for Caches
{
109 fn default() -> Self {
110 Self { gbt_cache: None }
114 /// The random access state machine implementation.
115 pub(crate) struct AccessorImpl
<T
> {
121 impl<T
: ReadAt
> AccessorImpl
<T
> {
122 pub async
fn new(input
: T
, size
: u64) -> io
::Result
<Self> {
123 if size
< (size_of
::<GoodbyeItem
>() as u64) {
124 io_bail
!("too small to contain a pxar archive");
130 caches
: Arc
::new(Caches
::default()),
134 pub async
fn open_root_ref
<'a
>(&'a
self) -> io
::Result
<DirectoryImpl
<&'a
dyn ReadAt
>> {
135 DirectoryImpl
::open_at_end(
136 &self.input
as &dyn ReadAt
,
139 Arc
::clone(&self.caches
),
144 pub fn set_goodbye_table_cache(
146 cache
: Option
<Arc
<dyn Cache
<u64, [GoodbyeItem
]> + Send
+ Sync
>>,
148 let new_caches
= Arc
::new(Caches
{
152 self.caches
= new_caches
;
156 impl<T
: Clone
+ ReadAt
> AccessorImpl
<T
> {
157 pub async
fn open_root(&self) -> io
::Result
<DirectoryImpl
<T
>> {
158 DirectoryImpl
::open_at_end(
162 Arc
::clone(&self.caches
),
167 /// Allow opening a directory at a specified offset.
168 pub async
unsafe fn open_dir_at_end(&self, offset
: u64) -> io
::Result
<DirectoryImpl
<T
>> {
169 DirectoryImpl
::open_at_end(
173 Arc
::clone(&self.caches
),
179 /// The directory random-access state machine implementation.
180 pub(crate) struct DirectoryImpl
<T
> {
185 table
: Arc
<[GoodbyeItem
]>,
190 impl<T
: Clone
+ ReadAt
> DirectoryImpl
<T
> {
191 /// Open a directory ending at the specified position.
192 async
fn open_at_end(
197 ) -> io
::Result
<DirectoryImpl
<T
>> {
198 let tail
= Self::read_tail_entry(&input
, end_offset
).await?
;
200 if end_offset
< tail
.size
{
201 io_bail
!("goodbye tail size out of range");
204 let goodbye_ofs
= end_offset
- tail
.size
;
206 if goodbye_ofs
< tail
.offset
{
207 io_bail
!("goodbye offset out of range");
210 let entry_ofs
= goodbye_ofs
- tail
.offset
;
211 let size
= end_offset
- entry_ofs
;
213 let table
: Option
<Arc
<[GoodbyeItem
]>> = caches
216 .and_then(|cache
| cache
.fetch(goodbye_ofs
));
218 let mut this
= Self {
223 table
: table
.as_ref().map_or_else(|| Arc
::new([]), Arc
::clone
),
229 if this
.table_size() % (size_of
::<GoodbyeItem
>() as u64) != 0 {
230 io_bail
!("invalid goodbye table size: {}", this
.table_size());
234 this
.table
= this
.load_table().await?
;
235 if let Some(ref cache
) = this
.caches
.gbt_cache
{
236 cache
.insert(goodbye_ofs
, Arc
::clone(&this
.table
));
243 /// Load the entire goodbye table:
///
/// Reads `len * size_of::<GoodbyeItem>()` bytes starting at `table_offset()` into the
/// storage of a `Vec` created with capacity `self.len()`.
///
/// NOTE(review): the bytes are written through a raw `*mut u8` view of the vector's
/// allocation; how the vector's length is set afterwards is not visible here — confirm it
/// only happens after a successful read.
244 async
fn load_table(&self) -> io
::Result
<Arc
<[GoodbyeItem
]>> {
// Number of items as reported by `len()`.
245 let len
= self.len();
// NOTE(review): `self.len()` is evaluated a second time here instead of reusing `len`.
246 let mut data
= Vec
::with_capacity(self.len());
// Raw byte view over the vector's allocation, sized for `len` items.
249 let slice
= std
::slice
::from_raw_parts_mut(
250 data
.as_mut_ptr() as *mut u8,
251 len
* size_of
::<GoodbyeItem
>(),
// Fill the byte view from the input, starting at the table offset.
253 (&self.input
as &dyn ReadAt
)
254 .read_exact_at(slice
, self.table_offset())
262 fn end_offset(&self) -> u64 {
263 self.entry_ofs
+ self.size
267 fn entry_range(&self) -> Range
<u64> {
268 self.entry_ofs
..self.end_offset()
272 fn table_size(&self) -> u64 {
273 (self.end_offset() - self.goodbye_ofs
) - (size_of
::<format
::Header
>() as u64)
277 fn table_offset(&self) -> u64 {
278 self.goodbye_ofs
+ (size_of
::<format
::Header
>() as u64)
281 /// Length *excluding* the tail marker!
283 fn len(&self) -> usize {
284 (self.table_size() / (size_of
::<GoodbyeItem
>() as u64)) as usize - 1
287 /// Read the goodbye tail and perform some sanity checks.
///
/// The tail is read as one `GoodbyeItem` located `size_of::<GoodbyeItem>()` bytes before
/// `end_offset`.
///
/// # Errors
///
/// Bails when `end_offset` is smaller than one `GoodbyeItem`, when reading the item fails,
/// or when its `hash` is not `format::PXAR_GOODBYE_TAIL_MARKER`.
288 async
fn read_tail_entry(input
: &'_
dyn ReadAt
, end_offset
: u64) -> io
::Result
<GoodbyeItem
> {
// Guard the subtraction below against underflow.
289 if end_offset
< (size_of
::<GoodbyeItem
>() as u64) {
290 io_bail
!("goodbye tail does not fit");
// The tail item occupies the last `size_of::<GoodbyeItem>()` bytes before `end_offset`.
293 let tail_offset
= end_offset
- (size_of
::<GoodbyeItem
>() as u64);
294 let tail
: GoodbyeItem
= input
.read_entry_at(tail_offset
).await?
;
// The tail is recognized by its marker value in the `hash` field.
296 if tail
.hash
!= format
::PXAR_GOODBYE_TAIL_MARKER
{
297 io_bail
!("no goodbye tail marker found");
303 /// Get a decoder for the directory contents.
304 pub(crate) async
fn decode_full(&self) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
305 let (dir
, decoder
) = self.decode_one_entry(self.entry_range(), None
).await?
;
307 io_bail
!("directory does not seem to be a directory");
312 async
fn get_decoder(
314 entry_range
: Range
<u64>,
315 file_name
: Option
<&Path
>,
316 ) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
317 Ok(DecoderImpl
::new_full(
318 SeqReadAtAdapter
::new(self.input
.clone(), entry_range
),
320 None
=> self.path
.clone(),
321 Some(file
) => self.path
.join(file
),
327 async
fn decode_one_entry(
329 entry_range
: Range
<u64>,
330 file_name
: Option
<&Path
>,
331 ) -> io
::Result
<(Entry
, DecoderImpl
<SeqReadAtAdapter
<T
>>)> {
332 let mut decoder
= self.get_decoder(entry_range
, file_name
).await?
;
336 .ok_or_else(|| io_format_err
!("unexpected EOF while decoding directory entry"))??
;
340 fn lookup_hash_position(&self, hash
: u64, start
: usize, skip
: usize) -> Option
<usize> {
341 binary_tree_array
::search_by(&self.table
, start
, skip
, |i
| hash
.cmp(&i
.hash
))
344 async
fn lookup_self(&self) -> io
::Result
<FileEntryImpl
<T
>> {
345 let (entry
, _decoder
) = self.decode_one_entry(self.entry_range(), None
).await?
;
347 input
: self.input
.clone(),
349 entry_range
: self.entry_range(),
350 caches
: Arc
::clone(&self.caches
),
354 /// Lookup a directory entry.
355 pub async
fn lookup(&self, path
: &Path
) -> io
::Result
<Option
<FileEntryImpl
<T
>>> {
356 let mut cur
: Option
<FileEntryImpl
<T
>> = None
;
358 let mut first
= true;
359 for component
in path
.components() {
360 use std
::path
::Component
;
362 let first
= mem
::replace(&mut first
, false);
364 let component
= match component
{
365 Component
::Normal(path
) => path
,
366 Component
::ParentDir
=> io_bail
!("cannot enter parent directory in archive"),
367 Component
::RootDir
| Component
::CurDir
if first
=> {
368 cur
= Some(self.lookup_self().await?
);
371 Component
::CurDir
=> continue,
372 _
=> io_bail
!("invalid component in path"),
375 let next
= match cur
{
380 .lookup_component(component
)
383 None
=> self.lookup_component(component
).await?
,
396 /// Lookup a single directory entry component (does not handle multiple components in path)
///
/// Hashes `path` with `format::hash_filename` and searches the goodbye table for that hash.
/// Returns `Ok(None)` when no table position matches; returns the decoded entry when the
/// file name read at a matching position equals `path`.
397 pub async
fn lookup_component(&self, path
: &OsStr
) -> io
::Result
<Option
<FileEntryImpl
<T
>>> {
398 let hash
= format
::hash_filename(path
.as_bytes());
// First table position carrying this hash; no match means the component does not exist.
399 let first_index
= match self.lookup_hash_position(hash
, 0, 0) {
400 Some(index
) => index
,
401 None
=> return Ok(None
),
404 // Look up the file name: if the hash matches but the file name does not, check for a
405 // duplicate hash. Once found, use the GoodbyeItem's offset+size as well as the file's
406 // Entry to build the returned `FileEntryImpl`.
// NOTE(review): `dup` is defined on lines not visible here; presumably a counter of
// duplicates already skipped — confirm.
410 let index
= match self.lookup_hash_position(hash
, first_index
, dup
) {
411 Some(index
) => index
,
412 None
=> return Ok(None
),
// Read the file name stored at this position and compare it with the requested one.
415 let cursor
= self.get_cursor(index
).await?
;
416 if cursor
.file_name
== path
{
417 return Ok(Some(cursor
.decode_entry().await?
));
424 async
fn get_cursor
<'a
>(&'a
self, index
: usize) -> io
::Result
<DirEntryImpl
<'a
, T
>> {
425 let entry
= &self.table
[index
];
426 let file_goodbye_ofs
= entry
.offset
;
427 if self.goodbye_ofs
< file_goodbye_ofs
{
428 io_bail
!("invalid file offset");
431 let file_ofs
= self.goodbye_ofs
- file_goodbye_ofs
;
432 let (file_name
, entry_ofs
) = self.read_filename_entry(file_ofs
).await?
;
434 let entry_range
= Range
{
436 end
: file_ofs
+ entry
.size
,
438 if entry_range
.end
< entry_range
.start
{
440 "bad file: invalid entry ranges for {:?}: \
441 start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}",
453 caches
: Arc
::clone(&self.caches
),
457 async
fn read_filename_entry(&self, file_ofs
: u64) -> io
::Result
<(PathBuf
, u64)> {
458 let head
: format
::Header
= (&self.input
as &dyn ReadAt
).read_entry_at(file_ofs
).await?
;
459 if head
.htype
!= format
::PXAR_FILENAME
{
460 io_bail
!("expected PXAR_FILENAME header, found: {:x}", head
.htype
);
463 let mut path
= (&self.input
as &dyn ReadAt
)
465 head
.content_size() as usize,
466 file_ofs
+ (size_of_val(&head
) as u64),
470 if path
.pop() != Some(0) {
471 io_bail
!("invalid file name (missing terminating zero)");
475 io_bail
!("invalid empty file name");
478 let file_name
= PathBuf
::from(OsString
::from_vec(path
));
479 format
::check_file_name(&file_name
)?
;
481 Ok((file_name
, file_ofs
+ head
.full_size()))
484 pub fn read_dir(&self) -> ReadDirImpl
<T
> {
485 ReadDirImpl
::new(self, 0)
489 /// A file entry retrieved from a Directory.
490 pub(crate) struct FileEntryImpl
<T
: Clone
+ ReadAt
> {
493 entry_range
: Range
<u64>,
497 impl<T
: Clone
+ ReadAt
> FileEntryImpl
<T
> {
498 pub async
fn enter_directory(&self) -> io
::Result
<DirectoryImpl
<T
>> {
499 if !self.entry
.is_dir() {
500 io_bail
!("enter_directory() on a non-directory");
503 DirectoryImpl
::open_at_end(
505 self.entry_range
.end
,
506 self.entry
.path
.clone(),
507 Arc
::clone(&self.caches
),
512 pub async
fn contents(&self) -> io
::Result
<FileContentsImpl
<T
>> {
513 match self.entry
.kind
{
514 EntryKind
::File { offset: None, .. }
=> {
515 io_bail
!("cannot open file, reader provided no offset")
519 offset
: Some(offset
),
520 } => Ok(FileContentsImpl
::new(
522 offset
..(offset
+ size
),
524 _
=> io_bail
!("not a file"),
529 pub fn into_entry(self) -> Entry
{
534 pub fn entry(&self) -> &Entry
{
538 /// Exposed for raw by-offset access methods (use with `open_dir_at_end`).
540 pub fn entry_range(&self) -> Range
<u64> {
541 self.entry_range
.clone()
545 /// An iterator over the contents of a directory.
546 pub(crate) struct ReadDirImpl
<'a
, T
> {
547 dir
: &'a DirectoryImpl
<T
>,
551 impl<'a
, T
: Clone
+ ReadAt
> ReadDirImpl
<'a
, T
> {
552 fn new(dir
: &'a DirectoryImpl
<T
>, at
: usize) -> Self {
556 /// Get the next entry.
557 pub async
fn next(&mut self) -> io
::Result
<Option
<DirEntryImpl
<'a
, T
>>> {
558 if self.at
== self.dir
.table
.len() {
561 let cursor
= self.dir
.get_cursor(self.at
).await?
;
567 /// Efficient alternative to `Iterator::skip`.
569 pub fn skip(self, n
: usize) -> Self {
571 at
: (self.at
+ n
).min(self.dir
.table
.len()),
576 /// Efficient alternative to `Iterator::count`.
578 pub fn count(self) -> usize {
583 /// A cursor pointing to a file in a directory.
585 /// At this point only the file name has been read and we remembered the position for finding the
586 /// actual data. This can be upgraded into a FileEntryImpl.
587 pub(crate) struct DirEntryImpl
<'a
, T
: Clone
+ ReadAt
> {
588 dir
: &'a DirectoryImpl
<T
>,
590 entry_range
: Range
<u64>,
594 impl<'a
, T
: Clone
+ ReadAt
> DirEntryImpl
<'a
, T
> {
595 pub fn file_name(&self) -> &Path
{
599 async
fn decode_entry(&self) -> io
::Result
<FileEntryImpl
<T
>> {
600 let (entry
, _decoder
) = self
602 .decode_one_entry(self.entry_range
.clone(), Some(&self.file_name
))
606 input
: self.dir
.input
.clone(),
608 entry_range
: self.entry_range(),
609 caches
: Arc
::clone(&self.caches
),
613 /// Exposed for raw by-offset access methods.
615 pub fn entry_range(&self) -> Range
<u64> {
616 self.entry_range
.clone()
620 /// A reader for file contents.
621 pub(crate) struct FileContentsImpl
<T
> {
624 /// Absolute offset inside the `input`.
628 impl<T
: Clone
+ ReadAt
> FileContentsImpl
<T
> {
629 pub fn new(input
: T
, range
: Range
<u64>) -> Self {
630 Self { input, range }
634 pub fn file_size(&self) -> u64 {
635 self.range
.end
- self.range
.start
/// Reads into `buf` from position `offset` relative to the start of this file's range.
///
/// `buf` is shortened when it is longer than `file_size() - offset`, and the read is issued
/// on `input` at `range.start + offset`.
///
/// NOTE(review): `size - offset` would underflow for `offset > size`; the lines between the
/// two statements are not visible here — confirm a guard precedes the subtraction.
638 async
fn read_at(&self, mut buf
: &mut [u8], offset
: u64) -> io
::Result
<usize> {
639 let size
= self.file_size();
// Bytes left between `offset` and the end of the file.
643 let remaining
= size
- offset
;
// Never read past the end of the file's range: clamp the buffer to what is left.
645 if remaining
< buf
.len() as u64 {
// The cast cannot truncate: `remaining` is below `buf.len()` in this branch.
646 buf
= &mut buf
[..(remaining
as usize)];
// Translate the file-relative offset into an offset inside `input`.
649 (&self.input
as &dyn ReadAt
)
650 .read_at(buf
, self.range
.start
+ offset
)
656 pub struct SeqReadAtAdapter
<T
> {
661 impl<T
: ReadAt
> SeqReadAtAdapter
<T
> {
662 pub fn new(input
: T
, range
: Range
<u64>) -> Self {
663 if range
.end
< range
.start
{
664 panic
!("BAD SEQ READ AT ADAPTER");
666 Self { input, range }
670 fn remaining(&self) -> usize {
671 (self.range
.end
- self.range
.start
) as usize
675 impl<T
: ReadAt
> decoder
::SeqRead
for SeqReadAtAdapter
<T
> {
677 self: Pin
<&mut Self>,
680 ) -> Poll
<io
::Result
<usize>> {
681 let len
= buf
.len().min(self.remaining());
682 let buf
= &mut buf
[..len
];
684 let this
= unsafe { self.get_unchecked_mut() }
;
686 let got
= ready
!(unsafe {
687 Pin
::new_unchecked(&this
.input
).poll_read_at(cx
, buf
, this
.range
.start
)
689 this
.range
.start
+= got
as u64;
693 fn poll_position(self: Pin
<&mut Self>, _cx
: &mut Context
) -> Poll
<Option
<io
::Result
<u64>>> {
694 Poll
::Ready(Some(Ok(self.range
.start
)))