1 //! Random access for PXAR files.
3 use std
::ffi
::{OsStr, OsString}
;
5 use std
::mem
::{self, size_of, size_of_val, MaybeUninit}
;
7 use std
::os
::unix
::ffi
::{OsStrExt, OsStringExt}
;
8 use std
::path
::{Path, PathBuf}
;
10 use std
::task
::{Context, Poll}
;
12 use endian_trait
::Endian
;
14 use crate::binary_tree_array
;
15 use crate::decoder
::{self, DecoderImpl}
;
16 use crate::format
::{self, GoodbyeItem}
;
17 use crate::poll_fn
::poll_fn
;
19 use crate::{Entry, EntryKind}
;
26 pub use sync
::{Accessor, DirEntry, Directory, FileEntry, ReadDir}
;
28 /// Random access read implementation.
35 ) -> Poll
<io
::Result
<usize>>;
38 /// We do not want to bother with actual polling, so we implement `async fn` variants of the above
41 /// The reason why this is not an internal `ReadAtExt` trait like `AsyncReadExt` is simply that
42 /// we'd then need to define all the `Future` types they return manually and explicitly. Since we
43 /// have no use for them, all we want is the ability to use `async fn`...
45 /// The downside is that we need some `(&mut self.input as &mut dyn ReadAt)` casts in the
46 /// decoder's code, but that's fine.
47 impl<'a
> dyn ReadAt
+ 'a
{
48 /// awaitable version of `poll_read_at`.
49 async
fn read_at(&self, buf
: &mut [u8], offset
: u64) -> io
::Result
<usize> {
50 poll_fn(|cx
| unsafe { Pin::new_unchecked(self).poll_read_at(cx, buf, offset) }
).await
53 /// `read_exact_at` - since that's what we _actually_ want most of the time.
54 async
fn read_exact_at(&self, mut buf
: &mut [u8], mut offset
: u64) -> io
::Result
<()> {
55 while !buf
.is_empty() {
56 match self.read_at(buf
, offset
).await?
{
57 0 => io_bail
!("unexpected EOF"),
59 buf
= &mut buf
[got
..];
67 /// Helper to read into an `Endian`-implementing `struct`.
68 async
fn read_entry_at
<T
: Endian
>(&self, offset
: u64) -> io
::Result
<T
> {
69 let mut data
= MaybeUninit
::<T
>::uninit();
71 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<T>()) }
;
72 self.read_exact_at(buf
, offset
).await?
;
73 Ok(unsafe { data.assume_init().from_le() }
)
76 /// Helper to read into an allocated byte vector.
77 async
fn read_exact_data_at(&self, size
: usize, offset
: u64) -> io
::Result
<Vec
<u8>> {
78 let mut data
= util
::vec_new(size
);
79 self.read_exact_at(&mut data
[..], offset
).await?
;
84 /// Allow using trait objects for `T: ReadAt`
85 impl<'a
> ReadAt
for &(dyn ReadAt
+ 'a
) {
91 ) -> Poll
<io
::Result
<usize>> {
93 self.map_unchecked(|this
| *this
)
94 .poll_read_at(cx
, buf
, offset
)
99 /// The random access state machine implementation.
100 pub(crate) struct AccessorImpl
<T
> {
105 impl<T
: ReadAt
> AccessorImpl
<T
> {
106 pub async
fn new(input
: T
, size
: u64) -> io
::Result
<Self> {
107 if size
< (size_of
::<GoodbyeItem
>() as u64) {
108 io_bail
!("too small to contain a pxar archive");
110 Ok(Self { input, size }
)
113 pub async
fn open_root_ref
<'a
>(&'a
self) -> io
::Result
<DirectoryImpl
<&'a
dyn ReadAt
>> {
114 DirectoryImpl
::open_at_end(&self.input
as &dyn ReadAt
, self.size
, "/".into()).await
118 impl<T
: Clone
+ ReadAt
> AccessorImpl
<T
> {
119 pub async
fn open_root(&self) -> io
::Result
<DirectoryImpl
<T
>> {
120 DirectoryImpl
::open_at_end(self.input
.clone(), self.size
, "/".into()).await
124 /// The directory random-access state machine implementation.
125 pub(crate) struct DirectoryImpl
<T
> {
130 table
: Box
<[GoodbyeItem
]>,
134 impl<T
: Clone
+ ReadAt
> DirectoryImpl
<T
> {
135 /// Open a directory ending at the specified position.
136 pub(crate) async
fn open_at_end(
140 ) -> io
::Result
<DirectoryImpl
<T
>> {
141 let tail
= Self::read_tail_entry(&input
, end_offset
).await?
;
143 if end_offset
< tail
.size
{
144 io_bail
!("goodbye tail size out of range");
147 let goodbye_ofs
= end_offset
- tail
.size
;
149 if goodbye_ofs
< tail
.offset
{
150 io_bail
!("goodbye offset out of range");
153 let entry_ofs
= goodbye_ofs
- tail
.offset
;
154 let size
= end_offset
- entry_ofs
;
156 let mut this
= Self {
166 if this
.table_size() % (size_of
::<GoodbyeItem
>() as u64) != 0 {
167 io_bail
!("invalid goodbye table size: {}", this
.table_size());
170 this
.table
= this
.load_table().await?
;
175 /// Load the entire goodbye table:
176 async
fn load_table(&self) -> io
::Result
<Box
<[GoodbyeItem
]>> {
177 let len
= self.len();
178 let mut data
= Vec
::with_capacity(self.len());
181 let slice
= std
::slice
::from_raw_parts_mut(
182 data
.as_mut_ptr() as *mut u8,
183 len
* size_of
::<GoodbyeItem
>(),
185 (&self.input
as &dyn ReadAt
)
186 .read_exact_at(slice
, self.table_offset())
190 Ok(data
.into_boxed_slice())
194 fn end_offset(&self) -> u64 {
195 self.entry_ofs
+ self.size
199 fn entry_range(&self) -> Range
<u64> {
200 self.entry_ofs
..self.end_offset()
204 fn table_size(&self) -> u64 {
205 (self.end_offset() - self.goodbye_ofs
) - (size_of
::<format
::Header
>() as u64)
209 fn table_offset(&self) -> u64 {
210 self.goodbye_ofs
+ (size_of
::<format
::Header
>() as u64)
213 /// Length *excluding* the tail marker!
215 fn len(&self) -> usize {
216 (self.table_size() / (size_of
::<GoodbyeItem
>() as u64)) as usize - 1
219 /// Read the goodbye tail and perform some sanity checks.
220 async
fn read_tail_entry(input
: &'_
dyn ReadAt
, end_offset
: u64) -> io
::Result
<GoodbyeItem
> {
221 if end_offset
< (size_of
::<GoodbyeItem
>() as u64) {
222 io_bail
!("goodbye tail does not fit");
225 let tail_offset
= end_offset
- (size_of
::<GoodbyeItem
>() as u64);
226 let tail
: GoodbyeItem
= input
.read_entry_at(tail_offset
).await?
;
228 if tail
.hash
!= format
::PXAR_GOODBYE_TAIL_MARKER
{
229 io_bail
!("no goodbye tail marker found");
235 /// Get a decoder for the directory contents.
236 pub(crate) async
fn decode_full(&self) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
237 let (dir
, decoder
) = self.decode_one_entry(self.entry_range(), None
).await?
;
239 io_bail
!("directory does not seem to be a directory");
244 async
fn get_decoder(
246 entry_range
: Range
<u64>,
247 file_name
: Option
<&Path
>,
248 ) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
249 Ok(DecoderImpl
::new_full(
250 SeqReadAtAdapter
::new(self.input
.clone(), entry_range
),
252 None
=> self.path
.clone(),
253 Some(file
) => self.path
.join(file
),
259 async
fn decode_one_entry(
261 entry_range
: Range
<u64>,
262 file_name
: Option
<&Path
>,
263 ) -> io
::Result
<(Entry
, DecoderImpl
<SeqReadAtAdapter
<T
>>)> {
264 let mut decoder
= self.get_decoder(entry_range
, file_name
).await?
;
268 .ok_or_else(|| io_format_err
!("unexpected EOF while decoding directory entry"))??
;
272 fn lookup_hash_position(&self, hash
: u64, start
: usize, skip
: usize) -> Option
<usize> {
273 binary_tree_array
::search_by(&self.table
, start
, skip
, |i
| hash
.cmp(&i
.hash
))
276 async
fn lookup_self(&self) -> io
::Result
<FileEntryImpl
<T
>> {
277 let (entry
, _decoder
) = self.decode_one_entry(self.entry_range(), None
).await?
;
279 input
: self.input
.clone(),
281 end_offset
: self.end_offset(),
285 /// Lookup a directory entry.
286 pub async
fn lookup(&self, path
: &Path
) -> io
::Result
<Option
<FileEntryImpl
<T
>>> {
287 let mut cur
: Option
<FileEntryImpl
<T
>> = None
;
289 let mut first
= true;
290 for component
in path
.components() {
291 use std
::path
::Component
;
293 let first
= mem
::replace(&mut first
, false);
295 let component
= match component
{
296 Component
::Normal(path
) => path
,
297 Component
::ParentDir
=> io_bail
!("cannot enter parent directory in archive"),
298 Component
::RootDir
| Component
::CurDir
if first
=> {
299 cur
= Some(self.lookup_self().await?
);
302 Component
::CurDir
=> continue,
303 _
=> io_bail
!("invalid component in path"),
306 let next
= match cur
{
311 .lookup_component(component
)
314 None
=> self.lookup_component(component
).await?
,
327 /// Lookup a single directory entry component (does not handle multiple components in path)
328 pub async
fn lookup_component(&self, path
: &OsStr
) -> io
::Result
<Option
<FileEntryImpl
<T
>>> {
329 let hash
= format
::hash_filename(path
.as_bytes());
330 let first_index
= match self.lookup_hash_position(hash
, 0, 0) {
331 Some(index
) => index
,
332 None
=> return Ok(None
),
335 // Lookup FILENAME, if the hash matches but the filename doesn't, check for a duplicate
336 // hash once found, use the GoodbyeItem's offset+size as well as the file's Entry to return
337 // a DirEntry::Dir or Dir::Entry.
341 let index
= match self.lookup_hash_position(hash
, first_index
, dup
) {
342 Some(index
) => index
,
343 None
=> return Ok(None
),
346 let cursor
= self.get_cursor(index
).await?
;
347 if cursor
.file_name
== path
{
348 return Ok(Some(cursor
.decode_entry().await?
));
355 async
fn get_cursor
<'a
>(&'a
self, index
: usize) -> io
::Result
<DirEntryImpl
<'a
, T
>> {
356 let entry
= &self.table
[index
];
357 let file_goodbye_ofs
= entry
.offset
;
358 if self.goodbye_ofs
< file_goodbye_ofs
{
359 io_bail
!("invalid file offset");
362 let file_ofs
= self.goodbye_ofs
- file_goodbye_ofs
;
363 let (file_name
, entry_ofs
) = self.read_filename_entry(file_ofs
).await?
;
365 let entry_range
= Range
{
367 end
: file_ofs
+ entry
.size
,
369 if entry_range
.end
< entry_range
.start
{
371 "bad file: invalid entry ranges for {:?}: \
372 start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}",
387 async
fn read_filename_entry(&self, file_ofs
: u64) -> io
::Result
<(PathBuf
, u64)> {
388 let head
: format
::Header
= (&self.input
as &dyn ReadAt
).read_entry_at(file_ofs
).await?
;
389 if head
.htype
!= format
::PXAR_FILENAME
{
390 io_bail
!("expected PXAR_FILENAME header, found: {:x}", head
.htype
);
393 let mut path
= (&self.input
as &dyn ReadAt
)
395 head
.content_size() as usize,
396 file_ofs
+ (size_of_val(&head
) as u64),
400 if path
.pop() != Some(0) {
401 io_bail
!("invalid file name (missing terminating zero)");
405 io_bail
!("invalid empty file name");
408 let file_name
= PathBuf
::from(OsString
::from_vec(path
));
409 format
::check_file_name(&file_name
)?
;
411 Ok((file_name
, file_ofs
+ head
.full_size()))
414 pub fn read_dir(&self) -> ReadDirImpl
<T
> {
415 ReadDirImpl
::new(self, 0)
419 /// A file entry retrieved from a Directory.
420 pub(crate) struct FileEntryImpl
<T
: Clone
+ ReadAt
> {
426 impl<T
: Clone
+ ReadAt
> FileEntryImpl
<T
> {
427 pub async
fn enter_directory(&self) -> io
::Result
<DirectoryImpl
<T
>> {
428 if !self.entry
.is_dir() {
429 io_bail
!("enter_directory() on a non-directory");
432 DirectoryImpl
::open_at_end(self.input
.clone(), self.end_offset
, self.entry
.path
.clone())
436 pub async
fn contents(&self) -> io
::Result
<FileContentsImpl
<T
>> {
437 match self.entry
.kind
{
438 EntryKind
::File { offset: None, .. }
=> {
439 io_bail
!("cannot open file, reader provided no offset")
443 offset
: Some(offset
),
444 } => Ok(FileContentsImpl
::new(
446 offset
..(offset
+ size
),
448 _
=> io_bail
!("not a file"),
453 pub fn into_entry(self) -> Entry
{
458 pub fn entry(&self) -> &Entry
{
463 /// An iterator over the contents of a directory.
464 pub(crate) struct ReadDirImpl
<'a
, T
> {
465 dir
: &'a DirectoryImpl
<T
>,
469 impl<'a
, T
: Clone
+ ReadAt
> ReadDirImpl
<'a
, T
> {
470 fn new(dir
: &'a DirectoryImpl
<T
>, at
: usize) -> Self {
474 /// Get the next entry.
475 pub async
fn next(&mut self) -> io
::Result
<Option
<DirEntryImpl
<'a
, T
>>> {
476 if self.at
== self.dir
.table
.len() {
479 let cursor
= self.dir
.get_cursor(self.at
).await?
;
485 /// Efficient alternative to `Iterator::skip`.
487 pub fn skip(self, n
: usize) -> Self {
489 at
: (self.at
+ n
).min(self.dir
.table
.len()),
494 /// Efficient alternative to `Iterator::count`.
496 pub fn count(self) -> usize {
501 /// A cursor pointing to a file in a directory.
503 /// At this point only the file name has been read and we remembered the position for finding the
504 /// actual data. This can be upgraded into a FileEntryImpl.
505 pub(crate) struct DirEntryImpl
<'a
, T
: Clone
+ ReadAt
> {
506 dir
: &'a DirectoryImpl
<T
>,
508 entry_range
: Range
<u64>,
511 impl<'a
, T
: Clone
+ ReadAt
> DirEntryImpl
<'a
, T
> {
512 pub fn file_name(&self) -> &Path
{
516 async
fn decode_entry(&self) -> io
::Result
<FileEntryImpl
<T
>> {
517 let end_offset
= self.entry_range
.end
;
518 let (entry
, _decoder
) = self
520 .decode_one_entry(self.entry_range
.clone(), Some(&self.file_name
))
524 input
: self.dir
.input
.clone(),
531 /// A reader for file contents.
532 pub(crate) struct FileContentsImpl
<T
> {
535 /// Absolute offset inside the `input`.
539 impl<T
: Clone
+ ReadAt
> FileContentsImpl
<T
> {
540 pub fn new(input
: T
, range
: Range
<u64>) -> Self {
541 Self { input, range }
545 pub fn file_size(&self) -> u64 {
546 self.range
.end
- self.range
.start
549 async
fn read_at(&self, mut buf
: &mut [u8], offset
: u64) -> io
::Result
<usize> {
550 let size
= self.file_size();
554 let remaining
= size
- offset
;
556 if remaining
< buf
.len() as u64 {
557 buf
= &mut buf
[..(remaining
as usize)];
560 (&self.input
as &dyn ReadAt
)
561 .read_at(buf
, self.range
.start
+ offset
)
567 pub struct SeqReadAtAdapter
<T
> {
572 impl<T
: ReadAt
> SeqReadAtAdapter
<T
> {
573 pub fn new(input
: T
, range
: Range
<u64>) -> Self {
574 if range
.end
< range
.start
{
575 panic
!("BAD SEQ READ AT ADAPTER");
577 Self { input, range }
581 fn remaining(&self) -> usize {
582 (self.range
.end
- self.range
.start
) as usize
586 impl<T
: ReadAt
> decoder
::SeqRead
for SeqReadAtAdapter
<T
> {
588 self: Pin
<&mut Self>,
591 ) -> Poll
<io
::Result
<usize>> {
592 let len
= buf
.len().min(self.remaining());
593 let buf
= &mut buf
[..len
];
595 let this
= unsafe { self.get_unchecked_mut() }
;
597 let got
= ready
!(unsafe {
598 Pin
::new_unchecked(&this
.input
).poll_read_at(cx
, buf
, this
.range
.start
)
600 this
.range
.start
+= got
as u64;
604 fn poll_position(self: Pin
<&mut Self>, _cx
: &mut Context
) -> Poll
<Option
<io
::Result
<u64>>> {
605 Poll
::Ready(Some(Ok(self.range
.start
)))