1 //! Random access for PXAR files.
3 use std
::ffi
::{OsStr, OsString}
;
5 use std
::mem
::{self, size_of, size_of_val, MaybeUninit}
;
7 use std
::os
::unix
::ffi
::{OsStrExt, OsStringExt}
;
8 use std
::path
::{Path, PathBuf}
;
10 use std
::task
::{Context, Poll}
;
12 use endian_trait
::Endian
;
14 use crate::decoder
::{self, DecoderImpl}
;
15 use crate::format
::{self, GoodbyeItem}
;
16 use crate::poll_fn
::poll_fn
;
18 use crate::{Entry, EntryKind}
;
24 pub use sync
::Accessor
;
26 /// Random access read implementation.
33 ) -> Poll
<io
::Result
<usize>>;
36 /// We do not want to bother with actual polling, so we implement `async fn` variants of the above
39 /// The reason why this is not an internal `ReadAtExt` trait like `AsyncReadExt` is simply that
40 /// we'd then need to define all the `Future` types they return manually and explicitly. Since we
41 /// have no use for them, all we want is the ability to use `async fn`...
43 /// The downside is that we need some `(&mut self.input as &mut dyn ReadAt)` casts in the
44 /// decoder's code, but that's fine.
45 impl<'a
> dyn ReadAt
+ 'a
{
46 /// awaitable version of `poll_read_at`.
47 async
fn read_at(&self, buf
: &mut [u8], offset
: u64) -> io
::Result
<usize> {
48 poll_fn(|cx
| unsafe { Pin::new_unchecked(self).poll_read_at(cx, buf, offset) }
).await
51 /// `read_exact_at` - since that's what we _actually_ want most of the time.
52 async
fn read_exact_at(&self, mut buf
: &mut [u8], mut offset
: u64) -> io
::Result
<()> {
53 while !buf
.is_empty() {
54 match self.read_at(buf
, offset
).await?
{
55 0 => io_bail
!("unexpected EOF"),
57 buf
= &mut buf
[got
..];
65 /// Helper to read into an `Endian`-implementing `struct`.
66 async
fn read_entry_at
<T
: Endian
>(&self, offset
: u64) -> io
::Result
<T
> {
67 let mut data
= MaybeUninit
::<T
>::uninit();
69 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<T>()) }
;
70 self.read_exact_at(buf
, offset
).await?
;
71 Ok(unsafe { data.assume_init().from_le() }
)
74 /// Helper to read into an allocated byte vector.
75 async
fn read_exact_data_at(&self, size
: usize, offset
: u64) -> io
::Result
<Vec
<u8>> {
76 let mut data
= util
::vec_new(size
);
77 self.read_exact_at(&mut data
[..], offset
).await?
;
82 /// Allow using trait objects for `T: ReadAt`
83 impl<'a
> ReadAt
for &(dyn ReadAt
+ 'a
) {
89 ) -> Poll
<io
::Result
<usize>> {
91 self.map_unchecked(|this
| *this
)
92 .poll_read_at(cx
, buf
, offset
)
97 /// The random access state machine implementation.
98 pub struct AccessorImpl
<T
> {
103 impl<T
: ReadAt
> AccessorImpl
<T
> {
104 pub async
fn new(input
: T
, size
: u64) -> io
::Result
<Self> {
105 if size
< (size_of
::<GoodbyeItem
>() as u64) {
106 io_bail
!("too small to contain a pxar archive");
108 Ok(Self { input, size }
)
111 pub async
fn open_root_ref
<'a
>(&'a
self) -> io
::Result
<DirectoryImpl
<&'a
dyn ReadAt
>> {
112 DirectoryImpl
::open_at_end(&self.input
as &dyn ReadAt
, self.size
, "/".into()).await
116 impl<T
: Clone
+ ReadAt
> AccessorImpl
<T
> {
117 pub async
fn open_root(&self) -> io
::Result
<DirectoryImpl
<T
>> {
118 DirectoryImpl
::open_at_end(self.input
.clone(), self.size
, "/".into()).await
122 /// The directory random-access state machine implementation.
123 pub struct DirectoryImpl
<T
> {
128 table
: Box
<[GoodbyeItem
]>,
132 impl<T
: Clone
+ ReadAt
> DirectoryImpl
<T
> {
133 /// Open a directory ending at the specified position.
134 pub(crate) async
fn open_at_end(
138 ) -> io
::Result
<DirectoryImpl
<T
>> {
139 let tail
= Self::read_tail_entry(&input
, end_offset
).await?
;
141 if end_offset
< tail
.size
{
142 io_bail
!("goodbye tail size out of range");
145 let goodbye_ofs
= end_offset
- tail
.size
;
147 if goodbye_ofs
< tail
.offset
{
148 io_bail
!("goodbye offset out of range");
151 let entry_ofs
= goodbye_ofs
- tail
.offset
;
152 let size
= end_offset
- entry_ofs
;
154 let mut this
= Self {
164 if this
.table_size() % (size_of
::<GoodbyeItem
>() as u64) != 0 {
165 io_bail
!("invalid goodbye table size: {}", this
.table_size());
168 this
.table
= this
.load_table().await?
;
173 /// Load the entire goodbye table:
174 async
fn load_table(&self) -> io
::Result
<Box
<[GoodbyeItem
]>> {
175 let len
= self.len();
176 let mut data
= Vec
::with_capacity(self.len());
179 let slice
= std
::slice
::from_raw_parts_mut(
180 data
.as_mut_ptr() as *mut u8,
181 len
* size_of_val(&data
[0]),
183 (&self.input
as &dyn ReadAt
)
184 .read_exact_at(slice
, self.table_offset())
188 Ok(data
.into_boxed_slice())
192 fn end_offset(&self) -> u64 {
193 self.entry_ofs
+ self.size
197 fn entry_range(&self) -> Range
<u64> {
198 self.entry_ofs
..self.end_offset()
202 fn table_size(&self) -> u64 {
203 (self.end_offset() - self.goodbye_ofs
) - (size_of
::<format
::Header
>() as u64)
207 fn table_offset(&self) -> u64 {
208 self.goodbye_ofs
+ (size_of
::<format
::Header
>() as u64)
211 /// Length *excluding* the tail marker!
213 fn len(&self) -> usize {
214 (self.table_size() / (size_of
::<GoodbyeItem
>() as u64)) as usize - 1
217 /// Read the goodbye tail and perform some sanity checks.
218 async
fn read_tail_entry(input
: &'_
dyn ReadAt
, end_offset
: u64) -> io
::Result
<GoodbyeItem
> {
219 if end_offset
< (size_of
::<GoodbyeItem
>() as u64) {
220 io_bail
!("goodbye tail does not fit");
223 let tail_offset
= end_offset
- (size_of
::<GoodbyeItem
>() as u64);
224 let tail
: GoodbyeItem
= input
.read_entry_at(tail_offset
).await?
;
226 if tail
.hash
!= format
::PXAR_GOODBYE_TAIL_MARKER
{
227 io_bail
!("no goodbye tail marker found");
233 /// Get a decoder for the directory contents.
234 pub(crate) async
fn decode_full(&self) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
235 let (dir
, decoder
) = self.decode_one_entry(self.entry_range(), None
).await?
;
237 io_bail
!("directory does not seem to be a directory");
242 async
fn get_decoder(
244 entry_range
: Range
<u64>,
245 file_name
: Option
<&Path
>,
246 ) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
247 Ok(DecoderImpl
::new_full(
248 SeqReadAtAdapter
::new(self.input
.clone(), entry_range
),
250 None
=> self.path
.clone(),
251 Some(file
) => self.path
.join(file
),
257 async
fn decode_one_entry(
259 entry_range
: Range
<u64>,
260 file_name
: Option
<&Path
>,
261 ) -> io
::Result
<(Entry
, DecoderImpl
<SeqReadAtAdapter
<T
>>)> {
262 let mut decoder
= self.get_decoder(entry_range
, file_name
).await?
;
266 .ok_or_else(|| io_format_err
!("unexpected EOF while decoding directory entry"))??
;
270 fn lookup_hash_position(&self, hash
: u64) -> Option
<usize> {
271 format
::search_binary_tree_array_by(&self.table
, |i
| hash
.cmp(&i
.hash
))
274 async
fn lookup_self(&self) -> io
::Result
<FileEntryImpl
<T
>> {
275 let (entry
, _decoder
) = self.decode_one_entry(self.entry_range(), None
).await?
;
277 input
: self.input
.clone(),
279 end_offset
: self.end_offset(),
283 /// Lookup a directory entry.
284 pub async
fn lookup(&self, path
: &Path
) -> io
::Result
<Option
<FileEntryImpl
<T
>>> {
285 let mut cur
: Option
<FileEntryImpl
<T
>> = None
;
287 let mut first
= true;
288 for component
in path
.components() {
289 use std
::path
::Component
;
291 let first
= mem
::replace(&mut first
, false);
293 let component
= match component
{
294 Component
::Normal(path
) => path
,
295 Component
::ParentDir
=> io_bail
!("cannot enter parent directory in archive"),
296 Component
::RootDir
| Component
::CurDir
if first
=> {
297 cur
= Some(self.lookup_self().await?
);
300 Component
::CurDir
=> continue,
301 _
=> io_bail
!("invalid component in path"),
304 let next
= match cur
{
309 .lookup_component(component
)
312 None
=> self.lookup_component(component
).await?
,
325 /// Lookup a single directory entry component (does not handle multiple components in path)
326 pub async
fn lookup_component(&self, path
: &OsStr
) -> io
::Result
<Option
<FileEntryImpl
<T
>>> {
327 let hash
= format
::hash_filename(path
.as_bytes());
328 let index
= match self.lookup_hash_position(hash
) {
329 Some(index
) => index
,
330 None
=> return Ok(None
),
333 // Lookup FILENAME, if it doesn't match increase index, once found, use the GoodbyeItem's
334 // offset+size as well as the file's Entry to return a DirEntry::Dir or Dir::Entry.
336 while index
< self.table
.len() && self.table
[index
].hash
== hash
{
337 let cursor
= self.get_cursor(index
).await?
;
338 if cursor
.file_name
== path
{
339 return Ok(Some(cursor
.get_entry().await?
));
346 async
fn get_cursor
<'a
>(&'a
self, index
: usize) -> io
::Result
<DirEntryImpl
<'a
, T
>> {
347 let entry
= &self.table
[index
];
348 let file_goodbye_ofs
= entry
.offset
;
349 if self.goodbye_ofs
< file_goodbye_ofs
{
350 io_bail
!("invalid file offset");
353 let file_ofs
= self.goodbye_ofs
- file_goodbye_ofs
;
354 let (file_name
, entry_ofs
) = self.read_filename_entry(file_ofs
).await?
;
361 end
: file_ofs
+ entry
.size
,
366 async
fn read_filename_entry(&self, file_ofs
: u64) -> io
::Result
<(PathBuf
, u64)> {
367 let head
: format
::Header
= (&self.input
as &dyn ReadAt
).read_entry_at(file_ofs
).await?
;
368 if head
.htype
!= format
::PXAR_FILENAME
{
369 io_bail
!("expected PXAR_FILENAME header, found: {:x}", head
.htype
);
372 let mut path
= (&self.input
as &dyn ReadAt
)
374 head
.content_size() as usize,
375 file_ofs
+ (size_of_val(&head
) as u64),
379 if path
.pop() != Some(0) {
380 io_bail
!("invalid file name (missing terminating zero)");
384 io_bail
!("invalid empty file name");
387 let file_name
= PathBuf
::from(OsString
::from_vec(path
));
388 format
::check_file_name(&file_name
)?
;
390 Ok((file_name
, file_ofs
+ head
.full_size()))
393 pub fn read_dir(&self) -> ReadDirImpl
<T
> {
394 ReadDirImpl
::new(self, 0)
398 /// A file entry retrieved from a Directory.
399 pub struct FileEntryImpl
<T
: Clone
+ ReadAt
> {
405 impl<T
: Clone
+ ReadAt
> FileEntryImpl
<T
> {
406 pub async
fn enter_directory(&self) -> io
::Result
<DirectoryImpl
<T
>> {
407 if !self.entry
.is_dir() {
408 io_bail
!("enter_directory() on a non-directory");
411 DirectoryImpl
::open_at_end(self.input
.clone(), self.end_offset
, self.entry
.path
.clone())
415 pub async
fn contents(&self) -> io
::Result
<FileContentsImpl
<T
>> {
416 match self.entry
.kind
{
417 EntryKind
::File { offset: None, .. }
=> {
418 io_bail
!("cannot open file, reader provided no offset")
422 offset
: Some(offset
),
423 } => Ok(FileContentsImpl
::new(
425 offset
..(offset
+ size
),
427 _
=> io_bail
!("not a file"),
432 pub fn into_entry(self) -> Entry
{
437 pub fn entry(&self) -> &Entry
{
442 /// An iterator over the contents of a directory.
443 pub struct ReadDirImpl
<'a
, T
> {
444 dir
: &'a DirectoryImpl
<T
>,
448 impl<'a
, T
: Clone
+ ReadAt
> ReadDirImpl
<'a
, T
> {
449 pub fn new(dir
: &'a DirectoryImpl
<T
>, at
: usize) -> Self {
453 /// Get the next entry.
454 pub async
fn next(&mut self) -> io
::Result
<Option
<DirEntryImpl
<'a
, T
>>> {
455 if self.at
== self.dir
.table
.len() {
458 let cursor
= self.dir
.get_cursor(self.at
).await?
;
464 /// Efficient alternative to `Iterator::skip`.
466 pub fn skip(self, n
: usize) -> Self {
468 at
: (self.at
+ n
).min(self.dir
.table
.len()),
473 /// Efficient alternative to `Iterator::count`.
475 pub fn count(self) -> usize {
480 /// A cursor pointing to a file in a directory.
482 /// At this point only the file name has been read and we remembered the position for finding the
483 /// actual data. This can be upgraded into a FileEntryImpl.
484 pub struct DirEntryImpl
<'a
, T
: Clone
+ ReadAt
> {
485 dir
: &'a DirectoryImpl
<T
>,
487 entry_range
: Range
<u64>,
490 impl<'a
, T
: Clone
+ ReadAt
> DirEntryImpl
<'a
, T
> {
491 pub fn file_name(&self) -> &Path
{
495 pub async
fn get_entry(&self) -> io
::Result
<FileEntryImpl
<T
>> {
496 let end_offset
= self.entry_range
.end
;
497 let (entry
, _decoder
) = self
499 .decode_one_entry(self.entry_range
.clone(), Some(&self.file_name
))
503 input
: self.dir
.input
.clone(),
510 /// A reader for file contents.
511 pub struct FileContentsImpl
<T
> {
514 /// Absolute offset inside the `input`.
518 impl<T
: Clone
+ ReadAt
> FileContentsImpl
<T
> {
519 pub fn new(input
: T
, range
: Range
<u64>) -> Self {
520 Self { input, range }
524 pub fn file_size(&self) -> u64 {
525 self.range
.end
- self.range
.start
528 async
fn read_at(&self, mut buf
: &mut [u8], offset
: u64) -> io
::Result
<usize> {
529 let size
= self.file_size();
533 let remaining
= size
- offset
;
535 if remaining
< buf
.len() as u64 {
536 buf
= &mut buf
[..(remaining
as usize)];
539 (&self.input
as &dyn ReadAt
)
540 .read_at(buf
, self.range
.start
+ offset
)
546 pub struct SeqReadAtAdapter
<T
> {
551 impl<T
: ReadAt
> SeqReadAtAdapter
<T
> {
552 pub fn new(input
: T
, range
: Range
<u64>) -> Self {
553 Self { input, range }
557 fn remaining(&self) -> usize {
558 (self.range
.end
- self.range
.start
) as usize
562 impl<T
: ReadAt
> decoder
::SeqRead
for SeqReadAtAdapter
<T
> {
564 self: Pin
<&mut Self>,
567 ) -> Poll
<io
::Result
<usize>> {
568 let len
= buf
.len().min(self.remaining());
569 let buf
= &mut buf
[..len
];
571 let this
= unsafe { self.get_unchecked_mut() }
;
573 let got
= ready
!(unsafe {
574 Pin
::new_unchecked(&this
.input
).poll_read_at(cx
, buf
, this
.range
.start
)
576 this
.range
.start
+= got
as u64;
580 fn poll_position(self: Pin
<&mut Self>, _cx
: &mut Context
) -> Poll
<Option
<io
::Result
<u64>>> {
581 Poll
::Ready(Some(Ok(self.range
.start
)))