1 //! Random access for PXAR files.
3 use std
::ffi
::{OsStr, OsString}
;
5 use std
::mem
::{self, size_of, size_of_val, MaybeUninit}
;
7 use std
::os
::unix
::ffi
::{OsStrExt, OsStringExt}
;
8 use std
::path
::{Path, PathBuf}
;
10 use std
::task
::{Context, Poll}
;
12 use endian_trait
::Endian
;
14 use crate::decoder
::{self, DecoderImpl}
;
15 use crate::format
::{self, GoodbyeItem}
;
16 use crate::poll_fn
::poll_fn
;
18 use crate::{Entry, EntryKind}
;
25 pub use sync
::Accessor
;
27 /// Random access read implementation.
34 ) -> Poll
<io
::Result
<usize>>;
37 /// We do not want to bother with actual polling, so we implement `async fn` variants of the above
40 /// The reason why this is not an internal `ReadAtExt` trait like `AsyncReadExt` is simply that
41 /// we'd then need to define all the `Future` types they return manually and explicitly. Since we
42 /// have no use for them, all we want is the ability to use `async fn`...
44 /// The downside is that we need some `(&mut self.input as &mut dyn ReadAt)` casts in the
45 /// decoder's code, but that's fine.
46 impl<'a
> dyn ReadAt
+ 'a
{
47 /// awaitable version of `poll_read_at`.
48 async
fn read_at(&self, buf
: &mut [u8], offset
: u64) -> io
::Result
<usize> {
49 poll_fn(|cx
| unsafe { Pin::new_unchecked(self).poll_read_at(cx, buf, offset) }
).await
52 /// `read_exact_at` - since that's what we _actually_ want most of the time.
53 async
fn read_exact_at(&self, mut buf
: &mut [u8], mut offset
: u64) -> io
::Result
<()> {
54 while !buf
.is_empty() {
55 match self.read_at(buf
, offset
).await?
{
56 0 => io_bail
!("unexpected EOF"),
58 buf
= &mut buf
[got
..];
66 /// Helper to read into an `Endian`-implementing `struct`.
67 async
fn read_entry_at
<T
: Endian
>(&self, offset
: u64) -> io
::Result
<T
> {
68 let mut data
= MaybeUninit
::<T
>::uninit();
70 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<T>()) }
;
71 self.read_exact_at(buf
, offset
).await?
;
72 Ok(unsafe { data.assume_init().from_le() }
)
75 /// Helper to read into an allocated byte vector.
76 async
fn read_exact_data_at(&self, size
: usize, offset
: u64) -> io
::Result
<Vec
<u8>> {
77 let mut data
= util
::vec_new(size
);
78 self.read_exact_at(&mut data
[..], offset
).await?
;
83 /// Allow using trait objects for `T: ReadAt`
84 impl<'a
> ReadAt
for &(dyn ReadAt
+ 'a
) {
90 ) -> Poll
<io
::Result
<usize>> {
92 self.map_unchecked(|this
| *this
)
93 .poll_read_at(cx
, buf
, offset
)
98 /// The random access state machine implementation.
99 pub(crate) struct AccessorImpl
<T
> {
104 impl<T
: ReadAt
> AccessorImpl
<T
> {
105 pub async
fn new(input
: T
, size
: u64) -> io
::Result
<Self> {
106 if size
< (size_of
::<GoodbyeItem
>() as u64) {
107 io_bail
!("too small to contain a pxar archive");
109 Ok(Self { input, size }
)
112 pub async
fn open_root_ref
<'a
>(&'a
self) -> io
::Result
<DirectoryImpl
<&'a
dyn ReadAt
>> {
113 DirectoryImpl
::open_at_end(&self.input
as &dyn ReadAt
, self.size
, "/".into()).await
117 impl<T
: Clone
+ ReadAt
> AccessorImpl
<T
> {
118 pub async
fn open_root(&self) -> io
::Result
<DirectoryImpl
<T
>> {
119 DirectoryImpl
::open_at_end(self.input
.clone(), self.size
, "/".into()).await
123 /// The directory random-access state machine implementation.
124 pub(crate) struct DirectoryImpl
<T
> {
129 table
: Box
<[GoodbyeItem
]>,
133 impl<T
: Clone
+ ReadAt
> DirectoryImpl
<T
> {
134 /// Open a directory ending at the specified position.
135 pub(crate) async
fn open_at_end(
139 ) -> io
::Result
<DirectoryImpl
<T
>> {
140 let tail
= Self::read_tail_entry(&input
, end_offset
).await?
;
142 if end_offset
< tail
.size
{
143 io_bail
!("goodbye tail size out of range");
146 let goodbye_ofs
= end_offset
- tail
.size
;
148 if goodbye_ofs
< tail
.offset
{
149 io_bail
!("goodbye offset out of range");
152 let entry_ofs
= goodbye_ofs
- tail
.offset
;
153 let size
= end_offset
- entry_ofs
;
155 let mut this
= Self {
165 if this
.table_size() % (size_of
::<GoodbyeItem
>() as u64) != 0 {
166 io_bail
!("invalid goodbye table size: {}", this
.table_size());
169 this
.table
= this
.load_table().await?
;
174 /// Load the entire goodbye table:
175 async
fn load_table(&self) -> io
::Result
<Box
<[GoodbyeItem
]>> {
176 let len
= self.len();
177 let mut data
= Vec
::with_capacity(self.len());
180 let slice
= std
::slice
::from_raw_parts_mut(
181 data
.as_mut_ptr() as *mut u8,
182 len
* size_of_val(&data
[0]),
184 (&self.input
as &dyn ReadAt
)
185 .read_exact_at(slice
, self.table_offset())
189 Ok(data
.into_boxed_slice())
193 fn end_offset(&self) -> u64 {
194 self.entry_ofs
+ self.size
198 fn entry_range(&self) -> Range
<u64> {
199 self.entry_ofs
..self.end_offset()
203 fn table_size(&self) -> u64 {
204 (self.end_offset() - self.goodbye_ofs
) - (size_of
::<format
::Header
>() as u64)
208 fn table_offset(&self) -> u64 {
209 self.goodbye_ofs
+ (size_of
::<format
::Header
>() as u64)
212 /// Length *excluding* the tail marker!
214 fn len(&self) -> usize {
215 (self.table_size() / (size_of
::<GoodbyeItem
>() as u64)) as usize - 1
218 /// Read the goodbye tail and perform some sanity checks.
219 async
fn read_tail_entry(input
: &'_
dyn ReadAt
, end_offset
: u64) -> io
::Result
<GoodbyeItem
> {
220 if end_offset
< (size_of
::<GoodbyeItem
>() as u64) {
221 io_bail
!("goodbye tail does not fit");
224 let tail_offset
= end_offset
- (size_of
::<GoodbyeItem
>() as u64);
225 let tail
: GoodbyeItem
= input
.read_entry_at(tail_offset
).await?
;
227 if tail
.hash
!= format
::PXAR_GOODBYE_TAIL_MARKER
{
228 io_bail
!("no goodbye tail marker found");
234 /// Get a decoder for the directory contents.
235 pub(crate) async
fn decode_full(&self) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
236 let (dir
, decoder
) = self.decode_one_entry(self.entry_range(), None
).await?
;
238 io_bail
!("directory does not seem to be a directory");
243 async
fn get_decoder(
245 entry_range
: Range
<u64>,
246 file_name
: Option
<&Path
>,
247 ) -> io
::Result
<DecoderImpl
<SeqReadAtAdapter
<T
>>> {
248 Ok(DecoderImpl
::new_full(
249 SeqReadAtAdapter
::new(self.input
.clone(), entry_range
),
251 None
=> self.path
.clone(),
252 Some(file
) => self.path
.join(file
),
258 async
fn decode_one_entry(
260 entry_range
: Range
<u64>,
261 file_name
: Option
<&Path
>,
262 ) -> io
::Result
<(Entry
, DecoderImpl
<SeqReadAtAdapter
<T
>>)> {
263 let mut decoder
= self.get_decoder(entry_range
, file_name
).await?
;
267 .ok_or_else(|| io_format_err
!("unexpected EOF while decoding directory entry"))??
;
271 fn lookup_hash_position(&self, hash
: u64) -> Option
<usize> {
272 format
::search_binary_tree_array_by(&self.table
, |i
| hash
.cmp(&i
.hash
))
275 async
fn lookup_self(&self) -> io
::Result
<FileEntryImpl
<T
>> {
276 let (entry
, _decoder
) = self.decode_one_entry(self.entry_range(), None
).await?
;
278 input
: self.input
.clone(),
280 end_offset
: self.end_offset(),
284 /// Lookup a directory entry.
285 pub async
fn lookup(&self, path
: &Path
) -> io
::Result
<Option
<FileEntryImpl
<T
>>> {
286 let mut cur
: Option
<FileEntryImpl
<T
>> = None
;
288 let mut first
= true;
289 for component
in path
.components() {
290 use std
::path
::Component
;
292 let first
= mem
::replace(&mut first
, false);
294 let component
= match component
{
295 Component
::Normal(path
) => path
,
296 Component
::ParentDir
=> io_bail
!("cannot enter parent directory in archive"),
297 Component
::RootDir
| Component
::CurDir
if first
=> {
298 cur
= Some(self.lookup_self().await?
);
301 Component
::CurDir
=> continue,
302 _
=> io_bail
!("invalid component in path"),
305 let next
= match cur
{
310 .lookup_component(component
)
313 None
=> self.lookup_component(component
).await?
,
326 /// Lookup a single directory entry component (does not handle multiple components in path)
327 pub async
fn lookup_component(&self, path
: &OsStr
) -> io
::Result
<Option
<FileEntryImpl
<T
>>> {
328 let hash
= format
::hash_filename(path
.as_bytes());
329 let index
= match self.lookup_hash_position(hash
) {
330 Some(index
) => index
,
331 None
=> return Ok(None
),
334 // Lookup FILENAME, if it doesn't match increase index, once found, use the GoodbyeItem's
335 // offset+size as well as the file's Entry to return a DirEntry::Dir or Dir::Entry.
337 while index
< self.table
.len() && self.table
[index
].hash
== hash
{
338 let cursor
= self.get_cursor(index
).await?
;
339 if cursor
.file_name
== path
{
340 return Ok(Some(cursor
.get_entry().await?
));
347 async
fn get_cursor
<'a
>(&'a
self, index
: usize) -> io
::Result
<DirEntryImpl
<'a
, T
>> {
348 let entry
= &self.table
[index
];
349 let file_goodbye_ofs
= entry
.offset
;
350 if self.goodbye_ofs
< file_goodbye_ofs
{
351 io_bail
!("invalid file offset");
354 let file_ofs
= self.goodbye_ofs
- file_goodbye_ofs
;
355 let (file_name
, entry_ofs
) = self.read_filename_entry(file_ofs
).await?
;
362 end
: file_ofs
+ entry
.size
,
367 async
fn read_filename_entry(&self, file_ofs
: u64) -> io
::Result
<(PathBuf
, u64)> {
368 let head
: format
::Header
= (&self.input
as &dyn ReadAt
).read_entry_at(file_ofs
).await?
;
369 if head
.htype
!= format
::PXAR_FILENAME
{
370 io_bail
!("expected PXAR_FILENAME header, found: {:x}", head
.htype
);
373 let mut path
= (&self.input
as &dyn ReadAt
)
375 head
.content_size() as usize,
376 file_ofs
+ (size_of_val(&head
) as u64),
380 if path
.pop() != Some(0) {
381 io_bail
!("invalid file name (missing terminating zero)");
385 io_bail
!("invalid empty file name");
388 let file_name
= PathBuf
::from(OsString
::from_vec(path
));
389 format
::check_file_name(&file_name
)?
;
391 Ok((file_name
, file_ofs
+ head
.full_size()))
394 pub fn read_dir(&self) -> ReadDirImpl
<T
> {
395 ReadDirImpl
::new(self, 0)
399 /// A file entry retrieved from a Directory.
400 pub(crate) struct FileEntryImpl
<T
: Clone
+ ReadAt
> {
406 impl<T
: Clone
+ ReadAt
> FileEntryImpl
<T
> {
407 pub async
fn enter_directory(&self) -> io
::Result
<DirectoryImpl
<T
>> {
408 if !self.entry
.is_dir() {
409 io_bail
!("enter_directory() on a non-directory");
412 DirectoryImpl
::open_at_end(self.input
.clone(), self.end_offset
, self.entry
.path
.clone())
416 pub async
fn contents(&self) -> io
::Result
<FileContentsImpl
<T
>> {
417 match self.entry
.kind
{
418 EntryKind
::File { offset: None, .. }
=> {
419 io_bail
!("cannot open file, reader provided no offset")
423 offset
: Some(offset
),
424 } => Ok(FileContentsImpl
::new(
426 offset
..(offset
+ size
),
428 _
=> io_bail
!("not a file"),
433 pub fn into_entry(self) -> Entry
{
438 pub fn entry(&self) -> &Entry
{
443 /// An iterator over the contents of a directory.
444 pub(crate) struct ReadDirImpl
<'a
, T
> {
445 dir
: &'a DirectoryImpl
<T
>,
449 impl<'a
, T
: Clone
+ ReadAt
> ReadDirImpl
<'a
, T
> {
450 fn new(dir
: &'a DirectoryImpl
<T
>, at
: usize) -> Self {
454 /// Get the next entry.
455 pub async
fn next(&mut self) -> io
::Result
<Option
<DirEntryImpl
<'a
, T
>>> {
456 if self.at
== self.dir
.table
.len() {
459 let cursor
= self.dir
.get_cursor(self.at
).await?
;
465 /// Efficient alternative to `Iterator::skip`.
467 pub fn skip(self, n
: usize) -> Self {
469 at
: (self.at
+ n
).min(self.dir
.table
.len()),
474 /// Efficient alternative to `Iterator::count`.
476 pub fn count(self) -> usize {
481 /// A cursor pointing to a file in a directory.
483 /// At this point only the file name has been read and we remembered the position for finding the
484 /// actual data. This can be upgraded into a FileEntryImpl.
485 pub(crate) struct DirEntryImpl
<'a
, T
: Clone
+ ReadAt
> {
486 dir
: &'a DirectoryImpl
<T
>,
488 entry_range
: Range
<u64>,
491 impl<'a
, T
: Clone
+ ReadAt
> DirEntryImpl
<'a
, T
> {
492 pub fn file_name(&self) -> &Path
{
496 async
fn get_entry(&self) -> io
::Result
<FileEntryImpl
<T
>> {
497 let end_offset
= self.entry_range
.end
;
498 let (entry
, _decoder
) = self
500 .decode_one_entry(self.entry_range
.clone(), Some(&self.file_name
))
504 input
: self.dir
.input
.clone(),
511 /// A reader for file contents.
512 pub(crate) struct FileContentsImpl
<T
> {
515 /// Absolute offset inside the `input`.
519 impl<T
: Clone
+ ReadAt
> FileContentsImpl
<T
> {
520 pub fn new(input
: T
, range
: Range
<u64>) -> Self {
521 Self { input, range }
525 pub fn file_size(&self) -> u64 {
526 self.range
.end
- self.range
.start
529 async
fn read_at(&self, mut buf
: &mut [u8], offset
: u64) -> io
::Result
<usize> {
530 let size
= self.file_size();
534 let remaining
= size
- offset
;
536 if remaining
< buf
.len() as u64 {
537 buf
= &mut buf
[..(remaining
as usize)];
540 (&self.input
as &dyn ReadAt
)
541 .read_at(buf
, self.range
.start
+ offset
)
547 pub struct SeqReadAtAdapter
<T
> {
552 impl<T
: ReadAt
> SeqReadAtAdapter
<T
> {
553 pub fn new(input
: T
, range
: Range
<u64>) -> Self {
554 Self { input, range }
558 fn remaining(&self) -> usize {
559 (self.range
.end
- self.range
.start
) as usize
563 impl<T
: ReadAt
> decoder
::SeqRead
for SeqReadAtAdapter
<T
> {
565 self: Pin
<&mut Self>,
568 ) -> Poll
<io
::Result
<usize>> {
569 let len
= buf
.len().min(self.remaining());
570 let buf
= &mut buf
[..len
];
572 let this
= unsafe { self.get_unchecked_mut() }
;
574 let got
= ready
!(unsafe {
575 Pin
::new_unchecked(&this
.input
).poll_read_at(cx
, buf
, this
.range
.start
)
577 this
.range
.start
+= got
as u64;
581 fn poll_position(self: Pin
<&mut Self>, _cx
: &mut Context
) -> Poll
<Option
<io
::Result
<u64>>> {
582 Poll
::Ready(Some(Ok(self.range
.start
)))