]> git.proxmox.com Git - pxar.git/blob - src/accessor.rs
expose 'with_goodbye_tables' decoder options
[pxar.git] / src / accessor.rs
1 //! Random access for PXAR files.
2
3 use std::ffi::{OsStr, OsString};
4 use std::io;
5 use std::mem::{self, size_of, size_of_val, MaybeUninit};
6 use std::ops::Range;
7 use std::os::unix::ffi::{OsStrExt, OsStringExt};
8 use std::path::{Path, PathBuf};
9 use std::pin::Pin;
10 use std::task::{Context, Poll};
11
12 use endian_trait::Endian;
13
14 use crate::binary_tree_array;
15 use crate::decoder::{self, DecoderImpl};
16 use crate::format::{self, GoodbyeItem};
17 use crate::poll_fn::poll_fn;
18 use crate::util;
19 use crate::{Entry, EntryKind};
20
21 #[doc(hidden)]
22 pub mod aio;
23 pub mod sync;
24
25 #[doc(inline)]
26 pub use sync::{Accessor, DirEntry, Directory, FileEntry, ReadDir};
27
/// Random access read implementation.
///
/// Implementors provide a poll-based positional read: each call names the absolute `offset`
/// to read from instead of relying on an internal cursor.
pub trait ReadAt {
    /// Attempt to read bytes starting at `offset` into `buf`.
    ///
    /// Yields the number of bytes placed into `buf` once the read is ready. The awaitable
    /// helpers in this module treat a ready result of `0` as end of file.
    fn poll_read_at(
        self: Pin<&Self>,
        cx: &mut Context<'_>,
        buf: &mut [u8],
        offset: u64,
    ) -> Poll<io::Result<usize>>;
}
37
38 /// We do not want to bother with actual polling, so we implement `async fn` variants of the above
39 /// on `dyn ReadAt`.
40 ///
41 /// The reason why this is not an internal `ReadAtExt` trait like `AsyncReadExt` is simply that
42 /// we'd then need to define all the `Future` types they return manually and explicitly. Since we
43 /// have no use for them, all we want is the ability to use `async fn`...
44 ///
45 /// The downside is that we need some `(&mut self.input as &mut dyn ReadAt)` casts in the
46 /// decoder's code, but that's fine.
47 impl<'a> dyn ReadAt + 'a {
48 /// awaitable version of `poll_read_at`.
49 async fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result<usize> {
50 poll_fn(|cx| unsafe { Pin::new_unchecked(self).poll_read_at(cx, buf, offset) }).await
51 }
52
53 /// `read_exact_at` - since that's what we _actually_ want most of the time.
54 async fn read_exact_at(&self, mut buf: &mut [u8], mut offset: u64) -> io::Result<()> {
55 while !buf.is_empty() {
56 match self.read_at(buf, offset).await? {
57 0 => io_bail!("unexpected EOF"),
58 got => {
59 buf = &mut buf[got..];
60 offset += got as u64;
61 }
62 }
63 }
64 Ok(())
65 }
66
67 /// Helper to read into an `Endian`-implementing `struct`.
68 async fn read_entry_at<T: Endian>(&self, offset: u64) -> io::Result<T> {
69 let mut data = MaybeUninit::<T>::uninit();
70 let buf =
71 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<T>()) };
72 self.read_exact_at(buf, offset).await?;
73 Ok(unsafe { data.assume_init().from_le() })
74 }
75
76 /// Helper to read into an allocated byte vector.
77 async fn read_exact_data_at(&self, size: usize, offset: u64) -> io::Result<Vec<u8>> {
78 let mut data = util::vec_new(size);
79 self.read_exact_at(&mut data[..], offset).await?;
80 Ok(data)
81 }
82 }
83
84 /// Allow using trait objects for `T: ReadAt`
85 impl<'a> ReadAt for &(dyn ReadAt + 'a) {
86 fn poll_read_at(
87 self: Pin<&Self>,
88 cx: &mut Context,
89 buf: &mut [u8],
90 offset: u64,
91 ) -> Poll<io::Result<usize>> {
92 unsafe {
93 self.map_unchecked(|this| *this)
94 .poll_read_at(cx, buf, offset)
95 }
96 }
97 }
98
99 /// The random access state machine implementation.
100 pub(crate) struct AccessorImpl<T> {
101 input: T,
102 size: u64,
103 }
104
105 impl<T: ReadAt> AccessorImpl<T> {
106 pub async fn new(input: T, size: u64) -> io::Result<Self> {
107 if size < (size_of::<GoodbyeItem>() as u64) {
108 io_bail!("too small to contain a pxar archive");
109 }
110 Ok(Self { input, size })
111 }
112
113 pub async fn open_root_ref<'a>(&'a self) -> io::Result<DirectoryImpl<&'a dyn ReadAt>> {
114 DirectoryImpl::open_at_end(&self.input as &dyn ReadAt, self.size, "/".into()).await
115 }
116 }
117
118 impl<T: Clone + ReadAt> AccessorImpl<T> {
119 pub async fn open_root(&self) -> io::Result<DirectoryImpl<T>> {
120 DirectoryImpl::open_at_end(self.input.clone(), self.size, "/".into()).await
121 }
122 }
123
124 /// The directory random-access state machine implementation.
125 pub(crate) struct DirectoryImpl<T> {
126 input: T,
127 entry_ofs: u64,
128 goodbye_ofs: u64,
129 size: u64,
130 table: Box<[GoodbyeItem]>,
131 path: PathBuf,
132 }
133
134 impl<T: Clone + ReadAt> DirectoryImpl<T> {
135 /// Open a directory ending at the specified position.
136 pub(crate) async fn open_at_end(
137 input: T,
138 end_offset: u64,
139 path: PathBuf,
140 ) -> io::Result<DirectoryImpl<T>> {
141 let tail = Self::read_tail_entry(&input, end_offset).await?;
142
143 if end_offset < tail.size {
144 io_bail!("goodbye tail size out of range");
145 }
146
147 let goodbye_ofs = end_offset - tail.size;
148
149 if goodbye_ofs < tail.offset {
150 io_bail!("goodbye offset out of range");
151 }
152
153 let entry_ofs = goodbye_ofs - tail.offset;
154 let size = end_offset - entry_ofs;
155
156 let mut this = Self {
157 input,
158 entry_ofs,
159 goodbye_ofs,
160 size,
161 table: Box::new([]),
162 path,
163 };
164
165 // sanity check:
166 if this.table_size() % (size_of::<GoodbyeItem>() as u64) != 0 {
167 io_bail!("invalid goodbye table size: {}", this.table_size());
168 }
169
170 this.table = this.load_table().await?;
171
172 Ok(this)
173 }
174
175 /// Load the entire goodbye table:
176 async fn load_table(&self) -> io::Result<Box<[GoodbyeItem]>> {
177 let len = self.len();
178 let mut data = Vec::with_capacity(self.len());
179 unsafe {
180 data.set_len(len);
181 let slice = std::slice::from_raw_parts_mut(
182 data.as_mut_ptr() as *mut u8,
183 len * size_of::<GoodbyeItem>(),
184 );
185 (&self.input as &dyn ReadAt)
186 .read_exact_at(slice, self.table_offset())
187 .await?;
188 drop(slice);
189 }
190 Ok(data.into_boxed_slice())
191 }
192
193 #[inline]
194 fn end_offset(&self) -> u64 {
195 self.entry_ofs + self.size
196 }
197
198 #[inline]
199 fn entry_range(&self) -> Range<u64> {
200 self.entry_ofs..self.end_offset()
201 }
202
203 #[inline]
204 fn table_size(&self) -> u64 {
205 (self.end_offset() - self.goodbye_ofs) - (size_of::<format::Header>() as u64)
206 }
207
208 #[inline]
209 fn table_offset(&self) -> u64 {
210 self.goodbye_ofs + (size_of::<format::Header>() as u64)
211 }
212
213 /// Length *excluding* the tail marker!
214 #[inline]
215 fn len(&self) -> usize {
216 (self.table_size() / (size_of::<GoodbyeItem>() as u64)) as usize - 1
217 }
218
219 /// Read the goodbye tail and perform some sanity checks.
220 async fn read_tail_entry(input: &'_ dyn ReadAt, end_offset: u64) -> io::Result<GoodbyeItem> {
221 if end_offset < (size_of::<GoodbyeItem>() as u64) {
222 io_bail!("goodbye tail does not fit");
223 }
224
225 let tail_offset = end_offset - (size_of::<GoodbyeItem>() as u64);
226 let tail: GoodbyeItem = input.read_entry_at(tail_offset).await?;
227
228 if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER {
229 io_bail!("no goodbye tail marker found");
230 }
231
232 Ok(tail)
233 }
234
235 /// Get a decoder for the directory contents.
236 pub(crate) async fn decode_full(&self) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
237 let (dir, decoder) = self.decode_one_entry(self.entry_range(), None).await?;
238 if !dir.is_dir() {
239 io_bail!("directory does not seem to be a directory");
240 }
241 Ok(decoder)
242 }
243
244 async fn get_decoder(
245 &self,
246 entry_range: Range<u64>,
247 file_name: Option<&Path>,
248 ) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
249 Ok(DecoderImpl::new_full(
250 SeqReadAtAdapter::new(self.input.clone(), entry_range),
251 match file_name {
252 None => self.path.clone(),
253 Some(file) => self.path.join(file),
254 },
255 )
256 .await?)
257 }
258
259 async fn decode_one_entry(
260 &self,
261 entry_range: Range<u64>,
262 file_name: Option<&Path>,
263 ) -> io::Result<(Entry, DecoderImpl<SeqReadAtAdapter<T>>)> {
264 let mut decoder = self.get_decoder(entry_range, file_name).await?;
265 let entry = decoder
266 .next()
267 .await
268 .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
269 Ok((entry, decoder))
270 }
271
272 fn lookup_hash_position(&self, hash: u64, start: usize, skip: usize) -> Option<usize> {
273 binary_tree_array::search_by(&self.table, start, skip, |i| hash.cmp(&i.hash))
274 }
275
276 async fn lookup_self(&self) -> io::Result<FileEntryImpl<T>> {
277 let (entry, _decoder) = self.decode_one_entry(self.entry_range(), None).await?;
278 Ok(FileEntryImpl {
279 input: self.input.clone(),
280 entry,
281 end_offset: self.end_offset(),
282 })
283 }
284
285 /// Lookup a directory entry.
286 pub async fn lookup(&self, path: &Path) -> io::Result<Option<FileEntryImpl<T>>> {
287 let mut cur: Option<FileEntryImpl<T>> = None;
288
289 let mut first = true;
290 for component in path.components() {
291 use std::path::Component;
292
293 let first = mem::replace(&mut first, false);
294
295 let component = match component {
296 Component::Normal(path) => path,
297 Component::ParentDir => io_bail!("cannot enter parent directory in archive"),
298 Component::RootDir | Component::CurDir if first => {
299 cur = Some(self.lookup_self().await?);
300 continue;
301 }
302 Component::CurDir => continue,
303 _ => io_bail!("invalid component in path"),
304 };
305
306 let next = match cur {
307 Some(entry) => {
308 entry
309 .enter_directory()
310 .await?
311 .lookup_component(component)
312 .await?
313 }
314 None => self.lookup_component(component).await?,
315 };
316
317 if next.is_none() {
318 return Ok(None);
319 }
320
321 cur = next;
322 }
323
324 Ok(cur)
325 }
326
327 /// Lookup a single directory entry component (does not handle multiple components in path)
328 pub async fn lookup_component(&self, path: &OsStr) -> io::Result<Option<FileEntryImpl<T>>> {
329 let hash = format::hash_filename(path.as_bytes());
330 let first_index = match self.lookup_hash_position(hash, 0, 0) {
331 Some(index) => index,
332 None => return Ok(None),
333 };
334
335 // Lookup FILENAME, if the hash matches but the filename doesn't, check for a duplicate
336 // hash once found, use the GoodbyeItem's offset+size as well as the file's Entry to return
337 // a DirEntry::Dir or Dir::Entry.
338 //
339 let mut dup = 0;
340 loop {
341 let index = match self.lookup_hash_position(hash, first_index, dup) {
342 Some(index) => index,
343 None => return Ok(None),
344 };
345
346 let cursor = self.get_cursor(index).await?;
347 if cursor.file_name == path {
348 return Ok(Some(cursor.decode_entry().await?));
349 }
350
351 dup += 1;
352 }
353 }
354
355 async fn get_cursor<'a>(&'a self, index: usize) -> io::Result<DirEntryImpl<'a, T>> {
356 let entry = &self.table[index];
357 let file_goodbye_ofs = entry.offset;
358 if self.goodbye_ofs < file_goodbye_ofs {
359 io_bail!("invalid file offset");
360 }
361
362 let file_ofs = self.goodbye_ofs - file_goodbye_ofs;
363 let (file_name, entry_ofs) = self.read_filename_entry(file_ofs).await?;
364
365 let entry_range = Range {
366 start: entry_ofs,
367 end: file_ofs + entry.size,
368 };
369 if entry_range.end < entry_range.start {
370 io_bail!(
371 "bad file: invalid entry ranges for {:?}: \
372 start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}",
373 file_name,
374 entry_ofs,
375 file_ofs,
376 entry.size,
377 );
378 }
379
380 Ok(DirEntryImpl {
381 dir: self,
382 file_name,
383 entry_range,
384 })
385 }
386
387 async fn read_filename_entry(&self, file_ofs: u64) -> io::Result<(PathBuf, u64)> {
388 let head: format::Header = (&self.input as &dyn ReadAt).read_entry_at(file_ofs).await?;
389 if head.htype != format::PXAR_FILENAME {
390 io_bail!("expected PXAR_FILENAME header, found: {:x}", head.htype);
391 }
392
393 let mut path = (&self.input as &dyn ReadAt)
394 .read_exact_data_at(
395 head.content_size() as usize,
396 file_ofs + (size_of_val(&head) as u64),
397 )
398 .await?;
399
400 if path.pop() != Some(0) {
401 io_bail!("invalid file name (missing terminating zero)");
402 }
403
404 if path.is_empty() {
405 io_bail!("invalid empty file name");
406 }
407
408 let file_name = PathBuf::from(OsString::from_vec(path));
409 format::check_file_name(&file_name)?;
410
411 Ok((file_name, file_ofs + head.full_size()))
412 }
413
414 pub fn read_dir(&self) -> ReadDirImpl<T> {
415 ReadDirImpl::new(self, 0)
416 }
417 }
418
419 /// A file entry retrieved from a Directory.
420 pub(crate) struct FileEntryImpl<T: Clone + ReadAt> {
421 input: T,
422 entry: Entry,
423 end_offset: u64,
424 }
425
426 impl<T: Clone + ReadAt> FileEntryImpl<T> {
427 pub async fn enter_directory(&self) -> io::Result<DirectoryImpl<T>> {
428 if !self.entry.is_dir() {
429 io_bail!("enter_directory() on a non-directory");
430 }
431
432 DirectoryImpl::open_at_end(self.input.clone(), self.end_offset, self.entry.path.clone())
433 .await
434 }
435
436 pub async fn contents(&self) -> io::Result<FileContentsImpl<T>> {
437 match self.entry.kind {
438 EntryKind::File { offset: None, .. } => {
439 io_bail!("cannot open file, reader provided no offset")
440 }
441 EntryKind::File {
442 size,
443 offset: Some(offset),
444 } => Ok(FileContentsImpl::new(
445 self.input.clone(),
446 offset..(offset + size),
447 )),
448 _ => io_bail!("not a file"),
449 }
450 }
451
452 #[inline]
453 pub fn into_entry(self) -> Entry {
454 self.entry
455 }
456
457 #[inline]
458 pub fn entry(&self) -> &Entry {
459 &self.entry
460 }
461 }
462
463 /// An iterator over the contents of a directory.
464 pub(crate) struct ReadDirImpl<'a, T> {
465 dir: &'a DirectoryImpl<T>,
466 at: usize,
467 }
468
469 impl<'a, T: Clone + ReadAt> ReadDirImpl<'a, T> {
470 fn new(dir: &'a DirectoryImpl<T>, at: usize) -> Self {
471 Self { dir, at }
472 }
473
474 /// Get the next entry.
475 pub async fn next(&mut self) -> io::Result<Option<DirEntryImpl<'a, T>>> {
476 if self.at == self.dir.table.len() {
477 Ok(None)
478 } else {
479 let cursor = self.dir.get_cursor(self.at).await?;
480 self.at += 1;
481 Ok(Some(cursor))
482 }
483 }
484
485 /// Efficient alternative to `Iterator::skip`.
486 #[inline]
487 pub fn skip(self, n: usize) -> Self {
488 Self {
489 at: (self.at + n).min(self.dir.table.len()),
490 dir: self.dir,
491 }
492 }
493
494 /// Efficient alternative to `Iterator::count`.
495 #[inline]
496 pub fn count(self) -> usize {
497 self.dir.table.len()
498 }
499 }
500
501 /// A cursor pointing to a file in a directory.
502 ///
503 /// At this point only the file name has been read and we remembered the position for finding the
504 /// actual data. This can be upgraded into a FileEntryImpl.
505 pub(crate) struct DirEntryImpl<'a, T: Clone + ReadAt> {
506 dir: &'a DirectoryImpl<T>,
507 file_name: PathBuf,
508 entry_range: Range<u64>,
509 }
510
511 impl<'a, T: Clone + ReadAt> DirEntryImpl<'a, T> {
512 pub fn file_name(&self) -> &Path {
513 &self.file_name
514 }
515
516 async fn decode_entry(&self) -> io::Result<FileEntryImpl<T>> {
517 let end_offset = self.entry_range.end;
518 let (entry, _decoder) = self
519 .dir
520 .decode_one_entry(self.entry_range.clone(), Some(&self.file_name))
521 .await?;
522
523 Ok(FileEntryImpl {
524 input: self.dir.input.clone(),
525 entry,
526 end_offset,
527 })
528 }
529 }
530
531 /// A reader for file contents.
532 pub(crate) struct FileContentsImpl<T> {
533 input: T,
534
535 /// Absolute offset inside the `input`.
536 range: Range<u64>,
537 }
538
539 impl<T: Clone + ReadAt> FileContentsImpl<T> {
540 pub fn new(input: T, range: Range<u64>) -> Self {
541 Self { input, range }
542 }
543
544 #[inline]
545 pub fn file_size(&self) -> u64 {
546 self.range.end - self.range.start
547 }
548
549 async fn read_at(&self, mut buf: &mut [u8], offset: u64) -> io::Result<usize> {
550 let size = self.file_size();
551 if offset >= size {
552 return Ok(0);
553 }
554 let remaining = size - offset;
555
556 if remaining < buf.len() as u64 {
557 buf = &mut buf[..(remaining as usize)];
558 }
559
560 (&self.input as &dyn ReadAt)
561 .read_at(buf, self.range.start + offset)
562 .await
563 }
564 }
565
566 #[doc(hidden)]
567 pub struct SeqReadAtAdapter<T> {
568 input: T,
569 range: Range<u64>,
570 }
571
572 impl<T: ReadAt> SeqReadAtAdapter<T> {
573 pub fn new(input: T, range: Range<u64>) -> Self {
574 if range.end < range.start {
575 panic!("BAD SEQ READ AT ADAPTER");
576 }
577 Self { input, range }
578 }
579
580 #[inline]
581 fn remaining(&self) -> usize {
582 (self.range.end - self.range.start) as usize
583 }
584 }
585
586 impl<T: ReadAt> decoder::SeqRead for SeqReadAtAdapter<T> {
587 fn poll_seq_read(
588 self: Pin<&mut Self>,
589 cx: &mut Context,
590 buf: &mut [u8],
591 ) -> Poll<io::Result<usize>> {
592 let len = buf.len().min(self.remaining());
593 let buf = &mut buf[..len];
594
595 let this = unsafe { self.get_unchecked_mut() };
596
597 let got = ready!(unsafe {
598 Pin::new_unchecked(&this.input).poll_read_at(cx, buf, this.range.start)
599 })?;
600 this.range.start += got as u64;
601 Poll::Ready(Ok(got))
602 }
603
604 fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
605 Poll::Ready(Some(Ok(self.range.start)))
606 }
607 }