]> git.proxmox.com Git - pxar.git/blame - src/accessor/mod.rs
trivial clippy fixes
[pxar.git] / src / accessor / mod.rs
CommitLineData
6cd4f635
WB
1//! Random access for PXAR files.
2
e5a2495e
WB
3#![deny(missing_docs)]
4
dc4a2854 5use std::ffi::{OsStr, OsString};
e72062a9 6use std::future::Future;
6cd4f635 7use std::io;
dc4a2854 8use std::mem::{self, size_of, size_of_val, MaybeUninit};
6cd4f635
WB
9use std::ops::Range;
10use std::os::unix::ffi::{OsStrExt, OsStringExt};
11use std::path::{Path, PathBuf};
12use std::pin::Pin;
9d8af6f2 13use std::sync::Arc;
6cd4f635
WB
14use std::task::{Context, Poll};
15
16use endian_trait::Endian;
17
fbddffdc 18use crate::binary_tree_array;
6cd4f635
WB
19use crate::decoder::{self, DecoderImpl};
20use crate::format::{self, GoodbyeItem};
6cd4f635 21use crate::util;
98b894a9 22use crate::{Entry, EntryKind};
6cd4f635
WB
23
24pub mod aio;
9d8af6f2 25pub mod cache;
6cd4f635
WB
26pub mod sync;
27
e72062a9
WB
28pub mod read_at;
29
6cd4f635 30#[doc(inline)]
2c23bd09 31pub use sync::{Accessor, DirEntry, Directory, FileEntry, ReadDir};
6cd4f635 32
e72062a9
WB
33#[doc(inline)]
34pub use read_at::{MaybeReady, ReadAt, ReadAtExt, ReadAtOperation};
35
9d8af6f2
WB
36use cache::Cache;
37
06070d26
WB
/// Location information of an entry, as used by the unsafe raw random access methods.
#[derive(Clone, Debug)]
pub struct EntryRangeInfo {
    /// Offset to the entry's `FILENAME` header, if it has one.
    pub filename_header_offset: Option<u64>,
    /// Byte range the entry spans inside the pxar archive.
    pub entry_range: Range<u64>,
}

impl EntryRangeInfo {
    /// Build the range info of a "toplevel" entry, that is, one without a file name header
    /// offset.
    pub fn toplevel(entry_range: Range<u64>) -> Self {
        let filename_header_offset = None;
        EntryRangeInfo {
            entry_range,
            filename_header_offset,
        }
    }
}
56
e72062a9 57/// awaitable version of `ReadAt`.
c68a29b5
WB
58async fn read_at<T>(input: &T, buf: &mut [u8], offset: u64) -> io::Result<usize>
59where
e72062a9 60 T: ReadAtExt,
c68a29b5 61{
e72062a9 62 input.read_at(buf, offset).await
c68a29b5
WB
63}
64
65/// `read_exact_at` - since that's what we _actually_ want most of the time.
66async fn read_exact_at<T>(input: &T, mut buf: &mut [u8], mut offset: u64) -> io::Result<()>
67where
e72062a9 68 T: ReadAt,
c68a29b5
WB
69{
70 while !buf.is_empty() {
71 match read_at(input, buf, offset).await? {
72 0 => io_bail!("unexpected EOF"),
73 got => {
74 buf = &mut buf[got..];
75 offset += got as u64;
6cd4f635
WB
76 }
77 }
6cd4f635 78 }
c68a29b5
WB
79 Ok(())
80}
6cd4f635 81
c68a29b5
WB
82/// Helper to read into an `Endian`-implementing `struct`.
83async fn read_entry_at<T, E: Endian>(input: &T, offset: u64) -> io::Result<E>
84where
e72062a9 85 T: ReadAt,
c68a29b5
WB
86{
87 let mut data = MaybeUninit::<E>::uninit();
88 let buf =
89 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<E>()) };
90 read_exact_at(input, buf, offset).await?;
91 Ok(unsafe { data.assume_init().from_le() })
92}
6cd4f635 93
c68a29b5
WB
94/// Helper to read into an allocated byte vector.
95async fn read_exact_data_at<T>(input: &T, size: usize, offset: u64) -> io::Result<Vec<u8>>
96where
e72062a9 97 T: ReadAt,
c68a29b5 98{
81d50029 99 let mut data = unsafe { util::vec_new_uninitialized(size) };
c68a29b5
WB
100 read_exact_at(input, &mut data[..], offset).await?;
101 Ok(data)
6cd4f635
WB
102}
103
29c17fc0 104/// Allow using trait objects for `T: ReadAt`
e72062a9
WB
105impl<'d> ReadAt for &(dyn ReadAt + 'd) {
106 fn start_read_at<'a>(
107 self: Pin<&'a Self>,
29c17fc0 108 cx: &mut Context,
e72062a9 109 buf: &'a mut [u8],
bb6540d7 110 offset: u64,
e72062a9
WB
111 ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> {
112 unsafe { Pin::new_unchecked(&**self).start_read_at(cx, buf, offset) }
113 }
114
115 fn poll_complete<'a>(
116 self: Pin<&'a Self>,
117 op: ReadAtOperation<'a>,
118 ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> {
119 unsafe { Pin::new_unchecked(&**self).poll_complete(op) }
bb6540d7
WB
120 }
121}
122
123/// Convenience impl for `Arc<dyn ReadAt + Send + Sync + 'static>`. Since `ReadAt` only requires
124/// immutable `&self`, this adds some convenience by allowing to just `Arc` any `'static` type that
125/// implemments `ReadAt` for type monomorphization.
126impl ReadAt for Arc<dyn ReadAt + Send + Sync + 'static> {
e72062a9
WB
127 fn start_read_at<'a>(
128 self: Pin<&'a Self>,
bb6540d7 129 cx: &mut Context,
e72062a9 130 buf: &'a mut [u8],
29c17fc0 131 offset: u64,
e72062a9
WB
132 ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> {
133 unsafe {
134 self.map_unchecked(|this| &**this)
135 .start_read_at(cx, buf, offset)
136 }
137 }
138
139 fn poll_complete<'a>(
140 self: Pin<&'a Self>,
141 op: ReadAtOperation<'a>,
142 ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> {
143 unsafe { self.map_unchecked(|this| &**this).poll_complete(op) }
29c17fc0
WB
144 }
145}
146
cde0236c
WB
147/// Convenience impl for in-memory byte slices.
148impl ReadAt for &'_ [u8] {
149 fn start_read_at<'a>(
150 self: Pin<&'a Self>,
151 _cx: &mut Context,
152 buf: &'a mut [u8],
153 offset: u64,
154 ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> {
155 if offset >= self.len() as u64 {
156 return MaybeReady::Ready(Ok(0));
157 }
158
159 let offset = offset as usize;
160 let end = (offset + buf.len()).min(self.len());
161 let size = end - offset;
162 buf[..size].copy_from_slice(&self[offset..end]);
163 MaybeReady::Ready(Ok(size))
164 }
165
166 fn poll_complete<'a>(
167 self: Pin<&'a Self>,
168 _op: ReadAtOperation<'a>,
169 ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> {
170 panic!("start_read_at on byte slice returned Pending");
171 }
172}
173
81d50029 174#[derive(Clone, Default)]
9d8af6f2
WB
175struct Caches {
176 /// The goodbye table cache maps goodbye table offsets to cache entries.
177 gbt_cache: Option<Arc<dyn Cache<u64, [GoodbyeItem]> + Send + Sync>>,
178}
179
6cd4f635 180/// The random access state machine implementation.
5cf335be 181pub(crate) struct AccessorImpl<T> {
6cd4f635
WB
182 input: T,
183 size: u64,
9d8af6f2 184 caches: Arc<Caches>,
6cd4f635
WB
185}
186
187impl<T: ReadAt> AccessorImpl<T> {
188 pub async fn new(input: T, size: u64) -> io::Result<Self> {
189 if size < (size_of::<GoodbyeItem>() as u64) {
190 io_bail!("too small to contain a pxar archive");
191 }
9d8af6f2
WB
192
193 Ok(Self {
194 input,
195 size,
196 caches: Arc::new(Caches::default()),
197 })
6cd4f635
WB
198 }
199
a2530fb7
WB
200 pub fn size(&self) -> u64 {
201 self.size
202 }
203
1b25fc08 204 pub async fn open_root_ref(&self) -> io::Result<DirectoryImpl<&dyn ReadAt>> {
9d8af6f2
WB
205 DirectoryImpl::open_at_end(
206 &self.input as &dyn ReadAt,
207 self.size,
208 "/".into(),
209 Arc::clone(&self.caches),
210 )
211 .await
29c17fc0 212 }
b764a2b1
WB
213
214 pub fn set_goodbye_table_cache(
215 &mut self,
216 cache: Option<Arc<dyn Cache<u64, [GoodbyeItem]> + Send + Sync>>,
217 ) {
218 let new_caches = Arc::new(Caches {
219 gbt_cache: cache,
1b25fc08 220 //..*self.caches
b764a2b1
WB
221 });
222 self.caches = new_caches;
223 }
29c17fc0
WB
224}
225
6b9e2478
WB
226async fn get_decoder<T: ReadAt>(
227 input: T,
228 entry_range: Range<u64>,
229 path: PathBuf,
230) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
a50514a9 231 DecoderImpl::new_full(SeqReadAtAdapter::new(input, entry_range), path, true).await
6b9e2478
WB
232}
233
06070d26
WB
234// NOTE: This performs the Decoder::read_next_item() behavior! Keep in mind when changing!
235async fn get_decoder_at_filename<T: ReadAt>(
236 input: T,
237 entry_range: Range<u64>,
238 path: PathBuf,
239) -> io::Result<(DecoderImpl<SeqReadAtAdapter<T>>, u64)> {
1187920f
WB
240 // Read the header, it should be a FILENAME, then skip over it and its length:
241 let header: format::Header = read_entry_at(&input, entry_range.start).await?;
242 header.check_header_size()?;
243
244 if header.htype != format::PXAR_FILENAME {
245 io_bail!("expected filename entry, got {:?}", header);
06070d26 246 }
1187920f
WB
247
248 let entry_offset = entry_range.start + header.full_size();
249 if entry_offset >= entry_range.end {
250 io_bail!("filename exceeds current file range");
06070d26 251 }
1187920f 252
e72062a9
WB
253 Ok((
254 get_decoder(input, entry_offset..entry_range.end, path).await?,
255 entry_offset,
256 ))
06070d26
WB
257}
258
29c17fc0
WB
259impl<T: Clone + ReadAt> AccessorImpl<T> {
260 pub async fn open_root(&self) -> io::Result<DirectoryImpl<T>> {
9d8af6f2
WB
261 DirectoryImpl::open_at_end(
262 self.input.clone(),
263 self.size,
264 "/".into(),
265 Arc::clone(&self.caches),
266 )
267 .await
6cd4f635 268 }
ceb83806
WB
269
270 /// Allow opening a directory at a specified offset.
271 pub async unsafe fn open_dir_at_end(&self, offset: u64) -> io::Result<DirectoryImpl<T>> {
272 DirectoryImpl::open_at_end(
273 self.input.clone(),
274 offset,
275 "/".into(),
276 Arc::clone(&self.caches),
277 )
278 .await
279 }
6b9e2478
WB
280
281 /// Allow opening a regular file from a specified range.
282 pub async unsafe fn open_file_at_range(
283 &self,
06070d26 284 entry_range_info: &EntryRangeInfo,
6b9e2478 285 ) -> io::Result<FileEntryImpl<T>> {
06070d26
WB
286 let mut decoder = get_decoder(
287 self.input.clone(),
288 entry_range_info.entry_range.clone(),
289 PathBuf::new(),
1250e3ea
WB
290 )
291 .await?;
6b9e2478
WB
292 let entry = decoder
293 .next()
294 .await
295 .ok_or_else(|| io_format_err!("unexpected EOF while decoding file entry"))??;
296 Ok(FileEntryImpl {
297 input: self.input.clone(),
298 entry,
06070d26 299 entry_range_info: entry_range_info.clone(),
6b9e2478
WB
300 caches: Arc::clone(&self.caches),
301 })
302 }
303
304 /// Allow opening arbitrary contents from a specific range.
305 pub unsafe fn open_contents_at_range(&self, range: Range<u64>) -> FileContentsImpl<T> {
306 FileContentsImpl::new(self.input.clone(), range)
307 }
6bfadb8a
WB
308
309 /// Following a hardlink breaks a couple of conventions we otherwise have, particularly we will
310 /// never know the actual length of the target entry until we're done decoding it, so this
311 /// needs to happen at the accessor level, rather than a "sub-entry-reader".
06070d26
WB
312 pub async fn follow_hardlink(&self, entry: &FileEntryImpl<T>) -> io::Result<FileEntryImpl<T>> {
313 let link_offset = match entry.entry.kind() {
314 EntryKind::Hardlink(link) => link.offset,
315 _ => io_bail!("cannot resolve a non-hardlink"),
316 };
317
318 let entry_file_offset = entry
319 .entry_range_info
320 .filename_header_offset
321 .ok_or_else(|| io_format_err!("cannot follow hardlink without a file entry header"))?;
322
323 if link_offset > entry_file_offset {
324 io_bail!("invalid offset in hardlink");
325 }
326
327 let link_offset = entry_file_offset - link_offset;
328
1250e3ea
WB
329 let (mut decoder, entry_offset) =
330 get_decoder_at_filename(self.input.clone(), link_offset..self.size, PathBuf::new())
331 .await?;
06070d26 332
6bfadb8a
WB
333 let entry = decoder
334 .next()
335 .await
336 .ok_or_else(|| io_format_err!("unexpected EOF while following a hardlink"))??;
1187920f 337
6bfadb8a
WB
338 match entry.kind() {
339 EntryKind::File { offset: None, .. } => {
340 io_bail!("failed to follow hardlink, reader provided no offsets");
341 }
b0487d4f
WB
342 EntryKind::File {
343 offset: Some(offset),
344 size,
345 } => {
06070d26
WB
346 let meta_size = offset - link_offset;
347 let entry_end = link_offset + meta_size + size;
6bfadb8a
WB
348 Ok(FileEntryImpl {
349 input: self.input.clone(),
350 entry,
06070d26
WB
351 entry_range_info: EntryRangeInfo {
352 filename_header_offset: Some(link_offset),
353 entry_range: entry_offset..entry_end,
354 },
6bfadb8a
WB
355 caches: Arc::clone(&self.caches),
356 })
357 }
358 _ => io_bail!("hardlink does not point to a regular file"),
359 }
360 }
6cd4f635
WB
361}
362
363/// The directory random-access state machine implementation.
5cf335be 364pub(crate) struct DirectoryImpl<T> {
29c17fc0 365 input: T,
6cd4f635
WB
366 entry_ofs: u64,
367 goodbye_ofs: u64,
368 size: u64,
9d8af6f2 369 table: Arc<[GoodbyeItem]>,
6cd4f635 370 path: PathBuf,
9d8af6f2 371 caches: Arc<Caches>,
6cd4f635
WB
372}
373
29c17fc0 374impl<T: Clone + ReadAt> DirectoryImpl<T> {
6cd4f635 375 /// Open a directory ending at the specified position.
9d8af6f2 376 async fn open_at_end(
29c17fc0 377 input: T,
6cd4f635
WB
378 end_offset: u64,
379 path: PathBuf,
9d8af6f2 380 caches: Arc<Caches>,
29c17fc0
WB
381 ) -> io::Result<DirectoryImpl<T>> {
382 let tail = Self::read_tail_entry(&input, end_offset).await?;
6cd4f635
WB
383
384 if end_offset < tail.size {
385 io_bail!("goodbye tail size out of range");
386 }
387
388 let goodbye_ofs = end_offset - tail.size;
389
390 if goodbye_ofs < tail.offset {
391 io_bail!("goodbye offset out of range");
392 }
393
394 let entry_ofs = goodbye_ofs - tail.offset;
395 let size = end_offset - entry_ofs;
396
9d8af6f2
WB
397 let table: Option<Arc<[GoodbyeItem]>> = caches
398 .gbt_cache
399 .as_ref()
400 .and_then(|cache| cache.fetch(goodbye_ofs));
401
6cd4f635
WB
402 let mut this = Self {
403 input,
404 entry_ofs,
405 goodbye_ofs,
406 size,
9d8af6f2 407 table: table.as_ref().map_or_else(|| Arc::new([]), Arc::clone),
6cd4f635 408 path,
9d8af6f2 409 caches,
6cd4f635
WB
410 };
411
412 // sanity check:
413 if this.table_size() % (size_of::<GoodbyeItem>() as u64) != 0 {
414 io_bail!("invalid goodbye table size: {}", this.table_size());
415 }
416
9d8af6f2
WB
417 if table.is_none() {
418 this.table = this.load_table().await?;
419 if let Some(ref cache) = this.caches.gbt_cache {
420 cache.insert(goodbye_ofs, Arc::clone(&this.table));
421 }
422 }
6cd4f635
WB
423
424 Ok(this)
425 }
426
427 /// Load the entire goodbye table:
9d8af6f2 428 async fn load_table(&self) -> io::Result<Arc<[GoodbyeItem]>> {
6cd4f635 429 let len = self.len();
81d50029 430 let mut data;
6cd4f635 431 unsafe {
81d50029 432 data = crate::util::vec_new_uninitialized(self.len());
6cd4f635
WB
433 let slice = std::slice::from_raw_parts_mut(
434 data.as_mut_ptr() as *mut u8,
2c23bd09 435 len * size_of::<GoodbyeItem>(),
6cd4f635 436 );
c68a29b5 437 read_exact_at(&self.input, slice, self.table_offset()).await?;
6cd4f635 438 }
9d8af6f2 439 Ok(Arc::from(data))
6cd4f635
WB
440 }
441
442 #[inline]
443 fn end_offset(&self) -> u64 {
444 self.entry_ofs + self.size
445 }
446
dc4a2854
WB
447 #[inline]
448 fn entry_range(&self) -> Range<u64> {
449 self.entry_ofs..self.end_offset()
450 }
451
6cd4f635
WB
452 #[inline]
453 fn table_size(&self) -> u64 {
454 (self.end_offset() - self.goodbye_ofs) - (size_of::<format::Header>() as u64)
455 }
456
457 #[inline]
458 fn table_offset(&self) -> u64 {
459 self.goodbye_ofs + (size_of::<format::Header>() as u64)
460 }
461
462 /// Length *excluding* the tail marker!
463 #[inline]
464 fn len(&self) -> usize {
465 (self.table_size() / (size_of::<GoodbyeItem>() as u64)) as usize - 1
466 }
467
468 /// Read the goodbye tail and perform some sanity checks.
c68a29b5 469 async fn read_tail_entry(input: &T, end_offset: u64) -> io::Result<GoodbyeItem> {
6cd4f635
WB
470 if end_offset < (size_of::<GoodbyeItem>() as u64) {
471 io_bail!("goodbye tail does not fit");
472 }
473
474 let tail_offset = end_offset - (size_of::<GoodbyeItem>() as u64);
c68a29b5 475 let tail: GoodbyeItem = read_entry_at(input, tail_offset).await?;
6cd4f635
WB
476
477 if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER {
478 io_bail!("no goodbye tail marker found");
479 }
480
481 Ok(tail)
482 }
483
484 /// Get a decoder for the directory contents.
29c17fc0 485 pub(crate) async fn decode_full(&self) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
dc4a2854 486 let (dir, decoder) = self.decode_one_entry(self.entry_range(), None).await?;
6cd4f635
WB
487 if !dir.is_dir() {
488 io_bail!("directory does not seem to be a directory");
489 }
490 Ok(decoder)
491 }
492
493 async fn get_decoder(
494 &self,
495 entry_range: Range<u64>,
496 file_name: Option<&Path>,
29c17fc0 497 ) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
6b9e2478
WB
498 get_decoder(
499 self.input.clone(),
500 entry_range,
6cd4f635
WB
501 match file_name {
502 None => self.path.clone(),
503 Some(file) => self.path.join(file),
504 },
d3a83ee3
WB
505 )
506 .await
6cd4f635
WB
507 }
508
509 async fn decode_one_entry(
510 &self,
511 entry_range: Range<u64>,
512 file_name: Option<&Path>,
29c17fc0 513 ) -> io::Result<(Entry, DecoderImpl<SeqReadAtAdapter<T>>)> {
6cd4f635
WB
514 let mut decoder = self.get_decoder(entry_range, file_name).await?;
515 let entry = decoder
516 .next()
517 .await
518 .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
519 Ok((entry, decoder))
520 }
521
fbddffdc
WB
522 fn lookup_hash_position(&self, hash: u64, start: usize, skip: usize) -> Option<usize> {
523 binary_tree_array::search_by(&self.table, start, skip, |i| hash.cmp(&i.hash))
6cd4f635
WB
524 }
525
a5922fbc 526 pub async fn lookup_self(&self) -> io::Result<FileEntryImpl<T>> {
c76d3f98 527 let (entry, _decoder) = self.decode_one_entry(self.entry_range(), None).await?;
dc4a2854
WB
528 Ok(FileEntryImpl {
529 input: self.input.clone(),
530 entry,
06070d26
WB
531 entry_range_info: EntryRangeInfo {
532 filename_header_offset: None,
533 entry_range: self.entry_range(),
534 },
9d8af6f2 535 caches: Arc::clone(&self.caches),
dc4a2854
WB
536 })
537 }
538
6cd4f635 539 /// Lookup a directory entry.
29c17fc0 540 pub async fn lookup(&self, path: &Path) -> io::Result<Option<FileEntryImpl<T>>> {
dc4a2854
WB
541 let mut cur: Option<FileEntryImpl<T>> = None;
542
543 let mut first = true;
544 for component in path.components() {
545 use std::path::Component;
546
547 let first = mem::replace(&mut first, false);
548
549 let component = match component {
550 Component::Normal(path) => path,
551 Component::ParentDir => io_bail!("cannot enter parent directory in archive"),
552 Component::RootDir | Component::CurDir if first => {
553 cur = Some(self.lookup_self().await?);
554 continue;
555 }
556 Component::CurDir => continue,
557 _ => io_bail!("invalid component in path"),
558 };
559
560 let next = match cur {
561 Some(entry) => {
562 entry
563 .enter_directory()
564 .await?
565 .lookup_component(component)
566 .await?
567 }
568 None => self.lookup_component(component).await?,
569 };
570
571 if next.is_none() {
572 return Ok(None);
573 }
574
575 cur = next;
576 }
577
578 Ok(cur)
579 }
580
581 /// Lookup a single directory entry component (does not handle multiple components in path)
582 pub async fn lookup_component(&self, path: &OsStr) -> io::Result<Option<FileEntryImpl<T>>> {
583 let hash = format::hash_filename(path.as_bytes());
fbddffdc 584 let first_index = match self.lookup_hash_position(hash, 0, 0) {
6cd4f635
WB
585 Some(index) => index,
586 None => return Ok(None),
587 };
588
fbddffdc
WB
589 // Lookup FILENAME, if the hash matches but the filename doesn't, check for a duplicate
590 // hash once found, use the GoodbyeItem's offset+size as well as the file's Entry to return
591 // a DirEntry::Dir or Dir::Entry.
592 //
593 let mut dup = 0;
594 loop {
595 let index = match self.lookup_hash_position(hash, first_index, dup) {
596 Some(index) => index,
597 None => return Ok(None),
598 };
6cd4f635 599
6cd4f635
WB
600 let cursor = self.get_cursor(index).await?;
601 if cursor.file_name == path {
aabb78a4 602 return Ok(Some(cursor.decode_entry().await?));
6cd4f635 603 }
6cd4f635 604
fbddffdc
WB
605 dup += 1;
606 }
6cd4f635
WB
607 }
608
1b25fc08
WB
609 // while clippy is technically right about this, the compiler won't accept it (yet)
610 #[allow(clippy::needless_lifetimes)]
29c17fc0 611 async fn get_cursor<'a>(&'a self, index: usize) -> io::Result<DirEntryImpl<'a, T>> {
6cd4f635
WB
612 let entry = &self.table[index];
613 let file_goodbye_ofs = entry.offset;
614 if self.goodbye_ofs < file_goodbye_ofs {
615 io_bail!("invalid file offset");
616 }
617
618 let file_ofs = self.goodbye_ofs - file_goodbye_ofs;
619 let (file_name, entry_ofs) = self.read_filename_entry(file_ofs).await?;
620
70acf637
WB
621 let entry_range = Range {
622 start: entry_ofs,
623 end: file_ofs + entry.size,
624 };
625 if entry_range.end < entry_range.start {
626 io_bail!(
627 "bad file: invalid entry ranges for {:?}: \
628 start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}",
629 file_name,
630 entry_ofs,
631 file_ofs,
632 entry.size,
633 );
634 }
635
6cd4f635
WB
636 Ok(DirEntryImpl {
637 dir: self,
638 file_name,
06070d26
WB
639 entry_range_info: EntryRangeInfo {
640 filename_header_offset: Some(file_ofs),
641 entry_range,
642 },
9d8af6f2 643 caches: Arc::clone(&self.caches),
6cd4f635
WB
644 })
645 }
646
647 async fn read_filename_entry(&self, file_ofs: u64) -> io::Result<(PathBuf, u64)> {
c68a29b5 648 let head: format::Header = read_entry_at(&self.input, file_ofs).await?;
6cd4f635 649 if head.htype != format::PXAR_FILENAME {
4a13b8a3 650 io_bail!("expected PXAR_FILENAME header, found: {}", head);
6cd4f635
WB
651 }
652
c68a29b5
WB
653 let mut path = read_exact_data_at(
654 &self.input,
655 head.content_size() as usize,
656 file_ofs + (size_of_val(&head) as u64),
657 )
658 .await?;
6cd4f635
WB
659
660 if path.pop() != Some(0) {
661 io_bail!("invalid file name (missing terminating zero)");
662 }
663
f3ac1c51 664 crate::util::validate_filename(&path)?;
bd99958c 665
6cd4f635
WB
666 let file_name = PathBuf::from(OsString::from_vec(path));
667 format::check_file_name(&file_name)?;
668
669 Ok((file_name, file_ofs + head.full_size()))
670 }
671
29c17fc0 672 pub fn read_dir(&self) -> ReadDirImpl<T> {
6cd4f635
WB
673 ReadDirImpl::new(self, 0)
674 }
d3a83ee3
WB
675
676 pub fn entry_count(&self) -> usize {
677 self.table.len()
678 }
6cd4f635
WB
679}
680
681/// A file entry retrieved from a Directory.
93fa37fb 682#[derive(Clone)]
5cf335be 683pub(crate) struct FileEntryImpl<T: Clone + ReadAt> {
29c17fc0 684 input: T,
6cd4f635 685 entry: Entry,
06070d26 686 entry_range_info: EntryRangeInfo,
9d8af6f2 687 caches: Arc<Caches>,
6cd4f635
WB
688}
689
29c17fc0
WB
690impl<T: Clone + ReadAt> FileEntryImpl<T> {
691 pub async fn enter_directory(&self) -> io::Result<DirectoryImpl<T>> {
6cd4f635
WB
692 if !self.entry.is_dir() {
693 io_bail!("enter_directory() on a non-directory");
694 }
695
9d8af6f2
WB
696 DirectoryImpl::open_at_end(
697 self.input.clone(),
06070d26 698 self.entry_range_info.entry_range.end,
9d8af6f2
WB
699 self.entry.path.clone(),
700 Arc::clone(&self.caches),
701 )
702 .await
6cd4f635
WB
703 }
704
6b9e2478
WB
705 /// For use with unsafe accessor methods.
706 pub fn content_range(&self) -> io::Result<Option<Range<u64>>> {
98b894a9 707 match self.entry.kind {
c76d3f98
WB
708 EntryKind::File { offset: None, .. } => {
709 io_bail!("cannot open file, reader provided no offset")
710 }
711 EntryKind::File {
712 size,
713 offset: Some(offset),
6b9e2478
WB
714 } => Ok(Some(offset..(offset + size))),
715 _ => Ok(None),
716 }
717 }
718
719 pub async fn contents(&self) -> io::Result<FileContentsImpl<T>> {
720 match self.content_range()? {
721 Some(range) => Ok(FileContentsImpl::new(self.input.clone(), range)),
722 None => io_bail!("not a file"),
98b894a9
WB
723 }
724 }
725
6cd4f635
WB
726 #[inline]
727 pub fn into_entry(self) -> Entry {
728 self.entry
729 }
730
731 #[inline]
732 pub fn entry(&self) -> &Entry {
733 &self.entry
734 }
ceb83806
WB
735
736 /// Exposed for raw by-offset access methods (use with `open_dir_at_end`).
737 #[inline]
06070d26
WB
738 pub fn entry_range_info(&self) -> &EntryRangeInfo {
739 &self.entry_range_info
ceb83806 740 }
6cd4f635
WB
741}
742
743/// An iterator over the contents of a directory.
5cf335be 744pub(crate) struct ReadDirImpl<'a, T> {
29c17fc0 745 dir: &'a DirectoryImpl<T>,
6cd4f635
WB
746 at: usize,
747}
748
29c17fc0 749impl<'a, T: Clone + ReadAt> ReadDirImpl<'a, T> {
5cf335be 750 fn new(dir: &'a DirectoryImpl<T>, at: usize) -> Self {
6cd4f635
WB
751 Self { dir, at }
752 }
753
98b894a9 754 /// Get the next entry.
29c17fc0 755 pub async fn next(&mut self) -> io::Result<Option<DirEntryImpl<'a, T>>> {
6cd4f635
WB
756 if self.at == self.dir.table.len() {
757 Ok(None)
758 } else {
759 let cursor = self.dir.get_cursor(self.at).await?;
760 self.at += 1;
761 Ok(Some(cursor))
762 }
763 }
98b894a9
WB
764
765 /// Efficient alternative to `Iterator::skip`.
766 #[inline]
767 pub fn skip(self, n: usize) -> Self {
768 Self {
769 at: (self.at + n).min(self.dir.table.len()),
770 dir: self.dir,
771 }
772 }
773
774 /// Efficient alternative to `Iterator::count`.
775 #[inline]
776 pub fn count(self) -> usize {
777 self.dir.table.len()
778 }
6cd4f635
WB
779}
780
781/// A cursor pointing to a file in a directory.
782///
783/// At this point only the file name has been read and we remembered the position for finding the
784/// actual data. This can be upgraded into a FileEntryImpl.
5cf335be 785pub(crate) struct DirEntryImpl<'a, T: Clone + ReadAt> {
29c17fc0 786 dir: &'a DirectoryImpl<T>,
6cd4f635 787 file_name: PathBuf,
06070d26 788 entry_range_info: EntryRangeInfo,
9d8af6f2 789 caches: Arc<Caches>,
6cd4f635
WB
790}
791
29c17fc0 792impl<'a, T: Clone + ReadAt> DirEntryImpl<'a, T> {
6cd4f635
WB
793 pub fn file_name(&self) -> &Path {
794 &self.file_name
795 }
796
aabb78a4 797 async fn decode_entry(&self) -> io::Result<FileEntryImpl<T>> {
c76d3f98 798 let (entry, _decoder) = self
6cd4f635 799 .dir
1250e3ea
WB
800 .decode_one_entry(
801 self.entry_range_info.entry_range.clone(),
802 Some(&self.file_name),
803 )
6cd4f635 804 .await?;
6cd4f635
WB
805
806 Ok(FileEntryImpl {
29c17fc0 807 input: self.dir.input.clone(),
6cd4f635 808 entry,
06070d26 809 entry_range_info: self.entry_range_info.clone(),
9d8af6f2 810 caches: Arc::clone(&self.caches),
6cd4f635
WB
811 })
812 }
ceb83806
WB
813
814 /// Exposed for raw by-offset access methods.
815 #[inline]
06070d26
WB
816 pub fn entry_range_info(&self) -> &EntryRangeInfo {
817 &self.entry_range_info
ceb83806 818 }
6cd4f635
WB
819}
820
98b894a9 821/// A reader for file contents.
e72062a9 822#[derive(Clone)]
5cf335be 823pub(crate) struct FileContentsImpl<T> {
98b894a9
WB
824 input: T,
825
826 /// Absolute offset inside the `input`.
827 range: Range<u64>,
828}
829
830impl<T: Clone + ReadAt> FileContentsImpl<T> {
831 pub fn new(input: T, range: Range<u64>) -> Self {
832 Self { input, range }
833 }
834
835 #[inline]
836 pub fn file_size(&self) -> u64 {
837 self.range.end - self.range.start
838 }
839
840 async fn read_at(&self, mut buf: &mut [u8], offset: u64) -> io::Result<usize> {
841 let size = self.file_size();
842 if offset >= size {
843 return Ok(0);
844 }
845 let remaining = size - offset;
846
847 if remaining < buf.len() as u64 {
848 buf = &mut buf[..(remaining as usize)];
849 }
850
c68a29b5 851 read_at(&self.input, buf, self.range.start + offset).await
98b894a9
WB
852 }
853}
854
d3a83ee3 855impl<T: Clone + ReadAt> ReadAt for FileContentsImpl<T> {
e72062a9
WB
856 fn start_read_at<'a>(
857 self: Pin<&'a Self>,
d3a83ee3 858 cx: &mut Context,
e72062a9 859 mut buf: &'a mut [u8],
d3a83ee3 860 offset: u64,
e72062a9 861 ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> {
d3a83ee3
WB
862 let size = self.file_size();
863 if offset >= size {
e72062a9 864 return MaybeReady::Ready(Ok(0));
d3a83ee3
WB
865 }
866 let remaining = size - offset;
867
868 if remaining < buf.len() as u64 {
869 buf = &mut buf[..(remaining as usize)];
870 }
871
872 let offset = self.range.start + offset;
e72062a9
WB
873 unsafe { self.map_unchecked(|this| &this.input) }.start_read_at(cx, buf, offset)
874 }
875
876 fn poll_complete<'a>(
877 self: Pin<&'a Self>,
878 op: ReadAtOperation<'a>,
879 ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> {
880 unsafe { self.map_unchecked(|this| &this.input) }.poll_complete(op)
d3a83ee3
WB
881 }
882}
883
1b25fc08
WB
/// Result of a file content read future.
struct ReadResult {
    /// Number of bytes the read returned.
    len: usize,
    /// The buffer which was read into, handed back for reuse.
    buffer: Vec<u8>,
}
889
e5a2495e
WB
890/// A `SeqRead` adapter for a specific range inside another reader, with a temporary buffer due
891/// to lifetime constraints.
6cd4f635 892#[doc(hidden)]
29c17fc0
WB
893pub struct SeqReadAtAdapter<T> {
894 input: T,
6cd4f635 895 range: Range<u64>,
e72062a9 896 buffer: Vec<u8>,
1b25fc08 897 future: Option<Pin<Box<dyn Future<Output = io::Result<ReadResult>> + 'static>>>,
e72062a9
WB
898}
899
900// We lose `Send` via the boxed trait object and don't want to force the trait object to
901// potentially be more strict than `T`, so we leave it as it is ans implement Send and Sync
902// depending on T.
81d50029 903#[allow(clippy::non_send_fields_in_send_ty)]
e72062a9
WB
904unsafe impl<T: Send> Send for SeqReadAtAdapter<T> {}
905unsafe impl<T: Sync> Sync for SeqReadAtAdapter<T> {}
906
907impl<T> Drop for SeqReadAtAdapter<T> {
908 fn drop(&mut self) {
909 // drop order
910 self.future = None;
911 }
6cd4f635
WB
912}
913
29c17fc0 914impl<T: ReadAt> SeqReadAtAdapter<T> {
e5a2495e 915 /// Create a new `SeqRead` adapter given a range.
29c17fc0 916 pub fn new(input: T, range: Range<u64>) -> Self {
70acf637
WB
917 if range.end < range.start {
918 panic!("BAD SEQ READ AT ADAPTER");
919 }
e72062a9
WB
920 Self {
921 input,
922 range,
923 buffer: Vec::new(),
924 future: None,
925 }
6cd4f635
WB
926 }
927
928 #[inline]
929 fn remaining(&self) -> usize {
930 (self.range.end - self.range.start) as usize
931 }
932}
933
29c17fc0 934impl<T: ReadAt> decoder::SeqRead for SeqReadAtAdapter<T> {
6cd4f635
WB
935 fn poll_seq_read(
936 self: Pin<&mut Self>,
937 cx: &mut Context,
e72062a9 938 dest: &mut [u8],
6cd4f635 939 ) -> Poll<io::Result<usize>> {
e72062a9
WB
940 let len = dest.len().min(self.remaining());
941 let dest = &mut dest[..len];
6cd4f635 942
29c17fc0 943 let this = unsafe { self.get_unchecked_mut() };
e72062a9
WB
944 loop {
945 match this.future.take() {
946 None => {
947 let mut buffer = mem::take(&mut this.buffer);
948 util::scale_read_buffer(&mut buffer, dest.len());
949
950 // Note that we're pinned and we have a drop-handler which forces self.future
951 // to be dropped before `input`, so putting a reference to self.input into the
952 // future should be ok!
953 let reader = &this.input;
954
955 let at = this.range.start;
1b25fc08 956 let future: Pin<Box<dyn Future<Output = io::Result<ReadResult>>>> =
e72062a9 957 Box::pin(async move {
1b25fc08
WB
958 let len = reader.read_at(&mut buffer, at).await?;
959 io::Result::Ok(ReadResult { len, buffer })
e72062a9
WB
960 });
961 // Ditch the self-reference life-time now:
962 this.future = Some(unsafe { mem::transmute(future) });
963 }
964 Some(mut fut) => match fut.as_mut().poll(cx) {
965 Poll::Pending => {
966 this.future = Some(fut);
967 return Poll::Pending;
968 }
969 Poll::Ready(Err(err)) => return Poll::Ready(Err(err)),
1b25fc08 970 Poll::Ready(Ok(ReadResult { len: got, buffer })) => {
e72062a9
WB
971 this.buffer = buffer;
972 this.range.start += got as u64;
973 let len = got.min(dest.len());
974 dest[..len].copy_from_slice(&this.buffer[..len]);
975 return Poll::Ready(Ok(len));
976 }
977 },
978 }
979 }
6cd4f635
WB
980 }
981
982 fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
983 Poll::Ready(Some(Ok(self.range.start)))
984 }
985}