]> git.proxmox.com Git - pxar.git/blame - src/accessor/mod.rs
header: implement Display
[pxar.git] / src / accessor / mod.rs
CommitLineData
6cd4f635
WB
1//! Random access for PXAR files.
2
dc4a2854 3use std::ffi::{OsStr, OsString};
6cd4f635 4use std::io;
dc4a2854 5use std::mem::{self, size_of, size_of_val, MaybeUninit};
6cd4f635
WB
6use std::ops::Range;
7use std::os::unix::ffi::{OsStrExt, OsStringExt};
8use std::path::{Path, PathBuf};
9use std::pin::Pin;
9d8af6f2 10use std::sync::Arc;
6cd4f635
WB
11use std::task::{Context, Poll};
12
13use endian_trait::Endian;
14
fbddffdc 15use crate::binary_tree_array;
6cd4f635
WB
16use crate::decoder::{self, DecoderImpl};
17use crate::format::{self, GoodbyeItem};
18use crate::poll_fn::poll_fn;
19use crate::util;
98b894a9 20use crate::{Entry, EntryKind};
6cd4f635
WB
21
22pub mod aio;
9d8af6f2 23pub mod cache;
6cd4f635
WB
24pub mod sync;
25
26#[doc(inline)]
2c23bd09 27pub use sync::{Accessor, DirEntry, Directory, FileEntry, ReadDir};
6cd4f635 28
9d8af6f2
WB
29use cache::Cache;
30
06070d26
WB
/// Offset bookkeeping for an entry, used by the unsafe raw random access methods.
#[derive(Clone, Debug)]
pub struct EntryRangeInfo {
    /// Offset of the entry's filename header, or `None` when no such offset is known (this is
    /// what [`EntryRangeInfo::toplevel`] produces).
    pub filename_header_offset: Option<u64>,

    /// Range of offsets covered by the entry itself.
    pub entry_range: Range<u64>,
}

impl EntryRangeInfo {
    /// Build range information that only consists of `entry_range` and carries no filename
    /// header offset.
    pub fn toplevel(entry_range: Range<u64>) -> Self {
        let filename_header_offset = None;
        Self {
            entry_range,
            filename_header_offset,
        }
    }
}
46
6cd4f635
WB
/// Positional ("random access") read interface.
///
/// Reading only requires a shared, pinned reference, so one reader can serve several consumers.
pub trait ReadAt {
    /// Poll for a read into `buf`, starting at position `offset` of the underlying data.
    ///
    /// When ready, the success value is the number of bytes placed into `buf`. The helpers in
    /// this module treat a result of `0` as end of file.
    fn poll_read_at(
        self: Pin<&Self>,
        cx: &mut Context,
        buf: &mut [u8],
        offset: u64,
    ) -> Poll<io::Result<usize>>;
}
56
c68a29b5
WB
57/// awaitable version of `poll_read_at`.
58async fn read_at<T>(input: &T, buf: &mut [u8], offset: u64) -> io::Result<usize>
59where
60 T: ReadAt + ?Sized,
61{
62 poll_fn(|cx| unsafe { Pin::new_unchecked(input).poll_read_at(cx, buf, offset) }).await
63}
64
65/// `read_exact_at` - since that's what we _actually_ want most of the time.
66async fn read_exact_at<T>(input: &T, mut buf: &mut [u8], mut offset: u64) -> io::Result<()>
67where
68 T: ReadAt + ?Sized,
69{
70 while !buf.is_empty() {
71 match read_at(input, buf, offset).await? {
72 0 => io_bail!("unexpected EOF"),
73 got => {
74 buf = &mut buf[got..];
75 offset += got as u64;
6cd4f635
WB
76 }
77 }
6cd4f635 78 }
c68a29b5
WB
79 Ok(())
80}
6cd4f635 81
c68a29b5
WB
82/// Helper to read into an `Endian`-implementing `struct`.
83async fn read_entry_at<T, E: Endian>(input: &T, offset: u64) -> io::Result<E>
84where
85 T: ReadAt + ?Sized,
86{
87 let mut data = MaybeUninit::<E>::uninit();
88 let buf =
89 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<E>()) };
90 read_exact_at(input, buf, offset).await?;
91 Ok(unsafe { data.assume_init().from_le() })
92}
6cd4f635 93
c68a29b5
WB
94/// Helper to read into an allocated byte vector.
95async fn read_exact_data_at<T>(input: &T, size: usize, offset: u64) -> io::Result<Vec<u8>>
96where
97 T: ReadAt + ?Sized,
98{
99 let mut data = util::vec_new(size);
100 read_exact_at(input, &mut data[..], offset).await?;
101 Ok(data)
6cd4f635
WB
102}
103
29c17fc0
WB
104/// Allow using trait objects for `T: ReadAt`
105impl<'a> ReadAt for &(dyn ReadAt + 'a) {
106 fn poll_read_at(
107 self: Pin<&Self>,
108 cx: &mut Context,
bb6540d7
WB
109 buf: &mut [u8],
110 offset: u64,
111 ) -> Poll<io::Result<usize>> {
112 unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) }
113 }
114}
115
116/// Convenience impl for `Arc<dyn ReadAt + Send + Sync + 'static>`. Since `ReadAt` only requires
117/// immutable `&self`, this adds some convenience by allowing to just `Arc` any `'static` type that
118/// implemments `ReadAt` for type monomorphization.
119impl ReadAt for Arc<dyn ReadAt + Send + Sync + 'static> {
120 fn poll_read_at(
121 self: Pin<&Self>,
122 cx: &mut Context,
29c17fc0
WB
123 buf: &mut [u8],
124 offset: u64,
125 ) -> Poll<io::Result<usize>> {
d3a83ee3 126 unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) }
29c17fc0
WB
127 }
128}
129
b764a2b1 130#[derive(Clone)]
9d8af6f2
WB
131struct Caches {
132 /// The goodbye table cache maps goodbye table offsets to cache entries.
133 gbt_cache: Option<Arc<dyn Cache<u64, [GoodbyeItem]> + Send + Sync>>,
134}
135
136impl Default for Caches {
137 fn default() -> Self {
138 Self { gbt_cache: None }
139 }
140}
141
6cd4f635 142/// The random access state machine implementation.
5cf335be 143pub(crate) struct AccessorImpl<T> {
6cd4f635
WB
144 input: T,
145 size: u64,
9d8af6f2 146 caches: Arc<Caches>,
6cd4f635
WB
147}
148
149impl<T: ReadAt> AccessorImpl<T> {
150 pub async fn new(input: T, size: u64) -> io::Result<Self> {
151 if size < (size_of::<GoodbyeItem>() as u64) {
152 io_bail!("too small to contain a pxar archive");
153 }
9d8af6f2
WB
154
155 Ok(Self {
156 input,
157 size,
158 caches: Arc::new(Caches::default()),
159 })
6cd4f635
WB
160 }
161
a2530fb7
WB
162 pub fn size(&self) -> u64 {
163 self.size
164 }
165
29c17fc0 166 pub async fn open_root_ref<'a>(&'a self) -> io::Result<DirectoryImpl<&'a dyn ReadAt>> {
9d8af6f2
WB
167 DirectoryImpl::open_at_end(
168 &self.input as &dyn ReadAt,
169 self.size,
170 "/".into(),
171 Arc::clone(&self.caches),
172 )
173 .await
29c17fc0 174 }
b764a2b1
WB
175
176 pub fn set_goodbye_table_cache(
177 &mut self,
178 cache: Option<Arc<dyn Cache<u64, [GoodbyeItem]> + Send + Sync>>,
179 ) {
180 let new_caches = Arc::new(Caches {
181 gbt_cache: cache,
182 ..*self.caches
183 });
184 self.caches = new_caches;
185 }
29c17fc0
WB
186}
187
6b9e2478
WB
188async fn get_decoder<T: ReadAt>(
189 input: T,
190 entry_range: Range<u64>,
191 path: PathBuf,
192) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
d3a83ee3 193 Ok(DecoderImpl::new_full(SeqReadAtAdapter::new(input, entry_range), path).await?)
6b9e2478
WB
194}
195
06070d26
WB
196// NOTE: This performs the Decoder::read_next_item() behavior! Keep in mind when changing!
197async fn get_decoder_at_filename<T: ReadAt>(
198 input: T,
199 entry_range: Range<u64>,
200 path: PathBuf,
201) -> io::Result<(DecoderImpl<SeqReadAtAdapter<T>>, u64)> {
202 let mut decoder = get_decoder(input, entry_range, path).await?;
203 decoder.path_lengths.push(0);
204 decoder.read_next_header().await?;
205 if decoder.current_header.htype != format::PXAR_FILENAME {
1250e3ea 206 io_bail!(
4a13b8a3
FG
207 "expected filename entry, got {}",
208 decoder.current_header,
1250e3ea 209 );
06070d26
WB
210 }
211 if decoder.read_current_item().await? != decoder::ItemResult::Entry {
212 // impossible, since we checked the header type above for a "proper" error message
213 io_bail!("unexpected decoder state");
214 }
1250e3ea
WB
215 let entry_offset = decoder::seq_read_position(&mut decoder.input)
216 .await
217 .transpose()?
06070d26
WB
218 .ok_or_else(|| io_format_err!("reader provided no offset"))?;
219 Ok((decoder, entry_offset))
220}
221
29c17fc0
WB
222impl<T: Clone + ReadAt> AccessorImpl<T> {
223 pub async fn open_root(&self) -> io::Result<DirectoryImpl<T>> {
9d8af6f2
WB
224 DirectoryImpl::open_at_end(
225 self.input.clone(),
226 self.size,
227 "/".into(),
228 Arc::clone(&self.caches),
229 )
230 .await
6cd4f635 231 }
ceb83806
WB
232
233 /// Allow opening a directory at a specified offset.
234 pub async unsafe fn open_dir_at_end(&self, offset: u64) -> io::Result<DirectoryImpl<T>> {
235 DirectoryImpl::open_at_end(
236 self.input.clone(),
237 offset,
238 "/".into(),
239 Arc::clone(&self.caches),
240 )
241 .await
242 }
6b9e2478
WB
243
244 /// Allow opening a regular file from a specified range.
245 pub async unsafe fn open_file_at_range(
246 &self,
06070d26 247 entry_range_info: &EntryRangeInfo,
6b9e2478 248 ) -> io::Result<FileEntryImpl<T>> {
06070d26
WB
249 let mut decoder = get_decoder(
250 self.input.clone(),
251 entry_range_info.entry_range.clone(),
252 PathBuf::new(),
1250e3ea
WB
253 )
254 .await?;
6b9e2478
WB
255 let entry = decoder
256 .next()
257 .await
258 .ok_or_else(|| io_format_err!("unexpected EOF while decoding file entry"))??;
259 Ok(FileEntryImpl {
260 input: self.input.clone(),
261 entry,
06070d26 262 entry_range_info: entry_range_info.clone(),
6b9e2478
WB
263 caches: Arc::clone(&self.caches),
264 })
265 }
266
267 /// Allow opening arbitrary contents from a specific range.
268 pub unsafe fn open_contents_at_range(&self, range: Range<u64>) -> FileContentsImpl<T> {
269 FileContentsImpl::new(self.input.clone(), range)
270 }
6bfadb8a
WB
271
272 /// Following a hardlink breaks a couple of conventions we otherwise have, particularly we will
273 /// never know the actual length of the target entry until we're done decoding it, so this
274 /// needs to happen at the accessor level, rather than a "sub-entry-reader".
06070d26
WB
275 pub async fn follow_hardlink(&self, entry: &FileEntryImpl<T>) -> io::Result<FileEntryImpl<T>> {
276 let link_offset = match entry.entry.kind() {
277 EntryKind::Hardlink(link) => link.offset,
278 _ => io_bail!("cannot resolve a non-hardlink"),
279 };
280
281 let entry_file_offset = entry
282 .entry_range_info
283 .filename_header_offset
284 .ok_or_else(|| io_format_err!("cannot follow hardlink without a file entry header"))?;
285
286 if link_offset > entry_file_offset {
287 io_bail!("invalid offset in hardlink");
288 }
289
290 let link_offset = entry_file_offset - link_offset;
291
1250e3ea
WB
292 let (mut decoder, entry_offset) =
293 get_decoder_at_filename(self.input.clone(), link_offset..self.size, PathBuf::new())
294 .await?;
06070d26 295
6bfadb8a
WB
296 let entry = decoder
297 .next()
298 .await
299 .ok_or_else(|| io_format_err!("unexpected EOF while following a hardlink"))??;
300 match entry.kind() {
301 EntryKind::File { offset: None, .. } => {
302 io_bail!("failed to follow hardlink, reader provided no offsets");
303 }
b0487d4f
WB
304 EntryKind::File {
305 offset: Some(offset),
306 size,
307 } => {
06070d26
WB
308 let meta_size = offset - link_offset;
309 let entry_end = link_offset + meta_size + size;
6bfadb8a
WB
310 Ok(FileEntryImpl {
311 input: self.input.clone(),
312 entry,
06070d26
WB
313 entry_range_info: EntryRangeInfo {
314 filename_header_offset: Some(link_offset),
315 entry_range: entry_offset..entry_end,
316 },
6bfadb8a
WB
317 caches: Arc::clone(&self.caches),
318 })
319 }
320 _ => io_bail!("hardlink does not point to a regular file"),
321 }
322 }
6cd4f635
WB
323}
324
325/// The directory random-access state machine implementation.
5cf335be 326pub(crate) struct DirectoryImpl<T> {
29c17fc0 327 input: T,
6cd4f635
WB
328 entry_ofs: u64,
329 goodbye_ofs: u64,
330 size: u64,
9d8af6f2 331 table: Arc<[GoodbyeItem]>,
6cd4f635 332 path: PathBuf,
9d8af6f2 333 caches: Arc<Caches>,
6cd4f635
WB
334}
335
29c17fc0 336impl<T: Clone + ReadAt> DirectoryImpl<T> {
6cd4f635 337 /// Open a directory ending at the specified position.
9d8af6f2 338 async fn open_at_end(
29c17fc0 339 input: T,
6cd4f635
WB
340 end_offset: u64,
341 path: PathBuf,
9d8af6f2 342 caches: Arc<Caches>,
29c17fc0
WB
343 ) -> io::Result<DirectoryImpl<T>> {
344 let tail = Self::read_tail_entry(&input, end_offset).await?;
6cd4f635
WB
345
346 if end_offset < tail.size {
347 io_bail!("goodbye tail size out of range");
348 }
349
350 let goodbye_ofs = end_offset - tail.size;
351
352 if goodbye_ofs < tail.offset {
353 io_bail!("goodbye offset out of range");
354 }
355
356 let entry_ofs = goodbye_ofs - tail.offset;
357 let size = end_offset - entry_ofs;
358
9d8af6f2
WB
359 let table: Option<Arc<[GoodbyeItem]>> = caches
360 .gbt_cache
361 .as_ref()
362 .and_then(|cache| cache.fetch(goodbye_ofs));
363
6cd4f635
WB
364 let mut this = Self {
365 input,
366 entry_ofs,
367 goodbye_ofs,
368 size,
9d8af6f2 369 table: table.as_ref().map_or_else(|| Arc::new([]), Arc::clone),
6cd4f635 370 path,
9d8af6f2 371 caches,
6cd4f635
WB
372 };
373
374 // sanity check:
375 if this.table_size() % (size_of::<GoodbyeItem>() as u64) != 0 {
376 io_bail!("invalid goodbye table size: {}", this.table_size());
377 }
378
9d8af6f2
WB
379 if table.is_none() {
380 this.table = this.load_table().await?;
381 if let Some(ref cache) = this.caches.gbt_cache {
382 cache.insert(goodbye_ofs, Arc::clone(&this.table));
383 }
384 }
6cd4f635
WB
385
386 Ok(this)
387 }
388
389 /// Load the entire goodbye table:
9d8af6f2 390 async fn load_table(&self) -> io::Result<Arc<[GoodbyeItem]>> {
6cd4f635
WB
391 let len = self.len();
392 let mut data = Vec::with_capacity(self.len());
393 unsafe {
394 data.set_len(len);
395 let slice = std::slice::from_raw_parts_mut(
396 data.as_mut_ptr() as *mut u8,
2c23bd09 397 len * size_of::<GoodbyeItem>(),
6cd4f635 398 );
c68a29b5 399 read_exact_at(&self.input, slice, self.table_offset()).await?;
6cd4f635
WB
400 drop(slice);
401 }
9d8af6f2 402 Ok(Arc::from(data))
6cd4f635
WB
403 }
404
405 #[inline]
406 fn end_offset(&self) -> u64 {
407 self.entry_ofs + self.size
408 }
409
dc4a2854
WB
410 #[inline]
411 fn entry_range(&self) -> Range<u64> {
412 self.entry_ofs..self.end_offset()
413 }
414
6cd4f635
WB
415 #[inline]
416 fn table_size(&self) -> u64 {
417 (self.end_offset() - self.goodbye_ofs) - (size_of::<format::Header>() as u64)
418 }
419
420 #[inline]
421 fn table_offset(&self) -> u64 {
422 self.goodbye_ofs + (size_of::<format::Header>() as u64)
423 }
424
425 /// Length *excluding* the tail marker!
426 #[inline]
427 fn len(&self) -> usize {
428 (self.table_size() / (size_of::<GoodbyeItem>() as u64)) as usize - 1
429 }
430
431 /// Read the goodbye tail and perform some sanity checks.
c68a29b5 432 async fn read_tail_entry(input: &T, end_offset: u64) -> io::Result<GoodbyeItem> {
6cd4f635
WB
433 if end_offset < (size_of::<GoodbyeItem>() as u64) {
434 io_bail!("goodbye tail does not fit");
435 }
436
437 let tail_offset = end_offset - (size_of::<GoodbyeItem>() as u64);
c68a29b5 438 let tail: GoodbyeItem = read_entry_at(input, tail_offset).await?;
6cd4f635
WB
439
440 if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER {
441 io_bail!("no goodbye tail marker found");
442 }
443
444 Ok(tail)
445 }
446
447 /// Get a decoder for the directory contents.
29c17fc0 448 pub(crate) async fn decode_full(&self) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
dc4a2854 449 let (dir, decoder) = self.decode_one_entry(self.entry_range(), None).await?;
6cd4f635
WB
450 if !dir.is_dir() {
451 io_bail!("directory does not seem to be a directory");
452 }
453 Ok(decoder)
454 }
455
456 async fn get_decoder(
457 &self,
458 entry_range: Range<u64>,
459 file_name: Option<&Path>,
29c17fc0 460 ) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
6b9e2478
WB
461 get_decoder(
462 self.input.clone(),
463 entry_range,
6cd4f635
WB
464 match file_name {
465 None => self.path.clone(),
466 Some(file) => self.path.join(file),
467 },
d3a83ee3
WB
468 )
469 .await
6cd4f635
WB
470 }
471
472 async fn decode_one_entry(
473 &self,
474 entry_range: Range<u64>,
475 file_name: Option<&Path>,
29c17fc0 476 ) -> io::Result<(Entry, DecoderImpl<SeqReadAtAdapter<T>>)> {
6cd4f635
WB
477 let mut decoder = self.get_decoder(entry_range, file_name).await?;
478 let entry = decoder
479 .next()
480 .await
481 .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
482 Ok((entry, decoder))
483 }
484
fbddffdc
WB
485 fn lookup_hash_position(&self, hash: u64, start: usize, skip: usize) -> Option<usize> {
486 binary_tree_array::search_by(&self.table, start, skip, |i| hash.cmp(&i.hash))
6cd4f635
WB
487 }
488
a5922fbc 489 pub async fn lookup_self(&self) -> io::Result<FileEntryImpl<T>> {
c76d3f98 490 let (entry, _decoder) = self.decode_one_entry(self.entry_range(), None).await?;
dc4a2854
WB
491 Ok(FileEntryImpl {
492 input: self.input.clone(),
493 entry,
06070d26
WB
494 entry_range_info: EntryRangeInfo {
495 filename_header_offset: None,
496 entry_range: self.entry_range(),
497 },
9d8af6f2 498 caches: Arc::clone(&self.caches),
dc4a2854
WB
499 })
500 }
501
6cd4f635 502 /// Lookup a directory entry.
29c17fc0 503 pub async fn lookup(&self, path: &Path) -> io::Result<Option<FileEntryImpl<T>>> {
dc4a2854
WB
504 let mut cur: Option<FileEntryImpl<T>> = None;
505
506 let mut first = true;
507 for component in path.components() {
508 use std::path::Component;
509
510 let first = mem::replace(&mut first, false);
511
512 let component = match component {
513 Component::Normal(path) => path,
514 Component::ParentDir => io_bail!("cannot enter parent directory in archive"),
515 Component::RootDir | Component::CurDir if first => {
516 cur = Some(self.lookup_self().await?);
517 continue;
518 }
519 Component::CurDir => continue,
520 _ => io_bail!("invalid component in path"),
521 };
522
523 let next = match cur {
524 Some(entry) => {
525 entry
526 .enter_directory()
527 .await?
528 .lookup_component(component)
529 .await?
530 }
531 None => self.lookup_component(component).await?,
532 };
533
534 if next.is_none() {
535 return Ok(None);
536 }
537
538 cur = next;
539 }
540
541 Ok(cur)
542 }
543
544 /// Lookup a single directory entry component (does not handle multiple components in path)
545 pub async fn lookup_component(&self, path: &OsStr) -> io::Result<Option<FileEntryImpl<T>>> {
546 let hash = format::hash_filename(path.as_bytes());
fbddffdc 547 let first_index = match self.lookup_hash_position(hash, 0, 0) {
6cd4f635
WB
548 Some(index) => index,
549 None => return Ok(None),
550 };
551
fbddffdc
WB
552 // Lookup FILENAME, if the hash matches but the filename doesn't, check for a duplicate
553 // hash once found, use the GoodbyeItem's offset+size as well as the file's Entry to return
554 // a DirEntry::Dir or Dir::Entry.
555 //
556 let mut dup = 0;
557 loop {
558 let index = match self.lookup_hash_position(hash, first_index, dup) {
559 Some(index) => index,
560 None => return Ok(None),
561 };
6cd4f635 562
6cd4f635
WB
563 let cursor = self.get_cursor(index).await?;
564 if cursor.file_name == path {
aabb78a4 565 return Ok(Some(cursor.decode_entry().await?));
6cd4f635 566 }
6cd4f635 567
fbddffdc
WB
568 dup += 1;
569 }
6cd4f635
WB
570 }
571
29c17fc0 572 async fn get_cursor<'a>(&'a self, index: usize) -> io::Result<DirEntryImpl<'a, T>> {
6cd4f635
WB
573 let entry = &self.table[index];
574 let file_goodbye_ofs = entry.offset;
575 if self.goodbye_ofs < file_goodbye_ofs {
576 io_bail!("invalid file offset");
577 }
578
579 let file_ofs = self.goodbye_ofs - file_goodbye_ofs;
580 let (file_name, entry_ofs) = self.read_filename_entry(file_ofs).await?;
581
70acf637
WB
582 let entry_range = Range {
583 start: entry_ofs,
584 end: file_ofs + entry.size,
585 };
586 if entry_range.end < entry_range.start {
587 io_bail!(
588 "bad file: invalid entry ranges for {:?}: \
589 start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}",
590 file_name,
591 entry_ofs,
592 file_ofs,
593 entry.size,
594 );
595 }
596
6cd4f635
WB
597 Ok(DirEntryImpl {
598 dir: self,
599 file_name,
06070d26
WB
600 entry_range_info: EntryRangeInfo {
601 filename_header_offset: Some(file_ofs),
602 entry_range,
603 },
9d8af6f2 604 caches: Arc::clone(&self.caches),
6cd4f635
WB
605 })
606 }
607
608 async fn read_filename_entry(&self, file_ofs: u64) -> io::Result<(PathBuf, u64)> {
c68a29b5 609 let head: format::Header = read_entry_at(&self.input, file_ofs).await?;
6cd4f635 610 if head.htype != format::PXAR_FILENAME {
4a13b8a3 611 io_bail!("expected PXAR_FILENAME header, found: {}", head);
6cd4f635
WB
612 }
613
c68a29b5
WB
614 let mut path = read_exact_data_at(
615 &self.input,
616 head.content_size() as usize,
617 file_ofs + (size_of_val(&head) as u64),
618 )
619 .await?;
6cd4f635
WB
620
621 if path.pop() != Some(0) {
622 io_bail!("invalid file name (missing terminating zero)");
623 }
624
f3ac1c51 625 crate::util::validate_filename(&path)?;
bd99958c 626
6cd4f635
WB
627 let file_name = PathBuf::from(OsString::from_vec(path));
628 format::check_file_name(&file_name)?;
629
630 Ok((file_name, file_ofs + head.full_size()))
631 }
632
29c17fc0 633 pub fn read_dir(&self) -> ReadDirImpl<T> {
6cd4f635
WB
634 ReadDirImpl::new(self, 0)
635 }
d3a83ee3
WB
636
637 pub fn entry_count(&self) -> usize {
638 self.table.len()
639 }
6cd4f635
WB
640}
641
642/// A file entry retrieved from a Directory.
93fa37fb 643#[derive(Clone)]
5cf335be 644pub(crate) struct FileEntryImpl<T: Clone + ReadAt> {
29c17fc0 645 input: T,
6cd4f635 646 entry: Entry,
06070d26 647 entry_range_info: EntryRangeInfo,
9d8af6f2 648 caches: Arc<Caches>,
6cd4f635
WB
649}
650
29c17fc0
WB
651impl<T: Clone + ReadAt> FileEntryImpl<T> {
652 pub async fn enter_directory(&self) -> io::Result<DirectoryImpl<T>> {
6cd4f635
WB
653 if !self.entry.is_dir() {
654 io_bail!("enter_directory() on a non-directory");
655 }
656
9d8af6f2
WB
657 DirectoryImpl::open_at_end(
658 self.input.clone(),
06070d26 659 self.entry_range_info.entry_range.end,
9d8af6f2
WB
660 self.entry.path.clone(),
661 Arc::clone(&self.caches),
662 )
663 .await
6cd4f635
WB
664 }
665
6b9e2478
WB
666 /// For use with unsafe accessor methods.
667 pub fn content_range(&self) -> io::Result<Option<Range<u64>>> {
98b894a9 668 match self.entry.kind {
c76d3f98
WB
669 EntryKind::File { offset: None, .. } => {
670 io_bail!("cannot open file, reader provided no offset")
671 }
672 EntryKind::File {
673 size,
674 offset: Some(offset),
6b9e2478
WB
675 } => Ok(Some(offset..(offset + size))),
676 _ => Ok(None),
677 }
678 }
679
680 pub async fn contents(&self) -> io::Result<FileContentsImpl<T>> {
681 match self.content_range()? {
682 Some(range) => Ok(FileContentsImpl::new(self.input.clone(), range)),
683 None => io_bail!("not a file"),
98b894a9
WB
684 }
685 }
686
6cd4f635
WB
687 #[inline]
688 pub fn into_entry(self) -> Entry {
689 self.entry
690 }
691
692 #[inline]
693 pub fn entry(&self) -> &Entry {
694 &self.entry
695 }
ceb83806
WB
696
697 /// Exposed for raw by-offset access methods (use with `open_dir_at_end`).
698 #[inline]
06070d26
WB
699 pub fn entry_range_info(&self) -> &EntryRangeInfo {
700 &self.entry_range_info
ceb83806 701 }
6cd4f635
WB
702}
703
704/// An iterator over the contents of a directory.
5cf335be 705pub(crate) struct ReadDirImpl<'a, T> {
29c17fc0 706 dir: &'a DirectoryImpl<T>,
6cd4f635
WB
707 at: usize,
708}
709
29c17fc0 710impl<'a, T: Clone + ReadAt> ReadDirImpl<'a, T> {
5cf335be 711 fn new(dir: &'a DirectoryImpl<T>, at: usize) -> Self {
6cd4f635
WB
712 Self { dir, at }
713 }
714
98b894a9 715 /// Get the next entry.
29c17fc0 716 pub async fn next(&mut self) -> io::Result<Option<DirEntryImpl<'a, T>>> {
6cd4f635
WB
717 if self.at == self.dir.table.len() {
718 Ok(None)
719 } else {
720 let cursor = self.dir.get_cursor(self.at).await?;
721 self.at += 1;
722 Ok(Some(cursor))
723 }
724 }
98b894a9
WB
725
726 /// Efficient alternative to `Iterator::skip`.
727 #[inline]
728 pub fn skip(self, n: usize) -> Self {
729 Self {
730 at: (self.at + n).min(self.dir.table.len()),
731 dir: self.dir,
732 }
733 }
734
735 /// Efficient alternative to `Iterator::count`.
736 #[inline]
737 pub fn count(self) -> usize {
738 self.dir.table.len()
739 }
6cd4f635
WB
740}
741
742/// A cursor pointing to a file in a directory.
743///
744/// At this point only the file name has been read and we remembered the position for finding the
745/// actual data. This can be upgraded into a FileEntryImpl.
5cf335be 746pub(crate) struct DirEntryImpl<'a, T: Clone + ReadAt> {
29c17fc0 747 dir: &'a DirectoryImpl<T>,
6cd4f635 748 file_name: PathBuf,
06070d26 749 entry_range_info: EntryRangeInfo,
9d8af6f2 750 caches: Arc<Caches>,
6cd4f635
WB
751}
752
29c17fc0 753impl<'a, T: Clone + ReadAt> DirEntryImpl<'a, T> {
6cd4f635
WB
754 pub fn file_name(&self) -> &Path {
755 &self.file_name
756 }
757
aabb78a4 758 async fn decode_entry(&self) -> io::Result<FileEntryImpl<T>> {
c76d3f98 759 let (entry, _decoder) = self
6cd4f635 760 .dir
1250e3ea
WB
761 .decode_one_entry(
762 self.entry_range_info.entry_range.clone(),
763 Some(&self.file_name),
764 )
6cd4f635 765 .await?;
6cd4f635
WB
766
767 Ok(FileEntryImpl {
29c17fc0 768 input: self.dir.input.clone(),
6cd4f635 769 entry,
06070d26 770 entry_range_info: self.entry_range_info.clone(),
9d8af6f2 771 caches: Arc::clone(&self.caches),
6cd4f635
WB
772 })
773 }
ceb83806
WB
774
775 /// Exposed for raw by-offset access methods.
776 #[inline]
06070d26
WB
777 pub fn entry_range_info(&self) -> &EntryRangeInfo {
778 &self.entry_range_info
ceb83806 779 }
6cd4f635
WB
780}
781
98b894a9 782/// A reader for file contents.
5cf335be 783pub(crate) struct FileContentsImpl<T> {
98b894a9
WB
784 input: T,
785
786 /// Absolute offset inside the `input`.
787 range: Range<u64>,
788}
789
790impl<T: Clone + ReadAt> FileContentsImpl<T> {
791 pub fn new(input: T, range: Range<u64>) -> Self {
792 Self { input, range }
793 }
794
795 #[inline]
796 pub fn file_size(&self) -> u64 {
797 self.range.end - self.range.start
798 }
799
800 async fn read_at(&self, mut buf: &mut [u8], offset: u64) -> io::Result<usize> {
801 let size = self.file_size();
802 if offset >= size {
803 return Ok(0);
804 }
805 let remaining = size - offset;
806
807 if remaining < buf.len() as u64 {
808 buf = &mut buf[..(remaining as usize)];
809 }
810
c68a29b5 811 read_at(&self.input, buf, self.range.start + offset).await
98b894a9
WB
812 }
813}
814
d3a83ee3
WB
815impl<T: Clone + ReadAt> ReadAt for FileContentsImpl<T> {
816 fn poll_read_at(
817 self: Pin<&Self>,
818 cx: &mut Context,
819 mut buf: &mut [u8],
820 offset: u64,
821 ) -> Poll<io::Result<usize>> {
822 let size = self.file_size();
823 if offset >= size {
824 return Poll::Ready(Ok(0));
825 }
826 let remaining = size - offset;
827
828 if remaining < buf.len() as u64 {
829 buf = &mut buf[..(remaining as usize)];
830 }
831
832 let offset = self.range.start + offset;
833 unsafe { self.map_unchecked(|this| &this.input) }.poll_read_at(cx, buf, offset)
834 }
835}
836
6cd4f635 837#[doc(hidden)]
29c17fc0
WB
838pub struct SeqReadAtAdapter<T> {
839 input: T,
6cd4f635
WB
840 range: Range<u64>,
841}
842
29c17fc0
WB
843impl<T: ReadAt> SeqReadAtAdapter<T> {
844 pub fn new(input: T, range: Range<u64>) -> Self {
70acf637
WB
845 if range.end < range.start {
846 panic!("BAD SEQ READ AT ADAPTER");
847 }
6cd4f635
WB
848 Self { input, range }
849 }
850
851 #[inline]
852 fn remaining(&self) -> usize {
853 (self.range.end - self.range.start) as usize
854 }
855}
856
29c17fc0 857impl<T: ReadAt> decoder::SeqRead for SeqReadAtAdapter<T> {
6cd4f635
WB
858 fn poll_seq_read(
859 self: Pin<&mut Self>,
860 cx: &mut Context,
861 buf: &mut [u8],
862 ) -> Poll<io::Result<usize>> {
863 let len = buf.len().min(self.remaining());
864 let buf = &mut buf[..len];
865
29c17fc0 866 let this = unsafe { self.get_unchecked_mut() };
6cd4f635
WB
867
868 let got = ready!(unsafe {
29c17fc0 869 Pin::new_unchecked(&this.input).poll_read_at(cx, buf, this.range.start)
6cd4f635
WB
870 })?;
871 this.range.start += got as u64;
872 Poll::Ready(Ok(got))
873 }
874
875 fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
876 Poll::Ready(Some(Ok(self.range.start)))
877 }
878}