]> git.proxmox.com Git - pxar.git/blob - src/accessor.rs
pxarcmd example: implement 'cat'
[pxar.git] / src / accessor.rs
1 //! Random access for PXAR files.
2
3 use std::ffi::{OsStr, OsString};
4 use std::io;
5 use std::mem::{self, size_of, size_of_val, MaybeUninit};
6 use std::ops::Range;
7 use std::os::unix::ffi::{OsStrExt, OsStringExt};
8 use std::path::{Path, PathBuf};
9 use std::pin::Pin;
10 use std::task::{Context, Poll};
11
12 use endian_trait::Endian;
13
14 use crate::decoder::{self, DecoderImpl};
15 use crate::format::{self, GoodbyeItem};
16 use crate::poll_fn::poll_fn;
17 use crate::util;
18 use crate::{Entry, EntryKind};
19
20 pub mod aio;
21 pub mod sync;
22
23 #[doc(inline)]
24 pub use sync::Accessor;
25
/// Random access read implementation.
///
/// An implementor fills a caller-provided buffer with data found at an
/// absolute byte offset of the underlying input.
pub trait ReadAt {
    /// Poll for a read into `buf` starting at byte `offset`.
    ///
    /// Resolves to the number of bytes stored in `buf`. The helpers in this
    /// module treat a result of `0` as "unexpected EOF".
    fn poll_read_at(
        self: Pin<&Self>,
        cx: &mut Context<'_>,
        buf: &mut [u8],
        offset: u64,
    ) -> Poll<io::Result<usize>>;
}
35
36 /// We do not want to bother with actual polling, so we implement `async fn` variants of the above
37 /// on `dyn ReadAt`.
38 ///
39 /// The reason why this is not an internal `ReadAtExt` trait like `AsyncReadExt` is simply that
40 /// we'd then need to define all the `Future` types they return manually and explicitly. Since we
41 /// have no use for them, all we want is the ability to use `async fn`...
42 ///
43 /// The downside is that we need some `(&mut self.input as &mut dyn ReadAt)` casts in the
44 /// decoder's code, but that's fine.
45 impl<'a> dyn ReadAt + 'a {
46 /// awaitable version of `poll_read_at`.
47 async fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result<usize> {
48 poll_fn(|cx| unsafe { Pin::new_unchecked(self).poll_read_at(cx, buf, offset) }).await
49 }
50
51 /// `read_exact_at` - since that's what we _actually_ want most of the time.
52 async fn read_exact_at(&self, mut buf: &mut [u8], mut offset: u64) -> io::Result<()> {
53 while !buf.is_empty() {
54 match self.read_at(buf, offset).await? {
55 0 => io_bail!("unexpected EOF"),
56 got => {
57 buf = &mut buf[got..];
58 offset += got as u64;
59 }
60 }
61 }
62 Ok(())
63 }
64
65 /// Helper to read into an `Endian`-implementing `struct`.
66 async fn read_entry_at<T: Endian>(&self, offset: u64) -> io::Result<T> {
67 let mut data = MaybeUninit::<T>::uninit();
68 let buf =
69 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<T>()) };
70 self.read_exact_at(buf, offset).await?;
71 Ok(unsafe { data.assume_init().from_le() })
72 }
73
74 /// Helper to read into an allocated byte vector.
75 async fn read_exact_data_at(&self, size: usize, offset: u64) -> io::Result<Vec<u8>> {
76 let mut data = util::vec_new(size);
77 self.read_exact_at(&mut data[..], offset).await?;
78 Ok(data)
79 }
80 }
81
82 /// Allow using trait objects for `T: ReadAt`
83 impl<'a> ReadAt for &(dyn ReadAt + 'a) {
84 fn poll_read_at(
85 self: Pin<&Self>,
86 cx: &mut Context,
87 buf: &mut [u8],
88 offset: u64,
89 ) -> Poll<io::Result<usize>> {
90 unsafe {
91 self.map_unchecked(|this| *this)
92 .poll_read_at(cx, buf, offset)
93 }
94 }
95 }
96
/// The random access state machine implementation.
pub struct AccessorImpl<T> {
    /// The reader providing access to the archive data.
    input: T,

    /// Total size of the archive; the root directory is opened as ending at this offset.
    size: u64,
}
102
103 impl<T: ReadAt> AccessorImpl<T> {
104 pub async fn new(input: T, size: u64) -> io::Result<Self> {
105 if size < (size_of::<GoodbyeItem>() as u64) {
106 io_bail!("too small to contain a pxar archive");
107 }
108 Ok(Self { input, size })
109 }
110
111 pub async fn open_root_ref<'a>(&'a self) -> io::Result<DirectoryImpl<&'a dyn ReadAt>> {
112 DirectoryImpl::open_at_end(&self.input as &dyn ReadAt, self.size, "/".into()).await
113 }
114 }
115
116 impl<T: Clone + ReadAt> AccessorImpl<T> {
117 pub async fn open_root(&self) -> io::Result<DirectoryImpl<T>> {
118 DirectoryImpl::open_at_end(self.input.clone(), self.size, "/".into()).await
119 }
120 }
121
122 /// The directory random-access state machine implementation.
123 pub struct DirectoryImpl<T> {
124 input: T,
125 entry_ofs: u64,
126 goodbye_ofs: u64,
127 size: u64,
128 table: Box<[GoodbyeItem]>,
129 path: PathBuf,
130 }
131
132 impl<T: Clone + ReadAt> DirectoryImpl<T> {
133 /// Open a directory ending at the specified position.
134 pub(crate) async fn open_at_end(
135 input: T,
136 end_offset: u64,
137 path: PathBuf,
138 ) -> io::Result<DirectoryImpl<T>> {
139 let tail = Self::read_tail_entry(&input, end_offset).await?;
140
141 if end_offset < tail.size {
142 io_bail!("goodbye tail size out of range");
143 }
144
145 let goodbye_ofs = end_offset - tail.size;
146
147 if goodbye_ofs < tail.offset {
148 io_bail!("goodbye offset out of range");
149 }
150
151 let entry_ofs = goodbye_ofs - tail.offset;
152 let size = end_offset - entry_ofs;
153
154 let mut this = Self {
155 input,
156 entry_ofs,
157 goodbye_ofs,
158 size,
159 table: Box::new([]),
160 path,
161 };
162
163 // sanity check:
164 if this.table_size() % (size_of::<GoodbyeItem>() as u64) != 0 {
165 io_bail!("invalid goodbye table size: {}", this.table_size());
166 }
167
168 this.table = this.load_table().await?;
169
170 Ok(this)
171 }
172
173 /// Load the entire goodbye table:
174 async fn load_table(&self) -> io::Result<Box<[GoodbyeItem]>> {
175 let len = self.len();
176 let mut data = Vec::with_capacity(self.len());
177 unsafe {
178 data.set_len(len);
179 let slice = std::slice::from_raw_parts_mut(
180 data.as_mut_ptr() as *mut u8,
181 len * size_of_val(&data[0]),
182 );
183 (&self.input as &dyn ReadAt)
184 .read_exact_at(slice, self.table_offset())
185 .await?;
186 drop(slice);
187 }
188 Ok(data.into_boxed_slice())
189 }
190
191 #[inline]
192 fn end_offset(&self) -> u64 {
193 self.entry_ofs + self.size
194 }
195
196 #[inline]
197 fn entry_range(&self) -> Range<u64> {
198 self.entry_ofs..self.end_offset()
199 }
200
201 #[inline]
202 fn table_size(&self) -> u64 {
203 (self.end_offset() - self.goodbye_ofs) - (size_of::<format::Header>() as u64)
204 }
205
206 #[inline]
207 fn table_offset(&self) -> u64 {
208 self.goodbye_ofs + (size_of::<format::Header>() as u64)
209 }
210
211 /// Length *excluding* the tail marker!
212 #[inline]
213 fn len(&self) -> usize {
214 (self.table_size() / (size_of::<GoodbyeItem>() as u64)) as usize - 1
215 }
216
217 /// Read the goodbye tail and perform some sanity checks.
218 async fn read_tail_entry(input: &'_ dyn ReadAt, end_offset: u64) -> io::Result<GoodbyeItem> {
219 if end_offset < (size_of::<GoodbyeItem>() as u64) {
220 io_bail!("goodbye tail does not fit");
221 }
222
223 let tail_offset = end_offset - (size_of::<GoodbyeItem>() as u64);
224 let tail: GoodbyeItem = input.read_entry_at(tail_offset).await?;
225
226 if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER {
227 io_bail!("no goodbye tail marker found");
228 }
229
230 Ok(tail)
231 }
232
233 /// Get a decoder for the directory contents.
234 pub(crate) async fn decode_full(&self) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
235 let (dir, decoder) = self.decode_one_entry(self.entry_range(), None).await?;
236 if !dir.is_dir() {
237 io_bail!("directory does not seem to be a directory");
238 }
239 Ok(decoder)
240 }
241
242 async fn get_decoder(
243 &self,
244 entry_range: Range<u64>,
245 file_name: Option<&Path>,
246 ) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
247 Ok(DecoderImpl::new_full(
248 SeqReadAtAdapter::new(self.input.clone(), entry_range),
249 match file_name {
250 None => self.path.clone(),
251 Some(file) => self.path.join(file),
252 },
253 )
254 .await?)
255 }
256
257 async fn decode_one_entry(
258 &self,
259 entry_range: Range<u64>,
260 file_name: Option<&Path>,
261 ) -> io::Result<(Entry, DecoderImpl<SeqReadAtAdapter<T>>)> {
262 let mut decoder = self.get_decoder(entry_range, file_name).await?;
263 let entry = decoder
264 .next()
265 .await
266 .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
267 Ok((entry, decoder))
268 }
269
270 fn lookup_hash_position(&self, hash: u64) -> Option<usize> {
271 format::search_binary_tree_array_by(&self.table, |i| hash.cmp(&i.hash))
272 }
273
274 async fn lookup_self(&self) -> io::Result<FileEntryImpl<T>> {
275 let (entry, _decoder) = self.decode_one_entry(self.entry_range(), None).await?;
276 Ok(FileEntryImpl {
277 input: self.input.clone(),
278 entry,
279 end_offset: self.end_offset(),
280 })
281 }
282
283 /// Lookup a directory entry.
284 pub async fn lookup(&self, path: &Path) -> io::Result<Option<FileEntryImpl<T>>> {
285 let mut cur: Option<FileEntryImpl<T>> = None;
286
287 let mut first = true;
288 for component in path.components() {
289 use std::path::Component;
290
291 let first = mem::replace(&mut first, false);
292
293 let component = match component {
294 Component::Normal(path) => path,
295 Component::ParentDir => io_bail!("cannot enter parent directory in archive"),
296 Component::RootDir | Component::CurDir if first => {
297 cur = Some(self.lookup_self().await?);
298 continue;
299 }
300 Component::CurDir => continue,
301 _ => io_bail!("invalid component in path"),
302 };
303
304 let next = match cur {
305 Some(entry) => {
306 entry
307 .enter_directory()
308 .await?
309 .lookup_component(component)
310 .await?
311 }
312 None => self.lookup_component(component).await?,
313 };
314
315 if next.is_none() {
316 return Ok(None);
317 }
318
319 cur = next;
320 }
321
322 Ok(cur)
323 }
324
325 /// Lookup a single directory entry component (does not handle multiple components in path)
326 pub async fn lookup_component(&self, path: &OsStr) -> io::Result<Option<FileEntryImpl<T>>> {
327 let hash = format::hash_filename(path.as_bytes());
328 let index = match self.lookup_hash_position(hash) {
329 Some(index) => index,
330 None => return Ok(None),
331 };
332
333 // Lookup FILENAME, if it doesn't match increase index, once found, use the GoodbyeItem's
334 // offset+size as well as the file's Entry to return a DirEntry::Dir or Dir::Entry.
335
336 while index < self.table.len() && self.table[index].hash == hash {
337 let cursor = self.get_cursor(index).await?;
338 if cursor.file_name == path {
339 return Ok(Some(cursor.get_entry().await?));
340 }
341 }
342
343 Ok(None)
344 }
345
346 async fn get_cursor<'a>(&'a self, index: usize) -> io::Result<DirEntryImpl<'a, T>> {
347 let entry = &self.table[index];
348 let file_goodbye_ofs = entry.offset;
349 if self.goodbye_ofs < file_goodbye_ofs {
350 io_bail!("invalid file offset");
351 }
352
353 let file_ofs = self.goodbye_ofs - file_goodbye_ofs;
354 let (file_name, entry_ofs) = self.read_filename_entry(file_ofs).await?;
355
356 Ok(DirEntryImpl {
357 dir: self,
358 file_name,
359 entry_range: Range {
360 start: entry_ofs,
361 end: file_ofs + entry.size,
362 },
363 })
364 }
365
366 async fn read_filename_entry(&self, file_ofs: u64) -> io::Result<(PathBuf, u64)> {
367 let head: format::Header = (&self.input as &dyn ReadAt).read_entry_at(file_ofs).await?;
368 if head.htype != format::PXAR_FILENAME {
369 io_bail!("expected PXAR_FILENAME header, found: {:x}", head.htype);
370 }
371
372 let mut path = (&self.input as &dyn ReadAt)
373 .read_exact_data_at(
374 head.content_size() as usize,
375 file_ofs + (size_of_val(&head) as u64),
376 )
377 .await?;
378
379 if path.pop() != Some(0) {
380 io_bail!("invalid file name (missing terminating zero)");
381 }
382
383 if path.is_empty() {
384 io_bail!("invalid empty file name");
385 }
386
387 let file_name = PathBuf::from(OsString::from_vec(path));
388 format::check_file_name(&file_name)?;
389
390 Ok((file_name, file_ofs + head.full_size()))
391 }
392
393 pub fn read_dir(&self) -> ReadDirImpl<T> {
394 ReadDirImpl::new(self, 0)
395 }
396 }
397
398 /// A file entry retrieved from a Directory.
399 pub struct FileEntryImpl<T: Clone + ReadAt> {
400 input: T,
401 entry: Entry,
402 end_offset: u64,
403 }
404
405 impl<T: Clone + ReadAt> FileEntryImpl<T> {
406 pub async fn enter_directory(&self) -> io::Result<DirectoryImpl<T>> {
407 if !self.entry.is_dir() {
408 io_bail!("enter_directory() on a non-directory");
409 }
410
411 DirectoryImpl::open_at_end(self.input.clone(), self.end_offset, self.entry.path.clone())
412 .await
413 }
414
415 pub async fn contents(&self) -> io::Result<FileContentsImpl<T>> {
416 match self.entry.kind {
417 EntryKind::File { offset: None, .. } => {
418 io_bail!("cannot open file, reader provided no offset")
419 }
420 EntryKind::File {
421 size,
422 offset: Some(offset),
423 } => Ok(FileContentsImpl::new(
424 self.input.clone(),
425 offset..(offset + size),
426 )),
427 _ => io_bail!("not a file"),
428 }
429 }
430
431 #[inline]
432 pub fn into_entry(self) -> Entry {
433 self.entry
434 }
435
436 #[inline]
437 pub fn entry(&self) -> &Entry {
438 &self.entry
439 }
440 }
441
442 /// An iterator over the contents of a directory.
443 pub struct ReadDirImpl<'a, T> {
444 dir: &'a DirectoryImpl<T>,
445 at: usize,
446 }
447
448 impl<'a, T: Clone + ReadAt> ReadDirImpl<'a, T> {
449 pub fn new(dir: &'a DirectoryImpl<T>, at: usize) -> Self {
450 Self { dir, at }
451 }
452
453 /// Get the next entry.
454 pub async fn next(&mut self) -> io::Result<Option<DirEntryImpl<'a, T>>> {
455 if self.at == self.dir.table.len() {
456 Ok(None)
457 } else {
458 let cursor = self.dir.get_cursor(self.at).await?;
459 self.at += 1;
460 Ok(Some(cursor))
461 }
462 }
463
464 /// Efficient alternative to `Iterator::skip`.
465 #[inline]
466 pub fn skip(self, n: usize) -> Self {
467 Self {
468 at: (self.at + n).min(self.dir.table.len()),
469 dir: self.dir,
470 }
471 }
472
473 /// Efficient alternative to `Iterator::count`.
474 #[inline]
475 pub fn count(self) -> usize {
476 self.dir.table.len()
477 }
478 }
479
480 /// A cursor pointing to a file in a directory.
481 ///
482 /// At this point only the file name has been read and we remembered the position for finding the
483 /// actual data. This can be upgraded into a FileEntryImpl.
484 pub struct DirEntryImpl<'a, T: Clone + ReadAt> {
485 dir: &'a DirectoryImpl<T>,
486 file_name: PathBuf,
487 entry_range: Range<u64>,
488 }
489
490 impl<'a, T: Clone + ReadAt> DirEntryImpl<'a, T> {
491 pub fn file_name(&self) -> &Path {
492 &self.file_name
493 }
494
495 pub async fn get_entry(&self) -> io::Result<FileEntryImpl<T>> {
496 let end_offset = self.entry_range.end;
497 let (entry, _decoder) = self
498 .dir
499 .decode_one_entry(self.entry_range.clone(), Some(&self.file_name))
500 .await?;
501
502 Ok(FileEntryImpl {
503 input: self.dir.input.clone(),
504 entry,
505 end_offset,
506 })
507 }
508 }
509
/// A reader for file contents.
pub struct FileContentsImpl<T> {
    /// The reader providing access to the archive data.
    input: T,

    /// Absolute offset inside the `input`.
    ///
    /// Covers the file's contents from the first byte up to (excluding) the end.
    range: Range<u64>,
}
517
518 impl<T: Clone + ReadAt> FileContentsImpl<T> {
519 pub fn new(input: T, range: Range<u64>) -> Self {
520 Self { input, range }
521 }
522
523 #[inline]
524 pub fn file_size(&self) -> u64 {
525 self.range.end - self.range.start
526 }
527
528 async fn read_at(&self, mut buf: &mut [u8], offset: u64) -> io::Result<usize> {
529 let size = self.file_size();
530 if offset >= size {
531 return Ok(0);
532 }
533 let remaining = size - offset;
534
535 if remaining < buf.len() as u64 {
536 buf = &mut buf[..(remaining as usize)];
537 }
538
539 (&self.input as &dyn ReadAt)
540 .read_at(buf, self.range.start + offset)
541 .await
542 }
543 }
544
/// Adapter for reading a range of a random access input sequentially.
#[doc(hidden)]
pub struct SeqReadAtAdapter<T> {
    /// The reader providing access to the archive data.
    input: T,

    /// The absolute range left to read; its start advances with every successful read.
    range: Range<u64>,
}
550
551 impl<T: ReadAt> SeqReadAtAdapter<T> {
552 pub fn new(input: T, range: Range<u64>) -> Self {
553 Self { input, range }
554 }
555
556 #[inline]
557 fn remaining(&self) -> usize {
558 (self.range.end - self.range.start) as usize
559 }
560 }
561
562 impl<T: ReadAt> decoder::SeqRead for SeqReadAtAdapter<T> {
563 fn poll_seq_read(
564 self: Pin<&mut Self>,
565 cx: &mut Context,
566 buf: &mut [u8],
567 ) -> Poll<io::Result<usize>> {
568 let len = buf.len().min(self.remaining());
569 let buf = &mut buf[..len];
570
571 let this = unsafe { self.get_unchecked_mut() };
572
573 let got = ready!(unsafe {
574 Pin::new_unchecked(&this.input).poll_read_at(cx, buf, this.range.start)
575 })?;
576 this.range.start += got as u64;
577 Poll::Ready(Ok(got))
578 }
579
580 fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
581 Poll::Ready(Some(Ok(self.range.start)))
582 }
583 }