]> git.proxmox.com Git - pxar.git/blame - src/accessor.rs
don't use size_of_val for possibly empty array elements
[pxar.git] / src / accessor.rs
CommitLineData
6cd4f635
WB
1//! Random access for PXAR files.
2
dc4a2854 3use std::ffi::{OsStr, OsString};
6cd4f635 4use std::io;
dc4a2854 5use std::mem::{self, size_of, size_of_val, MaybeUninit};
6cd4f635
WB
6use std::ops::Range;
7use std::os::unix::ffi::{OsStrExt, OsStringExt};
8use std::path::{Path, PathBuf};
9use std::pin::Pin;
10use std::task::{Context, Poll};
11
12use endian_trait::Endian;
13
14use crate::decoder::{self, DecoderImpl};
15use crate::format::{self, GoodbyeItem};
16use crate::poll_fn::poll_fn;
17use crate::util;
98b894a9 18use crate::{Entry, EntryKind};
6cd4f635 19
9de9cca9 20#[doc(hidden)]
6cd4f635
WB
21pub mod aio;
22pub mod sync;
23
24#[doc(inline)]
2c23bd09 25pub use sync::{Accessor, DirEntry, Directory, FileEntry, ReadDir};
6cd4f635
WB
26
/// Positional ("random access") reads in poll form.
///
/// Implementors fill `buf` with data found at the absolute position `offset` of the underlying
/// input and report how many bytes were produced.
pub trait ReadAt {
    /// Try to read up to `buf.len()` bytes located at `offset` into `buf`.
    ///
    /// Resolves to the number of bytes read. The helpers in this module treat `Ok(0)` as
    /// "end of input".
    fn poll_read_at(
        self: Pin<&Self>,
        cx: &mut Context,
        buf: &mut [u8],
        offset: u64,
    ) -> Poll<io::Result<usize>>;
}
36
37/// We do not want to bother with actual polling, so we implement `async fn` variants of the above
38/// on `dyn ReadAt`.
39///
40/// The reason why this is not an internal `ReadAtExt` trait like `AsyncReadExt` is simply that
41/// we'd then need to define all the `Future` types they return manually and explicitly. Since we
42/// have no use for them, all we want is the ability to use `async fn`...
43///
44/// The downside is that we need some `(&mut self.input as &mut dyn ReadAt)` casts in the
45/// decoder's code, but that's fine.
46impl<'a> dyn ReadAt + 'a {
47 /// awaitable version of `poll_read_at`.
48 async fn read_at(&self, buf: &mut [u8], offset: u64) -> io::Result<usize> {
49 poll_fn(|cx| unsafe { Pin::new_unchecked(self).poll_read_at(cx, buf, offset) }).await
50 }
51
52 /// `read_exact_at` - since that's what we _actually_ want most of the time.
53 async fn read_exact_at(&self, mut buf: &mut [u8], mut offset: u64) -> io::Result<()> {
54 while !buf.is_empty() {
55 match self.read_at(buf, offset).await? {
56 0 => io_bail!("unexpected EOF"),
57 got => {
58 buf = &mut buf[got..];
59 offset += got as u64;
60 }
61 }
62 }
63 Ok(())
64 }
65
66 /// Helper to read into an `Endian`-implementing `struct`.
67 async fn read_entry_at<T: Endian>(&self, offset: u64) -> io::Result<T> {
68 let mut data = MaybeUninit::<T>::uninit();
69 let buf =
70 unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<T>()) };
71 self.read_exact_at(buf, offset).await?;
72 Ok(unsafe { data.assume_init().from_le() })
73 }
74
75 /// Helper to read into an allocated byte vector.
76 async fn read_exact_data_at(&self, size: usize, offset: u64) -> io::Result<Vec<u8>> {
77 let mut data = util::vec_new(size);
78 self.read_exact_at(&mut data[..], offset).await?;
79 Ok(data)
80 }
81}
82
29c17fc0
WB
83/// Allow using trait objects for `T: ReadAt`
84impl<'a> ReadAt for &(dyn ReadAt + 'a) {
85 fn poll_read_at(
86 self: Pin<&Self>,
87 cx: &mut Context,
88 buf: &mut [u8],
89 offset: u64,
90 ) -> Poll<io::Result<usize>> {
91 unsafe {
92 self.map_unchecked(|this| *this)
93 .poll_read_at(cx, buf, offset)
94 }
95 }
96}
97
/// State of the random-access reader for a whole archive.
pub(crate) struct AccessorImpl<T> {
    /// The underlying input the archive is read from.
    input: T,
    /// Total size of the archive in bytes, as passed to `new`.
    size: u64,
}
103
104impl<T: ReadAt> AccessorImpl<T> {
105 pub async fn new(input: T, size: u64) -> io::Result<Self> {
106 if size < (size_of::<GoodbyeItem>() as u64) {
107 io_bail!("too small to contain a pxar archive");
108 }
109 Ok(Self { input, size })
110 }
111
29c17fc0
WB
112 pub async fn open_root_ref<'a>(&'a self) -> io::Result<DirectoryImpl<&'a dyn ReadAt>> {
113 DirectoryImpl::open_at_end(&self.input as &dyn ReadAt, self.size, "/".into()).await
114 }
115}
116
117impl<T: Clone + ReadAt> AccessorImpl<T> {
118 pub async fn open_root(&self) -> io::Result<DirectoryImpl<T>> {
119 DirectoryImpl::open_at_end(self.input.clone(), self.size, "/".into()).await
6cd4f635
WB
120 }
121}
122
123/// The directory random-access state machine implementation.
5cf335be 124pub(crate) struct DirectoryImpl<T> {
29c17fc0 125 input: T,
6cd4f635
WB
126 entry_ofs: u64,
127 goodbye_ofs: u64,
128 size: u64,
129 table: Box<[GoodbyeItem]>,
130 path: PathBuf,
131}
132
29c17fc0 133impl<T: Clone + ReadAt> DirectoryImpl<T> {
6cd4f635
WB
134 /// Open a directory ending at the specified position.
135 pub(crate) async fn open_at_end(
29c17fc0 136 input: T,
6cd4f635
WB
137 end_offset: u64,
138 path: PathBuf,
29c17fc0
WB
139 ) -> io::Result<DirectoryImpl<T>> {
140 let tail = Self::read_tail_entry(&input, end_offset).await?;
6cd4f635
WB
141
142 if end_offset < tail.size {
143 io_bail!("goodbye tail size out of range");
144 }
145
146 let goodbye_ofs = end_offset - tail.size;
147
148 if goodbye_ofs < tail.offset {
149 io_bail!("goodbye offset out of range");
150 }
151
152 let entry_ofs = goodbye_ofs - tail.offset;
153 let size = end_offset - entry_ofs;
154
155 let mut this = Self {
156 input,
157 entry_ofs,
158 goodbye_ofs,
159 size,
160 table: Box::new([]),
161 path,
162 };
163
164 // sanity check:
165 if this.table_size() % (size_of::<GoodbyeItem>() as u64) != 0 {
166 io_bail!("invalid goodbye table size: {}", this.table_size());
167 }
168
169 this.table = this.load_table().await?;
170
171 Ok(this)
172 }
173
174 /// Load the entire goodbye table:
175 async fn load_table(&self) -> io::Result<Box<[GoodbyeItem]>> {
176 let len = self.len();
177 let mut data = Vec::with_capacity(self.len());
178 unsafe {
179 data.set_len(len);
180 let slice = std::slice::from_raw_parts_mut(
181 data.as_mut_ptr() as *mut u8,
2c23bd09 182 len * size_of::<GoodbyeItem>(),
6cd4f635 183 );
29c17fc0
WB
184 (&self.input as &dyn ReadAt)
185 .read_exact_at(slice, self.table_offset())
186 .await?;
6cd4f635
WB
187 drop(slice);
188 }
189 Ok(data.into_boxed_slice())
190 }
191
192 #[inline]
193 fn end_offset(&self) -> u64 {
194 self.entry_ofs + self.size
195 }
196
dc4a2854
WB
197 #[inline]
198 fn entry_range(&self) -> Range<u64> {
199 self.entry_ofs..self.end_offset()
200 }
201
6cd4f635
WB
202 #[inline]
203 fn table_size(&self) -> u64 {
204 (self.end_offset() - self.goodbye_ofs) - (size_of::<format::Header>() as u64)
205 }
206
207 #[inline]
208 fn table_offset(&self) -> u64 {
209 self.goodbye_ofs + (size_of::<format::Header>() as u64)
210 }
211
212 /// Length *excluding* the tail marker!
213 #[inline]
214 fn len(&self) -> usize {
215 (self.table_size() / (size_of::<GoodbyeItem>() as u64)) as usize - 1
216 }
217
218 /// Read the goodbye tail and perform some sanity checks.
29c17fc0 219 async fn read_tail_entry(input: &'_ dyn ReadAt, end_offset: u64) -> io::Result<GoodbyeItem> {
6cd4f635
WB
220 if end_offset < (size_of::<GoodbyeItem>() as u64) {
221 io_bail!("goodbye tail does not fit");
222 }
223
224 let tail_offset = end_offset - (size_of::<GoodbyeItem>() as u64);
225 let tail: GoodbyeItem = input.read_entry_at(tail_offset).await?;
226
227 if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER {
228 io_bail!("no goodbye tail marker found");
229 }
230
231 Ok(tail)
232 }
233
234 /// Get a decoder for the directory contents.
29c17fc0 235 pub(crate) async fn decode_full(&self) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
dc4a2854 236 let (dir, decoder) = self.decode_one_entry(self.entry_range(), None).await?;
6cd4f635
WB
237 if !dir.is_dir() {
238 io_bail!("directory does not seem to be a directory");
239 }
240 Ok(decoder)
241 }
242
243 async fn get_decoder(
244 &self,
245 entry_range: Range<u64>,
246 file_name: Option<&Path>,
29c17fc0 247 ) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> {
6cd4f635 248 Ok(DecoderImpl::new_full(
29c17fc0 249 SeqReadAtAdapter::new(self.input.clone(), entry_range),
6cd4f635
WB
250 match file_name {
251 None => self.path.clone(),
252 Some(file) => self.path.join(file),
253 },
254 )
255 .await?)
256 }
257
258 async fn decode_one_entry(
259 &self,
260 entry_range: Range<u64>,
261 file_name: Option<&Path>,
29c17fc0 262 ) -> io::Result<(Entry, DecoderImpl<SeqReadAtAdapter<T>>)> {
6cd4f635
WB
263 let mut decoder = self.get_decoder(entry_range, file_name).await?;
264 let entry = decoder
265 .next()
266 .await
267 .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??;
268 Ok((entry, decoder))
269 }
270
271 fn lookup_hash_position(&self, hash: u64) -> Option<usize> {
272 format::search_binary_tree_array_by(&self.table, |i| hash.cmp(&i.hash))
273 }
274
dc4a2854 275 async fn lookup_self(&self) -> io::Result<FileEntryImpl<T>> {
c76d3f98 276 let (entry, _decoder) = self.decode_one_entry(self.entry_range(), None).await?;
dc4a2854
WB
277 Ok(FileEntryImpl {
278 input: self.input.clone(),
279 entry,
dc4a2854
WB
280 end_offset: self.end_offset(),
281 })
282 }
283
6cd4f635 284 /// Lookup a directory entry.
29c17fc0 285 pub async fn lookup(&self, path: &Path) -> io::Result<Option<FileEntryImpl<T>>> {
dc4a2854
WB
286 let mut cur: Option<FileEntryImpl<T>> = None;
287
288 let mut first = true;
289 for component in path.components() {
290 use std::path::Component;
291
292 let first = mem::replace(&mut first, false);
293
294 let component = match component {
295 Component::Normal(path) => path,
296 Component::ParentDir => io_bail!("cannot enter parent directory in archive"),
297 Component::RootDir | Component::CurDir if first => {
298 cur = Some(self.lookup_self().await?);
299 continue;
300 }
301 Component::CurDir => continue,
302 _ => io_bail!("invalid component in path"),
303 };
304
305 let next = match cur {
306 Some(entry) => {
307 entry
308 .enter_directory()
309 .await?
310 .lookup_component(component)
311 .await?
312 }
313 None => self.lookup_component(component).await?,
314 };
315
316 if next.is_none() {
317 return Ok(None);
318 }
319
320 cur = next;
321 }
322
323 Ok(cur)
324 }
325
326 /// Lookup a single directory entry component (does not handle multiple components in path)
327 pub async fn lookup_component(&self, path: &OsStr) -> io::Result<Option<FileEntryImpl<T>>> {
328 let hash = format::hash_filename(path.as_bytes());
6cd4f635
WB
329 let index = match self.lookup_hash_position(hash) {
330 Some(index) => index,
331 None => return Ok(None),
332 };
333
334 // Lookup FILENAME, if it doesn't match increase index, once found, use the GoodbyeItem's
335 // offset+size as well as the file's Entry to return a DirEntry::Dir or Dir::Entry.
336
337 while index < self.table.len() && self.table[index].hash == hash {
338 let cursor = self.get_cursor(index).await?;
339 if cursor.file_name == path {
aabb78a4 340 return Ok(Some(cursor.decode_entry().await?));
6cd4f635
WB
341 }
342 }
343
344 Ok(None)
345 }
346
29c17fc0 347 async fn get_cursor<'a>(&'a self, index: usize) -> io::Result<DirEntryImpl<'a, T>> {
6cd4f635
WB
348 let entry = &self.table[index];
349 let file_goodbye_ofs = entry.offset;
350 if self.goodbye_ofs < file_goodbye_ofs {
351 io_bail!("invalid file offset");
352 }
353
354 let file_ofs = self.goodbye_ofs - file_goodbye_ofs;
355 let (file_name, entry_ofs) = self.read_filename_entry(file_ofs).await?;
356
357 Ok(DirEntryImpl {
358 dir: self,
359 file_name,
360 entry_range: Range {
361 start: entry_ofs,
362 end: file_ofs + entry.size,
363 },
364 })
365 }
366
367 async fn read_filename_entry(&self, file_ofs: u64) -> io::Result<(PathBuf, u64)> {
29c17fc0 368 let head: format::Header = (&self.input as &dyn ReadAt).read_entry_at(file_ofs).await?;
6cd4f635
WB
369 if head.htype != format::PXAR_FILENAME {
370 io_bail!("expected PXAR_FILENAME header, found: {:x}", head.htype);
371 }
372
29c17fc0 373 let mut path = (&self.input as &dyn ReadAt)
6cd4f635
WB
374 .read_exact_data_at(
375 head.content_size() as usize,
376 file_ofs + (size_of_val(&head) as u64),
377 )
378 .await?;
379
380 if path.pop() != Some(0) {
381 io_bail!("invalid file name (missing terminating zero)");
382 }
383
384 if path.is_empty() {
385 io_bail!("invalid empty file name");
386 }
387
388 let file_name = PathBuf::from(OsString::from_vec(path));
389 format::check_file_name(&file_name)?;
390
391 Ok((file_name, file_ofs + head.full_size()))
392 }
393
29c17fc0 394 pub fn read_dir(&self) -> ReadDirImpl<T> {
6cd4f635
WB
395 ReadDirImpl::new(self, 0)
396 }
397}
398
399/// A file entry retrieved from a Directory.
5cf335be 400pub(crate) struct FileEntryImpl<T: Clone + ReadAt> {
29c17fc0 401 input: T,
6cd4f635 402 entry: Entry,
6cd4f635
WB
403 end_offset: u64,
404}
405
29c17fc0
WB
406impl<T: Clone + ReadAt> FileEntryImpl<T> {
407 pub async fn enter_directory(&self) -> io::Result<DirectoryImpl<T>> {
6cd4f635
WB
408 if !self.entry.is_dir() {
409 io_bail!("enter_directory() on a non-directory");
410 }
411
29c17fc0 412 DirectoryImpl::open_at_end(self.input.clone(), self.end_offset, self.entry.path.clone())
6cd4f635
WB
413 .await
414 }
415
98b894a9 416 pub async fn contents(&self) -> io::Result<FileContentsImpl<T>> {
98b894a9 417 match self.entry.kind {
c76d3f98
WB
418 EntryKind::File { offset: None, .. } => {
419 io_bail!("cannot open file, reader provided no offset")
420 }
421 EntryKind::File {
422 size,
423 offset: Some(offset),
424 } => Ok(FileContentsImpl::new(
98b894a9
WB
425 self.input.clone(),
426 offset..(offset + size),
427 )),
428 _ => io_bail!("not a file"),
429 }
430 }
431
6cd4f635
WB
432 #[inline]
433 pub fn into_entry(self) -> Entry {
434 self.entry
435 }
436
437 #[inline]
438 pub fn entry(&self) -> &Entry {
439 &self.entry
440 }
441}
442
443/// An iterator over the contents of a directory.
5cf335be 444pub(crate) struct ReadDirImpl<'a, T> {
29c17fc0 445 dir: &'a DirectoryImpl<T>,
6cd4f635
WB
446 at: usize,
447}
448
29c17fc0 449impl<'a, T: Clone + ReadAt> ReadDirImpl<'a, T> {
5cf335be 450 fn new(dir: &'a DirectoryImpl<T>, at: usize) -> Self {
6cd4f635
WB
451 Self { dir, at }
452 }
453
98b894a9 454 /// Get the next entry.
29c17fc0 455 pub async fn next(&mut self) -> io::Result<Option<DirEntryImpl<'a, T>>> {
6cd4f635
WB
456 if self.at == self.dir.table.len() {
457 Ok(None)
458 } else {
459 let cursor = self.dir.get_cursor(self.at).await?;
460 self.at += 1;
461 Ok(Some(cursor))
462 }
463 }
98b894a9
WB
464
465 /// Efficient alternative to `Iterator::skip`.
466 #[inline]
467 pub fn skip(self, n: usize) -> Self {
468 Self {
469 at: (self.at + n).min(self.dir.table.len()),
470 dir: self.dir,
471 }
472 }
473
474 /// Efficient alternative to `Iterator::count`.
475 #[inline]
476 pub fn count(self) -> usize {
477 self.dir.table.len()
478 }
6cd4f635
WB
479}
480
481/// A cursor pointing to a file in a directory.
482///
483/// At this point only the file name has been read and we remembered the position for finding the
484/// actual data. This can be upgraded into a FileEntryImpl.
5cf335be 485pub(crate) struct DirEntryImpl<'a, T: Clone + ReadAt> {
29c17fc0 486 dir: &'a DirectoryImpl<T>,
6cd4f635
WB
487 file_name: PathBuf,
488 entry_range: Range<u64>,
489}
490
29c17fc0 491impl<'a, T: Clone + ReadAt> DirEntryImpl<'a, T> {
6cd4f635
WB
492 pub fn file_name(&self) -> &Path {
493 &self.file_name
494 }
495
aabb78a4 496 async fn decode_entry(&self) -> io::Result<FileEntryImpl<T>> {
6cd4f635 497 let end_offset = self.entry_range.end;
c76d3f98 498 let (entry, _decoder) = self
6cd4f635
WB
499 .dir
500 .decode_one_entry(self.entry_range.clone(), Some(&self.file_name))
501 .await?;
6cd4f635
WB
502
503 Ok(FileEntryImpl {
29c17fc0 504 input: self.dir.input.clone(),
6cd4f635 505 entry,
6cd4f635
WB
506 end_offset,
507 })
508 }
509}
510
/// Gives access to the payload of a single file.
pub(crate) struct FileContentsImpl<T> {
    /// The underlying input the archive is read from.
    input: T,

    /// Where the file's contents live, as absolute offsets inside the `input`.
    range: Range<u64>,
}
518
519impl<T: Clone + ReadAt> FileContentsImpl<T> {
520 pub fn new(input: T, range: Range<u64>) -> Self {
521 Self { input, range }
522 }
523
524 #[inline]
525 pub fn file_size(&self) -> u64 {
526 self.range.end - self.range.start
527 }
528
529 async fn read_at(&self, mut buf: &mut [u8], offset: u64) -> io::Result<usize> {
530 let size = self.file_size();
531 if offset >= size {
532 return Ok(0);
533 }
534 let remaining = size - offset;
535
536 if remaining < buf.len() as u64 {
537 buf = &mut buf[..(remaining as usize)];
538 }
539
c76d3f98
WB
540 (&self.input as &dyn ReadAt)
541 .read_at(buf, self.range.start + offset)
542 .await
98b894a9
WB
543 }
544}
545
/// Presents a byte range of a random-access input as something that is read front to back.
#[doc(hidden)]
pub struct SeqReadAtAdapter<T> {
    /// The underlying random-access input.
    input: T,
    /// Absolute range still to be read; `start` moves forward as data is consumed.
    range: Range<u64>,
}
551
29c17fc0
WB
552impl<T: ReadAt> SeqReadAtAdapter<T> {
553 pub fn new(input: T, range: Range<u64>) -> Self {
6cd4f635
WB
554 Self { input, range }
555 }
556
557 #[inline]
558 fn remaining(&self) -> usize {
559 (self.range.end - self.range.start) as usize
560 }
561}
562
29c17fc0 563impl<T: ReadAt> decoder::SeqRead for SeqReadAtAdapter<T> {
6cd4f635
WB
564 fn poll_seq_read(
565 self: Pin<&mut Self>,
566 cx: &mut Context,
567 buf: &mut [u8],
568 ) -> Poll<io::Result<usize>> {
569 let len = buf.len().min(self.remaining());
570 let buf = &mut buf[..len];
571
29c17fc0 572 let this = unsafe { self.get_unchecked_mut() };
6cd4f635
WB
573
574 let got = ready!(unsafe {
29c17fc0 575 Pin::new_unchecked(&this.input).poll_read_at(cx, buf, this.range.start)
6cd4f635
WB
576 })?;
577 this.range.start += got as u64;
578 Poll::Ready(Ok(got))
579 }
580
581 fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> {
582 Poll::Ready(Some(Ok(self.range.start)))
583 }
584}