]>
Commit | Line | Data |
---|---|---|
6cd4f635 WB |
1 | //! Random access for PXAR files. |
2 | ||
e5a2495e WB |
3 | #![deny(missing_docs)] |
4 | ||
dc4a2854 | 5 | use std::ffi::{OsStr, OsString}; |
e72062a9 | 6 | use std::future::Future; |
6cd4f635 | 7 | use std::io; |
dc4a2854 | 8 | use std::mem::{self, size_of, size_of_val, MaybeUninit}; |
6cd4f635 WB |
9 | use std::ops::Range; |
10 | use std::os::unix::ffi::{OsStrExt, OsStringExt}; | |
11 | use std::path::{Path, PathBuf}; | |
12 | use std::pin::Pin; | |
9d8af6f2 | 13 | use std::sync::Arc; |
6cd4f635 WB |
14 | use std::task::{Context, Poll}; |
15 | ||
16 | use endian_trait::Endian; | |
17 | ||
fbddffdc | 18 | use crate::binary_tree_array; |
6cd4f635 WB |
19 | use crate::decoder::{self, DecoderImpl}; |
20 | use crate::format::{self, GoodbyeItem}; | |
6cd4f635 | 21 | use crate::util; |
98b894a9 | 22 | use crate::{Entry, EntryKind}; |
6cd4f635 WB |
23 | |
24 | pub mod aio; | |
9d8af6f2 | 25 | pub mod cache; |
6cd4f635 WB |
26 | pub mod sync; |
27 | ||
e72062a9 WB |
28 | pub mod read_at; |
29 | ||
6cd4f635 | 30 | #[doc(inline)] |
2c23bd09 | 31 | pub use sync::{Accessor, DirEntry, Directory, FileEntry, ReadDir}; |
6cd4f635 | 32 | |
e72062a9 WB |
33 | #[doc(inline)] |
34 | pub use read_at::{MaybeReady, ReadAt, ReadAtExt, ReadAtOperation}; | |
35 | ||
9d8af6f2 WB |
36 | use cache::Cache; |
37 | ||
06070d26 WB |
/// Location of an entry inside a pxar archive, as needed by the unsafe raw random access
/// methods.
#[derive(Clone, Debug)]
pub struct EntryRangeInfo {
    /// Offset to the `FILENAME` header, or `None` if no such offset is known for the entry
    /// (see [`EntryRangeInfo::toplevel`]).
    pub filename_header_offset: Option<u64>,

    /// Byte range spanning an entry in a pxar archive.
    pub entry_range: Range<u64>,
}

impl EntryRangeInfo {
    /// Build the range info for a "toplevel" entry, that is, one which only consists of a
    /// byte range and carries no file name header offset.
    pub fn toplevel(entry_range: Range<u64>) -> Self {
        let filename_header_offset = None;
        Self {
            entry_range,
            filename_header_offset,
        }
    }
}
56 | ||
e72062a9 | 57 | /// awaitable version of `ReadAt`. |
c68a29b5 WB |
58 | async fn read_at<T>(input: &T, buf: &mut [u8], offset: u64) -> io::Result<usize> |
59 | where | |
e72062a9 | 60 | T: ReadAtExt, |
c68a29b5 | 61 | { |
e72062a9 | 62 | input.read_at(buf, offset).await |
c68a29b5 WB |
63 | } |
64 | ||
65 | /// `read_exact_at` - since that's what we _actually_ want most of the time. | |
66 | async fn read_exact_at<T>(input: &T, mut buf: &mut [u8], mut offset: u64) -> io::Result<()> | |
67 | where | |
e72062a9 | 68 | T: ReadAt, |
c68a29b5 WB |
69 | { |
70 | while !buf.is_empty() { | |
71 | match read_at(input, buf, offset).await? { | |
72 | 0 => io_bail!("unexpected EOF"), | |
73 | got => { | |
74 | buf = &mut buf[got..]; | |
75 | offset += got as u64; | |
6cd4f635 WB |
76 | } |
77 | } | |
6cd4f635 | 78 | } |
c68a29b5 WB |
79 | Ok(()) |
80 | } | |
6cd4f635 | 81 | |
c68a29b5 WB |
82 | /// Helper to read into an `Endian`-implementing `struct`. |
83 | async fn read_entry_at<T, E: Endian>(input: &T, offset: u64) -> io::Result<E> | |
84 | where | |
e72062a9 | 85 | T: ReadAt, |
c68a29b5 WB |
86 | { |
87 | let mut data = MaybeUninit::<E>::uninit(); | |
88 | let buf = | |
89 | unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<E>()) }; | |
90 | read_exact_at(input, buf, offset).await?; | |
91 | Ok(unsafe { data.assume_init().from_le() }) | |
92 | } | |
6cd4f635 | 93 | |
c68a29b5 WB |
94 | /// Helper to read into an allocated byte vector. |
95 | async fn read_exact_data_at<T>(input: &T, size: usize, offset: u64) -> io::Result<Vec<u8>> | |
96 | where | |
e72062a9 | 97 | T: ReadAt, |
c68a29b5 | 98 | { |
81d50029 | 99 | let mut data = unsafe { util::vec_new_uninitialized(size) }; |
c68a29b5 WB |
100 | read_exact_at(input, &mut data[..], offset).await?; |
101 | Ok(data) | |
6cd4f635 WB |
102 | } |
103 | ||
/// Allow using trait objects for `T: ReadAt`
///
/// Both methods simply forward to the trait object behind the reference.
impl<'d> ReadAt for &(dyn ReadAt + 'd) {
    fn start_read_at<'a>(
        self: Pin<&'a Self>,
        cx: &mut Context,
        buf: &'a mut [u8],
        offset: u64,
    ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> {
        // `&**self` goes from the pinned `&&dyn ReadAt` to the inner `&dyn ReadAt`, which is
        // then re-wrapped in a `Pin` to call the trait object's implementation.
        // NOTE(review): `Pin::new_unchecked` is presumably sound because only a shared
        // reference is involved and the pointee is never moved through it - confirm.
        unsafe { Pin::new_unchecked(&**self).start_read_at(cx, buf, offset) }
    }

    fn poll_complete<'a>(
        self: Pin<&'a Self>,
        op: ReadAtOperation<'a>,
    ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> {
        // Same re-pinning as in `start_read_at`.
        unsafe { Pin::new_unchecked(&**self).poll_complete(op) }
    }
}
122 | ||
/// Convenience impl for `Arc<dyn ReadAt + Send + Sync + 'static>`. Since `ReadAt` only requires
/// immutable `&self`, this adds some convenience by allowing to just `Arc` any `'static` type that
/// implements `ReadAt` for type monomorphization.
impl ReadAt for Arc<dyn ReadAt + Send + Sync + 'static> {
    fn start_read_at<'a>(
        self: Pin<&'a Self>,
        cx: &mut Context,
        buf: &'a mut [u8],
        offset: u64,
    ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> {
        // Project the pin from the `Arc` to the trait object it points to and forward.
        // NOTE(review): `map_unchecked` is presumably sound because the pointee lives in the
        // `Arc`'s allocation and is not moved while the `Arc` is borrowed - confirm.
        unsafe {
            self.map_unchecked(|this| &**this)
                .start_read_at(cx, buf, offset)
        }
    }

    fn poll_complete<'a>(
        self: Pin<&'a Self>,
        op: ReadAtOperation<'a>,
    ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> {
        // Same projection as in `start_read_at`.
        unsafe { self.map_unchecked(|this| &**this).poll_complete(op) }
    }
}
146 | ||
cde0236c WB |
147 | /// Convenience impl for in-memory byte slices. |
148 | impl ReadAt for &'_ [u8] { | |
149 | fn start_read_at<'a>( | |
150 | self: Pin<&'a Self>, | |
151 | _cx: &mut Context, | |
152 | buf: &'a mut [u8], | |
153 | offset: u64, | |
154 | ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> { | |
155 | if offset >= self.len() as u64 { | |
156 | return MaybeReady::Ready(Ok(0)); | |
157 | } | |
158 | ||
159 | let offset = offset as usize; | |
160 | let end = (offset + buf.len()).min(self.len()); | |
161 | let size = end - offset; | |
162 | buf[..size].copy_from_slice(&self[offset..end]); | |
163 | MaybeReady::Ready(Ok(size)) | |
164 | } | |
165 | ||
166 | fn poll_complete<'a>( | |
167 | self: Pin<&'a Self>, | |
168 | _op: ReadAtOperation<'a>, | |
169 | ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> { | |
170 | panic!("start_read_at on byte slice returned Pending"); | |
171 | } | |
172 | } | |
173 | ||
/// Optional caches shared (via `Arc`) between an accessor and the directories and entries
/// opened through it.
#[derive(Clone, Default)]
struct Caches {
    /// The goodbye table cache maps goodbye table offsets to cache entries.
    ///
    /// `None` (the `Default`) means goodbye tables are not cached.
    gbt_cache: Option<Arc<dyn Cache<u64, [GoodbyeItem]> + Send + Sync>>,
}
179 | ||
/// The random access state machine implementation.
pub(crate) struct AccessorImpl<T> {
    /// The reader giving random access to the archive.
    input: T,
    /// Size of the archive in bytes, as passed to `new`.
    size: u64,
    /// Caches handed to every directory and file entry opened via this accessor.
    caches: Arc<Caches>,
}
186 | ||
187 | impl<T: ReadAt> AccessorImpl<T> { | |
188 | pub async fn new(input: T, size: u64) -> io::Result<Self> { | |
189 | if size < (size_of::<GoodbyeItem>() as u64) { | |
190 | io_bail!("too small to contain a pxar archive"); | |
191 | } | |
9d8af6f2 WB |
192 | |
193 | Ok(Self { | |
194 | input, | |
195 | size, | |
196 | caches: Arc::new(Caches::default()), | |
197 | }) | |
6cd4f635 WB |
198 | } |
199 | ||
a2530fb7 WB |
200 | pub fn size(&self) -> u64 { |
201 | self.size | |
202 | } | |
203 | ||
1b25fc08 | 204 | pub async fn open_root_ref(&self) -> io::Result<DirectoryImpl<&dyn ReadAt>> { |
9d8af6f2 WB |
205 | DirectoryImpl::open_at_end( |
206 | &self.input as &dyn ReadAt, | |
207 | self.size, | |
208 | "/".into(), | |
209 | Arc::clone(&self.caches), | |
210 | ) | |
211 | .await | |
29c17fc0 | 212 | } |
b764a2b1 WB |
213 | |
214 | pub fn set_goodbye_table_cache( | |
215 | &mut self, | |
216 | cache: Option<Arc<dyn Cache<u64, [GoodbyeItem]> + Send + Sync>>, | |
217 | ) { | |
218 | let new_caches = Arc::new(Caches { | |
219 | gbt_cache: cache, | |
1b25fc08 | 220 | //..*self.caches |
b764a2b1 WB |
221 | }); |
222 | self.caches = new_caches; | |
223 | } | |
29c17fc0 WB |
224 | } |
225 | ||
6b9e2478 WB |
226 | async fn get_decoder<T: ReadAt>( |
227 | input: T, | |
228 | entry_range: Range<u64>, | |
229 | path: PathBuf, | |
230 | ) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> { | |
a50514a9 | 231 | DecoderImpl::new_full(SeqReadAtAdapter::new(input, entry_range), path, true).await |
6b9e2478 WB |
232 | } |
233 | ||
06070d26 WB |
234 | // NOTE: This performs the Decoder::read_next_item() behavior! Keep in mind when changing! |
235 | async fn get_decoder_at_filename<T: ReadAt>( | |
236 | input: T, | |
237 | entry_range: Range<u64>, | |
238 | path: PathBuf, | |
239 | ) -> io::Result<(DecoderImpl<SeqReadAtAdapter<T>>, u64)> { | |
1187920f WB |
240 | // Read the header, it should be a FILENAME, then skip over it and its length: |
241 | let header: format::Header = read_entry_at(&input, entry_range.start).await?; | |
242 | header.check_header_size()?; | |
243 | ||
244 | if header.htype != format::PXAR_FILENAME { | |
245 | io_bail!("expected filename entry, got {:?}", header); | |
06070d26 | 246 | } |
1187920f WB |
247 | |
248 | let entry_offset = entry_range.start + header.full_size(); | |
249 | if entry_offset >= entry_range.end { | |
250 | io_bail!("filename exceeds current file range"); | |
06070d26 | 251 | } |
1187920f | 252 | |
e72062a9 WB |
253 | Ok(( |
254 | get_decoder(input, entry_offset..entry_range.end, path).await?, | |
255 | entry_offset, | |
256 | )) | |
06070d26 WB |
257 | } |
258 | ||
29c17fc0 WB |
259 | impl<T: Clone + ReadAt> AccessorImpl<T> { |
260 | pub async fn open_root(&self) -> io::Result<DirectoryImpl<T>> { | |
9d8af6f2 WB |
261 | DirectoryImpl::open_at_end( |
262 | self.input.clone(), | |
263 | self.size, | |
264 | "/".into(), | |
265 | Arc::clone(&self.caches), | |
266 | ) | |
267 | .await | |
6cd4f635 | 268 | } |
ceb83806 WB |
269 | |
270 | /// Allow opening a directory at a specified offset. | |
271 | pub async unsafe fn open_dir_at_end(&self, offset: u64) -> io::Result<DirectoryImpl<T>> { | |
272 | DirectoryImpl::open_at_end( | |
273 | self.input.clone(), | |
274 | offset, | |
275 | "/".into(), | |
276 | Arc::clone(&self.caches), | |
277 | ) | |
278 | .await | |
279 | } | |
6b9e2478 WB |
280 | |
281 | /// Allow opening a regular file from a specified range. | |
282 | pub async unsafe fn open_file_at_range( | |
283 | &self, | |
06070d26 | 284 | entry_range_info: &EntryRangeInfo, |
6b9e2478 | 285 | ) -> io::Result<FileEntryImpl<T>> { |
06070d26 WB |
286 | let mut decoder = get_decoder( |
287 | self.input.clone(), | |
288 | entry_range_info.entry_range.clone(), | |
289 | PathBuf::new(), | |
1250e3ea WB |
290 | ) |
291 | .await?; | |
6b9e2478 WB |
292 | let entry = decoder |
293 | .next() | |
294 | .await | |
295 | .ok_or_else(|| io_format_err!("unexpected EOF while decoding file entry"))??; | |
296 | Ok(FileEntryImpl { | |
297 | input: self.input.clone(), | |
298 | entry, | |
06070d26 | 299 | entry_range_info: entry_range_info.clone(), |
6b9e2478 WB |
300 | caches: Arc::clone(&self.caches), |
301 | }) | |
302 | } | |
303 | ||
304 | /// Allow opening arbitrary contents from a specific range. | |
305 | pub unsafe fn open_contents_at_range(&self, range: Range<u64>) -> FileContentsImpl<T> { | |
306 | FileContentsImpl::new(self.input.clone(), range) | |
307 | } | |
6bfadb8a WB |
308 | |
309 | /// Following a hardlink breaks a couple of conventions we otherwise have, particularly we will | |
310 | /// never know the actual length of the target entry until we're done decoding it, so this | |
311 | /// needs to happen at the accessor level, rather than a "sub-entry-reader". | |
06070d26 WB |
312 | pub async fn follow_hardlink(&self, entry: &FileEntryImpl<T>) -> io::Result<FileEntryImpl<T>> { |
313 | let link_offset = match entry.entry.kind() { | |
314 | EntryKind::Hardlink(link) => link.offset, | |
315 | _ => io_bail!("cannot resolve a non-hardlink"), | |
316 | }; | |
317 | ||
318 | let entry_file_offset = entry | |
319 | .entry_range_info | |
320 | .filename_header_offset | |
321 | .ok_or_else(|| io_format_err!("cannot follow hardlink without a file entry header"))?; | |
322 | ||
323 | if link_offset > entry_file_offset { | |
324 | io_bail!("invalid offset in hardlink"); | |
325 | } | |
326 | ||
327 | let link_offset = entry_file_offset - link_offset; | |
328 | ||
1250e3ea WB |
329 | let (mut decoder, entry_offset) = |
330 | get_decoder_at_filename(self.input.clone(), link_offset..self.size, PathBuf::new()) | |
331 | .await?; | |
06070d26 | 332 | |
6bfadb8a WB |
333 | let entry = decoder |
334 | .next() | |
335 | .await | |
336 | .ok_or_else(|| io_format_err!("unexpected EOF while following a hardlink"))??; | |
1187920f | 337 | |
6bfadb8a WB |
338 | match entry.kind() { |
339 | EntryKind::File { offset: None, .. } => { | |
340 | io_bail!("failed to follow hardlink, reader provided no offsets"); | |
341 | } | |
b0487d4f WB |
342 | EntryKind::File { |
343 | offset: Some(offset), | |
344 | size, | |
345 | } => { | |
06070d26 WB |
346 | let meta_size = offset - link_offset; |
347 | let entry_end = link_offset + meta_size + size; | |
6bfadb8a WB |
348 | Ok(FileEntryImpl { |
349 | input: self.input.clone(), | |
350 | entry, | |
06070d26 WB |
351 | entry_range_info: EntryRangeInfo { |
352 | filename_header_offset: Some(link_offset), | |
353 | entry_range: entry_offset..entry_end, | |
354 | }, | |
6bfadb8a WB |
355 | caches: Arc::clone(&self.caches), |
356 | }) | |
357 | } | |
358 | _ => io_bail!("hardlink does not point to a regular file"), | |
359 | } | |
360 | } | |
6cd4f635 WB |
361 | } |
362 | ||
/// The directory random-access state machine implementation.
pub(crate) struct DirectoryImpl<T> {
    /// The reader giving random access to the archive.
    input: T,
    /// Offset at which the directory's entry starts (goodbye offset minus the tail's `offset`).
    entry_ofs: u64,
    /// Offset at which the goodbye table starts (end offset minus the tail's `size`).
    goodbye_ofs: u64,
    /// Number of bytes from `entry_ofs` up to the directory's end offset.
    size: u64,
    /// The goodbye table, either taken from the cache or loaded from the archive.
    table: Arc<[GoodbyeItem]>,
    /// Path of this directory, used as base path when decoding contained entries.
    path: PathBuf,
    /// Caches inherited from the accessor.
    caches: Arc<Caches>,
}
373 | ||
29c17fc0 | 374 | impl<T: Clone + ReadAt> DirectoryImpl<T> { |
6cd4f635 | 375 | /// Open a directory ending at the specified position. |
9d8af6f2 | 376 | async fn open_at_end( |
29c17fc0 | 377 | input: T, |
6cd4f635 WB |
378 | end_offset: u64, |
379 | path: PathBuf, | |
9d8af6f2 | 380 | caches: Arc<Caches>, |
29c17fc0 WB |
381 | ) -> io::Result<DirectoryImpl<T>> { |
382 | let tail = Self::read_tail_entry(&input, end_offset).await?; | |
6cd4f635 WB |
383 | |
384 | if end_offset < tail.size { | |
385 | io_bail!("goodbye tail size out of range"); | |
386 | } | |
387 | ||
388 | let goodbye_ofs = end_offset - tail.size; | |
389 | ||
390 | if goodbye_ofs < tail.offset { | |
391 | io_bail!("goodbye offset out of range"); | |
392 | } | |
393 | ||
394 | let entry_ofs = goodbye_ofs - tail.offset; | |
395 | let size = end_offset - entry_ofs; | |
396 | ||
9d8af6f2 WB |
397 | let table: Option<Arc<[GoodbyeItem]>> = caches |
398 | .gbt_cache | |
399 | .as_ref() | |
400 | .and_then(|cache| cache.fetch(goodbye_ofs)); | |
401 | ||
6cd4f635 WB |
402 | let mut this = Self { |
403 | input, | |
404 | entry_ofs, | |
405 | goodbye_ofs, | |
406 | size, | |
9d8af6f2 | 407 | table: table.as_ref().map_or_else(|| Arc::new([]), Arc::clone), |
6cd4f635 | 408 | path, |
9d8af6f2 | 409 | caches, |
6cd4f635 WB |
410 | }; |
411 | ||
412 | // sanity check: | |
413 | if this.table_size() % (size_of::<GoodbyeItem>() as u64) != 0 { | |
414 | io_bail!("invalid goodbye table size: {}", this.table_size()); | |
415 | } | |
416 | ||
9d8af6f2 WB |
417 | if table.is_none() { |
418 | this.table = this.load_table().await?; | |
419 | if let Some(ref cache) = this.caches.gbt_cache { | |
420 | cache.insert(goodbye_ofs, Arc::clone(&this.table)); | |
421 | } | |
422 | } | |
6cd4f635 WB |
423 | |
424 | Ok(this) | |
425 | } | |
426 | ||
427 | /// Load the entire goodbye table: | |
9d8af6f2 | 428 | async fn load_table(&self) -> io::Result<Arc<[GoodbyeItem]>> { |
6cd4f635 | 429 | let len = self.len(); |
81d50029 | 430 | let mut data; |
6cd4f635 | 431 | unsafe { |
81d50029 | 432 | data = crate::util::vec_new_uninitialized(self.len()); |
6cd4f635 WB |
433 | let slice = std::slice::from_raw_parts_mut( |
434 | data.as_mut_ptr() as *mut u8, | |
2c23bd09 | 435 | len * size_of::<GoodbyeItem>(), |
6cd4f635 | 436 | ); |
c68a29b5 | 437 | read_exact_at(&self.input, slice, self.table_offset()).await?; |
6cd4f635 | 438 | } |
9d8af6f2 | 439 | Ok(Arc::from(data)) |
6cd4f635 WB |
440 | } |
441 | ||
442 | #[inline] | |
443 | fn end_offset(&self) -> u64 { | |
444 | self.entry_ofs + self.size | |
445 | } | |
446 | ||
dc4a2854 WB |
447 | #[inline] |
448 | fn entry_range(&self) -> Range<u64> { | |
449 | self.entry_ofs..self.end_offset() | |
450 | } | |
451 | ||
6cd4f635 WB |
452 | #[inline] |
453 | fn table_size(&self) -> u64 { | |
454 | (self.end_offset() - self.goodbye_ofs) - (size_of::<format::Header>() as u64) | |
455 | } | |
456 | ||
457 | #[inline] | |
458 | fn table_offset(&self) -> u64 { | |
459 | self.goodbye_ofs + (size_of::<format::Header>() as u64) | |
460 | } | |
461 | ||
462 | /// Length *excluding* the tail marker! | |
463 | #[inline] | |
464 | fn len(&self) -> usize { | |
465 | (self.table_size() / (size_of::<GoodbyeItem>() as u64)) as usize - 1 | |
466 | } | |
467 | ||
468 | /// Read the goodbye tail and perform some sanity checks. | |
c68a29b5 | 469 | async fn read_tail_entry(input: &T, end_offset: u64) -> io::Result<GoodbyeItem> { |
6cd4f635 WB |
470 | if end_offset < (size_of::<GoodbyeItem>() as u64) { |
471 | io_bail!("goodbye tail does not fit"); | |
472 | } | |
473 | ||
474 | let tail_offset = end_offset - (size_of::<GoodbyeItem>() as u64); | |
c68a29b5 | 475 | let tail: GoodbyeItem = read_entry_at(input, tail_offset).await?; |
6cd4f635 WB |
476 | |
477 | if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER { | |
478 | io_bail!("no goodbye tail marker found"); | |
479 | } | |
480 | ||
481 | Ok(tail) | |
482 | } | |
483 | ||
484 | /// Get a decoder for the directory contents. | |
29c17fc0 | 485 | pub(crate) async fn decode_full(&self) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> { |
dc4a2854 | 486 | let (dir, decoder) = self.decode_one_entry(self.entry_range(), None).await?; |
6cd4f635 WB |
487 | if !dir.is_dir() { |
488 | io_bail!("directory does not seem to be a directory"); | |
489 | } | |
490 | Ok(decoder) | |
491 | } | |
492 | ||
493 | async fn get_decoder( | |
494 | &self, | |
495 | entry_range: Range<u64>, | |
496 | file_name: Option<&Path>, | |
29c17fc0 | 497 | ) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> { |
6b9e2478 WB |
498 | get_decoder( |
499 | self.input.clone(), | |
500 | entry_range, | |
6cd4f635 WB |
501 | match file_name { |
502 | None => self.path.clone(), | |
503 | Some(file) => self.path.join(file), | |
504 | }, | |
d3a83ee3 WB |
505 | ) |
506 | .await | |
6cd4f635 WB |
507 | } |
508 | ||
509 | async fn decode_one_entry( | |
510 | &self, | |
511 | entry_range: Range<u64>, | |
512 | file_name: Option<&Path>, | |
29c17fc0 | 513 | ) -> io::Result<(Entry, DecoderImpl<SeqReadAtAdapter<T>>)> { |
6cd4f635 WB |
514 | let mut decoder = self.get_decoder(entry_range, file_name).await?; |
515 | let entry = decoder | |
516 | .next() | |
517 | .await | |
518 | .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??; | |
519 | Ok((entry, decoder)) | |
520 | } | |
521 | ||
fbddffdc WB |
522 | fn lookup_hash_position(&self, hash: u64, start: usize, skip: usize) -> Option<usize> { |
523 | binary_tree_array::search_by(&self.table, start, skip, |i| hash.cmp(&i.hash)) | |
6cd4f635 WB |
524 | } |
525 | ||
a5922fbc | 526 | pub async fn lookup_self(&self) -> io::Result<FileEntryImpl<T>> { |
c76d3f98 | 527 | let (entry, _decoder) = self.decode_one_entry(self.entry_range(), None).await?; |
dc4a2854 WB |
528 | Ok(FileEntryImpl { |
529 | input: self.input.clone(), | |
530 | entry, | |
06070d26 WB |
531 | entry_range_info: EntryRangeInfo { |
532 | filename_header_offset: None, | |
533 | entry_range: self.entry_range(), | |
534 | }, | |
9d8af6f2 | 535 | caches: Arc::clone(&self.caches), |
dc4a2854 WB |
536 | }) |
537 | } | |
538 | ||
6cd4f635 | 539 | /// Lookup a directory entry. |
29c17fc0 | 540 | pub async fn lookup(&self, path: &Path) -> io::Result<Option<FileEntryImpl<T>>> { |
dc4a2854 WB |
541 | let mut cur: Option<FileEntryImpl<T>> = None; |
542 | ||
543 | let mut first = true; | |
544 | for component in path.components() { | |
545 | use std::path::Component; | |
546 | ||
547 | let first = mem::replace(&mut first, false); | |
548 | ||
549 | let component = match component { | |
550 | Component::Normal(path) => path, | |
551 | Component::ParentDir => io_bail!("cannot enter parent directory in archive"), | |
552 | Component::RootDir | Component::CurDir if first => { | |
553 | cur = Some(self.lookup_self().await?); | |
554 | continue; | |
555 | } | |
556 | Component::CurDir => continue, | |
557 | _ => io_bail!("invalid component in path"), | |
558 | }; | |
559 | ||
560 | let next = match cur { | |
561 | Some(entry) => { | |
562 | entry | |
563 | .enter_directory() | |
564 | .await? | |
565 | .lookup_component(component) | |
566 | .await? | |
567 | } | |
568 | None => self.lookup_component(component).await?, | |
569 | }; | |
570 | ||
571 | if next.is_none() { | |
572 | return Ok(None); | |
573 | } | |
574 | ||
575 | cur = next; | |
576 | } | |
577 | ||
578 | Ok(cur) | |
579 | } | |
580 | ||
581 | /// Lookup a single directory entry component (does not handle multiple components in path) | |
582 | pub async fn lookup_component(&self, path: &OsStr) -> io::Result<Option<FileEntryImpl<T>>> { | |
583 | let hash = format::hash_filename(path.as_bytes()); | |
fbddffdc | 584 | let first_index = match self.lookup_hash_position(hash, 0, 0) { |
6cd4f635 WB |
585 | Some(index) => index, |
586 | None => return Ok(None), | |
587 | }; | |
588 | ||
fbddffdc WB |
589 | // Lookup FILENAME, if the hash matches but the filename doesn't, check for a duplicate |
590 | // hash once found, use the GoodbyeItem's offset+size as well as the file's Entry to return | |
591 | // a DirEntry::Dir or Dir::Entry. | |
592 | // | |
593 | let mut dup = 0; | |
594 | loop { | |
595 | let index = match self.lookup_hash_position(hash, first_index, dup) { | |
596 | Some(index) => index, | |
597 | None => return Ok(None), | |
598 | }; | |
6cd4f635 | 599 | |
6cd4f635 WB |
600 | let cursor = self.get_cursor(index).await?; |
601 | if cursor.file_name == path { | |
aabb78a4 | 602 | return Ok(Some(cursor.decode_entry().await?)); |
6cd4f635 | 603 | } |
6cd4f635 | 604 | |
fbddffdc WB |
605 | dup += 1; |
606 | } | |
6cd4f635 WB |
607 | } |
608 | ||
1b25fc08 WB |
609 | // while clippy is technically right about this, the compiler won't accept it (yet) |
610 | #[allow(clippy::needless_lifetimes)] | |
29c17fc0 | 611 | async fn get_cursor<'a>(&'a self, index: usize) -> io::Result<DirEntryImpl<'a, T>> { |
6cd4f635 WB |
612 | let entry = &self.table[index]; |
613 | let file_goodbye_ofs = entry.offset; | |
614 | if self.goodbye_ofs < file_goodbye_ofs { | |
615 | io_bail!("invalid file offset"); | |
616 | } | |
617 | ||
618 | let file_ofs = self.goodbye_ofs - file_goodbye_ofs; | |
619 | let (file_name, entry_ofs) = self.read_filename_entry(file_ofs).await?; | |
620 | ||
70acf637 WB |
621 | let entry_range = Range { |
622 | start: entry_ofs, | |
623 | end: file_ofs + entry.size, | |
624 | }; | |
625 | if entry_range.end < entry_range.start { | |
626 | io_bail!( | |
627 | "bad file: invalid entry ranges for {:?}: \ | |
628 | start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}", | |
629 | file_name, | |
630 | entry_ofs, | |
631 | file_ofs, | |
632 | entry.size, | |
633 | ); | |
634 | } | |
635 | ||
6cd4f635 WB |
636 | Ok(DirEntryImpl { |
637 | dir: self, | |
638 | file_name, | |
06070d26 WB |
639 | entry_range_info: EntryRangeInfo { |
640 | filename_header_offset: Some(file_ofs), | |
641 | entry_range, | |
642 | }, | |
9d8af6f2 | 643 | caches: Arc::clone(&self.caches), |
6cd4f635 WB |
644 | }) |
645 | } | |
646 | ||
647 | async fn read_filename_entry(&self, file_ofs: u64) -> io::Result<(PathBuf, u64)> { | |
c68a29b5 | 648 | let head: format::Header = read_entry_at(&self.input, file_ofs).await?; |
6cd4f635 | 649 | if head.htype != format::PXAR_FILENAME { |
4a13b8a3 | 650 | io_bail!("expected PXAR_FILENAME header, found: {}", head); |
6cd4f635 WB |
651 | } |
652 | ||
c68a29b5 WB |
653 | let mut path = read_exact_data_at( |
654 | &self.input, | |
655 | head.content_size() as usize, | |
656 | file_ofs + (size_of_val(&head) as u64), | |
657 | ) | |
658 | .await?; | |
6cd4f635 WB |
659 | |
660 | if path.pop() != Some(0) { | |
661 | io_bail!("invalid file name (missing terminating zero)"); | |
662 | } | |
663 | ||
f3ac1c51 | 664 | crate::util::validate_filename(&path)?; |
bd99958c | 665 | |
6cd4f635 WB |
666 | let file_name = PathBuf::from(OsString::from_vec(path)); |
667 | format::check_file_name(&file_name)?; | |
668 | ||
669 | Ok((file_name, file_ofs + head.full_size())) | |
670 | } | |
671 | ||
29c17fc0 | 672 | pub fn read_dir(&self) -> ReadDirImpl<T> { |
6cd4f635 WB |
673 | ReadDirImpl::new(self, 0) |
674 | } | |
d3a83ee3 WB |
675 | |
676 | pub fn entry_count(&self) -> usize { | |
677 | self.table.len() | |
678 | } | |
6cd4f635 WB |
679 | } |
680 | ||
/// A file entry retrieved from a Directory.
#[derive(Clone)]
pub(crate) struct FileEntryImpl<T: Clone + ReadAt> {
    /// The reader giving random access to the archive.
    input: T,
    /// The decoded entry.
    entry: Entry,
    /// Where the entry is located inside the archive.
    entry_range_info: EntryRangeInfo,
    /// Caches passed on to directories opened from this entry.
    caches: Arc<Caches>,
}
689 | ||
29c17fc0 WB |
690 | impl<T: Clone + ReadAt> FileEntryImpl<T> { |
691 | pub async fn enter_directory(&self) -> io::Result<DirectoryImpl<T>> { | |
6cd4f635 WB |
692 | if !self.entry.is_dir() { |
693 | io_bail!("enter_directory() on a non-directory"); | |
694 | } | |
695 | ||
9d8af6f2 WB |
696 | DirectoryImpl::open_at_end( |
697 | self.input.clone(), | |
06070d26 | 698 | self.entry_range_info.entry_range.end, |
9d8af6f2 WB |
699 | self.entry.path.clone(), |
700 | Arc::clone(&self.caches), | |
701 | ) | |
702 | .await | |
6cd4f635 WB |
703 | } |
704 | ||
6b9e2478 WB |
705 | /// For use with unsafe accessor methods. |
706 | pub fn content_range(&self) -> io::Result<Option<Range<u64>>> { | |
98b894a9 | 707 | match self.entry.kind { |
c76d3f98 WB |
708 | EntryKind::File { offset: None, .. } => { |
709 | io_bail!("cannot open file, reader provided no offset") | |
710 | } | |
711 | EntryKind::File { | |
712 | size, | |
713 | offset: Some(offset), | |
6b9e2478 WB |
714 | } => Ok(Some(offset..(offset + size))), |
715 | _ => Ok(None), | |
716 | } | |
717 | } | |
718 | ||
719 | pub async fn contents(&self) -> io::Result<FileContentsImpl<T>> { | |
720 | match self.content_range()? { | |
721 | Some(range) => Ok(FileContentsImpl::new(self.input.clone(), range)), | |
722 | None => io_bail!("not a file"), | |
98b894a9 WB |
723 | } |
724 | } | |
725 | ||
6cd4f635 WB |
726 | #[inline] |
727 | pub fn into_entry(self) -> Entry { | |
728 | self.entry | |
729 | } | |
730 | ||
731 | #[inline] | |
732 | pub fn entry(&self) -> &Entry { | |
733 | &self.entry | |
734 | } | |
ceb83806 WB |
735 | |
736 | /// Exposed for raw by-offset access methods (use with `open_dir_at_end`). | |
737 | #[inline] | |
06070d26 WB |
738 | pub fn entry_range_info(&self) -> &EntryRangeInfo { |
739 | &self.entry_range_info | |
ceb83806 | 740 | } |
6cd4f635 WB |
741 | } |
742 | ||
/// An iterator over the contents of a directory.
pub(crate) struct ReadDirImpl<'a, T> {
    /// The directory being iterated.
    dir: &'a DirectoryImpl<T>,
    /// Index into the directory's goodbye table of the entry returned next.
    at: usize,
}
748 | ||
29c17fc0 | 749 | impl<'a, T: Clone + ReadAt> ReadDirImpl<'a, T> { |
5cf335be | 750 | fn new(dir: &'a DirectoryImpl<T>, at: usize) -> Self { |
6cd4f635 WB |
751 | Self { dir, at } |
752 | } | |
753 | ||
98b894a9 | 754 | /// Get the next entry. |
29c17fc0 | 755 | pub async fn next(&mut self) -> io::Result<Option<DirEntryImpl<'a, T>>> { |
6cd4f635 WB |
756 | if self.at == self.dir.table.len() { |
757 | Ok(None) | |
758 | } else { | |
759 | let cursor = self.dir.get_cursor(self.at).await?; | |
760 | self.at += 1; | |
761 | Ok(Some(cursor)) | |
762 | } | |
763 | } | |
98b894a9 WB |
764 | |
765 | /// Efficient alternative to `Iterator::skip`. | |
766 | #[inline] | |
767 | pub fn skip(self, n: usize) -> Self { | |
768 | Self { | |
769 | at: (self.at + n).min(self.dir.table.len()), | |
770 | dir: self.dir, | |
771 | } | |
772 | } | |
773 | ||
774 | /// Efficient alternative to `Iterator::count`. | |
775 | #[inline] | |
776 | pub fn count(self) -> usize { | |
777 | self.dir.table.len() | |
778 | } | |
6cd4f635 WB |
779 | } |
780 | ||
/// A cursor pointing to a file in a directory.
///
/// At this point only the file name has been read and we remembered the position for finding the
/// actual data. This can be upgraded into a FileEntryImpl.
pub(crate) struct DirEntryImpl<'a, T: Clone + ReadAt> {
    /// The directory this cursor belongs to.
    dir: &'a DirectoryImpl<T>,
    /// The file name read from the archive.
    file_name: PathBuf,
    /// Location of the entry this cursor points to.
    entry_range_info: EntryRangeInfo,
    /// Caches passed on to the decoded file entry.
    caches: Arc<Caches>,
}
791 | ||
29c17fc0 | 792 | impl<'a, T: Clone + ReadAt> DirEntryImpl<'a, T> { |
6cd4f635 WB |
793 | pub fn file_name(&self) -> &Path { |
794 | &self.file_name | |
795 | } | |
796 | ||
aabb78a4 | 797 | async fn decode_entry(&self) -> io::Result<FileEntryImpl<T>> { |
c76d3f98 | 798 | let (entry, _decoder) = self |
6cd4f635 | 799 | .dir |
1250e3ea WB |
800 | .decode_one_entry( |
801 | self.entry_range_info.entry_range.clone(), | |
802 | Some(&self.file_name), | |
803 | ) | |
6cd4f635 | 804 | .await?; |
6cd4f635 WB |
805 | |
806 | Ok(FileEntryImpl { | |
29c17fc0 | 807 | input: self.dir.input.clone(), |
6cd4f635 | 808 | entry, |
06070d26 | 809 | entry_range_info: self.entry_range_info.clone(), |
9d8af6f2 | 810 | caches: Arc::clone(&self.caches), |
6cd4f635 WB |
811 | }) |
812 | } | |
ceb83806 WB |
813 | |
814 | /// Exposed for raw by-offset access methods. | |
815 | #[inline] | |
06070d26 WB |
816 | pub fn entry_range_info(&self) -> &EntryRangeInfo { |
817 | &self.entry_range_info | |
ceb83806 | 818 | } |
6cd4f635 WB |
819 | } |
820 | ||
/// A reader for file contents.
///
/// Offsets passed to its read methods are relative to the start of `range`.
#[derive(Clone)]
pub(crate) struct FileContentsImpl<T> {
    /// The reader giving random access to the archive.
    input: T,

    /// Absolute offset inside the `input`.
    range: Range<u64>,
}
829 | ||
830 | impl<T: Clone + ReadAt> FileContentsImpl<T> { | |
831 | pub fn new(input: T, range: Range<u64>) -> Self { | |
832 | Self { input, range } | |
833 | } | |
834 | ||
835 | #[inline] | |
836 | pub fn file_size(&self) -> u64 { | |
837 | self.range.end - self.range.start | |
838 | } | |
839 | ||
840 | async fn read_at(&self, mut buf: &mut [u8], offset: u64) -> io::Result<usize> { | |
841 | let size = self.file_size(); | |
842 | if offset >= size { | |
843 | return Ok(0); | |
844 | } | |
845 | let remaining = size - offset; | |
846 | ||
847 | if remaining < buf.len() as u64 { | |
848 | buf = &mut buf[..(remaining as usize)]; | |
849 | } | |
850 | ||
c68a29b5 | 851 | read_at(&self.input, buf, self.range.start + offset).await |
98b894a9 WB |
852 | } |
853 | } | |
854 | ||
/// Reads are restricted to the contents' range: offsets are relative to its start and requests
/// reaching past its end get shortened.
impl<T: Clone + ReadAt> ReadAt for FileContentsImpl<T> {
    fn start_read_at<'a>(
        self: Pin<&'a Self>,
        cx: &mut Context,
        mut buf: &'a mut [u8],
        offset: u64,
    ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> {
        let size = self.file_size();
        if offset >= size {
            // At or behind the end of the contents: report an empty read.
            return MaybeReady::Ready(Ok(0));
        }
        let remaining = size - offset;

        // Never read beyond the end of the contents.
        if remaining < buf.len() as u64 {
            buf = &mut buf[..(remaining as usize)];
        }

        // Translate to an absolute offset inside the input.
        let offset = self.range.start + offset;
        // NOTE(review): projecting the pin onto the `input` field presumably relies on
        // `input` being treated as structurally pinned - confirm.
        unsafe { self.map_unchecked(|this| &this.input) }.start_read_at(cx, buf, offset)
    }

    fn poll_complete<'a>(
        self: Pin<&'a Self>,
        op: ReadAtOperation<'a>,
    ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> {
        // Same projection as in `start_read_at`; the operation is completed by the input.
        unsafe { self.map_unchecked(|this| &this.input) }.poll_complete(op)
    }
}
883 | ||
1b25fc08 WB |
/// File content read future result.
struct ReadResult {
    /// Number of bytes the read reported.
    len: usize,
    /// The buffer the data was read into, handed back so it can be stored again for reuse.
    buffer: Vec<u8>,
}
889 | ||
e5a2495e WB |
/// A `SeqRead` adapter for a specific range inside another reader, with a temporary buffer due
/// to lifetime constraints.
#[doc(hidden)]
pub struct SeqReadAtAdapter<T> {
    /// The reader being adapted.
    input: T,
    /// The range still to be read; `start` is advanced as data is consumed.
    range: Range<u64>,
    /// Scratch buffer the reads go into before being copied to the caller's buffer.
    buffer: Vec<u8>,
    /// The in-flight read, if any. It borrows `input` with its lifetime erased, so it must be
    /// dropped before `input` is.
    future: Option<Pin<Box<dyn Future<Output = io::Result<ReadResult>> + 'static>>>,
}
899 | ||
// We lose `Send` via the boxed trait object and don't want to force the trait object to
// potentially be more strict than `T`, so we leave it as it is and implement Send and Sync
// depending on T.
// NOTE(review): this relies on the boxed future holding nothing beyond a reference to `input`,
// the owned scratch buffer and whatever state the inner read keeps - confirm whenever the
// future built in `poll_seq_read` changes.
#[allow(clippy::non_send_fields_in_send_ty)]
unsafe impl<T: Send> Send for SeqReadAtAdapter<T> {}
unsafe impl<T: Sync> Sync for SeqReadAtAdapter<T> {}
906 | ||
907 | impl<T> Drop for SeqReadAtAdapter<T> { | |
908 | fn drop(&mut self) { | |
909 | // drop order | |
910 | self.future = None; | |
911 | } | |
6cd4f635 WB |
912 | } |
913 | ||
29c17fc0 | 914 | impl<T: ReadAt> SeqReadAtAdapter<T> { |
e5a2495e | 915 | /// Create a new `SeqRead` adapter given a range. |
29c17fc0 | 916 | pub fn new(input: T, range: Range<u64>) -> Self { |
70acf637 WB |
917 | if range.end < range.start { |
918 | panic!("BAD SEQ READ AT ADAPTER"); | |
919 | } | |
e72062a9 WB |
920 | Self { |
921 | input, | |
922 | range, | |
923 | buffer: Vec::new(), | |
924 | future: None, | |
925 | } | |
6cd4f635 WB |
926 | } |
927 | ||
928 | #[inline] | |
929 | fn remaining(&self) -> usize { | |
930 | (self.range.end - self.range.start) as usize | |
931 | } | |
932 | } | |
933 | ||
29c17fc0 | 934 | impl<T: ReadAt> decoder::SeqRead for SeqReadAtAdapter<T> { |
6cd4f635 WB |
935 | fn poll_seq_read( |
936 | self: Pin<&mut Self>, | |
937 | cx: &mut Context, | |
e72062a9 | 938 | dest: &mut [u8], |
6cd4f635 | 939 | ) -> Poll<io::Result<usize>> { |
e72062a9 WB |
940 | let len = dest.len().min(self.remaining()); |
941 | let dest = &mut dest[..len]; | |
6cd4f635 | 942 | |
29c17fc0 | 943 | let this = unsafe { self.get_unchecked_mut() }; |
e72062a9 WB |
944 | loop { |
945 | match this.future.take() { | |
946 | None => { | |
947 | let mut buffer = mem::take(&mut this.buffer); | |
948 | util::scale_read_buffer(&mut buffer, dest.len()); | |
949 | ||
950 | // Note that we're pinned and we have a drop-handler which forces self.future | |
951 | // to be dropped before `input`, so putting a reference to self.input into the | |
952 | // future should be ok! | |
953 | let reader = &this.input; | |
954 | ||
955 | let at = this.range.start; | |
1b25fc08 | 956 | let future: Pin<Box<dyn Future<Output = io::Result<ReadResult>>>> = |
e72062a9 | 957 | Box::pin(async move { |
1b25fc08 WB |
958 | let len = reader.read_at(&mut buffer, at).await?; |
959 | io::Result::Ok(ReadResult { len, buffer }) | |
e72062a9 WB |
960 | }); |
961 | // Ditch the self-reference life-time now: | |
962 | this.future = Some(unsafe { mem::transmute(future) }); | |
963 | } | |
964 | Some(mut fut) => match fut.as_mut().poll(cx) { | |
965 | Poll::Pending => { | |
966 | this.future = Some(fut); | |
967 | return Poll::Pending; | |
968 | } | |
969 | Poll::Ready(Err(err)) => return Poll::Ready(Err(err)), | |
1b25fc08 | 970 | Poll::Ready(Ok(ReadResult { len: got, buffer })) => { |
e72062a9 WB |
971 | this.buffer = buffer; |
972 | this.range.start += got as u64; | |
973 | let len = got.min(dest.len()); | |
974 | dest[..len].copy_from_slice(&this.buffer[..len]); | |
975 | return Poll::Ready(Ok(len)); | |
976 | } | |
977 | }, | |
978 | } | |
979 | } | |
6cd4f635 WB |
980 | } |
981 | ||
982 | fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> { | |
983 | Poll::Ready(Some(Ok(self.range.start))) | |
984 | } | |
985 | } |