]>
Commit | Line | Data |
---|---|---|
6cd4f635 WB |
1 | //! Random access for PXAR files. |
2 | ||
dc4a2854 | 3 | use std::ffi::{OsStr, OsString}; |
6cd4f635 | 4 | use std::io; |
dc4a2854 | 5 | use std::mem::{self, size_of, size_of_val, MaybeUninit}; |
6cd4f635 WB |
6 | use std::ops::Range; |
7 | use std::os::unix::ffi::{OsStrExt, OsStringExt}; | |
8 | use std::path::{Path, PathBuf}; | |
9 | use std::pin::Pin; | |
9d8af6f2 | 10 | use std::sync::Arc; |
6cd4f635 WB |
11 | use std::task::{Context, Poll}; |
12 | ||
13 | use endian_trait::Endian; | |
14 | ||
fbddffdc | 15 | use crate::binary_tree_array; |
6cd4f635 WB |
16 | use crate::decoder::{self, DecoderImpl}; |
17 | use crate::format::{self, GoodbyeItem}; | |
18 | use crate::poll_fn::poll_fn; | |
19 | use crate::util; | |
98b894a9 | 20 | use crate::{Entry, EntryKind}; |
6cd4f635 WB |
21 | |
22 | pub mod aio; | |
9d8af6f2 | 23 | pub mod cache; |
6cd4f635 WB |
24 | pub mod sync; |
25 | ||
26 | #[doc(inline)] | |
2c23bd09 | 27 | pub use sync::{Accessor, DirEntry, Directory, FileEntry, ReadDir}; |
6cd4f635 | 28 | |
9d8af6f2 WB |
29 | use cache::Cache; |
30 | ||
06070d26 WB |
/// Offsets describing where an entry lives inside the archive, as needed by the unsafe raw
/// random access methods.
#[derive(Clone, Debug)]
pub struct EntryRangeInfo {
    /// Offset of the entry's filename header, or `None` when there is none (see
    /// [`EntryRangeInfo::toplevel`]).
    pub filename_header_offset: Option<u64>,
    /// Byte range of the entry within the input.
    pub entry_range: Range<u64>,
}

impl EntryRangeInfo {
    /// Describe a top-level entry: it only has a range, no filename header offset.
    pub fn toplevel(entry_range: Range<u64>) -> Self {
        let filename_header_offset = None;
        EntryRangeInfo {
            filename_header_offset,
            entry_range,
        }
    }
}
46 | ||
6cd4f635 WB |
/// Random access read implementation.
///
/// Reads are positional and only need a shared (pinned) reference to the reader.
pub trait ReadAt {
    /// Poll for reading data located at `offset` into `buf`.
    ///
    /// When ready, yields the number of bytes that were read. The helpers in this module treat a
    /// result of `0` as end of input.
    fn poll_read_at(
        self: Pin<&Self>,
        cx: &mut Context,
        buf: &mut [u8],
        offset: u64,
    ) -> Poll<io::Result<usize>>;
}
56 | ||
c68a29b5 WB |
57 | /// awaitable version of `poll_read_at`. |
58 | async fn read_at<T>(input: &T, buf: &mut [u8], offset: u64) -> io::Result<usize> | |
59 | where | |
60 | T: ReadAt + ?Sized, | |
61 | { | |
62 | poll_fn(|cx| unsafe { Pin::new_unchecked(input).poll_read_at(cx, buf, offset) }).await | |
63 | } | |
64 | ||
65 | /// `read_exact_at` - since that's what we _actually_ want most of the time. | |
66 | async fn read_exact_at<T>(input: &T, mut buf: &mut [u8], mut offset: u64) -> io::Result<()> | |
67 | where | |
68 | T: ReadAt + ?Sized, | |
69 | { | |
70 | while !buf.is_empty() { | |
71 | match read_at(input, buf, offset).await? { | |
72 | 0 => io_bail!("unexpected EOF"), | |
73 | got => { | |
74 | buf = &mut buf[got..]; | |
75 | offset += got as u64; | |
6cd4f635 WB |
76 | } |
77 | } | |
6cd4f635 | 78 | } |
c68a29b5 WB |
79 | Ok(()) |
80 | } | |
6cd4f635 | 81 | |
c68a29b5 WB |
82 | /// Helper to read into an `Endian`-implementing `struct`. |
83 | async fn read_entry_at<T, E: Endian>(input: &T, offset: u64) -> io::Result<E> | |
84 | where | |
85 | T: ReadAt + ?Sized, | |
86 | { | |
87 | let mut data = MaybeUninit::<E>::uninit(); | |
88 | let buf = | |
89 | unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<E>()) }; | |
90 | read_exact_at(input, buf, offset).await?; | |
91 | Ok(unsafe { data.assume_init().from_le() }) | |
92 | } | |
6cd4f635 | 93 | |
c68a29b5 WB |
94 | /// Helper to read into an allocated byte vector. |
95 | async fn read_exact_data_at<T>(input: &T, size: usize, offset: u64) -> io::Result<Vec<u8>> | |
96 | where | |
97 | T: ReadAt + ?Sized, | |
98 | { | |
99 | let mut data = util::vec_new(size); | |
100 | read_exact_at(input, &mut data[..], offset).await?; | |
101 | Ok(data) | |
6cd4f635 WB |
102 | } |
103 | ||
29c17fc0 WB |
104 | /// Allow using trait objects for `T: ReadAt` |
105 | impl<'a> ReadAt for &(dyn ReadAt + 'a) { | |
106 | fn poll_read_at( | |
107 | self: Pin<&Self>, | |
108 | cx: &mut Context, | |
bb6540d7 WB |
109 | buf: &mut [u8], |
110 | offset: u64, | |
111 | ) -> Poll<io::Result<usize>> { | |
112 | unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) } | |
113 | } | |
114 | } | |
115 | ||
116 | /// Convenience impl for `Arc<dyn ReadAt + Send + Sync + 'static>`. Since `ReadAt` only requires | |
117 | /// immutable `&self`, this adds some convenience by allowing to just `Arc` any `'static` type that | |
118 | /// implemments `ReadAt` for type monomorphization. | |
119 | impl ReadAt for Arc<dyn ReadAt + Send + Sync + 'static> { | |
120 | fn poll_read_at( | |
121 | self: Pin<&Self>, | |
122 | cx: &mut Context, | |
29c17fc0 WB |
123 | buf: &mut [u8], |
124 | offset: u64, | |
125 | ) -> Poll<io::Result<usize>> { | |
d3a83ee3 | 126 | unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) } |
29c17fc0 WB |
127 | } |
128 | } | |
129 | ||
b764a2b1 | 130 | #[derive(Clone)] |
9d8af6f2 WB |
131 | struct Caches { |
132 | /// The goodbye table cache maps goodbye table offsets to cache entries. | |
133 | gbt_cache: Option<Arc<dyn Cache<u64, [GoodbyeItem]> + Send + Sync>>, | |
134 | } | |
135 | ||
136 | impl Default for Caches { | |
137 | fn default() -> Self { | |
138 | Self { gbt_cache: None } | |
139 | } | |
140 | } | |
141 | ||
/// The random access state machine implementation.
pub(crate) struct AccessorImpl<T> {
    /// The reader providing the archive data.
    input: T,
    /// Total size of the archive; also used as the end offset of the root directory.
    size: u64,
    /// Caches passed on to every directory and file entry opened through this accessor.
    caches: Arc<Caches>,
}
148 | ||
149 | impl<T: ReadAt> AccessorImpl<T> { | |
150 | pub async fn new(input: T, size: u64) -> io::Result<Self> { | |
151 | if size < (size_of::<GoodbyeItem>() as u64) { | |
152 | io_bail!("too small to contain a pxar archive"); | |
153 | } | |
9d8af6f2 WB |
154 | |
155 | Ok(Self { | |
156 | input, | |
157 | size, | |
158 | caches: Arc::new(Caches::default()), | |
159 | }) | |
6cd4f635 WB |
160 | } |
161 | ||
a2530fb7 WB |
162 | pub fn size(&self) -> u64 { |
163 | self.size | |
164 | } | |
165 | ||
29c17fc0 | 166 | pub async fn open_root_ref<'a>(&'a self) -> io::Result<DirectoryImpl<&'a dyn ReadAt>> { |
9d8af6f2 WB |
167 | DirectoryImpl::open_at_end( |
168 | &self.input as &dyn ReadAt, | |
169 | self.size, | |
170 | "/".into(), | |
171 | Arc::clone(&self.caches), | |
172 | ) | |
173 | .await | |
29c17fc0 | 174 | } |
b764a2b1 WB |
175 | |
176 | pub fn set_goodbye_table_cache( | |
177 | &mut self, | |
178 | cache: Option<Arc<dyn Cache<u64, [GoodbyeItem]> + Send + Sync>>, | |
179 | ) { | |
180 | let new_caches = Arc::new(Caches { | |
181 | gbt_cache: cache, | |
182 | ..*self.caches | |
183 | }); | |
184 | self.caches = new_caches; | |
185 | } | |
29c17fc0 WB |
186 | } |
187 | ||
6b9e2478 WB |
188 | async fn get_decoder<T: ReadAt>( |
189 | input: T, | |
190 | entry_range: Range<u64>, | |
191 | path: PathBuf, | |
192 | ) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> { | |
d3a83ee3 | 193 | Ok(DecoderImpl::new_full(SeqReadAtAdapter::new(input, entry_range), path).await?) |
6b9e2478 WB |
194 | } |
195 | ||
06070d26 WB |
196 | // NOTE: This performs the Decoder::read_next_item() behavior! Keep in mind when changing! |
197 | async fn get_decoder_at_filename<T: ReadAt>( | |
198 | input: T, | |
199 | entry_range: Range<u64>, | |
200 | path: PathBuf, | |
201 | ) -> io::Result<(DecoderImpl<SeqReadAtAdapter<T>>, u64)> { | |
202 | let mut decoder = get_decoder(input, entry_range, path).await?; | |
203 | decoder.path_lengths.push(0); | |
204 | decoder.read_next_header().await?; | |
205 | if decoder.current_header.htype != format::PXAR_FILENAME { | |
1250e3ea WB |
206 | io_bail!( |
207 | "expected filename entry, got {:?}", | |
208 | decoder.current_header.htype | |
209 | ); | |
06070d26 WB |
210 | } |
211 | if decoder.read_current_item().await? != decoder::ItemResult::Entry { | |
212 | // impossible, since we checked the header type above for a "proper" error message | |
213 | io_bail!("unexpected decoder state"); | |
214 | } | |
1250e3ea WB |
215 | let entry_offset = decoder::seq_read_position(&mut decoder.input) |
216 | .await | |
217 | .transpose()? | |
06070d26 WB |
218 | .ok_or_else(|| io_format_err!("reader provided no offset"))?; |
219 | Ok((decoder, entry_offset)) | |
220 | } | |
221 | ||
29c17fc0 WB |
222 | impl<T: Clone + ReadAt> AccessorImpl<T> { |
223 | pub async fn open_root(&self) -> io::Result<DirectoryImpl<T>> { | |
9d8af6f2 WB |
224 | DirectoryImpl::open_at_end( |
225 | self.input.clone(), | |
226 | self.size, | |
227 | "/".into(), | |
228 | Arc::clone(&self.caches), | |
229 | ) | |
230 | .await | |
6cd4f635 | 231 | } |
ceb83806 WB |
232 | |
233 | /// Allow opening a directory at a specified offset. | |
234 | pub async unsafe fn open_dir_at_end(&self, offset: u64) -> io::Result<DirectoryImpl<T>> { | |
235 | DirectoryImpl::open_at_end( | |
236 | self.input.clone(), | |
237 | offset, | |
238 | "/".into(), | |
239 | Arc::clone(&self.caches), | |
240 | ) | |
241 | .await | |
242 | } | |
6b9e2478 WB |
243 | |
244 | /// Allow opening a regular file from a specified range. | |
245 | pub async unsafe fn open_file_at_range( | |
246 | &self, | |
06070d26 | 247 | entry_range_info: &EntryRangeInfo, |
6b9e2478 | 248 | ) -> io::Result<FileEntryImpl<T>> { |
06070d26 WB |
249 | let mut decoder = get_decoder( |
250 | self.input.clone(), | |
251 | entry_range_info.entry_range.clone(), | |
252 | PathBuf::new(), | |
1250e3ea WB |
253 | ) |
254 | .await?; | |
6b9e2478 WB |
255 | let entry = decoder |
256 | .next() | |
257 | .await | |
258 | .ok_or_else(|| io_format_err!("unexpected EOF while decoding file entry"))??; | |
259 | Ok(FileEntryImpl { | |
260 | input: self.input.clone(), | |
261 | entry, | |
06070d26 | 262 | entry_range_info: entry_range_info.clone(), |
6b9e2478 WB |
263 | caches: Arc::clone(&self.caches), |
264 | }) | |
265 | } | |
266 | ||
267 | /// Allow opening arbitrary contents from a specific range. | |
268 | pub unsafe fn open_contents_at_range(&self, range: Range<u64>) -> FileContentsImpl<T> { | |
269 | FileContentsImpl::new(self.input.clone(), range) | |
270 | } | |
6bfadb8a WB |
271 | |
272 | /// Following a hardlink breaks a couple of conventions we otherwise have, particularly we will | |
273 | /// never know the actual length of the target entry until we're done decoding it, so this | |
274 | /// needs to happen at the accessor level, rather than a "sub-entry-reader". | |
06070d26 WB |
275 | pub async fn follow_hardlink(&self, entry: &FileEntryImpl<T>) -> io::Result<FileEntryImpl<T>> { |
276 | let link_offset = match entry.entry.kind() { | |
277 | EntryKind::Hardlink(link) => link.offset, | |
278 | _ => io_bail!("cannot resolve a non-hardlink"), | |
279 | }; | |
280 | ||
281 | let entry_file_offset = entry | |
282 | .entry_range_info | |
283 | .filename_header_offset | |
284 | .ok_or_else(|| io_format_err!("cannot follow hardlink without a file entry header"))?; | |
285 | ||
286 | if link_offset > entry_file_offset { | |
287 | io_bail!("invalid offset in hardlink"); | |
288 | } | |
289 | ||
290 | let link_offset = entry_file_offset - link_offset; | |
291 | ||
1250e3ea WB |
292 | let (mut decoder, entry_offset) = |
293 | get_decoder_at_filename(self.input.clone(), link_offset..self.size, PathBuf::new()) | |
294 | .await?; | |
06070d26 | 295 | |
6bfadb8a WB |
296 | let entry = decoder |
297 | .next() | |
298 | .await | |
299 | .ok_or_else(|| io_format_err!("unexpected EOF while following a hardlink"))??; | |
300 | match entry.kind() { | |
301 | EntryKind::File { offset: None, .. } => { | |
302 | io_bail!("failed to follow hardlink, reader provided no offsets"); | |
303 | } | |
b0487d4f WB |
304 | EntryKind::File { |
305 | offset: Some(offset), | |
306 | size, | |
307 | } => { | |
06070d26 WB |
308 | let meta_size = offset - link_offset; |
309 | let entry_end = link_offset + meta_size + size; | |
6bfadb8a WB |
310 | Ok(FileEntryImpl { |
311 | input: self.input.clone(), | |
312 | entry, | |
06070d26 WB |
313 | entry_range_info: EntryRangeInfo { |
314 | filename_header_offset: Some(link_offset), | |
315 | entry_range: entry_offset..entry_end, | |
316 | }, | |
6bfadb8a WB |
317 | caches: Arc::clone(&self.caches), |
318 | }) | |
319 | } | |
320 | _ => io_bail!("hardlink does not point to a regular file"), | |
321 | } | |
322 | } | |
6cd4f635 WB |
323 | } |
324 | ||
/// The directory random-access state machine implementation.
pub(crate) struct DirectoryImpl<T> {
    /// The reader providing the archive data.
    input: T,
    /// Offset at which the directory's own entry starts.
    entry_ofs: u64,
    /// Offset of the goodbye table's header.
    goodbye_ofs: u64,
    /// Number of bytes from `entry_ofs` to the end of the directory.
    size: u64,
    /// The goodbye table items, either fetched from the cache or loaded from `input`.
    table: Arc<[GoodbyeItem]>,
    /// Path of this directory, used as base for the paths of decoded entries.
    path: PathBuf,
    /// Caches passed on to entries opened through this directory.
    caches: Arc<Caches>,
}
335 | ||
29c17fc0 | 336 | impl<T: Clone + ReadAt> DirectoryImpl<T> { |
6cd4f635 | 337 | /// Open a directory ending at the specified position. |
9d8af6f2 | 338 | async fn open_at_end( |
29c17fc0 | 339 | input: T, |
6cd4f635 WB |
340 | end_offset: u64, |
341 | path: PathBuf, | |
9d8af6f2 | 342 | caches: Arc<Caches>, |
29c17fc0 WB |
343 | ) -> io::Result<DirectoryImpl<T>> { |
344 | let tail = Self::read_tail_entry(&input, end_offset).await?; | |
6cd4f635 WB |
345 | |
346 | if end_offset < tail.size { | |
347 | io_bail!("goodbye tail size out of range"); | |
348 | } | |
349 | ||
350 | let goodbye_ofs = end_offset - tail.size; | |
351 | ||
352 | if goodbye_ofs < tail.offset { | |
353 | io_bail!("goodbye offset out of range"); | |
354 | } | |
355 | ||
356 | let entry_ofs = goodbye_ofs - tail.offset; | |
357 | let size = end_offset - entry_ofs; | |
358 | ||
9d8af6f2 WB |
359 | let table: Option<Arc<[GoodbyeItem]>> = caches |
360 | .gbt_cache | |
361 | .as_ref() | |
362 | .and_then(|cache| cache.fetch(goodbye_ofs)); | |
363 | ||
6cd4f635 WB |
364 | let mut this = Self { |
365 | input, | |
366 | entry_ofs, | |
367 | goodbye_ofs, | |
368 | size, | |
9d8af6f2 | 369 | table: table.as_ref().map_or_else(|| Arc::new([]), Arc::clone), |
6cd4f635 | 370 | path, |
9d8af6f2 | 371 | caches, |
6cd4f635 WB |
372 | }; |
373 | ||
374 | // sanity check: | |
375 | if this.table_size() % (size_of::<GoodbyeItem>() as u64) != 0 { | |
376 | io_bail!("invalid goodbye table size: {}", this.table_size()); | |
377 | } | |
378 | ||
9d8af6f2 WB |
379 | if table.is_none() { |
380 | this.table = this.load_table().await?; | |
381 | if let Some(ref cache) = this.caches.gbt_cache { | |
382 | cache.insert(goodbye_ofs, Arc::clone(&this.table)); | |
383 | } | |
384 | } | |
6cd4f635 WB |
385 | |
386 | Ok(this) | |
387 | } | |
388 | ||
389 | /// Load the entire goodbye table: | |
9d8af6f2 | 390 | async fn load_table(&self) -> io::Result<Arc<[GoodbyeItem]>> { |
6cd4f635 WB |
391 | let len = self.len(); |
392 | let mut data = Vec::with_capacity(self.len()); | |
393 | unsafe { | |
394 | data.set_len(len); | |
395 | let slice = std::slice::from_raw_parts_mut( | |
396 | data.as_mut_ptr() as *mut u8, | |
2c23bd09 | 397 | len * size_of::<GoodbyeItem>(), |
6cd4f635 | 398 | ); |
c68a29b5 | 399 | read_exact_at(&self.input, slice, self.table_offset()).await?; |
6cd4f635 WB |
400 | drop(slice); |
401 | } | |
9d8af6f2 | 402 | Ok(Arc::from(data)) |
6cd4f635 WB |
403 | } |
404 | ||
405 | #[inline] | |
406 | fn end_offset(&self) -> u64 { | |
407 | self.entry_ofs + self.size | |
408 | } | |
409 | ||
dc4a2854 WB |
410 | #[inline] |
411 | fn entry_range(&self) -> Range<u64> { | |
412 | self.entry_ofs..self.end_offset() | |
413 | } | |
414 | ||
6cd4f635 WB |
415 | #[inline] |
416 | fn table_size(&self) -> u64 { | |
417 | (self.end_offset() - self.goodbye_ofs) - (size_of::<format::Header>() as u64) | |
418 | } | |
419 | ||
420 | #[inline] | |
421 | fn table_offset(&self) -> u64 { | |
422 | self.goodbye_ofs + (size_of::<format::Header>() as u64) | |
423 | } | |
424 | ||
425 | /// Length *excluding* the tail marker! | |
426 | #[inline] | |
427 | fn len(&self) -> usize { | |
428 | (self.table_size() / (size_of::<GoodbyeItem>() as u64)) as usize - 1 | |
429 | } | |
430 | ||
431 | /// Read the goodbye tail and perform some sanity checks. | |
c68a29b5 | 432 | async fn read_tail_entry(input: &T, end_offset: u64) -> io::Result<GoodbyeItem> { |
6cd4f635 WB |
433 | if end_offset < (size_of::<GoodbyeItem>() as u64) { |
434 | io_bail!("goodbye tail does not fit"); | |
435 | } | |
436 | ||
437 | let tail_offset = end_offset - (size_of::<GoodbyeItem>() as u64); | |
c68a29b5 | 438 | let tail: GoodbyeItem = read_entry_at(input, tail_offset).await?; |
6cd4f635 WB |
439 | |
440 | if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER { | |
441 | io_bail!("no goodbye tail marker found"); | |
442 | } | |
443 | ||
444 | Ok(tail) | |
445 | } | |
446 | ||
447 | /// Get a decoder for the directory contents. | |
29c17fc0 | 448 | pub(crate) async fn decode_full(&self) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> { |
dc4a2854 | 449 | let (dir, decoder) = self.decode_one_entry(self.entry_range(), None).await?; |
6cd4f635 WB |
450 | if !dir.is_dir() { |
451 | io_bail!("directory does not seem to be a directory"); | |
452 | } | |
453 | Ok(decoder) | |
454 | } | |
455 | ||
456 | async fn get_decoder( | |
457 | &self, | |
458 | entry_range: Range<u64>, | |
459 | file_name: Option<&Path>, | |
29c17fc0 | 460 | ) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> { |
6b9e2478 WB |
461 | get_decoder( |
462 | self.input.clone(), | |
463 | entry_range, | |
6cd4f635 WB |
464 | match file_name { |
465 | None => self.path.clone(), | |
466 | Some(file) => self.path.join(file), | |
467 | }, | |
d3a83ee3 WB |
468 | ) |
469 | .await | |
6cd4f635 WB |
470 | } |
471 | ||
472 | async fn decode_one_entry( | |
473 | &self, | |
474 | entry_range: Range<u64>, | |
475 | file_name: Option<&Path>, | |
29c17fc0 | 476 | ) -> io::Result<(Entry, DecoderImpl<SeqReadAtAdapter<T>>)> { |
6cd4f635 WB |
477 | let mut decoder = self.get_decoder(entry_range, file_name).await?; |
478 | let entry = decoder | |
479 | .next() | |
480 | .await | |
481 | .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??; | |
482 | Ok((entry, decoder)) | |
483 | } | |
484 | ||
fbddffdc WB |
485 | fn lookup_hash_position(&self, hash: u64, start: usize, skip: usize) -> Option<usize> { |
486 | binary_tree_array::search_by(&self.table, start, skip, |i| hash.cmp(&i.hash)) | |
6cd4f635 WB |
487 | } |
488 | ||
a5922fbc | 489 | pub async fn lookup_self(&self) -> io::Result<FileEntryImpl<T>> { |
c76d3f98 | 490 | let (entry, _decoder) = self.decode_one_entry(self.entry_range(), None).await?; |
dc4a2854 WB |
491 | Ok(FileEntryImpl { |
492 | input: self.input.clone(), | |
493 | entry, | |
06070d26 WB |
494 | entry_range_info: EntryRangeInfo { |
495 | filename_header_offset: None, | |
496 | entry_range: self.entry_range(), | |
497 | }, | |
9d8af6f2 | 498 | caches: Arc::clone(&self.caches), |
dc4a2854 WB |
499 | }) |
500 | } | |
501 | ||
6cd4f635 | 502 | /// Lookup a directory entry. |
29c17fc0 | 503 | pub async fn lookup(&self, path: &Path) -> io::Result<Option<FileEntryImpl<T>>> { |
dc4a2854 WB |
504 | let mut cur: Option<FileEntryImpl<T>> = None; |
505 | ||
506 | let mut first = true; | |
507 | for component in path.components() { | |
508 | use std::path::Component; | |
509 | ||
510 | let first = mem::replace(&mut first, false); | |
511 | ||
512 | let component = match component { | |
513 | Component::Normal(path) => path, | |
514 | Component::ParentDir => io_bail!("cannot enter parent directory in archive"), | |
515 | Component::RootDir | Component::CurDir if first => { | |
516 | cur = Some(self.lookup_self().await?); | |
517 | continue; | |
518 | } | |
519 | Component::CurDir => continue, | |
520 | _ => io_bail!("invalid component in path"), | |
521 | }; | |
522 | ||
523 | let next = match cur { | |
524 | Some(entry) => { | |
525 | entry | |
526 | .enter_directory() | |
527 | .await? | |
528 | .lookup_component(component) | |
529 | .await? | |
530 | } | |
531 | None => self.lookup_component(component).await?, | |
532 | }; | |
533 | ||
534 | if next.is_none() { | |
535 | return Ok(None); | |
536 | } | |
537 | ||
538 | cur = next; | |
539 | } | |
540 | ||
541 | Ok(cur) | |
542 | } | |
543 | ||
544 | /// Lookup a single directory entry component (does not handle multiple components in path) | |
545 | pub async fn lookup_component(&self, path: &OsStr) -> io::Result<Option<FileEntryImpl<T>>> { | |
546 | let hash = format::hash_filename(path.as_bytes()); | |
fbddffdc | 547 | let first_index = match self.lookup_hash_position(hash, 0, 0) { |
6cd4f635 WB |
548 | Some(index) => index, |
549 | None => return Ok(None), | |
550 | }; | |
551 | ||
fbddffdc WB |
552 | // Lookup FILENAME, if the hash matches but the filename doesn't, check for a duplicate |
553 | // hash once found, use the GoodbyeItem's offset+size as well as the file's Entry to return | |
554 | // a DirEntry::Dir or Dir::Entry. | |
555 | // | |
556 | let mut dup = 0; | |
557 | loop { | |
558 | let index = match self.lookup_hash_position(hash, first_index, dup) { | |
559 | Some(index) => index, | |
560 | None => return Ok(None), | |
561 | }; | |
6cd4f635 | 562 | |
6cd4f635 WB |
563 | let cursor = self.get_cursor(index).await?; |
564 | if cursor.file_name == path { | |
aabb78a4 | 565 | return Ok(Some(cursor.decode_entry().await?)); |
6cd4f635 | 566 | } |
6cd4f635 | 567 | |
fbddffdc WB |
568 | dup += 1; |
569 | } | |
6cd4f635 WB |
570 | } |
571 | ||
29c17fc0 | 572 | async fn get_cursor<'a>(&'a self, index: usize) -> io::Result<DirEntryImpl<'a, T>> { |
6cd4f635 WB |
573 | let entry = &self.table[index]; |
574 | let file_goodbye_ofs = entry.offset; | |
575 | if self.goodbye_ofs < file_goodbye_ofs { | |
576 | io_bail!("invalid file offset"); | |
577 | } | |
578 | ||
579 | let file_ofs = self.goodbye_ofs - file_goodbye_ofs; | |
580 | let (file_name, entry_ofs) = self.read_filename_entry(file_ofs).await?; | |
581 | ||
70acf637 WB |
582 | let entry_range = Range { |
583 | start: entry_ofs, | |
584 | end: file_ofs + entry.size, | |
585 | }; | |
586 | if entry_range.end < entry_range.start { | |
587 | io_bail!( | |
588 | "bad file: invalid entry ranges for {:?}: \ | |
589 | start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}", | |
590 | file_name, | |
591 | entry_ofs, | |
592 | file_ofs, | |
593 | entry.size, | |
594 | ); | |
595 | } | |
596 | ||
6cd4f635 WB |
597 | Ok(DirEntryImpl { |
598 | dir: self, | |
599 | file_name, | |
06070d26 WB |
600 | entry_range_info: EntryRangeInfo { |
601 | filename_header_offset: Some(file_ofs), | |
602 | entry_range, | |
603 | }, | |
9d8af6f2 | 604 | caches: Arc::clone(&self.caches), |
6cd4f635 WB |
605 | }) |
606 | } | |
607 | ||
608 | async fn read_filename_entry(&self, file_ofs: u64) -> io::Result<(PathBuf, u64)> { | |
c68a29b5 | 609 | let head: format::Header = read_entry_at(&self.input, file_ofs).await?; |
6cd4f635 WB |
610 | if head.htype != format::PXAR_FILENAME { |
611 | io_bail!("expected PXAR_FILENAME header, found: {:x}", head.htype); | |
612 | } | |
613 | ||
c68a29b5 WB |
614 | let mut path = read_exact_data_at( |
615 | &self.input, | |
616 | head.content_size() as usize, | |
617 | file_ofs + (size_of_val(&head) as u64), | |
618 | ) | |
619 | .await?; | |
6cd4f635 WB |
620 | |
621 | if path.pop() != Some(0) { | |
622 | io_bail!("invalid file name (missing terminating zero)"); | |
623 | } | |
624 | ||
625 | if path.is_empty() { | |
626 | io_bail!("invalid empty file name"); | |
627 | } | |
628 | ||
bd99958c WB |
629 | if path.contains(&b'/') { |
630 | io_bail!("illegal path found (contains slashes, this is a security concern)"); | |
631 | } | |
632 | ||
6cd4f635 WB |
633 | let file_name = PathBuf::from(OsString::from_vec(path)); |
634 | format::check_file_name(&file_name)?; | |
635 | ||
636 | Ok((file_name, file_ofs + head.full_size())) | |
637 | } | |
638 | ||
29c17fc0 | 639 | pub fn read_dir(&self) -> ReadDirImpl<T> { |
6cd4f635 WB |
640 | ReadDirImpl::new(self, 0) |
641 | } | |
d3a83ee3 WB |
642 | |
643 | pub fn entry_count(&self) -> usize { | |
644 | self.table.len() | |
645 | } | |
6cd4f635 WB |
646 | } |
647 | ||
/// A file entry retrieved from a Directory.
#[derive(Clone)]
pub(crate) struct FileEntryImpl<T: Clone + ReadAt> {
    /// The reader providing the archive data.
    input: T,
    /// The decoded entry.
    entry: Entry,
    /// Where the entry is located within the input.
    entry_range_info: EntryRangeInfo,
    /// Caches passed on when entering this entry as a directory.
    caches: Arc<Caches>,
}
656 | ||
29c17fc0 WB |
657 | impl<T: Clone + ReadAt> FileEntryImpl<T> { |
658 | pub async fn enter_directory(&self) -> io::Result<DirectoryImpl<T>> { | |
6cd4f635 WB |
659 | if !self.entry.is_dir() { |
660 | io_bail!("enter_directory() on a non-directory"); | |
661 | } | |
662 | ||
9d8af6f2 WB |
663 | DirectoryImpl::open_at_end( |
664 | self.input.clone(), | |
06070d26 | 665 | self.entry_range_info.entry_range.end, |
9d8af6f2 WB |
666 | self.entry.path.clone(), |
667 | Arc::clone(&self.caches), | |
668 | ) | |
669 | .await | |
6cd4f635 WB |
670 | } |
671 | ||
6b9e2478 WB |
672 | /// For use with unsafe accessor methods. |
673 | pub fn content_range(&self) -> io::Result<Option<Range<u64>>> { | |
98b894a9 | 674 | match self.entry.kind { |
c76d3f98 WB |
675 | EntryKind::File { offset: None, .. } => { |
676 | io_bail!("cannot open file, reader provided no offset") | |
677 | } | |
678 | EntryKind::File { | |
679 | size, | |
680 | offset: Some(offset), | |
6b9e2478 WB |
681 | } => Ok(Some(offset..(offset + size))), |
682 | _ => Ok(None), | |
683 | } | |
684 | } | |
685 | ||
686 | pub async fn contents(&self) -> io::Result<FileContentsImpl<T>> { | |
687 | match self.content_range()? { | |
688 | Some(range) => Ok(FileContentsImpl::new(self.input.clone(), range)), | |
689 | None => io_bail!("not a file"), | |
98b894a9 WB |
690 | } |
691 | } | |
692 | ||
6cd4f635 WB |
693 | #[inline] |
694 | pub fn into_entry(self) -> Entry { | |
695 | self.entry | |
696 | } | |
697 | ||
698 | #[inline] | |
699 | pub fn entry(&self) -> &Entry { | |
700 | &self.entry | |
701 | } | |
ceb83806 WB |
702 | |
703 | /// Exposed for raw by-offset access methods (use with `open_dir_at_end`). | |
704 | #[inline] | |
06070d26 WB |
705 | pub fn entry_range_info(&self) -> &EntryRangeInfo { |
706 | &self.entry_range_info | |
ceb83806 | 707 | } |
6cd4f635 WB |
708 | } |
709 | ||
/// An iterator over the contents of a directory.
pub(crate) struct ReadDirImpl<'a, T> {
    /// The directory being iterated.
    dir: &'a DirectoryImpl<T>,
    /// Index into the directory's goodbye table of the entry to return next.
    at: usize,
}
715 | ||
29c17fc0 | 716 | impl<'a, T: Clone + ReadAt> ReadDirImpl<'a, T> { |
5cf335be | 717 | fn new(dir: &'a DirectoryImpl<T>, at: usize) -> Self { |
6cd4f635 WB |
718 | Self { dir, at } |
719 | } | |
720 | ||
98b894a9 | 721 | /// Get the next entry. |
29c17fc0 | 722 | pub async fn next(&mut self) -> io::Result<Option<DirEntryImpl<'a, T>>> { |
6cd4f635 WB |
723 | if self.at == self.dir.table.len() { |
724 | Ok(None) | |
725 | } else { | |
726 | let cursor = self.dir.get_cursor(self.at).await?; | |
727 | self.at += 1; | |
728 | Ok(Some(cursor)) | |
729 | } | |
730 | } | |
98b894a9 WB |
731 | |
732 | /// Efficient alternative to `Iterator::skip`. | |
733 | #[inline] | |
734 | pub fn skip(self, n: usize) -> Self { | |
735 | Self { | |
736 | at: (self.at + n).min(self.dir.table.len()), | |
737 | dir: self.dir, | |
738 | } | |
739 | } | |
740 | ||
741 | /// Efficient alternative to `Iterator::count`. | |
742 | #[inline] | |
743 | pub fn count(self) -> usize { | |
744 | self.dir.table.len() | |
745 | } | |
6cd4f635 WB |
746 | } |
747 | ||
/// A cursor pointing to a file in a directory.
///
/// At this point only the file name has been read and we remembered the position for finding the
/// actual data. This can be upgraded into a FileEntryImpl.
pub(crate) struct DirEntryImpl<'a, T: Clone + ReadAt> {
    /// The directory this cursor belongs to.
    dir: &'a DirectoryImpl<T>,
    /// The file name read from the filename entry.
    file_name: PathBuf,
    /// Where to find the entry's data within the input.
    entry_range_info: EntryRangeInfo,
    /// Caches passed on to the file entry created from this cursor.
    caches: Arc<Caches>,
}
758 | ||
29c17fc0 | 759 | impl<'a, T: Clone + ReadAt> DirEntryImpl<'a, T> { |
6cd4f635 WB |
760 | pub fn file_name(&self) -> &Path { |
761 | &self.file_name | |
762 | } | |
763 | ||
aabb78a4 | 764 | async fn decode_entry(&self) -> io::Result<FileEntryImpl<T>> { |
c76d3f98 | 765 | let (entry, _decoder) = self |
6cd4f635 | 766 | .dir |
1250e3ea WB |
767 | .decode_one_entry( |
768 | self.entry_range_info.entry_range.clone(), | |
769 | Some(&self.file_name), | |
770 | ) | |
6cd4f635 | 771 | .await?; |
6cd4f635 WB |
772 | |
773 | Ok(FileEntryImpl { | |
29c17fc0 | 774 | input: self.dir.input.clone(), |
6cd4f635 | 775 | entry, |
06070d26 | 776 | entry_range_info: self.entry_range_info.clone(), |
9d8af6f2 | 777 | caches: Arc::clone(&self.caches), |
6cd4f635 WB |
778 | }) |
779 | } | |
ceb83806 WB |
780 | |
781 | /// Exposed for raw by-offset access methods. | |
782 | #[inline] | |
06070d26 WB |
783 | pub fn entry_range_info(&self) -> &EntryRangeInfo { |
784 | &self.entry_range_info | |
ceb83806 | 785 | } |
6cd4f635 WB |
786 | } |
787 | ||
/// A reader for file contents.
///
/// Read offsets are relative to the start of `range`.
pub(crate) struct FileContentsImpl<T> {
    /// The reader providing the archive data.
    input: T,

    /// Absolute offset inside the `input`.
    range: Range<u64>,
}
795 | ||
796 | impl<T: Clone + ReadAt> FileContentsImpl<T> { | |
797 | pub fn new(input: T, range: Range<u64>) -> Self { | |
798 | Self { input, range } | |
799 | } | |
800 | ||
801 | #[inline] | |
802 | pub fn file_size(&self) -> u64 { | |
803 | self.range.end - self.range.start | |
804 | } | |
805 | ||
806 | async fn read_at(&self, mut buf: &mut [u8], offset: u64) -> io::Result<usize> { | |
807 | let size = self.file_size(); | |
808 | if offset >= size { | |
809 | return Ok(0); | |
810 | } | |
811 | let remaining = size - offset; | |
812 | ||
813 | if remaining < buf.len() as u64 { | |
814 | buf = &mut buf[..(remaining as usize)]; | |
815 | } | |
816 | ||
c68a29b5 | 817 | read_at(&self.input, buf, self.range.start + offset).await |
98b894a9 WB |
818 | } |
819 | } | |
820 | ||
d3a83ee3 WB |
821 | impl<T: Clone + ReadAt> ReadAt for FileContentsImpl<T> { |
822 | fn poll_read_at( | |
823 | self: Pin<&Self>, | |
824 | cx: &mut Context, | |
825 | mut buf: &mut [u8], | |
826 | offset: u64, | |
827 | ) -> Poll<io::Result<usize>> { | |
828 | let size = self.file_size(); | |
829 | if offset >= size { | |
830 | return Poll::Ready(Ok(0)); | |
831 | } | |
832 | let remaining = size - offset; | |
833 | ||
834 | if remaining < buf.len() as u64 { | |
835 | buf = &mut buf[..(remaining as usize)]; | |
836 | } | |
837 | ||
838 | let offset = self.range.start + offset; | |
839 | unsafe { self.map_unchecked(|this| &this.input) }.poll_read_at(cx, buf, offset) | |
840 | } | |
841 | } | |
842 | ||
/// Adapter which reads a range of a `ReadAt` input sequentially.
#[doc(hidden)]
pub struct SeqReadAtAdapter<T> {
    /// The reader providing the data.
    input: T,
    /// The part of the range which is still left to read; its start advances with every read.
    range: Range<u64>,
}
848 | ||
29c17fc0 WB |
849 | impl<T: ReadAt> SeqReadAtAdapter<T> { |
850 | pub fn new(input: T, range: Range<u64>) -> Self { | |
70acf637 WB |
851 | if range.end < range.start { |
852 | panic!("BAD SEQ READ AT ADAPTER"); | |
853 | } | |
6cd4f635 WB |
854 | Self { input, range } |
855 | } | |
856 | ||
857 | #[inline] | |
858 | fn remaining(&self) -> usize { | |
859 | (self.range.end - self.range.start) as usize | |
860 | } | |
861 | } | |
862 | ||
29c17fc0 | 863 | impl<T: ReadAt> decoder::SeqRead for SeqReadAtAdapter<T> { |
6cd4f635 WB |
864 | fn poll_seq_read( |
865 | self: Pin<&mut Self>, | |
866 | cx: &mut Context, | |
867 | buf: &mut [u8], | |
868 | ) -> Poll<io::Result<usize>> { | |
869 | let len = buf.len().min(self.remaining()); | |
870 | let buf = &mut buf[..len]; | |
871 | ||
29c17fc0 | 872 | let this = unsafe { self.get_unchecked_mut() }; |
6cd4f635 WB |
873 | |
874 | let got = ready!(unsafe { | |
29c17fc0 | 875 | Pin::new_unchecked(&this.input).poll_read_at(cx, buf, this.range.start) |
6cd4f635 WB |
876 | })?; |
877 | this.range.start += got as u64; | |
878 | Poll::Ready(Ok(got)) | |
879 | } | |
880 | ||
881 | fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> { | |
882 | Poll::Ready(Some(Ok(self.range.start))) | |
883 | } | |
884 | } |