]>
Commit | Line | Data |
---|---|---|
6cd4f635 WB |
1 | //! Random access for PXAR files. |
2 | ||
dc4a2854 | 3 | use std::ffi::{OsStr, OsString}; |
6cd4f635 | 4 | use std::io; |
dc4a2854 | 5 | use std::mem::{self, size_of, size_of_val, MaybeUninit}; |
6cd4f635 WB |
6 | use std::ops::Range; |
7 | use std::os::unix::ffi::{OsStrExt, OsStringExt}; | |
8 | use std::path::{Path, PathBuf}; | |
9 | use std::pin::Pin; | |
9d8af6f2 | 10 | use std::sync::Arc; |
6cd4f635 WB |
11 | use std::task::{Context, Poll}; |
12 | ||
13 | use endian_trait::Endian; | |
14 | ||
fbddffdc | 15 | use crate::binary_tree_array; |
6cd4f635 WB |
16 | use crate::decoder::{self, DecoderImpl}; |
17 | use crate::format::{self, GoodbyeItem}; | |
18 | use crate::poll_fn::poll_fn; | |
19 | use crate::util; | |
98b894a9 | 20 | use crate::{Entry, EntryKind}; |
6cd4f635 WB |
21 | |
22 | pub mod aio; | |
9d8af6f2 | 23 | pub mod cache; |
6cd4f635 WB |
24 | pub mod sync; |
25 | ||
26 | #[doc(inline)] | |
2c23bd09 | 27 | pub use sync::{Accessor, DirEntry, Directory, FileEntry, ReadDir}; |
6cd4f635 | 28 | |
9d8af6f2 WB |
29 | use cache::Cache; |
30 | ||
06070d26 WB |
/// Offsets locating an entry inside the archive, used by the unsafe raw random-access methods.
#[derive(Clone, Debug)]
pub struct EntryRangeInfo {
    /// Offset of the entry's filename header, or `None` when the entry has none recorded
    /// (see [`EntryRangeInfo::toplevel`]).
    pub filename_header_offset: Option<u64>,
    /// Byte range covered by the entry itself.
    pub entry_range: Range<u64>,
}

impl EntryRangeInfo {
    /// Build the range info for an entry without a known filename header offset.
    pub fn toplevel(entry_range: Range<u64>) -> Self {
        EntryRangeInfo {
            entry_range,
            filename_header_offset: None,
        }
    }
}
46 | ||
6cd4f635 WB |
/// Random access read implementation.
///
/// Implementors read from an absolute `offset` through a shared (pinned) reference.
pub trait ReadAt {
    /// Try to read up to `buf.len()` bytes starting at `offset` into `buf`.
    ///
    /// Resolves to the number of bytes read; the helpers in this module treat a result of `0`
    /// as end of input.
    fn poll_read_at(
        self: Pin<&Self>,
        cx: &mut Context,
        buf: &mut [u8],
        offset: u64,
    ) -> Poll<io::Result<usize>>;
}
56 | ||
c68a29b5 WB |
57 | /// awaitable version of `poll_read_at`. |
58 | async fn read_at<T>(input: &T, buf: &mut [u8], offset: u64) -> io::Result<usize> | |
59 | where | |
60 | T: ReadAt + ?Sized, | |
61 | { | |
62 | poll_fn(|cx| unsafe { Pin::new_unchecked(input).poll_read_at(cx, buf, offset) }).await | |
63 | } | |
64 | ||
65 | /// `read_exact_at` - since that's what we _actually_ want most of the time. | |
66 | async fn read_exact_at<T>(input: &T, mut buf: &mut [u8], mut offset: u64) -> io::Result<()> | |
67 | where | |
68 | T: ReadAt + ?Sized, | |
69 | { | |
70 | while !buf.is_empty() { | |
71 | match read_at(input, buf, offset).await? { | |
72 | 0 => io_bail!("unexpected EOF"), | |
73 | got => { | |
74 | buf = &mut buf[got..]; | |
75 | offset += got as u64; | |
6cd4f635 WB |
76 | } |
77 | } | |
6cd4f635 | 78 | } |
c68a29b5 WB |
79 | Ok(()) |
80 | } | |
6cd4f635 | 81 | |
c68a29b5 WB |
82 | /// Helper to read into an `Endian`-implementing `struct`. |
83 | async fn read_entry_at<T, E: Endian>(input: &T, offset: u64) -> io::Result<E> | |
84 | where | |
85 | T: ReadAt + ?Sized, | |
86 | { | |
87 | let mut data = MaybeUninit::<E>::uninit(); | |
88 | let buf = | |
89 | unsafe { std::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, size_of::<E>()) }; | |
90 | read_exact_at(input, buf, offset).await?; | |
91 | Ok(unsafe { data.assume_init().from_le() }) | |
92 | } | |
6cd4f635 | 93 | |
c68a29b5 WB |
94 | /// Helper to read into an allocated byte vector. |
95 | async fn read_exact_data_at<T>(input: &T, size: usize, offset: u64) -> io::Result<Vec<u8>> | |
96 | where | |
97 | T: ReadAt + ?Sized, | |
98 | { | |
99 | let mut data = util::vec_new(size); | |
100 | read_exact_at(input, &mut data[..], offset).await?; | |
101 | Ok(data) | |
6cd4f635 WB |
102 | } |
103 | ||
29c17fc0 WB |
104 | /// Allow using trait objects for `T: ReadAt` |
105 | impl<'a> ReadAt for &(dyn ReadAt + 'a) { | |
106 | fn poll_read_at( | |
107 | self: Pin<&Self>, | |
108 | cx: &mut Context, | |
bb6540d7 WB |
109 | buf: &mut [u8], |
110 | offset: u64, | |
111 | ) -> Poll<io::Result<usize>> { | |
112 | unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) } | |
113 | } | |
114 | } | |
115 | ||
116 | /// Convenience impl for `Arc<dyn ReadAt + Send + Sync + 'static>`. Since `ReadAt` only requires | |
117 | /// immutable `&self`, this adds some convenience by allowing to just `Arc` any `'static` type that | |
118 | /// implemments `ReadAt` for type monomorphization. | |
119 | impl ReadAt for Arc<dyn ReadAt + Send + Sync + 'static> { | |
120 | fn poll_read_at( | |
121 | self: Pin<&Self>, | |
122 | cx: &mut Context, | |
29c17fc0 WB |
123 | buf: &mut [u8], |
124 | offset: u64, | |
125 | ) -> Poll<io::Result<usize>> { | |
d3a83ee3 | 126 | unsafe { Pin::new_unchecked(&**self).poll_read_at(cx, buf, offset) } |
29c17fc0 WB |
127 | } |
128 | } | |
129 | ||
b764a2b1 | 130 | #[derive(Clone)] |
9d8af6f2 WB |
131 | struct Caches { |
132 | /// The goodbye table cache maps goodbye table offsets to cache entries. | |
133 | gbt_cache: Option<Arc<dyn Cache<u64, [GoodbyeItem]> + Send + Sync>>, | |
134 | } | |
135 | ||
136 | impl Default for Caches { | |
137 | fn default() -> Self { | |
138 | Self { gbt_cache: None } | |
139 | } | |
140 | } | |
141 | ||
6cd4f635 | 142 | /// The random access state machine implementation. |
5cf335be | 143 | pub(crate) struct AccessorImpl<T> { |
6cd4f635 WB |
144 | input: T, |
145 | size: u64, | |
9d8af6f2 | 146 | caches: Arc<Caches>, |
6cd4f635 WB |
147 | } |
148 | ||
149 | impl<T: ReadAt> AccessorImpl<T> { | |
150 | pub async fn new(input: T, size: u64) -> io::Result<Self> { | |
151 | if size < (size_of::<GoodbyeItem>() as u64) { | |
152 | io_bail!("too small to contain a pxar archive"); | |
153 | } | |
9d8af6f2 WB |
154 | |
155 | Ok(Self { | |
156 | input, | |
157 | size, | |
158 | caches: Arc::new(Caches::default()), | |
159 | }) | |
6cd4f635 WB |
160 | } |
161 | ||
a2530fb7 WB |
162 | pub fn size(&self) -> u64 { |
163 | self.size | |
164 | } | |
165 | ||
29c17fc0 | 166 | pub async fn open_root_ref<'a>(&'a self) -> io::Result<DirectoryImpl<&'a dyn ReadAt>> { |
9d8af6f2 WB |
167 | DirectoryImpl::open_at_end( |
168 | &self.input as &dyn ReadAt, | |
169 | self.size, | |
170 | "/".into(), | |
171 | Arc::clone(&self.caches), | |
172 | ) | |
173 | .await | |
29c17fc0 | 174 | } |
b764a2b1 WB |
175 | |
176 | pub fn set_goodbye_table_cache( | |
177 | &mut self, | |
178 | cache: Option<Arc<dyn Cache<u64, [GoodbyeItem]> + Send + Sync>>, | |
179 | ) { | |
180 | let new_caches = Arc::new(Caches { | |
181 | gbt_cache: cache, | |
182 | ..*self.caches | |
183 | }); | |
184 | self.caches = new_caches; | |
185 | } | |
29c17fc0 WB |
186 | } |
187 | ||
6b9e2478 WB |
188 | async fn get_decoder<T: ReadAt>( |
189 | input: T, | |
190 | entry_range: Range<u64>, | |
191 | path: PathBuf, | |
192 | ) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> { | |
d3a83ee3 | 193 | Ok(DecoderImpl::new_full(SeqReadAtAdapter::new(input, entry_range), path).await?) |
6b9e2478 WB |
194 | } |
195 | ||
06070d26 WB |
196 | // NOTE: This performs the Decoder::read_next_item() behavior! Keep in mind when changing! |
197 | async fn get_decoder_at_filename<T: ReadAt>( | |
198 | input: T, | |
199 | entry_range: Range<u64>, | |
200 | path: PathBuf, | |
201 | ) -> io::Result<(DecoderImpl<SeqReadAtAdapter<T>>, u64)> { | |
202 | let mut decoder = get_decoder(input, entry_range, path).await?; | |
203 | decoder.path_lengths.push(0); | |
204 | decoder.read_next_header().await?; | |
205 | if decoder.current_header.htype != format::PXAR_FILENAME { | |
206 | io_bail!("expected filename entry, got {:?}", decoder.current_header.htype); | |
207 | } | |
208 | if decoder.read_current_item().await? != decoder::ItemResult::Entry { | |
209 | // impossible, since we checked the header type above for a "proper" error message | |
210 | io_bail!("unexpected decoder state"); | |
211 | } | |
212 | let entry_offset = decoder::seq_read_position(&mut decoder.input).await.transpose()? | |
213 | .ok_or_else(|| io_format_err!("reader provided no offset"))?; | |
214 | Ok((decoder, entry_offset)) | |
215 | } | |
216 | ||
29c17fc0 WB |
217 | impl<T: Clone + ReadAt> AccessorImpl<T> { |
218 | pub async fn open_root(&self) -> io::Result<DirectoryImpl<T>> { | |
9d8af6f2 WB |
219 | DirectoryImpl::open_at_end( |
220 | self.input.clone(), | |
221 | self.size, | |
222 | "/".into(), | |
223 | Arc::clone(&self.caches), | |
224 | ) | |
225 | .await | |
6cd4f635 | 226 | } |
ceb83806 WB |
227 | |
228 | /// Allow opening a directory at a specified offset. | |
229 | pub async unsafe fn open_dir_at_end(&self, offset: u64) -> io::Result<DirectoryImpl<T>> { | |
230 | DirectoryImpl::open_at_end( | |
231 | self.input.clone(), | |
232 | offset, | |
233 | "/".into(), | |
234 | Arc::clone(&self.caches), | |
235 | ) | |
236 | .await | |
237 | } | |
6b9e2478 WB |
238 | |
239 | /// Allow opening a regular file from a specified range. | |
240 | pub async unsafe fn open_file_at_range( | |
241 | &self, | |
06070d26 | 242 | entry_range_info: &EntryRangeInfo, |
6b9e2478 | 243 | ) -> io::Result<FileEntryImpl<T>> { |
06070d26 WB |
244 | let mut decoder = get_decoder( |
245 | self.input.clone(), | |
246 | entry_range_info.entry_range.clone(), | |
247 | PathBuf::new(), | |
248 | ).await?; | |
6b9e2478 WB |
249 | let entry = decoder |
250 | .next() | |
251 | .await | |
252 | .ok_or_else(|| io_format_err!("unexpected EOF while decoding file entry"))??; | |
253 | Ok(FileEntryImpl { | |
254 | input: self.input.clone(), | |
255 | entry, | |
06070d26 | 256 | entry_range_info: entry_range_info.clone(), |
6b9e2478 WB |
257 | caches: Arc::clone(&self.caches), |
258 | }) | |
259 | } | |
260 | ||
261 | /// Allow opening arbitrary contents from a specific range. | |
262 | pub unsafe fn open_contents_at_range(&self, range: Range<u64>) -> FileContentsImpl<T> { | |
263 | FileContentsImpl::new(self.input.clone(), range) | |
264 | } | |
6bfadb8a WB |
265 | |
266 | /// Following a hardlink breaks a couple of conventions we otherwise have, particularly we will | |
267 | /// never know the actual length of the target entry until we're done decoding it, so this | |
268 | /// needs to happen at the accessor level, rather than a "sub-entry-reader". | |
06070d26 WB |
269 | pub async fn follow_hardlink(&self, entry: &FileEntryImpl<T>) -> io::Result<FileEntryImpl<T>> { |
270 | let link_offset = match entry.entry.kind() { | |
271 | EntryKind::Hardlink(link) => link.offset, | |
272 | _ => io_bail!("cannot resolve a non-hardlink"), | |
273 | }; | |
274 | ||
275 | let entry_file_offset = entry | |
276 | .entry_range_info | |
277 | .filename_header_offset | |
278 | .ok_or_else(|| io_format_err!("cannot follow hardlink without a file entry header"))?; | |
279 | ||
280 | if link_offset > entry_file_offset { | |
281 | io_bail!("invalid offset in hardlink"); | |
282 | } | |
283 | ||
284 | let link_offset = entry_file_offset - link_offset; | |
285 | ||
286 | let (mut decoder, entry_offset) = get_decoder_at_filename( | |
6bfadb8a | 287 | self.input.clone(), |
06070d26 WB |
288 | link_offset..self.size, |
289 | PathBuf::new(), | |
b0487d4f WB |
290 | ) |
291 | .await?; | |
06070d26 | 292 | |
6bfadb8a WB |
293 | let entry = decoder |
294 | .next() | |
295 | .await | |
296 | .ok_or_else(|| io_format_err!("unexpected EOF while following a hardlink"))??; | |
297 | match entry.kind() { | |
298 | EntryKind::File { offset: None, .. } => { | |
299 | io_bail!("failed to follow hardlink, reader provided no offsets"); | |
300 | } | |
b0487d4f WB |
301 | EntryKind::File { |
302 | offset: Some(offset), | |
303 | size, | |
304 | } => { | |
06070d26 WB |
305 | let meta_size = offset - link_offset; |
306 | let entry_end = link_offset + meta_size + size; | |
6bfadb8a WB |
307 | Ok(FileEntryImpl { |
308 | input: self.input.clone(), | |
309 | entry, | |
06070d26 WB |
310 | entry_range_info: EntryRangeInfo { |
311 | filename_header_offset: Some(link_offset), | |
312 | entry_range: entry_offset..entry_end, | |
313 | }, | |
6bfadb8a WB |
314 | caches: Arc::clone(&self.caches), |
315 | }) | |
316 | } | |
317 | _ => io_bail!("hardlink does not point to a regular file"), | |
318 | } | |
319 | } | |
6cd4f635 WB |
320 | } |
321 | ||
322 | /// The directory random-access state machine implementation. | |
5cf335be | 323 | pub(crate) struct DirectoryImpl<T> { |
29c17fc0 | 324 | input: T, |
6cd4f635 WB |
325 | entry_ofs: u64, |
326 | goodbye_ofs: u64, | |
327 | size: u64, | |
9d8af6f2 | 328 | table: Arc<[GoodbyeItem]>, |
6cd4f635 | 329 | path: PathBuf, |
9d8af6f2 | 330 | caches: Arc<Caches>, |
6cd4f635 WB |
331 | } |
332 | ||
29c17fc0 | 333 | impl<T: Clone + ReadAt> DirectoryImpl<T> { |
6cd4f635 | 334 | /// Open a directory ending at the specified position. |
9d8af6f2 | 335 | async fn open_at_end( |
29c17fc0 | 336 | input: T, |
6cd4f635 WB |
337 | end_offset: u64, |
338 | path: PathBuf, | |
9d8af6f2 | 339 | caches: Arc<Caches>, |
29c17fc0 WB |
340 | ) -> io::Result<DirectoryImpl<T>> { |
341 | let tail = Self::read_tail_entry(&input, end_offset).await?; | |
6cd4f635 WB |
342 | |
343 | if end_offset < tail.size { | |
344 | io_bail!("goodbye tail size out of range"); | |
345 | } | |
346 | ||
347 | let goodbye_ofs = end_offset - tail.size; | |
348 | ||
349 | if goodbye_ofs < tail.offset { | |
350 | io_bail!("goodbye offset out of range"); | |
351 | } | |
352 | ||
353 | let entry_ofs = goodbye_ofs - tail.offset; | |
354 | let size = end_offset - entry_ofs; | |
355 | ||
9d8af6f2 WB |
356 | let table: Option<Arc<[GoodbyeItem]>> = caches |
357 | .gbt_cache | |
358 | .as_ref() | |
359 | .and_then(|cache| cache.fetch(goodbye_ofs)); | |
360 | ||
6cd4f635 WB |
361 | let mut this = Self { |
362 | input, | |
363 | entry_ofs, | |
364 | goodbye_ofs, | |
365 | size, | |
9d8af6f2 | 366 | table: table.as_ref().map_or_else(|| Arc::new([]), Arc::clone), |
6cd4f635 | 367 | path, |
9d8af6f2 | 368 | caches, |
6cd4f635 WB |
369 | }; |
370 | ||
371 | // sanity check: | |
372 | if this.table_size() % (size_of::<GoodbyeItem>() as u64) != 0 { | |
373 | io_bail!("invalid goodbye table size: {}", this.table_size()); | |
374 | } | |
375 | ||
9d8af6f2 WB |
376 | if table.is_none() { |
377 | this.table = this.load_table().await?; | |
378 | if let Some(ref cache) = this.caches.gbt_cache { | |
379 | cache.insert(goodbye_ofs, Arc::clone(&this.table)); | |
380 | } | |
381 | } | |
6cd4f635 WB |
382 | |
383 | Ok(this) | |
384 | } | |
385 | ||
386 | /// Load the entire goodbye table: | |
9d8af6f2 | 387 | async fn load_table(&self) -> io::Result<Arc<[GoodbyeItem]>> { |
6cd4f635 WB |
388 | let len = self.len(); |
389 | let mut data = Vec::with_capacity(self.len()); | |
390 | unsafe { | |
391 | data.set_len(len); | |
392 | let slice = std::slice::from_raw_parts_mut( | |
393 | data.as_mut_ptr() as *mut u8, | |
2c23bd09 | 394 | len * size_of::<GoodbyeItem>(), |
6cd4f635 | 395 | ); |
c68a29b5 | 396 | read_exact_at(&self.input, slice, self.table_offset()).await?; |
6cd4f635 WB |
397 | drop(slice); |
398 | } | |
9d8af6f2 | 399 | Ok(Arc::from(data)) |
6cd4f635 WB |
400 | } |
401 | ||
402 | #[inline] | |
403 | fn end_offset(&self) -> u64 { | |
404 | self.entry_ofs + self.size | |
405 | } | |
406 | ||
dc4a2854 WB |
407 | #[inline] |
408 | fn entry_range(&self) -> Range<u64> { | |
409 | self.entry_ofs..self.end_offset() | |
410 | } | |
411 | ||
6cd4f635 WB |
412 | #[inline] |
413 | fn table_size(&self) -> u64 { | |
414 | (self.end_offset() - self.goodbye_ofs) - (size_of::<format::Header>() as u64) | |
415 | } | |
416 | ||
417 | #[inline] | |
418 | fn table_offset(&self) -> u64 { | |
419 | self.goodbye_ofs + (size_of::<format::Header>() as u64) | |
420 | } | |
421 | ||
422 | /// Length *excluding* the tail marker! | |
423 | #[inline] | |
424 | fn len(&self) -> usize { | |
425 | (self.table_size() / (size_of::<GoodbyeItem>() as u64)) as usize - 1 | |
426 | } | |
427 | ||
428 | /// Read the goodbye tail and perform some sanity checks. | |
c68a29b5 | 429 | async fn read_tail_entry(input: &T, end_offset: u64) -> io::Result<GoodbyeItem> { |
6cd4f635 WB |
430 | if end_offset < (size_of::<GoodbyeItem>() as u64) { |
431 | io_bail!("goodbye tail does not fit"); | |
432 | } | |
433 | ||
434 | let tail_offset = end_offset - (size_of::<GoodbyeItem>() as u64); | |
c68a29b5 | 435 | let tail: GoodbyeItem = read_entry_at(input, tail_offset).await?; |
6cd4f635 WB |
436 | |
437 | if tail.hash != format::PXAR_GOODBYE_TAIL_MARKER { | |
438 | io_bail!("no goodbye tail marker found"); | |
439 | } | |
440 | ||
441 | Ok(tail) | |
442 | } | |
443 | ||
444 | /// Get a decoder for the directory contents. | |
29c17fc0 | 445 | pub(crate) async fn decode_full(&self) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> { |
dc4a2854 | 446 | let (dir, decoder) = self.decode_one_entry(self.entry_range(), None).await?; |
6cd4f635 WB |
447 | if !dir.is_dir() { |
448 | io_bail!("directory does not seem to be a directory"); | |
449 | } | |
450 | Ok(decoder) | |
451 | } | |
452 | ||
453 | async fn get_decoder( | |
454 | &self, | |
455 | entry_range: Range<u64>, | |
456 | file_name: Option<&Path>, | |
29c17fc0 | 457 | ) -> io::Result<DecoderImpl<SeqReadAtAdapter<T>>> { |
6b9e2478 WB |
458 | get_decoder( |
459 | self.input.clone(), | |
460 | entry_range, | |
6cd4f635 WB |
461 | match file_name { |
462 | None => self.path.clone(), | |
463 | Some(file) => self.path.join(file), | |
464 | }, | |
d3a83ee3 WB |
465 | ) |
466 | .await | |
6cd4f635 WB |
467 | } |
468 | ||
469 | async fn decode_one_entry( | |
470 | &self, | |
471 | entry_range: Range<u64>, | |
472 | file_name: Option<&Path>, | |
29c17fc0 | 473 | ) -> io::Result<(Entry, DecoderImpl<SeqReadAtAdapter<T>>)> { |
6cd4f635 WB |
474 | let mut decoder = self.get_decoder(entry_range, file_name).await?; |
475 | let entry = decoder | |
476 | .next() | |
477 | .await | |
478 | .ok_or_else(|| io_format_err!("unexpected EOF while decoding directory entry"))??; | |
479 | Ok((entry, decoder)) | |
480 | } | |
481 | ||
fbddffdc WB |
482 | fn lookup_hash_position(&self, hash: u64, start: usize, skip: usize) -> Option<usize> { |
483 | binary_tree_array::search_by(&self.table, start, skip, |i| hash.cmp(&i.hash)) | |
6cd4f635 WB |
484 | } |
485 | ||
a5922fbc | 486 | pub async fn lookup_self(&self) -> io::Result<FileEntryImpl<T>> { |
c76d3f98 | 487 | let (entry, _decoder) = self.decode_one_entry(self.entry_range(), None).await?; |
dc4a2854 WB |
488 | Ok(FileEntryImpl { |
489 | input: self.input.clone(), | |
490 | entry, | |
06070d26 WB |
491 | entry_range_info: EntryRangeInfo { |
492 | filename_header_offset: None, | |
493 | entry_range: self.entry_range(), | |
494 | }, | |
9d8af6f2 | 495 | caches: Arc::clone(&self.caches), |
dc4a2854 WB |
496 | }) |
497 | } | |
498 | ||
6cd4f635 | 499 | /// Lookup a directory entry. |
29c17fc0 | 500 | pub async fn lookup(&self, path: &Path) -> io::Result<Option<FileEntryImpl<T>>> { |
dc4a2854 WB |
501 | let mut cur: Option<FileEntryImpl<T>> = None; |
502 | ||
503 | let mut first = true; | |
504 | for component in path.components() { | |
505 | use std::path::Component; | |
506 | ||
507 | let first = mem::replace(&mut first, false); | |
508 | ||
509 | let component = match component { | |
510 | Component::Normal(path) => path, | |
511 | Component::ParentDir => io_bail!("cannot enter parent directory in archive"), | |
512 | Component::RootDir | Component::CurDir if first => { | |
513 | cur = Some(self.lookup_self().await?); | |
514 | continue; | |
515 | } | |
516 | Component::CurDir => continue, | |
517 | _ => io_bail!("invalid component in path"), | |
518 | }; | |
519 | ||
520 | let next = match cur { | |
521 | Some(entry) => { | |
522 | entry | |
523 | .enter_directory() | |
524 | .await? | |
525 | .lookup_component(component) | |
526 | .await? | |
527 | } | |
528 | None => self.lookup_component(component).await?, | |
529 | }; | |
530 | ||
531 | if next.is_none() { | |
532 | return Ok(None); | |
533 | } | |
534 | ||
535 | cur = next; | |
536 | } | |
537 | ||
538 | Ok(cur) | |
539 | } | |
540 | ||
541 | /// Lookup a single directory entry component (does not handle multiple components in path) | |
542 | pub async fn lookup_component(&self, path: &OsStr) -> io::Result<Option<FileEntryImpl<T>>> { | |
543 | let hash = format::hash_filename(path.as_bytes()); | |
fbddffdc | 544 | let first_index = match self.lookup_hash_position(hash, 0, 0) { |
6cd4f635 WB |
545 | Some(index) => index, |
546 | None => return Ok(None), | |
547 | }; | |
548 | ||
fbddffdc WB |
549 | // Lookup FILENAME, if the hash matches but the filename doesn't, check for a duplicate |
550 | // hash once found, use the GoodbyeItem's offset+size as well as the file's Entry to return | |
551 | // a DirEntry::Dir or Dir::Entry. | |
552 | // | |
553 | let mut dup = 0; | |
554 | loop { | |
555 | let index = match self.lookup_hash_position(hash, first_index, dup) { | |
556 | Some(index) => index, | |
557 | None => return Ok(None), | |
558 | }; | |
6cd4f635 | 559 | |
6cd4f635 WB |
560 | let cursor = self.get_cursor(index).await?; |
561 | if cursor.file_name == path { | |
aabb78a4 | 562 | return Ok(Some(cursor.decode_entry().await?)); |
6cd4f635 | 563 | } |
6cd4f635 | 564 | |
fbddffdc WB |
565 | dup += 1; |
566 | } | |
6cd4f635 WB |
567 | } |
568 | ||
29c17fc0 | 569 | async fn get_cursor<'a>(&'a self, index: usize) -> io::Result<DirEntryImpl<'a, T>> { |
6cd4f635 WB |
570 | let entry = &self.table[index]; |
571 | let file_goodbye_ofs = entry.offset; | |
572 | if self.goodbye_ofs < file_goodbye_ofs { | |
573 | io_bail!("invalid file offset"); | |
574 | } | |
575 | ||
576 | let file_ofs = self.goodbye_ofs - file_goodbye_ofs; | |
577 | let (file_name, entry_ofs) = self.read_filename_entry(file_ofs).await?; | |
578 | ||
70acf637 WB |
579 | let entry_range = Range { |
580 | start: entry_ofs, | |
581 | end: file_ofs + entry.size, | |
582 | }; | |
583 | if entry_range.end < entry_range.start { | |
584 | io_bail!( | |
585 | "bad file: invalid entry ranges for {:?}: \ | |
586 | start=0x{:x}, file_ofs=0x{:x}, size=0x{:x}", | |
587 | file_name, | |
588 | entry_ofs, | |
589 | file_ofs, | |
590 | entry.size, | |
591 | ); | |
592 | } | |
593 | ||
6cd4f635 WB |
594 | Ok(DirEntryImpl { |
595 | dir: self, | |
596 | file_name, | |
06070d26 WB |
597 | entry_range_info: EntryRangeInfo { |
598 | filename_header_offset: Some(file_ofs), | |
599 | entry_range, | |
600 | }, | |
9d8af6f2 | 601 | caches: Arc::clone(&self.caches), |
6cd4f635 WB |
602 | }) |
603 | } | |
604 | ||
605 | async fn read_filename_entry(&self, file_ofs: u64) -> io::Result<(PathBuf, u64)> { | |
c68a29b5 | 606 | let head: format::Header = read_entry_at(&self.input, file_ofs).await?; |
6cd4f635 WB |
607 | if head.htype != format::PXAR_FILENAME { |
608 | io_bail!("expected PXAR_FILENAME header, found: {:x}", head.htype); | |
609 | } | |
610 | ||
c68a29b5 WB |
611 | let mut path = read_exact_data_at( |
612 | &self.input, | |
613 | head.content_size() as usize, | |
614 | file_ofs + (size_of_val(&head) as u64), | |
615 | ) | |
616 | .await?; | |
6cd4f635 WB |
617 | |
618 | if path.pop() != Some(0) { | |
619 | io_bail!("invalid file name (missing terminating zero)"); | |
620 | } | |
621 | ||
622 | if path.is_empty() { | |
623 | io_bail!("invalid empty file name"); | |
624 | } | |
625 | ||
626 | let file_name = PathBuf::from(OsString::from_vec(path)); | |
627 | format::check_file_name(&file_name)?; | |
628 | ||
629 | Ok((file_name, file_ofs + head.full_size())) | |
630 | } | |
631 | ||
29c17fc0 | 632 | pub fn read_dir(&self) -> ReadDirImpl<T> { |
6cd4f635 WB |
633 | ReadDirImpl::new(self, 0) |
634 | } | |
d3a83ee3 WB |
635 | |
636 | pub fn entry_count(&self) -> usize { | |
637 | self.table.len() | |
638 | } | |
6cd4f635 WB |
639 | } |
640 | ||
641 | /// A file entry retrieved from a Directory. | |
93fa37fb | 642 | #[derive(Clone)] |
5cf335be | 643 | pub(crate) struct FileEntryImpl<T: Clone + ReadAt> { |
29c17fc0 | 644 | input: T, |
6cd4f635 | 645 | entry: Entry, |
06070d26 | 646 | entry_range_info: EntryRangeInfo, |
9d8af6f2 | 647 | caches: Arc<Caches>, |
6cd4f635 WB |
648 | } |
649 | ||
29c17fc0 WB |
650 | impl<T: Clone + ReadAt> FileEntryImpl<T> { |
651 | pub async fn enter_directory(&self) -> io::Result<DirectoryImpl<T>> { | |
6cd4f635 WB |
652 | if !self.entry.is_dir() { |
653 | io_bail!("enter_directory() on a non-directory"); | |
654 | } | |
655 | ||
9d8af6f2 WB |
656 | DirectoryImpl::open_at_end( |
657 | self.input.clone(), | |
06070d26 | 658 | self.entry_range_info.entry_range.end, |
9d8af6f2 WB |
659 | self.entry.path.clone(), |
660 | Arc::clone(&self.caches), | |
661 | ) | |
662 | .await | |
6cd4f635 WB |
663 | } |
664 | ||
6b9e2478 WB |
665 | /// For use with unsafe accessor methods. |
666 | pub fn content_range(&self) -> io::Result<Option<Range<u64>>> { | |
98b894a9 | 667 | match self.entry.kind { |
c76d3f98 WB |
668 | EntryKind::File { offset: None, .. } => { |
669 | io_bail!("cannot open file, reader provided no offset") | |
670 | } | |
671 | EntryKind::File { | |
672 | size, | |
673 | offset: Some(offset), | |
6b9e2478 WB |
674 | } => Ok(Some(offset..(offset + size))), |
675 | _ => Ok(None), | |
676 | } | |
677 | } | |
678 | ||
679 | pub async fn contents(&self) -> io::Result<FileContentsImpl<T>> { | |
680 | match self.content_range()? { | |
681 | Some(range) => Ok(FileContentsImpl::new(self.input.clone(), range)), | |
682 | None => io_bail!("not a file"), | |
98b894a9 WB |
683 | } |
684 | } | |
685 | ||
6cd4f635 WB |
686 | #[inline] |
687 | pub fn into_entry(self) -> Entry { | |
688 | self.entry | |
689 | } | |
690 | ||
691 | #[inline] | |
692 | pub fn entry(&self) -> &Entry { | |
693 | &self.entry | |
694 | } | |
ceb83806 WB |
695 | |
696 | /// Exposed for raw by-offset access methods (use with `open_dir_at_end`). | |
697 | #[inline] | |
06070d26 WB |
698 | pub fn entry_range_info(&self) -> &EntryRangeInfo { |
699 | &self.entry_range_info | |
ceb83806 | 700 | } |
6cd4f635 WB |
701 | } |
702 | ||
703 | /// An iterator over the contents of a directory. | |
5cf335be | 704 | pub(crate) struct ReadDirImpl<'a, T> { |
29c17fc0 | 705 | dir: &'a DirectoryImpl<T>, |
6cd4f635 WB |
706 | at: usize, |
707 | } | |
708 | ||
29c17fc0 | 709 | impl<'a, T: Clone + ReadAt> ReadDirImpl<'a, T> { |
5cf335be | 710 | fn new(dir: &'a DirectoryImpl<T>, at: usize) -> Self { |
6cd4f635 WB |
711 | Self { dir, at } |
712 | } | |
713 | ||
98b894a9 | 714 | /// Get the next entry. |
29c17fc0 | 715 | pub async fn next(&mut self) -> io::Result<Option<DirEntryImpl<'a, T>>> { |
6cd4f635 WB |
716 | if self.at == self.dir.table.len() { |
717 | Ok(None) | |
718 | } else { | |
719 | let cursor = self.dir.get_cursor(self.at).await?; | |
720 | self.at += 1; | |
721 | Ok(Some(cursor)) | |
722 | } | |
723 | } | |
98b894a9 WB |
724 | |
725 | /// Efficient alternative to `Iterator::skip`. | |
726 | #[inline] | |
727 | pub fn skip(self, n: usize) -> Self { | |
728 | Self { | |
729 | at: (self.at + n).min(self.dir.table.len()), | |
730 | dir: self.dir, | |
731 | } | |
732 | } | |
733 | ||
734 | /// Efficient alternative to `Iterator::count`. | |
735 | #[inline] | |
736 | pub fn count(self) -> usize { | |
737 | self.dir.table.len() | |
738 | } | |
6cd4f635 WB |
739 | } |
740 | ||
741 | /// A cursor pointing to a file in a directory. | |
742 | /// | |
743 | /// At this point only the file name has been read and we remembered the position for finding the | |
744 | /// actual data. This can be upgraded into a FileEntryImpl. | |
5cf335be | 745 | pub(crate) struct DirEntryImpl<'a, T: Clone + ReadAt> { |
29c17fc0 | 746 | dir: &'a DirectoryImpl<T>, |
6cd4f635 | 747 | file_name: PathBuf, |
06070d26 | 748 | entry_range_info: EntryRangeInfo, |
9d8af6f2 | 749 | caches: Arc<Caches>, |
6cd4f635 WB |
750 | } |
751 | ||
29c17fc0 | 752 | impl<'a, T: Clone + ReadAt> DirEntryImpl<'a, T> { |
6cd4f635 WB |
753 | pub fn file_name(&self) -> &Path { |
754 | &self.file_name | |
755 | } | |
756 | ||
aabb78a4 | 757 | async fn decode_entry(&self) -> io::Result<FileEntryImpl<T>> { |
c76d3f98 | 758 | let (entry, _decoder) = self |
6cd4f635 | 759 | .dir |
06070d26 | 760 | .decode_one_entry(self.entry_range_info.entry_range.clone(), Some(&self.file_name)) |
6cd4f635 | 761 | .await?; |
6cd4f635 WB |
762 | |
763 | Ok(FileEntryImpl { | |
29c17fc0 | 764 | input: self.dir.input.clone(), |
6cd4f635 | 765 | entry, |
06070d26 | 766 | entry_range_info: self.entry_range_info.clone(), |
9d8af6f2 | 767 | caches: Arc::clone(&self.caches), |
6cd4f635 WB |
768 | }) |
769 | } | |
ceb83806 WB |
770 | |
771 | /// Exposed for raw by-offset access methods. | |
772 | #[inline] | |
06070d26 WB |
773 | pub fn entry_range_info(&self) -> &EntryRangeInfo { |
774 | &self.entry_range_info | |
ceb83806 | 775 | } |
6cd4f635 WB |
776 | } |
777 | ||
98b894a9 | 778 | /// A reader for file contents. |
5cf335be | 779 | pub(crate) struct FileContentsImpl<T> { |
98b894a9 WB |
780 | input: T, |
781 | ||
782 | /// Absolute offset inside the `input`. | |
783 | range: Range<u64>, | |
784 | } | |
785 | ||
786 | impl<T: Clone + ReadAt> FileContentsImpl<T> { | |
787 | pub fn new(input: T, range: Range<u64>) -> Self { | |
788 | Self { input, range } | |
789 | } | |
790 | ||
791 | #[inline] | |
792 | pub fn file_size(&self) -> u64 { | |
793 | self.range.end - self.range.start | |
794 | } | |
795 | ||
796 | async fn read_at(&self, mut buf: &mut [u8], offset: u64) -> io::Result<usize> { | |
797 | let size = self.file_size(); | |
798 | if offset >= size { | |
799 | return Ok(0); | |
800 | } | |
801 | let remaining = size - offset; | |
802 | ||
803 | if remaining < buf.len() as u64 { | |
804 | buf = &mut buf[..(remaining as usize)]; | |
805 | } | |
806 | ||
c68a29b5 | 807 | read_at(&self.input, buf, self.range.start + offset).await |
98b894a9 WB |
808 | } |
809 | } | |
810 | ||
d3a83ee3 WB |
811 | impl<T: Clone + ReadAt> ReadAt for FileContentsImpl<T> { |
812 | fn poll_read_at( | |
813 | self: Pin<&Self>, | |
814 | cx: &mut Context, | |
815 | mut buf: &mut [u8], | |
816 | offset: u64, | |
817 | ) -> Poll<io::Result<usize>> { | |
818 | let size = self.file_size(); | |
819 | if offset >= size { | |
820 | return Poll::Ready(Ok(0)); | |
821 | } | |
822 | let remaining = size - offset; | |
823 | ||
824 | if remaining < buf.len() as u64 { | |
825 | buf = &mut buf[..(remaining as usize)]; | |
826 | } | |
827 | ||
828 | let offset = self.range.start + offset; | |
829 | unsafe { self.map_unchecked(|this| &this.input) }.poll_read_at(cx, buf, offset) | |
830 | } | |
831 | } | |
832 | ||
6cd4f635 | 833 | #[doc(hidden)] |
29c17fc0 WB |
834 | pub struct SeqReadAtAdapter<T> { |
835 | input: T, | |
6cd4f635 WB |
836 | range: Range<u64>, |
837 | } | |
838 | ||
29c17fc0 WB |
839 | impl<T: ReadAt> SeqReadAtAdapter<T> { |
840 | pub fn new(input: T, range: Range<u64>) -> Self { | |
70acf637 WB |
841 | if range.end < range.start { |
842 | panic!("BAD SEQ READ AT ADAPTER"); | |
843 | } | |
6cd4f635 WB |
844 | Self { input, range } |
845 | } | |
846 | ||
847 | #[inline] | |
848 | fn remaining(&self) -> usize { | |
849 | (self.range.end - self.range.start) as usize | |
850 | } | |
851 | } | |
852 | ||
29c17fc0 | 853 | impl<T: ReadAt> decoder::SeqRead for SeqReadAtAdapter<T> { |
6cd4f635 WB |
854 | fn poll_seq_read( |
855 | self: Pin<&mut Self>, | |
856 | cx: &mut Context, | |
857 | buf: &mut [u8], | |
858 | ) -> Poll<io::Result<usize>> { | |
859 | let len = buf.len().min(self.remaining()); | |
860 | let buf = &mut buf[..len]; | |
861 | ||
29c17fc0 | 862 | let this = unsafe { self.get_unchecked_mut() }; |
6cd4f635 WB |
863 | |
864 | let got = ready!(unsafe { | |
29c17fc0 | 865 | Pin::new_unchecked(&this.input).poll_read_at(cx, buf, this.range.start) |
6cd4f635 WB |
866 | })?; |
867 | this.range.start += got as u64; | |
868 | Poll::Ready(Ok(got)) | |
869 | } | |
870 | ||
871 | fn poll_position(self: Pin<&mut Self>, _cx: &mut Context) -> Poll<Option<io::Result<u64>>> { | |
872 | Poll::Ready(Some(Ok(self.range.start))) | |
873 | } | |
874 | } |