1 //! *pxar* format decoder for seekable files
//! This module contains the code to decode *pxar* archive files.
5 use std
::convert
::TryFrom
;
6 use std
::ffi
::{OsString, OsStr}
;
7 use std
::io
::{Read, Seek, SeekFrom}
;
8 use std
::path
::{Path, PathBuf}
;
9 use std
::os
::unix
::ffi
::OsStrExt
;
14 use super::binary_search_tree
::search_binary_tree_by
;
15 use super::format_definition
::*;
16 use super::sequential_decoder
::SequentialDecoder
;
17 use super::match_pattern
::MatchPattern
;
19 use proxmox
::tools
::io
::ReadExt
;
21 pub struct DirectoryEntry
{
22 /// Points to the `PxarEntry` of the directory
24 /// Points past the goodbye table tail
27 pub filename
: OsString
,
28 /// Entry (mode, permissions)
30 /// Extended attributes
31 pub xattr
: PxarAttributes
,
/// Combination of [`Read`] and [`Seek`], used to build decoder trait objects.
///
/// A trait object can only name one non-auto trait, so this helper trait
/// bundles both capabilities under a single name (e.g. `Box<dyn ReadSeek>`).
trait ReadSeek: Read + Seek {}

/// Blanket implementation: every type that can both read and seek is a
/// `ReadSeek`.
impl<T> ReadSeek for T where T: Read + Seek {}
40 // This one needs Read+Seek
42 inner
: SequentialDecoder
<Box
<dyn ReadSeek
+ Send
>>,
/// In-memory size of a `PxarHeader` in bytes, as `u64` so it can be used
/// directly in archive offset arithmetic.
const HEADER_SIZE: u64 = std::mem::size_of::<PxarHeader>() as u64;
/// In-memory size of a `PxarGoodbyeItem` in bytes, as `u64` so it can be used
/// directly in archive offset arithmetic.
const GOODBYE_ITEM_SIZE: u64 = std::mem::size_of::<PxarGoodbyeItem>() as u64;
51 pub fn new
<R
: Read
+ Seek
+ Send
+ '
static>(mut reader
: R
) -> Result
<Self, Error
> {
52 let root_end
= reader
.seek(SeekFrom
::End(0))?
;
53 let boxed_reader
: Box
<dyn ReadSeek
+ '
static + Send
> = Box
::new(reader
);
54 let inner
= SequentialDecoder
::new(boxed_reader
, super::flags
::DEFAULT
);
56 Ok(Self { inner, root_start: 0, root_end }
)
59 pub fn set_callback
<F
: Fn(&Path
) -> Result
<(), Error
> + Send
+ '
static>(&mut self, callback
: F
) {
60 self.inner
.set_callback(callback
);
63 pub fn root(&mut self) -> Result
<DirectoryEntry
, Error
> {
64 self.seek(SeekFrom
::Start(0))?
;
65 let header
: PxarHeader
= self.inner
.read_item()?
;
66 check_ca_header
::<PxarEntry
>(&header
, PXAR_ENTRY
)?
;
67 let entry
: PxarEntry
= self.inner
.read_item()?
;
68 let (header
, xattr
) = self.inner
.read_attributes()?
;
69 let size
= match header
.htype
{
70 PXAR_PAYLOAD
=> header
.size
- HEADER_SIZE
,
75 start
: self.root_start
,
77 filename
: OsString
::new(), // Empty
84 fn seek(&mut self, pos
: SeekFrom
) -> Result
<u64, Error
> {
85 let pos
= self.inner
.get_reader_mut().seek(pos
)?
;
89 pub(crate) fn root_end_offset(&self) -> u64 {
93 /// Restore the subarchive starting at `dir` to the provided target `path`.
95 /// Only restore the content matched by the MatchPattern `pattern`.
96 /// An empty Vec `pattern` means restore all.
97 pub fn restore(&mut self, dir
: &DirectoryEntry
, path
: &Path
, pattern
: &Vec
<MatchPattern
>) -> Result
<(), Error
> {
98 let start
= dir
.start
;
99 self.seek(SeekFrom
::Start(start
))?
;
100 self.inner
.restore(path
, pattern
)?
;
105 pub(crate) fn read_directory_entry(
109 ) -> Result
<DirectoryEntry
, Error
> {
110 self.seek(SeekFrom
::Start(start
))?
;
112 let head
: PxarHeader
= self.inner
.read_item()?
;
114 if head
.htype
!= PXAR_FILENAME
{
115 bail
!("wrong filename header type for object [{}..{}]", start
, end
);
118 let entry_start
= start
+ head
.size
;
120 let filename
= self.inner
.read_filename(head
.size
)?
;
122 let head
: PxarHeader
= self.inner
.read_item()?
;
123 if head
.htype
== PXAR_FORMAT_HARDLINK
{
124 let (_
, offset
) = self.inner
.read_hardlink(head
.size
)?
;
// TODO: How to find the correct end offset for a hardlink target?
126 // This is a bit tricky since we cannot find correct end in an efficient
127 // way, on the other hand it doesn't really matter (for now) since target
128 // is never a directory and end is not used in such cases.
129 return self.read_directory_entry(start
- offset
, end
);
131 check_ca_header
::<PxarEntry
>(&head
, PXAR_ENTRY
)?
;
132 let entry
: PxarEntry
= self.inner
.read_item()?
;
133 let (header
, xattr
) = self.inner
.read_attributes()?
;
134 let size
= match header
.htype
{
135 PXAR_PAYLOAD
=> header
.size
- HEADER_SIZE
,
149 /// Return the goodbye table based on the provided end offset.
151 /// Get the goodbye table entries and the start and end offsets of the
152 /// items they reference.
153 /// If the start offset is provided, we use that to check the consistency of
154 /// the data, else the start offset calculated based on the goodbye tail is
156 pub(crate) fn goodbye_table(
160 ) -> Result
<Vec
<(PxarGoodbyeItem
, u64, u64)>, Error
> {
161 self.seek(SeekFrom
::Start(end
- GOODBYE_ITEM_SIZE
))?
;
163 let tail
: PxarGoodbyeItem
= self.inner
.read_item()?
;
164 if tail
.hash
!= PXAR_GOODBYE_TAIL_MARKER
{
165 bail
!("missing goodbye tail marker for object at offset {}", end
);
168 // If the start offset was provided, we use and check based on that.
169 // If not, we rely on the offset calculated from the goodbye table entry.
170 let start
= start
.unwrap_or(end
- tail
.offset
- tail
.size
);
171 let goodbye_table_size
= tail
.size
;
172 if goodbye_table_size
< (HEADER_SIZE
+ GOODBYE_ITEM_SIZE
) {
173 bail
!("short goodbye table size for object [{}..{}]", start
, end
);
176 let goodbye_inner_size
= goodbye_table_size
- HEADER_SIZE
- GOODBYE_ITEM_SIZE
;
177 if (goodbye_inner_size
% GOODBYE_ITEM_SIZE
) != 0 {
179 "wrong goodbye inner table size for entry [{}..{}]",
185 let goodbye_start
= end
- goodbye_table_size
;
186 if tail
.offset
!= (goodbye_start
- start
) {
188 "wrong offset in goodbye tail marker for entry [{}..{}]",
194 self.seek(SeekFrom
::Start(goodbye_start
))?
;
195 let head
: PxarHeader
= self.inner
.read_item()?
;
196 if head
.htype
!= PXAR_GOODBYE
{
198 "wrong goodbye table header type for entry [{}..{}]",
204 if head
.size
!= goodbye_table_size
{
205 bail
!("wrong goodbye table size for entry [{}..{}]", start
, end
);
208 let mut gb_entries
= Vec
::new();
209 for i
in 0..goodbye_inner_size
/ GOODBYE_ITEM_SIZE
{
210 let item
: PxarGoodbyeItem
= self.inner
.read_item()?
;
211 if item
.offset
> (goodbye_start
- start
) {
213 "goodbye entry {} offset out of range [{}..{}] {} {} {}",
222 let item_start
= goodbye_start
- item
.offset
;
223 let item_end
= item_start
+ item
.size
;
224 if item_end
> goodbye_start
{
225 bail
!("goodbye entry {} end out of range [{}..{}]", i
, start
, end
);
227 gb_entries
.push((item
, item_start
, item_end
));
233 pub fn list_dir(&mut self, dir
: &DirectoryEntry
) -> Result
<Vec
<DirectoryEntry
>, Error
> {
234 let start
= dir
.start
;
237 //println!("list_dir1: {} {}", start, end);
239 if (end
- start
) < (HEADER_SIZE
+ GOODBYE_ITEM_SIZE
) {
240 bail
!("detected short object [{}..{}]", start
, end
);
243 let mut result
= vec
![];
244 let goodbye_table
= self.goodbye_table(Some(start
), end
)?
;
245 for (_
, item_start
, item_end
) in goodbye_table
{
246 let entry
= self.read_directory_entry(item_start
, item_end
)?
;
247 //println!("ENTRY: {} {} {:?}", item_start, item_end, entry.filename);
254 pub fn print_filenames
<W
: std
::io
::Write
>(
257 prefix
: &mut PathBuf
,
258 dir
: &DirectoryEntry
,
259 ) -> Result
<(), Error
> {
260 let mut list
= self.list_dir(dir
)?
;
262 list
.sort_unstable_by(|a
, b
| a
.filename
.cmp(&b
.filename
));
265 prefix
.push(item
.filename
.clone());
267 let mode
= item
.entry
.mode
as u32;
269 let ifmt
= mode
& libc
::S_IFMT
;
271 writeln
!(output
, "{:?}", prefix
)?
;
274 libc
::S_IFDIR
=> self.print_filenames(output
, prefix
, item
)?
,
275 libc
::S_IFREG
| libc
::S_IFLNK
| libc
::S_IFBLK
| libc
::S_IFCHR
=> {}
276 _
=> bail
!("unknown item mode/type for {:?}", prefix
),
285 /// Lookup the item identified by `filename` in the provided `DirectoryEntry`.
287 /// Calculates the hash of the filename and searches for matching entries in
288 /// the goodbye table of the provided `DirectoryEntry`.
/// If found, the filename is also compared to rule out a hash collision.
290 /// If the filename does not match, the search resumes with the next entry in
291 /// the goodbye table.
292 /// If there is no entry with matching `filename`, `Ok(None)` is returned.
295 dir
: &DirectoryEntry
,
297 ) -> Result
<Option
<DirectoryEntry
>, Error
> {
298 let gbt
= self.goodbye_table(Some(dir
.start
), dir
.end
)?
;
299 let hash
= compute_goodbye_hash(filename
.as_bytes());
301 let mut start_idx
= 0;
302 let mut skip_multiple
= 0;
304 // Search for the next goodbye entry with matching hash.
305 let idx
= search_binary_tree_by(
309 |idx
| hash
.cmp(&gbt
[idx
].0.hash
),
311 let (_item
, start
, end
) = match idx
{
312 Some(idx
) => &gbt
[idx
],
313 None
=> return Ok(None
),
316 // At this point it is not clear if the item is a directory or not,
317 // this has to be decided based on the entry mode.
// `Decoder`'s `attributes` function accepts both: offsets pointing to
319 // the start of an item (PXAR_FILENAME) or the GOODBYE_TAIL_MARKER in
320 // case of directories, so the use of start offset is fine for both
322 let (entry_name
, entry
, xattr
, size
) = self.attributes(*start
)?
;
324 // Possible hash collision, need to check if the found entry is indeed
325 // the filename to lookup.
326 if entry_name
== filename
{
327 let dir_entry
= DirectoryEntry
{
328 start
: *start
+ HEADER_SIZE
+ entry_name
.len() as u64 + 1,
330 filename
: entry_name
,
335 return Ok(Some(dir_entry
));
337 // Hash collision, check the next entry in the goodbye table by starting
338 // from given index but skipping one more match (so hash at index itself).
339 start_idx
= idx
.unwrap();
344 /// Get attributes for the archive item located at `offset`.
346 /// Returns the entry, attributes and the payload size for the item.
/// For regular archive items a `PXAR_FILENAME` or a `PXAR_ENTRY` header is
348 /// expected at `offset`.
349 /// For directories, `offset` might also (but not necessarily) point at the
/// directory's `PXAR_GOODBYE_TAIL_MARKER`. This is not mandatory and it can
351 /// also directly point to its `PXAR_FILENAME` or `PXAR_ENTRY`, thereby
352 /// avoiding an additional seek.
353 pub fn attributes(&mut self, offset
: u64) -> Result
<(OsString
, PxarEntry
, PxarAttributes
, u64), Error
> {
354 self.seek(SeekFrom
::Start(offset
))?
;
356 let mut marker
: u64 = self.inner
.read_item()?
;
357 if marker
== PXAR_GOODBYE_TAIL_MARKER
{
358 let dir_offset
: u64 = self.inner
.read_item()?
;
359 let gb_size
: u64 = self.inner
.read_item()?
;
360 let distance
= i64::try_from(dir_offset
+ gb_size
)?
;
361 self.seek(SeekFrom
::Current(0 - distance
))?
;
362 marker
= self.inner
.read_item()?
;
365 let filename
= if marker
== PXAR_FILENAME
{
366 let size
: u64 = self.inner
.read_item()?
;
367 let filename
= self.inner
.read_filename(size
)?
;
368 marker
= self.inner
.read_item()?
;
374 if marker
== PXAR_FORMAT_HARDLINK
{
375 let size
: u64 = self.inner
.read_item()?
;
376 let (_
, diff
) = self.inner
.read_hardlink(size
)?
;
377 // Make sure to return the original filename,
378 // not the one read from the hardlink.
379 let (_
, entry
, xattr
, file_size
) = self.attributes(offset
- diff
)?
;
380 return Ok((filename
, entry
, xattr
, file_size
));
383 if marker
!= PXAR_ENTRY
{
384 bail
!("Expected PXAR_ENTRY, found 0x{:x?}", marker
);
386 let _size
: u64 = self.inner
.read_item()?
;
387 let entry
: PxarEntry
= self.inner
.read_item()?
;
388 let (header
, xattr
) = self.inner
.read_attributes()?
;
389 let file_size
= match header
.htype
{
390 PXAR_PAYLOAD
=> header
.size
- HEADER_SIZE
,
394 Ok((filename
, entry
, xattr
, file_size
))
397 /// Opens the file by validating the given `offset` and returning its attrs,
399 pub fn open(&mut self, offset
: u64) -> Result
<(OsString
, PxarEntry
, PxarAttributes
, u64), Error
> {
400 self.attributes(offset
)
403 /// Read the payload of the file given by `offset`.
405 /// This will read the file by first seeking to `offset` within the archive,
406 /// check if there is indeed a valid item with payload and then read `size`
407 /// bytes of content starting from `data_offset`.
408 /// If EOF is reached before reading `size` bytes, the reduced buffer is
410 pub fn read(&mut self, offset
: u64, size
: usize, data_offset
: u64) -> Result
<Vec
<u8>, Error
> {
411 self.seek(SeekFrom
::Start(offset
))?
;
412 let head
: PxarHeader
= self.inner
.read_item()?
;
413 if head
.htype
!= PXAR_FILENAME
{
414 bail
!("Expected PXAR_FILENAME, encountered 0x{:x?}", head
.htype
);
416 let _filename
= self.inner
.read_filename(head
.size
)?
;
418 let head
: PxarHeader
= self.inner
.read_item()?
;
419 if head
.htype
== PXAR_FORMAT_HARDLINK
{
420 let (_
, diff
) = self.inner
.read_hardlink(head
.size
)?
;
421 return self.read(offset
- diff
, size
, data_offset
);
423 check_ca_header
::<PxarEntry
>(&head
, PXAR_ENTRY
)?
;
424 let _
: PxarEntry
= self.inner
.read_item()?
;
426 let (header
, _
) = self.inner
.read_attributes()?
;
427 if header
.htype
!= PXAR_PAYLOAD
{
428 bail
!("Expected PXAR_PAYLOAD, found 0x{:x?}", header
.htype
);
431 let payload_size
= header
.size
- HEADER_SIZE
;
432 if data_offset
>= payload_size
{
433 return Ok(Vec
::new());
436 let len
= if data_offset
+ u64::try_from(size
)?
> payload_size
{
437 usize::try_from(payload_size
- data_offset
)?
441 self.inner
.skip_bytes(usize::try_from(data_offset
)?
)?
;
442 let data
= self.inner
.get_reader_mut().read_exact_allocated(len
)?
;
447 /// Read the target of a hardlink in the archive.
448 pub fn read_link(&mut self, offset
: u64) -> Result
<(PathBuf
, PxarEntry
), Error
> {
449 self.seek(SeekFrom
::Start(offset
))?
;
450 let mut header
: PxarHeader
= self.inner
.read_item()?
;
451 if header
.htype
!= PXAR_FILENAME
{
452 bail
!("Expected PXAR_FILENAME, encountered 0x{:x?}", header
.htype
);
454 let _filename
= self.inner
.read_filename(header
.size
)?
;
456 header
= self.inner
.read_item()?
;
457 check_ca_header
::<PxarEntry
>(&header
, PXAR_ENTRY
)?
;
458 let entry
: PxarEntry
= self.inner
.read_item()?
;
460 header
= self.inner
.read_item()?
;
461 if header
.htype
!= PXAR_SYMLINK
{
462 bail
!("Expected PXAR_SYMLINK, encountered 0x{:x?}", header
.htype
);
464 let target
= self.inner
.read_link(header
.size
)?
;