1 //! *pxar* format decoder for seekable files
//! This module contains the code to decode *pxar* archive files.
5 use std
::convert
::TryFrom
;
6 use std
::ffi
::{OsString, OsStr}
;
7 use std
::io
::{Read, Seek, SeekFrom}
;
8 use std
::path
::{Path, PathBuf}
;
9 use std
::os
::unix
::ffi
::OsStrExt
;
11 use anyhow
::{bail, format_err, Error}
;
14 use super::binary_search_tree
::search_binary_tree_by
;
15 use super::format_definition
::*;
16 use super::sequential_decoder
::SequentialDecoder
;
17 use super::match_pattern
::MatchPattern
;
19 use proxmox
::tools
::io
::ReadExt
;
21 pub struct DirectoryEntry
{
22 /// Points to the `PxarEntry` of the directory
24 /// Points past the goodbye table tail
27 pub filename
: OsString
,
28 /// Entry (mode, permissions)
30 /// Extended attributes
31 pub xattr
: PxarAttributes
,
34 /// Target path for symbolic links
35 pub target
: Option
<PathBuf
>,
36 /// Start offset of the payload if present.
37 pub payload_offset
: Option
<u64>,
/// Helper trait combining `Read` and `Seek`, so that a reader offering both
/// can be handled as a single `dyn ReadSeek` trait object.
trait ReadSeek: Read + Seek {}

// Blanket implementation: every type that is `Read + Seek` is a `ReadSeek`.
impl<R> ReadSeek for R where R: Read + Seek {}
44 // This one needs Read+Seek
46 inner
: SequentialDecoder
<Box
<dyn ReadSeek
+ Send
>>,
51 const HEADER_SIZE
: u64 = std
::mem
::size_of
::<PxarHeader
>() as u64;
52 const GOODBYE_ITEM_SIZE
: u64 = std
::mem
::size_of
::<PxarGoodbyeItem
>() as u64;
55 pub fn new
<R
: Read
+ Seek
+ Send
+ '
static>(mut reader
: R
) -> Result
<Self, Error
> {
56 let root_end
= reader
.seek(SeekFrom
::End(0))?
;
57 let boxed_reader
: Box
<dyn ReadSeek
+ '
static + Send
> = Box
::new(reader
);
58 let inner
= SequentialDecoder
::new(boxed_reader
, super::flags
::DEFAULT
);
60 Ok(Self { inner, root_start: 0, root_end }
)
63 pub fn set_callback
<F
: Fn(&Path
) -> Result
<(), Error
> + Send
+ '
static>(&mut self, callback
: F
) {
64 self.inner
.set_callback(callback
);
67 pub fn root(&mut self) -> Result
<DirectoryEntry
, Error
> {
68 self.seek(SeekFrom
::Start(0))?
;
69 let header
: PxarHeader
= self.inner
.read_item()?
;
70 check_ca_header
::<PxarEntry
>(&header
, PXAR_ENTRY
)?
;
71 let entry
: PxarEntry
= self.inner
.read_item()?
;
72 let (header
, xattr
) = self.inner
.read_attributes()?
;
73 let (size
, payload_offset
) = match header
.htype
{
74 PXAR_PAYLOAD
=> (header
.size
- HEADER_SIZE
, Some(self.seek(SeekFrom
::Current(0))?
)),
79 start
: self.root_start
,
81 filename
: OsString
::new(), // Empty
90 fn seek(&mut self, pos
: SeekFrom
) -> Result
<u64, Error
> {
91 let pos
= self.inner
.get_reader_mut().seek(pos
)?
;
95 pub(crate) fn root_end_offset(&self) -> u64 {
99 /// Restore the subarchive starting at `dir` to the provided target `path`.
101 /// Only restore the content matched by the MatchPattern `pattern`.
102 /// An empty Vec `pattern` means restore all.
103 pub fn restore(&mut self, dir
: &DirectoryEntry
, path
: &Path
, pattern
: &Vec
<MatchPattern
>) -> Result
<(), Error
> {
104 let start
= dir
.start
;
105 self.seek(SeekFrom
::Start(start
))?
;
106 self.inner
.restore(path
, pattern
)?
;
111 pub(crate) fn read_directory_entry(
115 ) -> Result
<DirectoryEntry
, Error
> {
116 self.seek(SeekFrom
::Start(start
))?
;
118 let head
: PxarHeader
= self.inner
.read_item()?
;
120 if head
.htype
!= PXAR_FILENAME
{
121 bail
!("wrong filename header type for object [{}..{}]", start
, end
);
124 let entry_start
= start
+ head
.size
;
126 let filename
= self.inner
.read_filename(head
.size
)?
;
128 let head
: PxarHeader
= self.inner
.read_item()?
;
129 if head
.htype
== PXAR_FORMAT_HARDLINK
{
130 let (_
, offset
) = self.inner
.read_hardlink(head
.size
)?
;
// TODO: How do we find the correct end offset for a hardlink target?
// This is a bit tricky since we cannot find the correct end in an efficient
// way. On the other hand, it doesn't really matter (for now), since the target
// is never a directory and end is not used in such cases.
135 return self.read_directory_entry(start
- offset
, end
);
137 check_ca_header
::<PxarEntry
>(&head
, PXAR_ENTRY
)?
;
138 let entry
: PxarEntry
= self.inner
.read_item()?
;
139 let (header
, xattr
) = self.inner
.read_attributes()?
;
140 let (size
, payload_offset
, target
) = match header
.htype
{
142 (header
.size
- HEADER_SIZE
, Some(self.seek(SeekFrom
::Current(0))?
), None
),
144 (header
.size
- HEADER_SIZE
, None
, Some(self.inner
.read_link(header
.size
)?
)),
145 _
=> (0, None
, None
),
160 /// Return the goodbye table based on the provided end offset.
162 /// Get the goodbye table entries and the start and end offsets of the
163 /// items they reference.
164 /// If the start offset is provided, we use that to check the consistency of
165 /// the data, else the start offset calculated based on the goodbye tail is
167 pub(crate) fn goodbye_table(
171 ) -> Result
<Vec
<(PxarGoodbyeItem
, u64, u64)>, Error
> {
172 self.seek(SeekFrom
::Start(end
- GOODBYE_ITEM_SIZE
))?
;
174 let tail
: PxarGoodbyeItem
= self.inner
.read_item()?
;
175 if tail
.hash
!= PXAR_GOODBYE_TAIL_MARKER
{
176 bail
!("missing goodbye tail marker for object at offset {}", end
);
179 // If the start offset was provided, we use and check based on that.
180 // If not, we rely on the offset calculated from the goodbye table entry.
181 let start
= start
.unwrap_or(end
- tail
.offset
- tail
.size
);
182 let goodbye_table_size
= tail
.size
;
183 if goodbye_table_size
< (HEADER_SIZE
+ GOODBYE_ITEM_SIZE
) {
184 bail
!("short goodbye table size for object [{}..{}]", start
, end
);
187 let goodbye_inner_size
= goodbye_table_size
- HEADER_SIZE
- GOODBYE_ITEM_SIZE
;
188 if (goodbye_inner_size
% GOODBYE_ITEM_SIZE
) != 0 {
190 "wrong goodbye inner table size for entry [{}..{}]",
196 let goodbye_start
= end
- goodbye_table_size
;
197 if tail
.offset
!= (goodbye_start
- start
) {
199 "wrong offset in goodbye tail marker for entry [{}..{}]",
205 self.seek(SeekFrom
::Start(goodbye_start
))?
;
206 let head
: PxarHeader
= self.inner
.read_item()?
;
207 if head
.htype
!= PXAR_GOODBYE
{
209 "wrong goodbye table header type for entry [{}..{}]",
215 if head
.size
!= goodbye_table_size
{
216 bail
!("wrong goodbye table size for entry [{}..{}]", start
, end
);
219 let mut gb_entries
= Vec
::new();
220 for i
in 0..goodbye_inner_size
/ GOODBYE_ITEM_SIZE
{
221 let item
: PxarGoodbyeItem
= self.inner
.read_item()?
;
222 if item
.offset
> (goodbye_start
- start
) {
224 "goodbye entry {} offset out of range [{}..{}] {} {} {}",
233 let item_start
= goodbye_start
- item
.offset
;
234 let item_end
= item_start
+ item
.size
;
235 if item_end
> goodbye_start
{
236 bail
!("goodbye entry {} end out of range [{}..{}]", i
, start
, end
);
238 gb_entries
.push((item
, item_start
, item_end
));
244 pub fn list_dir(&mut self, dir
: &DirectoryEntry
) -> Result
<Vec
<DirectoryEntry
>, Error
> {
245 let start
= dir
.start
;
248 //println!("list_dir1: {} {}", start, end);
250 if (end
- start
) < (HEADER_SIZE
+ GOODBYE_ITEM_SIZE
) {
251 bail
!("detected short object [{}..{}]", start
, end
);
254 let mut result
= vec
![];
255 let goodbye_table
= self.goodbye_table(Some(start
), end
)?
;
256 for (_
, item_start
, item_end
) in goodbye_table
{
257 let entry
= self.read_directory_entry(item_start
, item_end
)?
;
258 //println!("ENTRY: {} {} {:?}", item_start, item_end, entry.filename);
265 pub fn print_filenames
<W
: std
::io
::Write
>(
268 prefix
: &mut PathBuf
,
269 dir
: &DirectoryEntry
,
270 ) -> Result
<(), Error
> {
271 let mut list
= self.list_dir(dir
)?
;
273 list
.sort_unstable_by(|a
, b
| a
.filename
.cmp(&b
.filename
));
276 prefix
.push(item
.filename
.clone());
278 let mode
= item
.entry
.mode
as u32;
280 let ifmt
= mode
& libc
::S_IFMT
;
282 writeln
!(output
, "{:?}", prefix
)?
;
285 libc
::S_IFDIR
=> self.print_filenames(output
, prefix
, item
)?
,
286 libc
::S_IFREG
| libc
::S_IFLNK
| libc
::S_IFBLK
| libc
::S_IFCHR
=> {}
287 _
=> bail
!("unknown item mode/type for {:?}", prefix
),
296 /// Lookup the item identified by `filename` in the provided `DirectoryEntry`.
298 /// Calculates the hash of the filename and searches for matching entries in
299 /// the goodbye table of the provided `DirectoryEntry`.
/// If found, the filename is also compared to rule out a hash collision.
301 /// If the filename does not match, the search resumes with the next entry in
302 /// the goodbye table.
303 /// If there is no entry with matching `filename`, `Ok(None)` is returned.
306 dir
: &DirectoryEntry
,
308 ) -> Result
<Option
<DirectoryEntry
>, Error
> {
309 let gbt
= self.goodbye_table(Some(dir
.start
), dir
.end
)?
;
310 let hash
= compute_goodbye_hash(filename
.as_bytes());
312 let mut start_idx
= 0;
313 let mut skip_multiple
= 0;
315 // Search for the next goodbye entry with matching hash.
316 let idx
= search_binary_tree_by(
320 |idx
| hash
.cmp(&gbt
[idx
].0.hash
),
322 let (_item
, start
, end
) = match idx
{
323 Some(idx
) => &gbt
[idx
],
324 None
=> return Ok(None
),
327 let entry
= self.read_directory_entry(*start
, *end
)?
;
329 // Possible hash collision, need to check if the found entry is indeed
330 // the filename to lookup.
331 if entry
.filename
== filename
{
332 return Ok(Some(entry
));
334 // Hash collision, check the next entry in the goodbye table by starting
335 // from given index but skipping one more match (so hash at index itself).
336 start_idx
= idx
.unwrap();
341 /// Read the payload of the file given by `entry`.
/// This will read a file's payload as raw bytes starting from `offset` after
344 /// the payload marker, reading `size` bytes.
345 /// If the payload from `offset` to EOF is smaller than `size` bytes, the
346 /// buffer with reduced size is returned.
347 /// If `offset` is larger than the payload size of the `DirectoryEntry`, an
348 /// empty buffer is returned.
349 pub fn read(&mut self, entry
: &DirectoryEntry
, size
: usize, offset
: u64) -> Result
<Vec
<u8>, Error
> {
350 let start_offset
= entry
.payload_offset
351 .ok_or_else(|| format_err
!("entry has no payload offset"))?
;
352 if offset
>= entry
.size
{
353 return Ok(Vec
::new());
355 let len
= if u64::try_from(size
)?
> entry
.size
{
356 usize::try_from(entry
.size
)?
360 self.seek(SeekFrom
::Start(start_offset
+ offset
))?
;
361 let data
= self.inner
.get_reader_mut().read_exact_allocated(len
)?
;