]>
git.proxmox.com Git - proxmox-backup.git/blob - src/pxar/decoder.rs
2ce89b588b6ce5359059192f51561593c5a8dcc2
1 //! *pxar* format decoder for seekable files
3 //! This module contains the code to decode *pxar* archive files.
5 use std
::convert
::TryFrom
;
6 use std
::ffi
::{OsString, OsStr}
;
7 use std
::io
::{Read, Seek, SeekFrom}
;
8 use std
::path
::{Path, PathBuf}
;
9 use std
::os
::unix
::ffi
::OsStrExt
;
14 use super::format_definition
::*;
15 use super::sequential_decoder
::SequentialDecoder
;
16 use super::match_pattern
::MatchPattern
;
18 use proxmox
::tools
::io
::ReadExt
;
20 pub struct DirectoryEntry
{
21 /// Points to the `PxarEntry` of the directory
23 /// Points past the goodbye table tail
25 pub filename
: OsString
,
29 // This one needs Read+Seek
30 pub struct Decoder
<R
: Read
+ Seek
, F
: Fn(&Path
) -> Result
<(), Error
>> {
31 inner
: SequentialDecoder
<R
, F
>,
/// Size in bytes of a `PxarHeader`, as reported by `std::mem::size_of`.
/// Subtracted from a payload header's `size` to obtain the payload length.
36 const HEADER_SIZE
: u64 = std
::mem
::size_of
::<PxarHeader
>() as u64;
/// Size in bytes of one `PxarGoodbyeItem`, as reported by `std::mem::size_of`.
/// The goodbye tail item is read by seeking this many bytes back from an
/// object's end offset.
37 const GOODBYE_ITEM_SIZE
: u64 = std
::mem
::size_of
::<PxarGoodbyeItem
>() as u64;
39 impl<R
: Read
+ Seek
, F
: Fn(&Path
) -> Result
<(), Error
>> Decoder
<R
, F
> {
40 pub fn new(mut reader
: R
, callback
: F
) -> Result
<Self, Error
> {
41 let root_end
= reader
.seek(SeekFrom
::End(0))?
;
44 inner
: SequentialDecoder
::new(reader
, super::flags
::DEFAULT
, callback
),
50 pub fn root(&mut self) -> Result
<DirectoryEntry
, Error
> {
51 self.seek(SeekFrom
::Start(0))?
;
52 let header
: PxarHeader
= self.inner
.read_item()?
;
53 check_ca_header
::<PxarEntry
>(&header
, PXAR_ENTRY
)?
;
54 let entry
: PxarEntry
= self.inner
.read_item()?
;
56 start
: self.root_start
,
58 filename
: OsString
::new(), // Empty
63 fn seek(&mut self, pos
: SeekFrom
) -> Result
<u64, Error
> {
64 let pos
= self.inner
.get_reader_mut().seek(pos
)?
;
68 pub(crate) fn root_end_offset(&self) -> u64 {
72 /// Restore the subarchive starting at `dir` to the provided target `path`.
74 /// Only restore the content matched by the MatchPattern `pattern`.
75 /// An empty Vec `pattern` means restore all.
76 pub fn restore(&mut self, dir
: &DirectoryEntry
, path
: &Path
, pattern
: &Vec
<MatchPattern
>) -> Result
<(), Error
> {
77 let start
= dir
.start
;
78 self.seek(SeekFrom
::Start(start
))?
;
79 self.inner
.restore(path
, pattern
)?
;
84 pub(crate) fn read_directory_entry(
88 ) -> Result
<DirectoryEntry
, Error
> {
89 self.seek(SeekFrom
::Start(start
))?
;
91 let head
: PxarHeader
= self.inner
.read_item()?
;
93 if head
.htype
!= PXAR_FILENAME
{
94 bail
!("wrong filename header type for object [{}..{}]", start
, end
);
97 let entry_start
= start
+ head
.size
;
99 let filename
= self.inner
.read_filename(head
.size
)?
;
101 let head
: PxarHeader
= self.inner
.read_item()?
;
102 if head
.htype
== PXAR_FORMAT_HARDLINK
{
103 let (_
, offset
) = self.inner
.read_hardlink(head
.size
)?
;
104 // TODO: How to find the correct end offset for a hardlink target?
105 // This is a bit tricky since we cannot find correct end in an efficient
106 // way, on the other hand it doesn't really matter (for now) since target
107 // is never a directory and end is not used in such cases.
108 return self.read_directory_entry(start
- offset
, end
);
110 check_ca_header
::<PxarEntry
>(&head
, PXAR_ENTRY
)?
;
111 let entry
: PxarEntry
= self.inner
.read_item()?
;
121 /// Return the goodbye table based on the provided end offset.
123 /// Get the goodbye table entries and the start and end offsets of the
124 /// items they reference.
125 /// If the start offset is provided, we use that to check the consistency of
126 /// the data, else the start offset calculated based on the goodbye tail is
128 pub(crate) fn goodbye_table(
132 ) -> Result
<Vec
<(PxarGoodbyeItem
, u64, u64)>, Error
> {
133 self.seek(SeekFrom
::Start(end
- GOODBYE_ITEM_SIZE
))?
;
135 let tail
: PxarGoodbyeItem
= self.inner
.read_item()?
;
136 if tail
.hash
!= PXAR_GOODBYE_TAIL_MARKER
{
137 bail
!("missing goodbye tail marker for object at offset {}", end
);
140 // If the start offset was provided, we use and check based on that.
141 // If not, we rely on the offset calculated from the goodbye table entry.
142 let start
= start
.unwrap_or(end
- tail
.offset
- tail
.size
);
143 let goodbye_table_size
= tail
.size
;
144 if goodbye_table_size
< (HEADER_SIZE
+ GOODBYE_ITEM_SIZE
) {
145 bail
!("short goodbye table size for object [{}..{}]", start
, end
);
148 let goodbye_inner_size
= goodbye_table_size
- HEADER_SIZE
- GOODBYE_ITEM_SIZE
;
149 if (goodbye_inner_size
% GOODBYE_ITEM_SIZE
) != 0 {
151 "wrong goodbye inner table size for entry [{}..{}]",
157 let goodbye_start
= end
- goodbye_table_size
;
158 if tail
.offset
!= (goodbye_start
- start
) {
160 "wrong offset in goodbye tail marker for entry [{}..{}]",
166 self.seek(SeekFrom
::Start(goodbye_start
))?
;
167 let head
: PxarHeader
= self.inner
.read_item()?
;
168 if head
.htype
!= PXAR_GOODBYE
{
170 "wrong goodbye table header type for entry [{}..{}]",
176 if head
.size
!= goodbye_table_size
{
177 bail
!("wrong goodbye table size for entry [{}..{}]", start
, end
);
180 let mut gb_entries
= Vec
::new();
181 for i
in 0..goodbye_inner_size
/ GOODBYE_ITEM_SIZE
{
182 let item
: PxarGoodbyeItem
= self.inner
.read_item()?
;
183 if item
.offset
> (goodbye_start
- start
) {
185 "goodbye entry {} offset out of range [{}..{}] {} {} {}",
194 let item_start
= goodbye_start
- item
.offset
;
195 let item_end
= item_start
+ item
.size
;
196 if item_end
> goodbye_start
{
197 bail
!("goodbye entry {} end out of range [{}..{}]", i
, start
, end
);
199 gb_entries
.push((item
, item_start
, item_end
));
205 pub fn list_dir(&mut self, dir
: &DirectoryEntry
) -> Result
<Vec
<DirectoryEntry
>, Error
> {
206 let start
= dir
.start
;
209 //println!("list_dir1: {} {}", start, end);
211 if (end
- start
) < (HEADER_SIZE
+ GOODBYE_ITEM_SIZE
) {
212 bail
!("detected short object [{}..{}]", start
, end
);
215 let mut result
= vec
![];
216 let goodbye_table
= self.goodbye_table(Some(start
), end
)?
;
217 for (_
, item_start
, item_end
) in goodbye_table
{
218 let entry
= self.read_directory_entry(item_start
, item_end
)?
;
219 //println!("ENTRY: {} {} {:?}", item_start, item_end, entry.filename);
226 pub fn print_filenames
<W
: std
::io
::Write
>(
229 prefix
: &mut PathBuf
,
230 dir
: &DirectoryEntry
,
231 ) -> Result
<(), Error
> {
232 let mut list
= self.list_dir(dir
)?
;
234 list
.sort_unstable_by(|a
, b
| a
.filename
.cmp(&b
.filename
));
237 prefix
.push(item
.filename
.clone());
239 let mode
= item
.entry
.mode
as u32;
241 let ifmt
= mode
& libc
::S_IFMT
;
243 writeln
!(output
, "{:?}", prefix
)?
;
246 libc
::S_IFDIR
=> self.print_filenames(output
, prefix
, item
)?
,
247 libc
::S_IFREG
| libc
::S_IFLNK
| libc
::S_IFBLK
| libc
::S_IFCHR
=> {}
248 _
=> bail
!("unknown item mode/type for {:?}", prefix
),
257 /// Lookup the item identified by `filename` in the provided `DirectoryEntry`.
259 /// Calculates the hash of the filename and searches for matching entries in
260 /// the goodbye table of the provided `DirectoryEntry`.
261 /// If found, the filename is also compared to rule out a hash collision.
262 /// If the filename does not match, the search resumes with the next entry in
263 /// the goodbye table.
264 /// If there is no entry with matching `filename`, `Ok(None)` is returned.
267 dir
: &DirectoryEntry
,
269 ) -> Result
<Option
<(DirectoryEntry
, PxarAttributes
)>, Error
> {
270 let gbt
= self.goodbye_table(Some(dir
.start
), dir
.end
)?
;
271 let hash
= compute_goodbye_hash(filename
.as_bytes());
273 let mut iterator
= gbt
.iter();
275 // Search for the next goodbye entry with matching hash.
276 let (start
, end
) = match iterator
.find(|(i
, _
, _
)| i
.hash
== hash
) {
277 Some((_item
, start
, end
)) => (start
, end
),
278 None
=> return Ok(None
),
281 // At this point it is not clear if the item is a directory or not,
282 // this has to be decided based on the entry mode.
283 // `Decoder`'s `attributes` function accepts both: offsets pointing to
284 // the start of an item (PXAR_FILENAME) or the GOODBYE_TAIL_MARKER in
285 // case of directories, so the use of start offset is fine for both
287 let (entry_name
, entry
, attr
, _payload_size
) = self.attributes(*start
)?
;
289 // Possible hash collision, need to check if the found entry is indeed
290 // the filename to lookup.
291 if entry_name
== filename
{
292 let dir_entry
= DirectoryEntry
{
293 start
: *start
+ HEADER_SIZE
+ entry_name
.len() as u64 + 1,
295 filename
: entry_name
,
298 return Ok(Some((dir_entry
, attr
)));
303 /// Get attributes for the archive item located at `offset`.
305 /// Returns the entry, attributes and the payload size for the item.
306 /// For regular archive items a `PXAR_FILENAME` or a `PXAR_ENTRY` header is
307 /// expected at `offset`.
308 /// For directories, `offset` might also (but not necessarily) point at the
309 /// directory's `PXAR_GOODBYE_TAIL_MARKER`. This is not mandatory and it can
310 /// also directly point to its `PXAR_FILENAME` or `PXAR_ENTRY`, thereby
311 /// avoiding an additional seek.
312 pub fn attributes(&mut self, offset
: u64) -> Result
<(OsString
, PxarEntry
, PxarAttributes
, u64), Error
> {
313 self.seek(SeekFrom
::Start(offset
))?
;
315 let mut marker
: u64 = self.inner
.read_item()?
;
316 if marker
== PXAR_GOODBYE_TAIL_MARKER
{
317 let dir_offset
: u64 = self.inner
.read_item()?
;
318 let gb_size
: u64 = self.inner
.read_item()?
;
319 let distance
= i64::try_from(dir_offset
+ gb_size
)?
;
320 self.seek(SeekFrom
::Current(0 - distance
))?
;
321 marker
= self.inner
.read_item()?
;
324 let filename
= if marker
== PXAR_FILENAME
{
325 let size
: u64 = self.inner
.read_item()?
;
326 let filename
= self.inner
.read_filename(size
)?
;
327 marker
= self.inner
.read_item()?
;
333 if marker
== PXAR_FORMAT_HARDLINK
{
334 let size
: u64 = self.inner
.read_item()?
;
335 let (_
, diff
) = self.inner
.read_hardlink(size
)?
;
336 return self.attributes(offset
- diff
);
339 if marker
!= PXAR_ENTRY
{
340 bail
!("Expected PXAR_ENTRY, found 0x{:x?}", marker
);
342 let _size
: u64 = self.inner
.read_item()?
;
343 let entry
: PxarEntry
= self.inner
.read_item()?
;
344 let (header
, xattr
) = self.inner
.read_attributes()?
;
345 let file_size
= match header
.htype
{
346 PXAR_PAYLOAD
=> header
.size
- HEADER_SIZE
,
350 Ok((filename
, entry
, xattr
, file_size
))
353 /// Opens the file by validating the given `offset` and returning its attrs,
355 pub fn open(&mut self, offset
: u64) -> Result
<(OsString
, PxarEntry
, PxarAttributes
, u64), Error
> {
356 self.attributes(offset
)
359 /// Read the payload of the file given by `offset`.
361 /// This will read the file by first seeking to `offset` within the archive,
362 /// check if there is indeed a valid item with payload and then read `size`
363 /// bytes of content starting from `data_offset`.
364 /// If EOF is reached before reading `size` bytes, the reduced buffer is
366 pub fn read(&mut self, offset
: u64, size
: usize, data_offset
: u64) -> Result
<Vec
<u8>, Error
> {
367 self.seek(SeekFrom
::Start(offset
))?
;
368 let head
: PxarHeader
= self.inner
.read_item()?
;
369 if head
.htype
!= PXAR_FILENAME
{
370 bail
!("Expected PXAR_FILENAME, encountered 0x{:x?}", head
.htype
);
372 let _filename
= self.inner
.read_filename(head
.size
)?
;
374 let head
: PxarHeader
= self.inner
.read_item()?
;
375 check_ca_header
::<PxarEntry
>(&head
, PXAR_ENTRY
)?
;
376 let _
: PxarEntry
= self.inner
.read_item()?
;
378 let (header
, _
) = self.inner
.read_attributes()?
;
379 if header
.htype
!= PXAR_PAYLOAD
{
380 bail
!("Expected PXAR_PAYLOAD, found 0x{:x?}", header
.htype
);
383 let payload_size
= header
.size
- HEADER_SIZE
;
384 if data_offset
>= payload_size
{
385 return Ok(Vec
::new());
388 let len
= if data_offset
+ u64::try_from(size
)?
> payload_size
{
389 usize::try_from(payload_size
- data_offset
)?
393 self.inner
.skip_bytes(usize::try_from(data_offset
)?
)?
;
394 let data
= self.inner
.get_reader_mut().read_exact_allocated(len
)?
;
399 /// Read the target of a hardlink in the archive.
400 pub fn read_link(&mut self, offset
: u64) -> Result
<(PathBuf
, PxarEntry
), Error
> {
401 self.seek(SeekFrom
::Start(offset
))?
;
402 let mut header
: PxarHeader
= self.inner
.read_item()?
;
403 if header
.htype
!= PXAR_FILENAME
{
404 bail
!("Expected PXAR_FILENAME, encountered 0x{:x?}", header
.htype
);
406 let _filename
= self.inner
.read_filename(header
.size
)?
;
408 header
= self.inner
.read_item()?
;
409 check_ca_header
::<PxarEntry
>(&header
, PXAR_ENTRY
)?
;
410 let entry
: PxarEntry
= self.inner
.read_item()?
;
412 header
= self.inner
.read_item()?
;
413 if header
.htype
!= PXAR_SYMLINK
{
414 bail
!("Expected PXAR_SYMLINK, encountered 0x{:x?}", header
.htype
);
416 let target
= self.inner
.read_link(header
.size
)?
;