]> git.proxmox.com Git - proxmox-backup.git/blob - src/pxar/decoder.rs
pxar::decoder::Decoder: include xattrs and payload size in `DirectoryEntry`.
[proxmox-backup.git] / src / pxar / decoder.rs
1 //! *pxar* format decoder for seekable files
2 //!
3 //! This module contains the code to decode *pxar* archive files.
4
5 use std::convert::TryFrom;
6 use std::ffi::{OsString, OsStr};
7 use std::io::{Read, Seek, SeekFrom};
8 use std::path::{Path, PathBuf};
9 use std::os::unix::ffi::OsStrExt;
10
11 use failure::*;
12 use libc;
13
14 use super::binary_search_tree::search_binary_tree_by;
15 use super::format_definition::*;
16 use super::sequential_decoder::SequentialDecoder;
17 use super::match_pattern::MatchPattern;
18
19 use proxmox::tools::io::ReadExt;
20
21 pub struct DirectoryEntry {
22 /// Points to the `PxarEntry` of the directory
23 start: u64,
24 /// Points past the goodbye table tail
25 end: u64,
26 /// Filename of entry
27 pub filename: OsString,
28 /// Entry (mode, permissions)
29 pub entry: PxarEntry,
30 /// Extended attributes
31 pub xattr: PxarAttributes,
32 /// Payload size
33 pub size: u64,
34 }
35
/// Helper trait combining `Read` and `Seek`, so that a single trait object
/// (`dyn ReadSeek`) can be handed to the decoder.
trait ReadSeek: Read + Seek {}

/// Every type that is both readable and seekable is a `ReadSeek`.
impl<R> ReadSeek for R where R: Read + Seek {}
39
40 // This one needs Read+Seek
41 pub struct Decoder {
42 inner: SequentialDecoder<Box<dyn ReadSeek + Send>>,
43 root_start: u64,
44 root_end: u64,
45 }
46
47 const HEADER_SIZE: u64 = std::mem::size_of::<PxarHeader>() as u64;
48 const GOODBYE_ITEM_SIZE: u64 = std::mem::size_of::<PxarGoodbyeItem>() as u64;
49
50 impl Decoder {
51 pub fn new<R: Read + Seek + Send + 'static>(mut reader: R) -> Result<Self, Error> {
52 let root_end = reader.seek(SeekFrom::End(0))?;
53 let boxed_reader: Box<dyn ReadSeek + 'static + Send> = Box::new(reader);
54 let inner = SequentialDecoder::new(boxed_reader, super::flags::DEFAULT);
55
56 Ok(Self { inner, root_start: 0, root_end })
57 }
58
59 pub fn set_callback<F: Fn(&Path) -> Result<(), Error> + Send + 'static>(&mut self, callback: F ) {
60 self.inner.set_callback(callback);
61 }
62
63 pub fn root(&mut self) -> Result<DirectoryEntry, Error> {
64 self.seek(SeekFrom::Start(0))?;
65 let header: PxarHeader = self.inner.read_item()?;
66 check_ca_header::<PxarEntry>(&header, PXAR_ENTRY)?;
67 let entry: PxarEntry = self.inner.read_item()?;
68 let (header, xattr) = self.inner.read_attributes()?;
69 let size = match header.htype {
70 PXAR_PAYLOAD => header.size - HEADER_SIZE,
71 _ => 0,
72 };
73
74 Ok(DirectoryEntry {
75 start: self.root_start,
76 end: self.root_end,
77 filename: OsString::new(), // Empty
78 entry,
79 xattr,
80 size,
81 })
82 }
83
84 fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
85 let pos = self.inner.get_reader_mut().seek(pos)?;
86 Ok(pos)
87 }
88
89 pub(crate) fn root_end_offset(&self) -> u64 {
90 self.root_end
91 }
92
93 /// Restore the subarchive starting at `dir` to the provided target `path`.
94 ///
95 /// Only restore the content matched by the MatchPattern `pattern`.
96 /// An empty Vec `pattern` means restore all.
97 pub fn restore(&mut self, dir: &DirectoryEntry, path: &Path, pattern: &Vec<MatchPattern>) -> Result<(), Error> {
98 let start = dir.start;
99 self.seek(SeekFrom::Start(start))?;
100 self.inner.restore(path, pattern)?;
101
102 Ok(())
103 }
104
105 pub(crate) fn read_directory_entry(
106 &mut self,
107 start: u64,
108 end: u64,
109 ) -> Result<DirectoryEntry, Error> {
110 self.seek(SeekFrom::Start(start))?;
111
112 let head: PxarHeader = self.inner.read_item()?;
113
114 if head.htype != PXAR_FILENAME {
115 bail!("wrong filename header type for object [{}..{}]", start, end);
116 }
117
118 let entry_start = start + head.size;
119
120 let filename = self.inner.read_filename(head.size)?;
121
122 let head: PxarHeader = self.inner.read_item()?;
123 if head.htype == PXAR_FORMAT_HARDLINK {
124 let (_, offset) = self.inner.read_hardlink(head.size)?;
125 // TODO: Howto find correct end offset for hardlink target?
126 // This is a bit tricky since we cannot find correct end in an efficient
127 // way, on the other hand it doesn't really matter (for now) since target
128 // is never a directory and end is not used in such cases.
129 return self.read_directory_entry(start - offset, end);
130 }
131 check_ca_header::<PxarEntry>(&head, PXAR_ENTRY)?;
132 let entry: PxarEntry = self.inner.read_item()?;
133 let (header, xattr) = self.inner.read_attributes()?;
134 let size = match header.htype {
135 PXAR_PAYLOAD => header.size - HEADER_SIZE,
136 _ => 0,
137 };
138
139 Ok(DirectoryEntry {
140 start: entry_start,
141 end,
142 filename,
143 entry,
144 xattr,
145 size,
146 })
147 }
148
149 /// Return the goodbye table based on the provided end offset.
150 ///
151 /// Get the goodbye table entries and the start and end offsets of the
152 /// items they reference.
153 /// If the start offset is provided, we use that to check the consistency of
154 /// the data, else the start offset calculated based on the goodbye tail is
155 /// used.
156 pub(crate) fn goodbye_table(
157 &mut self,
158 start: Option<u64>,
159 end: u64,
160 ) -> Result<Vec<(PxarGoodbyeItem, u64, u64)>, Error> {
161 self.seek(SeekFrom::Start(end - GOODBYE_ITEM_SIZE))?;
162
163 let tail: PxarGoodbyeItem = self.inner.read_item()?;
164 if tail.hash != PXAR_GOODBYE_TAIL_MARKER {
165 bail!("missing goodbye tail marker for object at offset {}", end);
166 }
167
168 // If the start offset was provided, we use and check based on that.
169 // If not, we rely on the offset calculated from the goodbye table entry.
170 let start = start.unwrap_or(end - tail.offset - tail.size);
171 let goodbye_table_size = tail.size;
172 if goodbye_table_size < (HEADER_SIZE + GOODBYE_ITEM_SIZE) {
173 bail!("short goodbye table size for object [{}..{}]", start, end);
174 }
175
176 let goodbye_inner_size = goodbye_table_size - HEADER_SIZE - GOODBYE_ITEM_SIZE;
177 if (goodbye_inner_size % GOODBYE_ITEM_SIZE) != 0 {
178 bail!(
179 "wrong goodbye inner table size for entry [{}..{}]",
180 start,
181 end
182 );
183 }
184
185 let goodbye_start = end - goodbye_table_size;
186 if tail.offset != (goodbye_start - start) {
187 bail!(
188 "wrong offset in goodbye tail marker for entry [{}..{}]",
189 start,
190 end
191 );
192 }
193
194 self.seek(SeekFrom::Start(goodbye_start))?;
195 let head: PxarHeader = self.inner.read_item()?;
196 if head.htype != PXAR_GOODBYE {
197 bail!(
198 "wrong goodbye table header type for entry [{}..{}]",
199 start,
200 end
201 );
202 }
203
204 if head.size != goodbye_table_size {
205 bail!("wrong goodbye table size for entry [{}..{}]", start, end);
206 }
207
208 let mut gb_entries = Vec::new();
209 for i in 0..goodbye_inner_size / GOODBYE_ITEM_SIZE {
210 let item: PxarGoodbyeItem = self.inner.read_item()?;
211 if item.offset > (goodbye_start - start) {
212 bail!(
213 "goodbye entry {} offset out of range [{}..{}] {} {} {}",
214 i,
215 start,
216 end,
217 item.offset,
218 goodbye_start,
219 start
220 );
221 }
222 let item_start = goodbye_start - item.offset;
223 let item_end = item_start + item.size;
224 if item_end > goodbye_start {
225 bail!("goodbye entry {} end out of range [{}..{}]", i, start, end);
226 }
227 gb_entries.push((item, item_start, item_end));
228 }
229
230 Ok(gb_entries)
231 }
232
233 pub fn list_dir(&mut self, dir: &DirectoryEntry) -> Result<Vec<DirectoryEntry>, Error> {
234 let start = dir.start;
235 let end = dir.end;
236
237 //println!("list_dir1: {} {}", start, end);
238
239 if (end - start) < (HEADER_SIZE + GOODBYE_ITEM_SIZE) {
240 bail!("detected short object [{}..{}]", start, end);
241 }
242
243 let mut result = vec![];
244 let goodbye_table = self.goodbye_table(Some(start), end)?;
245 for (_, item_start, item_end) in goodbye_table {
246 let entry = self.read_directory_entry(item_start, item_end)?;
247 //println!("ENTRY: {} {} {:?}", item_start, item_end, entry.filename);
248 result.push(entry);
249 }
250
251 Ok(result)
252 }
253
254 pub fn print_filenames<W: std::io::Write>(
255 &mut self,
256 output: &mut W,
257 prefix: &mut PathBuf,
258 dir: &DirectoryEntry,
259 ) -> Result<(), Error> {
260 let mut list = self.list_dir(dir)?;
261
262 list.sort_unstable_by(|a, b| a.filename.cmp(&b.filename));
263
264 for item in &list {
265 prefix.push(item.filename.clone());
266
267 let mode = item.entry.mode as u32;
268
269 let ifmt = mode & libc::S_IFMT;
270
271 writeln!(output, "{:?}", prefix)?;
272
273 match ifmt {
274 libc::S_IFDIR => self.print_filenames(output, prefix, item)?,
275 libc::S_IFREG | libc::S_IFLNK | libc::S_IFBLK | libc::S_IFCHR => {}
276 _ => bail!("unknown item mode/type for {:?}", prefix),
277 }
278
279 prefix.pop();
280 }
281
282 Ok(())
283 }
284
285 /// Lookup the item identified by `filename` in the provided `DirectoryEntry`.
286 ///
287 /// Calculates the hash of the filename and searches for matching entries in
288 /// the goodbye table of the provided `DirectoryEntry`.
289 /// If found, also the filename is compared to avoid hash collision.
290 /// If the filename does not match, the search resumes with the next entry in
291 /// the goodbye table.
292 /// If there is no entry with matching `filename`, `Ok(None)` is returned.
293 pub fn lookup(
294 &mut self,
295 dir: &DirectoryEntry,
296 filename: &OsStr,
297 ) -> Result<Option<DirectoryEntry>, Error> {
298 let gbt = self.goodbye_table(Some(dir.start), dir.end)?;
299 let hash = compute_goodbye_hash(filename.as_bytes());
300
301 let mut start_idx = 0;
302 let mut skip_multiple = 0;
303 loop {
304 // Search for the next goodbye entry with matching hash.
305 let idx = search_binary_tree_by(
306 start_idx,
307 gbt.len(),
308 skip_multiple,
309 |idx| hash.cmp(&gbt[idx].0.hash),
310 );
311 let (_item, start, end) = match idx {
312 Some(idx) => &gbt[idx],
313 None => return Ok(None),
314 };
315
316 // At this point it is not clear if the item is a directory or not,
317 // this has to be decided based on the entry mode.
318 // `Decoder`s attributes function accepts both, offsets pointing to
319 // the start of an item (PXAR_FILENAME) or the GOODBYE_TAIL_MARKER in
320 // case of directories, so the use of start offset is fine for both
321 // cases.
322 let (entry_name, entry, xattr, size) = self.attributes(*start)?;
323
324 // Possible hash collision, need to check if the found entry is indeed
325 // the filename to lookup.
326 if entry_name == filename {
327 let dir_entry = DirectoryEntry {
328 start: *start + HEADER_SIZE + entry_name.len() as u64 + 1,
329 end: *end,
330 filename: entry_name,
331 entry,
332 xattr,
333 size,
334 };
335 return Ok(Some(dir_entry));
336 }
337 // Hash collision, check the next entry in the goodbye table by starting
338 // from given index but skipping one more match (so hash at index itself).
339 start_idx = idx.unwrap();
340 skip_multiple = 1;
341 }
342 }
343
344 /// Get attributes for the archive item located at `offset`.
345 ///
346 /// Returns the entry, attributes and the payload size for the item.
347 /// For regular archive itmes a `PXAR_FILENAME` or a `PXAR_ENTRY` header is
348 /// expected at `offset`.
349 /// For directories, `offset` might also (but not necessarily) point at the
350 /// directories `PXAR_GOODBYE_TAIL_MARKER`. This is not mandatory and it can
351 /// also directly point to its `PXAR_FILENAME` or `PXAR_ENTRY`, thereby
352 /// avoiding an additional seek.
353 pub fn attributes(&mut self, offset: u64) -> Result<(OsString, PxarEntry, PxarAttributes, u64), Error> {
354 self.seek(SeekFrom::Start(offset))?;
355
356 let mut marker: u64 = self.inner.read_item()?;
357 if marker == PXAR_GOODBYE_TAIL_MARKER {
358 let dir_offset: u64 = self.inner.read_item()?;
359 let gb_size: u64 = self.inner.read_item()?;
360 let distance = i64::try_from(dir_offset + gb_size)?;
361 self.seek(SeekFrom::Current(0 - distance))?;
362 marker = self.inner.read_item()?;
363 }
364
365 let filename = if marker == PXAR_FILENAME {
366 let size: u64 = self.inner.read_item()?;
367 let filename = self.inner.read_filename(size)?;
368 marker = self.inner.read_item()?;
369 filename
370 } else {
371 OsString::new()
372 };
373
374 if marker == PXAR_FORMAT_HARDLINK {
375 let size: u64 = self.inner.read_item()?;
376 let (_, diff) = self.inner.read_hardlink(size)?;
377 // Make sure to return the original filename,
378 // not the one read from the hardlink.
379 let (_, entry, xattr, file_size) = self.attributes(offset - diff)?;
380 return Ok((filename, entry, xattr, file_size));
381 }
382
383 if marker != PXAR_ENTRY {
384 bail!("Expected PXAR_ENTRY, found 0x{:x?}", marker);
385 }
386 let _size: u64 = self.inner.read_item()?;
387 let entry: PxarEntry = self.inner.read_item()?;
388 let (header, xattr) = self.inner.read_attributes()?;
389 let file_size = match header.htype {
390 PXAR_PAYLOAD => header.size - HEADER_SIZE,
391 _ => 0,
392 };
393
394 Ok((filename, entry, xattr, file_size))
395 }
396
397 /// Opens the file by validating the given `offset` and returning its attrs,
398 /// xattrs and size.
399 pub fn open(&mut self, offset: u64) -> Result<(OsString, PxarEntry, PxarAttributes, u64), Error> {
400 self.attributes(offset)
401 }
402
403 /// Read the payload of the file given by `offset`.
404 ///
405 /// This will read the file by first seeking to `offset` within the archive,
406 /// check if there is indeed a valid item with payload and then read `size`
407 /// bytes of content starting from `data_offset`.
408 /// If EOF is reached before reading `size` bytes, the reduced buffer is
409 /// returned.
410 pub fn read(&mut self, offset: u64, size: usize, data_offset: u64) -> Result<Vec<u8>, Error> {
411 self.seek(SeekFrom::Start(offset))?;
412 let head: PxarHeader = self.inner.read_item()?;
413 if head.htype != PXAR_FILENAME {
414 bail!("Expected PXAR_FILENAME, encountered 0x{:x?}", head.htype);
415 }
416 let _filename = self.inner.read_filename(head.size)?;
417
418 let head: PxarHeader = self.inner.read_item()?;
419 if head.htype == PXAR_FORMAT_HARDLINK {
420 let (_, diff) = self.inner.read_hardlink(head.size)?;
421 return self.read(offset - diff, size, data_offset);
422 }
423 check_ca_header::<PxarEntry>(&head, PXAR_ENTRY)?;
424 let _: PxarEntry = self.inner.read_item()?;
425
426 let (header, _) = self.inner.read_attributes()?;
427 if header.htype != PXAR_PAYLOAD {
428 bail!("Expected PXAR_PAYLOAD, found 0x{:x?}", header.htype);
429 }
430
431 let payload_size = header.size - HEADER_SIZE;
432 if data_offset >= payload_size {
433 return Ok(Vec::new());
434 }
435
436 let len = if data_offset + u64::try_from(size)? > payload_size {
437 usize::try_from(payload_size - data_offset)?
438 } else {
439 size
440 };
441 self.inner.skip_bytes(usize::try_from(data_offset)?)?;
442 let data = self.inner.get_reader_mut().read_exact_allocated(len)?;
443
444 Ok(data)
445 }
446
447 /// Read the target of a hardlink in the archive.
448 pub fn read_link(&mut self, offset: u64) -> Result<(PathBuf, PxarEntry), Error> {
449 self.seek(SeekFrom::Start(offset))?;
450 let mut header: PxarHeader = self.inner.read_item()?;
451 if header.htype != PXAR_FILENAME {
452 bail!("Expected PXAR_FILENAME, encountered 0x{:x?}", header.htype);
453 }
454 let _filename = self.inner.read_filename(header.size)?;
455
456 header = self.inner.read_item()?;
457 check_ca_header::<PxarEntry>(&header, PXAR_ENTRY)?;
458 let entry: PxarEntry = self.inner.read_item()?;
459
460 header = self.inner.read_item()?;
461 if header.htype != PXAR_SYMLINK {
462 bail!("Expected PXAR_SYMLINK, encountered 0x{:x?}", header.htype);
463 }
464 let target = self.inner.read_link(header.size)?;
465
466 Ok((target, entry))
467 }
468 }