]> git.proxmox.com Git - proxmox-backup.git/blob - src/pxar/decoder.rs
Cargo.toml: pathpatterns, pxar, proxmox-fuse
[proxmox-backup.git] / src / pxar / decoder.rs
1 //! *pxar* format decoder for seekable files
2 //!
3 //! This module contains the code to decode *pxar* archive files.
4
5 use std::convert::TryFrom;
6 use std::ffi::{OsString, OsStr};
7 use std::io::{Read, Seek, SeekFrom};
8 use std::path::{Path, PathBuf};
9 use std::os::unix::ffi::OsStrExt;
10
11 use anyhow::{bail, format_err, Error};
12 use libc;
13
14 use super::binary_search_tree::search_binary_tree_by;
15 use super::format_definition::*;
16 use super::sequential_decoder::SequentialDecoder;
17 use super::match_pattern::MatchPattern;
18
19 use proxmox::tools::io::ReadExt;
20
21 pub struct DirectoryEntry {
22 /// Points to the `PxarEntry` of the directory
23 start: u64,
24 /// Points past the goodbye table tail
25 end: u64,
26 /// Filename of entry
27 pub filename: OsString,
28 /// Entry (mode, permissions)
29 pub entry: PxarEntry,
30 /// Extended attributes
31 pub xattr: PxarAttributes,
32 /// Payload size
33 pub size: u64,
34 /// Target path for symbolic links
35 pub target: Option<PathBuf>,
36 /// Start offset of the payload if present.
37 pub payload_offset: Option<u64>,
38 }
39
/// Combination of `Read` and `Seek`, so that a reader can be stored as a
/// single `dyn ReadSeek` trait object inside the decoder.
trait ReadSeek: Read + Seek {}

/// Every type that is both readable and seekable is a `ReadSeek`.
impl<T> ReadSeek for T where T: Read + Seek {}
43
44 // This one needs Read+Seek
45 pub struct Decoder {
46 inner: SequentialDecoder<Box<dyn ReadSeek + Send>>,
47 root_start: u64,
48 root_end: u64,
49 }
50
51 const HEADER_SIZE: u64 = std::mem::size_of::<PxarHeader>() as u64;
52 const GOODBYE_ITEM_SIZE: u64 = std::mem::size_of::<PxarGoodbyeItem>() as u64;
53
54 impl Decoder {
55 pub fn new<R: Read + Seek + Send + 'static>(mut reader: R) -> Result<Self, Error> {
56 let root_end = reader.seek(SeekFrom::End(0))?;
57 let boxed_reader: Box<dyn ReadSeek + 'static + Send> = Box::new(reader);
58 let inner = SequentialDecoder::new(boxed_reader, super::flags::DEFAULT);
59
60 Ok(Self { inner, root_start: 0, root_end })
61 }
62
63 pub fn set_callback<F: Fn(&Path) -> Result<(), Error> + Send + 'static>(&mut self, callback: F ) {
64 self.inner.set_callback(callback);
65 }
66
67 pub fn root(&mut self) -> Result<DirectoryEntry, Error> {
68 self.seek(SeekFrom::Start(0))?;
69 let header: PxarHeader = self.inner.read_item()?;
70 check_ca_header::<PxarEntry>(&header, PXAR_ENTRY)?;
71 let entry: PxarEntry = self.inner.read_item()?;
72 let (header, xattr) = self.inner.read_attributes()?;
73 let (size, payload_offset) = match header.htype {
74 PXAR_PAYLOAD => (header.size - HEADER_SIZE, Some(self.seek(SeekFrom::Current(0))?)),
75 _ => (0, None),
76 };
77
78 Ok(DirectoryEntry {
79 start: self.root_start,
80 end: self.root_end,
81 filename: OsString::new(), // Empty
82 entry,
83 xattr,
84 size,
85 target: None,
86 payload_offset,
87 })
88 }
89
90 fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
91 let pos = self.inner.get_reader_mut().seek(pos)?;
92 Ok(pos)
93 }
94
95 pub(crate) fn root_end_offset(&self) -> u64 {
96 self.root_end
97 }
98
99 /// Restore the subarchive starting at `dir` to the provided target `path`.
100 ///
101 /// Only restore the content matched by the MatchPattern `pattern`.
102 /// An empty Vec `pattern` means restore all.
103 pub fn restore(&mut self, dir: &DirectoryEntry, path: &Path, pattern: &Vec<MatchPattern>) -> Result<(), Error> {
104 let start = dir.start;
105 self.seek(SeekFrom::Start(start))?;
106 self.inner.restore(path, pattern)?;
107
108 Ok(())
109 }
110
111 pub(crate) fn read_directory_entry(
112 &mut self,
113 start: u64,
114 end: u64,
115 ) -> Result<DirectoryEntry, Error> {
116 self.seek(SeekFrom::Start(start))?;
117
118 let head: PxarHeader = self.inner.read_item()?;
119
120 if head.htype != PXAR_FILENAME {
121 bail!("wrong filename header type for object [{}..{}]", start, end);
122 }
123
124 let entry_start = start + head.size;
125
126 let filename = self.inner.read_filename(head.size)?;
127
128 let head: PxarHeader = self.inner.read_item()?;
129 if head.htype == PXAR_FORMAT_HARDLINK {
130 let (_, offset) = self.inner.read_hardlink(head.size)?;
131 // TODO: Howto find correct end offset for hardlink target?
132 // This is a bit tricky since we cannot find correct end in an efficient
133 // way, on the other hand it doesn't really matter (for now) since target
134 // is never a directory and end is not used in such cases.
135 return self.read_directory_entry(start - offset, end);
136 }
137 check_ca_header::<PxarEntry>(&head, PXAR_ENTRY)?;
138 let entry: PxarEntry = self.inner.read_item()?;
139 let (header, xattr) = self.inner.read_attributes()?;
140 let (size, payload_offset, target) = match header.htype {
141 PXAR_PAYLOAD =>
142 (header.size - HEADER_SIZE, Some(self.seek(SeekFrom::Current(0))?), None),
143 PXAR_SYMLINK =>
144 (header.size - HEADER_SIZE, None, Some(self.inner.read_link(header.size)?)),
145 _ => (0, None, None),
146 };
147
148 Ok(DirectoryEntry {
149 start: entry_start,
150 end,
151 filename,
152 entry,
153 xattr,
154 size,
155 target,
156 payload_offset,
157 })
158 }
159
160 /// Return the goodbye table based on the provided end offset.
161 ///
162 /// Get the goodbye table entries and the start and end offsets of the
163 /// items they reference.
164 /// If the start offset is provided, we use that to check the consistency of
165 /// the data, else the start offset calculated based on the goodbye tail is
166 /// used.
167 pub(crate) fn goodbye_table(
168 &mut self,
169 start: Option<u64>,
170 end: u64,
171 ) -> Result<Vec<(PxarGoodbyeItem, u64, u64)>, Error> {
172 self.seek(SeekFrom::Start(end - GOODBYE_ITEM_SIZE))?;
173
174 let tail: PxarGoodbyeItem = self.inner.read_item()?;
175 if tail.hash != PXAR_GOODBYE_TAIL_MARKER {
176 bail!("missing goodbye tail marker for object at offset {}", end);
177 }
178
179 // If the start offset was provided, we use and check based on that.
180 // If not, we rely on the offset calculated from the goodbye table entry.
181 let start = start.unwrap_or(end - tail.offset - tail.size);
182 let goodbye_table_size = tail.size;
183 if goodbye_table_size < (HEADER_SIZE + GOODBYE_ITEM_SIZE) {
184 bail!("short goodbye table size for object [{}..{}]", start, end);
185 }
186
187 let goodbye_inner_size = goodbye_table_size - HEADER_SIZE - GOODBYE_ITEM_SIZE;
188 if (goodbye_inner_size % GOODBYE_ITEM_SIZE) != 0 {
189 bail!(
190 "wrong goodbye inner table size for entry [{}..{}]",
191 start,
192 end
193 );
194 }
195
196 let goodbye_start = end - goodbye_table_size;
197 if tail.offset != (goodbye_start - start) {
198 bail!(
199 "wrong offset in goodbye tail marker for entry [{}..{}]",
200 start,
201 end
202 );
203 }
204
205 self.seek(SeekFrom::Start(goodbye_start))?;
206 let head: PxarHeader = self.inner.read_item()?;
207 if head.htype != PXAR_GOODBYE {
208 bail!(
209 "wrong goodbye table header type for entry [{}..{}]",
210 start,
211 end
212 );
213 }
214
215 if head.size != goodbye_table_size {
216 bail!("wrong goodbye table size for entry [{}..{}]", start, end);
217 }
218
219 let mut gb_entries = Vec::new();
220 for i in 0..goodbye_inner_size / GOODBYE_ITEM_SIZE {
221 let item: PxarGoodbyeItem = self.inner.read_item()?;
222 if item.offset > (goodbye_start - start) {
223 bail!(
224 "goodbye entry {} offset out of range [{}..{}] {} {} {}",
225 i,
226 start,
227 end,
228 item.offset,
229 goodbye_start,
230 start
231 );
232 }
233 let item_start = goodbye_start - item.offset;
234 let item_end = item_start + item.size;
235 if item_end > goodbye_start {
236 bail!("goodbye entry {} end out of range [{}..{}]", i, start, end);
237 }
238 gb_entries.push((item, item_start, item_end));
239 }
240
241 Ok(gb_entries)
242 }
243
244 pub fn list_dir(&mut self, dir: &DirectoryEntry) -> Result<Vec<DirectoryEntry>, Error> {
245 let start = dir.start;
246 let end = dir.end;
247
248 //println!("list_dir1: {} {}", start, end);
249
250 if (end - start) < (HEADER_SIZE + GOODBYE_ITEM_SIZE) {
251 bail!("detected short object [{}..{}]", start, end);
252 }
253
254 let mut result = vec![];
255 let goodbye_table = self.goodbye_table(Some(start), end)?;
256 for (_, item_start, item_end) in goodbye_table {
257 let entry = self.read_directory_entry(item_start, item_end)?;
258 //println!("ENTRY: {} {} {:?}", item_start, item_end, entry.filename);
259 result.push(entry);
260 }
261
262 Ok(result)
263 }
264
265 pub fn print_filenames<W: std::io::Write>(
266 &mut self,
267 output: &mut W,
268 prefix: &mut PathBuf,
269 dir: &DirectoryEntry,
270 ) -> Result<(), Error> {
271 let mut list = self.list_dir(dir)?;
272
273 list.sort_unstable_by(|a, b| a.filename.cmp(&b.filename));
274
275 for item in &list {
276 prefix.push(item.filename.clone());
277
278 let mode = item.entry.mode as u32;
279
280 let ifmt = mode & libc::S_IFMT;
281
282 writeln!(output, "{:?}", prefix)?;
283
284 match ifmt {
285 libc::S_IFDIR => self.print_filenames(output, prefix, item)?,
286 libc::S_IFREG | libc::S_IFLNK | libc::S_IFBLK | libc::S_IFCHR => {}
287 _ => bail!("unknown item mode/type for {:?}", prefix),
288 }
289
290 prefix.pop();
291 }
292
293 Ok(())
294 }
295
296 /// Lookup the item identified by `filename` in the provided `DirectoryEntry`.
297 ///
298 /// Calculates the hash of the filename and searches for matching entries in
299 /// the goodbye table of the provided `DirectoryEntry`.
300 /// If found, also the filename is compared to avoid hash collision.
301 /// If the filename does not match, the search resumes with the next entry in
302 /// the goodbye table.
303 /// If there is no entry with matching `filename`, `Ok(None)` is returned.
304 pub fn lookup(
305 &mut self,
306 dir: &DirectoryEntry,
307 filename: &OsStr,
308 ) -> Result<Option<DirectoryEntry>, Error> {
309 let gbt = self.goodbye_table(Some(dir.start), dir.end)?;
310 let hash = compute_goodbye_hash(filename.as_bytes());
311
312 let mut start_idx = 0;
313 let mut skip_multiple = 0;
314 loop {
315 // Search for the next goodbye entry with matching hash.
316 let idx = search_binary_tree_by(
317 start_idx,
318 gbt.len(),
319 skip_multiple,
320 |idx| hash.cmp(&gbt[idx].0.hash),
321 );
322 let (_item, start, end) = match idx {
323 Some(idx) => &gbt[idx],
324 None => return Ok(None),
325 };
326
327 let entry = self.read_directory_entry(*start, *end)?;
328
329 // Possible hash collision, need to check if the found entry is indeed
330 // the filename to lookup.
331 if entry.filename == filename {
332 return Ok(Some(entry));
333 }
334 // Hash collision, check the next entry in the goodbye table by starting
335 // from given index but skipping one more match (so hash at index itself).
336 start_idx = idx.unwrap();
337 skip_multiple = 1;
338 }
339 }
340
341 /// Read the payload of the file given by `entry`.
342 ///
343 /// This will read a files payload as raw bytes starting from `offset` after
344 /// the payload marker, reading `size` bytes.
345 /// If the payload from `offset` to EOF is smaller than `size` bytes, the
346 /// buffer with reduced size is returned.
347 /// If `offset` is larger than the payload size of the `DirectoryEntry`, an
348 /// empty buffer is returned.
349 pub fn read(&mut self, entry: &DirectoryEntry, size: usize, offset: u64) -> Result<Vec<u8>, Error> {
350 let start_offset = entry.payload_offset
351 .ok_or_else(|| format_err!("entry has no payload offset"))?;
352 if offset >= entry.size {
353 return Ok(Vec::new());
354 }
355 let len = if u64::try_from(size)? > entry.size {
356 usize::try_from(entry.size)?
357 } else {
358 size
359 };
360 self.seek(SeekFrom::Start(start_offset + offset))?;
361 let data = self.inner.get_reader_mut().read_exact_allocated(len)?;
362
363 Ok(data)
364 }
365 }