]> git.proxmox.com Git - proxmox-backup.git/blame - src/pxar/decoder.rs
Cargo.toml: pathpatterns, pxar, proxmox-fuse
[proxmox-backup.git] / src / pxar / decoder.rs
CommitLineData
46cfe7ab
DM
//! *pxar* format decoder for seekable files
//!
//! This module contains the code to decode *pxar* archive files.
4
bbd055bf 5use std::convert::TryFrom;
e6662118 6use std::ffi::{OsString, OsStr};
bbd055bf
CE
7use std::io::{Read, Seek, SeekFrom};
8use std::path::{Path, PathBuf};
e6662118 9use std::os::unix::ffi::OsStrExt;
bbd055bf 10
f7d4e4b5 11use anyhow::{bail, format_err, Error};
bbd055bf 12use libc;
46cfe7ab 13
02491b8f 14use super::binary_search_tree::search_binary_tree_by;
46cfe7ab 15use super::format_definition::*;
33ad183a
CE
16use super::sequential_decoder::SequentialDecoder;
17use super::match_pattern::MatchPattern;
46cfe7ab 18
bbd055bf 19use proxmox::tools::io::ReadExt;
46cfe7ab 20
f50b4fd6 21pub struct DirectoryEntry {
fb2554de 22 /// Points to the `PxarEntry` of the directory
46cfe7ab 23 start: u64,
fb2554de 24 /// Points past the goodbye table tail
46cfe7ab 25 end: u64,
90fc97af 26 /// Filename of entry
46cfe7ab 27 pub filename: OsString,
90fc97af 28 /// Entry (mode, permissions)
5e50c606 29 pub entry: PxarEntry,
90fc97af
CE
30 /// Extended attributes
31 pub xattr: PxarAttributes,
32 /// Payload size
33 pub size: u64,
a8aff353
CE
34 /// Target path for symbolic links
35 pub target: Option<PathBuf>,
63698e72
CE
36 /// Start offset of the payload if present.
37 pub payload_offset: Option<u64>,
46cfe7ab
DM
38}
39
99b5b6cb
DM
/// Helper trait combining `Read` and `Seek`, so that a reader supporting both
/// can be stored as a single trait object (`Box<dyn ReadSeek>`).
trait ReadSeek: Read + Seek {}

/// Every type that is both `Read` and `Seek` is automatically `ReadSeek`.
impl<R: Read + Seek> ReadSeek for R {}
43
46cfe7ab 44// This one needs Read+Seek
99b5b6cb 45pub struct Decoder {
b9799012 46 inner: SequentialDecoder<Box<dyn ReadSeek + Send>>,
46cfe7ab
DM
47 root_start: u64,
48 root_end: u64,
49}
50
5e50c606 51const HEADER_SIZE: u64 = std::mem::size_of::<PxarHeader>() as u64;
7d26720e 52const GOODBYE_ITEM_SIZE: u64 = std::mem::size_of::<PxarGoodbyeItem>() as u64;
46cfe7ab 53
99b5b6cb 54impl Decoder {
b9799012 55 pub fn new<R: Read + Seek + Send + 'static>(mut reader: R) -> Result<Self, Error> {
46cfe7ab 56 let root_end = reader.seek(SeekFrom::End(0))?;
b9799012 57 let boxed_reader: Box<dyn ReadSeek + 'static + Send> = Box::new(reader);
99b5b6cb 58 let inner = SequentialDecoder::new(boxed_reader, super::flags::DEFAULT);
f701d033
DM
59
60 Ok(Self { inner, root_start: 0, root_end })
61 }
46cfe7ab 62
b9799012 63 pub fn set_callback<F: Fn(&Path) -> Result<(), Error> + Send + 'static>(&mut self, callback: F ) {
f701d033 64 self.inner.set_callback(callback);
46cfe7ab
DM
65 }
66
58262f40
CE
67 pub fn root(&mut self) -> Result<DirectoryEntry, Error> {
68 self.seek(SeekFrom::Start(0))?;
69 let header: PxarHeader = self.inner.read_item()?;
70 check_ca_header::<PxarEntry>(&header, PXAR_ENTRY)?;
71 let entry: PxarEntry = self.inner.read_item()?;
90fc97af 72 let (header, xattr) = self.inner.read_attributes()?;
63698e72
CE
73 let (size, payload_offset) = match header.htype {
74 PXAR_PAYLOAD => (header.size - HEADER_SIZE, Some(self.seek(SeekFrom::Current(0))?)),
75 _ => (0, None),
90fc97af
CE
76 };
77
58262f40 78 Ok(DirectoryEntry {
46cfe7ab
DM
79 start: self.root_start,
80 end: self.root_end,
81 filename: OsString::new(), // Empty
653b1ca1 82 entry,
90fc97af
CE
83 xattr,
84 size,
a8aff353 85 target: None,
63698e72 86 payload_offset,
58262f40 87 })
46cfe7ab
DM
88 }
89
90 fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
91 let pos = self.inner.get_reader_mut().seek(pos)?;
92 Ok(pos)
93 }
94
c7fee396
CE
95 pub(crate) fn root_end_offset(&self) -> u64 {
96 self.root_end
97 }
98
33ad183a
CE
99 /// Restore the subarchive starting at `dir` to the provided target `path`.
100 ///
101 /// Only restore the content matched by the MatchPattern `pattern`.
102 /// An empty Vec `pattern` means restore all.
103 pub fn restore(&mut self, dir: &DirectoryEntry, path: &Path, pattern: &Vec<MatchPattern>) -> Result<(), Error> {
46cfe7ab 104 let start = dir.start;
46cfe7ab 105 self.seek(SeekFrom::Start(start))?;
33ad183a 106 self.inner.restore(path, pattern)?;
46cfe7ab
DM
107
108 Ok(())
109 }
110
ac12570e
CE
111 pub(crate) fn read_directory_entry(
112 &mut self,
113 start: u64,
114 end: u64,
115 ) -> Result<DirectoryEntry, Error> {
46cfe7ab
DM
116 self.seek(SeekFrom::Start(start))?;
117
5e50c606 118 let head: PxarHeader = self.inner.read_item()?;
46cfe7ab 119
5e50c606 120 if head.htype != PXAR_FILENAME {
46cfe7ab
DM
121 bail!("wrong filename header type for object [{}..{}]", start, end);
122 }
123
124 let entry_start = start + head.size;
125
126 let filename = self.inner.read_filename(head.size)?;
127
5e50c606 128 let head: PxarHeader = self.inner.read_item()?;
e5471b48
CE
129 if head.htype == PXAR_FORMAT_HARDLINK {
130 let (_, offset) = self.inner.read_hardlink(head.size)?;
132cb0d0
CE
131 // TODO: Howto find correct end offset for hardlink target?
132 // This is a bit tricky since we cannot find correct end in an efficient
133 // way, on the other hand it doesn't really matter (for now) since target
134 // is never a directory and end is not used in such cases.
e5471b48
CE
135 return self.read_directory_entry(start - offset, end);
136 }
5e50c606
CE
137 check_ca_header::<PxarEntry>(&head, PXAR_ENTRY)?;
138 let entry: PxarEntry = self.inner.read_item()?;
90fc97af 139 let (header, xattr) = self.inner.read_attributes()?;
351b913d
CE
140 let (size, payload_offset, target) = match header.htype {
141 PXAR_PAYLOAD =>
142 (header.size - HEADER_SIZE, Some(self.seek(SeekFrom::Current(0))?), None),
143 PXAR_SYMLINK =>
144 (header.size - HEADER_SIZE, None, Some(self.inner.read_link(header.size)?)),
145 _ => (0, None, None),
a8aff353 146 };
46cfe7ab 147
f50b4fd6 148 Ok(DirectoryEntry {
46cfe7ab 149 start: entry_start,
653b1ca1
WB
150 end,
151 filename,
46cfe7ab 152 entry,
90fc97af
CE
153 xattr,
154 size,
a8aff353 155 target,
63698e72 156 payload_offset,
46cfe7ab
DM
157 })
158 }
159
d00097a0
CE
160 /// Return the goodbye table based on the provided end offset.
161 ///
162 /// Get the goodbye table entries and the start and end offsets of the
163 /// items they reference.
164 /// If the start offset is provided, we use that to check the consistency of
165 /// the data, else the start offset calculated based on the goodbye tail is
166 /// used.
167 pub(crate) fn goodbye_table(
168 &mut self,
169 start: Option<u64>,
170 end: u64,
171 ) -> Result<Vec<(PxarGoodbyeItem, u64, u64)>, Error> {
46cfe7ab
DM
172 self.seek(SeekFrom::Start(end - GOODBYE_ITEM_SIZE))?;
173
d00097a0
CE
174 let tail: PxarGoodbyeItem = self.inner.read_item()?;
175 if tail.hash != PXAR_GOODBYE_TAIL_MARKER {
176 bail!("missing goodbye tail marker for object at offset {}", end);
46cfe7ab
DM
177 }
178
d00097a0
CE
179 // If the start offset was provided, we use and check based on that.
180 // If not, we rely on the offset calculated from the goodbye table entry.
181 let start = start.unwrap_or(end - tail.offset - tail.size);
182 let goodbye_table_size = tail.size;
46cfe7ab
DM
183 if goodbye_table_size < (HEADER_SIZE + GOODBYE_ITEM_SIZE) {
184 bail!("short goodbye table size for object [{}..{}]", start, end);
46cfe7ab 185 }
d00097a0 186
46cfe7ab
DM
187 let goodbye_inner_size = goodbye_table_size - HEADER_SIZE - GOODBYE_ITEM_SIZE;
188 if (goodbye_inner_size % GOODBYE_ITEM_SIZE) != 0 {
3626ac61
CE
189 bail!(
190 "wrong goodbye inner table size for entry [{}..{}]",
191 start,
192 end
193 );
46cfe7ab
DM
194 }
195
196 let goodbye_start = end - goodbye_table_size;
d00097a0 197 if tail.offset != (goodbye_start - start) {
3626ac61
CE
198 bail!(
199 "wrong offset in goodbye tail marker for entry [{}..{}]",
200 start,
201 end
202 );
46cfe7ab
DM
203 }
204
205 self.seek(SeekFrom::Start(goodbye_start))?;
5e50c606 206 let head: PxarHeader = self.inner.read_item()?;
5e50c606 207 if head.htype != PXAR_GOODBYE {
3626ac61
CE
208 bail!(
209 "wrong goodbye table header type for entry [{}..{}]",
210 start,
211 end
212 );
46cfe7ab
DM
213 }
214
215 if head.size != goodbye_table_size {
216 bail!("wrong goodbye table size for entry [{}..{}]", start, end);
217 }
218
d00097a0 219 let mut gb_entries = Vec::new();
3626ac61 220 for i in 0..goodbye_inner_size / GOODBYE_ITEM_SIZE {
5e50c606 221 let item: PxarGoodbyeItem = self.inner.read_item()?;
46cfe7ab 222 if item.offset > (goodbye_start - start) {
3626ac61
CE
223 bail!(
224 "goodbye entry {} offset out of range [{}..{}] {} {} {}",
225 i,
226 start,
227 end,
228 item.offset,
229 goodbye_start,
230 start
231 );
46cfe7ab
DM
232 }
233 let item_start = goodbye_start - item.offset;
234 let item_end = item_start + item.size;
235 if item_end > goodbye_start {
3626ac61 236 bail!("goodbye entry {} end out of range [{}..{}]", i, start, end);
46cfe7ab 237 }
d00097a0
CE
238 gb_entries.push((item, item_start, item_end));
239 }
240
241 Ok(gb_entries)
242 }
46cfe7ab 243
d00097a0
CE
244 pub fn list_dir(&mut self, dir: &DirectoryEntry) -> Result<Vec<DirectoryEntry>, Error> {
245 let start = dir.start;
246 let end = dir.end;
247
248 //println!("list_dir1: {} {}", start, end);
249
250 if (end - start) < (HEADER_SIZE + GOODBYE_ITEM_SIZE) {
251 bail!("detected short object [{}..{}]", start, end);
46cfe7ab
DM
252 }
253
254 let mut result = vec![];
d00097a0
CE
255 let goodbye_table = self.goodbye_table(Some(start), end)?;
256 for (_, item_start, item_end) in goodbye_table {
46cfe7ab
DM
257 let entry = self.read_directory_entry(item_start, item_end)?;
258 //println!("ENTRY: {} {} {:?}", item_start, item_end, entry.filename);
259 result.push(entry);
260 }
261
262 Ok(result)
263 }
264
265 pub fn print_filenames<W: std::io::Write>(
266 &mut self,
267 output: &mut W,
268 prefix: &mut PathBuf,
f50b4fd6 269 dir: &DirectoryEntry,
46cfe7ab 270 ) -> Result<(), Error> {
46cfe7ab
DM
271 let mut list = self.list_dir(dir)?;
272
273 list.sort_unstable_by(|a, b| a.filename.cmp(&b.filename));
274
275 for item in &list {
46cfe7ab
DM
276 prefix.push(item.filename.clone());
277
278 let mode = item.entry.mode as u32;
279
280 let ifmt = mode & libc::S_IFMT;
281
9307279f 282 writeln!(output, "{:?}", prefix)?;
46cfe7ab 283
132cb0d0
CE
284 match ifmt {
285 libc::S_IFDIR => self.print_filenames(output, prefix, item)?,
286 libc::S_IFREG | libc::S_IFLNK | libc::S_IFBLK | libc::S_IFCHR => {}
287 _ => bail!("unknown item mode/type for {:?}", prefix),
46cfe7ab
DM
288 }
289
290 prefix.pop();
291 }
292
293 Ok(())
294 }
bbd055bf 295
e6662118
CE
296 /// Lookup the item identified by `filename` in the provided `DirectoryEntry`.
297 ///
298 /// Calculates the hash of the filename and searches for matching entries in
299 /// the goodbye table of the provided `DirectoryEntry`.
300 /// If found, also the filename is compared to avoid hash collision.
301 /// If the filename does not match, the search resumes with the next entry in
302 /// the goodbye table.
303 /// If there is no entry with matching `filename`, `Ok(None)` is returned.
304 pub fn lookup(
305 &mut self,
306 dir: &DirectoryEntry,
307 filename: &OsStr,
90fc97af 308 ) -> Result<Option<DirectoryEntry>, Error> {
e6662118
CE
309 let gbt = self.goodbye_table(Some(dir.start), dir.end)?;
310 let hash = compute_goodbye_hash(filename.as_bytes());
311
02491b8f
CE
312 let mut start_idx = 0;
313 let mut skip_multiple = 0;
e6662118
CE
314 loop {
315 // Search for the next goodbye entry with matching hash.
02491b8f
CE
316 let idx = search_binary_tree_by(
317 start_idx,
318 gbt.len(),
319 skip_multiple,
320 |idx| hash.cmp(&gbt[idx].0.hash),
321 );
322 let (_item, start, end) = match idx {
323 Some(idx) => &gbt[idx],
e6662118
CE
324 None => return Ok(None),
325 };
326
67444407 327 let entry = self.read_directory_entry(*start, *end)?;
e6662118
CE
328
329 // Possible hash collision, need to check if the found entry is indeed
330 // the filename to lookup.
67444407
CE
331 if entry.filename == filename {
332 return Ok(Some(entry));
e6662118 333 }
02491b8f
CE
334 // Hash collision, check the next entry in the goodbye table by starting
335 // from given index but skipping one more match (so hash at index itself).
336 start_idx = idx.unwrap();
337 skip_multiple = 1;
e6662118
CE
338 }
339 }
340
63698e72 341 /// Read the payload of the file given by `entry`.
bbd055bf 342 ///
63698e72
CE
343 /// This will read a files payload as raw bytes starting from `offset` after
344 /// the payload marker, reading `size` bytes.
345 /// If the payload from `offset` to EOF is smaller than `size` bytes, the
346 /// buffer with reduced size is returned.
347 /// If `offset` is larger than the payload size of the `DirectoryEntry`, an
348 /// empty buffer is returned.
349 pub fn read(&mut self, entry: &DirectoryEntry, size: usize, offset: u64) -> Result<Vec<u8>, Error> {
350 let start_offset = entry.payload_offset
351 .ok_or_else(|| format_err!("entry has no payload offset"))?;
352 if offset >= entry.size {
bbd055bf
CE
353 return Ok(Vec::new());
354 }
63698e72
CE
355 let len = if u64::try_from(size)? > entry.size {
356 usize::try_from(entry.size)?
bbd055bf
CE
357 } else {
358 size
359 };
63698e72 360 self.seek(SeekFrom::Start(start_offset + offset))?;
bbd055bf
CE
361 let data = self.inner.get_reader_mut().read_exact_allocated(len)?;
362
363 Ok(data)
364 }
46cfe7ab 365}