]> git.proxmox.com Git - proxmox-backup.git/blob - src/pxar/decoder.rs
src/pxar/decoder.rs: return also payload size in lookup
[proxmox-backup.git] / src / pxar / decoder.rs
//! *pxar* format decoder for seekable files
//!
//! This module contains the code to decode *pxar* archive files.
4
5 use std::convert::TryFrom;
6 use std::ffi::{OsString, OsStr};
7 use std::io::{Read, Seek, SeekFrom};
8 use std::path::{Path, PathBuf};
9 use std::os::unix::ffi::OsStrExt;
10
11 use failure::*;
12 use libc;
13
14 use super::format_definition::*;
15 use super::sequential_decoder::SequentialDecoder;
16 use super::match_pattern::MatchPattern;
17
18 use proxmox::tools::io::ReadExt;
19
20 pub struct DirectoryEntry {
21 /// Points to the `PxarEntry` of the directory
22 start: u64,
23 /// Points past the goodbye table tail
24 end: u64,
25 pub filename: OsString,
26 pub entry: PxarEntry,
27 }
28
29 // This one needs Read+Seek
30 pub struct Decoder<R: Read + Seek, F: Fn(&Path) -> Result<(), Error>> {
31 inner: SequentialDecoder<R, F>,
32 root_start: u64,
33 root_end: u64,
34 }
35
36 const HEADER_SIZE: u64 = std::mem::size_of::<PxarHeader>() as u64;
37 const GOODBYE_ITEM_SIZE: u64 = std::mem::size_of::<PxarGoodbyeItem>() as u64;
38
39 impl<R: Read + Seek, F: Fn(&Path) -> Result<(), Error>> Decoder<R, F> {
40 pub fn new(mut reader: R, callback: F) -> Result<Self, Error> {
41 let root_end = reader.seek(SeekFrom::End(0))?;
42
43 Ok(Self {
44 inner: SequentialDecoder::new(reader, super::flags::DEFAULT, callback),
45 root_start: 0,
46 root_end,
47 })
48 }
49
50 pub fn root(&mut self) -> Result<DirectoryEntry, Error> {
51 self.seek(SeekFrom::Start(0))?;
52 let header: PxarHeader = self.inner.read_item()?;
53 check_ca_header::<PxarEntry>(&header, PXAR_ENTRY)?;
54 let entry: PxarEntry = self.inner.read_item()?;
55 Ok(DirectoryEntry {
56 start: self.root_start,
57 end: self.root_end,
58 filename: OsString::new(), // Empty
59 entry,
60 })
61 }
62
63 fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> {
64 let pos = self.inner.get_reader_mut().seek(pos)?;
65 Ok(pos)
66 }
67
68 pub(crate) fn root_end_offset(&self) -> u64 {
69 self.root_end
70 }
71
72 /// Restore the subarchive starting at `dir` to the provided target `path`.
73 ///
74 /// Only restore the content matched by the MatchPattern `pattern`.
75 /// An empty Vec `pattern` means restore all.
76 pub fn restore(&mut self, dir: &DirectoryEntry, path: &Path, pattern: &Vec<MatchPattern>) -> Result<(), Error> {
77 let start = dir.start;
78 self.seek(SeekFrom::Start(start))?;
79 self.inner.restore(path, pattern)?;
80
81 Ok(())
82 }
83
84 pub(crate) fn read_directory_entry(
85 &mut self,
86 start: u64,
87 end: u64,
88 ) -> Result<DirectoryEntry, Error> {
89 self.seek(SeekFrom::Start(start))?;
90
91 let head: PxarHeader = self.inner.read_item()?;
92
93 if head.htype != PXAR_FILENAME {
94 bail!("wrong filename header type for object [{}..{}]", start, end);
95 }
96
97 let entry_start = start + head.size;
98
99 let filename = self.inner.read_filename(head.size)?;
100
101 let head: PxarHeader = self.inner.read_item()?;
102 if head.htype == PXAR_FORMAT_HARDLINK {
103 let (_, offset) = self.inner.read_hardlink(head.size)?;
104 // TODO: Howto find correct end offset for hardlink target?
105 // This is a bit tricky since we cannot find correct end in an efficient
106 // way, on the other hand it doesn't really matter (for now) since target
107 // is never a directory and end is not used in such cases.
108 return self.read_directory_entry(start - offset, end);
109 }
110 check_ca_header::<PxarEntry>(&head, PXAR_ENTRY)?;
111 let entry: PxarEntry = self.inner.read_item()?;
112
113 Ok(DirectoryEntry {
114 start: entry_start,
115 end,
116 filename,
117 entry,
118 })
119 }
120
121 /// Return the goodbye table based on the provided end offset.
122 ///
123 /// Get the goodbye table entries and the start and end offsets of the
124 /// items they reference.
125 /// If the start offset is provided, we use that to check the consistency of
126 /// the data, else the start offset calculated based on the goodbye tail is
127 /// used.
128 pub(crate) fn goodbye_table(
129 &mut self,
130 start: Option<u64>,
131 end: u64,
132 ) -> Result<Vec<(PxarGoodbyeItem, u64, u64)>, Error> {
133 self.seek(SeekFrom::Start(end - GOODBYE_ITEM_SIZE))?;
134
135 let tail: PxarGoodbyeItem = self.inner.read_item()?;
136 if tail.hash != PXAR_GOODBYE_TAIL_MARKER {
137 bail!("missing goodbye tail marker for object at offset {}", end);
138 }
139
140 // If the start offset was provided, we use and check based on that.
141 // If not, we rely on the offset calculated from the goodbye table entry.
142 let start = start.unwrap_or(end - tail.offset - tail.size);
143 let goodbye_table_size = tail.size;
144 if goodbye_table_size < (HEADER_SIZE + GOODBYE_ITEM_SIZE) {
145 bail!("short goodbye table size for object [{}..{}]", start, end);
146 }
147
148 let goodbye_inner_size = goodbye_table_size - HEADER_SIZE - GOODBYE_ITEM_SIZE;
149 if (goodbye_inner_size % GOODBYE_ITEM_SIZE) != 0 {
150 bail!(
151 "wrong goodbye inner table size for entry [{}..{}]",
152 start,
153 end
154 );
155 }
156
157 let goodbye_start = end - goodbye_table_size;
158 if tail.offset != (goodbye_start - start) {
159 bail!(
160 "wrong offset in goodbye tail marker for entry [{}..{}]",
161 start,
162 end
163 );
164 }
165
166 self.seek(SeekFrom::Start(goodbye_start))?;
167 let head: PxarHeader = self.inner.read_item()?;
168 if head.htype != PXAR_GOODBYE {
169 bail!(
170 "wrong goodbye table header type for entry [{}..{}]",
171 start,
172 end
173 );
174 }
175
176 if head.size != goodbye_table_size {
177 bail!("wrong goodbye table size for entry [{}..{}]", start, end);
178 }
179
180 let mut gb_entries = Vec::new();
181 for i in 0..goodbye_inner_size / GOODBYE_ITEM_SIZE {
182 let item: PxarGoodbyeItem = self.inner.read_item()?;
183 if item.offset > (goodbye_start - start) {
184 bail!(
185 "goodbye entry {} offset out of range [{}..{}] {} {} {}",
186 i,
187 start,
188 end,
189 item.offset,
190 goodbye_start,
191 start
192 );
193 }
194 let item_start = goodbye_start - item.offset;
195 let item_end = item_start + item.size;
196 if item_end > goodbye_start {
197 bail!("goodbye entry {} end out of range [{}..{}]", i, start, end);
198 }
199 gb_entries.push((item, item_start, item_end));
200 }
201
202 Ok(gb_entries)
203 }
204
205 pub fn list_dir(&mut self, dir: &DirectoryEntry) -> Result<Vec<DirectoryEntry>, Error> {
206 let start = dir.start;
207 let end = dir.end;
208
209 //println!("list_dir1: {} {}", start, end);
210
211 if (end - start) < (HEADER_SIZE + GOODBYE_ITEM_SIZE) {
212 bail!("detected short object [{}..{}]", start, end);
213 }
214
215 let mut result = vec![];
216 let goodbye_table = self.goodbye_table(Some(start), end)?;
217 for (_, item_start, item_end) in goodbye_table {
218 let entry = self.read_directory_entry(item_start, item_end)?;
219 //println!("ENTRY: {} {} {:?}", item_start, item_end, entry.filename);
220 result.push(entry);
221 }
222
223 Ok(result)
224 }
225
226 pub fn print_filenames<W: std::io::Write>(
227 &mut self,
228 output: &mut W,
229 prefix: &mut PathBuf,
230 dir: &DirectoryEntry,
231 ) -> Result<(), Error> {
232 let mut list = self.list_dir(dir)?;
233
234 list.sort_unstable_by(|a, b| a.filename.cmp(&b.filename));
235
236 for item in &list {
237 prefix.push(item.filename.clone());
238
239 let mode = item.entry.mode as u32;
240
241 let ifmt = mode & libc::S_IFMT;
242
243 writeln!(output, "{:?}", prefix)?;
244
245 match ifmt {
246 libc::S_IFDIR => self.print_filenames(output, prefix, item)?,
247 libc::S_IFREG | libc::S_IFLNK | libc::S_IFBLK | libc::S_IFCHR => {}
248 _ => bail!("unknown item mode/type for {:?}", prefix),
249 }
250
251 prefix.pop();
252 }
253
254 Ok(())
255 }
256
257 /// Lookup the item identified by `filename` in the provided `DirectoryEntry`.
258 ///
259 /// Calculates the hash of the filename and searches for matching entries in
260 /// the goodbye table of the provided `DirectoryEntry`.
261 /// If found, also the filename is compared to avoid hash collision.
262 /// If the filename does not match, the search resumes with the next entry in
263 /// the goodbye table.
264 /// If there is no entry with matching `filename`, `Ok(None)` is returned.
265 pub fn lookup(
266 &mut self,
267 dir: &DirectoryEntry,
268 filename: &OsStr,
269 ) -> Result<Option<(DirectoryEntry, PxarAttributes, u64)>, Error> {
270 let gbt = self.goodbye_table(Some(dir.start), dir.end)?;
271 let hash = compute_goodbye_hash(filename.as_bytes());
272
273 let mut iterator = gbt.iter();
274 loop {
275 // Search for the next goodbye entry with matching hash.
276 let (start, end) = match iterator.find(|(i, _, _)| i.hash == hash) {
277 Some((_item, start, end)) => (start, end),
278 None => return Ok(None),
279 };
280
281 // At this point it is not clear if the item is a directory or not,
282 // this has to be decided based on the entry mode.
283 // `Decoder`s attributes function accepts both, offsets pointing to
284 // the start of an item (PXAR_FILENAME) or the GOODBYE_TAIL_MARKER in
285 // case of directories, so the use of start offset is fine for both
286 // cases.
287 let (entry_name, entry, attr, payload_size) = self.attributes(*start)?;
288
289 // Possible hash collision, need to check if the found entry is indeed
290 // the filename to lookup.
291 if entry_name == filename {
292 let dir_entry = DirectoryEntry {
293 start: *start + HEADER_SIZE + entry_name.len() as u64 + 1,
294 end: *end,
295 filename: entry_name,
296 entry,
297 };
298 return Ok(Some((dir_entry, attr, payload_size)));
299 }
300 }
301 }
302
303 /// Get attributes for the archive item located at `offset`.
304 ///
305 /// Returns the entry, attributes and the payload size for the item.
306 /// For regular archive itmes a `PXAR_FILENAME` or a `PXAR_ENTRY` header is
307 /// expected at `offset`.
308 /// For directories, `offset` might also (but not necessarily) point at the
309 /// directories `PXAR_GOODBYE_TAIL_MARKER`. This is not mandatory and it can
310 /// also directly point to its `PXAR_FILENAME` or `PXAR_ENTRY`, thereby
311 /// avoiding an additional seek.
312 pub fn attributes(&mut self, offset: u64) -> Result<(OsString, PxarEntry, PxarAttributes, u64), Error> {
313 self.seek(SeekFrom::Start(offset))?;
314
315 let mut marker: u64 = self.inner.read_item()?;
316 if marker == PXAR_GOODBYE_TAIL_MARKER {
317 let dir_offset: u64 = self.inner.read_item()?;
318 let gb_size: u64 = self.inner.read_item()?;
319 let distance = i64::try_from(dir_offset + gb_size)?;
320 self.seek(SeekFrom::Current(0 - distance))?;
321 marker = self.inner.read_item()?;
322 }
323
324 let filename = if marker == PXAR_FILENAME {
325 let size: u64 = self.inner.read_item()?;
326 let filename = self.inner.read_filename(size)?;
327 marker = self.inner.read_item()?;
328 filename
329 } else {
330 OsString::new()
331 };
332
333 if marker == PXAR_FORMAT_HARDLINK {
334 let size: u64 = self.inner.read_item()?;
335 let (_, diff) = self.inner.read_hardlink(size)?;
336 return self.attributes(offset - diff);
337 }
338
339 if marker != PXAR_ENTRY {
340 bail!("Expected PXAR_ENTRY, found 0x{:x?}", marker);
341 }
342 let _size: u64 = self.inner.read_item()?;
343 let entry: PxarEntry = self.inner.read_item()?;
344 let (header, xattr) = self.inner.read_attributes()?;
345 let file_size = match header.htype {
346 PXAR_PAYLOAD => header.size - HEADER_SIZE,
347 _ => 0,
348 };
349
350 Ok((filename, entry, xattr, file_size))
351 }
352
353 /// Opens the file by validating the given `offset` and returning its attrs,
354 /// xattrs and size.
355 pub fn open(&mut self, offset: u64) -> Result<(OsString, PxarEntry, PxarAttributes, u64), Error> {
356 self.attributes(offset)
357 }
358
359 /// Read the payload of the file given by `offset`.
360 ///
361 /// This will read the file by first seeking to `offset` within the archive,
362 /// check if there is indeed a valid item with payload and then read `size`
363 /// bytes of content starting from `data_offset`.
364 /// If EOF is reached before reading `size` bytes, the reduced buffer is
365 /// returned.
366 pub fn read(&mut self, offset: u64, size: usize, data_offset: u64) -> Result<Vec<u8>, Error> {
367 self.seek(SeekFrom::Start(offset))?;
368 let head: PxarHeader = self.inner.read_item()?;
369 if head.htype != PXAR_FILENAME {
370 bail!("Expected PXAR_FILENAME, encountered 0x{:x?}", head.htype);
371 }
372 let _filename = self.inner.read_filename(head.size)?;
373
374 let head: PxarHeader = self.inner.read_item()?;
375 check_ca_header::<PxarEntry>(&head, PXAR_ENTRY)?;
376 let _: PxarEntry = self.inner.read_item()?;
377
378 let (header, _) = self.inner.read_attributes()?;
379 if header.htype != PXAR_PAYLOAD {
380 bail!("Expected PXAR_PAYLOAD, found 0x{:x?}", header.htype);
381 }
382
383 let payload_size = header.size - HEADER_SIZE;
384 if data_offset >= payload_size {
385 return Ok(Vec::new());
386 }
387
388 let len = if data_offset + u64::try_from(size)? > payload_size {
389 usize::try_from(payload_size - data_offset)?
390 } else {
391 size
392 };
393 self.inner.skip_bytes(usize::try_from(data_offset)?)?;
394 let data = self.inner.get_reader_mut().read_exact_allocated(len)?;
395
396 Ok(data)
397 }
398
399 /// Read the target of a hardlink in the archive.
400 pub fn read_link(&mut self, offset: u64) -> Result<(PathBuf, PxarEntry), Error> {
401 self.seek(SeekFrom::Start(offset))?;
402 let mut header: PxarHeader = self.inner.read_item()?;
403 if header.htype != PXAR_FILENAME {
404 bail!("Expected PXAR_FILENAME, encountered 0x{:x?}", header.htype);
405 }
406 let _filename = self.inner.read_filename(header.size)?;
407
408 header = self.inner.read_item()?;
409 check_ca_header::<PxarEntry>(&header, PXAR_ENTRY)?;
410 let entry: PxarEntry = self.inner.read_item()?;
411
412 header = self.inner.read_item()?;
413 if header.htype != PXAR_SYMLINK {
414 bail!("Expected PXAR_SYMLINK, encountered 0x{:x?}", header.htype);
415 }
416 let target = self.inner.read_link(header.size)?;
417
418 Ok((target, entry))
419 }
420 }