]>
Commit | Line | Data |
---|---|---|
46cfe7ab DM |
1 | //! *pxar* format decoder for seekable files |
2 | //! | |
3 | //! This module contains the code to decode *pxar* archive files. | |
4 | ||
bbd055bf | 5 | use std::convert::TryFrom; |
e6662118 | 6 | use std::ffi::{OsString, OsStr}; |
bbd055bf CE |
7 | use std::io::{Read, Seek, SeekFrom}; |
8 | use std::path::{Path, PathBuf}; | |
e6662118 | 9 | use std::os::unix::ffi::OsStrExt; |
bbd055bf | 10 | |
f7d4e4b5 | 11 | use anyhow::{bail, format_err, Error}; |
bbd055bf | 12 | use libc; |
46cfe7ab | 13 | |
02491b8f | 14 | use super::binary_search_tree::search_binary_tree_by; |
46cfe7ab | 15 | use super::format_definition::*; |
33ad183a CE |
16 | use super::sequential_decoder::SequentialDecoder; |
17 | use super::match_pattern::MatchPattern; | |
46cfe7ab | 18 | |
bbd055bf | 19 | use proxmox::tools::io::ReadExt; |
46cfe7ab | 20 | |
f50b4fd6 | 21 | pub struct DirectoryEntry { |
fb2554de | 22 | /// Points to the `PxarEntry` of the directory |
46cfe7ab | 23 | start: u64, |
fb2554de | 24 | /// Points past the goodbye table tail |
46cfe7ab | 25 | end: u64, |
90fc97af | 26 | /// Filename of entry |
46cfe7ab | 27 | pub filename: OsString, |
90fc97af | 28 | /// Entry (mode, permissions) |
5e50c606 | 29 | pub entry: PxarEntry, |
90fc97af CE |
30 | /// Extended attributes |
31 | pub xattr: PxarAttributes, | |
32 | /// Payload size | |
33 | pub size: u64, | |
a8aff353 CE |
34 | /// Target path for symbolic links |
35 | pub target: Option<PathBuf>, | |
63698e72 CE |
36 | /// Start offset of the payload if present. |
37 | pub payload_offset: Option<u64>, | |
46cfe7ab DM |
38 | } |
39 | ||
99b5b6cb DM |
/// Helper trait bundling `Read` and `Seek`, so a seekable reader can be stored
/// behind a single trait object (`Box<dyn ReadSeek>`).
trait ReadSeek: Read + Seek {}

/// Every type that is both `Read` and `Seek` automatically is a `ReadSeek`.
impl<T> ReadSeek for T where T: Read + Seek {}
43 | ||
46cfe7ab | 44 | // This one needs Read+Seek |
99b5b6cb | 45 | pub struct Decoder { |
b9799012 | 46 | inner: SequentialDecoder<Box<dyn ReadSeek + Send>>, |
46cfe7ab DM |
47 | root_start: u64, |
48 | root_end: u64, | |
49 | } | |
50 | ||
5e50c606 | 51 | const HEADER_SIZE: u64 = std::mem::size_of::<PxarHeader>() as u64; |
7d26720e | 52 | const GOODBYE_ITEM_SIZE: u64 = std::mem::size_of::<PxarGoodbyeItem>() as u64; |
46cfe7ab | 53 | |
99b5b6cb | 54 | impl Decoder { |
b9799012 | 55 | pub fn new<R: Read + Seek + Send + 'static>(mut reader: R) -> Result<Self, Error> { |
46cfe7ab | 56 | let root_end = reader.seek(SeekFrom::End(0))?; |
b9799012 | 57 | let boxed_reader: Box<dyn ReadSeek + 'static + Send> = Box::new(reader); |
99b5b6cb | 58 | let inner = SequentialDecoder::new(boxed_reader, super::flags::DEFAULT); |
f701d033 DM |
59 | |
60 | Ok(Self { inner, root_start: 0, root_end }) | |
61 | } | |
46cfe7ab | 62 | |
b9799012 | 63 | pub fn set_callback<F: Fn(&Path) -> Result<(), Error> + Send + 'static>(&mut self, callback: F ) { |
f701d033 | 64 | self.inner.set_callback(callback); |
46cfe7ab DM |
65 | } |
66 | ||
58262f40 CE |
67 | pub fn root(&mut self) -> Result<DirectoryEntry, Error> { |
68 | self.seek(SeekFrom::Start(0))?; | |
69 | let header: PxarHeader = self.inner.read_item()?; | |
70 | check_ca_header::<PxarEntry>(&header, PXAR_ENTRY)?; | |
71 | let entry: PxarEntry = self.inner.read_item()?; | |
90fc97af | 72 | let (header, xattr) = self.inner.read_attributes()?; |
63698e72 CE |
73 | let (size, payload_offset) = match header.htype { |
74 | PXAR_PAYLOAD => (header.size - HEADER_SIZE, Some(self.seek(SeekFrom::Current(0))?)), | |
75 | _ => (0, None), | |
90fc97af CE |
76 | }; |
77 | ||
58262f40 | 78 | Ok(DirectoryEntry { |
46cfe7ab DM |
79 | start: self.root_start, |
80 | end: self.root_end, | |
81 | filename: OsString::new(), // Empty | |
653b1ca1 | 82 | entry, |
90fc97af CE |
83 | xattr, |
84 | size, | |
a8aff353 | 85 | target: None, |
63698e72 | 86 | payload_offset, |
58262f40 | 87 | }) |
46cfe7ab DM |
88 | } |
89 | ||
90 | fn seek(&mut self, pos: SeekFrom) -> Result<u64, Error> { | |
91 | let pos = self.inner.get_reader_mut().seek(pos)?; | |
92 | Ok(pos) | |
93 | } | |
94 | ||
c7fee396 CE |
95 | pub(crate) fn root_end_offset(&self) -> u64 { |
96 | self.root_end | |
97 | } | |
98 | ||
33ad183a CE |
99 | /// Restore the subarchive starting at `dir` to the provided target `path`. |
100 | /// | |
101 | /// Only restore the content matched by the MatchPattern `pattern`. | |
102 | /// An empty Vec `pattern` means restore all. | |
103 | pub fn restore(&mut self, dir: &DirectoryEntry, path: &Path, pattern: &Vec<MatchPattern>) -> Result<(), Error> { | |
46cfe7ab | 104 | let start = dir.start; |
46cfe7ab | 105 | self.seek(SeekFrom::Start(start))?; |
33ad183a | 106 | self.inner.restore(path, pattern)?; |
46cfe7ab DM |
107 | |
108 | Ok(()) | |
109 | } | |
110 | ||
ac12570e CE |
111 | pub(crate) fn read_directory_entry( |
112 | &mut self, | |
113 | start: u64, | |
114 | end: u64, | |
115 | ) -> Result<DirectoryEntry, Error> { | |
46cfe7ab DM |
116 | self.seek(SeekFrom::Start(start))?; |
117 | ||
5e50c606 | 118 | let head: PxarHeader = self.inner.read_item()?; |
46cfe7ab | 119 | |
5e50c606 | 120 | if head.htype != PXAR_FILENAME { |
46cfe7ab DM |
121 | bail!("wrong filename header type for object [{}..{}]", start, end); |
122 | } | |
123 | ||
124 | let entry_start = start + head.size; | |
125 | ||
126 | let filename = self.inner.read_filename(head.size)?; | |
127 | ||
5e50c606 | 128 | let head: PxarHeader = self.inner.read_item()?; |
e5471b48 CE |
129 | if head.htype == PXAR_FORMAT_HARDLINK { |
130 | let (_, offset) = self.inner.read_hardlink(head.size)?; | |
132cb0d0 CE |
131 | // TODO: Howto find correct end offset for hardlink target? |
132 | // This is a bit tricky since we cannot find correct end in an efficient | |
133 | // way, on the other hand it doesn't really matter (for now) since target | |
134 | // is never a directory and end is not used in such cases. | |
e5471b48 CE |
135 | return self.read_directory_entry(start - offset, end); |
136 | } | |
5e50c606 CE |
137 | check_ca_header::<PxarEntry>(&head, PXAR_ENTRY)?; |
138 | let entry: PxarEntry = self.inner.read_item()?; | |
90fc97af | 139 | let (header, xattr) = self.inner.read_attributes()?; |
351b913d CE |
140 | let (size, payload_offset, target) = match header.htype { |
141 | PXAR_PAYLOAD => | |
142 | (header.size - HEADER_SIZE, Some(self.seek(SeekFrom::Current(0))?), None), | |
143 | PXAR_SYMLINK => | |
144 | (header.size - HEADER_SIZE, None, Some(self.inner.read_link(header.size)?)), | |
145 | _ => (0, None, None), | |
a8aff353 | 146 | }; |
46cfe7ab | 147 | |
f50b4fd6 | 148 | Ok(DirectoryEntry { |
46cfe7ab | 149 | start: entry_start, |
653b1ca1 WB |
150 | end, |
151 | filename, | |
46cfe7ab | 152 | entry, |
90fc97af CE |
153 | xattr, |
154 | size, | |
a8aff353 | 155 | target, |
63698e72 | 156 | payload_offset, |
46cfe7ab DM |
157 | }) |
158 | } | |
159 | ||
d00097a0 CE |
160 | /// Return the goodbye table based on the provided end offset. |
161 | /// | |
162 | /// Get the goodbye table entries and the start and end offsets of the | |
163 | /// items they reference. | |
164 | /// If the start offset is provided, we use that to check the consistency of | |
165 | /// the data, else the start offset calculated based on the goodbye tail is | |
166 | /// used. | |
167 | pub(crate) fn goodbye_table( | |
168 | &mut self, | |
169 | start: Option<u64>, | |
170 | end: u64, | |
171 | ) -> Result<Vec<(PxarGoodbyeItem, u64, u64)>, Error> { | |
46cfe7ab DM |
172 | self.seek(SeekFrom::Start(end - GOODBYE_ITEM_SIZE))?; |
173 | ||
d00097a0 CE |
174 | let tail: PxarGoodbyeItem = self.inner.read_item()?; |
175 | if tail.hash != PXAR_GOODBYE_TAIL_MARKER { | |
176 | bail!("missing goodbye tail marker for object at offset {}", end); | |
46cfe7ab DM |
177 | } |
178 | ||
d00097a0 CE |
179 | // If the start offset was provided, we use and check based on that. |
180 | // If not, we rely on the offset calculated from the goodbye table entry. | |
181 | let start = start.unwrap_or(end - tail.offset - tail.size); | |
182 | let goodbye_table_size = tail.size; | |
46cfe7ab DM |
183 | if goodbye_table_size < (HEADER_SIZE + GOODBYE_ITEM_SIZE) { |
184 | bail!("short goodbye table size for object [{}..{}]", start, end); | |
46cfe7ab | 185 | } |
d00097a0 | 186 | |
46cfe7ab DM |
187 | let goodbye_inner_size = goodbye_table_size - HEADER_SIZE - GOODBYE_ITEM_SIZE; |
188 | if (goodbye_inner_size % GOODBYE_ITEM_SIZE) != 0 { | |
3626ac61 CE |
189 | bail!( |
190 | "wrong goodbye inner table size for entry [{}..{}]", | |
191 | start, | |
192 | end | |
193 | ); | |
46cfe7ab DM |
194 | } |
195 | ||
196 | let goodbye_start = end - goodbye_table_size; | |
d00097a0 | 197 | if tail.offset != (goodbye_start - start) { |
3626ac61 CE |
198 | bail!( |
199 | "wrong offset in goodbye tail marker for entry [{}..{}]", | |
200 | start, | |
201 | end | |
202 | ); | |
46cfe7ab DM |
203 | } |
204 | ||
205 | self.seek(SeekFrom::Start(goodbye_start))?; | |
5e50c606 | 206 | let head: PxarHeader = self.inner.read_item()?; |
5e50c606 | 207 | if head.htype != PXAR_GOODBYE { |
3626ac61 CE |
208 | bail!( |
209 | "wrong goodbye table header type for entry [{}..{}]", | |
210 | start, | |
211 | end | |
212 | ); | |
46cfe7ab DM |
213 | } |
214 | ||
215 | if head.size != goodbye_table_size { | |
216 | bail!("wrong goodbye table size for entry [{}..{}]", start, end); | |
217 | } | |
218 | ||
d00097a0 | 219 | let mut gb_entries = Vec::new(); |
3626ac61 | 220 | for i in 0..goodbye_inner_size / GOODBYE_ITEM_SIZE { |
5e50c606 | 221 | let item: PxarGoodbyeItem = self.inner.read_item()?; |
46cfe7ab | 222 | if item.offset > (goodbye_start - start) { |
3626ac61 CE |
223 | bail!( |
224 | "goodbye entry {} offset out of range [{}..{}] {} {} {}", | |
225 | i, | |
226 | start, | |
227 | end, | |
228 | item.offset, | |
229 | goodbye_start, | |
230 | start | |
231 | ); | |
46cfe7ab DM |
232 | } |
233 | let item_start = goodbye_start - item.offset; | |
234 | let item_end = item_start + item.size; | |
235 | if item_end > goodbye_start { | |
3626ac61 | 236 | bail!("goodbye entry {} end out of range [{}..{}]", i, start, end); |
46cfe7ab | 237 | } |
d00097a0 CE |
238 | gb_entries.push((item, item_start, item_end)); |
239 | } | |
240 | ||
241 | Ok(gb_entries) | |
242 | } | |
46cfe7ab | 243 | |
d00097a0 CE |
244 | pub fn list_dir(&mut self, dir: &DirectoryEntry) -> Result<Vec<DirectoryEntry>, Error> { |
245 | let start = dir.start; | |
246 | let end = dir.end; | |
247 | ||
248 | //println!("list_dir1: {} {}", start, end); | |
249 | ||
250 | if (end - start) < (HEADER_SIZE + GOODBYE_ITEM_SIZE) { | |
251 | bail!("detected short object [{}..{}]", start, end); | |
46cfe7ab DM |
252 | } |
253 | ||
254 | let mut result = vec![]; | |
d00097a0 CE |
255 | let goodbye_table = self.goodbye_table(Some(start), end)?; |
256 | for (_, item_start, item_end) in goodbye_table { | |
46cfe7ab DM |
257 | let entry = self.read_directory_entry(item_start, item_end)?; |
258 | //println!("ENTRY: {} {} {:?}", item_start, item_end, entry.filename); | |
259 | result.push(entry); | |
260 | } | |
261 | ||
262 | Ok(result) | |
263 | } | |
264 | ||
265 | pub fn print_filenames<W: std::io::Write>( | |
266 | &mut self, | |
267 | output: &mut W, | |
268 | prefix: &mut PathBuf, | |
f50b4fd6 | 269 | dir: &DirectoryEntry, |
46cfe7ab | 270 | ) -> Result<(), Error> { |
46cfe7ab DM |
271 | let mut list = self.list_dir(dir)?; |
272 | ||
273 | list.sort_unstable_by(|a, b| a.filename.cmp(&b.filename)); | |
274 | ||
275 | for item in &list { | |
46cfe7ab DM |
276 | prefix.push(item.filename.clone()); |
277 | ||
278 | let mode = item.entry.mode as u32; | |
279 | ||
280 | let ifmt = mode & libc::S_IFMT; | |
281 | ||
9307279f | 282 | writeln!(output, "{:?}", prefix)?; |
46cfe7ab | 283 | |
132cb0d0 CE |
284 | match ifmt { |
285 | libc::S_IFDIR => self.print_filenames(output, prefix, item)?, | |
286 | libc::S_IFREG | libc::S_IFLNK | libc::S_IFBLK | libc::S_IFCHR => {} | |
287 | _ => bail!("unknown item mode/type for {:?}", prefix), | |
46cfe7ab DM |
288 | } |
289 | ||
290 | prefix.pop(); | |
291 | } | |
292 | ||
293 | Ok(()) | |
294 | } | |
bbd055bf | 295 | |
e6662118 CE |
296 | /// Lookup the item identified by `filename` in the provided `DirectoryEntry`. |
297 | /// | |
298 | /// Calculates the hash of the filename and searches for matching entries in | |
299 | /// the goodbye table of the provided `DirectoryEntry`. | |
300 | /// If found, also the filename is compared to avoid hash collision. | |
301 | /// If the filename does not match, the search resumes with the next entry in | |
302 | /// the goodbye table. | |
303 | /// If there is no entry with matching `filename`, `Ok(None)` is returned. | |
304 | pub fn lookup( | |
305 | &mut self, | |
306 | dir: &DirectoryEntry, | |
307 | filename: &OsStr, | |
90fc97af | 308 | ) -> Result<Option<DirectoryEntry>, Error> { |
e6662118 CE |
309 | let gbt = self.goodbye_table(Some(dir.start), dir.end)?; |
310 | let hash = compute_goodbye_hash(filename.as_bytes()); | |
311 | ||
02491b8f CE |
312 | let mut start_idx = 0; |
313 | let mut skip_multiple = 0; | |
e6662118 CE |
314 | loop { |
315 | // Search for the next goodbye entry with matching hash. | |
02491b8f CE |
316 | let idx = search_binary_tree_by( |
317 | start_idx, | |
318 | gbt.len(), | |
319 | skip_multiple, | |
320 | |idx| hash.cmp(&gbt[idx].0.hash), | |
321 | ); | |
322 | let (_item, start, end) = match idx { | |
323 | Some(idx) => &gbt[idx], | |
e6662118 CE |
324 | None => return Ok(None), |
325 | }; | |
326 | ||
67444407 | 327 | let entry = self.read_directory_entry(*start, *end)?; |
e6662118 CE |
328 | |
329 | // Possible hash collision, need to check if the found entry is indeed | |
330 | // the filename to lookup. | |
67444407 CE |
331 | if entry.filename == filename { |
332 | return Ok(Some(entry)); | |
e6662118 | 333 | } |
02491b8f CE |
334 | // Hash collision, check the next entry in the goodbye table by starting |
335 | // from given index but skipping one more match (so hash at index itself). | |
336 | start_idx = idx.unwrap(); | |
337 | skip_multiple = 1; | |
e6662118 CE |
338 | } |
339 | } | |
340 | ||
63698e72 | 341 | /// Read the payload of the file given by `entry`. |
bbd055bf | 342 | /// |
63698e72 CE |
343 | /// This will read a files payload as raw bytes starting from `offset` after |
344 | /// the payload marker, reading `size` bytes. | |
345 | /// If the payload from `offset` to EOF is smaller than `size` bytes, the | |
346 | /// buffer with reduced size is returned. | |
347 | /// If `offset` is larger than the payload size of the `DirectoryEntry`, an | |
348 | /// empty buffer is returned. | |
349 | pub fn read(&mut self, entry: &DirectoryEntry, size: usize, offset: u64) -> Result<Vec<u8>, Error> { | |
350 | let start_offset = entry.payload_offset | |
351 | .ok_or_else(|| format_err!("entry has no payload offset"))?; | |
352 | if offset >= entry.size { | |
bbd055bf CE |
353 | return Ok(Vec::new()); |
354 | } | |
63698e72 CE |
355 | let len = if u64::try_from(size)? > entry.size { |
356 | usize::try_from(entry.size)? | |
bbd055bf CE |
357 | } else { |
358 | size | |
359 | }; | |
63698e72 | 360 | self.seek(SeekFrom::Start(start_offset + offset))?; |
bbd055bf CE |
361 | let data = self.inner.get_reader_mut().read_exact_allocated(len)?; |
362 | ||
363 | Ok(data) | |
364 | } | |
46cfe7ab | 365 | } |