]> git.proxmox.com Git - proxmox-backup.git/blob - src/pxar/extract.rs
pxar: make extractor state more reusable
[proxmox-backup.git] / src / pxar / extract.rs
1 //! Code for extraction of pxar contents onto the file system.
2
3 use std::convert::TryFrom;
4 use std::ffi::{CStr, CString, OsStr, OsString};
5 use std::io;
6 use std::os::unix::ffi::OsStrExt;
7 use std::os::unix::io::{AsRawFd, FromRawFd, RawFd};
8 use std::path::Path;
9
10 use anyhow::{bail, format_err, Error};
11 use nix::dir::Dir;
12 use nix::fcntl::OFlag;
13 use nix::sys::stat::Mode;
14
15 use pathpatterns::{MatchEntry, MatchList, MatchType};
16 use pxar::format::Device;
17 use pxar::Metadata;
18
19 use proxmox::c_result;
20 use proxmox::tools::fs::{create_path, CreateOptions};
21
22 use crate::pxar::dir_stack::PxarDirStack;
23 use crate::pxar::Flags;
24 use crate::pxar::metadata;
25
26 pub fn extract_archive<T, F>(
27 mut decoder: pxar::decoder::Decoder<T>,
28 destination: &Path,
29 match_list: &[MatchEntry],
30 feature_flags: Flags,
31 allow_existing_dirs: bool,
32 mut callback: F,
33 ) -> Result<(), Error>
34 where
35 T: pxar::decoder::SeqRead,
36 F: FnMut(&Path),
37 {
38 // we use this to keep track of our directory-traversal
39 decoder.enable_goodbye_entries(true);
40
41 let root = decoder
42 .next()
43 .ok_or_else(|| format_err!("found empty pxar archive"))?
44 .map_err(|err| format_err!("error reading pxar archive: {}", err))?;
45
46 if !root.is_dir() {
47 bail!("pxar archive does not start with a directory entry!");
48 }
49
50 create_path(
51 &destination,
52 None,
53 Some(CreateOptions::new().perm(Mode::from_bits_truncate(0o700))),
54 )
55 .map_err(|err| format_err!("error creating directory {:?}: {}", destination, err))?;
56
57 let dir = Dir::open(
58 destination,
59 OFlag::O_DIRECTORY | OFlag::O_CLOEXEC,
60 Mode::empty(),
61 )
62 .map_err(|err| format_err!("unable to open target directory {:?}: {}", destination, err,))?;
63
64 let mut extractor = Extractor::new(
65 dir,
66 root.metadata().clone(),
67 allow_existing_dirs,
68 feature_flags,
69 );
70
71 let mut match_stack = Vec::new();
72 let mut current_match = true;
73 while let Some(entry) = decoder.next() {
74 use pxar::EntryKind;
75
76 let entry = entry.map_err(|err| format_err!("error reading pxar archive: {}", err))?;
77
78 let file_name_os = entry.file_name();
79
80 // safety check: a file entry in an archive must never contain slashes:
81 if file_name_os.as_bytes().contains(&b'/') {
82 bail!("archive file entry contains slashes, which is invalid and a security concern");
83 }
84
85 let file_name = CString::new(file_name_os.as_bytes())
86 .map_err(|_| format_err!("encountered file name with null-bytes"))?;
87
88 let metadata = entry.metadata();
89
90 let match_result = match_list.matches(
91 entry.path().as_os_str().as_bytes(),
92 Some(metadata.file_type() as u32),
93 );
94
95 let did_match = match match_result {
96 Some(MatchType::Include) => true,
97 Some(MatchType::Exclude) => false,
98 None => current_match,
99 };
100 match (did_match, entry.kind()) {
101 (_, EntryKind::Directory) => {
102 callback(entry.path());
103
104 let create = current_match && match_result != Some(MatchType::Exclude);
105 extractor.enter_directory(file_name_os.to_owned(), metadata.clone(), create)?;
106
107 // We're starting a new directory, push our old matching state and replace it with
108 // our new one:
109 match_stack.push(current_match);
110 current_match = did_match;
111
112 Ok(())
113 }
114 (_, EntryKind::GoodbyeTable) => {
115 // go up a directory
116 extractor
117 .leave_directory()
118 .map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?;
119
120 // We left a directory, also get back our previous matching state. This is in sync
121 // with `dir_stack` so this should never be empty except for the final goodbye
122 // table, in which case we get back to the default of `true`.
123 current_match = match_stack.pop().unwrap_or(true);
124
125 Ok(())
126 }
127 (true, EntryKind::Symlink(link)) => {
128 callback(entry.path());
129 extractor.extract_symlink(&file_name, metadata, link.as_ref())
130 }
131 (true, EntryKind::Hardlink(link)) => {
132 callback(entry.path());
133 extractor.extract_hardlink(&file_name, metadata, link.as_os_str())
134 }
135 (true, EntryKind::Device(dev)) => {
136 if extractor.contains_flags(Flags::WITH_DEVICE_NODES) {
137 callback(entry.path());
138 extractor.extract_device(&file_name, metadata, dev)
139 } else {
140 Ok(())
141 }
142 }
143 (true, EntryKind::Fifo) => {
144 if extractor.contains_flags(Flags::WITH_FIFOS) {
145 callback(entry.path());
146 extractor.extract_special(&file_name, metadata, 0)
147 } else {
148 Ok(())
149 }
150 }
151 (true, EntryKind::Socket) => {
152 if extractor.contains_flags(Flags::WITH_SOCKETS) {
153 callback(entry.path());
154 extractor.extract_special(&file_name, metadata, 0)
155 } else {
156 Ok(())
157 }
158 }
159 (true, EntryKind::File { size, .. }) => extractor.extract_file(
160 &file_name,
161 metadata,
162 *size,
163 &mut decoder.contents().ok_or_else(|| {
164 format_err!("found regular file entry without contents in archive")
165 })?,
166 ),
167 (false, _) => Ok(()), // skip this
168 }
169 .map_err(|err| format_err!("error at entry {:?}: {}", file_name_os, err))?;
170 }
171
172 if !extractor.dir_stack.is_empty() {
173 bail!("unexpected eof while decoding pxar archive");
174 }
175
176 Ok(())
177 }
178
179 /// Common state for file extraction.
180 pub(crate) struct Extractor {
181 feature_flags: Flags,
182 allow_existing_dirs: bool,
183 dir_stack: PxarDirStack,
184 }
185
186 impl Extractor {
187 /// Create a new extractor state for a target directory.
188 pub fn new(
189 root_dir: Dir,
190 metadata: Metadata,
191 allow_existing_dirs: bool,
192 feature_flags: Flags,
193 ) -> Self {
194 Self {
195 dir_stack: PxarDirStack::new(root_dir, metadata),
196 allow_existing_dirs,
197 feature_flags,
198 }
199 }
200
201 /// When encountering a directory during extraction, this is used to keep track of it. If
202 /// `create` is true it is immediately created and its metadata will be updated once we leave
203 /// it. If `create` is false it will only be created if it is going to have any actual content.
204 pub fn enter_directory(
205 &mut self,
206 file_name: OsString,
207 metadata: Metadata,
208 create: bool,
209 ) -> Result<(), Error> {
210 self.dir_stack.push(file_name, metadata)?;
211
212 if create {
213 self.dir_stack.create_last_dir(self.allow_existing_dirs)?;
214 }
215
216 Ok(())
217 }
218
219 /// When done with a directory we need to make sure we're
220 pub fn leave_directory(&mut self) -> Result<(), Error> {
221 let dir = self
222 .dir_stack
223 .pop()
224 .map_err(|err| format_err!("unexpected end of directory entry: {}", err))?
225 .ok_or_else(|| format_err!("broken pxar archive (directory stack underrun)"))?;
226
227 if let Some(fd) = dir.try_as_raw_fd() {
228 metadata::apply(
229 self.feature_flags,
230 dir.metadata(),
231 fd,
232 &CString::new(dir.file_name().as_bytes())?,
233 )?;
234 }
235
236 Ok(())
237 }
238
239 fn contains_flags(&self, flag: Flags) -> bool {
240 self.feature_flags.contains(flag)
241 }
242
243 fn parent_fd(&mut self) -> Result<RawFd, Error> {
244 self.dir_stack.last_dir_fd(self.allow_existing_dirs)
245 }
246
247 fn extract_symlink(
248 &mut self,
249 file_name: &CStr,
250 metadata: &Metadata,
251 link: &OsStr,
252 ) -> Result<(), Error> {
253 let parent = self.parent_fd()?;
254 nix::unistd::symlinkat(link, Some(parent), file_name)?;
255 metadata::apply_at(self.feature_flags, metadata, parent, file_name)
256 }
257
258 fn extract_hardlink(
259 &mut self,
260 file_name: &CStr,
261 _metadata: &Metadata, // for now we don't use this because hardlinks don't need it...
262 link: &OsStr,
263 ) -> Result<(), Error> {
264 crate::pxar::tools::assert_relative_path(link)?;
265
266 let parent = self.parent_fd()?;
267 let root = self.dir_stack.root_dir_fd()?;
268 let target = CString::new(link.as_bytes())?;
269 nix::unistd::linkat(
270 Some(root),
271 target.as_c_str(),
272 Some(parent),
273 file_name,
274 nix::unistd::LinkatFlags::NoSymlinkFollow,
275 )?;
276
277 Ok(())
278 }
279
280 fn extract_device(
281 &mut self,
282 file_name: &CStr,
283 metadata: &Metadata,
284 device: &Device,
285 ) -> Result<(), Error> {
286 self.extract_special(file_name, metadata, device.to_dev_t())
287 }
288
289 fn extract_special(
290 &mut self,
291 file_name: &CStr,
292 metadata: &Metadata,
293 device: libc::dev_t,
294 ) -> Result<(), Error> {
295 let mode = metadata.stat.mode;
296 let mode = u32::try_from(mode).map_err(|_| {
297 format_err!(
298 "device node's mode contains illegal bits: 0x{:x} (0o{:o})",
299 mode,
300 mode,
301 )
302 })?;
303 let parent = self.parent_fd()?;
304 unsafe { c_result!(libc::mknodat(parent, file_name.as_ptr(), mode, device)) }
305 .map_err(|err| format_err!("failed to create device node: {}", err))?;
306
307 metadata::apply_at(self.feature_flags, metadata, parent, file_name)
308 }
309
310 fn extract_file(
311 &mut self,
312 file_name: &CStr,
313 metadata: &Metadata,
314 size: u64,
315 contents: &mut dyn io::Read,
316 ) -> Result<(), Error> {
317 let parent = self.parent_fd()?;
318 let mut file = unsafe {
319 std::fs::File::from_raw_fd(nix::fcntl::openat(
320 parent,
321 file_name,
322 OFlag::O_CREAT | OFlag::O_WRONLY | OFlag::O_CLOEXEC,
323 Mode::from_bits(0o600).unwrap(),
324 )?)
325 };
326
327 let extracted = io::copy(&mut *contents, &mut file)?;
328 if size != extracted {
329 bail!("extracted {} bytes of a file of {} bytes", extracted, size);
330 }
331
332 metadata::apply(self.feature_flags, metadata, file.as_raw_fd(), file_name)
333 }
334 }