]>
Commit | Line | Data |
---|---|---|
7cac9316 XL |
1 | use std::borrow::Cow; |
2 | use std::cmp; | |
3 | use std::fs; | |
74b04a01 | 4 | use std::fs::OpenOptions; |
7cac9316 XL |
5 | use std::io::prelude::*; |
6 | use std::io::{self, Error, ErrorKind, SeekFrom}; | |
7 | use std::marker; | |
8faf50e0 | 8 | use std::path::{Component, Path, PathBuf}; |
7cac9316 XL |
9 | |
10 | use filetime::{self, FileTime}; | |
11 | ||
74b04a01 XL |
12 | use crate::archive::ArchiveInner; |
13 | use crate::error::TarError; | |
14 | use crate::header::bytes2path; | |
15 | use crate::other; | |
74b04a01 | 16 | use crate::{Archive, Header, PaxExtensions}; |
7cac9316 XL |
17 | |
18 | /// A read-only view into an entry of an archive. | |
19 | /// | |
20 | /// This structure is a window into a portion of a borrowed archive which can | |
21 | /// be inspected. It acts as a file handle by implementing the Reader trait. An | |
22 | /// entry cannot be rewritten once inserted into an archive. | |
23 | pub struct Entry<'a, R: 'a + Read> { | |
24 | fields: EntryFields<'a>, | |
25 | _ignored: marker::PhantomData<&'a Archive<R>>, | |
26 | } | |
27 | ||
28 | // private implementation detail of `Entry`, but concrete (no type parameters) | |
29 | // and also all-public to be constructed from other modules. | |
30 | pub struct EntryFields<'a> { | |
31 | pub long_pathname: Option<Vec<u8>>, | |
32 | pub long_linkname: Option<Vec<u8>>, | |
33 | pub pax_extensions: Option<Vec<u8>>, | |
34 | pub header: Header, | |
35 | pub size: u64, | |
36 | pub header_pos: u64, | |
37 | pub file_pos: u64, | |
38 | pub data: Vec<EntryIo<'a>>, | |
39 | pub unpack_xattrs: bool, | |
40 | pub preserve_permissions: bool, | |
a1dfa0c6 | 41 | pub preserve_mtime: bool, |
17df50a5 | 42 | pub overwrite: bool, |
7cac9316 XL |
43 | } |
44 | ||
45 | pub enum EntryIo<'a> { | |
46 | Pad(io::Take<io::Repeat>), | |
3dfed10e | 47 | Data(io::Take<&'a ArchiveInner<dyn Read + 'a>>), |
7cac9316 XL |
48 | } |
49 | ||
74b04a01 XL |
/// The thing that was produced by unpacking an entry.
///
/// It is handed back to the caller so that custom follow-up handling is
/// possible. Today only the `File` is returned; in the future the enum may be
/// extended with kinds for links, directories etc.
#[derive(Debug)]
pub enum Unpacked {
    /// A regular file was unpacked; this is its open handle.
    File(std::fs::File),
    /// A directory, hardlink, symlink, or other node was unpacked.
    #[doc(hidden)]
    __Nonexhaustive,
}
61 | ||
7cac9316 XL |
62 | impl<'a, R: Read> Entry<'a, R> { |
63 | /// Returns the path name for this entry. | |
64 | /// | |
3dfed10e | 65 | /// This method may fail if the pathname is not valid Unicode and this is |
7cac9316 XL |
66 | /// called on a Windows platform. |
67 | /// | |
68 | /// Note that this function will convert any `\` characters to directory | |
69 | /// separators, and it will not always return the same value as | |
70 | /// `self.header().path()` as some archive formats have support for longer | |
71 | /// path names described in separate entries. | |
72 | /// | |
73 | /// It is recommended to use this method instead of inspecting the `header` | |
74 | /// directly to ensure that various archive formats are handled correctly. | |
75 | pub fn path(&self) -> io::Result<Cow<Path>> { | |
76 | self.fields.path() | |
77 | } | |
78 | ||
79 | /// Returns the raw bytes listed for this entry. | |
80 | /// | |
81 | /// Note that this function will convert any `\` characters to directory | |
82 | /// separators, and it will not always return the same value as | |
83 | /// `self.header().path_bytes()` as some archive formats have support for | |
84 | /// longer path names described in separate entries. | |
85 | pub fn path_bytes(&self) -> Cow<[u8]> { | |
86 | self.fields.path_bytes() | |
87 | } | |
88 | ||
89 | /// Returns the link name for this entry, if any is found. | |
90 | /// | |
3dfed10e | 91 | /// This method may fail if the pathname is not valid Unicode and this is |
7cac9316 XL |
92 | /// called on a Windows platform. `Ok(None)` being returned, however, |
93 | /// indicates that the link name was not present. | |
94 | /// | |
95 | /// Note that this function will convert any `\` characters to directory | |
96 | /// separators, and it will not always return the same value as | |
97 | /// `self.header().link_name()` as some archive formats have support for | |
98 | /// longer path names described in separate entries. | |
99 | /// | |
100 | /// It is recommended to use this method instead of inspecting the `header` | |
101 | /// directly to ensure that various archive formats are handled correctly. | |
102 | pub fn link_name(&self) -> io::Result<Option<Cow<Path>>> { | |
103 | self.fields.link_name() | |
104 | } | |
105 | ||
106 | /// Returns the link name for this entry, in bytes, if listed. | |
107 | /// | |
108 | /// Note that this will not always return the same value as | |
109 | /// `self.header().link_name_bytes()` as some archive formats have support for | |
110 | /// longer path names described in separate entries. | |
111 | pub fn link_name_bytes(&self) -> Option<Cow<[u8]>> { | |
112 | self.fields.link_name_bytes() | |
113 | } | |
114 | ||
115 | /// Returns an iterator over the pax extensions contained in this entry. | |
116 | /// | |
117 | /// Pax extensions are a form of archive where extra metadata is stored in | |
118 | /// key/value pairs in entries before the entry they're intended to | |
119 | /// describe. For example this can be used to describe long file name or | |
120 | /// other metadata like atime/ctime/mtime in more precision. | |
121 | /// | |
122 | /// The returned iterator will yield key/value pairs for each extension. | |
123 | /// | |
124 | /// `None` will be returned if this entry does not indicate that it itself | |
125 | /// contains extensions, or if there were no previous extensions describing | |
126 | /// it. | |
127 | /// | |
128 | /// Note that global pax extensions are intended to be applied to all | |
129 | /// archive entries. | |
130 | /// | |
131 | /// Also note that this function will read the entire entry if the entry | |
132 | /// itself is a list of extensions. | |
133 | pub fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions>> { | |
134 | self.fields.pax_extensions() | |
135 | } | |
136 | ||
137 | /// Returns access to the header of this entry in the archive. | |
138 | /// | |
3dfed10e | 139 | /// This provides access to the metadata for this entry in the archive. |
7cac9316 XL |
140 | pub fn header(&self) -> &Header { |
141 | &self.fields.header | |
142 | } | |
143 | ||
17df50a5 XL |
144 | /// Returns access to the size of this entry in the archive. |
145 | /// | |
146 | /// In the event the size is stored in a pax extension, that size value | |
147 | /// will be referenced. Otherwise, the entry size will be stored in the header. | |
148 | pub fn size(&self) -> u64 { | |
149 | self.fields.size | |
150 | } | |
151 | ||
7cac9316 XL |
152 | /// Returns the starting position, in bytes, of the header of this entry in |
153 | /// the archive. | |
154 | /// | |
155 | /// The header is always a contiguous section of 512 bytes, so if the | |
156 | /// underlying reader implements `Seek`, then the slice from `header_pos` to | |
157 | /// `header_pos + 512` contains the raw header bytes. | |
158 | pub fn raw_header_position(&self) -> u64 { | |
159 | self.fields.header_pos | |
160 | } | |
161 | ||
162 | /// Returns the starting position, in bytes, of the file of this entry in | |
163 | /// the archive. | |
164 | /// | |
165 | /// If the file of this entry is continuous (e.g. not a sparse file), and | |
166 | /// if the underlying reader implements `Seek`, then the slice from | |
167 | /// `file_pos` to `file_pos + entry_size` contains the raw file bytes. | |
168 | pub fn raw_file_position(&self) -> u64 { | |
169 | self.fields.file_pos | |
170 | } | |
171 | ||
172 | /// Writes this file to the specified location. | |
173 | /// | |
174 | /// This function will write the entire contents of this file into the | |
175 | /// location specified by `dst`. Metadata will also be propagated to the | |
176 | /// path `dst`. | |
177 | /// | |
178 | /// This function will create a file at the path `dst`, and it is required | |
179 | /// that the intermediate directories are created. Any existing file at the | |
180 | /// location `dst` will be overwritten. | |
181 | /// | |
182 | /// > **Note**: This function does not have as many sanity checks as | |
183 | /// > `Archive::unpack` or `Entry::unpack_in`. As a result if you're | |
184 | /// > thinking of unpacking untrusted tarballs you may want to review the | |
185 | /// > implementations of the previous two functions and perhaps implement | |
186 | /// > similar logic yourself. | |
187 | /// | |
188 | /// # Examples | |
189 | /// | |
190 | /// ```no_run | |
191 | /// use std::fs::File; | |
192 | /// use tar::Archive; | |
193 | /// | |
194 | /// let mut ar = Archive::new(File::open("foo.tar").unwrap()); | |
195 | /// | |
196 | /// for (i, file) in ar.entries().unwrap().enumerate() { | |
197 | /// let mut file = file.unwrap(); | |
198 | /// file.unpack(format!("file-{}", i)).unwrap(); | |
199 | /// } | |
200 | /// ``` | |
74b04a01 | 201 | pub fn unpack<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<Unpacked> { |
ff7c6d11 | 202 | self.fields.unpack(None, dst.as_ref()) |
7cac9316 XL |
203 | } |
204 | ||
205 | /// Extracts this file under the specified path, avoiding security issues. | |
206 | /// | |
207 | /// This function will write the entire contents of this file into the | |
208 | /// location obtained by appending the path of this file in the archive to | |
209 | /// `dst`, creating any intermediate directories if needed. Metadata will | |
210 | /// also be propagated to the path `dst`. Any existing file at the location | |
211 | /// `dst` will be overwritten. | |
212 | /// | |
213 | /// This function carefully avoids writing outside of `dst`. If the file has | |
214 | /// a '..' in its path, this function will skip it and return false. | |
215 | /// | |
216 | /// # Examples | |
217 | /// | |
218 | /// ```no_run | |
219 | /// use std::fs::File; | |
220 | /// use tar::Archive; | |
221 | /// | |
222 | /// let mut ar = Archive::new(File::open("foo.tar").unwrap()); | |
223 | /// | |
224 | /// for (i, file) in ar.entries().unwrap().enumerate() { | |
225 | /// let mut file = file.unwrap(); | |
226 | /// file.unpack_in("target").unwrap(); | |
227 | /// } | |
228 | /// ``` | |
229 | pub fn unpack_in<P: AsRef<Path>>(&mut self, dst: P) -> io::Result<bool> { | |
230 | self.fields.unpack_in(dst.as_ref()) | |
231 | } | |
232 | ||
233 | /// Indicate whether extended file attributes (xattrs on Unix) are preserved | |
234 | /// when unpacking this entry. | |
235 | /// | |
236 | /// This flag is disabled by default and is currently only implemented on | |
237 | /// Unix using xattr support. This may eventually be implemented for | |
238 | /// Windows, however, if other archive implementations are found which do | |
239 | /// this as well. | |
240 | pub fn set_unpack_xattrs(&mut self, unpack_xattrs: bool) { | |
241 | self.fields.unpack_xattrs = unpack_xattrs; | |
242 | } | |
243 | ||
244 | /// Indicate whether extended permissions (like suid on Unix) are preserved | |
245 | /// when unpacking this entry. | |
246 | /// | |
247 | /// This flag is disabled by default and is currently only implemented on | |
248 | /// Unix. | |
249 | pub fn set_preserve_permissions(&mut self, preserve: bool) { | |
250 | self.fields.preserve_permissions = preserve; | |
251 | } | |
a1dfa0c6 XL |
252 | |
253 | /// Indicate whether access time information is preserved when unpacking | |
254 | /// this entry. | |
255 | /// | |
256 | /// This flag is enabled by default. | |
257 | pub fn set_preserve_mtime(&mut self, preserve: bool) { | |
258 | self.fields.preserve_mtime = preserve; | |
259 | } | |
7cac9316 XL |
260 | } |
261 | ||
262 | impl<'a, R: Read> Read for Entry<'a, R> { | |
263 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { | |
264 | self.fields.read(into) | |
265 | } | |
266 | } | |
267 | ||
268 | impl<'a> EntryFields<'a> { | |
269 | pub fn from<R: Read>(entry: Entry<R>) -> EntryFields { | |
270 | entry.fields | |
271 | } | |
272 | ||
273 | pub fn into_entry<R: Read>(self) -> Entry<'a, R> { | |
274 | Entry { | |
275 | fields: self, | |
276 | _ignored: marker::PhantomData, | |
277 | } | |
278 | } | |
279 | ||
280 | pub fn read_all(&mut self) -> io::Result<Vec<u8>> { | |
281 | // Preallocate some data but don't let ourselves get too crazy now. | |
282 | let cap = cmp::min(self.size, 128 * 1024); | |
283 | let mut v = Vec::with_capacity(cap as usize); | |
284 | self.read_to_end(&mut v).map(|_| v) | |
285 | } | |
286 | ||
287 | fn path(&self) -> io::Result<Cow<Path>> { | |
288 | bytes2path(self.path_bytes()) | |
289 | } | |
290 | ||
291 | fn path_bytes(&self) -> Cow<[u8]> { | |
292 | match self.long_pathname { | |
293 | Some(ref bytes) => { | |
294 | if let Some(&0) = bytes.last() { | |
295 | Cow::Borrowed(&bytes[..bytes.len() - 1]) | |
296 | } else { | |
297 | Cow::Borrowed(bytes) | |
298 | } | |
299 | } | |
8faf50e0 XL |
300 | None => { |
301 | if let Some(ref pax) = self.pax_extensions { | |
923072b8 | 302 | let pax = PaxExtensions::new(pax) |
8faf50e0 XL |
303 | .filter_map(|f| f.ok()) |
304 | .find(|f| f.key_bytes() == b"path") | |
305 | .map(|f| f.value_bytes()); | |
306 | if let Some(field) = pax { | |
a1dfa0c6 | 307 | return Cow::Borrowed(field); |
8faf50e0 XL |
308 | } |
309 | } | |
310 | self.header.path_bytes() | |
311 | } | |
7cac9316 XL |
312 | } |
313 | } | |
314 | ||
83c7162d XL |
315 | /// Gets the path in a "lossy" way, used for error reporting ONLY. |
316 | fn path_lossy(&self) -> String { | |
317 | String::from_utf8_lossy(&self.path_bytes()).to_string() | |
318 | } | |
319 | ||
7cac9316 XL |
320 | fn link_name(&self) -> io::Result<Option<Cow<Path>>> { |
321 | match self.link_name_bytes() { | |
322 | Some(bytes) => bytes2path(bytes).map(Some), | |
323 | None => Ok(None), | |
324 | } | |
325 | } | |
326 | ||
327 | fn link_name_bytes(&self) -> Option<Cow<[u8]>> { | |
328 | match self.long_linkname { | |
329 | Some(ref bytes) => { | |
330 | if let Some(&0) = bytes.last() { | |
331 | Some(Cow::Borrowed(&bytes[..bytes.len() - 1])) | |
332 | } else { | |
333 | Some(Cow::Borrowed(bytes)) | |
334 | } | |
335 | } | |
94222f64 XL |
336 | None => { |
337 | if let Some(ref pax) = self.pax_extensions { | |
923072b8 | 338 | let pax = PaxExtensions::new(pax) |
94222f64 XL |
339 | .filter_map(|f| f.ok()) |
340 | .find(|f| f.key_bytes() == b"linkpath") | |
341 | .map(|f| f.value_bytes()); | |
342 | if let Some(field) = pax { | |
343 | return Some(Cow::Borrowed(field)); | |
344 | } | |
345 | } | |
346 | self.header.link_name_bytes() | |
347 | } | |
7cac9316 XL |
348 | } |
349 | } | |
350 | ||
351 | fn pax_extensions(&mut self) -> io::Result<Option<PaxExtensions>> { | |
352 | if self.pax_extensions.is_none() { | |
a1dfa0c6 XL |
353 | if !self.header.entry_type().is_pax_global_extensions() |
354 | && !self.header.entry_type().is_pax_local_extensions() | |
355 | { | |
356 | return Ok(None); | |
7cac9316 | 357 | } |
8faf50e0 | 358 | self.pax_extensions = Some(self.read_all()?); |
7cac9316 | 359 | } |
923072b8 FG |
360 | Ok(Some(PaxExtensions::new( |
361 | self.pax_extensions.as_ref().unwrap(), | |
362 | ))) | |
7cac9316 XL |
363 | } |
364 | ||
365 | fn unpack_in(&mut self, dst: &Path) -> io::Result<bool> { | |
366 | // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3: | |
367 | // * Leading '/'s are trimmed. For example, `///test` is treated as | |
368 | // `test`. | |
369 | // * If the filename contains '..', then the file is skipped when | |
370 | // extracting the tarball. | |
371 | // * '//' within a filename is effectively skipped. An error is | |
372 | // logged, but otherwise the effect is as if any two or more | |
373 | // adjacent '/'s within the filename were consolidated into one | |
374 | // '/'. | |
375 | // | |
376 | // Most of this is handled by the `path` module of the standard | |
377 | // library, but we specially handle a few cases here as well. | |
378 | ||
379 | let mut file_dst = dst.to_path_buf(); | |
380 | { | |
8faf50e0 | 381 | let path = self.path().map_err(|e| { |
a1dfa0c6 | 382 | TarError::new( |
923072b8 | 383 | format!("invalid path in entry header: {}", self.path_lossy()), |
a1dfa0c6 XL |
384 | e, |
385 | ) | |
8faf50e0 | 386 | })?; |
7cac9316 XL |
387 | for part in path.components() { |
388 | match part { | |
389 | // Leading '/' characters, root paths, and '.' | |
390 | // components are just ignored and treated as "empty | |
391 | // components" | |
a1dfa0c6 | 392 | Component::Prefix(..) | Component::RootDir | Component::CurDir => continue, |
7cac9316 XL |
393 | |
394 | // If any part of the filename is '..', then skip over | |
395 | // unpacking the file to prevent directory traversal | |
396 | // security issues. See, e.g.: CVE-2001-1267, | |
397 | // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131 | |
398 | Component::ParentDir => return Ok(false), | |
399 | ||
400 | Component::Normal(part) => file_dst.push(part), | |
401 | } | |
402 | } | |
403 | } | |
404 | ||
405 | // Skip cases where only slashes or '.' parts were seen, because | |
406 | // this is effectively an empty filename. | |
407 | if *dst == *file_dst { | |
408 | return Ok(true); | |
409 | } | |
410 | ||
411 | // Skip entries without a parent (i.e. outside of FS root) | |
412 | let parent = match file_dst.parent() { | |
413 | Some(p) => p, | |
a1dfa0c6 | 414 | None => return Ok(false), |
7cac9316 XL |
415 | }; |
416 | ||
94222f64 | 417 | self.ensure_dir_created(&dst, parent) |
923072b8 | 418 | .map_err(|e| TarError::new(format!("failed to create `{}`", parent.display()), e))?; |
7cac9316 | 419 | |
8faf50e0 | 420 | let canon_target = self.validate_inside_dst(&dst, parent)?; |
7cac9316 | 421 | |
a1dfa0c6 | 422 | self.unpack(Some(&canon_target), &file_dst) |
923072b8 | 423 | .map_err(|e| TarError::new(format!("failed to unpack `{}`", file_dst.display()), e))?; |
7cac9316 XL |
424 | |
425 | Ok(true) | |
426 | } | |
427 | ||
a1dfa0c6 XL |
428 | /// Unpack as destination directory `dst`. |
429 | fn unpack_dir(&mut self, dst: &Path) -> io::Result<()> { | |
430 | // If the directory already exists just let it slide | |
74b04a01 XL |
431 | fs::create_dir(dst).or_else(|err| { |
432 | if err.kind() == ErrorKind::AlreadyExists { | |
433 | let prev = fs::metadata(dst); | |
434 | if prev.map(|m| m.is_dir()).unwrap_or(false) { | |
435 | return Ok(()); | |
436 | } | |
437 | } | |
438 | Err(Error::new( | |
a1dfa0c6 XL |
439 | err.kind(), |
440 | format!("{} when creating dir {}", err, dst.display()), | |
74b04a01 XL |
441 | )) |
442 | }) | |
a1dfa0c6 XL |
443 | } |
444 | ||
7cac9316 | 445 | /// Returns access to the header of this entry in the archive. |
74b04a01 | 446 | fn unpack(&mut self, target_base: Option<&Path>, dst: &Path) -> io::Result<Unpacked> { |
7cac9316 | 447 | let kind = self.header.entry_type(); |
a1dfa0c6 | 448 | |
7cac9316 | 449 | if kind.is_dir() { |
74b04a01 XL |
450 | self.unpack_dir(dst)?; |
451 | if let Ok(mode) = self.header.mode() { | |
452 | set_perms(dst, None, mode, self.preserve_permissions)?; | |
453 | } | |
454 | return Ok(Unpacked::__Nonexhaustive); | |
7cac9316 | 455 | } else if kind.is_hard_link() || kind.is_symlink() { |
8faf50e0 | 456 | let src = match self.link_name()? { |
7cac9316 | 457 | Some(name) => name, |
a1dfa0c6 XL |
458 | None => { |
459 | return Err(other(&format!( | |
460 | "hard link listed for {} but no link name found", | |
461 | String::from_utf8_lossy(self.header.as_bytes()) | |
74b04a01 | 462 | ))); |
a1dfa0c6 | 463 | } |
7cac9316 XL |
464 | }; |
465 | ||
466 | if src.iter().count() == 0 { | |
83c7162d XL |
467 | return Err(other(&format!( |
468 | "symlink destination for {} is empty", | |
469 | String::from_utf8_lossy(self.header.as_bytes()) | |
470 | ))); | |
7cac9316 XL |
471 | } |
472 | ||
74b04a01 | 473 | if kind.is_hard_link() { |
ff7c6d11 | 474 | let link_src = match target_base { |
8faf50e0 XL |
475 | // If we're unpacking within a directory then ensure that |
476 | // the destination of this hard link is both present and | |
477 | // inside our own directory. This is needed because we want | |
478 | // to make sure to not overwrite anything outside the root. | |
479 | // | |
480 | // Note that this logic is only needed for hard links | |
481 | // currently. With symlinks the `validate_inside_dst` which | |
482 | // happens before this method as part of `unpack_in` will | |
483 | // use canonicalization to ensure this guarantee. For hard | |
484 | // links though they're canonicalized to their existing path | |
485 | // so we need to validate at this time. | |
486 | Some(ref p) => { | |
487 | let link_src = p.join(src); | |
488 | self.validate_inside_dst(p, &link_src)?; | |
489 | link_src | |
490 | } | |
ff7c6d11 | 491 | None => src.into_owned(), |
ff7c6d11 | 492 | }; |
a1dfa0c6 XL |
493 | fs::hard_link(&link_src, dst).map_err(|err| { |
494 | Error::new( | |
495 | err.kind(), | |
496 | format!( | |
497 | "{} when hard linking {} to {}", | |
498 | err, | |
499 | link_src.display(), | |
500 | dst.display() | |
501 | ), | |
83c7162d | 502 | ) |
74b04a01 | 503 | })?; |
7cac9316 | 504 | } else { |
17df50a5 XL |
505 | symlink(&src, dst) |
506 | .or_else(|err_io| { | |
507 | if err_io.kind() == io::ErrorKind::AlreadyExists && self.overwrite { | |
508 | // remove dest and try once more | |
509 | std::fs::remove_file(dst).and_then(|()| symlink(&src, dst)) | |
510 | } else { | |
511 | Err(err_io) | |
512 | } | |
513 | }) | |
514 | .map_err(|err| { | |
515 | Error::new( | |
516 | err.kind(), | |
517 | format!( | |
518 | "{} when symlinking {} to {}", | |
519 | err, | |
520 | src.display(), | |
521 | dst.display() | |
522 | ), | |
523 | ) | |
524 | })?; | |
7cac9316 | 525 | }; |
74b04a01 XL |
526 | return Ok(Unpacked::__Nonexhaustive); |
527 | ||
528 | #[cfg(target_arch = "wasm32")] | |
529 | #[allow(unused_variables)] | |
530 | fn symlink(src: &Path, dst: &Path) -> io::Result<()> { | |
531 | Err(io::Error::new(io::ErrorKind::Other, "Not implemented")) | |
532 | } | |
7cac9316 XL |
533 | |
534 | #[cfg(windows)] | |
535 | fn symlink(src: &Path, dst: &Path) -> io::Result<()> { | |
536 | ::std::os::windows::fs::symlink_file(src, dst) | |
537 | } | |
83c7162d | 538 | |
3dfed10e | 539 | #[cfg(unix)] |
7cac9316 XL |
540 | fn symlink(src: &Path, dst: &Path) -> io::Result<()> { |
541 | ::std::os::unix::fs::symlink(src, dst) | |
542 | } | |
a1dfa0c6 XL |
543 | } else if kind.is_pax_global_extensions() |
544 | || kind.is_pax_local_extensions() | |
545 | || kind.is_gnu_longname() | |
546 | || kind.is_gnu_longlink() | |
547 | { | |
74b04a01 | 548 | return Ok(Unpacked::__Nonexhaustive); |
7cac9316 XL |
549 | }; |
550 | ||
a1dfa0c6 XL |
551 | // Old BSD-tar compatibility. |
552 | // Names that have a trailing slash should be treated as a directory. | |
553 | // Only applies to old headers. | |
74b04a01 XL |
554 | if self.header.as_ustar().is_none() && self.path_bytes().ends_with(b"/") { |
555 | self.unpack_dir(dst)?; | |
556 | if let Ok(mode) = self.header.mode() { | |
557 | set_perms(dst, None, mode, self.preserve_permissions)?; | |
558 | } | |
559 | return Ok(Unpacked::__Nonexhaustive); | |
a1dfa0c6 XL |
560 | } |
561 | ||
7cac9316 XL |
562 | // Note the lack of `else` clause above. According to the FreeBSD |
563 | // documentation: | |
564 | // | |
565 | // > A POSIX-compliant implementation must treat any unrecognized | |
566 | // > typeflag value as a regular file. | |
567 | // | |
568 | // As a result if we don't recognize the kind we just write out the file | |
569 | // as we would normally. | |
570 | ||
74b04a01 XL |
571 | // Ensure we write a new file rather than overwriting in-place which |
572 | // is attackable; if an existing file is found unlink it. | |
573 | fn open(dst: &Path) -> io::Result<std::fs::File> { | |
574 | OpenOptions::new().write(true).create_new(true).open(dst) | |
17df50a5 | 575 | } |
74b04a01 XL |
576 | let mut f = (|| -> io::Result<std::fs::File> { |
577 | let mut f = open(dst).or_else(|err| { | |
578 | if err.kind() != ErrorKind::AlreadyExists { | |
579 | Err(err) | |
17df50a5 | 580 | } else if self.overwrite { |
74b04a01 XL |
581 | match fs::remove_file(dst) { |
582 | Ok(()) => open(dst), | |
583 | Err(ref e) if e.kind() == io::ErrorKind::NotFound => open(dst), | |
584 | Err(e) => Err(e), | |
585 | } | |
17df50a5 XL |
586 | } else { |
587 | Err(err) | |
74b04a01 XL |
588 | } |
589 | })?; | |
7cac9316 XL |
590 | for io in self.data.drain(..) { |
591 | match io { | |
592 | EntryIo::Data(mut d) => { | |
593 | let expected = d.limit(); | |
8faf50e0 | 594 | if io::copy(&mut d, &mut f)? != expected { |
7cac9316 XL |
595 | return Err(other("failed to write entire file")); |
596 | } | |
597 | } | |
598 | EntryIo::Pad(d) => { | |
599 | // TODO: checked cast to i64 | |
600 | let to = SeekFrom::Current(d.limit() as i64); | |
8faf50e0 XL |
601 | let size = f.seek(to)?; |
602 | f.set_len(size)?; | |
7cac9316 XL |
603 | } |
604 | } | |
605 | } | |
74b04a01 XL |
606 | Ok(f) |
607 | })() | |
608 | .map_err(|e| { | |
7cac9316 | 609 | let header = self.header.path_bytes(); |
a1dfa0c6 | 610 | TarError::new( |
923072b8 | 611 | format!( |
a1dfa0c6 XL |
612 | "failed to unpack `{}` into `{}`", |
613 | String::from_utf8_lossy(&header), | |
614 | dst.display() | |
615 | ), | |
616 | e, | |
617 | ) | |
8faf50e0 | 618 | })?; |
7cac9316 | 619 | |
a1dfa0c6 XL |
620 | if self.preserve_mtime { |
621 | if let Ok(mtime) = self.header.mtime() { | |
17df50a5 XL |
622 | // For some more information on this see the comments in |
623 | // `Header::fill_platform_from`, but the general idea is that | |
624 | // we're trying to avoid 0-mtime files coming out of archives | |
625 | // since some tools don't ingest them well. Perhaps one day | |
626 | // when Cargo stops working with 0-mtime archives we can remove | |
627 | // this. | |
628 | let mtime = if mtime == 0 { 1 } else { mtime }; | |
a1dfa0c6 | 629 | let mtime = FileTime::from_unix_time(mtime as i64, 0); |
74b04a01 | 630 | filetime::set_file_handle_times(&f, Some(mtime), Some(mtime)).map_err(|e| { |
923072b8 | 631 | TarError::new(format!("failed to set mtime for `{}`", dst.display()), e) |
a1dfa0c6 XL |
632 | })?; |
633 | } | |
7cac9316 XL |
634 | } |
635 | if let Ok(mode) = self.header.mode() { | |
74b04a01 XL |
636 | set_perms(dst, Some(&mut f), mode, self.preserve_permissions)?; |
637 | } | |
638 | if self.unpack_xattrs { | |
639 | set_xattrs(self, dst)?; | |
640 | } | |
641 | return Ok(Unpacked::File(f)); | |
642 | ||
643 | fn set_perms( | |
644 | dst: &Path, | |
645 | f: Option<&mut std::fs::File>, | |
646 | mode: u32, | |
647 | preserve: bool, | |
648 | ) -> Result<(), TarError> { | |
649 | _set_perms(dst, f, mode, preserve).map_err(|e| { | |
a1dfa0c6 | 650 | TarError::new( |
923072b8 | 651 | format!( |
a1dfa0c6 XL |
652 | "failed to set permissions to {:o} \ |
653 | for `{}`", | |
654 | mode, | |
655 | dst.display() | |
656 | ), | |
657 | e, | |
658 | ) | |
74b04a01 | 659 | }) |
7cac9316 | 660 | } |
7cac9316 | 661 | |
3dfed10e | 662 | #[cfg(unix)] |
74b04a01 XL |
663 | fn _set_perms( |
664 | dst: &Path, | |
665 | f: Option<&mut std::fs::File>, | |
666 | mode: u32, | |
667 | preserve: bool, | |
668 | ) -> io::Result<()> { | |
7cac9316 XL |
669 | use std::os::unix::prelude::*; |
670 | ||
a1dfa0c6 | 671 | let mode = if preserve { mode } else { mode & 0o777 }; |
ff7c6d11 | 672 | let perm = fs::Permissions::from_mode(mode as _); |
74b04a01 XL |
673 | match f { |
674 | Some(f) => f.set_permissions(perm), | |
675 | None => fs::set_permissions(dst, perm), | |
676 | } | |
7cac9316 | 677 | } |
74b04a01 | 678 | |
7cac9316 | 679 | #[cfg(windows)] |
74b04a01 XL |
680 | fn _set_perms( |
681 | dst: &Path, | |
682 | f: Option<&mut std::fs::File>, | |
683 | mode: u32, | |
684 | _preserve: bool, | |
685 | ) -> io::Result<()> { | |
686 | if mode & 0o200 == 0o200 { | |
687 | return Ok(()); | |
688 | } | |
689 | match f { | |
690 | Some(f) => { | |
691 | let mut perm = f.metadata()?.permissions(); | |
692 | perm.set_readonly(true); | |
693 | f.set_permissions(perm) | |
694 | } | |
695 | None => { | |
696 | let mut perm = fs::metadata(dst)?.permissions(); | |
697 | perm.set_readonly(true); | |
698 | fs::set_permissions(dst, perm) | |
699 | } | |
700 | } | |
701 | } | |
702 | ||
703 | #[cfg(target_arch = "wasm32")] | |
704 | #[allow(unused_variables)] | |
705 | fn _set_perms( | |
706 | dst: &Path, | |
707 | f: Option<&mut std::fs::File>, | |
708 | mode: u32, | |
709 | _preserve: bool, | |
710 | ) -> io::Result<()> { | |
711 | Err(io::Error::new(io::ErrorKind::Other, "Not implemented")) | |
7cac9316 XL |
712 | } |
713 | ||
714 | #[cfg(all(unix, feature = "xattr"))] | |
715 | fn set_xattrs(me: &mut EntryFields, dst: &Path) -> io::Result<()> { | |
7cac9316 | 716 | use std::ffi::OsStr; |
a1dfa0c6 | 717 | use std::os::unix::prelude::*; |
7cac9316 XL |
718 | |
719 | let exts = match me.pax_extensions() { | |
720 | Ok(Some(e)) => e, | |
721 | _ => return Ok(()), | |
722 | }; | |
a1dfa0c6 XL |
723 | let exts = exts |
724 | .filter_map(|e| e.ok()) | |
725 | .filter_map(|e| { | |
726 | let key = e.key_bytes(); | |
727 | let prefix = b"SCHILY.xattr."; | |
728 | if key.starts_with(prefix) { | |
729 | Some((&key[prefix.len()..], e)) | |
730 | } else { | |
731 | None | |
732 | } | |
733 | }) | |
734 | .map(|(key, e)| (OsStr::from_bytes(key), e.value_bytes())); | |
7cac9316 XL |
735 | |
736 | for (key, value) in exts { | |
8faf50e0 | 737 | xattr::set(dst, key, value).map_err(|e| { |
a1dfa0c6 | 738 | TarError::new( |
923072b8 | 739 | format!( |
a1dfa0c6 XL |
740 | "failed to set extended \ |
741 | attributes to {}. \ | |
742 | Xattrs: key={:?}, value={:?}.", | |
743 | dst.display(), | |
744 | key, | |
745 | String::from_utf8_lossy(value) | |
746 | ), | |
747 | e, | |
748 | ) | |
8faf50e0 | 749 | })?; |
7cac9316 XL |
750 | } |
751 | ||
752 | Ok(()) | |
753 | } | |
754 | // Windows does not completely support posix xattrs | |
755 | // https://en.wikipedia.org/wiki/Extended_file_attributes#Windows_NT | |
3dfed10e | 756 | #[cfg(any(windows, not(feature = "xattr"), target_arch = "wasm32"))] |
7cac9316 XL |
757 | fn set_xattrs(_: &mut EntryFields, _: &Path) -> io::Result<()> { |
758 | Ok(()) | |
759 | } | |
760 | } | |
8faf50e0 | 761 | |
94222f64 XL |
762 | fn ensure_dir_created(&self, dst: &Path, dir: &Path) -> io::Result<()> { |
763 | let mut ancestor = dir; | |
764 | let mut dirs_to_create = Vec::new(); | |
765 | while ancestor.symlink_metadata().is_err() { | |
766 | dirs_to_create.push(ancestor); | |
767 | if let Some(parent) = ancestor.parent() { | |
768 | ancestor = parent; | |
769 | } else { | |
770 | break; | |
771 | } | |
772 | } | |
773 | for ancestor in dirs_to_create.into_iter().rev() { | |
774 | if let Some(parent) = ancestor.parent() { | |
775 | self.validate_inside_dst(dst, parent)?; | |
776 | } | |
777 | fs::create_dir_all(ancestor)?; | |
778 | } | |
779 | Ok(()) | |
780 | } | |
781 | ||
8faf50e0 XL |
782 | fn validate_inside_dst(&self, dst: &Path, file_dst: &Path) -> io::Result<PathBuf> { |
783 | // Abort if target (canonical) parent is outside of `dst` | |
784 | let canon_parent = file_dst.canonicalize().map_err(|err| { | |
785 | Error::new( | |
786 | err.kind(), | |
787 | format!("{} while canonicalizing {}", err, file_dst.display()), | |
788 | ) | |
789 | })?; | |
790 | let canon_target = dst.canonicalize().map_err(|err| { | |
791 | Error::new( | |
792 | err.kind(), | |
793 | format!("{} while canonicalizing {}", err, dst.display()), | |
794 | ) | |
795 | })?; | |
796 | if !canon_parent.starts_with(&canon_target) { | |
797 | let err = TarError::new( | |
923072b8 | 798 | format!( |
8faf50e0 XL |
799 | "trying to unpack outside of destination path: {}", |
800 | canon_target.display() | |
801 | ), | |
802 | // TODO: use ErrorKind::InvalidInput here? (minor breaking change) | |
803 | Error::new(ErrorKind::Other, "Invalid argument"), | |
804 | ); | |
805 | return Err(err.into()); | |
806 | } | |
807 | Ok(canon_target) | |
808 | } | |
7cac9316 XL |
809 | } |
810 | ||
811 | impl<'a> Read for EntryFields<'a> { | |
812 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { | |
813 | loop { | |
814 | match self.data.get_mut(0).map(|io| io.read(into)) { | |
a1dfa0c6 XL |
815 | Some(Ok(0)) => { |
816 | self.data.remove(0); | |
817 | } | |
7cac9316 XL |
818 | Some(r) => return r, |
819 | None => return Ok(0), | |
820 | } | |
821 | } | |
822 | } | |
823 | } | |
824 | ||
825 | impl<'a> Read for EntryIo<'a> { | |
826 | fn read(&mut self, into: &mut [u8]) -> io::Result<usize> { | |
827 | match *self { | |
828 | EntryIo::Pad(ref mut io) => io.read(into), | |
829 | EntryIo::Data(ref mut io) => io.read(into), | |
830 | } | |
831 | } | |
832 | } |