4 use std
::fs
::OpenOptions
;
5 use std
::io
::prelude
::*;
6 use std
::io
::{self, Error, ErrorKind, SeekFrom}
;
8 use std
::path
::{Component, Path, PathBuf}
;
10 use filetime
::{self, FileTime}
;
12 use crate::archive
::ArchiveInner
;
13 use crate::error
::TarError
;
14 use crate::header
::bytes2path
;
16 use crate::{Archive, Header, PaxExtensions}
;
18 /// A read-only view into an entry of an archive.
20 /// This structure is a window into a portion of a borrowed archive which can
21 /// be inspected. It acts as a file handle by implementing the Reader trait. An
22 /// entry cannot be rewritten once inserted into an archive.
23 pub struct Entry
<'a
, R
: 'a
+ Read
> {
24 fields
: EntryFields
<'a
>,
25 _ignored
: marker
::PhantomData
<&'a Archive
<R
>>,
28 // private implementation detail of `Entry`, but concrete (no type parameters)
29 // and also all-public to be constructed from other modules.
30 pub struct EntryFields
<'a
> {
31 pub long_pathname
: Option
<Vec
<u8>>,
32 pub long_linkname
: Option
<Vec
<u8>>,
33 pub pax_extensions
: Option
<Vec
<u8>>,
38 pub data
: Vec
<EntryIo
<'a
>>,
39 pub unpack_xattrs
: bool
,
40 pub preserve_permissions
: bool
,
41 pub preserve_mtime
: bool
,
45 pub enum EntryIo
<'a
> {
46 Pad(io
::Take
<io
::Repeat
>),
47 Data(io
::Take
<&'a ArchiveInner
<dyn Read
+ 'a
>>),
50 /// When unpacking items the unpacked thing is returned to allow custom
51 /// additional handling by users. Today the File is returned, in future
52 /// the enum may be extended with kinds for links, directories etc.
55 /// A file was unpacked.
57 /// A directory, hardlink, symlink, or other node was unpacked.
62 impl<'a
, R
: Read
> Entry
<'a
, R
> {
63 /// Returns the path name for this entry.
65 /// This method may fail if the pathname is not valid Unicode and this is
66 /// called on a Windows platform.
68 /// Note that this function will convert any `\` characters to directory
69 /// separators, and it will not always return the same value as
70 /// `self.header().path()` as some archive formats have support for longer
71 /// path names described in separate entries.
73 /// It is recommended to use this method instead of inspecting the `header`
74 /// directly to ensure that various archive formats are handled correctly.
75 pub fn path(&self) -> io
::Result
<Cow
<Path
>> {
79 /// Returns the raw bytes listed for this entry.
81 /// Note that this function will convert any `\` characters to directory
82 /// separators, and it will not always return the same value as
83 /// `self.header().path_bytes()` as some archive formats have support for
84 /// longer path names described in separate entries.
85 pub fn path_bytes(&self) -> Cow
<[u8]> {
86 self.fields
.path_bytes()
89 /// Returns the link name for this entry, if any is found.
91 /// This method may fail if the pathname is not valid Unicode and this is
92 /// called on a Windows platform. `Ok(None)` being returned, however,
93 /// indicates that the link name was not present.
95 /// Note that this function will convert any `\` characters to directory
96 /// separators, and it will not always return the same value as
97 /// `self.header().link_name()` as some archive formats have support for
98 /// longer path names described in separate entries.
100 /// It is recommended to use this method instead of inspecting the `header`
101 /// directly to ensure that various archive formats are handled correctly.
102 pub fn link_name(&self) -> io
::Result
<Option
<Cow
<Path
>>> {
103 self.fields
.link_name()
106 /// Returns the link name for this entry, in bytes, if listed.
108 /// Note that this will not always return the same value as
109 /// `self.header().link_name_bytes()` as some archive formats have support for
110 /// longer path names described in separate entries.
111 pub fn link_name_bytes(&self) -> Option
<Cow
<[u8]>> {
112 self.fields
.link_name_bytes()
115 /// Returns an iterator over the pax extensions contained in this entry.
117 /// Pax extensions are a form of archive where extra metadata is stored in
118 /// key/value pairs in entries before the entry they're intended to
119 /// describe. For example this can be used to describe long file name or
120 /// other metadata like atime/ctime/mtime in more precision.
122 /// The returned iterator will yield key/value pairs for each extension.
124 /// `None` will be returned if this entry does not indicate that it itself
125 /// contains extensions, or if there were no previous extensions describing
128 /// Note that global pax extensions are intended to be applied to all
131 /// Also note that this function will read the entire entry if the entry
132 /// itself is a list of extensions.
133 pub fn pax_extensions(&mut self) -> io
::Result
<Option
<PaxExtensions
>> {
134 self.fields
.pax_extensions()
137 /// Returns access to the header of this entry in the archive.
139 /// This provides access to the metadata for this entry in the archive.
140 pub fn header(&self) -> &Header
{
144 /// Returns access to the size of this entry in the archive.
146 /// In the event the size is stored in a pax extension, that size value
147 /// will be referenced. Otherwise, the entry size will be stored in the header.
148 pub fn size(&self) -> u64 {
152 /// Returns the starting position, in bytes, of the header of this entry in
155 /// The header is always a contiguous section of 512 bytes, so if the
156 /// underlying reader implements `Seek`, then the slice from `header_pos` to
157 /// `header_pos + 512` contains the raw header bytes.
158 pub fn raw_header_position(&self) -> u64 {
159 self.fields
.header_pos
/// Returns the starting position, in bytes, of the file of this entry in the archive.
165 /// If the file of this entry is continuous (e.g. not a sparse file), and
166 /// if the underlying reader implements `Seek`, then the slice from
167 /// `file_pos` to `file_pos + entry_size` contains the raw file bytes.
168 pub fn raw_file_position(&self) -> u64 {
172 /// Writes this file to the specified location.
174 /// This function will write the entire contents of this file into the
175 /// location specified by `dst`. Metadata will also be propagated to the
178 /// This function will create a file at the path `dst`, and it is required
179 /// that the intermediate directories are created. Any existing file at the
180 /// location `dst` will be overwritten.
182 /// > **Note**: This function does not have as many sanity checks as
183 /// > `Archive::unpack` or `Entry::unpack_in`. As a result if you're
184 /// > thinking of unpacking untrusted tarballs you may want to review the
185 /// > implementations of the previous two functions and perhaps implement
186 /// > similar logic yourself.
191 /// use std::fs::File;
192 /// use tar::Archive;
194 /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
196 /// for (i, file) in ar.entries().unwrap().enumerate() {
197 /// let mut file = file.unwrap();
198 /// file.unpack(format!("file-{}", i)).unwrap();
201 pub fn unpack
<P
: AsRef
<Path
>>(&mut self, dst
: P
) -> io
::Result
<Unpacked
> {
202 self.fields
.unpack(None
, dst
.as_ref())
205 /// Extracts this file under the specified path, avoiding security issues.
207 /// This function will write the entire contents of this file into the
208 /// location obtained by appending the path of this file in the archive to
209 /// `dst`, creating any intermediate directories if needed. Metadata will
210 /// also be propagated to the path `dst`. Any existing file at the location
211 /// `dst` will be overwritten.
213 /// This function carefully avoids writing outside of `dst`. If the file has
214 /// a '..' in its path, this function will skip it and return false.
219 /// use std::fs::File;
220 /// use tar::Archive;
222 /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
224 /// for (i, file) in ar.entries().unwrap().enumerate() {
225 /// let mut file = file.unwrap();
226 /// file.unpack_in("target").unwrap();
229 pub fn unpack_in
<P
: AsRef
<Path
>>(&mut self, dst
: P
) -> io
::Result
<bool
> {
230 self.fields
.unpack_in(dst
.as_ref())
233 /// Indicate whether extended file attributes (xattrs on Unix) are preserved
234 /// when unpacking this entry.
236 /// This flag is disabled by default and is currently only implemented on
237 /// Unix using xattr support. This may eventually be implemented for
238 /// Windows, however, if other archive implementations are found which do
240 pub fn set_unpack_xattrs(&mut self, unpack_xattrs
: bool
) {
241 self.fields
.unpack_xattrs
= unpack_xattrs
;
244 /// Indicate whether extended permissions (like suid on Unix) are preserved
245 /// when unpacking this entry.
247 /// This flag is disabled by default and is currently only implemented on
249 pub fn set_preserve_permissions(&mut self, preserve
: bool
) {
250 self.fields
.preserve_permissions
= preserve
;
253 /// Indicate whether access time information is preserved when unpacking
256 /// This flag is enabled by default.
257 pub fn set_preserve_mtime(&mut self, preserve
: bool
) {
258 self.fields
.preserve_mtime
= preserve
;
262 impl<'a
, R
: Read
> Read
for Entry
<'a
, R
> {
263 fn read(&mut self, into
: &mut [u8]) -> io
::Result
<usize> {
264 self.fields
.read(into
)
268 impl<'a
> EntryFields
<'a
> {
269 pub fn from
<R
: Read
>(entry
: Entry
<R
>) -> EntryFields
{
273 pub fn into_entry
<R
: Read
>(self) -> Entry
<'a
, R
> {
276 _ignored
: marker
::PhantomData
,
280 pub fn read_all(&mut self) -> io
::Result
<Vec
<u8>> {
281 // Preallocate some data but don't let ourselves get too crazy now.
282 let cap
= cmp
::min(self.size
, 128 * 1024);
283 let mut v
= Vec
::with_capacity(cap
as usize);
284 self.read_to_end(&mut v
).map(|_
| v
)
287 fn path(&self) -> io
::Result
<Cow
<Path
>> {
288 bytes2path(self.path_bytes())
291 fn path_bytes(&self) -> Cow
<[u8]> {
292 match self.long_pathname
{
294 if let Some(&0) = bytes
.last() {
295 Cow
::Borrowed(&bytes
[..bytes
.len() - 1])
301 if let Some(ref pax
) = self.pax_extensions
{
302 let pax
= PaxExtensions
::new(pax
)
303 .filter_map(|f
| f
.ok())
304 .find(|f
| f
.key_bytes() == b
"path")
305 .map(|f
| f
.value_bytes());
306 if let Some(field
) = pax
{
307 return Cow
::Borrowed(field
);
310 self.header
.path_bytes()
315 /// Gets the path in a "lossy" way, used for error reporting ONLY.
316 fn path_lossy(&self) -> String
{
317 String
::from_utf8_lossy(&self.path_bytes()).to_string()
320 fn link_name(&self) -> io
::Result
<Option
<Cow
<Path
>>> {
321 match self.link_name_bytes() {
322 Some(bytes
) => bytes2path(bytes
).map(Some
),
327 fn link_name_bytes(&self) -> Option
<Cow
<[u8]>> {
328 match self.long_linkname
{
330 if let Some(&0) = bytes
.last() {
331 Some(Cow
::Borrowed(&bytes
[..bytes
.len() - 1]))
333 Some(Cow
::Borrowed(bytes
))
337 if let Some(ref pax
) = self.pax_extensions
{
338 let pax
= PaxExtensions
::new(pax
)
339 .filter_map(|f
| f
.ok())
340 .find(|f
| f
.key_bytes() == b
"linkpath")
341 .map(|f
| f
.value_bytes());
342 if let Some(field
) = pax
{
343 return Some(Cow
::Borrowed(field
));
346 self.header
.link_name_bytes()
351 fn pax_extensions(&mut self) -> io
::Result
<Option
<PaxExtensions
>> {
352 if self.pax_extensions
.is_none() {
353 if !self.header
.entry_type().is_pax_global_extensions()
354 && !self.header
.entry_type().is_pax_local_extensions()
358 self.pax_extensions
= Some(self.read_all()?
);
360 Ok(Some(PaxExtensions
::new(
361 self.pax_extensions
.as_ref().unwrap(),
365 fn unpack_in(&mut self, dst
: &Path
) -> io
::Result
<bool
> {
366 // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3:
// * Leading '/'s are trimmed. For example, `///test` is treated as `test`.
369 // * If the filename contains '..', then the file is skipped when
370 // extracting the tarball.
371 // * '//' within a filename is effectively skipped. An error is
372 // logged, but otherwise the effect is as if any two or more
373 // adjacent '/'s within the filename were consolidated into one
376 // Most of this is handled by the `path` module of the standard
377 // library, but we specially handle a few cases here as well.
379 let mut file_dst
= dst
.to_path_buf();
381 let path
= self.path().map_err(|e
| {
383 format
!("invalid path in entry header: {}", self.path_lossy()),
387 for part
in path
.components() {
389 // Leading '/' characters, root paths, and '.'
390 // components are just ignored and treated as "empty
392 Component
::Prefix(..) | Component
::RootDir
| Component
::CurDir
=> continue,
394 // If any part of the filename is '..', then skip over
395 // unpacking the file to prevent directory traversal
396 // security issues. See, e.g.: CVE-2001-1267,
397 // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131
398 Component
::ParentDir
=> return Ok(false),
400 Component
::Normal(part
) => file_dst
.push(part
),
405 // Skip cases where only slashes or '.' parts were seen, because
406 // this is effectively an empty filename.
407 if *dst
== *file_dst
{
411 // Skip entries without a parent (i.e. outside of FS root)
412 let parent
= match file_dst
.parent() {
414 None
=> return Ok(false),
417 self.ensure_dir_created(&dst
, parent
)
418 .map_err(|e
| TarError
::new(format
!("failed to create `{}`", parent
.display()), e
))?
;
420 let canon_target
= self.validate_inside_dst(&dst
, parent
)?
;
422 self.unpack(Some(&canon_target
), &file_dst
)
423 .map_err(|e
| TarError
::new(format
!("failed to unpack `{}`", file_dst
.display()), e
))?
;
428 /// Unpack as destination directory `dst`.
429 fn unpack_dir(&mut self, dst
: &Path
) -> io
::Result
<()> {
430 // If the directory already exists just let it slide
431 fs
::create_dir(dst
).or_else(|err
| {
432 if err
.kind() == ErrorKind
::AlreadyExists
{
433 let prev
= fs
::metadata(dst
);
434 if prev
.map(|m
| m
.is_dir()).unwrap_or(false) {
440 format
!("{} when creating dir {}", err
, dst
.display()),
/// Unpacks this entry to `dst`. `target_base` is `Some` base directory when called from `unpack_in` and `None` when called from `Entry::unpack`.
446 fn unpack(&mut self, target_base
: Option
<&Path
>, dst
: &Path
) -> io
::Result
<Unpacked
> {
447 let kind
= self.header
.entry_type();
450 self.unpack_dir(dst
)?
;
451 if let Ok(mode
) = self.header
.mode() {
452 set_perms(dst
, None
, mode
, self.preserve_permissions
)?
;
454 return Ok(Unpacked
::__Nonexhaustive
);
455 } else if kind
.is_hard_link() || kind
.is_symlink() {
456 let src
= match self.link_name()?
{
459 return Err(other(&format
!(
460 "hard link listed for {} but no link name found",
461 String
::from_utf8_lossy(self.header
.as_bytes())
466 if src
.iter().count() == 0 {
467 return Err(other(&format
!(
468 "symlink destination for {} is empty",
469 String
::from_utf8_lossy(self.header
.as_bytes())
473 if kind
.is_hard_link() {
474 let link_src
= match target_base
{
475 // If we're unpacking within a directory then ensure that
476 // the destination of this hard link is both present and
477 // inside our own directory. This is needed because we want
478 // to make sure to not overwrite anything outside the root.
480 // Note that this logic is only needed for hard links
481 // currently. With symlinks the `validate_inside_dst` which
482 // happens before this method as part of `unpack_in` will
483 // use canonicalization to ensure this guarantee. For hard
484 // links though they're canonicalized to their existing path
485 // so we need to validate at this time.
487 let link_src
= p
.join(src
);
488 self.validate_inside_dst(p
, &link_src
)?
;
491 None
=> src
.into_owned(),
493 fs
::hard_link(&link_src
, dst
).map_err(|err
| {
497 "{} when hard linking {} to {}",
507 if err_io
.kind() == io
::ErrorKind
::AlreadyExists
&& self.overwrite
{
508 // remove dest and try once more
509 std
::fs
::remove_file(dst
).and_then(|()| symlink(&src
, dst
))
518 "{} when symlinking {} to {}",
526 return Ok(Unpacked
::__Nonexhaustive
);
// Symlinks are not supported on wasm32; always report an error.
#[cfg(target_arch = "wasm32")]
#[allow(unused_variables)]
fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
    let unsupported = io::Error::new(io::ErrorKind::Other, "Not implemented");
    Err(unsupported)
}
535 fn symlink(src
: &Path
, dst
: &Path
) -> io
::Result
<()> {
536 ::std
::os
::windows
::fs
::symlink_file(src
, dst
)
// Creates a symlink at `dst` pointing to `src` using the Unix API.
fn symlink(src: &Path, dst: &Path) -> io::Result<()> {
    use std::os::unix::fs as unix_fs;
    unix_fs::symlink(src, dst)
}
543 } else if kind
.is_pax_global_extensions()
544 || kind
.is_pax_local_extensions()
545 || kind
.is_gnu_longname()
546 || kind
.is_gnu_longlink()
548 return Ok(Unpacked
::__Nonexhaustive
);
551 // Old BSD-tar compatibility.
552 // Names that have a trailing slash should be treated as a directory.
553 // Only applies to old headers.
554 if self.header
.as_ustar().is_none() && self.path_bytes().ends_with(b
"/") {
555 self.unpack_dir(dst
)?
;
556 if let Ok(mode
) = self.header
.mode() {
557 set_perms(dst
, None
, mode
, self.preserve_permissions
)?
;
559 return Ok(Unpacked
::__Nonexhaustive
);
562 // Note the lack of `else` clause above. According to the FreeBSD
565 // > A POSIX-compliant implementation must treat any unrecognized
566 // > typeflag value as a regular file.
568 // As a result if we don't recognize the kind we just write out the file
569 // as we would normally.
571 // Ensure we write a new file rather than overwriting in-place which
572 // is attackable; if an existing file is found unlink it.
573 fn open(dst
: &Path
) -> io
::Result
<std
::fs
::File
> {
574 OpenOptions
::new().write(true).create_new(true).open(dst
)
576 let mut f
= (|| -> io
::Result
<std
::fs
::File
> {
577 let mut f
= open(dst
).or_else(|err
| {
578 if err
.kind() != ErrorKind
::AlreadyExists
{
580 } else if self.overwrite
{
581 match fs
::remove_file(dst
) {
583 Err(ref e
) if e
.kind() == io
::ErrorKind
::NotFound
=> open(dst
),
590 for io
in self.data
.drain(..) {
592 EntryIo
::Data(mut d
) => {
593 let expected
= d
.limit();
594 if io
::copy(&mut d
, &mut f
)?
!= expected
{
595 return Err(other("failed to write entire file"));
599 // TODO: checked cast to i64
600 let to
= SeekFrom
::Current(d
.limit() as i64);
601 let size
= f
.seek(to
)?
;
609 let header
= self.header
.path_bytes();
612 "failed to unpack `{}` into `{}`",
613 String
::from_utf8_lossy(&header
),
620 if self.preserve_mtime
{
621 if let Ok(mtime
) = self.header
.mtime() {
622 // For some more information on this see the comments in
623 // `Header::fill_platform_from`, but the general idea is that
624 // we're trying to avoid 0-mtime files coming out of archives
625 // since some tools don't ingest them well. Perhaps one day
626 // when Cargo stops working with 0-mtime archives we can remove
628 let mtime
= if mtime
== 0 { 1 }
else { mtime }
;
629 let mtime
= FileTime
::from_unix_time(mtime
as i64, 0);
630 filetime
::set_file_handle_times(&f
, Some(mtime
), Some(mtime
)).map_err(|e
| {
631 TarError
::new(format
!("failed to set mtime for `{}`", dst
.display()), e
)
635 if let Ok(mode
) = self.header
.mode() {
636 set_perms(dst
, Some(&mut f
), mode
, self.preserve_permissions
)?
;
638 if self.unpack_xattrs
{
639 set_xattrs(self, dst
)?
;
641 return Ok(Unpacked
::File(f
));
645 f
: Option
<&mut std
::fs
::File
>,
648 ) -> Result
<(), TarError
> {
649 _set_perms(dst
, f
, mode
, preserve
).map_err(|e
| {
652 "failed to set permissions to {:o} \
665 f
: Option
<&mut std
::fs
::File
>,
668 ) -> io
::Result
<()> {
669 use std
::os
::unix
::prelude
::*;
671 let mode
= if preserve { mode }
else { mode & 0o777 }
;
672 let perm
= fs
::Permissions
::from_mode(mode
as _
);
674 Some(f
) => f
.set_permissions(perm
),
675 None
=> fs
::set_permissions(dst
, perm
),
682 f
: Option
<&mut std
::fs
::File
>,
685 ) -> io
::Result
<()> {
686 if mode
& 0o200 == 0o200 {
691 let mut perm
= f
.metadata()?
.permissions();
692 perm
.set_readonly(true);
693 f
.set_permissions(perm
)
696 let mut perm
= fs
::metadata(dst
)?
.permissions();
697 perm
.set_readonly(true);
698 fs
::set_permissions(dst
, perm
)
703 #[cfg(target_arch = "wasm32")]
704 #[allow(unused_variables)]
707 f
: Option
<&mut std
::fs
::File
>,
710 ) -> io
::Result
<()> {
711 Err(io
::Error
::new(io
::ErrorKind
::Other
, "Not implemented"))
714 #[cfg(all(unix, feature = "xattr"))]
715 fn set_xattrs(me
: &mut EntryFields
, dst
: &Path
) -> io
::Result
<()> {
717 use std
::os
::unix
::prelude
::*;
719 let exts
= match me
.pax_extensions() {
724 .filter_map(|e
| e
.ok())
726 let key
= e
.key_bytes();
727 let prefix
= b
"SCHILY.xattr.";
728 if key
.starts_with(prefix
) {
729 Some((&key
[prefix
.len()..], e
))
734 .map(|(key
, e
)| (OsStr
::from_bytes(key
), e
.value_bytes()));
736 for (key
, value
) in exts
{
737 xattr
::set(dst
, key
, value
).map_err(|e
| {
740 "failed to set extended \
742 Xattrs: key={:?}, value={:?}.",
745 String
::from_utf8_lossy(value
)
754 // Windows does not completely support posix xattrs
755 // https://en.wikipedia.org/wiki/Extended_file_attributes#Windows_NT
756 #[cfg(any(windows, not(feature = "xattr"), target_arch = "wasm32"))]
757 fn set_xattrs(_
: &mut EntryFields
, _
: &Path
) -> io
::Result
<()> {
762 fn ensure_dir_created(&self, dst
: &Path
, dir
: &Path
) -> io
::Result
<()> {
763 let mut ancestor
= dir
;
764 let mut dirs_to_create
= Vec
::new();
765 while ancestor
.symlink_metadata().is_err() {
766 dirs_to_create
.push(ancestor
);
767 if let Some(parent
) = ancestor
.parent() {
773 for ancestor
in dirs_to_create
.into_iter().rev() {
774 if let Some(parent
) = ancestor
.parent() {
775 self.validate_inside_dst(dst
, parent
)?
;
777 fs
::create_dir_all(ancestor
)?
;
782 fn validate_inside_dst(&self, dst
: &Path
, file_dst
: &Path
) -> io
::Result
<PathBuf
> {
783 // Abort if target (canonical) parent is outside of `dst`
784 let canon_parent
= file_dst
.canonicalize().map_err(|err
| {
787 format
!("{} while canonicalizing {}", err
, file_dst
.display()),
790 let canon_target
= dst
.canonicalize().map_err(|err
| {
793 format
!("{} while canonicalizing {}", err
, dst
.display()),
796 if !canon_parent
.starts_with(&canon_target
) {
797 let err
= TarError
::new(
799 "trying to unpack outside of destination path: {}",
800 canon_target
.display()
802 // TODO: use ErrorKind::InvalidInput here? (minor breaking change)
803 Error
::new(ErrorKind
::Other
, "Invalid argument"),
805 return Err(err
.into());
811 impl<'a
> Read
for EntryFields
<'a
> {
812 fn read(&mut self, into
: &mut [u8]) -> io
::Result
<usize> {
814 match self.data
.get_mut(0).map(|io
| io
.read(into
)) {
819 None
=> return Ok(0),
825 impl<'a
> Read
for EntryIo
<'a
> {
826 fn read(&mut self, into
: &mut [u8]) -> io
::Result
<usize> {
828 EntryIo
::Pad(ref mut io
) => io
.read(into
),
829 EntryIo
::Data(ref mut io
) => io
.read(into
),