4 use std
::fs
::OpenOptions
;
5 use std
::io
::prelude
::*;
6 use std
::io
::{self, Error, ErrorKind, SeekFrom}
;
8 use std
::path
::{Component, Path, PathBuf}
;
10 use filetime
::{self, FileTime}
;
12 use crate::archive
::ArchiveInner
;
13 use crate::error
::TarError
;
14 use crate::header
::bytes2path
;
16 use crate::pax
::pax_extensions
;
17 use crate::{Archive, Header, PaxExtensions}
;
19 /// A read-only view into an entry of an archive.
21 /// This structure is a window into a portion of a borrowed archive which can
22 /// be inspected. It acts as a file handle by implementing the Reader trait. An
23 /// entry cannot be rewritten once inserted into an archive.
24 pub struct Entry
<'a
, R
: 'a
+ Read
> {
25 fields
: EntryFields
<'a
>,
26 _ignored
: marker
::PhantomData
<&'a Archive
<R
>>,
29 // private implementation detail of `Entry`, but concrete (no type parameters)
30 // and also all-public to be constructed from other modules.
31 pub struct EntryFields
<'a
> {
32 pub long_pathname
: Option
<Vec
<u8>>,
33 pub long_linkname
: Option
<Vec
<u8>>,
34 pub pax_extensions
: Option
<Vec
<u8>>,
39 pub data
: Vec
<EntryIo
<'a
>>,
40 pub unpack_xattrs
: bool
,
41 pub preserve_permissions
: bool
,
42 pub preserve_mtime
: bool
,
45 pub enum EntryIo
<'a
> {
46 Pad(io
::Take
<io
::Repeat
>),
47 Data(io
::Take
<&'a ArchiveInner
<Read
+ 'a
>>),
50 /// When unpacking items the unpacked thing is returned to allow custom
51 /// additional handling by users. Today the File is returned, in future
52 /// the enum may be extended with kinds for links, directories etc.
55 /// A file was unpacked.
57 /// A directory, hardlink, symlink, or other node was unpacked.
62 impl<'a
, R
: Read
> Entry
<'a
, R
> {
63 /// Returns the path name for this entry.
65 /// This method may fail if the pathname is not valid unicode and this is
66 /// called on a Windows platform.
68 /// Note that this function will convert any `\` characters to directory
69 /// separators, and it will not always return the same value as
70 /// `self.header().path()` as some archive formats have support for longer
71 /// path names described in separate entries.
73 /// It is recommended to use this method instead of inspecting the `header`
74 /// directly to ensure that various archive formats are handled correctly.
75 pub fn path(&self) -> io
::Result
<Cow
<Path
>> {
79 /// Returns the raw bytes listed for this entry.
81 /// Note that this function will convert any `\` characters to directory
82 /// separators, and it will not always return the same value as
83 /// `self.header().path_bytes()` as some archive formats have support for
84 /// longer path names described in separate entries.
85 pub fn path_bytes(&self) -> Cow
<[u8]> {
86 self.fields
.path_bytes()
89 /// Returns the link name for this entry, if any is found.
91 /// This method may fail if the pathname is not valid unicode and this is
92 /// called on a Windows platform. `Ok(None)` being returned, however,
93 /// indicates that the link name was not present.
95 /// Note that this function will convert any `\` characters to directory
96 /// separators, and it will not always return the same value as
97 /// `self.header().link_name()` as some archive formats have support for
98 /// longer path names described in separate entries.
100 /// It is recommended to use this method instead of inspecting the `header`
101 /// directly to ensure that various archive formats are handled correctly.
102 pub fn link_name(&self) -> io
::Result
<Option
<Cow
<Path
>>> {
103 self.fields
.link_name()
106 /// Returns the link name for this entry, in bytes, if listed.
108 /// Note that this will not always return the same value as
109 /// `self.header().link_name_bytes()` as some archive formats have support for
110 /// longer path names described in separate entries.
111 pub fn link_name_bytes(&self) -> Option
<Cow
<[u8]>> {
112 self.fields
.link_name_bytes()
115 /// Returns an iterator over the pax extensions contained in this entry.
117 /// Pax extensions are a form of archive where extra metadata is stored in
118 /// key/value pairs in entries before the entry they're intended to
119 /// describe. For example this can be used to describe long file name or
120 /// other metadata like atime/ctime/mtime in more precision.
122 /// The returned iterator will yield key/value pairs for each extension.
124 /// `None` will be returned if this entry does not indicate that it itself
125 /// contains extensions, or if there were no previous extensions describing
128 /// Note that global pax extensions are intended to be applied to all
131 /// Also note that this function will read the entire entry if the entry
132 /// itself is a list of extensions.
133 pub fn pax_extensions(&mut self) -> io
::Result
<Option
<PaxExtensions
>> {
134 self.fields
.pax_extensions()
137 /// Returns access to the header of this entry in the archive.
139 /// This provides access to the metadata for this entry in the archive.
140 pub fn header(&self) -> &Header
{
144 /// Returns the starting position, in bytes, of the header of this entry in
147 /// The header is always a contiguous section of 512 bytes, so if the
148 /// underlying reader implements `Seek`, then the slice from `header_pos` to
149 /// `header_pos + 512` contains the raw header bytes.
150 pub fn raw_header_position(&self) -> u64 {
151 self.fields
.header_pos
154 /// Returns the starting position, in bytes, of the file of this entry in
157 /// If the file of this entry is continuous (e.g. not a sparse file), and
158 /// if the underlying reader implements `Seek`, then the slice from
159 /// `file_pos` to `file_pos + entry_size` contains the raw file bytes.
160 pub fn raw_file_position(&self) -> u64 {
164 /// Writes this file to the specified location.
166 /// This function will write the entire contents of this file into the
167 /// location specified by `dst`. Metadata will also be propagated to the
170 /// This function will create a file at the path `dst`, and it is required
171 /// that the intermediate directories are created. Any existing file at the
172 /// location `dst` will be overwritten.
174 /// > **Note**: This function does not have as many sanity checks as
175 /// > `Archive::unpack` or `Entry::unpack_in`. As a result if you're
176 /// > thinking of unpacking untrusted tarballs you may want to review the
177 /// > implementations of the previous two functions and perhaps implement
178 /// > similar logic yourself.
183 /// use std::fs::File;
184 /// use tar::Archive;
186 /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
188 /// for (i, file) in ar.entries().unwrap().enumerate() {
189 /// let mut file = file.unwrap();
190 /// file.unpack(format!("file-{}", i)).unwrap();
193 pub fn unpack
<P
: AsRef
<Path
>>(&mut self, dst
: P
) -> io
::Result
<Unpacked
> {
194 self.fields
.unpack(None
, dst
.as_ref())
197 /// Extracts this file under the specified path, avoiding security issues.
199 /// This function will write the entire contents of this file into the
200 /// location obtained by appending the path of this file in the archive to
201 /// `dst`, creating any intermediate directories if needed. Metadata will
202 /// also be propagated to the path `dst`. Any existing file at the location
203 /// `dst` will be overwritten.
205 /// This function carefully avoids writing outside of `dst`. If the file has
206 /// a '..' in its path, this function will skip it and return false.
211 /// use std::fs::File;
212 /// use tar::Archive;
214 /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
216 /// for (i, file) in ar.entries().unwrap().enumerate() {
217 /// let mut file = file.unwrap();
218 /// file.unpack_in("target").unwrap();
221 pub fn unpack_in
<P
: AsRef
<Path
>>(&mut self, dst
: P
) -> io
::Result
<bool
> {
222 self.fields
.unpack_in(dst
.as_ref())
225 /// Indicate whether extended file attributes (xattrs on Unix) are preserved
226 /// when unpacking this entry.
228 /// This flag is disabled by default and is currently only implemented on
229 /// Unix using xattr support. This may eventually be implemented for
230 /// Windows, however, if other archive implementations are found which do
232 pub fn set_unpack_xattrs(&mut self, unpack_xattrs
: bool
) {
233 self.fields
.unpack_xattrs
= unpack_xattrs
;
236 /// Indicate whether extended permissions (like suid on Unix) are preserved
237 /// when unpacking this entry.
239 /// This flag is disabled by default and is currently only implemented on
241 pub fn set_preserve_permissions(&mut self, preserve
: bool
) {
242 self.fields
.preserve_permissions
= preserve
;
245 /// Indicate whether access time information is preserved when unpacking
248 /// This flag is enabled by default.
249 pub fn set_preserve_mtime(&mut self, preserve
: bool
) {
250 self.fields
.preserve_mtime
= preserve
;
254 impl<'a
, R
: Read
> Read
for Entry
<'a
, R
> {
255 fn read(&mut self, into
: &mut [u8]) -> io
::Result
<usize> {
256 self.fields
.read(into
)
260 impl<'a
> EntryFields
<'a
> {
261 pub fn from
<R
: Read
>(entry
: Entry
<R
>) -> EntryFields
{
265 pub fn into_entry
<R
: Read
>(self) -> Entry
<'a
, R
> {
268 _ignored
: marker
::PhantomData
,
272 pub fn read_all(&mut self) -> io
::Result
<Vec
<u8>> {
273 // Preallocate some data but don't let ourselves get too crazy now.
274 let cap
= cmp
::min(self.size
, 128 * 1024);
275 let mut v
= Vec
::with_capacity(cap
as usize);
276 self.read_to_end(&mut v
).map(|_
| v
)
279 fn path(&self) -> io
::Result
<Cow
<Path
>> {
280 bytes2path(self.path_bytes())
283 fn path_bytes(&self) -> Cow
<[u8]> {
284 match self.long_pathname
{
286 if let Some(&0) = bytes
.last() {
287 Cow
::Borrowed(&bytes
[..bytes
.len() - 1])
293 if let Some(ref pax
) = self.pax_extensions
{
294 let pax
= pax_extensions(pax
)
295 .filter_map(|f
| f
.ok())
296 .find(|f
| f
.key_bytes() == b
"path")
297 .map(|f
| f
.value_bytes());
298 if let Some(field
) = pax
{
299 return Cow
::Borrowed(field
);
302 self.header
.path_bytes()
307 /// Gets the path in a "lossy" way, used for error reporting ONLY.
308 fn path_lossy(&self) -> String
{
309 String
::from_utf8_lossy(&self.path_bytes()).to_string()
312 fn link_name(&self) -> io
::Result
<Option
<Cow
<Path
>>> {
313 match self.link_name_bytes() {
314 Some(bytes
) => bytes2path(bytes
).map(Some
),
319 fn link_name_bytes(&self) -> Option
<Cow
<[u8]>> {
320 match self.long_linkname
{
322 if let Some(&0) = bytes
.last() {
323 Some(Cow
::Borrowed(&bytes
[..bytes
.len() - 1]))
325 Some(Cow
::Borrowed(bytes
))
328 None
=> self.header
.link_name_bytes(),
332 fn pax_extensions(&mut self) -> io
::Result
<Option
<PaxExtensions
>> {
333 if self.pax_extensions
.is_none() {
334 if !self.header
.entry_type().is_pax_global_extensions()
335 && !self.header
.entry_type().is_pax_local_extensions()
339 self.pax_extensions
= Some(self.read_all()?
);
341 Ok(Some(pax_extensions(self.pax_extensions
.as_ref().unwrap())))
344 fn unpack_in(&mut self, dst
: &Path
) -> io
::Result
<bool
> {
345 // Notes regarding bsdtar 2.8.3 / libarchive 2.8.3:
346 // * Leading '/'s are trimmed. For example, `///test` is treated as
348 // * If the filename contains '..', then the file is skipped when
349 // extracting the tarball.
350 // * '//' within a filename is effectively skipped. An error is
351 // logged, but otherwise the effect is as if any two or more
352 // adjacent '/'s within the filename were consolidated into one
355 // Most of this is handled by the `path` module of the standard
356 // library, but we specially handle a few cases here as well.
358 let mut file_dst
= dst
.to_path_buf();
360 let path
= self.path().map_err(|e
| {
362 &format
!("invalid path in entry header: {}", self.path_lossy()),
366 for part
in path
.components() {
368 // Leading '/' characters, root paths, and '.'
369 // components are just ignored and treated as "empty
371 Component
::Prefix(..) | Component
::RootDir
| Component
::CurDir
=> continue,
373 // If any part of the filename is '..', then skip over
374 // unpacking the file to prevent directory traversal
375 // security issues. See, e.g.: CVE-2001-1267,
376 // CVE-2002-0399, CVE-2005-1918, CVE-2007-4131
377 Component
::ParentDir
=> return Ok(false),
379 Component
::Normal(part
) => file_dst
.push(part
),
384 // Skip cases where only slashes or '.' parts were seen, because
385 // this is effectively an empty filename.
386 if *dst
== *file_dst
{
390 // Skip entries without a parent (i.e. outside of FS root)
391 let parent
= match file_dst
.parent() {
393 None
=> return Ok(false),
396 if parent
.symlink_metadata().is_err() {
397 fs
::create_dir_all(&parent
).map_err(|e
| {
398 TarError
::new(&format
!("failed to create `{}`", parent
.display()), e
)
402 let canon_target
= self.validate_inside_dst(&dst
, parent
)?
;
404 self.unpack(Some(&canon_target
), &file_dst
)
405 .map_err(|e
| TarError
::new(&format
!("failed to unpack `{}`", file_dst
.display()), e
))?
;
410 /// Unpack as destination directory `dst`.
411 fn unpack_dir(&mut self, dst
: &Path
) -> io
::Result
<()> {
412 // If the directory already exists just let it slide
413 fs
::create_dir(dst
).or_else(|err
| {
414 if err
.kind() == ErrorKind
::AlreadyExists
{
415 let prev
= fs
::metadata(dst
);
416 if prev
.map(|m
| m
.is_dir()).unwrap_or(false) {
422 format
!("{} when creating dir {}", err
, dst
.display()),
427 /// Unpacks this entry to `dst`, choosing how to unpack from the header's entry type.
428 fn unpack(&mut self, target_base
: Option
<&Path
>, dst
: &Path
) -> io
::Result
<Unpacked
> {
429 let kind
= self.header
.entry_type();
432 self.unpack_dir(dst
)?
;
433 if let Ok(mode
) = self.header
.mode() {
434 set_perms(dst
, None
, mode
, self.preserve_permissions
)?
;
436 return Ok(Unpacked
::__Nonexhaustive
);
437 } else if kind
.is_hard_link() || kind
.is_symlink() {
438 let src
= match self.link_name()?
{
441 return Err(other(&format
!(
442 "hard link listed for {} but no link name found",
443 String
::from_utf8_lossy(self.header
.as_bytes())
448 if src
.iter().count() == 0 {
449 return Err(other(&format
!(
450 "symlink destination for {} is empty",
451 String
::from_utf8_lossy(self.header
.as_bytes())
455 if kind
.is_hard_link() {
456 let link_src
= match target_base
{
457 // If we're unpacking within a directory then ensure that
458 // the destination of this hard link is both present and
459 // inside our own directory. This is needed because we want
460 // to make sure to not overwrite anything outside the root.
462 // Note that this logic is only needed for hard links
463 // currently. With symlinks the `validate_inside_dst` which
464 // happens before this method as part of `unpack_in` will
465 // use canonicalization to ensure this guarantee. For hard
466 // links though they're canonicalized to their existing path
467 // so we need to validate at this time.
469 let link_src
= p
.join(src
);
470 self.validate_inside_dst(p
, &link_src
)?
;
473 None
=> src
.into_owned(),
475 fs
::hard_link(&link_src
, dst
).map_err(|err
| {
479 "{} when hard linking {} to {}",
487 symlink(&src
, dst
).map_err(|err
| {
491 "{} when symlinking {} to {}",
499 return Ok(Unpacked
::__Nonexhaustive
);
501 #[cfg(target_arch = "wasm32")]
502 #[allow(unused_variables)]
503 fn symlink(src
: &Path
, dst
: &Path
) -> io
::Result
<()> {
504 Err(io
::Error
::new(io
::ErrorKind
::Other
, "Not implemented"))
508 fn symlink(src
: &Path
, dst
: &Path
) -> io
::Result
<()> {
509 ::std
::os
::windows
::fs
::symlink_file(src
, dst
)
512 #[cfg(any(unix, target_os = "redox"))]
513 fn symlink(src
: &Path
, dst
: &Path
) -> io
::Result
<()> {
514 ::std
::os
::unix
::fs
::symlink(src
, dst
)
516 } else if kind
.is_pax_global_extensions()
517 || kind
.is_pax_local_extensions()
518 || kind
.is_gnu_longname()
519 || kind
.is_gnu_longlink()
521 return Ok(Unpacked
::__Nonexhaustive
);
524 // Old BSD-tar compatibility.
525 // Names that have a trailing slash should be treated as a directory.
526 // Only applies to old headers.
527 if self.header
.as_ustar().is_none() && self.path_bytes().ends_with(b
"/") {
528 self.unpack_dir(dst
)?
;
529 if let Ok(mode
) = self.header
.mode() {
530 set_perms(dst
, None
, mode
, self.preserve_permissions
)?
;
532 return Ok(Unpacked
::__Nonexhaustive
);
535 // Note the lack of `else` clause above. According to the FreeBSD
538 // > A POSIX-compliant implementation must treat any unrecognized
539 // > typeflag value as a regular file.
541 // As a result if we don't recognize the kind we just write out the file
542 // as we would normally.
544 // Ensure we write a new file rather than overwriting in-place which
545 // is attackable; if an existing file is found unlink it.
546 fn open(dst
: &Path
) -> io
::Result
<std
::fs
::File
> {
547 OpenOptions
::new().write(true).create_new(true).open(dst
)
549 let mut f
= (|| -> io
::Result
<std
::fs
::File
> {
550 let mut f
= open(dst
).or_else(|err
| {
551 if err
.kind() != ErrorKind
::AlreadyExists
{
554 match fs
::remove_file(dst
) {
556 Err(ref e
) if e
.kind() == io
::ErrorKind
::NotFound
=> open(dst
),
561 for io
in self.data
.drain(..) {
563 EntryIo
::Data(mut d
) => {
564 let expected
= d
.limit();
565 if io
::copy(&mut d
, &mut f
)?
!= expected
{
566 return Err(other("failed to write entire file"));
570 // TODO: checked cast to i64
571 let to
= SeekFrom
::Current(d
.limit() as i64);
572 let size
= f
.seek(to
)?
;
580 let header
= self.header
.path_bytes();
583 "failed to unpack `{}` into `{}`",
584 String
::from_utf8_lossy(&header
),
591 if self.preserve_mtime
{
592 if let Ok(mtime
) = self.header
.mtime() {
593 let mtime
= FileTime
::from_unix_time(mtime
as i64, 0);
594 filetime
::set_file_handle_times(&f
, Some(mtime
), Some(mtime
)).map_err(|e
| {
595 TarError
::new(&format
!("failed to set mtime for `{}`", dst
.display()), e
)
599 if let Ok(mode
) = self.header
.mode() {
600 set_perms(dst
, Some(&mut f
), mode
, self.preserve_permissions
)?
;
602 if self.unpack_xattrs
{
603 set_xattrs(self, dst
)?
;
605 return Ok(Unpacked
::File(f
));
609 f
: Option
<&mut std
::fs
::File
>,
612 ) -> Result
<(), TarError
> {
613 _set_perms(dst
, f
, mode
, preserve
).map_err(|e
| {
616 "failed to set permissions to {:o} \
626 #[cfg(any(unix, target_os = "redox"))]
629 f
: Option
<&mut std
::fs
::File
>,
632 ) -> io
::Result
<()> {
633 use std
::os
::unix
::prelude
::*;
635 let mode
= if preserve { mode }
else { mode & 0o777 }
;
636 let perm
= fs
::Permissions
::from_mode(mode
as _
);
638 Some(f
) => f
.set_permissions(perm
),
639 None
=> fs
::set_permissions(dst
, perm
),
646 f
: Option
<&mut std
::fs
::File
>,
649 ) -> io
::Result
<()> {
650 if mode
& 0o200 == 0o200 {
655 let mut perm
= f
.metadata()?
.permissions();
656 perm
.set_readonly(true);
657 f
.set_permissions(perm
)
660 let mut perm
= fs
::metadata(dst
)?
.permissions();
661 perm
.set_readonly(true);
662 fs
::set_permissions(dst
, perm
)
667 #[cfg(target_arch = "wasm32")]
668 #[allow(unused_variables)]
671 f
: Option
<&mut std
::fs
::File
>,
674 ) -> io
::Result
<()> {
675 Err(io
::Error
::new(io
::ErrorKind
::Other
, "Not implemented"))
678 #[cfg(all(unix, feature = "xattr"))]
679 fn set_xattrs(me
: &mut EntryFields
, dst
: &Path
) -> io
::Result
<()> {
681 use std
::os
::unix
::prelude
::*;
683 let exts
= match me
.pax_extensions() {
688 .filter_map(|e
| e
.ok())
690 let key
= e
.key_bytes();
691 let prefix
= b
"SCHILY.xattr.";
692 if key
.starts_with(prefix
) {
693 Some((&key
[prefix
.len()..], e
))
698 .map(|(key
, e
)| (OsStr
::from_bytes(key
), e
.value_bytes()));
700 for (key
, value
) in exts
{
701 xattr
::set(dst
, key
, value
).map_err(|e
| {
704 "failed to set extended \
706 Xattrs: key={:?}, value={:?}.",
709 String
::from_utf8_lossy(value
)
718 // Windows does not completely support posix xattrs
719 // https://en.wikipedia.org/wiki/Extended_file_attributes#Windows_NT
723 not(feature
= "xattr"),
724 target_arch
= "wasm32"
726 fn set_xattrs(_
: &mut EntryFields
, _
: &Path
) -> io
::Result
<()> {
731 fn validate_inside_dst(&self, dst
: &Path
, file_dst
: &Path
) -> io
::Result
<PathBuf
> {
732 // Abort if target (canonical) parent is outside of `dst`
733 let canon_parent
= file_dst
.canonicalize().map_err(|err
| {
736 format
!("{} while canonicalizing {}", err
, file_dst
.display()),
739 let canon_target
= dst
.canonicalize().map_err(|err
| {
742 format
!("{} while canonicalizing {}", err
, dst
.display()),
745 if !canon_parent
.starts_with(&canon_target
) {
746 let err
= TarError
::new(
748 "trying to unpack outside of destination path: {}",
749 canon_target
.display()
751 // TODO: use ErrorKind::InvalidInput here? (minor breaking change)
752 Error
::new(ErrorKind
::Other
, "Invalid argument"),
754 return Err(err
.into());
760 impl<'a
> Read
for EntryFields
<'a
> {
761 fn read(&mut self, into
: &mut [u8]) -> io
::Result
<usize> {
763 match self.data
.get_mut(0).map(|io
| io
.read(into
)) {
768 None
=> return Ok(0),
774 impl<'a
> Read
for EntryIo
<'a
> {
775 fn read(&mut self, into
: &mut [u8]) -> io
::Result
<usize> {
777 EntryIo
::Pad(ref mut io
) => io
.read(into
),
778 EntryIo
::Data(ref mut io
) => io
.read(into
),