1 use std
::cell
::{Cell, RefCell}
;
3 use std
::convert
::TryFrom
;
5 use std
::io
::prelude
::*;
6 use std
::io
::{self, SeekFrom}
;
10 use crate::entry
::{EntryFields, EntryIo}
;
11 use crate::error
::TarError
;
13 use crate::pax
::pax_extensions_size
;
14 use crate::{Entry, GnuExtSparseHeader, GnuSparseHeader, Header}
;
16 /// A top-level representation of an archive file.
18 /// This archive can have an entry added to it and it can be iterated over.
19 pub struct Archive
<R
: ?Sized
+ Read
> {
20 inner
: ArchiveInner
<R
>,
23 pub struct ArchiveInner
<R
: ?Sized
> {
26 preserve_permissions
: bool
,
33 /// An iterator over the entries of an archive.
34 pub struct Entries
<'a
, R
: 'a
+ Read
> {
35 fields
: EntriesFields
<'a
>,
36 _ignored
: marker
::PhantomData
<&'a Archive
<R
>>,
/// Internal marker trait bundling `Read + Seek`, so that a single
/// `dyn SeekRead` trait object can stand in for a reader that supports
/// both capabilities (used by `EntriesFields::seekable_archive`).
trait SeekRead: Read + Seek {}
// Blanket impl: every type that is both `Read` and `Seek` automatically
// implements `SeekRead`, so callers never implement it by hand.
impl<R: Read + Seek> SeekRead for R {}
42 struct EntriesFields
<'a
> {
43 archive
: &'a Archive
<dyn Read
+ 'a
>,
44 seekable_archive
: Option
<&'a Archive
<dyn SeekRead
+ 'a
>>,
50 impl<R
: Read
> Archive
<R
> {
51 /// Create a new archive with the underlying object as the reader.
52 pub fn new(obj
: R
) -> Archive
<R
> {
56 preserve_permissions
: false,
60 obj
: RefCell
::new(obj
),
66 /// Unwrap this archive, returning the underlying object.
67 pub fn into_inner(self) -> R
{
68 self.inner
.obj
.into_inner()
71 /// Construct an iterator over the entries in this archive.
73 /// Note that care must be taken to consider each entry within an archive in
74 /// sequence. If entries are processed out of sequence (from what the
75 /// iterator returns), then the contents read for each entry may be
77 pub fn entries(&mut self) -> io
::Result
<Entries
<R
>> {
78 let me
: &mut Archive
<dyn Read
> = self;
79 me
._entries(None
).map(|fields
| Entries
{
81 _ignored
: marker
::PhantomData
,
85 /// Unpacks the contents tarball into the specified `dst`.
87 /// This function will iterate over the entire contents of this tarball,
88 /// extracting each file in turn to the location specified by the entry's
91 /// This operation is relatively sensitive in that it will not write files
92 /// outside of the path specified by `dst`. Files in the archive which have
93 /// a '..' in their path are skipped during the unpacking process.
98 /// use std::fs::File;
101 /// let mut ar = Archive::new(File::open("foo.tar").unwrap());
102 /// ar.unpack("foo").unwrap();
104 pub fn unpack
<P
: AsRef
<Path
>>(&mut self, dst
: P
) -> io
::Result
<()> {
105 let me
: &mut Archive
<dyn Read
> = self;
106 me
._unpack(dst
.as_ref())
109 /// Indicate whether extended file attributes (xattrs on Unix) are preserved
110 /// when unpacking this archive.
112 /// This flag is disabled by default and is currently only implemented on
113 /// Unix using xattr support. This may eventually be implemented for
114 /// Windows, however, if other archive implementations are found which do
116 pub fn set_unpack_xattrs(&mut self, unpack_xattrs
: bool
) {
117 self.inner
.unpack_xattrs
= unpack_xattrs
;
120 /// Indicate whether extended permissions (like suid on Unix) are preserved
121 /// when unpacking this entry.
123 /// This flag is disabled by default and is currently only implemented on
125 pub fn set_preserve_permissions(&mut self, preserve
: bool
) {
126 self.inner
.preserve_permissions
= preserve
;
129 /// Indicate whether files and symlinks should be overwritten on extraction.
130 pub fn set_overwrite(&mut self, overwrite
: bool
) {
131 self.inner
.overwrite
= overwrite
;
134 /// Indicate whether access time information is preserved when unpacking
137 /// This flag is enabled by default.
138 pub fn set_preserve_mtime(&mut self, preserve
: bool
) {
139 self.inner
.preserve_mtime
= preserve
;
142 /// Ignore zeroed headers, which would otherwise indicate to the archive that it has no more
145 /// This can be used in case multiple tar archives have been concatenated together.
146 pub fn set_ignore_zeros(&mut self, ignore_zeros
: bool
) {
147 self.inner
.ignore_zeros
= ignore_zeros
;
151 impl<R
: Seek
+ Read
> Archive
<R
> {
152 /// Construct an iterator over the entries in this archive for a seekable
153 /// reader. Seek will be used to efficiently skip over file contents.
155 /// Note that care must be taken to consider each entry within an archive in
156 /// sequence. If entries are processed out of sequence (from what the
157 /// iterator returns), then the contents read for each entry may be
159 pub fn entries_with_seek(&mut self) -> io
::Result
<Entries
<R
>> {
160 let me
: &Archive
<dyn Read
> = self;
161 let me_seekable
: &Archive
<dyn SeekRead
> = self;
162 me
._entries(Some(me_seekable
)).map(|fields
| Entries
{
164 _ignored
: marker
::PhantomData
,
169 impl Archive
<dyn Read
+ '_
> {
172 seekable_archive
: Option
<&'a Archive
<dyn SeekRead
+ 'a
>>,
173 ) -> io
::Result
<EntriesFields
<'a
>> {
174 if self.inner
.pos
.get() != 0 {
176 "cannot call entries unless archive is at \
189 fn _unpack(&mut self, dst
: &Path
) -> io
::Result
<()> {
190 if dst
.symlink_metadata().is_err() {
191 fs
::create_dir_all(&dst
)
192 .map_err(|e
| TarError
::new(format
!("failed to create `{}`", dst
.display()), e
))?
;
195 // Canonicalizing the dst directory will prepend the path with '\\?\'
196 // on windows which will allow windows APIs to treat the path as an
197 // extended-length path with a 32,767 character limit. Otherwise all
198 // unpacked paths over 260 characters will fail on creation with a
199 // NotFound exception.
200 let dst
= &dst
.canonicalize().unwrap_or(dst
.to_path_buf());
202 // Delay any directory entries until the end (they will be created if needed by
203 // descendants), to ensure that directory permissions do not interfere with descendant
205 let mut directories
= Vec
::new();
206 for entry
in self._entries(None
)?
{
207 let mut file
= entry
.map_err(|e
| TarError
::new("failed to iterate over archive", e
))?
;
208 if file
.header().entry_type() == crate::EntryType
::Directory
{
209 directories
.push(file
);
211 file
.unpack_in(dst
)?
;
214 for mut dir
in directories
{
222 impl<'a
, R
: Read
> Entries
<'a
, R
> {
223 /// Indicates whether this iterator will return raw entries or not.
225 /// If the raw list of entries are returned, then no preprocessing happens
226 /// on account of this library, for example taking into account GNU long name
227 /// or long link archive members. Raw iteration is disabled by default.
228 pub fn raw(self, raw
: bool
) -> Entries
<'a
, R
> {
230 fields
: EntriesFields
{
234 _ignored
: marker
::PhantomData
,
238 impl<'a
, R
: Read
> Iterator
for Entries
<'a
, R
> {
239 type Item
= io
::Result
<Entry
<'a
, R
>>;
241 fn next(&mut self) -> Option
<io
::Result
<Entry
<'a
, R
>>> {
244 .map(|result
| result
.map(|e
| EntryFields
::from(e
).into_entry()))
248 impl<'a
> EntriesFields
<'a
> {
251 pax_size
: Option
<u64>,
252 ) -> io
::Result
<Option
<Entry
<'a
, io
::Empty
>>> {
253 let mut header
= Header
::new_old();
254 let mut header_pos
= self.next
;
256 // Seek to the start of the next header in the archive
257 let delta
= self.next
- self.archive
.inner
.pos
.get();
260 // EOF is an indicator that we are at the end of the archive.
261 if !try_read_all(&mut &self.archive
.inner
, header
.as_mut_bytes())?
{
265 // If a header is not all zeros, we have another valid header.
266 // Otherwise, check if we are ignoring zeros and continue, or break as if this is the
267 // end of the archive.
268 if !header
.as_bytes().iter().all(|i
| *i
== 0) {
273 if !self.archive
.inner
.ignore_zeros
{
277 header_pos
= self.next
;
280 // Make sure the checksum is ok
281 let sum
= header
.as_bytes()[..148]
283 .chain(&header
.as_bytes()[156..])
284 .fold(0, |a
, b
| a
+ (*b
as u32))
286 let cksum
= header
.cksum()?
;
288 return Err(other("archive header checksum mismatch"));
291 let file_pos
= self.next
;
292 let mut size
= header
.entry_size()?
;
294 if let Some(pax_size
) = pax_size
{
298 let ret
= EntryFields
{
300 header_pos
: header_pos
,
302 data
: vec
![EntryIo
::Data((&self.archive
.inner
).take(size
))],
306 pax_extensions
: None
,
307 unpack_xattrs
: self.archive
.inner
.unpack_xattrs
,
308 preserve_permissions
: self.archive
.inner
.preserve_permissions
,
309 preserve_mtime
: self.archive
.inner
.preserve_mtime
,
310 overwrite
: self.archive
.inner
.overwrite
,
313 // Store where the next entry is, rounding up by 512 bytes (the size of
317 .ok_or_else(|| other("size overflow"))?
;
320 .checked_add(size
& !(512 - 1))
321 .ok_or_else(|| other("size overflow"))?
;
323 Ok(Some(ret
.into_entry()))
326 fn next_entry(&mut self) -> io
::Result
<Option
<Entry
<'a
, io
::Empty
>>> {
328 return self.next_entry_raw(None
);
331 let mut gnu_longname
= None
;
332 let mut gnu_longlink
= None
;
333 let mut pax_extensions
= None
;
334 let mut pax_size
= None
;
335 let mut processed
= 0;
338 let entry
= match self.next_entry_raw(pax_size
)?
{
339 Some(entry
) => entry
,
340 None
if processed
> 1 => {
342 "members found describing a future member \
343 but no future member found",
346 None
=> return Ok(None
),
349 let is_recognized_header
=
350 entry
.header().as_gnu().is_some() || entry
.header().as_ustar().is_some();
352 if is_recognized_header
&& entry
.header().entry_type().is_gnu_longname() {
353 if gnu_longname
.is_some() {
355 "two long name entries describing \
359 gnu_longname
= Some(EntryFields
::from(entry
).read_all()?
);
363 if is_recognized_header
&& entry
.header().entry_type().is_gnu_longlink() {
364 if gnu_longlink
.is_some() {
366 "two long name entries describing \
370 gnu_longlink
= Some(EntryFields
::from(entry
).read_all()?
);
374 if is_recognized_header
&& entry
.header().entry_type().is_pax_local_extensions() {
375 if pax_extensions
.is_some() {
377 "two pax extensions entries describing \
381 pax_extensions
= Some(EntryFields
::from(entry
).read_all()?
);
382 if let Some(pax_extensions_ref
) = &pax_extensions
{
383 pax_size
= pax_extensions_size(pax_extensions_ref
);
388 let mut fields
= EntryFields
::from(entry
);
389 fields
.long_pathname
= gnu_longname
;
390 fields
.long_linkname
= gnu_longlink
;
391 fields
.pax_extensions
= pax_extensions
;
392 self.parse_sparse_header(&mut fields
)?
;
393 return Ok(Some(fields
.into_entry()));
397 fn parse_sparse_header(&mut self, entry
: &mut EntryFields
<'a
>) -> io
::Result
<()> {
398 if !entry
.header
.entry_type().is_gnu_sparse() {
401 let gnu
= match entry
.header
.as_gnu() {
403 None
=> return Err(other("sparse entry type listed but not GNU header")),
406 // Sparse files are represented internally as a list of blocks that are
407 // read. Blocks are either a bunch of 0's or they're data from the
408 // underlying archive.
410 // Blocks of a sparse file are described by the `GnuSparseHeader`
411 // structure, some of which are contained in `GnuHeader` but some of
412 // which may also be contained after the first header in further
415 // We read off all the blocks here and use the `add_block` function to
416 // incrementally add them to the list of I/O block (in `entry.data`).
417 // The `add_block` function also validates that each chunk comes after
418 // the previous, we don't overrun the end of the file, and each block is
419 // aligned to a 512-byte boundary in the archive itself.
421 // At the end we verify that the sparse file size (`Header::size`) is
422 // the same as the current offset (described by the list of blocks) as
423 // well as the amount of data read equals the size of the entry
424 // (`Header::entry_size`).
425 entry
.data
.truncate(0);
428 let mut remaining
= entry
.size
;
430 let data
= &mut entry
.data
;
431 let reader
= &self.archive
.inner
;
432 let size
= entry
.size
;
433 let mut add_block
= |block
: &GnuSparseHeader
| -> io
::Result
<_
> {
434 if block
.is_empty() {
437 let off
= block
.offset()?
;
438 let len
= block
.length()?
;
439 if len
!= 0 && (size
- remaining
) % 512 != 0 {
441 "previous block in sparse file was not \
442 aligned to 512-byte boundary",
444 } else if off
< cur
{
446 "out of order or overlapping sparse \
449 } else if cur
< off
{
450 let block
= io
::repeat(0).take(off
- cur
);
451 data
.push(EntryIo
::Pad(block
));
455 .ok_or_else(|| other("more bytes listed in sparse file than u64 can hold"))?
;
456 remaining
= remaining
.checked_sub(len
).ok_or_else(|| {
458 "sparse file consumed more data than the header \
462 data
.push(EntryIo
::Data(reader
.take(len
)));
465 for block
in gnu
.sparse
.iter() {
468 if gnu
.is_extended() {
469 let mut ext
= GnuExtSparseHeader
::new();
470 ext
.isextended
[0] = 1;
471 while ext
.is_extended() {
472 if !try_read_all(&mut &self.archive
.inner
, ext
.as_mut_bytes())?
{
473 return Err(other("failed to read extension"));
477 for block
in ext
.sparse
.iter() {
483 if cur
!= gnu
.real_size()?
{
485 "mismatch in sparse file chunks and \
492 "mismatch in sparse file chunks and \
493 entry size in header",
499 fn skip(&mut self, mut amt
: u64) -> io
::Result
<()> {
500 if let Some(seekable_archive
) = self.seekable_archive
{
501 let pos
= io
::SeekFrom
::Current(
502 i64::try_from(amt
).map_err(|_
| other("seek position out of bounds"))?
,
504 (&seekable_archive
.inner
).seek(pos
)?
;
506 let mut buf
= [0u8; 4096 * 8];
508 let n
= cmp
::min(amt
, buf
.len() as u64);
509 let n
= (&self.archive
.inner
).read(&mut buf
[..n
as usize])?
;
511 return Err(other("unexpected EOF during skip"));
520 impl<'a
> Iterator
for EntriesFields
<'a
> {
521 type Item
= io
::Result
<Entry
<'a
, io
::Empty
>>;
523 fn next(&mut self) -> Option
<io
::Result
<Entry
<'a
, io
::Empty
>>> {
527 match self.next_entry() {
528 Ok(Some(e
)) => Some(Ok(e
)),
542 impl<'a
, R
: ?Sized
+ Read
> Read
for &'a ArchiveInner
<R
> {
543 fn read(&mut self, into
: &mut [u8]) -> io
::Result
<usize> {
544 let i
= self.obj
.borrow_mut().read(into
)?
;
545 self.pos
.set(self.pos
.get() + i
as u64);
550 impl<'a
, R
: ?Sized
+ Seek
> Seek
for &'a ArchiveInner
<R
> {
551 fn seek(&mut self, pos
: SeekFrom
) -> io
::Result
<u64> {
552 let pos
= self.obj
.borrow_mut().seek(pos
)?
;
558 /// Try to fill the buffer from the reader.
560 /// If the reader reaches its end before filling the buffer at all, returns `false`.
561 /// Otherwise returns `true`.
562 fn try_read_all
<R
: Read
>(r
: &mut R
, buf
: &mut [u8]) -> io
::Result
<bool
> {
564 while read
< buf
.len() {
565 match r
.read(&mut buf
[read
..])?
{
571 return Err(other("failed to read entire block"));