1 //! The `pxar` encoder state machine.
3 //! This is the implementation used by both the synchronous and async pxar wrappers.
5 use std
::cell
::RefCell
;
7 use std
::mem
::{forget, size_of, size_of_val, take}
;
8 use std
::os
::unix
::ffi
::OsStrExt
;
12 use std
::task
::{Context, Poll}
;
14 use endian_trait
::Endian
;
16 use crate::binary_tree_array
;
17 use crate::decoder
::{self, SeqRead}
;
18 use crate::format
::{self, GoodbyeItem}
;
19 use crate::poll_fn
::poll_fn
;
26 pub use sync
::Encoder
;
28 /// File reference used to create hard links.
29 #[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd)]
30 pub struct LinkOffset(u64);
34 pub fn raw(self) -> u64 {
39 /// Sequential write interface used by the encoder's state machine.
41 /// This is our internal writer trait which is available for `std::io::Write` types in the
42 /// synchronous wrapper and for both `tokio` and `future` `AsyncWrite` types in the asynchronous
49 ) -> Poll
<io
::Result
<usize>>;
51 fn poll_flush(self: Pin
<&mut Self>, cx
: &mut Context
) -> Poll
<io
::Result
<()>>;
53 fn poll_close(self: Pin
<&mut Self>, cx
: &mut Context
) -> Poll
<io
::Result
<()>>;
55 /// While writing to a pxar archive we need to remember how much data we've written to track some
56 /// offsets. Particularly items like the goodbye table need to be able to compute offsets to
57 /// further back in the archive.
58 fn poll_position(self: Pin
<&mut Self>, cx
: &mut Context
) -> Poll
<io
::Result
<u64>>;
60 /// To avoid recursively borrowing each time we nest into a subdirectory we add this helper.
61 /// Otherwise starting a subdirectory will get a trait object pointing to `T`, nesting another
62 /// subdirectory in that would have a trait object pointing to the trait object, and so on.
63 fn as_trait_object(&mut self) -> &mut dyn SeqWrite
67 self as &mut dyn SeqWrite
71 /// Allow using trait objects for generics taking a `SeqWrite`.
72 impl<'a
> SeqWrite
for &mut (dyn SeqWrite
+ 'a
) {
77 ) -> Poll
<io
::Result
<usize>> {
79 self.map_unchecked_mut(|this
| &mut **this
)
80 .poll_seq_write(cx
, buf
)
84 fn poll_flush(self: Pin
<&mut Self>, cx
: &mut Context
) -> Poll
<io
::Result
<()>> {
85 unsafe { self.map_unchecked_mut(|this| &mut **this).poll_flush(cx) }
88 fn poll_close(self: Pin
<&mut Self>, cx
: &mut Context
) -> Poll
<io
::Result
<()>> {
89 unsafe { self.map_unchecked_mut(|this| &mut **this).poll_close(cx) }
92 fn poll_position(self: Pin
<&mut Self>, cx
: &mut Context
) -> Poll
<io
::Result
<u64>> {
93 unsafe { self.map_unchecked_mut(|this| &mut **this).poll_position(cx) }
96 fn as_trait_object(&mut self) -> &mut dyn SeqWrite
104 /// awaitable version of `poll_position`.
105 async
fn seq_write_position
<T
: SeqWrite
+ ?Sized
>(output
: &mut T
) -> io
::Result
<u64> {
106 poll_fn(move |cx
| unsafe { Pin::new_unchecked(&mut *output).poll_position(cx) }
).await
109 /// awaitable version of `poll_seq_write`.
110 async
fn seq_write
<T
: SeqWrite
+ ?Sized
>(output
: &mut T
, buf
: &[u8]) -> io
::Result
<usize> {
111 poll_fn(|cx
| unsafe { Pin::new_unchecked(&mut *output).poll_seq_write(cx, buf) }
).await
114 /// Write the entire contents of a buffer, handling short writes.
115 async
fn seq_write_all
<T
: SeqWrite
+ ?Sized
>(output
: &mut T
, mut buf
: &[u8]) -> io
::Result
<()> {
116 while !buf
.is_empty() {
117 let got
= seq_write(&mut *output
, buf
).await?
;
123 /// Write an endian-swappable struct.
124 async
fn seq_write_struct
<E
: Endian
, T
>(output
: &mut T
, data
: E
) -> io
::Result
<()>
126 T
: SeqWrite
+ ?Sized
,
128 let data
= data
.to_le();
129 seq_write_all(output
, unsafe {
130 std
::slice
::from_raw_parts(&data
as *const E
as *const u8, size_of_val(&data
))
135 /// Write a pxar entry.
136 async
fn seq_write_pxar_entry
<T
>(output
: &mut T
, htype
: u64, data
: &[u8]) -> io
::Result
<()>
138 T
: SeqWrite
+ ?Sized
,
142 format
::Header
::with_content_size(htype
, data
.len() as u64),
145 seq_write_all(output
, data
).await
148 /// Write a pxar entry terminated by an additional zero which is not contained in the provided
150 async
fn seq_write_pxar_entry_zero
<T
>(output
: &mut T
, htype
: u64, data
: &[u8]) -> io
::Result
<()>
152 T
: SeqWrite
+ ?Sized
,
156 format
::Header
::with_content_size(htype
, 1 + data
.len() as u64),
159 seq_write_all(&mut *output
, data
).await?
;
160 seq_write_all(output
, &[0u8]).await
163 /// Write a pxar entry consisting of an endian-swappable struct.
164 async
fn seq_write_pxar_struct_entry
<E
, T
>(output
: &mut T
, htype
: u64, data
: E
) -> io
::Result
<()>
166 T
: SeqWrite
+ ?Sized
,
169 let data
= data
.to_le();
170 seq_write_pxar_entry(output
, htype
, unsafe {
171 std
::slice
::from_raw_parts(&data
as *const E
as *const u8, size_of_val(&data
))
176 /// Error conditions caused by wrong usage of this crate.
177 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
178 pub enum EncodeError
{
179 /// The user dropped a `File` without finishing writing all of its contents.
181 /// This is required because the payload length is written out at the beginning and decoding
182 /// requires the right amount of data to follow.
185 /// The user dropped a directory without finalizing it.
187 /// Finalizing is required to build the goodbye table at the end of a directory.
192 struct EncoderState
{
193 /// Goodbye items for this directory, excluding the tail.
194 items
: Vec
<GoodbyeItem
>,
196 /// User caused error conditions.
197 encode_error
: Option
<EncodeError
>,
199 /// Offset of this directory's ENTRY.
202 /// Offset to this directory's first FILENAME.
205 /// If this is a subdirectory, this points to this directory's FILENAME.
206 file_offset
: Option
<u64>,
208 /// If this is a subdirectory, this contains this directory's hash for the goodbye item.
213 fn merge_error(&mut self, error
: Option
<EncodeError
>) {
214 // one error is enough:
215 if self.encode_error
.is_none() {
216 self.encode_error
= error
;
220 fn add_error(&mut self, error
: EncodeError
) {
221 self.merge_error(Some(error
));
225 /// The encoder state machine implementation for a directory.
227 /// We use `async fn` to implement the encoder state machine so that we can easily plug in both
228 /// synchronous and `async` I/O objects as output.
229 pub(crate) struct EncoderImpl
<'a
, T
: SeqWrite
+ 'a
> {
232 parent
: Option
<&'a
mut EncoderState
>,
235 /// Since only the "current" entry can be actively writing files, we share the file copy
237 file_copy_buffer
: Rc
<RefCell
<Vec
<u8>>>,
240 impl<'a
, T
: SeqWrite
+ 'a
> Drop
for EncoderImpl
<'a
, T
> {
242 if let Some(ref mut parent
) = self.parent
{
244 parent
.merge_error(self.state
.encode_error
);
246 parent
.add_error(EncodeError
::IncompleteDirectory
);
248 } else if !self.finished
{
249 // FIXME: how do we deal with this?
250 // eprintln!("Encoder dropped without finishing!");
255 impl<'a
, T
: SeqWrite
+ 'a
> EncoderImpl
<'a
, T
> {
256 pub async
fn new(output
: T
, metadata
: &Metadata
) -> io
::Result
<EncoderImpl
<'a
, T
>> {
257 if !metadata
.is_dir() {
258 io_bail
!("directory metadata must contain the directory mode flag");
260 let mut this
= Self {
262 state
: EncoderState
::default(),
265 file_copy_buffer
: Rc
::new(RefCell
::new(crate::util
::vec_new(1024 * 1024))),
268 this
.encode_metadata(metadata
).await?
;
269 this
.state
.files_offset
= seq_write_position(&mut this
.output
).await?
;
274 fn check(&self) -> io
::Result
<()> {
275 match self.state
.encode_error
{
276 Some(EncodeError
::IncompleteFile
) => io_bail
!("incomplete file"),
277 Some(EncodeError
::IncompleteDirectory
) => io_bail
!("directory not finalized"),
282 pub async
fn create_file
<'b
>(
287 ) -> io
::Result
<FileImpl
<'b
>>
291 self.create_file_do(metadata
, file_name
.as_os_str().as_bytes(), file_size
)
295 async
fn create_file_do
<'b
>(
300 ) -> io
::Result
<FileImpl
<'b
>>
306 let file_offset
= seq_write_position(&mut self.output
).await?
;
307 self.start_file_do(Some(metadata
), file_name
).await?
;
311 format
::Header
::with_content_size(format
::PXAR_PAYLOAD
, file_size
),
315 let payload_data_offset
= seq_write_position(&mut self.output
).await?
;
317 let meta_size
= payload_data_offset
- file_offset
;
320 output
: &mut self.output
,
321 goodbye_item
: GoodbyeItem
{
322 hash
: format
::hash_filename(file_name
),
324 size
: file_size
+ meta_size
,
326 remaining_size
: file_size
,
327 parent
: &mut self.state
,
331 /// Return a file offset usable with `add_hardlink`.
332 pub async
fn add_file(
337 content
: &mut dyn SeqRead
,
338 ) -> io
::Result
<LinkOffset
> {
339 let buf
= Rc
::clone(&self.file_copy_buffer
);
340 let mut file
= self.create_file(metadata
, file_name
, file_size
).await?
;
341 let mut buf
= buf
.borrow_mut();
343 let got
= decoder
::seq_read(&mut *content
, &mut buf
[..]).await?
;
347 file
.write_all(&buf
[..got
]).await?
;
350 Ok(file
.file_offset())
353 /// Add a symlink entry. Unlike `add_file`, this does not return an offset usable with `add_hardlink`.
354 pub async
fn add_symlink(
359 ) -> io
::Result
<()> {
360 let _ofs
: LinkOffset
= self
364 Some((format
::PXAR_SYMLINK
, target
.as_os_str().as_bytes())),
370 /// Add a hardlink entry referring to a prior file via the `LinkOffset` obtained when it was added.
371 pub async
fn add_hardlink(
375 target_offset
: LinkOffset
,
376 ) -> io
::Result
<()> {
377 let current_offset
= seq_write_position(&mut self.output
).await?
;
378 if current_offset
<= target_offset
.0 {
379 io_bail
!("invalid hardlink offset, can only point to prior files");
382 let offset_bytes
= (current_offset
- target_offset
.0).to_le_bytes();
383 let target_bytes
= target
.as_os_str().as_bytes();
384 let mut hardlink
= Vec
::with_capacity(offset_bytes
.len() + target_bytes
.len());
385 hardlink
.extend(&offset_bytes
);
386 hardlink
.extend(target_bytes
);
387 let _this_offset
: LinkOffset
= self
388 .add_file_entry(None
, file_name
, Some((format
::PXAR_HARDLINK
, &hardlink
)))
393 /// Add a device node entry. The metadata must have a device mode; no link offset is returned.
394 pub async
fn add_device(
398 device
: format
::Device
,
399 ) -> io
::Result
<()> {
400 if !metadata
.is_device() {
401 io_bail
!("entry added via add_device must have a device mode in its metadata");
404 let device
= device
.to_le();
405 let device
= unsafe {
406 std
::slice
::from_raw_parts(
407 &device
as *const format
::Device
as *const u8,
408 size_of
::<format
::Device
>(),
411 let _ofs
: LinkOffset
= self
415 Some((format
::PXAR_DEVICE
, device
)),
421 /// Add a FIFO entry. The metadata must be of type fifo; no link offset is returned.
422 pub async
fn add_fifo(&mut self, metadata
: &Metadata
, file_name
: &Path
) -> io
::Result
<()> {
423 if !metadata
.is_fifo() {
424 io_bail
!("entry added via add_device must be of type fifo in its metadata");
427 let _ofs
: LinkOffset
= self.add_file_entry(Some(metadata
), file_name
, None
).await?
;
431 /// Add a socket entry. The metadata must be of type socket; no link offset is returned.
432 pub async
fn add_socket(&mut self, metadata
: &Metadata
, file_name
: &Path
) -> io
::Result
<()> {
433 if !metadata
.is_socket() {
434 io_bail
!("entry added via add_device must be of type socket in its metadata");
437 let _ofs
: LinkOffset
= self.add_file_entry(Some(metadata
), file_name
, None
).await?
;
441 /// Return a file offset usable with `add_hardlink`.
442 async
fn add_file_entry(
444 metadata
: Option
<&Metadata
>,
446 entry_htype_data
: Option
<(u64, &[u8])>,
447 ) -> io
::Result
<LinkOffset
> {
450 let file_offset
= seq_write_position(&mut self.output
).await?
;
452 let file_name
= file_name
.as_os_str().as_bytes();
454 self.start_file_do(metadata
, file_name
).await?
;
455 if let Some((htype
, entry_data
)) = entry_htype_data
{
456 seq_write_pxar_entry_zero(&mut self.output
, htype
, entry_data
).await?
;
459 let end_offset
= seq_write_position(&mut self.output
).await?
;
461 self.state
.items
.push(GoodbyeItem
{
462 hash
: format
::hash_filename(file_name
),
464 size
: end_offset
- file_offset
,
467 Ok(LinkOffset(file_offset
))
472 async
fn position(&mut self) -> io
::Result
<u64> {
473 seq_write_position(&mut self.output
).await
476 pub async
fn create_directory
<'b
>(
480 ) -> io
::Result
<EncoderImpl
<'b
, &'b
mut dyn SeqWrite
>>
486 if !metadata
.is_dir() {
487 io_bail
!("directory metadata must contain the directory mode flag");
490 let file_name
= file_name
.as_os_str().as_bytes();
491 let file_hash
= format
::hash_filename(file_name
);
493 let file_offset
= self.position().await?
;
494 self.encode_filename(file_name
).await?
;
496 let entry_offset
= self.position().await?
;
497 self.encode_metadata(&metadata
).await?
;
499 let files_offset
= self.position().await?
;
502 output
: self.output
.as_trait_object(),
503 state
: EncoderState
{
506 file_offset
: Some(file_offset
),
507 file_hash
: file_hash
,
510 parent
: Some(&mut self.state
),
512 file_copy_buffer
: Rc
::clone(&self.file_copy_buffer
),
516 async
fn start_file_do(
518 metadata
: Option
<&Metadata
>,
520 ) -> io
::Result
<()> {
521 self.encode_filename(file_name
).await?
;
522 if let Some(metadata
) = metadata
{
523 self.encode_metadata(&metadata
).await?
;
528 async
fn encode_metadata(&mut self, metadata
: &Metadata
) -> io
::Result
<()> {
529 seq_write_pxar_struct_entry(&mut self.output
, format
::PXAR_ENTRY
, metadata
.stat
.clone())
532 for xattr
in &metadata
.xattrs
{
533 self.write_xattr(xattr
).await?
;
536 self.write_acls(&metadata
.acl
).await?
;
538 if let Some(fcaps
) = &metadata
.fcaps
{
539 self.write_file_capabilities(fcaps
).await?
;
542 if let Some(qpid
) = &metadata
.quota_project_id
{
543 self.write_quota_project_id(qpid
).await?
;
549 async
fn write_xattr(&mut self, xattr
: &format
::XAttr
) -> io
::Result
<()> {
550 seq_write_pxar_entry(&mut self.output
, format
::PXAR_XATTR
, &xattr
.data
).await
553 async
fn write_acls(&mut self, acl
: &crate::Acl
) -> io
::Result
<()> {
554 for acl
in &acl
.users
{
555 seq_write_pxar_struct_entry(&mut self.output
, format
::PXAR_ACL_USER
, acl
.clone())
559 for acl
in &acl
.groups
{
560 seq_write_pxar_struct_entry(&mut self.output
, format
::PXAR_ACL_GROUP
, acl
.clone())
564 if let Some(acl
) = &acl
.group_obj
{
565 seq_write_pxar_struct_entry(&mut self.output
, format
::PXAR_ACL_GROUP_OBJ
, acl
.clone())
569 if let Some(acl
) = &acl
.default {
570 seq_write_pxar_struct_entry(&mut self.output
, format
::PXAR_ACL_DEFAULT
, acl
.clone())
574 for acl
in &acl
.default_users
{
575 seq_write_pxar_struct_entry(
577 format
::PXAR_ACL_DEFAULT_USER
,
583 for acl
in &acl
.default_groups
{
584 seq_write_pxar_struct_entry(
586 format
::PXAR_ACL_DEFAULT_GROUP
,
595 async
fn write_file_capabilities(&mut self, fcaps
: &format
::FCaps
) -> io
::Result
<()> {
596 seq_write_pxar_entry(&mut self.output
, format
::PXAR_FCAPS
, &fcaps
.data
).await
599 async
fn write_quota_project_id(
601 quota_project_id
: &format
::QuotaProjectId
,
602 ) -> io
::Result
<()> {
603 seq_write_pxar_struct_entry(
605 format
::PXAR_QUOTA_PROJID
,
606 quota_project_id
.clone(),
611 async
fn encode_filename(&mut self, file_name
: &[u8]) -> io
::Result
<()> {
612 crate::util
::validate_filename(file_name
)?
;
613 seq_write_pxar_entry_zero(&mut self.output
, format
::PXAR_FILENAME
, file_name
).await
616 pub async
fn finish(mut self) -> io
::Result
<()> {
617 let tail_bytes
= self.finish_goodbye_table().await?
;
618 seq_write_pxar_entry(&mut self.output
, format
::PXAR_GOODBYE
, &tail_bytes
).await?
;
619 if let Some(parent
) = &mut self.parent
{
620 let file_offset
= self
623 .expect("internal error: parent set but no file_offset?");
625 let end_offset
= seq_write_position(&mut self.output
).await?
;
627 parent
.items
.push(GoodbyeItem
{
628 hash
: self.state
.file_hash
,
630 size
: end_offset
- file_offset
,
633 self.finished
= true;
637 async
fn finish_goodbye_table(&mut self) -> io
::Result
<Vec
<u8>> {
638 let goodbye_offset
= seq_write_position(&mut self.output
).await?
;
640 // "take" out the tail (to not leave an array of endian-swapped structs in `self`)
641 let mut tail
= take(&mut self.state
.items
);
642 let tail_size
= (tail
.len() + 1) * size_of
::<GoodbyeItem
>();
643 let goodbye_size
= tail_size
as u64 + size_of
::<format
::Header
>() as u64;
645 // sort, then create a BST
646 tail
.sort_unstable_by(|a
, b
| a
.hash
.cmp(&b
.hash
));
648 let mut bst
= Vec
::with_capacity(tail
.len() + 1);
650 bst
.set_len(tail
.len());
652 binary_tree_array
::copy(tail
.len(), |src
, dest
| {
653 let mut item
= tail
[src
].clone();
654 // fixup the goodbye table offsets to be relative and with the right endianness
655 item
.offset
= goodbye_offset
- item
.offset
;
657 std
::ptr
::write(&mut bst
[dest
], item
.to_le());
664 hash
: format
::PXAR_GOODBYE_TAIL_MARKER
,
665 offset
: goodbye_offset
- self.state
.entry_offset
,
671 // turn this into a byte vector since after endian-swapping we can no longer guarantee that
672 // the items make sense:
673 let data
= bst
.as_mut_ptr() as *mut u8;
674 let capacity
= bst
.capacity() * size_of
::<GoodbyeItem
>();
676 Ok(unsafe { Vec::from_raw_parts(data, tail_size, capacity) }
)
680 /// Writer for a file object in a directory.
681 pub struct FileImpl
<'a
> {
682 output
: &'a
mut dyn SeqWrite
,
684 /// This file's `GoodbyeItem`. FIXME: We currently don't touch this, can we just push it
685 /// directly instead of on Drop of FileImpl?
686 goodbye_item
: GoodbyeItem
,
688 /// While writing data to this file, this is how much space we still have left, this must reach
692 /// The directory containing this file. This is where we propagate the `IncompleteFile` error
693 /// to, and where we insert our `GoodbyeItem`.
694 parent
: &'a
mut EncoderState
,
697 impl<'a
> Drop
for FileImpl
<'a
> {
699 if self.remaining_size
!= 0 {
700 self.parent
.add_error(EncodeError
::IncompleteFile
);
703 self.parent
.items
.push(self.goodbye_item
.clone());
707 impl<'a
> FileImpl
<'a
> {
708 /// Get the file offset to be able to reference it with `add_hardlink`.
709 pub fn file_offset(&self) -> LinkOffset
{
710 LinkOffset(self.goodbye_item
.offset
)
713 fn check_remaining(&self, size
: usize) -> io
::Result
<()> {
714 if size
as u64 > self.remaining_size
{
715 io_bail
!("attempted to write more than previously allocated");
721 /// Poll write interface to more easily connect to tokio/futures.
722 #[cfg(any(feature = "tokio-io", feature = "futures-io"))]
724 self: Pin
<&mut Self>,
727 ) -> Poll
<io
::Result
<usize>> {
728 let this
= self.get_mut();
729 this
.check_remaining(data
.len())?
;
730 let output
= unsafe { Pin::new_unchecked(&mut *this.output) }
;
731 match output
.poll_seq_write(cx
, data
) {
732 Poll
::Ready(Ok(put
)) => {
733 this
.remaining_size
-= put
as u64;
740 /// Poll flush interface to more easily connect to tokio/futures.
741 #[cfg(any(feature = "tokio-io", feature = "futures-io"))]
742 pub fn poll_flush(self: Pin
<&mut Self>, cx
: &mut Context
) -> Poll
<io
::Result
<()>> {
744 self.map_unchecked_mut(|this
| &mut this
.output
)
749 /// Poll close/shutdown interface to more easily connect to tokio/futures.
750 #[cfg(any(feature = "tokio-io", feature = "futures-io"))]
751 pub fn poll_close(self: Pin
<&mut Self>, cx
: &mut Context
) -> Poll
<io
::Result
<()>> {
753 self.map_unchecked_mut(|this
| &mut this
.output
)
758 /// Write file data for the current file entry in a pxar archive.
760 /// This forwards to the output's `SeqWrite::poll_seq_write` and may write fewer bytes than
761 /// requested. Check the return value for how many. There's also a `write_all` method available
763 pub async
fn write(&mut self, data
: &[u8]) -> io
::Result
<usize> {
764 self.check_remaining(data
.len())?
;
765 let put
= seq_write(&mut self.output
, data
).await?
;
766 self.remaining_size
-= put
as u64;
770 /// Completely write file data for the current file entry in a pxar archive.
771 pub async
fn write_all(&mut self, data
: &[u8]) -> io
::Result
<()> {
772 self.check_remaining(data
.len())?
;
773 seq_write_all(&mut self.output
, data
).await?
;
774 self.remaining_size
-= data
.len() as u64;
779 #[cfg(feature = "tokio-io")]
780 impl<'a
> tokio
::io
::AsyncWrite
for FileImpl
<'a
> {
781 fn poll_write(self: Pin
<&mut Self>, cx
: &mut Context
, buf
: &[u8]) -> Poll
<io
::Result
<usize>> {
782 FileImpl
::poll_write(self, cx
, buf
)
785 fn poll_flush(self: Pin
<&mut Self>, cx
: &mut Context
) -> Poll
<io
::Result
<()>> {
786 FileImpl
::poll_flush(self, cx
)
789 fn poll_shutdown(self: Pin
<&mut Self>, cx
: &mut Context
) -> Poll
<io
::Result
<()>> {
790 FileImpl
::poll_close(self, cx
)
794 #[cfg(feature = "futures-io")]
795 impl<'a
> futures
::io
::AsyncWrite
for FileImpl
<'a
> {
796 fn poll_write(self: Pin
<&mut Self>, cx
: &mut Context
, buf
: &[u8]) -> Poll
<io
::Result
<usize>> {
797 FileImpl
::poll_write(self, cx
, buf
)
800 fn poll_flush(self: Pin
<&mut Self>, cx
: &mut Context
) -> Poll
<io
::Result
<()>> {
801 FileImpl
::poll_flush(self, cx
)
804 fn poll_close(self: Pin
<&mut Self>, cx
: &mut Context
) -> Poll
<io
::Result
<()>> {
805 FileImpl
::poll_close(self, cx
)