]> git.proxmox.com Git - pxar.git/blob - src/format/mod.rs
add format description to format module
[pxar.git] / src / format / mod.rs
1 //! *pxar* binary format definition
2 //!
3 //! Please note that all values are stored in little endian ordering.
4 //!
5 //! The Archive contains a list of items. Each item starts with a `Header`, followed by the
6 //! item data.
7 //!
8 //! An archive contains items in the following order:
9 //! * `ENTRY` -- containing general stat() data and related bits
10 //! * `XATTR` -- one extended attribute
11 //! * ... -- more of these when there are multiple defined
12 //! * `ACL_USER` -- one `USER ACL` entry
13 //! * ... -- more of these when there are multiple defined
14 //! * `ACL_GROUP` -- one `GROUP ACL` entry
15 //! * ... -- more of these when there are multiple defined
16 //! * `ACL_GROUP_OBJ` -- The `ACL_GROUP_OBJ`
17 //! * `ACL_DEFAULT` -- The various default ACL fields if there's one defined
18 //! * `ACL_DEFAULT_USER` -- one USER ACL entry
19 //! * ... -- more of these when multiple are defined
20 //! * `ACL_DEFAULT_GROUP` -- one GROUP ACL entry
21 //! * ... -- more of these when multiple are defined
22 //! * `FCAPS` -- file capability in Linux disk format
23 //! * `QUOTA_PROJECT_ID` -- the ext4/xfs quota project ID
24 //! * `PAYLOAD` -- file contents, if it is one
25 //! * `SYMLINK` -- symlink target, if it is one
26 //! * `DEVICE` -- device major/minor, if it is a block/char device
27 //!
28 //! If we are serializing a directory, then this is followed by:
29 //!
30 //! * `FILENAME` -- name of the first directory entry (strictly ordered!)
31 //! * `<archive>` -- serialization of the first directory entry's metadata and contents,
32 //! following the exact same archive format
33 //! * `FILENAME` -- name of the second directory entry (strictly ordered!)
34 //! * `<archive>` -- serialization of the second directory entry
35 //! * ...
36 //! * `GOODBYE` -- lookup table at the end of a list of directory entries
37
38 use std::cmp::Ordering;
39 use std::ffi::{CStr, OsStr};
40 use std::fmt;
41 use std::fmt::Display;
42 use std::io;
43 use std::mem::size_of;
44 use std::os::unix::ffi::OsStrExt;
45 use std::path::Path;
46
47 use endian_trait::Endian;
48 use siphasher::sip::SipHasher24;
49
50 pub mod acl;
51
// generated with:
// $ echo -n 'PROXMOX ARCHIVE FORMAT' | sha1sum | sed -re 's/^(.{16})(.{16}).*$/0x\1, 0x\2/'
/// First key passed to the SipHash-2-4 hasher used by `hash_filename`.
pub const PXAR_HASH_KEY_1: u64 = 0x83ac3f1cfbb450db;
/// Second key passed to the SipHash-2-4 hasher used by `hash_filename`.
pub const PXAR_HASH_KEY_2: u64 = 0xaa4f1b6879369fbd;
56
/// While these constants correspond to `libc::S_` constants, we need these to be fixed for the
/// format itself, so we redefine them here.
///
/// Additionally this gets rid of a bunch of casts between u32 and u64.
///
/// You can usually find the values for these in `/usr/include/linux/stat.h`.
#[rustfmt::skip]
pub mod mode {
    /// Bit mask selecting the file type portion of a mode value.
    pub const IFMT : u64 = 0o0170000;

    /// File type: socket.
    pub const IFSOCK : u64 = 0o0140000;
    /// File type: symbolic link.
    pub const IFLNK : u64 = 0o0120000;
    /// File type: regular file.
    pub const IFREG : u64 = 0o0100000;
    /// File type: block device.
    pub const IFBLK : u64 = 0o0060000;
    /// File type: directory.
    pub const IFDIR : u64 = 0o0040000;
    /// File type: character device.
    pub const IFCHR : u64 = 0o0020000;
    /// File type: named pipe (FIFO).
    pub const IFIFO : u64 = 0o0010000;

    /// Counterpart of `libc::S_ISUID` (set-user-ID bit).
    pub const ISUID : u64 = 0o0004000;
    /// Counterpart of `libc::S_ISGID` (set-group-ID bit).
    pub const ISGID : u64 = 0o0002000;
    /// Counterpart of `libc::S_ISVTX` (sticky bit).
    pub const ISVTX : u64 = 0o0001000;
}
79
// Item type tags stored in `Header::htype`.

/// Marks an item containing general stat() data and related bits
pub const PXAR_ENTRY: u64 = 0x11da850a1c1cceff;
/// Marks the name of a directory entry
pub const PXAR_FILENAME: u64 = 0x16701121063917b3;
/// Marks a symlink target
pub const PXAR_SYMLINK: u64 = 0x27f971e7dbf5dc5f;
/// Marks the major/minor numbers of a block or character device
pub const PXAR_DEVICE: u64 = 0x9fc9e906586d5ce9;
/// Marks one extended attribute
pub const PXAR_XATTR: u64 = 0x0dab0229b57dcd03;
/// Marks one `USER ACL` entry
pub const PXAR_ACL_USER: u64 = 0x2ce8540a457d55b8;
/// Marks one `GROUP ACL` entry
pub const PXAR_ACL_GROUP: u64 = 0x136e3eceb04c03ab;
/// Marks the `ACL_GROUP_OBJ` item
pub const PXAR_ACL_GROUP_OBJ: u64 = 0x10868031e9582876;
/// Marks the default ACL fields
pub const PXAR_ACL_DEFAULT: u64 = 0xbbbb13415a6896f5;
/// Marks one default `USER ACL` entry
pub const PXAR_ACL_DEFAULT_USER: u64 = 0xc89357b40532cd1f;
/// Marks one default `GROUP ACL` entry
pub const PXAR_ACL_DEFAULT_GROUP: u64 = 0xf90a8a5816038ffe;
/// Marks a file capability item (Linux disk format)
pub const PXAR_FCAPS: u64 = 0x2da9dd9db5f7fb67;
/// Marks the ext4/xfs quota project ID
pub const PXAR_QUOTA_PROJID: u64 = 0xe07540e82f7d1cbb;
/// Marks item as hardlink
pub const PXAR_HARDLINK: u64 = 0x51269c8422bd7275;
/// Marks the beginning of the payload (actual content) of regular files
pub const PXAR_PAYLOAD: u64 = 0x28147a1b0b7c1a25;
/// Marks item as entry of goodbye table
pub const PXAR_GOODBYE: u64 = 0x2fec4fa642d5731d;
/// The end marker used in the GOODBYE object
pub const PXAR_GOODBYE_TAIL_MARKER: u64 = 0xef5eed5b753e1555;
101
/// The header every archive item starts with: a type tag followed by the total item size.
///
/// The struct is `#[repr(C)]`; `size_of::<Header>()` is used throughout this module as the
/// size of the header inside the archive.
#[derive(Debug, Endian)]
#[repr(C)]
pub struct Header {
    /// The item type (see `PXAR_` constants).
    pub htype: u64,
    /// The size of the item, including the size of `Header`.
    full_size: u64,
}
110
111 impl Header {
112 #[inline]
113 pub fn with_full_size(htype: u64, full_size: u64) -> Self {
114 Self { htype, full_size }
115 }
116
117 #[inline]
118 pub fn with_content_size(htype: u64, content_size: u64) -> Self {
119 Self::with_full_size(htype, content_size + size_of::<Header>() as u64)
120 }
121
122 #[inline]
123 pub fn full_size(&self) -> u64 {
124 self.full_size
125 }
126
127 #[inline]
128 pub fn content_size(&self) -> u64 {
129 self.full_size() - (size_of::<Self>() as u64)
130 }
131
132 #[inline]
133 pub fn max_content_size(&self) -> u64 {
134 match self.htype {
135 // + null-termination
136 PXAR_FILENAME => crate::util::MAX_FILENAME_LEN + 1,
137 // + null-termination
138 PXAR_SYMLINK => crate::util::MAX_PATH_LEN + 1,
139 // + null-termination + offset
140 PXAR_HARDLINK => crate::util::MAX_PATH_LEN + 1 + (size_of::<u64>() as u64),
141 PXAR_DEVICE => size_of::<Device>() as u64,
142 PXAR_XATTR | PXAR_FCAPS => crate::util::MAX_XATTR_LEN,
143 PXAR_ACL_USER | PXAR_ACL_DEFAULT_USER => size_of::<acl::User>() as u64,
144 PXAR_ACL_GROUP | PXAR_ACL_DEFAULT_GROUP => size_of::<acl::Group>() as u64,
145 PXAR_ACL_DEFAULT => size_of::<acl::Default>() as u64,
146 PXAR_ACL_GROUP_OBJ => size_of::<acl::GroupObject>() as u64,
147 PXAR_QUOTA_PROJID => size_of::<QuotaProjectId>() as u64,
148 PXAR_ENTRY => size_of::<Entry>() as u64,
149 PXAR_PAYLOAD | PXAR_GOODBYE => std::u64::MAX - (size_of::<Self>() as u64),
150 _ => std::u64::MAX - (size_of::<Self>() as u64),
151 }
152 }
153
154 #[inline]
155 pub fn check_header_size(&self) -> io::Result<()> {
156 if self.full_size() < size_of::<Header>() as u64 {
157 io_bail!("invalid header {} - too small ({})", self, self.full_size());
158 }
159
160 if self.content_size() > self.max_content_size() {
161 io_bail!(
162 "invalid content size ({} > {}) of entry with {}",
163 self.content_size(),
164 self.max_content_size(),
165 self
166 );
167 }
168 Ok(())
169 }
170 }
171
172 impl Display for Header {
173 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
174 let readable = match self.htype {
175 PXAR_FILENAME => "FILENAME",
176 PXAR_SYMLINK => "SYMLINK",
177 PXAR_HARDLINK => "HARDLINK",
178 PXAR_DEVICE => "DEVICE",
179 PXAR_XATTR => "XATTR",
180 PXAR_FCAPS => "FCAPS",
181 PXAR_ACL_USER => "ACL_USER",
182 PXAR_ACL_DEFAULT_USER => "ACL_DEFAULT_USER",
183 PXAR_ACL_GROUP => "ACL_GROUP",
184 PXAR_ACL_DEFAULT_GROUP => "ACL_DEFAULT_GROUP",
185 PXAR_ACL_DEFAULT => "ACL_DEFAULT",
186 PXAR_ACL_GROUP_OBJ => "ACL_GROUP_OBJ",
187 PXAR_QUOTA_PROJID => "QUOTA_PROJID",
188 PXAR_ENTRY => "ENTRY",
189 PXAR_PAYLOAD => "PAYLOAD",
190 PXAR_GOODBYE => "GOODBYE",
191 _ => "UNKNOWN",
192 };
193 write!(f, "{} header ({:x})", readable, self.htype)
194 }
195 }
196
/// The stat-like data stored for an archive entry.
///
/// The struct is `#[repr(C)]`; its size is what `Header::max_content_size` allows for an
/// `ENTRY` item.
#[derive(Clone, Debug, Default, Endian)]
#[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
#[repr(C)]
pub struct Entry {
    /// File type and mode bits, see the constants in the `mode` module.
    pub mode: u64,
    // NOTE(review): the meaning of the individual flag bits is not visible in this file.
    pub flags: u64,
    /// Owner user id.
    pub uid: u32,
    /// Owner group id.
    pub gid: u32,
    /// Modification time in nanoseconds since the epoch.
    pub mtime: u64,
}
207
208 /// Builder pattern methods.
209 impl Entry {
210 pub const fn mode(self, mode: u64) -> Self {
211 Self { mode, ..self }
212 }
213
214 pub const fn flags(self, flags: u64) -> Self {
215 Self { flags, ..self }
216 }
217
218 pub const fn uid(self, uid: u32) -> Self {
219 Self { uid, ..self }
220 }
221
222 pub const fn gid(self, gid: u32) -> Self {
223 Self { gid, ..self }
224 }
225
226 pub const fn mtime(self, mtime: u64) -> Self {
227 Self { mtime, ..self }
228 }
229
230 pub const fn set_dir(self) -> Self {
231 let mode = self.mode;
232 self.mode((mode & !mode::IFMT) | mode::IFDIR)
233 }
234
235 pub const fn set_regular_file(self) -> Self {
236 let mode = self.mode;
237 self.mode((mode & !mode::IFMT) | mode::IFREG)
238 }
239
240 pub const fn set_symlink(self) -> Self {
241 let mode = self.mode;
242 self.mode((mode & !mode::IFMT) | mode::IFLNK)
243 }
244
245 pub const fn set_blockdev(self) -> Self {
246 let mode = self.mode;
247 self.mode((mode & !mode::IFMT) | mode::IFBLK)
248 }
249
250 pub const fn set_chardev(self) -> Self {
251 let mode = self.mode;
252 self.mode((mode & !mode::IFMT) | mode::IFCHR)
253 }
254
255 pub const fn set_fifo(self) -> Self {
256 let mode = self.mode;
257 self.mode((mode & !mode::IFMT) | mode::IFIFO)
258 }
259 }
260
261 /// Convenience accessor methods.
262 impl Entry {
263 /// Get the mtime as duration since the epoch.
264 pub fn mtime_as_duration(&self) -> std::time::Duration {
265 std::time::Duration::from_nanos(self.mtime)
266 }
267
268 /// Get the file type portion of the mode bitfield.
269 pub fn get_file_bits(&self) -> u64 {
270 self.mode & mode::IFMT
271 }
272
273 /// Get the permission portion of the mode bitfield.
274 pub fn get_permission_bits(&self) -> u64 {
275 self.mode & !mode::IFMT
276 }
277 }
278
279 /// Convenience methods.
280 impl Entry {
281 /// Get the file type (`mode & mode::IFMT`).
282 pub fn file_type(&self) -> u64 {
283 self.mode & mode::IFMT
284 }
285
286 /// Get the file mode bits (`mode & !mode::IFMT`).
287 pub fn file_mode(&self) -> u64 {
288 self.mode & !mode::IFMT
289 }
290
291 /// Check whether this is a directory.
292 pub fn is_dir(&self) -> bool {
293 (self.mode & mode::IFMT) == mode::IFDIR
294 }
295
296 /// Check whether this is a symbolic link.
297 pub fn is_symlink(&self) -> bool {
298 (self.mode & mode::IFMT) == mode::IFLNK
299 }
300
301 /// Check whether this is a device node.
302 pub fn is_device(&self) -> bool {
303 let fmt = self.mode & mode::IFMT;
304 fmt == mode::IFCHR || fmt == mode::IFBLK
305 }
306
307 /// Check whether this is a block device node.
308 pub fn is_blockdev(&self) -> bool {
309 let fmt = self.mode & mode::IFMT;
310 fmt == mode::IFBLK
311 }
312
313 /// Check whether this is a character device node.
314 pub fn is_chardev(&self) -> bool {
315 let fmt = self.mode & mode::IFMT;
316 fmt == mode::IFCHR
317 }
318
319 /// Check whether this is a regular file.
320 pub fn is_regular_file(&self) -> bool {
321 (self.mode & mode::IFMT) == mode::IFREG
322 }
323
324 /// Check whether this is a named pipe (FIFO).
325 pub fn is_fifo(&self) -> bool {
326 (self.mode & mode::IFMT) == mode::IFIFO
327 }
328
329 /// Check whether this is a named socket.
330 pub fn is_socket(&self) -> bool {
331 (self.mode & mode::IFMT) == mode::IFSOCK
332 }
333 }
334
335 impl From<&std::fs::Metadata> for Entry {
336 fn from(meta: &std::fs::Metadata) -> Entry {
337 #[cfg(unix)]
338 use std::os::unix::fs::MetadataExt;
339
340 let this = Entry::default();
341
342 #[cfg(unix)]
343 let this = this
344 .uid(meta.uid())
345 .gid(meta.gid())
346 .mode(meta.mode() as u64);
347
348 let this = match meta.modified() {
349 Ok(mtime) => this.mtime(
350 mtime
351 .duration_since(std::time::SystemTime::UNIX_EPOCH)
352 .map(|dur| dur.as_nanos() as u64)
353 .unwrap_or(0u64),
354 ),
355 Err(_) => this,
356 };
357
358 let file_type = meta.file_type();
359 let mode = this.mode;
360 if file_type.is_dir() {
361 this.mode(mode | mode::IFDIR)
362 } else if file_type.is_symlink() {
363 this.mode(mode | mode::IFLNK)
364 } else {
365 this.mode(mode | mode::IFREG)
366 }
367 }
368 }
369
/// The name of a directory entry, as stored in a `FILENAME` item.
#[derive(Clone, Debug)]
pub struct Filename {
    /// The raw bytes of the name.
    // NOTE(review): presumably includes the trailing null-termination byte which
    // `Header::max_content_size` accounts for - confirm against the encoder/decoder.
    pub name: Vec<u8>,
}
374
/// The target of a symbolic link.
#[derive(Clone, Debug)]
pub struct Symlink {
    /// The raw bytes of the target. The `OsStr` views treat the last byte as a terminator and
    /// leave it out.
    pub data: Vec<u8>,
}

impl Symlink {
    /// The link target as an `OsStr`, without the final byte of `data`.
    pub fn as_os_str(&self) -> &OsStr {
        <Self as AsRef<OsStr>>::as_ref(self)
    }
}

/// Gives access to the complete raw data, including the final byte.
impl AsRef<[u8]> for Symlink {
    fn as_ref(&self) -> &[u8] {
        self.data.as_slice()
    }
}

/// Gives access to the target without the final byte of `data`; empty data yields an empty
/// string.
impl AsRef<OsStr> for Symlink {
    fn as_ref(&self) -> &OsStr {
        let target = match self.data.split_last() {
            Some((_terminator, target)) => target,
            None => &self.data[..],
        };
        OsStr::from_bytes(target)
    }
}
397
/// A hard link entry: an offset together with the raw bytes of the link target.
#[derive(Clone, Debug)]
pub struct Hardlink {
    // NOTE(review): what this offset is relative to is not visible in this file - confirm
    // against the encoder/decoder.
    pub offset: u64,
    /// The raw bytes of the target. The `OsStr` views treat the last byte as a terminator and
    /// leave it out.
    pub data: Vec<u8>,
}

impl Hardlink {
    /// The link target as an `OsStr`, without the final byte of `data`.
    pub fn as_os_str(&self) -> &OsStr {
        <Self as AsRef<OsStr>>::as_ref(self)
    }
}

/// Gives access to the complete raw data, including the final byte.
impl AsRef<[u8]> for Hardlink {
    fn as_ref(&self) -> &[u8] {
        self.data.as_slice()
    }
}

/// Gives access to the target without the final byte of `data`; empty data yields an empty
/// string.
impl AsRef<OsStr> for Hardlink {
    fn as_ref(&self) -> &OsStr {
        let target = match self.data.split_last() {
            Some((_terminator, target)) => target,
            None => &self.data[..],
        };
        OsStr::from_bytes(target)
    }
}
421
/// One extended attribute, stored as `name`, a NUL byte, then the value in a single buffer.
///
/// Equality and ordering only take the attribute name into account, not the value.
#[derive(Clone, Debug, Eq)]
#[repr(C)]
pub struct XAttr {
    /// The name, a separating NUL byte and the value, in this order.
    pub(crate) data: Vec<u8>,
    /// Length of the name in bytes, not counting the separating NUL byte.
    pub(crate) name_len: usize,
}

impl XAttr {
    /// Create an extended attribute from its name and value.
    ///
    /// The name is expected not to contain NUL bytes.
    pub fn new<N: AsRef<[u8]>, V: AsRef<[u8]>>(name: N, value: V) -> Self {
        let name = name.as_ref();
        let value = value.as_ref();
        let mut data = Vec::with_capacity(name.len() + value.len() + 1);
        data.extend(name);
        data.push(0);
        data.extend(value);
        Self {
            data,
            name_len: name.len(),
        }
    }

    /// The attribute name as a C string.
    ///
    /// The name is built with a checked constructor: nothing guarantees that the stored name
    /// is free of interior NUL bytes (`new` accepts arbitrary bytes), and handing such data to
    /// `CStr::from_bytes_with_nul_unchecked` would violate its safety contract. A name with an
    /// interior NUL byte is cut off at that byte; if there is no NUL byte at all the name is
    /// empty.
    ///
    /// # Panics
    ///
    /// Panics if `data` is shorter than `name_len + 1` bytes.
    pub fn name(&self) -> &CStr {
        let raw = &self.data[..self.name_len + 1];
        match raw.iter().position(|&byte| byte == 0) {
            // `raw[..=end]` contains exactly one NUL byte, at its end, so this cannot fail.
            Some(end) => CStr::from_bytes_with_nul(&raw[..=end]).unwrap_or_default(),
            None => Default::default(),
        }
    }

    /// The attribute value: everything after the NUL byte separating it from the name.
    pub fn value(&self) -> &[u8] {
        &self.data[(self.name_len + 1)..]
    }
}

/// Attributes are ordered by name only.
impl Ord for XAttr {
    fn cmp(&self, other: &XAttr) -> Ordering {
        self.name().cmp(other.name())
    }
}

impl PartialOrd for XAttr {
    fn partial_cmp(&self, other: &XAttr) -> Option<Ordering> {
        Some(self.cmp(other))
    }
}

/// Attributes are equal if their names are equal, the values are not compared.
impl PartialEq for XAttr {
    fn eq(&self, other: &XAttr) -> bool {
        self.name() == other.name()
    }
}
469
/// The major and minor number of a block or character device.
///
/// The struct is `#[repr(C)]`; its size is what `Header::max_content_size` allows for a
/// `DEVICE` item.
#[derive(Clone, Debug, Endian, Eq, PartialEq)]
#[repr(C)]
pub struct Device {
    /// The device's major number.
    pub major: u64,
    /// The device's minor number.
    pub minor: u64,
}
476
477 #[cfg(target_os = "linux")]
478 impl Device {
479 /// Get a `dev_t` value for this device.
480 #[rustfmt::skip]
481 pub fn to_dev_t(&self) -> u64 {
482 // see bits/sysmacros.h
483 ((self.major & 0x0000_0fff) << 8) |
484 ((self.major & 0xffff_f000) << 32) |
485 (self.minor & 0x0000_00ff) |
486 ((self.minor & 0xffff_ff00) << 12)
487 }
488
489 /// Get a `Device` from a `dev_t` value.
490 #[rustfmt::skip]
491 pub fn from_dev_t(dev: u64) -> Self {
492 // see to_dev_t
493 Self {
494 major: (dev >> 8) & 0x0000_0fff |
495 (dev >> 32) & 0xffff_f000,
496 minor: dev & 0x0000_00ff |
497 (dev >> 12) & 0xffff_ff00,
498 }
499 }
500 }
501
/// A `dev_t` produced by libc's `makedev` has to survive the round trip through `Device`.
#[cfg(all(test, target_os = "linux"))]
#[test]
fn test_linux_devices() {
    let reference = unsafe { ::libc::makedev(0xabcd_1234, 0xdcba_5678) };
    let roundtrip = Device::from_dev_t(reference).to_dev_t();
    assert_eq!(roundtrip, reference);
}
509
/// The content of an `FCAPS` item: a file capability in Linux disk format.
#[derive(Clone, Debug)]
#[cfg_attr(feature = "test-harness", derive(Eq, PartialEq))]
#[repr(C)]
pub struct FCaps {
    /// The raw capability data.
    pub data: Vec<u8>,
}
516
/// The content of a `QUOTA_PROJECT_ID` item.
#[derive(Clone, Copy, Debug, Endian, Eq, PartialEq)]
#[repr(C)]
pub struct QuotaProjectId {
    /// The ext4/xfs quota project ID.
    pub projid: u64,
}
522
/// One entry of the `GOODBYE` lookup table found at the end of a list of directory entries.
#[derive(Clone, Debug, Endian)]
#[repr(C)]
pub struct GoodbyeItem {
    /// SipHash24 of the directory item name. The last GOODBYE item uses the special hash value
    /// `PXAR_GOODBYE_TAIL_MARKER`.
    pub hash: u64,

    /// The offset from the start of the GOODBYE object to the start of the matching directory item
    /// (point to a FILENAME). The last GOODBYE item points to the start of the matching ENTRY
    /// object.
    pub offset: u64,

    /// The overall size of the directory item. This includes the FILENAME header. In other words,
    /// `goodbye_start - offset + size` points to the end of the directory.
    ///
    /// The last GOODBYE item repeats the size of the GOODBYE item.
    pub size: u64,
}
541
542 impl GoodbyeItem {
543 pub fn new(name: &[u8], offset: u64, size: u64) -> Self {
544 let hash = hash_filename(name);
545 Self { hash, offset, size }
546 }
547 }
548
549 pub fn hash_filename(name: &[u8]) -> u64 {
550 use std::hash::Hasher;
551
552 let mut hasher = SipHasher24::new_with_keys(PXAR_HASH_KEY_1, PXAR_HASH_KEY_2);
553 hasher.write(name);
554 hasher.finish()
555 }
556
/// Check whether `path` consists of exactly one normal component, that is, neither a root,
/// `.` nor `..`, and nothing following it.
pub fn path_is_legal_component(path: &Path) -> bool {
    let mut parts = path.components();
    match (parts.next(), parts.next()) {
        (Some(std::path::Component::Normal(_)), None) => true,
        _ => false,
    }
}
565
566 pub fn check_file_name(path: &Path) -> io::Result<()> {
567 if !path_is_legal_component(path) {
568 io_bail!("invalid file name in archive: {:?}", path);
569 } else {
570 Ok(())
571 }
572 }