-use failure::*;
+use std::convert::TryFrom;
use std::ffi::{CStr, CString, OsStr};
-use std::os::unix::ffi::OsStrExt;
+use std::fmt;
use std::io::{Read, Write, Seek, SeekFrom};
-use std::convert::TryFrom;
+use std::os::unix::ffi::OsStrExt;
-use chrono::offset::{TimeZone, Local};
+use anyhow::{bail, format_err, Error};
+use pathpatterns::{MatchList, MatchType};
use proxmox::tools::io::ReadExt;
-use crate::pxar::catalog::{BackupCatalogWriter, CatalogEntryType};
use crate::backup::file_formats::PROXMOX_CATALOG_FILE_MAGIC_1_0;
+use crate::pxar::catalog::BackupCatalogWriter;
+
+/// Type tag of a catalog entry.
+///
+/// Each discriminant is the ASCII byte that is written to the catalog for
+/// that entry type.
+#[repr(u8)]
+#[derive(Copy,Clone,PartialEq)]
+pub(crate) enum CatalogEntryType {
+ Directory = b'd',
+ File = b'f',
+ Symlink = b'l',
+ Hardlink = b'h',
+ BlockDevice = b'b',
+ CharDevice = b'c',
+ Fifo = b'p', // Fifo,Pipe
+ Socket = b's',
+}
-struct DirEntry {
- name: Vec<u8>,
- attr: DirEntryAttribute,
+/// Decode a catalog type tag byte into its `CatalogEntryType`.
+impl TryFrom<u8> for CatalogEntryType {
+    type Error = Error;
+
+    /// Fails with an error naming the offending character when `value` is
+    /// not one of the known tag bytes.
+    fn try_from(value: u8) -> Result<Self, Error> {
+        let etype = match value {
+            b'd' => Self::Directory,
+            b'f' => Self::File,
+            b'l' => Self::Symlink,
+            b'h' => Self::Hardlink,
+            b'b' => Self::BlockDevice,
+            b'c' => Self::CharDevice,
+            b'p' => Self::Fifo,
+            b's' => Self::Socket,
+            _ => bail!("invalid CatalogEntryType value '{}'", char::from(value)),
+        };
+        Ok(etype)
+    }
+}
+
+/// Map an entry's attributes to the plain type tag, dropping any payload.
+impl From<&DirEntryAttribute> for CatalogEntryType {
+    fn from(value: &DirEntryAttribute) -> Self {
+        use DirEntryAttribute as Attr;
+
+        match value {
+            Attr::Directory { .. } => Self::Directory,
+            Attr::File { .. } => Self::File,
+            Attr::Symlink => Self::Symlink,
+            Attr::Hardlink => Self::Hardlink,
+            Attr::BlockDevice => Self::BlockDevice,
+            Attr::CharDevice => Self::CharDevice,
+            Attr::Fifo => Self::Fifo,
+            Attr::Socket => Self::Socket,
+        }
+    }
}
-enum DirEntryAttribute {
+/// Formats the entry type as its single-character tag.
+impl fmt::Display for CatalogEntryType {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let tag = char::from(*self as u8);
+        write!(f, "{}", tag)
+    }
+}
+
+/// Represents a named directory entry
+///
+/// The ``attr`` property contains the exact type with type specific
+/// attributes.
+#[derive(Clone, PartialEq)]
+pub struct DirEntry {
+ /// Entry name as raw bytes
+ pub name: Vec<u8>,
+ /// Entry type together with its type specific attributes
+ pub attr: DirEntryAttribute,
+}
+
+/// Used to specify additional attributes inside DirEntry
+#[derive(Clone, Debug, PartialEq)]
+pub enum DirEntryAttribute {
Directory { start: u64 },
- File { size: u64, mtime: u64 },
+ File { size: u64, mtime: i64 },
Symlink,
Hardlink,
BlockDevice,
Socket,
}
+impl DirEntry {
+    /// Build an entry of type `etype`, keeping only the values that type carries.
+    ///
+    /// `start` is stored for directories, `size` and `mtime` for files; every
+    /// other entry type ignores all three.
+    fn new(etype: CatalogEntryType, name: Vec<u8>, start: u64, size: u64, mtime: i64) -> Self {
+        let attr = match etype {
+            CatalogEntryType::Directory => DirEntryAttribute::Directory { start },
+            CatalogEntryType::File => DirEntryAttribute::File { size, mtime },
+            CatalogEntryType::Symlink => DirEntryAttribute::Symlink,
+            CatalogEntryType::Hardlink => DirEntryAttribute::Hardlink,
+            CatalogEntryType::BlockDevice => DirEntryAttribute::BlockDevice,
+            CatalogEntryType::CharDevice => DirEntryAttribute::CharDevice,
+            CatalogEntryType::Fifo => DirEntryAttribute::Fifo,
+            CatalogEntryType::Socket => DirEntryAttribute::Socket,
+        };
+        DirEntry { name, attr }
+    }
+
+    /// Get file mode bits for this entry to be used with the `MatchList` api.
+    ///
+    /// Returns `None` for hardlinks; every other entry type maps to its
+    /// `pxar::mode` file type constant.
+    pub fn get_file_mode(&self) -> Option<u32> {
+        let mode = match self.attr {
+            DirEntryAttribute::Hardlink => return None,
+            DirEntryAttribute::Directory { .. } => pxar::mode::IFDIR,
+            DirEntryAttribute::File { .. } => pxar::mode::IFREG,
+            DirEntryAttribute::Symlink => pxar::mode::IFLNK,
+            DirEntryAttribute::BlockDevice => pxar::mode::IFBLK,
+            DirEntryAttribute::CharDevice => pxar::mode::IFCHR,
+            DirEntryAttribute::Fifo => pxar::mode::IFIFO,
+            DirEntryAttribute::Socket => pxar::mode::IFSOCK,
+        };
+        Some(mode as u32)
+    }
+
+    /// Check if DirEntry is a directory
+    pub fn is_directory(&self) -> bool {
+        let attr = &self.attr;
+        matches!(attr, DirEntryAttribute::Directory { .. })
+    }
+
+    /// Check if DirEntry is a symlink
+    pub fn is_symlink(&self) -> bool {
+        let attr = &self.attr;
+        matches!(attr, DirEntryAttribute::Symlink)
+    }
+}
+
struct DirInfo {
name: CString,
entries: Vec<DirEntry>,
catalog_encode_u64(writer, name.len() as u64)?;
writer.write_all(name)?;
catalog_encode_u64(writer, *size)?;
- catalog_encode_u64(writer, *mtime)?;
+ catalog_encode_i64(writer, *mtime)?;
}
DirEntry { name, attr: DirEntryAttribute::Symlink } => {
writer.write_all(&[CatalogEntryType::Symlink as u8])?;
Ok((self.name, data))
}
- fn parse<C: FnMut(CatalogEntryType, &[u8], u64, u64, u64) -> Result<(), Error>>(
+ fn parse<C: FnMut(CatalogEntryType, &[u8], u64, u64, i64) -> Result<bool, Error>>(
data: &[u8],
mut callback: C,
) -> Result<(), Error> {
let name = &mut name_buf[0..name_len];
cursor.read_exact(name)?;
- match etype {
+ let cont = match etype {
CatalogEntryType::Directory => {
let offset = catalog_decode_u64(&mut cursor)?;
- callback(etype, name, offset, 0, 0)?;
+ callback(etype, name, offset, 0, 0)?
}
CatalogEntryType::File => {
let size = catalog_decode_u64(&mut cursor)?;
- let mtime = catalog_decode_u64(&mut cursor)?;
- callback(etype, name, 0, size, mtime)?;
+ let mtime = catalog_decode_i64(&mut cursor)?;
+ callback(etype, name, 0, size, mtime)?
}
_ => {
- callback(etype, name, 0, 0, 0)?;
+ callback(etype, name, 0, 0, 0)?
}
+ };
+ if !cont {
+ return Ok(());
}
}
}
}
+/// Write small catalog files
+///
+/// A catalog simply contains a list of files and directories
+/// (directory tree). Catalogs are used to find content without having to
+/// search the real archive (which may be large). For files, they
+/// include the last modification time and file size.
pub struct CatalogWriter<W> {
writer: W,
dirstack: Vec<DirInfo>,
impl <W: Write> CatalogWriter<W> {
+ /// Create a new CatalogWriter instance
pub fn new(writer: W) -> Result<Self, Error> {
let mut me = Self { writer, dirstack: vec![ DirInfo::new_rootdir() ], pos: 0 };
me.write_all(&PROXMOX_CATALOG_FILE_MAGIC_1_0)?;
Ok(())
}
+ /// Finish writing, flush all data
+ ///
+ /// This needs to be called before drop.
pub fn finish(&mut self) -> Result<(), Error> {
if self.dirstack.len() != 1 {
bail!("unable to finish catalog at level {}", self.dirstack.len());
Ok(())
}
- fn add_file(&mut self, name: &CStr, size: u64, mtime: u64) -> Result<(), Error> {
+ fn add_file(&mut self, name: &CStr, size: u64, mtime: i64) -> Result<(), Error> {
let dir = self.dirstack.last_mut().ok_or_else(|| format_err!("outside root"))?;
let name = name.to_bytes().to_vec();
dir.entries.push(DirEntry { name, attr: DirEntryAttribute::File { size, mtime } });
}
}
-// fixme: move to somehere else?
-/// Implement Write to tokio mpsc channel Sender
-pub struct SenderWriter(tokio::sync::mpsc::Sender<Result<Vec<u8>, Error>>);
-
-impl SenderWriter {
- pub fn new(sender: tokio::sync::mpsc::Sender<Result<Vec<u8>, Error>>) -> Self {
- Self(sender)
- }
-}
-
-impl Write for SenderWriter {
- fn write(&mut self, buf: &[u8]) -> Result<usize, std::io::Error> {
- futures::executor::block_on(async move {
- self.0.send(Ok(buf.to_vec())).await
- .map_err(|err| std::io::Error::new(std::io::ErrorKind::Other, err.to_string()))?;
- Ok(buf.len())
- })
- }
-
- fn flush(&mut self) -> Result<(), std::io::Error> {
- Ok(())
- }
-}
-
+/// Read Catalog files
+///
+/// Wraps a reader `R`; the methods in this file are implemented for
+/// `R: Read + Seek`.
pub struct CatalogReader<R> {
reader: R,
}
impl <R: Read + Seek> CatalogReader<R> {
+ /// Create a new CatalogReader instance
+ ///
+ /// Only stores `reader`; nothing is read from it here.
pub fn new(reader: R) -> Self {
Self { reader }
}
+    /// Print the whole catalog to stdout, starting at the root directory.
pub fn dump(&mut self) -> Result<(), Error> {
- self.reader.seek(SeekFrom::End(-8))?;
+        // `root()` only ever builds a `Directory` entry, so no other variant
+        // can show up here.
+        let start = match self.root()?.attr {
+            DirEntryAttribute::Directory { start } => start,
+            _ => unreachable!(),
+        };
+        self.dump_dir(std::path::Path::new("./"), start)
+    }
+    /// Get the root DirEntry
+    ///
+    /// Checks the catalog magic at the start of the stream, then reads the
+    /// root directory's start offset from the last 8 bytes. The returned
+    /// entry has an empty name.
+    pub fn root(&mut self) -> Result<DirEntry, Error> {
+        // Root dir is special
+        let mut header = [0u8; 8];
+        self.reader.seek(SeekFrom::Start(0))?;
+        self.reader.read_exact(&mut header)?;
+        if header != PROXMOX_CATALOG_FILE_MAGIC_1_0 {
+            bail!("got unexpected magic number for catalog");
+        }
+
+        self.reader.seek(SeekFrom::End(-8))?;
+        // NOTE(review): presumably sound because every 8-byte pattern is a
+        // valid u64 - confirm against the contract of `read_le_value`.
+        let start = unsafe { self.reader.read_le_value::<u64>()? };
+
+        let attr = DirEntryAttribute::Directory { start };
+        Ok(DirEntry { name: Vec::new(), attr })
+    }
+
+    /// Read all directory entries
+    ///
+    /// Returns the entries of `parent` in the order they are stored.
+    ///
+    /// Fails if `parent` is not a directory, if the directory block cannot be
+    /// read or parsed, or if an entry carries an offset larger than the
+    /// parent's own start offset (corrupt catalog).
+    pub fn read_dir(
+        &mut self,
+        parent: &DirEntry,
+    ) -> Result<Vec<DirEntry>, Error> {
+        let start = match parent.attr {
+            DirEntryAttribute::Directory { start } => start,
+            _ => bail!("parent is not a directory - internal error"),
+        };
+
+        let data = self.read_raw_dirinfo_block(start)?;
+
+        let mut entry_list = Vec::new();
+
+        DirInfo::parse(&data, |etype, name, offset, size, mtime| {
+            // The entry's start is `start - offset`; an offset beyond `start`
+            // can only come from corrupt data, so report it instead of
+            // underflowing (panic in debug builds, wrap-around in release).
+            let entry_start = start.checked_sub(offset).ok_or_else(|| {
+                format_err!("invalid catalog entry offset {} (directory start {})", offset, start)
+            })?;
+            entry_list.push(DirEntry::new(etype, name.to_vec(), entry_start, size, mtime));
+            Ok(true)
+        })?;
- self.dump_dir(std::path::Path::new("./"), start)
+        Ok(entry_list)
}
- pub fn dump_dir(&mut self, prefix: &std::path::Path, start: u64) -> Result<(), Error> {
+    /// Lookup a DirEntry from an absolute path
+    ///
+    /// A single leading `/` is optional, and the path `/` yields the root
+    /// entry. Every `/`-separated component is looked up verbatim, so empty
+    /// components (from `//` or a trailing `/`) are searched as empty names.
+    ///
+    /// Fails if any component cannot be found.
+    pub fn lookup_recursive(
+        &mut self,
+        path: &[u8],
+    ) -> Result<DirEntry, Error> {
+        let mut current = self.root()?;
+        if path == b"/" {
+            return Ok(current);
+        }
- self.reader.seek(SeekFrom::Start(start))?;
+        let relative = if path.first() == Some(&b'/') {
+            &path[1..]
+        } else {
+            path
+        };
+
+        for comp in relative.split(|c| *c == b'/') {
+            current = match self.lookup(&current, comp)? {
+                Some(entry) => entry,
+                None => bail!("path {:?} not found in catalog", String::from_utf8_lossy(path)),
+            };
+        }
+        Ok(current)
+    }
- let size = catalog_decode_u64(&mut self.reader)?;
+    /// Lookup a DirEntry inside a parent directory
+    ///
+    /// Returns `Ok(None)` when `parent` has no entry named `filename`.
+    ///
+    /// Fails if `parent` is not a directory, if the directory block cannot be
+    /// read or parsed, or if the matching entry carries an offset larger than
+    /// the parent's own start offset (corrupt catalog).
+    pub fn lookup(
+        &mut self,
+        parent: &DirEntry,
+        filename: &[u8],
+    ) -> Result<Option<DirEntry>, Error> {
- if size < 1 { bail!("got small directory size {}", size) };
+        let start = match parent.attr {
+            DirEntryAttribute::Directory { start } => start,
+            _ => bail!("parent is not a directory - internal error"),
+        };
+
+        let data = self.read_raw_dirinfo_block(start)?;
+
+        let mut item = None;
+        DirInfo::parse(&data, |etype, name, offset, size, mtime| {
+            if name != filename {
+                return Ok(true);
+            }
+
+            // The entry's start is `start - offset`; an offset beyond `start`
+            // can only come from corrupt data, so report it instead of
+            // underflowing (panic in debug builds, wrap-around in release).
+            let entry_start = start.checked_sub(offset).ok_or_else(|| {
+                format_err!("invalid catalog entry offset {} (directory start {})", offset, start)
+            })?;
+            item = Some(DirEntry::new(etype, name.to_vec(), entry_start, size, mtime));
+            Ok(false) // stop parsing
+        })?;
+
+        Ok(item)
+    }
+
+ /// Read the raw directory info block stored at offset `start`.
+ ///
+ /// Seeks the reader to `start`, decodes the block size and then reads
+ /// `size` bytes via `read_exact_allocated`. Fails if the decoded size
+ /// is zero.
+ fn read_raw_dirinfo_block(&mut self, start: u64) -> Result<Vec<u8>, Error> {
+ self.reader.seek(SeekFrom::Start(start))?;
+ let size = catalog_decode_u64(&mut self.reader)?;
+ if size < 1 { bail!("got small directory size {}", size) };
let data = self.reader.read_exact_allocated(size as usize)?;
+ Ok(data)
+ }
+
+ /// Print the content of a directory to stdout
+ pub fn dump_dir(&mut self, prefix: &std::path::Path, start: u64) -> Result<(), Error> {
+
+ let data = self.read_raw_dirinfo_block(start)?;
DirInfo::parse(&data, |etype, name, offset, size, mtime| {
self.dump_dir(&path, pos)?;
}
CatalogEntryType::File => {
- let dt = Local.timestamp(mtime as i64, 0);
+ let mut mtime_string = mtime.to_string();
+ if let Ok(s) = proxmox::tools::time::strftime_local("%FT%TZ", mtime as i64) {
+ mtime_string = s;
+ }
println!(
"{} {:?} {} {}",
etype,
path,
size,
- dt.to_rfc3339_opts(chrono::SecondsFormat::Secs, false),
+ mtime_string,
);
}
_ => {
}
}
- Ok(())
+ Ok(true)
})
}
+
+    /// Finds all entries matching the given match patterns and calls the
+    /// provided callback on them.
+    ///
+    /// Walks `parent` recursively. `file_path` is extended with each entry
+    /// name while that entry is visited and is cut back to its original
+    /// length before returning. Excluded entries are skipped together with
+    /// everything below them; entries without a match are not reported, but
+    /// directories among them are still descended into.
+    pub fn find(
+        &mut self,
+        parent: &DirEntry,
+        file_path: &mut Vec<u8>,
+        match_list: &impl MatchList,
+        callback: &mut dyn FnMut(&[u8]) -> Result<(), Error>,
+    ) -> Result<(), Error> {
+        let base_len = file_path.len();
+
+        for entry in self.read_dir(parent)? {
+            // Drop whatever the previous sibling appended.
+            file_path.truncate(base_len);
+            if !entry.name.starts_with(b"/") {
+                file_path.reserve(entry.name.len() + 1);
+                file_path.push(b'/');
+            }
+            file_path.extend(&entry.name);
+
+            match match_list.matches(&file_path, entry.get_file_mode()) {
+                Some(MatchType::Exclude) => continue,
+                Some(MatchType::Include) => callback(&file_path)?,
+                None => (),
+            }
+
+            if entry.is_directory() {
+                self.find(&entry, file_path, match_list, callback)?;
+            }
+        }
+
+        file_path.truncate(base_len);
+        Ok(())
+    }
+}
+
+/// Serialize i64 as short, variable length byte sequence
+///
+/// Stores 7 bits of the magnitude per byte, least significant group first.
+/// Bit 8 is set on every byte that is followed by another one. A negative
+/// value keeps bit 8 set on its last magnitude byte and ends with a zero
+/// byte (0x00).
+pub fn catalog_encode_i64<W: Write>(writer: &mut W, v: i64) -> Result<(), Error> {
+    let negative = v < 0;
+
+    // Magnitude of `v`; going through `v + 1` also handles i64::MIN.
+    let mut rest = if negative {
+        (-(v + 1)) as u64 + 1
+    } else {
+        v as u64
+    };
+
+    let mut enc = Vec::new();
+    while rest >= 128 {
+        enc.push((128 | (rest & 127)) as u8);
+        rest >>= 7;
+    }
+
+    if negative {
+        enc.push(128 | rest as u8);
+        enc.push(0u8);
+    } else {
+        enc.push(rest as u8);
+    }
+
+    writer.write_all(&enc)?;
+    Ok(())
+}
+
+/// Deserialize i64 from variable length byte sequence
+///
+/// Reads at most 11 bytes: up to 10 bytes carrying 7 value bits each, plus
+/// the zero byte that terminates a negative value.
+///
+/// This is compatible with `catalog_encode_u64` for encoded values below
+/// 2^63. Larger values do not fit into an i64 and come back as the i64 with
+/// the same bit pattern.
+///
+/// Returns an error (instead of panicking or silently dropping bits) when
+/// the input carries more than 64 value bits, encodes a negative magnitude
+/// above 2^63, or has no end marker within 11 bytes.
+pub fn catalog_decode_i64<R: Read>(reader: &mut R) -> Result<i64, Error> {
+    /// Turn the decoded magnitude of a negative value into the i64 result.
+    fn negate(magnitude: u64) -> Result<i64, Error> {
+        if magnitude > (1u64 << 63) {
+            bail!("decode_i64 failed - negative value out of range");
+        }
+        // Two's complement negation: maps 2^63 to i64::MIN and 0 to 0.
+        Ok((magnitude as i64).wrapping_neg())
+    }
+
+    let mut v: u64 = 0;
+    let mut buf = [0u8];
+
+    // At most 10 bytes carry value bits (shift 0..=63).
+    for i in 0..10u32 {
+        reader.read_exact(&mut buf)?;
+        let t = buf[0];
+
+        if t == 0 {
+            // Either the value 0 itself or the sign marker of a negative value.
+            return negate(v);
+        }
+
+        let shift = i * 7;
+        let bits = (t & 127) as u64;
+        // Only the 10th byte can overflow: it has room for a single bit.
+        if (bits << shift) >> shift != bits {
+            bail!("decode_i64 failed - value too large");
+        }
+        v |= bits << shift;
+
+        if t < 128 {
+            return Ok(v as i64);
+        }
+    }
+
+    // All 10 value bytes had the continuation bit set, so the only valid
+    // 11th byte is the sign marker.
+    reader.read_exact(&mut buf)?;
+    if buf[0] != 0 {
+        bail!("decode_i64 failed - missing end marker");
+    }
+    negate(v)
}
/// Serialize u64 as short, variable length byte sequence
///
/// Stores 7 bits per byte, Bit 8 indicates the end of the sequence (when not set).
-/// We limit values to a maximum of 2^63.
pub fn catalog_encode_u64<W: Write>(writer: &mut W, v: u64) -> Result<(), Error> {
let mut enc = Vec::new();
- if (v & (1<<63)) != 0 { bail!("catalog_encode_u64 failed - value >= 2^63"); }
let mut d = v;
loop {
if d < 128 {
break;
}
enc.push((128 | (d & 127)) as u8);
- d = d >> 7;
+ d >>= 7;
}
writer.write_all(&enc)?;
/// Deserialize u64 from variable length byte sequence
///
-/// We currently read maximal 9 bytes, which give a maximum of 63 bits.
+/// We currently read at most 10 bytes, which gives a maximum of 70 bits,
+/// but we currently only encode up to 64 bits
pub fn catalog_decode_u64<R: Read>(reader: &mut R) -> Result<u64, Error> {
let mut v: u64 = 0;
let mut buf = [0u8];
- for i in 0..9 { // only allow 9 bytes (63 bits)
+ for i in 0..10 { // only allow 10 bytes (70 bits)
if buf.is_empty() {
bail!("decode_u64 failed - unexpected EOB");
}
assert!(decoded == value);
}
+ test_encode_decode(u64::MIN);
+ test_encode_decode(126);
+ test_encode_decode((1<<12)-1);
+ test_encode_decode((1<<20)-1);
+ test_encode_decode((1<<50)-1);
+ test_encode_decode(u64::MAX);
+}
+
+#[test]
+fn test_catalog_i64_encoder() {
+    /// Encode `value`, decode the bytes again and require the same value back.
+    fn test_encode_decode(value: i64) {
+        let mut data = Vec::new();
+        catalog_encode_i64(&mut data, value).unwrap();
+
+        let decoded = catalog_decode_i64(&mut &data[..]).unwrap();
+        assert!(decoded == value);
+    }
+
+    let samples: [i64; 10] = [
+        0,
+        -0,
+        126,
+        -126,
+        (1<<12)-1,
+        -(1<<12)-1,
+        (1<<20)-1,
+        -(1<<20)-1,
+        i64::MIN,
+        i64::MAX,
+    ];
+    for &value in samples.iter() {
+        test_encode_decode(value);
+    }
+}
+
+#[test]
+fn test_catalog_i64_compatibility() {
+
+ // Bytes written by `catalog_encode_u64` must decode through
+ // `catalog_decode_i64` to the same bits once cast back to u64.
+ fn test_encode_decode(value: u64) {
+
+ let mut data = Vec::new();
+ catalog_encode_u64(&mut data, value).unwrap();
+
+ let slice = &mut &data[..];
+ let decoded = catalog_decode_i64(slice).unwrap() as u64;
+
+ assert!(decoded == value);
+ }
+
+ test_encode_decode(u64::MIN);
test_encode_decode(126);
test_encode_decode((1<<12)-1);
test_encode_decode((1<<20)-1);
test_encode_decode((1<<50)-1);
- test_encode_decode((1<<63)-1);
+ test_encode_decode(u64::MAX);
}