]> git.proxmox.com Git - proxmox-backup.git/blobdiff - src/pxar/encoder.rs
Cargo.toml: pathpatterns, pxar, proxmox-fuse
[proxmox-backup.git] / src / pxar / encoder.rs
index d427e7a3d378e6806590b258876e9c533e13bfaf..f438e39a7ff5e0c78b189f62f82e3b9dfb5a04c3 100644 (file)
@@ -2,7 +2,7 @@
 //!
 //! This module contain the code to generate *pxar* archive files.
 use std::collections::{HashMap, HashSet};
-use std::ffi::CStr;
+use std::ffi::{CStr, CString};
 use std::io::Write;
 use std::os::unix::ffi::OsStrExt;
 use std::os::unix::io::AsRawFd;
@@ -10,7 +10,7 @@ use std::os::unix::io::RawFd;
 use std::path::{Path, PathBuf};
 
 use endian_trait::Endian;
-use failure::*;
+use anyhow::{bail, format_err, Error};
 use nix::errno::Errno;
 use nix::fcntl::OFlag;
 use nix::sys::stat::FileStat;
@@ -24,16 +24,11 @@ use super::catalog::BackupCatalogWriter;
 use super::flags;
 use super::format_definition::*;
 use super::helper::*;
-use super::match_pattern::{MatchPattern, MatchType};
+use super::match_pattern::{MatchPattern, MatchPatternSlice, MatchType};
 use crate::tools::acl;
 use crate::tools::fs;
 use crate::tools::xattr;
 
-/// The format requires to build sorted directory lookup tables in
-/// memory, so we restrict the number of allowed entries to limit
-/// maximum memory usage.
-pub const MAX_DIRECTORY_ENTRIES: usize = 256 * 1024;
-
 #[derive(Eq, PartialEq, Hash)]
 struct HardLinkInfo {
     st_dev: u64,
@@ -55,6 +50,8 @@ pub struct Encoder<'a, W: Write, C: BackupCatalogWriter> {
     // Flags signaling features supported by the filesystem
     fs_feature_flags: u64,
     hardlinks: HashMap<HardLinkInfo, (PathBuf, u64)>,
+    entry_counter: usize,
+    entry_max: usize,
 }
 
 impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
@@ -81,6 +78,8 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         verbose: bool,
         skip_lost_and_found: bool, // fixme: should be a feature flag ??
         feature_flags: u64,
+        mut excludes: Vec<MatchPattern>,
+        entry_max: usize,
     ) -> Result<(), Error> {
         const FILE_COPY_BUFFER_SIZE: usize = 1024 * 1024;
 
@@ -125,17 +124,23 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
             feature_flags,
             fs_feature_flags,
             hardlinks: HashMap::new(),
+            entry_counter: 0,
+            entry_max,
         };
 
         if verbose {
             println!("{:?}", me.full_path());
         }
 
-        let mut excludes = Vec::new();
         if skip_lost_and_found {
             excludes.push(MatchPattern::from_line(b"**/lost+found").unwrap().unwrap());
         }
-        me.encode_dir(dir, &stat, magic, excludes)?;
+        let mut exclude_slices = Vec::new();
+        for excl in &excludes {
+            exclude_slices.push(excl.as_slice());
+        }
+
+        me.encode_dir(dir, &stat, magic, exclude_slices)?;
 
         Ok(())
     }
@@ -216,7 +221,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         }
 
         let flags = flags::feature_flags_from_chattr(attr as u32);
-        entry.flags = entry.flags | flags;
+        entry.flags |= flags;
 
         Ok(())
     }
@@ -241,7 +246,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         }
 
         let flags = flags::feature_flags_from_fat_attr(attr);
-        entry.flags = entry.flags | flags;
+        entry.flags |= flags;
 
         Ok(())
     }
@@ -282,7 +287,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
             Err(err) => bail!("read_xattrs failed for {:?} - {}", self.full_path(), err),
         };
 
-        for name in xattr_names.split(|c| *c == b'\0') {
+        for name in &xattr_names {
             // Only extract the relevant extended attributes
             if !xattr::is_valid_xattr_name(&name) {
                 continue;
@@ -302,7 +307,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
                 }
             } else if self.has_features(flags::WITH_XATTRS) {
                 xattrs.push(PxarXAttr {
-                    name: name.to_vec(),
+                    name: name.to_bytes().to_vec(),
                     value,
                 });
             }
@@ -470,12 +475,12 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
 
                 let projid = fsxattr.fsx_projid as u64;
                 if projid == 0 {
-                    return Ok(None);
+                    Ok(None)
                 } else {
-                    return Ok(Some(PxarQuotaProjID { projid }));
+                    Ok(Some(PxarQuotaProjID { projid }))
                 }
             }
-            _ => return Ok(None),
+            _ => Ok(None),
         }
     }
 
@@ -621,16 +626,18 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         dir: &mut nix::dir::Dir,
         dir_stat: &FileStat,
         magic: i64,
-        match_pattern: Vec<MatchPattern>,
+        match_pattern: Vec<MatchPatternSlice>,
     ) -> Result<(), Error> {
         //println!("encode_dir: {:?} start {}", self.full_path(), self.writer_pos);
 
-        let mut name_list = vec![];
+        let mut name_list = Vec::new();
 
         let rawfd = dir.as_raw_fd();
 
         let dir_start_pos = self.writer_pos;
 
+        let is_root = dir_start_pos == 0;
+
         let mut dir_entry = self.create_entry(&dir_stat)?;
 
         self.read_chattr(rawfd, &mut dir_entry)?;
@@ -677,26 +684,41 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         let include_children;
         if is_virtual_file_system(magic) {
             include_children = false;
+        } else if let Some(set) = &self.device_set {
+            include_children = set.contains(&dir_stat.st_dev);
         } else {
-            if let Some(set) = &self.device_set {
-                include_children = set.contains(&dir_stat.st_dev);
-            } else {
-                include_children = true;
-            }
+            include_children = true;
         }
 
         // Expand the exclude match pattern inherited from the parent by local entries, if present
         let mut local_match_pattern = match_pattern.clone();
-        let pxar_exclude = match MatchPattern::from_file(rawfd, ".pxarexclude") {
-            Ok(Some((mut excludes, buffer, stat))) => {
-                local_match_pattern.append(&mut excludes);
-                Some((buffer, stat))
+        let (pxar_exclude, excludes) = match MatchPattern::from_file(rawfd, ".pxarexclude") {
+            Ok(Some((excludes, buffer, stat))) => {
+                (Some((buffer, stat)), excludes)
+            }
+            Ok(None) => (None, Vec::new()),
+            Err(nix::Error::Sys(Errno::EACCES)) => {
+                // No permission to read .pxarexclude, ignore its contents.
+                eprintln!(
+                    "ignoring match patterns in {:?}: open file failed - EACCES",
+                    self.full_path().join(".pxarexclude"),
+                );
+                (None, Vec::new())
             }
-            Ok(None) => None,
             Err(err) => bail!("error while reading exclude file - {}", err),
         };
+        for excl in &excludes {
+            local_match_pattern.push(excl.as_slice());
+        }
 
         if include_children {
+            // Exclude patterns passed via the CLI are stored as '.pxarexclude-cli'
+            // in the root directory of the archive.
+            if is_root && !match_pattern.is_empty() {
+                let filename = CString::new(".pxarexclude-cli")?;
+                name_list.push((filename, *dir_stat, match_pattern.clone()));
+            }
+
             for entry in dir.iter() {
                 let entry = entry
                     .map_err(|err| format_err!("readir {:?} failed - {}", self.full_path(), err))?;
@@ -706,6 +728,13 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
                 if name == b".\0" || name == b"..\0" {
                     continue;
                 }
+                // Do not store a ".pxarexclude-cli" file found in the archive root,
+                // as this would conflict with new CLI-passed exclude patterns,
+                // if present.
+                if is_root && name == b".pxarexclude-cli\0" {
+                    eprintln!("skip existing '.pxarexclude-cli' in archive root.");
+                    continue;
+                }
 
                 let stat = match nix::sys::stat::fstatat(
                     rawfd,
@@ -721,22 +750,28 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
                     Err(err) => bail!("fstat {:?} failed - {}", self.full_path(), err),
                 };
 
-                match match_filename(&filename, &stat, &local_match_pattern)? {
+                match MatchPatternSlice::match_filename_exclude(
+                    &filename,
+                    is_directory(&stat),
+                    &local_match_pattern,
+                )? {
                     (MatchType::Positive, _) => {
                         let filename_osstr = std::ffi::OsStr::from_bytes(filename.to_bytes());
                         eprintln!(
-                            "matched by .pxarexclude entry - skipping: {:?}",
+                            "matched by exclude pattern - skipping: {:?}",
                             self.full_path().join(filename_osstr)
                         );
                     }
-                    (_, child_pattern) => name_list.push((filename, stat, child_pattern)),
+                    (_, child_pattern) => {
+                        self.entry_counter += 1;
+                        name_list.push((filename, stat, child_pattern));
+                    }
                 }
 
-                if name_list.len() > MAX_DIRECTORY_ENTRIES {
+                if self.entry_counter > self.entry_max {
                     bail!(
-                        "too many directory items in {:?} (> {})",
-                        self.full_path(),
-                        MAX_DIRECTORY_ENTRIES
+                        "exceeded max number of entries (> {})",
+                        self.entry_max
                     );
                 }
             }
@@ -745,13 +780,15 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         }
 
         name_list.sort_unstable_by(|a, b| a.0.cmp(&b.0));
+        let num_entries = name_list.len();
 
-        let mut goodbye_items = vec![];
+        let mut goodbye_items = Vec::with_capacity(num_entries);
 
         for (filename, stat, exclude_list) in name_list {
             let start_pos = self.writer_pos;
 
             if filename.as_bytes() == b".pxarexclude" {
+                // pxar_exclude is None if reading .pxarexclude failed with EACCES.
                 if let Some((ref content, ref stat)) = pxar_exclude {
                     let filefd = match nix::fcntl::openat(
                         rawfd,
@@ -764,6 +801,14 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
                             self.report_vanished_file(&self.full_path())?;
                             continue;
                         }
+                        Err(nix::Error::Sys(Errno::EACCES)) => {
+                            let filename_osstr = std::ffi::OsStr::from_bytes(filename.to_bytes());
+                            eprintln!(
+                                "skipping {:?}: open file failed - EACCES",
+                                self.full_path().join(filename_osstr),
+                            );
+                            continue;
+                        }
                         Err(err) => {
                             let filename_osstr = std::ffi::OsStr::from_bytes(filename.to_bytes());
                             bail!(
@@ -785,8 +830,20 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
                         catalog.add_file(&filename, stat.st_size as u64, stat.st_mtime as u64)?;
                     }
                     self.encode_pxar_exclude(filefd, stat, child_magic, content)?;
-                    continue;
                 }
+                continue;
+            }
+
+            if is_root && filename.as_bytes() == b".pxarexclude-cli" {
+                // '.pxarexclude-cli' is used to store the exclude MatchPatterns
+                // passed via the cli in the root directory of the archive.
+                self.write_filename(&filename)?;
+                let content = MatchPatternSlice::to_bytes(&exclude_list);
+                if let Some(ref mut catalog) = self.catalog {
+                    catalog.add_file(&filename, content.len() as u64, 0)?;
+                }
+                self.encode_pxar_exclude_cli(stat.st_uid, stat.st_gid, 0, &content)?;
+                continue;
             }
 
             self.relative_path
@@ -806,7 +863,16 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
                     Ok(dir) => dir,
                     Err(nix::Error::Sys(Errno::ENOENT)) => {
                         self.report_vanished_file(&self.full_path())?;
-                        continue; // fixme!!
+                        self.relative_path.pop();
+                        continue;
+                    }
+                    Err(nix::Error::Sys(Errno::EACCES)) => {
+                        eprintln!(
+                            "skipping {:?}: open dir failed - EACCES",
+                            self.full_path(),
+                        );
+                        self.relative_path.pop();
+                        continue;
                     }
                     Err(err) => bail!("open dir {:?} failed - {}", self.full_path(), err),
                 };
@@ -860,6 +926,15 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
                         Ok(filefd) => filefd,
                         Err(nix::Error::Sys(Errno::ENOENT)) => {
                             self.report_vanished_file(&self.full_path())?;
+                            self.relative_path.pop();
+                            continue;
+                        }
+                        Err(nix::Error::Sys(Errno::EACCES)) => {
+                            eprintln!(
+                                "skipping {:?}: open file failed - EACCES",
+                                self.full_path(),
+                            );
+                            self.relative_path.pop();
                             continue;
                         }
                         Err(err) => bail!("open file {:?} failed - {}", self.full_path(), err),
@@ -902,6 +977,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
                     }
                     Err(nix::Error::Sys(Errno::ENOENT)) => {
                         self.report_vanished_file(&self.full_path())?;
+                        self.relative_path.pop();
                         continue;
                     }
                     Err(err) => bail!("readlink {:?} failed - {}", self.full_path(), err),
@@ -919,6 +995,8 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
                     self.encode_device(&stat)?;
                 } else {
                     eprintln!("skip device node: {:?}", self.full_path());
+                    self.relative_path.pop();
+                    continue;
                 }
             } else if is_fifo(&stat) {
                 if self.has_features(flags::WITH_FIFOS) {
@@ -929,6 +1007,8 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
                     self.encode_special(&stat)?;
                 } else {
                     eprintln!("skip fifo: {:?}", self.full_path());
+                    self.relative_path.pop();
+                    continue;
                 }
             } else if is_socket(&stat) {
                 if self.has_features(flags::WITH_SOCKETS) {
@@ -939,6 +1019,8 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
                     self.encode_special(&stat)?;
                 } else {
                     eprintln!("skip socket: {:?}", self.full_path());
+                    self.relative_path.pop();
+                    continue;
                 }
             } else {
                 bail!(
@@ -970,6 +1052,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         let goodbye_offset = self.writer_pos - dir_start_pos;
 
         self.write_goodbye_table(goodbye_offset, &mut goodbye_items)?;
+        self.entry_counter -= num_entries;
 
         //println!("encode_dir: {:?} end1 {}", self.full_path(), self.writer_pos);
         Ok(())
@@ -1007,12 +1090,10 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         let include_payload;
         if is_virtual_file_system(magic) {
             include_payload = false;
+        } else if let Some(ref set) = &self.device_set {
+            include_payload = set.contains(&stat.st_dev);
         } else {
-            if let Some(ref set) = &self.device_set {
-                include_payload = set.contains(&stat.st_dev);
-            } else {
-                include_payload = true;
-            }
+            include_payload = true;
         }
 
         if !include_payload {
@@ -1036,7 +1117,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
                 if pos != size {
                     // Note:: casync format cannot handle that
                     bail!(
-                        "detected shrinked file {:?} ({} < {})",
+                        "detected shrunk file {:?} ({} < {})",
                         self.full_path(),
                         pos,
                         size
@@ -1149,12 +1230,10 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         let include_payload;
         if is_virtual_file_system(magic) {
             include_payload = false;
+        } else if let Some(set) = &self.device_set {
+            include_payload = set.contains(&stat.st_dev);
         } else {
-            if let Some(set) = &self.device_set {
-                include_payload = set.contains(&stat.st_dev);
-            } else {
-                include_payload = true;
-            }
+            include_payload = true;
         }
 
         if !include_payload {
@@ -1204,32 +1283,6 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
     }
 }
 
-// If there is a match, an updated MatchPattern list to pass to the matched child is returned.
-fn match_filename(
-    filename: &CStr,
-    stat: &FileStat,
-    match_pattern: &Vec<MatchPattern>,
-) -> Result<(MatchType, Vec<MatchPattern>), Error> {
-    let mut child_pattern = Vec::new();
-    let mut match_state = MatchType::None;
-
-    for pattern in match_pattern {
-        match pattern.matches_filename(filename, is_directory(&stat))? {
-            MatchType::None => {}
-            MatchType::Positive => match_state = MatchType::Positive,
-            MatchType::Negative => match_state = MatchType::Negative,
-            match_type => {
-                if match_state != MatchType::Positive && match_state != MatchType::Negative {
-                    match_state = match_type;
-                }
-                child_pattern.push(pattern.get_rest_pattern());
-            }
-        }
-    }
-
-    Ok((match_state, child_pattern))
-}
-
 fn errno_is_unsupported(errno: Errno) -> bool {
     match errno {
         Errno::ENOTTY | Errno::ENOSYS | Errno::EBADF | Errno::EOPNOTSUPP | Errno::EINVAL => true,