pxar: encoder: limit number of max entries held at once in memory during archive...
author     Christian Ebner <c.ebner@proxmox.com>
Fri, 10 Jan 2020 11:50:06 +0000 (12:50 +0100)
committer  Dietmar Maurer <dietmar@proxmox.com>
Fri, 10 Jan 2020 12:45:08 +0000 (13:45 +0100)
Limit the total number of entries and therefore the approximate memory
consumption, instead of limiting it per directory as was done previously.
This makes more sense, as it bounds not only the width but also the depth
of the directory tree.

Further, instead of hardcoding this value, allow it to be passed as an
additional optional parameter 'entries-max'.
This makes it possible to create archives of directories containing a large
number of entries.
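
A minimal standalone sketch of the accounting this introduces (hypothetical
names, not the encoder's actual fields, and assuming the anyhow crate for
bail!/Error): one counter is shared across the whole tree, incremented for
every entry queued for encoding, the walk bails out once the configured
maximum is exceeded, and the budget is released again when a directory has
been fully written:

    use anyhow::{bail, Error};

    /// Tracks how many entries are currently held in memory across the
    /// whole directory tree, not per directory.
    struct EntryBudget {
        counter: usize,
        max: usize,
    }

    impl EntryBudget {
        fn new(max: usize) -> Self {
            Self { counter: 0, max }
        }

        /// Called for every entry pushed onto a directory's name list.
        fn acquire(&mut self) -> Result<(), Error> {
            self.counter += 1;
            if self.counter > self.max {
                bail!("exceeded max number of entries (> {})", self.max);
            }
            Ok(())
        }

        /// Called after a directory's goodbye table has been written,
        /// freeing its share of the budget again.
        fn release(&mut self, num_entries: usize) {
            self.counter -= num_entries;
        }
    }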

Signed-off-by: Christian Ebner <c.ebner@proxmox.com>
src/bin/proxmox-backup-client.rs
src/bin/pxar.rs
src/client/pxar_backup_stream.rs
src/pxar/encoder.rs
src/pxar/format_definition.rs
tests/catar.rs

index 984eb30cf4efec38a1a3d8ab18dcf0fbbfbc0bd9..356813c9e9d159fa33e27bca5fab510ac6383914 100644 (file)
@@ -231,9 +231,17 @@ async fn backup_directory<P: AsRef<Path>>(
     skip_lost_and_found: bool,
     crypt_config: Option<Arc<CryptConfig>>,
     catalog: Arc<Mutex<CatalogWriter<SenderWriter>>>,
+    entries_max: usize,
 ) -> Result<BackupStats, Error> {
 
-    let pxar_stream = PxarBackupStream::open(dir_path.as_ref(), device_set, verbose, skip_lost_and_found, catalog)?;
+    let pxar_stream = PxarBackupStream::open(
+        dir_path.as_ref(),
+        device_set,
+        verbose,
+        skip_lost_and_found,
+        catalog,
+        entries_max,
+    )?;
     let mut chunk_stream = ChunkStream::new(pxar_stream, chunk_size);
 
     let (mut tx, rx) = mpsc::channel(10); // allow to buffer 10 chunks
@@ -776,6 +784,12 @@ fn spawn_catalog_upload(
                schema: CHUNK_SIZE_SCHEMA,
                optional: true,
            },
+           "entries-max": {
+               type: Integer,
+               description: "Max number of entries to hold in memory.",
+               optional: true,
+               default: pxar::ENCODER_MAX_ENTRIES as isize,
+           },
        }
    }
 )]
@@ -812,6 +826,8 @@ async fn create_backup(
 
     let include_dev = param["include-dev"].as_array();
 
+    let entries_max = param["entries-max"].as_u64().unwrap_or(pxar::ENCODER_MAX_ENTRIES as u64);
+
     let mut devices = if all_file_systems { None } else { Some(HashSet::new()) };
 
     if let Some(include_dev) = include_dev {
@@ -960,6 +976,7 @@ async fn create_backup(
                     skip_lost_and_found,
                     crypt_config.clone(),
                     catalog.clone(),
+                    entries_max as usize,
                 ).await?;
                 manifest.add_file(target, stats.size, stats.csum)?;
                 catalog.lock().unwrap().end_directory()?;
index 87c42ccdaa61768f006bcd438b4d78582aa4f206..a05f94d4d413ab77d65aba5ca933ce50c0b1ce64 100644 (file)
@@ -178,6 +178,7 @@ fn create_archive(
     let no_sockets = param["no-sockets"].as_bool().unwrap_or(false);
     let empty = Vec::new();
     let exclude_pattern = param["exclude"].as_array().unwrap_or(&empty);
+    let entries_max = param["entries-max"].as_u64().unwrap_or(pxar::ENCODER_MAX_ENTRIES as u64);
 
     let devices = if all_file_systems { None } else { Some(HashSet::new()) };
 
@@ -232,6 +233,7 @@ fn create_archive(
         false,
         feature_flags,
         pattern_list,
+        entries_max as usize,
     )?;
 
     writer.flush()?;
@@ -342,6 +344,15 @@ const API_METHOD_CREATE_ARCHIVE: ApiMethod = ApiMethod::new(
                     &StringSchema::new("Path or pattern matching files to restore.").schema()
                 ).schema()
             ),
+            (
+                "entries-max",
+                true,
+                &IntegerSchema::new("Max number of entries loaded at once into memory")
+                    .default(pxar::ENCODER_MAX_ENTRIES as isize)
+                    .minimum(0)
+                    .maximum(std::isize::MAX)
+                    .schema()
+            ),
         ]),
     )
 );
index 8a1abd03cae9d7c420f7c97de4619340044d018b..4ec3bc3d74ee9ae0a20516d9ca4bb6779fc0a418 100644 (file)
@@ -48,6 +48,7 @@ impl PxarBackupStream {
         verbose: bool,
         skip_lost_and_found: bool,
         catalog: Arc<Mutex<CatalogWriter<W>>>,
+        entries_max: usize,
     ) -> Result<Self, Error> {
 
         let (rx, tx) = nix::unistd::pipe()?;
@@ -73,6 +74,7 @@ impl PxarBackupStream {
                 skip_lost_and_found,
                 pxar::flags::DEFAULT,
                 exclude_pattern,
+                entries_max,
             ) {
                 let mut error = error2.lock().unwrap();
                 *error = Some(err.to_string());
@@ -95,12 +97,13 @@ impl PxarBackupStream {
         verbose: bool,
         skip_lost_and_found: bool,
         catalog: Arc<Mutex<CatalogWriter<W>>>,
+        entries_max: usize,
     ) -> Result<Self, Error> {
 
         let dir = nix::dir::Dir::open(dirname, OFlag::O_DIRECTORY, Mode::empty())?;
         let path = std::path::PathBuf::from(dirname);
 
-        Self::new(dir, path, device_set, verbose, skip_lost_and_found, catalog)
+        Self::new(dir, path, device_set, verbose, skip_lost_and_found, catalog, entries_max)
     }
 }
 
index 05747e6d4572928ed7ffea50fe0780fe3f5b9306..f65805bd848283fadc33368df480b2dc79381c0d 100644 (file)
@@ -29,11 +29,6 @@ use crate::tools::acl;
 use crate::tools::fs;
 use crate::tools::xattr;
 
-/// The format requires to build sorted directory lookup tables in
-/// memory, so we restrict the number of allowed entries to limit
-/// maximum memory usage.
-pub const MAX_DIRECTORY_ENTRIES: usize = 256 * 1024;
-
 #[derive(Eq, PartialEq, Hash)]
 struct HardLinkInfo {
     st_dev: u64,
@@ -55,6 +50,8 @@ pub struct Encoder<'a, W: Write, C: BackupCatalogWriter> {
     // Flags signaling features supported by the filesystem
     fs_feature_flags: u64,
     hardlinks: HashMap<HardLinkInfo, (PathBuf, u64)>,
+    entry_counter: usize,
+    entry_max: usize,
 }
 
 impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
@@ -82,6 +79,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         skip_lost_and_found: bool, // fixme: should be a feature flag ??
         feature_flags: u64,
         mut excludes: Vec<MatchPattern>,
+        entry_max: usize,
     ) -> Result<(), Error> {
         const FILE_COPY_BUFFER_SIZE: usize = 1024 * 1024;
 
@@ -126,6 +124,8 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
             feature_flags,
             fs_feature_flags,
             hardlinks: HashMap::new(),
+            entry_counter: 0,
+            entry_max,
         };
 
         if verbose {
@@ -762,14 +762,16 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
                             self.full_path().join(filename_osstr)
                         );
                     }
-                    (_, child_pattern) => name_list.push((filename, stat, child_pattern)),
+                    (_, child_pattern) => {
+                        self.entry_counter += 1;
+                        name_list.push((filename, stat, child_pattern));
+                    }
                 }
 
-                if name_list.len() > MAX_DIRECTORY_ENTRIES {
+                if self.entry_counter > self.entry_max {
                     bail!(
-                        "too many directory items in {:?} (> {})",
-                        self.full_path(),
-                        MAX_DIRECTORY_ENTRIES
+                        "exceeded max number of entries (> {})",
+                        self.entry_max
                     );
                 }
             }
@@ -778,8 +780,9 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         }
 
         name_list.sort_unstable_by(|a, b| a.0.cmp(&b.0));
+        let num_entries = name_list.len();
 
-        let mut goodbye_items = Vec::with_capacity(name_list.len());
+        let mut goodbye_items = Vec::with_capacity(num_entries);
 
         for (filename, stat, exclude_list) in name_list {
             let start_pos = self.writer_pos;
@@ -1049,6 +1052,7 @@ impl<'a, W: Write, C: BackupCatalogWriter> Encoder<'a, W, C> {
         let goodbye_offset = self.writer_pos - dir_start_pos;
 
         self.write_goodbye_table(goodbye_offset, &mut goodbye_items)?;
+        self.entry_counter -= num_entries;
 
         //println!("encode_dir: {:?} end1 {}", self.full_path(), self.writer_pos);
         Ok(())
index b80fc0c0e2cbb36ee77cba9e85b1f1c0204568dd..75c2edcf382b8392506fdbfb74dc76593af2849a 100644 (file)
@@ -256,3 +256,8 @@ pub fn check_ca_header<T>(head: &PxarHeader, htype: u64) -> Result<(), Error> {
 
     Ok(())
 }
+
+/// The format requires to build sorted directory lookup tables in
+/// memory, so we restrict the number of allowed entries to limit
+/// maximum memory usage.
+pub const ENCODER_MAX_ENTRIES: usize = 1024 * 1024;
index d45b63b2c93f1be02636ab520e3d364f304db869..ed3e67029e99a5a51cb3152a97022c10cd6d32a8 100644 (file)
@@ -37,6 +37,7 @@ fn run_test(dir_name: &str) -> Result<(), Error> {
         false,
         flags::DEFAULT,
         Vec::new(),
+        ENCODER_MAX_ENTRIES,
     )?;
 
     Command::new("cmp")