//! This module implements the proxmox backup data storage
//!
//! Proxmox backup splits large files into chunks, and stores them
//! deduplicated using a content-addressable storage format.
//!
//! A chunk is simply defined as a binary blob, which is stored inside a
//! `ChunkStore`, addressed by the SHA256 digest of the binary blob.
//!
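//! A rough sketch of how a chunk is addressed (using the `openssl` crate
//! for SHA256 is an assumption here; the actual on-disk layout is up to
//! the `ChunkStore`):
//!
//! ```ignore
//! let data: &[u8] = b"some chunk data";
//! // the chunk's identity is the SHA256 digest of its content
//! let digest: [u8; 32] = openssl::sha::sha256(data);
//! // hex-encode the digest to get the name used to address the chunk
//! let name: String = digest.iter().map(|b| format!("{:02x}", b)).collect();
//! ```
//!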
//! Index files are used to reconstruct the original file. They
//! basically contain a list of SHA256 checksums. The `DynamicIndex*`
//! format is able to deal with dynamic chunk sizes, whereas the
//! `FixedIndex*` format is an optimization to store a list of equally
//! sized chunks.
//!
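//! Conceptually, reconstructing a file from an index boils down to the
//! following (a sketch only; `index_digests` and `read_chunk_by_digest`
//! are hypothetical placeholders, not APIs of this crate):
//!
//! ```ignore
//! let mut file = Vec::new();
//! for digest in index_digests {
//!     // look the chunk up in the ChunkStore by its SHA256 digest
//!     file.extend_from_slice(&read_chunk_by_digest(&digest)?);
//! }
//! ```
//!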
//! # ChunkStore Locking
//!
//! We need to be able to restart the proxmox-backup service daemons,
//! so that we can update the software without rebooting the host. But
//! such restarts must not abort running backup jobs, so we need to
//! keep the old service running until those jobs are finished. This
//! implies that we need some kind of locking for the
//! ChunkStore. Please note that it is perfectly valid to have
//! multiple parallel ChunkStore writers, even when they write the
//! same chunk (because the chunk would have the same name and the
//! same data). The only real problem is garbage collection, because
//! we need to avoid deleting chunks which are still referenced.
//!
//! * Read Index Files:
//!
//! Acquire shared lock for .idx files.
//!
//!
//! * Delete Index Files:
//!
//! Acquire exclusive lock for .idx files. This makes sure that we do
//! not delete index files while they are still in use.
//!
//!
//! * Create Index Files:
//!
//! Acquire shared lock for ChunkStore (process wide).
//!
//! Note: When creating .idx files, we first create a temporary (.tmp)
//! file, then do an atomic rename (see the sketch after this list).
//!
//!
//! * Garbage Collect:
//!
//! Acquire exclusive lock for ChunkStore (process wide). If we already
//! hold a shared lock for the ChunkStore, try to upgrade that lock.
//!
//!
//! * Server Restart:
//!
//! Try to abort the running garbage collection to release exclusive
//! ChunkStore locks ASAP. Start the new service with the existing listening
//! socket.
//!
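//! The following sketch shows the create-index flow using `flock(2)` via
//! the `nix` crate plus a tmp-file rename. It only illustrates the
//! pattern: the lock file name, the paths and `index_data` are made-up
//! placeholders, and GC would take `FlockArg::LockExclusive` instead of
//! the shared lock:
//!
//! ```ignore
//! use nix::fcntl::{flock, FlockArg};
//! use std::os::unix::io::AsRawFd;
//!
//! // process-wide shared lock on the ChunkStore while writing chunks/indexes
//! let lockfile = std::fs::File::open("/path/to/store/.lock")?;
//! flock(lockfile.as_raw_fd(), FlockArg::LockSharedNonblock)?;
//!
//! // write the new index under a temporary name, then rename it atomically,
//! // so readers never see a half-written .idx file
//! std::fs::write("/path/to/store/backup.idx.tmp", &index_data)?;
//! std::fs::rename("/path/to/store/backup.idx.tmp", "/path/to/store/backup.idx")?;
//! ```
//!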
//!
//! # Garbage Collection (GC)
//!
//! Deleting backups is as easy as deleting the corresponding .idx
//! files. Unfortunately, this does not free up any storage, because
//! those files just contain references to chunks.
//!
//! To free up some storage, we run a garbage collection process at
//! regular intervals. The collector uses a mark-and-sweep
//! approach. In the first phase, it scans all .idx files to mark used
//! chunks. The second phase then removes all unmarked chunks from the
//! store.
//!
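//! A sketch of the two phases (all helpers here, `list_index_files`,
//! `chunks_in_index`, `mark_chunk`, `all_chunk_files` and `is_marked`,
//! are hypothetical placeholders; how a chunk is actually "marked" is
//! described in the `atime` section below):
//!
//! ```ignore
//! // Phase 1: mark every chunk that is still referenced by some index file.
//! for idx in list_index_files()? {
//!     for digest in chunks_in_index(&idx)? {
//!         mark_chunk(&digest)?;
//!     }
//! }
//! // Phase 2: sweep - remove every chunk that was not marked.
//! for chunk in all_chunk_files()? {
//!     if !is_marked(&chunk) {
//!         std::fs::remove_file(&chunk)?;
//!     }
//! }
//! ```
//!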
//! The above locking mechanism makes sure that we are the only
//! process running GC. But we still want to be able to create backups
//! during GC, so there may be multiple backup threads/tasks running,
//! either started before GC started or started while GC is running.
//!
//! ## `atime`-based GC
//!
//! The idea here is to mark chunks by updating the `atime` (access
//! timestamp) on the chunk file. This is quite simple and does not
//! need additional RAM.
//!
//! One minor problem is that recent Linux versions use the `relatime`
//! mount option by default for performance reasons (yes, we want
//! that). With `relatime`, the `atime` is written to disk only if the
//! file has been modified since the `atime` was last updated (i.e.
//! `mtime` is newer than the stored `atime`), or if the on-disk
//! `atime` is older than a certain threshold (24 hours by default).
//! So we may only delete chunks whose `atime` is older than 24 hours.
//!
//! Another problem arises from running backups. The mark phase does
//! not find any chunks from those backups, because there is no .idx
//! file for them yet (the index is only written once the backup
//! finishes). Chunks created or touched by those backups may have an
//! `atime` as old as the start time of those backups. Please note
//! that the backup start time may predate the GC start time. So we
//! may only delete chunks older than the start time of those running
//! backup jobs.
//!
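//! Put together, the sweep phase may only remove a chunk whose on-disk
//! `atime` is older than both limits. A rough sketch of that cutoff
//! (`gc_start`, `oldest_backup_start` and `chunk_atime` are placeholder
//! `SystemTime` values, not part of this crate, and the exact grace
//! period is an assumption):
//!
//! ```ignore
//! use std::time::Duration;
//!
//! // relatime may delay on-disk atime updates by up to 24 hours
//! let relatime_grace = Duration::from_secs(24 * 3600);
//! let cutoff = std::cmp::min(gc_start, oldest_backup_start) - relatime_grace;
//! if chunk_atime < cutoff {
//!     // only now is it safe for the sweep phase to remove this chunk
//! }
//! ```
//!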
//!
//! ## Store `marks` in RAM using a HASH
//!
//! Not sure if this is better. TODO

use anyhow::{bail, Error};

// Note: .pcat1 => Proxmox Catalog Format version 1
pub const CATALOG_NAME: &str = "catalog.pcat1.didx";

#[macro_export]
macro_rules! PROXMOX_BACKUP_PROTOCOL_ID_V1 {
    () => { "proxmox-backup-protocol-v1" }
}

#[macro_export]
macro_rules! PROXMOX_BACKUP_READER_PROTOCOL_ID_V1 {
    () => { "proxmox-backup-reader-protocol-v1" }
}

/// Unix system user used by proxmox-backup-proxy
pub const BACKUP_USER_NAME: &str = "backup";
/// Unix system group used by proxmox-backup-proxy
pub const BACKUP_GROUP_NAME: &str = "backup";

/// Return User info for the 'backup' user (``getpwnam_r(3)``)
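///
/// A rough usage sketch (the path below is a made-up placeholder, not
/// something this crate uses):
/// ```ignore
/// let user = backup_user()?;
/// let group = backup_group()?;
/// // hand a freshly created file over to backup:backup
/// nix::unistd::chown("/path/to/new/file", Some(user.uid), Some(group.gid))?;
/// ```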
pub fn backup_user() -> Result<nix::unistd::User, Error> {
    match nix::unistd::User::from_name(BACKUP_USER_NAME)? {
        Some(user) => Ok(user),
        None => bail!("Unable to lookup backup user."),
    }
}

/// Return Group info for the 'backup' group (``getgrnam(3)``)
pub fn backup_group() -> Result<nix::unistd::Group, Error> {
    match nix::unistd::Group::from_name(BACKUP_GROUP_NAME)? {
        Some(group) => Ok(group),
        None => bail!("Unable to lookup backup group."),
    }
}

mod file_formats;
pub use file_formats::*;

mod manifest;
pub use manifest::*;

mod crypt_config;
pub use crypt_config::*;

mod key_derivation;
pub use key_derivation::*;

mod crypt_reader;
pub use crypt_reader::*;

mod crypt_writer;
pub use crypt_writer::*;

mod checksum_reader;
pub use checksum_reader::*;

mod checksum_writer;
pub use checksum_writer::*;

mod chunker;
pub use chunker::*;

mod data_blob;
pub use data_blob::*;

mod data_blob_reader;
pub use data_blob_reader::*;

mod data_blob_writer;
pub use data_blob_writer::*;

mod catalog;
pub use catalog::*;

mod chunk_stream;
pub use chunk_stream::*;

mod chunk_stat;
pub use chunk_stat::*;

mod read_chunk;
pub use read_chunk::*;

mod chunk_store;
pub use chunk_store::*;

mod index;
pub use index::*;

mod fixed_index;
pub use fixed_index::*;

mod dynamic_index;
pub use dynamic_index::*;

mod backup_info;
pub use backup_info::*;

mod prune;
pub use prune::*;

mod datastore;
pub use datastore::*;

mod verify;
pub use verify::*;

mod catalog_shell;
pub use catalog_shell::*;

mod async_index_reader;
pub use async_index_reader::*;