]> git.proxmox.com Git - proxmox-backup.git/blame - src/backup.rs
require square brackets for ipv6 addresses
[proxmox-backup.git] / src / backup.rs
CommitLineData
39a4df61 1//! This module implements the proxmox backup data storage
d78345bc 2//!
39a4df61
DM
3//! Proxmox backup splits large files into chunks, and stores them
4//! deduplicated using a content addressable storage format.
d78345bc 5//!
39a4df61
DM
6//! A chunk is simply defined as a binary blob, which is stored inside a
7//! `ChunkStore`, addressed by the SHA256 digest of the binary blob.
8//!
9//! Index files are used to reconstruct the original file. They
10//! basically contain a list of SHA256 checksums. The `DynamicIndex*`
11//! format is able to deal with dynamic chunk sizes, whereas the
12//! `FixedIndex*` format is an optimization to store a list of equal
13//! sized chunks.
04652189
DM
14//!
15//! # ChunkStore Locking
16//!
17//! We need to be able to restart the proxmox-backup service daemons,
18//! so that we can update the software without rebooting the host. But
19//! such restarts must not abort running backup jobs, so we need to
20//! keep the old service running until those jobs are finished. This
c8ec450e 21//! implies that we need some kind of locking for the
04652189
DM
22//! ChunkStore. Please note that it is perfectly valid to have
23//! multiple parallel ChunkStore writers, even when they write the
24//! same chunk (because the chunk would have the same name and the
25//! same data). The only real problem is garbage collection, because
26//! we need to avoid deleting chunks which are still referenced.
27//!
28//! * Read Index Files:
29//!
30//! Acquire shared lock for .idx files.
31//!
32//!
33//! * Delete Index Files:
34//!
35//! Acquire exclusive lock for .idx files. This makes sure that we do
36//! not delete index files while they are still in use.
37//!
38//!
39//! * Create Index Files:
40//!
8a475734 41//! Acquire shared lock for ChunkStore (process wide).
04652189 42//!
faa8e694 43//! Note: When creating .idx files, we create a temporary (.tmp) file,
c8ec450e 44//! then do an atomic rename ...
04652189
DM
45//!
46//!
47//! * Garbage Collect:
48//!
8a475734 49//! Acquire exclusive lock for ChunkStore (process wide). If we
faa8e694 50//! already have a shared lock for the ChunkStore, try to upgrade that
8a475734 51//! lock.
04652189
DM
52//!
53//!
54//! * Server Restart
55//!
faa8e694
AL
56//! Try to abort the running garbage collection to release exclusive
57//! ChunkStore locks ASAP. Start the new service with the existing listening
04652189
DM
58//! socket.
59//!
8a475734 60//!
c8ec450e 61//! # Garbage Collection (GC)
8a475734
DM
62//!
63//! Deleting backups is as easy as deleting the corresponding .idx
64//! files. Unfortunately, this does not free up any storage, because
faa8e694 65//! those files just contain references to chunks.
8a475734
DM
66//!
67//! To free up some storage, we run a garbage collection process at
faa8e694 68//! regular intervals. The collector uses a mark and sweep
c374f054
DM
69//! approach. In the first phase, it scans all .idx files to mark used
70//! chunks. The second phase then removes all unmarked chunks from the
8a475734
DM
71//! store.
72//!
73//! The above locking mechanism makes sure that we are the only
c8ec450e
DM
74//! process running GC. But we still want to be able to create backups
75//! during GC, so there may be multiple backup threads/tasks
76//! running. Either started before GC started, or started while GC is
77//! running.
8a475734 78//!
c8ec450e 79//! ## `atime` based GC
8a475734 80//!
c8ec450e
DM
81//! The idea here is to mark chunks by updating the `atime` (access
82//! timestamp) on the chunk file. This is quite simple and does not
c374f054 83//! need additional RAM.
c8ec450e
DM
84//!
85//! One minor problem is that recent Linux versions use the `relatime`
86//! mount flag by default for performance reasons (yes, we want
87//! that). When enabled, `atime` data is written to the disk only if
88//! the file has been modified since the `atime` data was last updated
89//! (`mtime`), or if the file was last accessed more than a certain
c374f054
DM
90//! amount of time ago (by default 24h). So we may only delete chunks
91//! with `atime` older than 24 hours.
92//!
faa8e694 93//! Another problem arises from running backups. The mark phase does
c374f054
DM
94//! not find any chunks from those backups, because there is no .idx
95//! file for them (created after the backup). Chunks created or
96//! touched by those backups may have an `atime` as old as the start
faa8e694
AL
97//! time of those backups. Please note that the backup start time may
98//! predate the GC start time. So we may only delete chunks older than
c374f054 99//! the start time of those running backup jobs.
c8ec450e 100//!
c8ec450e
DM
101//!
102//! ## Store `marks` in RAM using a HASH
103//!
104//! Not sure if this is better. TODO
cbdd8c54 105
f7d4e4b5 106use anyhow::{bail, Error};
f74a03da 107
bf6e3217
DM
108// Note: .pcat1 => Proxmox Catalog Format version 1
109pub const CATALOG_NAME: &str = "catalog.pcat1.didx";
36493d4d 110
986bef16
DM
/// Expands to the string literal `"proxmox-backup-protocol-v1"`.
///
/// Implemented as a macro (rather than a `const`) so the expansion is a
/// literal token; presumably this lets it be combined with `concat!` at
/// call sites - confirm against users of this macro.
#[macro_export]
macro_rules! PROXMOX_BACKUP_PROTOCOL_ID_V1 {
    () => {
        "proxmox-backup-protocol-v1"
    };
}
c9ec0956 115
dd066d28
DM
/// Expands to the string literal `"proxmox-backup-reader-protocol-v1"`.
///
/// Implemented as a macro (rather than a `const`) so the expansion is a
/// literal token; presumably this lets it be combined with `concat!` at
/// call sites - confirm against users of this macro.
#[macro_export]
macro_rules! PROXMOX_BACKUP_READER_PROTOCOL_ID_V1 {
    () => {
        "proxmox-backup-reader-protocol-v1"
    };
}
120
f74a03da
DM
121/// Unix system user used by proxmox-backup-proxy
122pub const BACKUP_USER_NAME: &str = "backup";
a6ed5e12
TL
123/// Unix system group used by proxmox-backup-proxy
124pub const BACKUP_GROUP_NAME: &str = "backup";
f74a03da
DM
125
126/// Return User info for the 'backup' user (``getpwnam_r(3)``)
127pub fn backup_user() -> Result<nix::unistd::User, Error> {
128 match nix::unistd::User::from_name(BACKUP_USER_NAME)? {
129 Some(user) => Ok(user),
130 None => bail!("Unable to lookup backup user."),
131 }
132}
133
a6ed5e12
TL
134/// Return Group info for the 'backup' group (``getgrnam(3)``)
135pub fn backup_group() -> Result<nix::unistd::Group, Error> {
136 match nix::unistd::Group::from_name(BACKUP_GROUP_NAME)? {
137 Some(group) => Ok(group),
138 None => bail!("Unable to lookup backup user."),
139 }
140}
141
991abfa8
DM
142mod file_formats;
143pub use file_formats::*;
a7dd4830 144
59e9ba01
DM
145mod manifest;
146pub use manifest::*;
147
c38266c1
DM
148mod crypt_config;
149pub use crypt_config::*;
48b4b40b 150
826f309b
DM
151mod key_derivation;
152pub use key_derivation::*;
153
018d11bb
DM
154mod crypt_reader;
155pub use crypt_reader::*;
156
157mod crypt_writer;
158pub use crypt_writer::*;
159
160mod checksum_reader;
161pub use checksum_reader::*;
162
163mod checksum_writer;
164pub use checksum_writer::*;
165
7d83440c
WB
166mod chunker;
167pub use chunker::*;
168
3025b3a5
DM
169mod data_blob;
170pub use data_blob::*;
171
018d11bb
DM
172mod data_blob_reader;
173pub use data_blob_reader::*;
174
175mod data_blob_writer;
176pub use data_blob_writer::*;
177
89245fb5
DM
178mod catalog;
179pub use catalog::*;
9d135fe6 180
dafc27ae
DM
181mod chunk_stream;
182pub use chunk_stream::*;
183
7e336555
DM
184mod chunk_stat;
185pub use chunk_stat::*;
186
b8506736
DM
187mod read_chunk;
188pub use read_chunk::*;
189
e5064ba6
DM
190mod chunk_store;
191pub use chunk_store::*;
192
7bc1d727
WB
193mod index;
194pub use index::*;
195
e5064ba6
DM
196mod fixed_index;
197pub use fixed_index::*;
198
199mod dynamic_index;
200pub use dynamic_index::*;
201
b3483782
DM
202mod backup_info;
203pub use backup_info::*;
204
dc188491
DM
205mod prune;
206pub use prune::*;
207
e5064ba6
DM
208mod datastore;
209pub use datastore::*;
f14c96ea 210
c2009e53
DM
211mod verify;
212pub use verify::*;
213
f14c96ea
CE
214mod catalog_shell;
215pub use catalog_shell::*;
4a3adc3d
DC
216
217mod async_index_reader;
218pub use async_index_reader::*;