//! This module implements the proxmox backup data storage
//!
//! Proxmox backup splits large files into chunks, and stores them
//! deduplicated using a content-addressable storage format.
//!
//! A chunk is simply defined as a binary blob, which is stored inside a
//! `ChunkStore`, addressed by the SHA256 digest of the binary blob.
//!
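//! A rough sketch of how a chunk is addressed (using the `openssl` crate
//! for SHA256 is an assumption here; the actual on-disk layout is up to
//! the `ChunkStore`):
//!
//! ```ignore
//! let data: &[u8] = b"some chunk data";
//! // the chunk's identity is the SHA256 digest of its content
//! let digest: [u8; 32] = openssl::sha::sha256(data);
//! // hex-encode the digest to get the name used to address the chunk
//! let name: String = digest.iter().map(|b| format!("{:02x}", b)).collect();
//! ```
//!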
//! Index files are used to reconstruct the original file. They
//! basically contain a list of SHA256 checksums. The `DynamicIndex*`
//! format is able to deal with dynamic chunk sizes, whereas the
//! `FixedIndex*` format is an optimization to store a list of equally
//! sized chunks.
//!
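//! Conceptually, reconstructing a file from an index boils down to the
//! following (a sketch only; `index_digests` and `read_chunk_by_digest`
//! are hypothetical placeholders, not APIs of this crate):
//!
//! ```ignore
//! let mut file = Vec::new();
//! for digest in index_digests {
//!     // look the chunk up in the ChunkStore by its SHA256 digest
//!     file.extend_from_slice(&read_chunk_by_digest(&digest)?);
//! }
//! ```
//!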
//! # ChunkStore Locking
//!
//! We need to be able to restart the proxmox-backup service daemons,
//! so that we can update the software without rebooting the host. But
//! such restarts must not abort running backup jobs, so we need to
//! keep the old service running until those jobs are finished. This
//! implies that we need some kind of locking for the
//! ChunkStore. Please note that it is perfectly valid to have
//! multiple parallel ChunkStore writers, even when they write the
//! same chunk (because the chunk would have the same name and the
//! same data). The only real problem is garbage collection, because
//! we need to avoid deleting chunks which are still referenced.
//!
//! * Read Index Files:
//!
//! Acquire shared lock for .idx files.
//!
//!
//! * Delete Index Files:
//!
//! Acquire exclusive lock for .idx files. This makes sure that we do
//! not delete index files while they are still in use.
//!
//!
//! * Create Index Files:
//!
//! Acquire shared lock for ChunkStore (process wide).
//!
//! Note: When creating .idx files, we first create a temporary (.tmp)
//! file, then do an atomic rename (see the sketch after this list).
//!
//!
//! * Garbage Collect:
//!
//! Acquire exclusive lock for ChunkStore (process wide). If we already
//! hold a shared lock for the ChunkStore, try to upgrade that lock.
//!
//!
//! * Server Restart:
//!
//! Try to abort the running garbage collection to release exclusive
//! ChunkStore locks ASAP. Start the new service with the existing listening
//! socket.
//!
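//! The following sketch shows the create-index flow using `flock(2)` via
//! the `nix` crate plus a tmp-file rename. It only illustrates the
//! pattern: the lock file name, the paths and `index_data` are made-up
//! placeholders, and GC would take `FlockArg::LockExclusive` instead of
//! the shared lock:
//!
//! ```ignore
//! use nix::fcntl::{flock, FlockArg};
//! use std::os::unix::io::AsRawFd;
//!
//! // process-wide shared lock on the ChunkStore while writing chunks/indexes
//! let lockfile = std::fs::File::open("/path/to/store/.lock")?;
//! flock(lockfile.as_raw_fd(), FlockArg::LockSharedNonblock)?;
//!
//! // write the new index under a temporary name, then rename it atomically,
//! // so readers never see a half-written .idx file
//! std::fs::write("/path/to/store/backup.idx.tmp", &index_data)?;
//! std::fs::rename("/path/to/store/backup.idx.tmp", "/path/to/store/backup.idx")?;
//! ```
//!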
//!
//! # Garbage Collection (GC)
//!
//! Deleting backups is as easy as deleting the corresponding .idx
//! files. Unfortunately, this does not free up any storage, because
//! those files just contain references to chunks.
//!
//! To free up some storage, we run a garbage collection process at
//! regular intervals. The collector uses a mark-and-sweep
//! approach. In the first phase, it scans all .idx files to mark used
//! chunks. The second phase then removes all unmarked chunks from the
//! store.
//!
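//! A sketch of the two phases (all helpers here, `list_index_files`,
//! `chunks_in_index`, `mark_chunk`, `all_chunk_files` and `is_marked`,
//! are hypothetical placeholders; how a chunk is actually "marked" is
//! described in the `atime` section below):
//!
//! ```ignore
//! // Phase 1: mark every chunk that is still referenced by some index file.
//! for idx in list_index_files()? {
//!     for digest in chunks_in_index(&idx)? {
//!         mark_chunk(&digest)?;
//!     }
//! }
//! // Phase 2: sweep - remove every chunk that was not marked.
//! for chunk in all_chunk_files()? {
//!     if !is_marked(&chunk) {
//!         std::fs::remove_file(&chunk)?;
//!     }
//! }
//! ```
//!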
//! The above locking mechanism makes sure that we are the only
//! process running GC. But we still want to be able to create backups
//! during GC, so there may be multiple backup threads/tasks running,
//! either started before GC started or started while GC is running.
//!
//! ## `atime`-based GC
//!
//! The idea here is to mark chunks by updating the `atime` (access
//! timestamp) on the chunk file. This is quite simple and does not
//! need additional RAM.
//!
//! One minor problem is that recent Linux versions use the `relatime`
//! mount option by default for performance reasons (yes, we want
//! that). With `relatime`, the `atime` is written to disk only if the
//! file has been modified since the `atime` was last updated (i.e.
//! `mtime` is newer than the stored `atime`), or if the on-disk
//! `atime` is older than a certain threshold (24 hours by default).
//! So we may only delete chunks whose `atime` is older than 24 hours.
//!
//! Another problem arises from running backups. The mark phase does
//! not find any chunks from those backups, because there is no .idx
//! file for them yet (the index is only written once the backup
//! finishes). Chunks created or touched by those backups may have an
//! `atime` as old as the start time of those backups. Please note
//! that the backup start time may predate the GC start time. So we
//! may only delete chunks older than the start time of those running
//! backup jobs.
//!
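//! Put together, the sweep phase may only remove a chunk whose on-disk
//! `atime` is older than both limits. A rough sketch of that cutoff
//! (`gc_start`, `oldest_backup_start` and `chunk_atime` are placeholder
//! `SystemTime` values, not part of this crate, and the exact grace
//! period is an assumption):
//!
//! ```ignore
//! use std::time::Duration;
//!
//! // relatime may delay on-disk atime updates by up to 24 hours
//! let relatime_grace = Duration::from_secs(24 * 3600);
//! let cutoff = std::cmp::min(gc_start, oldest_backup_start) - relatime_grace;
//! if chunk_atime < cutoff {
//!     // only now is it safe for the sweep phase to remove this chunk
//! }
//! ```
//!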
//!
//! ## Store `marks` in RAM using a HASH
//!
//! Not sure if this is better. TODO

use anyhow::{bail, Error};

// Note: .pcat1 => Proxmox Catalog Format version 1
pub const CATALOG_NAME: &str = "catalog.pcat1.didx";

#[macro_export]
macro_rules! PROXMOX_BACKUP_PROTOCOL_ID_V1 {
    () => { "proxmox-backup-protocol-v1" }
}

#[macro_export]
macro_rules! PROXMOX_BACKUP_READER_PROTOCOL_ID_V1 {
    () => { "proxmox-backup-reader-protocol-v1" }
}

/// Unix system user used by proxmox-backup-proxy
pub const BACKUP_USER_NAME: &str = "backup";
/// Unix system group used by proxmox-backup-proxy
pub const BACKUP_GROUP_NAME: &str = "backup";

/// Return User info for the 'backup' user (``getpwnam_r(3)``)
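///
/// A rough usage sketch (the path below is a made-up placeholder, not
/// something this crate uses):
/// ```ignore
/// let user = backup_user()?;
/// let group = backup_group()?;
/// // hand a freshly created file over to backup:backup
/// nix::unistd::chown("/path/to/new/file", Some(user.uid), Some(group.gid))?;
/// ```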
pub fn backup_user() -> Result<nix::unistd::User, Error> {
    match nix::unistd::User::from_name(BACKUP_USER_NAME)? {
        Some(user) => Ok(user),
        None => bail!("Unable to lookup backup user."),
    }
}

/// Return Group info for the 'backup' group (``getgrnam(3)``)
pub fn backup_group() -> Result<nix::unistd::Group, Error> {
    match nix::unistd::Group::from_name(BACKUP_GROUP_NAME)? {
        Some(group) => Ok(group),
        None => bail!("Unable to lookup backup group."),
    }
}

mod file_formats;
pub use file_formats::*;

mod manifest;
pub use manifest::*;

mod crypt_config;
pub use crypt_config::*;

mod key_derivation;
pub use key_derivation::*;

mod crypt_reader;
pub use crypt_reader::*;

mod crypt_writer;
pub use crypt_writer::*;

mod checksum_reader;
pub use checksum_reader::*;

mod checksum_writer;
pub use checksum_writer::*;

mod chunker;
pub use chunker::*;

mod data_blob;
pub use data_blob::*;

mod data_blob_reader;
pub use data_blob_reader::*;

mod data_blob_writer;
pub use data_blob_writer::*;

mod catalog;
pub use catalog::*;

mod chunk_stream;
pub use chunk_stream::*;

mod chunk_stat;
pub use chunk_stat::*;

mod read_chunk;
pub use read_chunk::*;

mod chunk_store;
pub use chunk_store::*;

mod index;
pub use index::*;

mod fixed_index;
pub use fixed_index::*;

mod dynamic_index;
pub use dynamic_index::*;

mod backup_info;
pub use backup_info::*;

mod prune;
pub use prune::*;

mod datastore;
pub use datastore::*;

mod verify;
pub use verify::*;

mod catalog_shell;
pub use catalog_shell::*;

mod async_index_reader;
pub use async_index_reader::*;