]>
Commit | Line | Data |
---|---|---|
39a4df61 | 1 | //! This module implements the proxmox backup data storage |
d78345bc | 2 | //! |
39a4df61 DM |
3 | //! Proxmox backup splits large files into chunks, and stores them |
4 | //! deduplicated using a content addressable storage format. | |
d78345bc | 5 | //! |
39a4df61 DM |
6 | //! A chunk is simply defined as a binary blob, which is stored inside a |
7 | //! `ChunkStore`, addressed by the SHA256 digest of the binary blob. | |
8 | //! | |
9 | //! Index files are used to reconstruct the original file. They | |
10 | //! basically contain a list of SHA256 checksums. The `DynamicIndex*` | |
11 | //! format is able to deal with dynamic chunk sizes, whereas the | |
12 | //! `FixedIndex*` format is an optimization to store a list of equal | |
13 | //! sized chunks. | |
04652189 DM |
14 | //! |
15 | //! # ChunkStore Locking | |
16 | //! | |
17 | //! We need to be able to restart the proxmox-backup service daemons, | |
18 | //! so that we can update the software without rebooting the host. But | |
19 | //! such restarts must not abort running backup jobs, so we need to | |
20 | //! keep the old service running until those jobs are finished. This | |
c8ec450e | 21 | //! implies that we need some kind of locking for the |
04652189 DM |
22 | //! ChunkStore. Please note that it is perfectly valid to have |
23 | //! multiple parallel ChunkStore writers, even when they write the | |
24 | //! same chunk (because the chunk would have the same name and the | |
25 | //! same data). The only real problem is garbage collection, because | |
26 | //! we need to avoid deleting chunks which are still referenced. | |
27 | //! | |
28 | //! * Read Index Files: | |
29 | //! | |
30 | //! Acquire shared lock for .idx files. | |
31 | //! | |
32 | //! | |
33 | //! * Delete Index Files: | |
34 | //! | |
35 | //! Acquire exclusive lock for .idx files. This makes sure that we do | |
36 | //! not delete index files while they are still in use. | |
37 | //! | |
38 | //! | |
39 | //! * Create Index Files: | |
40 | //! | |
8a475734 | 41 | //! Acquire shared lock for ChunkStore (process wide). |
04652189 | 42 | //! |
faa8e694 | 43 | //! Note: When creating .idx files, we create a temporary (.tmp) file, |
c8ec450e | 44 | //! then do an atomic rename ... |
04652189 DM |
45 | //! |
46 | //! | |
47 | //! * Garbage Collect: | |
48 | //! | |
8a475734 | 49 | //! Acquire exclusive lock for ChunkStore (process wide). If we |
faa8e694 | 50 | //! already have a shared lock for the ChunkStore, try to upgrade that |
8a475734 | 51 | //! lock. |
04652189 DM |
52 | //! |
53 | //! | |
54 | //! * Server Restart | |
55 | //! | |
faa8e694 AL |
56 | //! Try to abort the running garbage collection to release exclusive |
57 | //! ChunkStore locks ASAP. Start the new service with the existing listening | |
04652189 DM |
58 | //! socket. |
59 | //! | |
8a475734 | 60 | //! |
c8ec450e | 61 | //! # Garbage Collection (GC) |
8a475734 DM |
62 | //! |
63 | //! Deleting backups is as easy as deleting the corresponding .idx | |
64 | //! files. Unfortunately, this does not free up any storage, because | |
faa8e694 | 65 | //! those files just contain references to chunks. |
8a475734 DM |
66 | //! |
67 | //! To free up some storage, we run a garbage collection process at | |
faa8e694 | 68 | //! regular intervals. The collector uses a mark and sweep |
c374f054 DM |
69 | //! approach. In the first phase, it scans all .idx files to mark used |
70 | //! chunks. The second phase then removes all unmarked chunks from the | |
8a475734 DM |
71 | //! store. |
72 | //! | |
73 | //! The above locking mechanism makes sure that we are the only | |
c8ec450e DM |
74 | //! process running GC. But we still want to be able to create backups |
75 | //! during GC, so there may be multiple backup threads/tasks | |
76 | //! running. Either started before GC started, or started while GC is | |
77 | //! running. | |
8a475734 | 78 | //! |
c8ec450e | 79 | //! ## `atime` based GC |
8a475734 | 80 | //! |
c8ec450e DM |
81 | //! The idea here is to mark chunks by updating the `atime` (access |
82 | //! timestamp) on the chunk file. This is quite simple and does not | |
c374f054 | 83 | //! need additional RAM. |
c8ec450e DM |
84 | //! |
85 | //! One minor problem is that recent Linux versions use the `relatime` | |
86 | //! mount flag by default for performance reasons (yes, we want | |
87 | //! that). When enabled, `atime` data is written to the disk only if | |
88 | //! the file has been modified since the `atime` data was last updated | |
89 | //! (`mtime`), or if the file was last accessed more than a certain | |
c374f054 DM |
90 | //! amount of time ago (by default 24h). So we may only delete chunks |
91 | //! with `atime` older than 24 hours. | |
92 | //! | |
faa8e694 | 93 | //! Another problem arises from running backups. The mark phase does |
c374f054 DM |
94 | //! not find any chunks from those backups, because there is no .idx |
95 | //! file for them (created after the backup). Chunks created or | |
96 | //! touched by those backups may have an `atime` as old as the start | |
faa8e694 AL |
97 | //! time of those backups. Please note that the backup start time may |
98 | //! predate the GC start time. So we may only delete chunks older than | |
c374f054 | 99 | //! the start time of those running backup jobs. |
c8ec450e | 100 | //! |
c8ec450e DM |
101 | //! |
102 | //! ## Store `marks` in RAM using a HASH | |
103 | //! | |
104 | //! Not sure if this is better. TODO | |
cbdd8c54 | 105 | |
f7d4e4b5 | 106 | use anyhow::{bail, Error}; |
f74a03da | 107 | |
bf6e3217 DM |
108 | // Note: .pcat1 => Proxmox Catalog Format version 1 |
109 | pub const CATALOG_NAME: &str = "catalog.pcat1.didx"; | |
36493d4d | 110 | |
986bef16 DM |
111 | #[macro_export] |
112 | macro_rules! PROXMOX_BACKUP_PROTOCOL_ID_V1 { /* takes no arguments; expands to the string literal below */ | |
113 | () => { "proxmox-backup-protocol-v1" } | |
114 | } | |
c9ec0956 | 115 | |
dd066d28 DM |
116 | #[macro_export] |
117 | macro_rules! PROXMOX_BACKUP_READER_PROTOCOL_ID_V1 { /* takes no arguments; expands to the string literal below */ | |
118 | () => { "proxmox-backup-reader-protocol-v1" } | |
119 | } | |
120 | ||
f74a03da DM |
121 | /// Unix system user used by proxmox-backup-proxy |
122 | pub const BACKUP_USER_NAME: &str = "backup"; | |
a6ed5e12 TL |
123 | /// Unix system group used by proxmox-backup-proxy |
124 | pub const BACKUP_GROUP_NAME: &str = "backup"; | |
f74a03da DM |
125 | |
126 | /// Return User info for the 'backup' user (``getpwnam_r(3)``); errors if the lookup fails or no user named `BACKUP_USER_NAME` exists | |
127 | pub fn backup_user() -> Result<nix::unistd::User, Error> { | |
128 | match nix::unistd::User::from_name(BACKUP_USER_NAME)? { | |
129 | Some(user) => Ok(user), | |
130 | None => bail!("Unable to lookup backup user."), | |
131 | } | |
132 | } | |
133 | ||
a6ed5e12 TL |
134 | /// Return Group info for the 'backup' group (``getgrnam_r(3)``); errors if the lookup fails or no group named `BACKUP_GROUP_NAME` exists |
135 | pub fn backup_group() -> Result<nix::unistd::Group, Error> { | |
136 | match nix::unistd::Group::from_name(BACKUP_GROUP_NAME)? { | |
137 | Some(group) => Ok(group), | |
138 | None => bail!("Unable to lookup backup group."), | |
139 | } | |
140 | } | |
141 | ||
991abfa8 DM |
142 | mod file_formats; |
143 | pub use file_formats::*; | |
a7dd4830 | 144 | |
59e9ba01 DM |
145 | mod manifest; |
146 | pub use manifest::*; | |
147 | ||
c38266c1 DM |
148 | mod crypt_config; |
149 | pub use crypt_config::*; | |
48b4b40b | 150 | |
826f309b DM |
151 | mod key_derivation; |
152 | pub use key_derivation::*; | |
153 | ||
018d11bb DM |
154 | mod crypt_reader; |
155 | pub use crypt_reader::*; | |
156 | ||
157 | mod crypt_writer; | |
158 | pub use crypt_writer::*; | |
159 | ||
160 | mod checksum_reader; | |
161 | pub use checksum_reader::*; | |
162 | ||
163 | mod checksum_writer; | |
164 | pub use checksum_writer::*; | |
165 | ||
7d83440c WB |
166 | mod chunker; |
167 | pub use chunker::*; | |
168 | ||
3025b3a5 DM |
169 | mod data_blob; |
170 | pub use data_blob::*; | |
171 | ||
018d11bb DM |
172 | mod data_blob_reader; |
173 | pub use data_blob_reader::*; | |
174 | ||
175 | mod data_blob_writer; | |
176 | pub use data_blob_writer::*; | |
177 | ||
89245fb5 DM |
178 | mod catalog; |
179 | pub use catalog::*; | |
9d135fe6 | 180 | |
dafc27ae DM |
181 | mod chunk_stream; |
182 | pub use chunk_stream::*; | |
183 | ||
7e336555 DM |
184 | mod chunk_stat; |
185 | pub use chunk_stat::*; | |
186 | ||
b8506736 DM |
187 | mod read_chunk; |
188 | pub use read_chunk::*; | |
189 | ||
e5064ba6 DM |
190 | mod chunk_store; |
191 | pub use chunk_store::*; | |
192 | ||
7bc1d727 WB |
193 | mod index; |
194 | pub use index::*; | |
195 | ||
e5064ba6 DM |
196 | mod fixed_index; |
197 | pub use fixed_index::*; | |
198 | ||
199 | mod dynamic_index; | |
200 | pub use dynamic_index::*; | |
201 | ||
b3483782 DM |
202 | mod backup_info; |
203 | pub use backup_info::*; | |
204 | ||
dc188491 DM |
205 | mod prune; |
206 | pub use prune::*; | |
207 | ||
e5064ba6 DM |
208 | mod datastore; |
209 | pub use datastore::*; | |
f14c96ea | 210 | |
c2009e53 DM |
211 | mod verify; |
212 | pub use verify::*; | |
213 | ||
f14c96ea CE |
214 | mod catalog_shell; |
215 | pub use catalog_shell::*; | |
4a3adc3d DC |
216 | |
217 | mod async_index_reader; | |
218 | pub use async_index_reader::*; |