]> git.proxmox.com Git - proxmox-backup.git/blob - pbs-datastore/src/lib.rs
cfe399218427d46cddd9ac8637a80f29a05bae77
[proxmox-backup.git] / pbs-datastore / src / lib.rs
1 //! This module implements the data storage and access layer.
2 //!
3 //! # Data formats
4 //!
5 //! PBS splits large files into chunks, and stores them deduplicated using
6 //! a content addressable storage format.
7 //!
8 //! Backup snapshots are stored as folders containing a manifest file and
9 //! potentially one or more index or blob files.
10 //!
11 //! The manifest contains hashes of all other files and can be signed by
12 //! the client.
13 //!
14 //! Blob files contain data directly. They are used for config files and
15 //! the like.
16 //!
17 //! Index files are used to reconstruct an original file. They contain a
18 //! list of SHA256 checksums. The `DynamicIndex*` format is able to deal
19 //! with dynamic chunk sizes (CT and host backups), whereas the
20 //! `FixedIndex*` format is an optimization to store a list of equal sized
21 //! chunks (VMs, whole block devices).
22 //!
23 //! A chunk is defined as a binary blob, which is stored inside a
24 //! [ChunkStore](struct.ChunkStore.html) instead of the backup directory
25 //! directly, and can be addressed by its SHA256 digest.
26 //!
27 //!
28 //! # Garbage Collection (GC)
29 //!
30 //! Deleting backups is as easy as deleting the corresponding .idx files.
31 //! However, this does not free up any storage, because those files just
32 //! contain references to chunks.
33 //!
34 //! To free up some storage, we run a garbage collection process at
35 //! regular intervals. The collector uses a mark and sweep approach. In
36 //! the first phase, it scans all .idx files to mark used chunks. The
37 //! second phase then removes all unmarked chunks from the store.
38 //!
39 //! The locking mechanisms mentioned below make sure that we are the only
40 //! process running GC. We still want to be able to create backups during
41 //! GC, so there may be multiple backup threads/tasks running, either
42 //! started before GC, or while GC is running.
43 //!
44 //! ## `atime` based GC
45 //!
46 //! The idea here is to mark chunks by updating the `atime` (access
47 //! timestamp) on the chunk file. This is quite simple and does not need
48 //! additional RAM.
49 //!
50 //! One minor problem is that recent Linux versions use the `relatime`
51 //! mount flag by default for performance reasons (and we want that). When
52 //! enabled, `atime` data is written to the disk only if the file has been
53 //! modified since the `atime` data was last updated (`mtime`), or if the
54 //! file was last accessed more than a certain amount of time ago (by
55 //! default 24h). So we may only delete chunks with `atime` older than 24
56 //! hours.
57 //!
58 //! Another problem arises from running backups. The mark phase does not
59 //! find any chunks from those backups, because there is no .idx file for
60 //! them (created after the backup). Chunks created or touched by those
61 //! backups may have an `atime` as old as the start time of those backups.
62 //! Please note that the backup start time may predate the GC start time.
63 //! So we may only delete chunks older than the start time of those
64 //! running backup jobs, which might be more than 24h back (this is the
65 //! reason why ProcessLocker exclusive locks only have to be exclusive
66 //! between processes, since within one we can determine the age of the
67 //! oldest shared lock).
68 //!
69 //! ## Store `marks` in RAM using a HASH
70 //!
71 //! Might be better. Under investigation.
72 //!
73 //!
74 //! # Locking
75 //!
76 //! Since PBS allows multiple potentially interfering operations at the
77 //! same time (e.g. garbage collect, prune, multiple backup creations
78 //! (only in separate groups), forget, ...), these need to lock against
79 //! each other in certain scenarios. There is no overarching global lock
80 //! though, instead always the finest grained lock possible is used,
81 //! because running these operations concurrently is treated as a feature
82 //! on its own.
83 //!
84 //! ## Inter-process Locking
85 //!
86 //! We need to be able to restart the proxmox-backup service daemons, so
87 //! that we can update the software without rebooting the host. But such
88 //! restarts must not abort running backup jobs, so we need to keep the
89 //! old service running until those jobs are finished. This implies that
90 //! we need some kind of locking for modifying chunks and indices in the
91 //! ChunkStore.
92 //!
93 //! Please note that it is perfectly valid to have multiple
94 //! parallel ChunkStore writers, even when they write the same chunk
95 //! (because the chunk would have the same name and the same data, and
96 //! writes are completed atomically via a rename). The only problem is
97 //! garbage collection, because we need to avoid deleting chunks which are
98 //! still referenced.
99 //!
100 //! To do this we use the
101 //! [ProcessLocker](../tools/struct.ProcessLocker.html).
102 //!
103 //! ### ChunkStore-wide
104 //!
105 //! * Create Index Files:
106 //!
107 //! Acquire shared lock for ChunkStore.
108 //!
109 //! Note: When creating .idx files, we create a temporary .tmp file,
110 //! then do an atomic rename.
111 //!
112 //! * Garbage Collect:
113 //!
114 //! Acquire exclusive lock for ChunkStore. If we already have
115 //! a shared lock for the ChunkStore, try to upgrade that
116 //! lock.
117 //!
118 //! Exclusive locks only work _between processes_. It is valid to have an
119 //! exclusive and one or more shared locks held within one process. Writing
120 //! chunks within one process is synchronized using the gc_mutex.
121 //!
122 //! On server restart, we stop any running GC in the old process to avoid
123 //! having the exclusive lock held for too long.
124 //!
125 //! ## Locking table
126 //!
127 //! The table below shows all operations that play a role in locking, and which
128 //! mechanisms are used to make their concurrent usage safe.
129 //!
130 //! | starting ><br>v during | read index file | create index file | GC mark | GC sweep | update manifest | forget | prune | create backup | verify | reader api |
131 //! |-|-|-|-|-|-|-|-|-|-|-|
132 //! | **read index file** | / | / | / | / | / | mmap stays valid, oldest_shared_lock prevents GC | see forget column | / | / | / |
133 //! | **create index file** | / | / | / | / | / | / | / | /, happens at the end, after all chunks are touched | /, only happens without a manifest | / |
134 //! | **GC mark** | / | Datastore process-lock shared | gc_mutex, exclusive ProcessLocker | gc_mutex | /, GC only cares about index files, not manifests | tells GC about removed chunks | see forget column | /, index files don’t exist yet | / | / |
135 //! | **GC sweep** | / | Datastore process-lock shared | gc_mutex, exclusive ProcessLocker | gc_mutex | / | /, chunks already marked | see forget column | chunks get touched; chunk_store.mutex; oldest PL lock | / | / |
136 //! | **update manifest** | / | / | / | / | update_manifest lock | update_manifest lock, remove dir under lock | see forget column | /, “write manifest” happens at the end | /, can call “write manifest”, see that column | / |
137 //! | **forget** | / | / | removed_during_gc mutex is held during unlink | marking done, doesn’t matter if forgotten now | update_manifest lock, forget waits for lock | /, unlink is atomic | causes forget to fail, but that’s OK | running backup has snapshot flock | /, potentially detects missing folder | shared snap flock |
138 //! | **prune** | / | / | see forget row | see forget row | see forget row | causes warn in prune, but no error | see forget column | running and last non-running can’t be pruned | see forget row | shared snap flock |
139 //! | **create backup** | / | only time this happens, thus has snapshot flock | / | chunks get touched; chunk_store.mutex; oldest PL lock | no lock, but cannot exist beforehand | snapshot flock, can’t be forgotten | running and last non-running can’t be pruned | snapshot group flock, only one running per group | /, won’t be verified since manifest missing | / |
140 //! | **verify** | / | / | / | / | see “update manifest” row | /, potentially detects missing folder | see forget column | / | /, but useless (“update manifest” protects itself) | / |
141 //! | **reader api** | / | / | / | /, open snap can’t be forgotten, so ref must exist | / | prevented by shared snap flock | prevented by shared snap flock | / | / | /, lock is shared |
142 //! * / = no interaction
143 //! * shared/exclusive from POV of 'starting' process
144
/// File name used for the catalog.
///
/// The `.pcat1` component stands for "Proxmox Catalog Format version 1".
pub const CATALOG_NAME: &str = "catalog.pcat1.didx";
147
/// Expands to the string literal `"proxmox-backup-protocol-v1"`, the
/// identifier of version 1 of the backup protocol.
///
/// The macro takes no arguments. Because the expansion is a literal token
/// (not a `const` item), it can also be passed to literal-only built-in
/// macros such as `concat!`.
#[macro_export]
macro_rules! PROXMOX_BACKUP_PROTOCOL_ID_V1 {
    () => ("proxmox-backup-protocol-v1");
}
154
/// Expands to the string literal `"proxmox-backup-reader-protocol-v1"`, the
/// identifier of version 1 of the backup reader protocol.
///
/// The macro takes no arguments. Because the expansion is a literal token
/// (not a `const` item), it can also be passed to literal-only built-in
/// macros such as `concat!`.
#[macro_export]
macro_rules! PROXMOX_BACKUP_READER_PROTOCOL_ID_V1 {
    () => ("proxmox-backup-reader-protocol-v1");
}
161
162 pub mod backup_info;
163 pub mod catalog;
164 pub mod cached_chunk_reader;
165 pub mod checksum_reader;
166 pub mod checksum_writer;
167 pub mod chunk_stat;
168 pub mod chunk_store;
169 pub mod chunker;
170 pub mod crypt_reader;
171 pub mod crypt_writer;
172 pub mod data_blob;
173 pub mod data_blob_reader;
174 pub mod data_blob_writer;
175 pub mod file_formats;
176 pub mod index;
177 pub mod manifest;
178 pub mod paperkey;
179 pub mod prune;
180 pub mod read_chunk;
181 pub mod store_progress;
182 pub mod task;
183
184 pub mod dynamic_index;
185 pub mod fixed_index;
186
187 pub use backup_info::{BackupDir, BackupGroup, BackupInfo};
188 pub use checksum_reader::ChecksumReader;
189 pub use checksum_writer::ChecksumWriter;
190 pub use chunk_store::ChunkStore;
191 pub use chunker::Chunker;
192 pub use crypt_reader::CryptReader;
193 pub use crypt_writer::CryptWriter;
194 pub use data_blob::DataBlob;
195 pub use data_blob_reader::DataBlobReader;
196 pub use data_blob_writer::DataBlobWriter;
197 pub use manifest::BackupManifest;
198 pub use store_progress::StoreProgress;