]>
Commit | Line | Data |
---|---|---|
0433db19 | 1 | use std::fs::File; |
4cf0ced9 | 2 | use std::io::{self, BufWriter, Seek, SeekFrom, Write}; |
c443f58b | 3 | use std::ops::Range; |
0433db19 | 4 | use std::os::unix::io::AsRawFd; |
367f002e | 5 | use std::path::{Path, PathBuf}; |
4cf0ced9 | 6 | use std::sync::{Arc, Mutex}; |
a6f87283 | 7 | use std::task::Context; |
4cf0ced9 | 8 | use std::pin::Pin; |
367f002e | 9 | |
f7d4e4b5 | 10 | use anyhow::{bail, format_err, Error}; |
0433db19 | 11 | |
9b2b627f | 12 | use proxmox::tools::io::ReadExt; |
f569acc5 | 13 | use proxmox::tools::uuid::Uuid; |
f35197f4 | 14 | use proxmox::tools::vec; |
57e50fb9 | 15 | use proxmox::tools::mmap::Mmap; |
a6f87283 | 16 | use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation}; |
8ea3b1d1 | 17 | |
367f002e WB |
18 | use super::chunk_stat::ChunkStat; |
19 | use super::chunk_store::ChunkStore; | |
c443f58b | 20 | use super::index::ChunkReadInfo; |
367f002e | 21 | use super::read_chunk::ReadChunk; |
f569acc5 WB |
22 | use super::Chunker; |
23 | use super::IndexFile; | |
4ee8f53d | 24 | use super::{DataBlob, DataChunkBuilder}; |
e693818a | 25 | use crate::tools::{self, epoch_now_u64}; |
f98ac774 | 26 | |
/// On-disk header of a dynamic index file (`.didx`).
///
/// The layout is fixed (`#[repr(C)]`) and padded to exactly 4096 bytes, so the
/// header occupies one page and the index entries start right behind it.
#[repr(C)]
pub struct DynamicIndexHeader {
    /// Magic number identifying the file type.
    pub magic: [u8; 8],
    /// Identifier generated when the index is created.
    pub uuid: [u8; 16],
    /// Creation timestamp, stored little endian.
    pub ctime: u64,
    /// Sha256 over the index ``SHA256(offset1||digest1||offset2||digest2||...)``
    pub index_csum: [u8; 32],
    /// Padding: 8 + 16 + 8 + 32 + 4032 = 4096 bytes (one page).
    reserved: [u8; 4032],
}
9ea4bce4 | 37 | proxmox::static_assert_size!(DynamicIndexHeader, 4096); |
990b930f WB |
38 | // TODO: Once non-Copy unions are stabilized, use: |
39 | // union DynamicIndexHeader { | |
40 | // reserved: [u8; 4096], | |
41 | // pub data: DynamicIndexHeaderData, | |
42 | // } | |
77703d95 | 43 | |
/// One record of the index table: where a chunk ends and which digest names it.
///
/// With `#[repr(C)]` a record is 8 + 32 = 40 bytes.
#[repr(C)]
#[derive(Clone, Debug)]
pub struct DynamicEntry {
    /// End offset of the chunk, stored little endian.
    end_le: u64,
    /// 32 byte digest of the chunk.
    digest: [u8; 32],
}

impl DynamicEntry {
    /// End offset of the chunk, converted to host byte order.
    #[inline]
    pub fn end(&self) -> u64 {
        // Reinterpreting the stored value's native bytes as little endian is
        // exactly what `u64::from_le` does.
        u64::from_le_bytes(self.end_le.to_ne_bytes())
    }
}
57 | ||
93d5d779 | 58 | pub struct DynamicIndexReader { |
728797d0 | 59 | _file: File, |
9f49fe1d | 60 | pub size: usize, |
57e50fb9 | 61 | index: Mmap<DynamicEntry>, |
9f49fe1d DM |
62 | pub uuid: [u8; 16], |
63 | pub ctime: u64, | |
16ff6b7c | 64 | pub index_csum: [u8; 32], |
77703d95 DM |
65 | } |
66 | ||
93d5d779 | 67 | impl DynamicIndexReader { |
d48a9955 | 68 | pub fn open(path: &Path) -> Result<Self, Error> { |
0f0a35b3 DM |
69 | File::open(path) |
70 | .map_err(Error::from) | |
57e50fb9 | 71 | .and_then(Self::new) |
0f0a35b3 | 72 | .map_err(|err| format_err!("Unable to open dynamic index {:?} - {}", path, err)) |
d48a9955 DM |
73 | } |
74 | ||
75 | pub fn new(mut file: std::fs::File) -> Result<Self, Error> { | |
f569acc5 WB |
76 | if let Err(err) = |
77 | nix::fcntl::flock(file.as_raw_fd(), nix::fcntl::FlockArg::LockSharedNonblock) | |
78 | { | |
0f0a35b3 DM |
79 | bail!("unable to get shared lock - {}", err); |
80 | } | |
81 | ||
57e50fb9 | 82 | // FIXME: This is NOT OUR job! Check the callers of this method and remove this! |
d48a9955 DM |
83 | file.seek(SeekFrom::Start(0))?; |
84 | ||
93d5d779 | 85 | let header_size = std::mem::size_of::<DynamicIndexHeader>(); |
77703d95 | 86 | |
ba5e6747 | 87 | let header: Box<DynamicIndexHeader> = unsafe { file.read_host_value_boxed()? }; |
77703d95 | 88 | |
a7dd4830 | 89 | if header.magic != super::DYNAMIC_SIZED_CHUNK_INDEX_1_0 { |
d48a9955 | 90 | bail!("got unknown magic number"); |
77703d95 DM |
91 | } |
92 | ||
77703d95 DM |
93 | let ctime = u64::from_le(header.ctime); |
94 | ||
95 | let rawfd = file.as_raw_fd(); | |
96 | ||
d48a9955 | 97 | let stat = nix::sys::stat::fstat(rawfd)?; |
77703d95 DM |
98 | |
99 | let size = stat.st_size as usize; | |
100 | ||
ddbdf80d | 101 | let index_size = size - header_size; |
57e50fb9 WB |
102 | let index_count = index_size / 40; |
103 | if index_count * 40 != index_size { | |
d48a9955 | 104 | bail!("got unexpected file size"); |
77703d95 DM |
105 | } |
106 | ||
57e50fb9 WB |
107 | let index = unsafe { |
108 | Mmap::map_fd( | |
109 | rawfd, | |
110 | header_size as u64, | |
111 | index_count, | |
f569acc5 WB |
112 | nix::sys::mman::ProtFlags::PROT_READ, |
113 | nix::sys::mman::MapFlags::MAP_PRIVATE, | |
57e50fb9 WB |
114 | )? |
115 | }; | |
77703d95 | 116 | |
77703d95 | 117 | Ok(Self { |
728797d0 | 118 | _file: file, |
77703d95 | 119 | size, |
57e50fb9 | 120 | index, |
77703d95 DM |
121 | ctime, |
122 | uuid: header.uuid, | |
16ff6b7c | 123 | index_csum: header.index_csum, |
77703d95 DM |
124 | }) |
125 | } | |
126 | ||
39c6bd86 | 127 | #[inline] |
9fe2f639 | 128 | #[allow(clippy::cast_ptr_alignment)] |
39c6bd86 | 129 | fn chunk_end(&self, pos: usize) -> u64 { |
57e50fb9 | 130 | if pos >= self.index.len() { |
39c6bd86 DM |
131 | panic!("chunk index out of range"); |
132 | } | |
7a6b5492 | 133 | self.index[pos].end() |
39c6bd86 DM |
134 | } |
135 | ||
136 | #[inline] | |
f98ac774 | 137 | fn chunk_digest(&self, pos: usize) -> &[u8; 32] { |
57e50fb9 | 138 | if pos >= self.index.len() { |
39c6bd86 DM |
139 | panic!("chunk index out of range"); |
140 | } | |
57e50fb9 | 141 | &self.index[pos].digest |
39c6bd86 DM |
142 | } |
143 | ||
57e50fb9 | 144 | // TODO: can we use std::slice::binary_search with Mmap now? |
39c6bd86 DM |
145 | fn binary_search( |
146 | &self, | |
147 | start_idx: usize, | |
148 | start: u64, | |
149 | end_idx: usize, | |
150 | end: u64, | |
f569acc5 | 151 | offset: u64, |
39c6bd86 | 152 | ) -> Result<usize, Error> { |
39c6bd86 DM |
153 | if (offset >= end) || (offset < start) { |
154 | bail!("offset out of range"); | |
155 | } | |
156 | ||
157 | if end_idx == start_idx { | |
158 | return Ok(start_idx); // found | |
159 | } | |
f569acc5 | 160 | let middle_idx = (start_idx + end_idx) / 2; |
39c6bd86 DM |
161 | let middle_end = self.chunk_end(middle_idx); |
162 | ||
163 | if offset < middle_end { | |
62ee2eb4 | 164 | self.binary_search(start_idx, start, middle_idx, middle_end, offset) |
39c6bd86 | 165 | } else { |
62ee2eb4 | 166 | self.binary_search(middle_idx + 1, middle_end, end_idx, end, offset) |
39c6bd86 DM |
167 | } |
168 | } | |
169 | } | |
170 | ||
7bc1d727 WB |
171 | impl IndexFile for DynamicIndexReader { |
172 | fn index_count(&self) -> usize { | |
57e50fb9 | 173 | self.index.len() |
7bc1d727 WB |
174 | } |
175 | ||
176 | fn index_digest(&self, pos: usize) -> Option<&[u8; 32]> { | |
57e50fb9 | 177 | if pos >= self.index.len() { |
7bc1d727 WB |
178 | None |
179 | } else { | |
f569acc5 | 180 | Some(unsafe { std::mem::transmute(self.chunk_digest(pos).as_ptr()) }) |
7bc1d727 WB |
181 | } |
182 | } | |
a660978c DM |
183 | |
184 | fn index_bytes(&self) -> u64 { | |
57e50fb9 | 185 | if self.index.is_empty() { |
a660978c DM |
186 | 0 |
187 | } else { | |
57e50fb9 | 188 | self.chunk_end(self.index.len() - 1) |
a660978c DM |
189 | } |
190 | } | |
fdaab0df | 191 | |
2e079b8b DM |
192 | fn compute_csum(&self) -> ([u8; 32], u64) { |
193 | let mut csum = openssl::sha::Sha256::new(); | |
194 | let mut chunk_end = 0; | |
195 | for pos in 0..self.index_count() { | |
196 | let info = self.chunk_info(pos).unwrap(); | |
197 | chunk_end = info.range.end; | |
198 | csum.update(&chunk_end.to_le_bytes()); | |
199 | csum.update(&info.digest); | |
200 | } | |
201 | let csum = csum.finish(); | |
202 | (csum, chunk_end) | |
203 | } | |
204 | ||
fdaab0df DM |
205 | #[allow(clippy::cast_ptr_alignment)] |
206 | fn chunk_info(&self, pos: usize) -> Option<ChunkReadInfo> { | |
207 | if pos >= self.index.len() { | |
208 | return None; | |
209 | } | |
210 | let start = if pos == 0 { 0 } else { self.index[pos - 1].end() }; | |
211 | ||
212 | let end = self.index[pos].end(); | |
213 | ||
214 | Some(ChunkReadInfo { | |
215 | range: start..end, | |
216 | digest: self.index[pos].digest.clone(), | |
217 | }) | |
218 | } | |
7bc1d727 WB |
219 | } |
220 | ||
c443f58b WB |
221 | struct CachedChunk { |
222 | range: Range<u64>, | |
223 | data: Vec<u8>, | |
224 | } | |
225 | ||
226 | impl CachedChunk { | |
227 | /// Perform sanity checks on the range and data size: | |
228 | pub fn new(range: Range<u64>, data: Vec<u8>) -> Result<Self, Error> { | |
229 | if data.len() as u64 != range.end - range.start { | |
230 | bail!( | |
231 | "read chunk with wrong size ({} != {})", | |
232 | data.len(), | |
233 | range.end - range.start, | |
234 | ); | |
235 | } | |
236 | Ok(Self { range, data }) | |
237 | } | |
238 | } | |
239 | ||
d48a9955 DM |
240 | pub struct BufferedDynamicReader<S> { |
241 | store: S, | |
93d5d779 | 242 | index: DynamicIndexReader, |
39c6bd86 DM |
243 | archive_size: u64, |
244 | read_buffer: Vec<u8>, | |
245 | buffered_chunk_idx: usize, | |
246 | buffered_chunk_start: u64, | |
247 | read_offset: u64, | |
c443f58b | 248 | lru_cache: crate::tools::lru_cache::LruCache<usize, CachedChunk>, |
536683e7 CE |
249 | } |
250 | ||
251 | struct ChunkCacher<'a, S> { | |
252 | store: &'a mut S, | |
253 | index: &'a DynamicIndexReader, | |
254 | } | |
255 | ||
c443f58b WB |
256 | impl<'a, S: ReadChunk> crate::tools::lru_cache::Cacher<usize, CachedChunk> for ChunkCacher<'a, S> { |
257 | fn fetch(&mut self, index: usize) -> Result<Option<CachedChunk>, Error> { | |
fdaab0df DM |
258 | let info = match self.index.chunk_info(index) { |
259 | Some(info) => info, | |
260 | None => bail!("chunk index out of range"), | |
261 | }; | |
c443f58b WB |
262 | let range = info.range; |
263 | let data = self.store.read_chunk(&info.digest)?; | |
264 | CachedChunk::new(range, data).map(Some) | |
536683e7 | 265 | } |
77703d95 DM |
266 | } |
267 | ||
f569acc5 | 268 | impl<S: ReadChunk> BufferedDynamicReader<S> { |
d48a9955 | 269 | pub fn new(index: DynamicIndexReader, store: S) -> Self { |
57e50fb9 | 270 | let archive_size = index.index_bytes(); |
39c6bd86 | 271 | Self { |
d48a9955 | 272 | store, |
653b1ca1 WB |
273 | index, |
274 | archive_size, | |
f569acc5 | 275 | read_buffer: Vec::with_capacity(1024 * 1024), |
39c6bd86 DM |
276 | buffered_chunk_idx: 0, |
277 | buffered_chunk_start: 0, | |
278 | read_offset: 0, | |
536683e7 | 279 | lru_cache: crate::tools::lru_cache::LruCache::new(32), |
39c6bd86 DM |
280 | } |
281 | } | |
282 | ||
f569acc5 WB |
283 | pub fn archive_size(&self) -> u64 { |
284 | self.archive_size | |
285 | } | |
39c6bd86 | 286 | |
0a72e267 | 287 | fn buffer_chunk(&mut self, idx: usize) -> Result<(), Error> { |
c443f58b WB |
288 | //let (start, end, data) = self.lru_cache.access( |
289 | let cached_chunk = self.lru_cache.access( | |
536683e7 CE |
290 | idx, |
291 | &mut ChunkCacher { | |
292 | store: &mut self.store, | |
293 | index: &self.index, | |
294 | }, | |
295 | )?.ok_or_else(|| format_err!("chunk not found by cacher"))?; | |
296 | ||
536683e7 | 297 | // fixme: avoid copy |
f98ac774 | 298 | self.read_buffer.clear(); |
c443f58b | 299 | self.read_buffer.extend_from_slice(&cached_chunk.data); |
0a72e267 DM |
300 | |
301 | self.buffered_chunk_idx = idx; | |
d48a9955 | 302 | |
c443f58b | 303 | self.buffered_chunk_start = cached_chunk.range.start; |
0a72e267 DM |
304 | //println!("BUFFER {} {}", self.buffered_chunk_start, end); |
305 | Ok(()) | |
306 | } | |
307 | } | |
308 | ||
f569acc5 | 309 | impl<S: ReadChunk> crate::tools::BufferedRead for BufferedDynamicReader<S> { |
0a72e267 | 310 | fn buffered_read(&mut self, offset: u64) -> Result<&[u8], Error> { |
f569acc5 WB |
311 | if offset == self.archive_size { |
312 | return Ok(&self.read_buffer[0..0]); | |
313 | } | |
318564ac | 314 | |
39c6bd86 | 315 | let buffer_len = self.read_buffer.len(); |
0b05fd58 | 316 | let index = &self.index; |
39c6bd86 DM |
317 | |
318 | // optimization for sequential read | |
f569acc5 | 319 | if buffer_len > 0 |
57e50fb9 | 320 | && ((self.buffered_chunk_idx + 1) < index.index.len()) |
f569acc5 | 321 | && (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64))) |
39c6bd86 DM |
322 | { |
323 | let next_idx = self.buffered_chunk_idx + 1; | |
324 | let next_end = index.chunk_end(next_idx); | |
325 | if offset < next_end { | |
373ef4a5 | 326 | self.buffer_chunk(next_idx)?; |
39c6bd86 DM |
327 | let buffer_offset = (offset - self.buffered_chunk_start) as usize; |
328 | return Ok(&self.read_buffer[buffer_offset..]); | |
329 | } | |
330 | } | |
331 | ||
f569acc5 WB |
332 | if (buffer_len == 0) |
333 | || (offset < self.buffered_chunk_start) | |
334 | || (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64))) | |
39c6bd86 | 335 | { |
57e50fb9 | 336 | let end_idx = index.index.len() - 1; |
39c6bd86 DM |
337 | let end = index.chunk_end(end_idx); |
338 | let idx = index.binary_search(0, 0, end_idx, end, offset)?; | |
373ef4a5 | 339 | self.buffer_chunk(idx)?; |
f569acc5 | 340 | } |
39c6bd86 DM |
341 | |
342 | let buffer_offset = (offset - self.buffered_chunk_start) as usize; | |
343 | Ok(&self.read_buffer[buffer_offset..]) | |
344 | } | |
39c6bd86 | 345 | } |
77703d95 | 346 | |
f569acc5 | 347 | impl<S: ReadChunk> std::io::Read for BufferedDynamicReader<S> { |
4624fe29 | 348 | fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> { |
fded74d0 | 349 | use crate::tools::BufferedRead; |
f569acc5 | 350 | use std::io::{Error, ErrorKind}; |
4624fe29 DM |
351 | |
352 | let data = match self.buffered_read(self.read_offset) { | |
353 | Ok(v) => v, | |
354 | Err(err) => return Err(Error::new(ErrorKind::Other, err.to_string())), | |
355 | }; | |
356 | ||
f569acc5 WB |
357 | let n = if data.len() > buf.len() { |
358 | buf.len() | |
359 | } else { | |
360 | data.len() | |
361 | }; | |
4624fe29 | 362 | |
bde8e243 | 363 | buf[0..n].copy_from_slice(&data[0..n]); |
4624fe29 DM |
364 | |
365 | self.read_offset += n as u64; | |
366 | ||
62ee2eb4 | 367 | Ok(n) |
4624fe29 DM |
368 | } |
369 | } | |
370 | ||
f569acc5 | 371 | impl<S: ReadChunk> std::io::Seek for BufferedDynamicReader<S> { |
d48a9955 | 372 | fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> { |
34337050 | 373 | let new_offset = match pos { |
f569acc5 WB |
374 | SeekFrom::Start(start_offset) => start_offset as i64, |
375 | SeekFrom::End(end_offset) => (self.archive_size as i64) + end_offset, | |
34337050 DM |
376 | SeekFrom::Current(offset) => (self.read_offset as i64) + offset, |
377 | }; | |
378 | ||
ddbdf80d | 379 | use std::io::{Error, ErrorKind}; |
34337050 DM |
380 | if (new_offset < 0) || (new_offset > (self.archive_size as i64)) { |
381 | return Err(Error::new( | |
382 | ErrorKind::Other, | |
f569acc5 WB |
383 | format!( |
384 | "seek is out of range {} ([0..{}])", | |
385 | new_offset, self.archive_size | |
386 | ), | |
387 | )); | |
34337050 DM |
388 | } |
389 | self.read_offset = new_offset as u64; | |
390 | ||
391 | Ok(self.read_offset) | |
392 | } | |
393 | } | |
394 | ||
4cf0ced9 DC |
395 | /// This is a workaround until we have cleaned up the chunk/reader/... infrastructure for better |
396 | /// async use! | |
397 | /// | |
398 | /// Ideally BufferedDynamicReader gets replaced so the LruCache maps to `BroadcastFuture<Chunk>`, | |
399 | /// so that we can properly access it from multiple threads simultaneously while not issuing | |
400 | /// duplicate simultaneous reads over http. | |
401 | #[derive(Clone)] | |
402 | pub struct LocalDynamicReadAt<R: ReadChunk> { | |
403 | inner: Arc<Mutex<BufferedDynamicReader<R>>>, | |
404 | } | |
405 | ||
406 | impl<R: ReadChunk> LocalDynamicReadAt<R> { | |
407 | pub fn new(inner: BufferedDynamicReader<R>) -> Self { | |
408 | Self { | |
409 | inner: Arc::new(Mutex::new(inner)), | |
410 | } | |
411 | } | |
412 | } | |
413 | ||
a6f87283 WB |
414 | impl<R: ReadChunk> ReadAt for LocalDynamicReadAt<R> { |
415 | fn start_read_at<'a>( | |
416 | self: Pin<&'a Self>, | |
4cf0ced9 | 417 | _cx: &mut Context, |
a6f87283 | 418 | buf: &'a mut [u8], |
4cf0ced9 | 419 | offset: u64, |
a6f87283 | 420 | ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> { |
4cf0ced9 | 421 | use std::io::Read; |
a6f87283 | 422 | MaybeReady::Ready(tokio::task::block_in_place(move || { |
4cf0ced9 DC |
423 | let mut reader = self.inner.lock().unwrap(); |
424 | reader.seek(SeekFrom::Start(offset))?; | |
a6f87283 WB |
425 | Ok(reader.read(buf)?) |
426 | })) | |
427 | } | |
428 | ||
429 | fn poll_complete<'a>( | |
430 | self: Pin<&'a Self>, | |
431 | _op: ReadAtOperation<'a>, | |
432 | ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> { | |
433 | panic!("LocalDynamicReadAt::start_read_at returned Pending"); | |
4cf0ced9 DC |
434 | } |
435 | } | |
436 | ||
437 | ||
976595e1 | 438 | /// Create dynamic index files (`.dixd`) |
93d5d779 | 439 | pub struct DynamicIndexWriter { |
1629d2ad | 440 | store: Arc<ChunkStore>, |
43b13033 | 441 | _lock: tools::ProcessLockSharedGuard, |
5032b57b | 442 | writer: BufWriter<File>, |
5e7a09be | 443 | closed: bool, |
0433db19 DM |
444 | filename: PathBuf, |
445 | tmp_filename: PathBuf, | |
16ff6b7c | 446 | csum: Option<openssl::sha::Sha256>, |
9f49fe1d DM |
447 | pub uuid: [u8; 16], |
448 | pub ctime: u64, | |
0433db19 DM |
449 | } |
450 | ||
93d5d779 | 451 | impl Drop for DynamicIndexWriter { |
1629d2ad DM |
452 | fn drop(&mut self) { |
453 | let _ = std::fs::remove_file(&self.tmp_filename); // ignore errors | |
454 | } | |
455 | } | |
456 | ||
93d5d779 | 457 | impl DynamicIndexWriter { |
976595e1 | 458 | pub fn create(store: Arc<ChunkStore>, path: &Path) -> Result<Self, Error> { |
43b13033 DM |
459 | let shared_lock = store.try_shared_lock()?; |
460 | ||
0433db19 DM |
461 | let full_path = store.relative_path(path); |
462 | let mut tmp_path = full_path.clone(); | |
93d5d779 | 463 | tmp_path.set_extension("tmp_didx"); |
0433db19 | 464 | |
ddbdf80d | 465 | let file = std::fs::OpenOptions::new() |
f569acc5 WB |
466 | .create(true) |
467 | .truncate(true) | |
0433db19 DM |
468 | .read(true) |
469 | .write(true) | |
470 | .open(&tmp_path)?; | |
471 | ||
f569acc5 | 472 | let mut writer = BufWriter::with_capacity(1024 * 1024, file); |
5032b57b | 473 | |
93d5d779 | 474 | let header_size = std::mem::size_of::<DynamicIndexHeader>(); |
0433db19 DM |
475 | |
476 | // todo: use static assertion when available in rust | |
f569acc5 WB |
477 | if header_size != 4096 { |
478 | panic!("got unexpected header size"); | |
479 | } | |
0433db19 | 480 | |
e693818a | 481 | let ctime = epoch_now_u64()?; |
0433db19 | 482 | |
f569acc5 | 483 | let uuid = Uuid::generate(); |
0433db19 | 484 | |
d21f8a5b | 485 | let mut buffer = vec::zeroed(header_size); |
93d5d779 | 486 | let header = crate::tools::map_struct_mut::<DynamicIndexHeader>(&mut buffer)?; |
0433db19 | 487 | |
a7dd4830 | 488 | header.magic = super::DYNAMIC_SIZED_CHUNK_INDEX_1_0; |
0433db19 DM |
489 | header.ctime = u64::to_le(ctime); |
490 | header.uuid = *uuid.as_bytes(); | |
491 | ||
16ff6b7c DM |
492 | header.index_csum = [0u8; 32]; |
493 | ||
5032b57b | 494 | writer.write_all(&buffer)?; |
0433db19 | 495 | |
16ff6b7c DM |
496 | let csum = Some(openssl::sha::Sha256::new()); |
497 | ||
0433db19 DM |
498 | Ok(Self { |
499 | store, | |
43b13033 | 500 | _lock: shared_lock, |
653b1ca1 | 501 | writer, |
5e7a09be | 502 | closed: false, |
0433db19 DM |
503 | filename: full_path, |
504 | tmp_filename: tmp_path, | |
505 | ctime, | |
506 | uuid: *uuid.as_bytes(), | |
16ff6b7c | 507 | csum, |
0433db19 DM |
508 | }) |
509 | } | |
5e7a09be | 510 | |
f98ac774 | 511 | // fixme: use add_chunk instead? |
4ee8f53d DM |
512 | pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> { |
513 | self.store.insert_chunk(chunk, digest) | |
976595e1 DM |
514 | } |
515 | ||
f569acc5 | 516 | pub fn close(&mut self) -> Result<[u8; 32], Error> { |
5e7a09be | 517 | if self.closed { |
f569acc5 WB |
518 | bail!( |
519 | "cannot close already closed archive index file {:?}", | |
520 | self.filename | |
521 | ); | |
5e7a09be DM |
522 | } |
523 | ||
524 | self.closed = true; | |
525 | ||
5032b57b | 526 | self.writer.flush()?; |
5e7a09be | 527 | |
9ea4bce4 | 528 | let csum_offset = proxmox::offsetof!(DynamicIndexHeader, index_csum); |
d48a9955 | 529 | self.writer.seek(SeekFrom::Start(csum_offset as u64))?; |
16ff6b7c DM |
530 | |
531 | let csum = self.csum.take().unwrap(); | |
532 | let index_csum = csum.finish(); | |
533 | ||
534 | self.writer.write_all(&index_csum)?; | |
535 | self.writer.flush()?; | |
536 | ||
5e7a09be DM |
537 | if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) { |
538 | bail!("Atomic rename file {:?} failed - {}", self.filename, err); | |
539 | } | |
540 | ||
16ff6b7c | 541 | Ok(index_csum) |
5e7a09be DM |
542 | } |
543 | ||
976595e1 DM |
544 | // fixme: rename to add_digest |
545 | pub fn add_chunk(&mut self, offset: u64, digest: &[u8; 32]) -> Result<(), Error> { | |
546 | if self.closed { | |
f569acc5 WB |
547 | bail!( |
548 | "cannot write to closed dynamic index file {:?}", | |
549 | self.filename | |
550 | ); | |
976595e1 | 551 | } |
16ff6b7c | 552 | |
f569acc5 | 553 | let offset_le: &[u8; 8] = unsafe { &std::mem::transmute::<u64, [u8; 8]>(offset.to_le()) }; |
16ff6b7c DM |
554 | |
555 | if let Some(ref mut csum) = self.csum { | |
556 | csum.update(offset_le); | |
557 | csum.update(digest); | |
558 | } | |
559 | ||
cd69d36b DM |
560 | self.writer.write_all(offset_le)?; |
561 | self.writer.write_all(digest)?; | |
976595e1 DM |
562 | Ok(()) |
563 | } | |
564 | } | |
565 | ||
566 | /// Writer which splits a binary stream into dynamic sized chunks | |
567 | /// | |
568 | /// And store the resulting chunk list into the index file. | |
569 | pub struct DynamicChunkWriter { | |
570 | index: DynamicIndexWriter, | |
571 | closed: bool, | |
572 | chunker: Chunker, | |
573 | stat: ChunkStat, | |
574 | chunk_offset: usize, | |
575 | last_chunk: usize, | |
576 | chunk_buffer: Vec<u8>, | |
577 | } | |
578 | ||
579 | impl DynamicChunkWriter { | |
976595e1 DM |
580 | pub fn new(index: DynamicIndexWriter, chunk_size: usize) -> Self { |
581 | Self { | |
582 | index, | |
583 | closed: false, | |
584 | chunker: Chunker::new(chunk_size), | |
585 | stat: ChunkStat::new(0), | |
586 | chunk_offset: 0, | |
587 | last_chunk: 0, | |
f569acc5 | 588 | chunk_buffer: Vec::with_capacity(chunk_size * 4), |
976595e1 DM |
589 | } |
590 | } | |
591 | ||
7e336555 DM |
592 | pub fn stat(&self) -> &ChunkStat { |
593 | &self.stat | |
594 | } | |
595 | ||
f569acc5 | 596 | pub fn close(&mut self) -> Result<(), Error> { |
976595e1 DM |
597 | if self.closed { |
598 | return Ok(()); | |
599 | } | |
600 | ||
601 | self.closed = true; | |
602 | ||
603 | self.write_chunk_buffer()?; | |
604 | ||
605 | self.index.close()?; | |
606 | ||
607 | self.stat.size = self.chunk_offset as u64; | |
608 | ||
609 | // add size of index file | |
f569acc5 WB |
610 | self.stat.size += |
611 | (self.stat.chunk_count * 40 + std::mem::size_of::<DynamicIndexHeader>()) as u64; | |
976595e1 DM |
612 | |
613 | Ok(()) | |
614 | } | |
615 | ||
616 | fn write_chunk_buffer(&mut self) -> Result<(), Error> { | |
5e7a09be DM |
617 | let chunk_size = self.chunk_buffer.len(); |
618 | ||
f569acc5 WB |
619 | if chunk_size == 0 { |
620 | return Ok(()); | |
621 | } | |
5e7a09be DM |
622 | |
623 | let expected_chunk_size = self.chunk_offset - self.last_chunk; | |
624 | if expected_chunk_size != self.chunk_buffer.len() { | |
976595e1 | 625 | bail!("wrong chunk size {} != {}", expected_chunk_size, chunk_size); |
5e7a09be DM |
626 | } |
627 | ||
7e336555 | 628 | self.stat.chunk_count += 1; |
247cdbce | 629 | |
5e7a09be DM |
630 | self.last_chunk = self.chunk_offset; |
631 | ||
4ee8f53d | 632 | let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer) |
f98ac774 DM |
633 | .compress(true) |
634 | .build()?; | |
635 | ||
4ee8f53d | 636 | match self.index.insert_chunk(&chunk, &digest) { |
f98ac774 | 637 | Ok((is_duplicate, compressed_size)) => { |
7e336555 | 638 | self.stat.compressed_size += compressed_size; |
798f7fa0 | 639 | if is_duplicate { |
7e336555 | 640 | self.stat.duplicate_chunks += 1; |
798f7fa0 | 641 | } else { |
7e336555 | 642 | self.stat.disk_size += compressed_size; |
798f7fa0 DM |
643 | } |
644 | ||
f569acc5 WB |
645 | println!( |
646 | "ADD CHUNK {:016x} {} {}% {} {}", | |
647 | self.chunk_offset, | |
648 | chunk_size, | |
649 | (compressed_size * 100) / (chunk_size as u64), | |
650 | is_duplicate, | |
651 | proxmox::tools::digest_to_hex(&digest) | |
652 | ); | |
976595e1 | 653 | self.index.add_chunk(self.chunk_offset as u64, &digest)?; |
5e7a09be | 654 | self.chunk_buffer.truncate(0); |
62ee2eb4 | 655 | Ok(()) |
5e7a09be DM |
656 | } |
657 | Err(err) => { | |
658 | self.chunk_buffer.truncate(0); | |
62ee2eb4 | 659 | Err(err) |
5e7a09be DM |
660 | } |
661 | } | |
5e7a09be | 662 | } |
0433db19 DM |
663 | } |
664 | ||
976595e1 | 665 | impl Write for DynamicChunkWriter { |
0433db19 | 666 | fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> { |
0433db19 DM |
667 | let chunker = &mut self.chunker; |
668 | ||
669 | let pos = chunker.scan(data); | |
670 | ||
671 | if pos > 0 { | |
4ee8f53d | 672 | self.chunk_buffer.extend_from_slice(&data[0..pos]); |
0433db19 DM |
673 | self.chunk_offset += pos; |
674 | ||
976595e1 | 675 | if let Err(err) = self.write_chunk_buffer() { |
f569acc5 WB |
676 | return Err(std::io::Error::new( |
677 | std::io::ErrorKind::Other, | |
678 | err.to_string(), | |
679 | )); | |
976595e1 | 680 | } |
5e7a09be | 681 | Ok(pos) |
0433db19 DM |
682 | } else { |
683 | self.chunk_offset += data.len(); | |
4ee8f53d | 684 | self.chunk_buffer.extend_from_slice(data); |
5e7a09be | 685 | Ok(data.len()) |
0433db19 DM |
686 | } |
687 | } | |
688 | ||
689 | fn flush(&mut self) -> std::result::Result<(), std::io::Error> { | |
f569acc5 WB |
690 | Err(std::io::Error::new( |
691 | std::io::ErrorKind::Other, | |
692 | "please use close() instead of flush()", | |
693 | )) | |
0433db19 DM |
694 | } |
695 | } |