]>
Commit | Line | Data |
---|---|---|
0433db19 | 1 | use std::fs::File; |
4cf0ced9 | 2 | use std::io::{self, BufWriter, Seek, SeekFrom, Write}; |
c443f58b | 3 | use std::ops::Range; |
0433db19 | 4 | use std::os::unix::io::AsRawFd; |
367f002e | 5 | use std::path::{Path, PathBuf}; |
4cf0ced9 DC |
6 | use std::sync::{Arc, Mutex}; |
7 | use std::task::{Context, Poll}; | |
8 | use std::pin::Pin; | |
367f002e | 9 | |
f7d4e4b5 | 10 | use anyhow::{bail, format_err, Error}; |
0433db19 | 11 | |
9b2b627f | 12 | use proxmox::tools::io::ReadExt; |
f569acc5 | 13 | use proxmox::tools::uuid::Uuid; |
f35197f4 | 14 | use proxmox::tools::vec; |
57e50fb9 | 15 | use proxmox::tools::mmap::Mmap; |
8ea3b1d1 | 16 | |
367f002e WB |
17 | use super::chunk_stat::ChunkStat; |
18 | use super::chunk_store::ChunkStore; | |
c443f58b | 19 | use super::index::ChunkReadInfo; |
367f002e | 20 | use super::read_chunk::ReadChunk; |
f569acc5 WB |
21 | use super::Chunker; |
22 | use super::IndexFile; | |
4ee8f53d | 23 | use super::{DataBlob, DataChunkBuilder}; |
e693818a | 24 | use crate::tools::{self, epoch_now_u64}; |
f98ac774 | 25 | |
e5064ba6 | 26 | /// Header format definition for dynamic index files (`.dixd`) |
0433db19 | 27 | #[repr(C)] |
93d5d779 | 28 | pub struct DynamicIndexHeader { |
a7dd4830 | 29 | pub magic: [u8; 8], |
0433db19 DM |
30 | pub uuid: [u8; 16], |
31 | pub ctime: u64, | |
16ff6b7c DM |
32 | /// Sha256 over the index ``SHA256(offset1||digest1||offset2||digest2||...)`` |
33 | pub index_csum: [u8; 32], | |
990b930f | 34 | reserved: [u8; 4032], // overall size is one page (4096 bytes) |
0433db19 | 35 | } |
9ea4bce4 | 36 | proxmox::static_assert_size!(DynamicIndexHeader, 4096); |
990b930f WB |
37 | // TODO: Once non-Copy unions are stabilized, use: |
38 | // union DynamicIndexHeader { | |
39 | // reserved: [u8; 4096], | |
40 | // pub data: DynamicIndexHeaderData, | |
41 | // } | |
77703d95 | 42 | |
/// A single index record: the end offset of a chunk together with its digest.
///
/// `#[repr(C)]` keeps the field order fixed; the reader memory-maps the index
/// file as an array of these records.
#[derive(Clone, Debug)]
#[repr(C)]
pub struct DynamicEntry {
    /// End offset of the chunk, kept in little-endian byte order.
    end_le: u64,
    /// Digest of the chunk.
    digest: [u8; 32],
}

impl DynamicEntry {
    /// Returns the end offset of the chunk in native byte order.
    #[inline]
    pub fn end(&self) -> u64 {
        let stored = self.end_le;
        u64::from_le(stored)
    }
}
56 | ||
93d5d779 | 57 | pub struct DynamicIndexReader { |
728797d0 | 58 | _file: File, |
9f49fe1d | 59 | pub size: usize, |
57e50fb9 | 60 | index: Mmap<DynamicEntry>, |
9f49fe1d DM |
61 | pub uuid: [u8; 16], |
62 | pub ctime: u64, | |
16ff6b7c | 63 | pub index_csum: [u8; 32], |
77703d95 DM |
64 | } |
65 | ||
93d5d779 | 66 | impl DynamicIndexReader { |
d48a9955 | 67 | pub fn open(path: &Path) -> Result<Self, Error> { |
0f0a35b3 DM |
68 | File::open(path) |
69 | .map_err(Error::from) | |
57e50fb9 | 70 | .and_then(Self::new) |
0f0a35b3 | 71 | .map_err(|err| format_err!("Unable to open dynamic index {:?} - {}", path, err)) |
d48a9955 DM |
72 | } |
73 | ||
74 | pub fn new(mut file: std::fs::File) -> Result<Self, Error> { | |
f569acc5 WB |
75 | if let Err(err) = |
76 | nix::fcntl::flock(file.as_raw_fd(), nix::fcntl::FlockArg::LockSharedNonblock) | |
77 | { | |
0f0a35b3 DM |
78 | bail!("unable to get shared lock - {}", err); |
79 | } | |
80 | ||
57e50fb9 | 81 | // FIXME: This is NOT OUR job! Check the callers of this method and remove this! |
d48a9955 DM |
82 | file.seek(SeekFrom::Start(0))?; |
83 | ||
93d5d779 | 84 | let header_size = std::mem::size_of::<DynamicIndexHeader>(); |
77703d95 | 85 | |
ba5e6747 | 86 | let header: Box<DynamicIndexHeader> = unsafe { file.read_host_value_boxed()? }; |
77703d95 | 87 | |
a7dd4830 | 88 | if header.magic != super::DYNAMIC_SIZED_CHUNK_INDEX_1_0 { |
d48a9955 | 89 | bail!("got unknown magic number"); |
77703d95 DM |
90 | } |
91 | ||
77703d95 DM |
92 | let ctime = u64::from_le(header.ctime); |
93 | ||
94 | let rawfd = file.as_raw_fd(); | |
95 | ||
d48a9955 | 96 | let stat = nix::sys::stat::fstat(rawfd)?; |
77703d95 DM |
97 | |
98 | let size = stat.st_size as usize; | |
99 | ||
ddbdf80d | 100 | let index_size = size - header_size; |
57e50fb9 WB |
101 | let index_count = index_size / 40; |
102 | if index_count * 40 != index_size { | |
d48a9955 | 103 | bail!("got unexpected file size"); |
77703d95 DM |
104 | } |
105 | ||
57e50fb9 WB |
106 | let index = unsafe { |
107 | Mmap::map_fd( | |
108 | rawfd, | |
109 | header_size as u64, | |
110 | index_count, | |
f569acc5 WB |
111 | nix::sys::mman::ProtFlags::PROT_READ, |
112 | nix::sys::mman::MapFlags::MAP_PRIVATE, | |
57e50fb9 WB |
113 | )? |
114 | }; | |
77703d95 | 115 | |
77703d95 | 116 | Ok(Self { |
728797d0 | 117 | _file: file, |
77703d95 | 118 | size, |
57e50fb9 | 119 | index, |
77703d95 DM |
120 | ctime, |
121 | uuid: header.uuid, | |
16ff6b7c | 122 | index_csum: header.index_csum, |
77703d95 DM |
123 | }) |
124 | } | |
125 | ||
9fe2f639 | 126 | #[allow(clippy::cast_ptr_alignment)] |
c443f58b | 127 | pub fn chunk_info(&self, pos: usize) -> Result<ChunkReadInfo, Error> { |
57e50fb9 | 128 | if pos >= self.index.len() { |
40f4e198 DM |
129 | bail!("chunk index out of range"); |
130 | } | |
131 | let start = if pos == 0 { | |
132 | 0 | |
133 | } else { | |
7a6b5492 | 134 | self.index[pos - 1].end() |
40f4e198 DM |
135 | }; |
136 | ||
7a6b5492 | 137 | let end = self.index[pos].end(); |
5e58e1bb | 138 | |
c443f58b WB |
139 | Ok(ChunkReadInfo { |
140 | range: start..end, | |
57e50fb9 | 141 | digest: self.index[pos].digest.clone(), |
c443f58b | 142 | }) |
40f4e198 DM |
143 | } |
144 | ||
39c6bd86 | 145 | #[inline] |
9fe2f639 | 146 | #[allow(clippy::cast_ptr_alignment)] |
39c6bd86 | 147 | fn chunk_end(&self, pos: usize) -> u64 { |
57e50fb9 | 148 | if pos >= self.index.len() { |
39c6bd86 DM |
149 | panic!("chunk index out of range"); |
150 | } | |
7a6b5492 | 151 | self.index[pos].end() |
39c6bd86 DM |
152 | } |
153 | ||
154 | #[inline] | |
f98ac774 | 155 | fn chunk_digest(&self, pos: usize) -> &[u8; 32] { |
57e50fb9 | 156 | if pos >= self.index.len() { |
39c6bd86 DM |
157 | panic!("chunk index out of range"); |
158 | } | |
57e50fb9 | 159 | &self.index[pos].digest |
39c6bd86 DM |
160 | } |
161 | ||
82c85a21 DM |
162 | /// Compute checksum and data size |
163 | pub fn compute_csum(&self) -> ([u8; 32], u64) { | |
82c85a21 | 164 | let mut csum = openssl::sha::Sha256::new(); |
57e50fb9 | 165 | for entry in &self.index { |
7a6b5492 | 166 | csum.update(&entry.end_le.to_ne_bytes()); |
57e50fb9 | 167 | csum.update(&entry.digest); |
82c85a21 DM |
168 | } |
169 | let csum = csum.finish(); | |
170 | ||
57e50fb9 WB |
171 | ( |
172 | csum, | |
173 | self.index | |
174 | .last() | |
7a6b5492 WB |
175 | .map(|entry| entry.end()) |
176 | .unwrap_or(0) | |
177 | ) | |
96df2fb4 | 178 | } |
39c6bd86 | 179 | |
57e50fb9 | 180 | // TODO: can we use std::slice::binary_search with Mmap now? |
39c6bd86 DM |
181 | fn binary_search( |
182 | &self, | |
183 | start_idx: usize, | |
184 | start: u64, | |
185 | end_idx: usize, | |
186 | end: u64, | |
f569acc5 | 187 | offset: u64, |
39c6bd86 | 188 | ) -> Result<usize, Error> { |
39c6bd86 DM |
189 | if (offset >= end) || (offset < start) { |
190 | bail!("offset out of range"); | |
191 | } | |
192 | ||
193 | if end_idx == start_idx { | |
194 | return Ok(start_idx); // found | |
195 | } | |
f569acc5 | 196 | let middle_idx = (start_idx + end_idx) / 2; |
39c6bd86 DM |
197 | let middle_end = self.chunk_end(middle_idx); |
198 | ||
199 | if offset < middle_end { | |
62ee2eb4 | 200 | self.binary_search(start_idx, start, middle_idx, middle_end, offset) |
39c6bd86 | 201 | } else { |
62ee2eb4 | 202 | self.binary_search(middle_idx + 1, middle_end, end_idx, end, offset) |
39c6bd86 DM |
203 | } |
204 | } | |
205 | } | |
206 | ||
7bc1d727 WB |
207 | impl IndexFile for DynamicIndexReader { |
208 | fn index_count(&self) -> usize { | |
57e50fb9 | 209 | self.index.len() |
7bc1d727 WB |
210 | } |
211 | ||
212 | fn index_digest(&self, pos: usize) -> Option<&[u8; 32]> { | |
57e50fb9 | 213 | if pos >= self.index.len() { |
7bc1d727 WB |
214 | None |
215 | } else { | |
f569acc5 | 216 | Some(unsafe { std::mem::transmute(self.chunk_digest(pos).as_ptr()) }) |
7bc1d727 WB |
217 | } |
218 | } | |
a660978c DM |
219 | |
220 | fn index_bytes(&self) -> u64 { | |
57e50fb9 | 221 | if self.index.is_empty() { |
a660978c DM |
222 | 0 |
223 | } else { | |
57e50fb9 | 224 | self.chunk_end(self.index.len() - 1) |
a660978c DM |
225 | } |
226 | } | |
7bc1d727 WB |
227 | } |
228 | ||
c443f58b WB |
229 | struct CachedChunk { |
230 | range: Range<u64>, | |
231 | data: Vec<u8>, | |
232 | } | |
233 | ||
234 | impl CachedChunk { | |
235 | /// Perform sanity checks on the range and data size: | |
236 | pub fn new(range: Range<u64>, data: Vec<u8>) -> Result<Self, Error> { | |
237 | if data.len() as u64 != range.end - range.start { | |
238 | bail!( | |
239 | "read chunk with wrong size ({} != {})", | |
240 | data.len(), | |
241 | range.end - range.start, | |
242 | ); | |
243 | } | |
244 | Ok(Self { range, data }) | |
245 | } | |
246 | } | |
247 | ||
d48a9955 DM |
248 | pub struct BufferedDynamicReader<S> { |
249 | store: S, | |
93d5d779 | 250 | index: DynamicIndexReader, |
39c6bd86 DM |
251 | archive_size: u64, |
252 | read_buffer: Vec<u8>, | |
253 | buffered_chunk_idx: usize, | |
254 | buffered_chunk_start: u64, | |
255 | read_offset: u64, | |
c443f58b | 256 | lru_cache: crate::tools::lru_cache::LruCache<usize, CachedChunk>, |
536683e7 CE |
257 | } |
258 | ||
259 | struct ChunkCacher<'a, S> { | |
260 | store: &'a mut S, | |
261 | index: &'a DynamicIndexReader, | |
262 | } | |
263 | ||
c443f58b WB |
264 | impl<'a, S: ReadChunk> crate::tools::lru_cache::Cacher<usize, CachedChunk> for ChunkCacher<'a, S> { |
265 | fn fetch(&mut self, index: usize) -> Result<Option<CachedChunk>, Error> { | |
266 | let info = self.index.chunk_info(index)?; | |
267 | let range = info.range; | |
268 | let data = self.store.read_chunk(&info.digest)?; | |
269 | CachedChunk::new(range, data).map(Some) | |
536683e7 | 270 | } |
77703d95 DM |
271 | } |
272 | ||
f569acc5 | 273 | impl<S: ReadChunk> BufferedDynamicReader<S> { |
d48a9955 | 274 | pub fn new(index: DynamicIndexReader, store: S) -> Self { |
57e50fb9 | 275 | let archive_size = index.index_bytes(); |
39c6bd86 | 276 | Self { |
d48a9955 | 277 | store, |
653b1ca1 WB |
278 | index, |
279 | archive_size, | |
f569acc5 | 280 | read_buffer: Vec::with_capacity(1024 * 1024), |
39c6bd86 DM |
281 | buffered_chunk_idx: 0, |
282 | buffered_chunk_start: 0, | |
283 | read_offset: 0, | |
536683e7 | 284 | lru_cache: crate::tools::lru_cache::LruCache::new(32), |
39c6bd86 DM |
285 | } |
286 | } | |
287 | ||
f569acc5 WB |
288 | pub fn archive_size(&self) -> u64 { |
289 | self.archive_size | |
290 | } | |
39c6bd86 | 291 | |
0a72e267 | 292 | fn buffer_chunk(&mut self, idx: usize) -> Result<(), Error> { |
c443f58b WB |
293 | //let (start, end, data) = self.lru_cache.access( |
294 | let cached_chunk = self.lru_cache.access( | |
536683e7 CE |
295 | idx, |
296 | &mut ChunkCacher { | |
297 | store: &mut self.store, | |
298 | index: &self.index, | |
299 | }, | |
300 | )?.ok_or_else(|| format_err!("chunk not found by cacher"))?; | |
301 | ||
536683e7 | 302 | // fixme: avoid copy |
f98ac774 | 303 | self.read_buffer.clear(); |
c443f58b | 304 | self.read_buffer.extend_from_slice(&cached_chunk.data); |
0a72e267 DM |
305 | |
306 | self.buffered_chunk_idx = idx; | |
d48a9955 | 307 | |
c443f58b | 308 | self.buffered_chunk_start = cached_chunk.range.start; |
0a72e267 DM |
309 | //println!("BUFFER {} {}", self.buffered_chunk_start, end); |
310 | Ok(()) | |
311 | } | |
312 | } | |
313 | ||
f569acc5 | 314 | impl<S: ReadChunk> crate::tools::BufferedRead for BufferedDynamicReader<S> { |
0a72e267 | 315 | fn buffered_read(&mut self, offset: u64) -> Result<&[u8], Error> { |
f569acc5 WB |
316 | if offset == self.archive_size { |
317 | return Ok(&self.read_buffer[0..0]); | |
318 | } | |
318564ac | 319 | |
39c6bd86 | 320 | let buffer_len = self.read_buffer.len(); |
0b05fd58 | 321 | let index = &self.index; |
39c6bd86 DM |
322 | |
323 | // optimization for sequential read | |
f569acc5 | 324 | if buffer_len > 0 |
57e50fb9 | 325 | && ((self.buffered_chunk_idx + 1) < index.index.len()) |
f569acc5 | 326 | && (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64))) |
39c6bd86 DM |
327 | { |
328 | let next_idx = self.buffered_chunk_idx + 1; | |
329 | let next_end = index.chunk_end(next_idx); | |
330 | if offset < next_end { | |
373ef4a5 | 331 | self.buffer_chunk(next_idx)?; |
39c6bd86 DM |
332 | let buffer_offset = (offset - self.buffered_chunk_start) as usize; |
333 | return Ok(&self.read_buffer[buffer_offset..]); | |
334 | } | |
335 | } | |
336 | ||
f569acc5 WB |
337 | if (buffer_len == 0) |
338 | || (offset < self.buffered_chunk_start) | |
339 | || (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64))) | |
39c6bd86 | 340 | { |
57e50fb9 | 341 | let end_idx = index.index.len() - 1; |
39c6bd86 DM |
342 | let end = index.chunk_end(end_idx); |
343 | let idx = index.binary_search(0, 0, end_idx, end, offset)?; | |
373ef4a5 | 344 | self.buffer_chunk(idx)?; |
f569acc5 | 345 | } |
39c6bd86 DM |
346 | |
347 | let buffer_offset = (offset - self.buffered_chunk_start) as usize; | |
348 | Ok(&self.read_buffer[buffer_offset..]) | |
349 | } | |
39c6bd86 | 350 | } |
77703d95 | 351 | |
f569acc5 | 352 | impl<S: ReadChunk> std::io::Read for BufferedDynamicReader<S> { |
4624fe29 | 353 | fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> { |
fded74d0 | 354 | use crate::tools::BufferedRead; |
f569acc5 | 355 | use std::io::{Error, ErrorKind}; |
4624fe29 DM |
356 | |
357 | let data = match self.buffered_read(self.read_offset) { | |
358 | Ok(v) => v, | |
359 | Err(err) => return Err(Error::new(ErrorKind::Other, err.to_string())), | |
360 | }; | |
361 | ||
f569acc5 WB |
362 | let n = if data.len() > buf.len() { |
363 | buf.len() | |
364 | } else { | |
365 | data.len() | |
366 | }; | |
4624fe29 | 367 | |
bde8e243 | 368 | buf[0..n].copy_from_slice(&data[0..n]); |
4624fe29 DM |
369 | |
370 | self.read_offset += n as u64; | |
371 | ||
62ee2eb4 | 372 | Ok(n) |
4624fe29 DM |
373 | } |
374 | } | |
375 | ||
f569acc5 | 376 | impl<S: ReadChunk> std::io::Seek for BufferedDynamicReader<S> { |
d48a9955 | 377 | fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> { |
34337050 | 378 | let new_offset = match pos { |
f569acc5 WB |
379 | SeekFrom::Start(start_offset) => start_offset as i64, |
380 | SeekFrom::End(end_offset) => (self.archive_size as i64) + end_offset, | |
34337050 DM |
381 | SeekFrom::Current(offset) => (self.read_offset as i64) + offset, |
382 | }; | |
383 | ||
ddbdf80d | 384 | use std::io::{Error, ErrorKind}; |
34337050 DM |
385 | if (new_offset < 0) || (new_offset > (self.archive_size as i64)) { |
386 | return Err(Error::new( | |
387 | ErrorKind::Other, | |
f569acc5 WB |
388 | format!( |
389 | "seek is out of range {} ([0..{}])", | |
390 | new_offset, self.archive_size | |
391 | ), | |
392 | )); | |
34337050 DM |
393 | } |
394 | self.read_offset = new_offset as u64; | |
395 | ||
396 | Ok(self.read_offset) | |
397 | } | |
398 | } | |
399 | ||
4cf0ced9 DC |
400 | /// This is a workaround until we have cleaned up the chunk/reader/... infrastructure for better |
401 | /// async use! | |
402 | /// | |
403 | /// Ideally BufferedDynamicReader gets replaced so the LruCache maps to `BroadcastFuture<Chunk>`, | |
404 | /// so that we can properly access it from multiple threads simultaneously while not issuing | |
405 | /// duplicate simultaneous reads over http. | |
406 | #[derive(Clone)] | |
407 | pub struct LocalDynamicReadAt<R: ReadChunk> { | |
408 | inner: Arc<Mutex<BufferedDynamicReader<R>>>, | |
409 | } | |
410 | ||
411 | impl<R: ReadChunk> LocalDynamicReadAt<R> { | |
412 | pub fn new(inner: BufferedDynamicReader<R>) -> Self { | |
413 | Self { | |
414 | inner: Arc::new(Mutex::new(inner)), | |
415 | } | |
416 | } | |
417 | } | |
418 | ||
419 | impl<R: ReadChunk> pxar::accessor::ReadAt for LocalDynamicReadAt<R> { | |
420 | fn poll_read_at( | |
421 | self: Pin<&Self>, | |
422 | _cx: &mut Context, | |
423 | buf: &mut [u8], | |
424 | offset: u64, | |
425 | ) -> Poll<io::Result<usize>> { | |
426 | use std::io::Read; | |
427 | tokio::task::block_in_place(move || { | |
428 | let mut reader = self.inner.lock().unwrap(); | |
429 | reader.seek(SeekFrom::Start(offset))?; | |
430 | Poll::Ready(Ok(reader.read(buf)?)) | |
431 | }) | |
432 | } | |
433 | } | |
434 | ||
435 | ||
976595e1 | 436 | /// Create dynamic index files (`.dixd`) |
93d5d779 | 437 | pub struct DynamicIndexWriter { |
1629d2ad | 438 | store: Arc<ChunkStore>, |
43b13033 | 439 | _lock: tools::ProcessLockSharedGuard, |
5032b57b | 440 | writer: BufWriter<File>, |
5e7a09be | 441 | closed: bool, |
0433db19 DM |
442 | filename: PathBuf, |
443 | tmp_filename: PathBuf, | |
16ff6b7c | 444 | csum: Option<openssl::sha::Sha256>, |
9f49fe1d DM |
445 | pub uuid: [u8; 16], |
446 | pub ctime: u64, | |
0433db19 DM |
447 | } |
448 | ||
93d5d779 | 449 | impl Drop for DynamicIndexWriter { |
1629d2ad DM |
450 | fn drop(&mut self) { |
451 | let _ = std::fs::remove_file(&self.tmp_filename); // ignore errors | |
452 | } | |
453 | } | |
454 | ||
93d5d779 | 455 | impl DynamicIndexWriter { |
976595e1 | 456 | pub fn create(store: Arc<ChunkStore>, path: &Path) -> Result<Self, Error> { |
43b13033 DM |
457 | let shared_lock = store.try_shared_lock()?; |
458 | ||
0433db19 DM |
459 | let full_path = store.relative_path(path); |
460 | let mut tmp_path = full_path.clone(); | |
93d5d779 | 461 | tmp_path.set_extension("tmp_didx"); |
0433db19 | 462 | |
ddbdf80d | 463 | let file = std::fs::OpenOptions::new() |
f569acc5 WB |
464 | .create(true) |
465 | .truncate(true) | |
0433db19 DM |
466 | .read(true) |
467 | .write(true) | |
468 | .open(&tmp_path)?; | |
469 | ||
f569acc5 | 470 | let mut writer = BufWriter::with_capacity(1024 * 1024, file); |
5032b57b | 471 | |
93d5d779 | 472 | let header_size = std::mem::size_of::<DynamicIndexHeader>(); |
0433db19 DM |
473 | |
474 | // todo: use static assertion when available in rust | |
f569acc5 WB |
475 | if header_size != 4096 { |
476 | panic!("got unexpected header size"); | |
477 | } | |
0433db19 | 478 | |
e693818a | 479 | let ctime = epoch_now_u64()?; |
0433db19 | 480 | |
f569acc5 | 481 | let uuid = Uuid::generate(); |
0433db19 | 482 | |
d21f8a5b | 483 | let mut buffer = vec::zeroed(header_size); |
93d5d779 | 484 | let header = crate::tools::map_struct_mut::<DynamicIndexHeader>(&mut buffer)?; |
0433db19 | 485 | |
a7dd4830 | 486 | header.magic = super::DYNAMIC_SIZED_CHUNK_INDEX_1_0; |
0433db19 DM |
487 | header.ctime = u64::to_le(ctime); |
488 | header.uuid = *uuid.as_bytes(); | |
489 | ||
16ff6b7c DM |
490 | header.index_csum = [0u8; 32]; |
491 | ||
5032b57b | 492 | writer.write_all(&buffer)?; |
0433db19 | 493 | |
16ff6b7c DM |
494 | let csum = Some(openssl::sha::Sha256::new()); |
495 | ||
0433db19 DM |
496 | Ok(Self { |
497 | store, | |
43b13033 | 498 | _lock: shared_lock, |
653b1ca1 | 499 | writer, |
5e7a09be | 500 | closed: false, |
0433db19 DM |
501 | filename: full_path, |
502 | tmp_filename: tmp_path, | |
503 | ctime, | |
504 | uuid: *uuid.as_bytes(), | |
16ff6b7c | 505 | csum, |
0433db19 DM |
506 | }) |
507 | } | |
5e7a09be | 508 | |
f98ac774 | 509 | // fixme: use add_chunk instead? |
4ee8f53d DM |
510 | pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> { |
511 | self.store.insert_chunk(chunk, digest) | |
976595e1 DM |
512 | } |
513 | ||
f569acc5 | 514 | pub fn close(&mut self) -> Result<[u8; 32], Error> { |
5e7a09be | 515 | if self.closed { |
f569acc5 WB |
516 | bail!( |
517 | "cannot close already closed archive index file {:?}", | |
518 | self.filename | |
519 | ); | |
5e7a09be DM |
520 | } |
521 | ||
522 | self.closed = true; | |
523 | ||
5032b57b | 524 | self.writer.flush()?; |
5e7a09be | 525 | |
9ea4bce4 | 526 | let csum_offset = proxmox::offsetof!(DynamicIndexHeader, index_csum); |
d48a9955 | 527 | self.writer.seek(SeekFrom::Start(csum_offset as u64))?; |
16ff6b7c DM |
528 | |
529 | let csum = self.csum.take().unwrap(); | |
530 | let index_csum = csum.finish(); | |
531 | ||
532 | self.writer.write_all(&index_csum)?; | |
533 | self.writer.flush()?; | |
534 | ||
5e7a09be DM |
535 | if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) { |
536 | bail!("Atomic rename file {:?} failed - {}", self.filename, err); | |
537 | } | |
538 | ||
16ff6b7c | 539 | Ok(index_csum) |
5e7a09be DM |
540 | } |
541 | ||
976595e1 DM |
542 | // fixme: rename to add_digest |
543 | pub fn add_chunk(&mut self, offset: u64, digest: &[u8; 32]) -> Result<(), Error> { | |
544 | if self.closed { | |
f569acc5 WB |
545 | bail!( |
546 | "cannot write to closed dynamic index file {:?}", | |
547 | self.filename | |
548 | ); | |
976595e1 | 549 | } |
16ff6b7c | 550 | |
f569acc5 | 551 | let offset_le: &[u8; 8] = unsafe { &std::mem::transmute::<u64, [u8; 8]>(offset.to_le()) }; |
16ff6b7c DM |
552 | |
553 | if let Some(ref mut csum) = self.csum { | |
554 | csum.update(offset_le); | |
555 | csum.update(digest); | |
556 | } | |
557 | ||
cd69d36b DM |
558 | self.writer.write_all(offset_le)?; |
559 | self.writer.write_all(digest)?; | |
976595e1 DM |
560 | Ok(()) |
561 | } | |
562 | } | |
563 | ||
564 | /// Writer which splits a binary stream into dynamic sized chunks | |
565 | /// | |
566 | /// And store the resulting chunk list into the index file. | |
567 | pub struct DynamicChunkWriter { | |
568 | index: DynamicIndexWriter, | |
569 | closed: bool, | |
570 | chunker: Chunker, | |
571 | stat: ChunkStat, | |
572 | chunk_offset: usize, | |
573 | last_chunk: usize, | |
574 | chunk_buffer: Vec<u8>, | |
575 | } | |
576 | ||
577 | impl DynamicChunkWriter { | |
976595e1 DM |
578 | pub fn new(index: DynamicIndexWriter, chunk_size: usize) -> Self { |
579 | Self { | |
580 | index, | |
581 | closed: false, | |
582 | chunker: Chunker::new(chunk_size), | |
583 | stat: ChunkStat::new(0), | |
584 | chunk_offset: 0, | |
585 | last_chunk: 0, | |
f569acc5 | 586 | chunk_buffer: Vec::with_capacity(chunk_size * 4), |
976595e1 DM |
587 | } |
588 | } | |
589 | ||
7e336555 DM |
590 | pub fn stat(&self) -> &ChunkStat { |
591 | &self.stat | |
592 | } | |
593 | ||
f569acc5 | 594 | pub fn close(&mut self) -> Result<(), Error> { |
976595e1 DM |
595 | if self.closed { |
596 | return Ok(()); | |
597 | } | |
598 | ||
599 | self.closed = true; | |
600 | ||
601 | self.write_chunk_buffer()?; | |
602 | ||
603 | self.index.close()?; | |
604 | ||
605 | self.stat.size = self.chunk_offset as u64; | |
606 | ||
607 | // add size of index file | |
f569acc5 WB |
608 | self.stat.size += |
609 | (self.stat.chunk_count * 40 + std::mem::size_of::<DynamicIndexHeader>()) as u64; | |
976595e1 DM |
610 | |
611 | Ok(()) | |
612 | } | |
613 | ||
614 | fn write_chunk_buffer(&mut self) -> Result<(), Error> { | |
5e7a09be DM |
615 | let chunk_size = self.chunk_buffer.len(); |
616 | ||
f569acc5 WB |
617 | if chunk_size == 0 { |
618 | return Ok(()); | |
619 | } | |
5e7a09be DM |
620 | |
621 | let expected_chunk_size = self.chunk_offset - self.last_chunk; | |
622 | if expected_chunk_size != self.chunk_buffer.len() { | |
976595e1 | 623 | bail!("wrong chunk size {} != {}", expected_chunk_size, chunk_size); |
5e7a09be DM |
624 | } |
625 | ||
7e336555 | 626 | self.stat.chunk_count += 1; |
247cdbce | 627 | |
5e7a09be DM |
628 | self.last_chunk = self.chunk_offset; |
629 | ||
4ee8f53d | 630 | let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer) |
f98ac774 DM |
631 | .compress(true) |
632 | .build()?; | |
633 | ||
4ee8f53d | 634 | match self.index.insert_chunk(&chunk, &digest) { |
f98ac774 | 635 | Ok((is_duplicate, compressed_size)) => { |
7e336555 | 636 | self.stat.compressed_size += compressed_size; |
798f7fa0 | 637 | if is_duplicate { |
7e336555 | 638 | self.stat.duplicate_chunks += 1; |
798f7fa0 | 639 | } else { |
7e336555 | 640 | self.stat.disk_size += compressed_size; |
798f7fa0 DM |
641 | } |
642 | ||
f569acc5 WB |
643 | println!( |
644 | "ADD CHUNK {:016x} {} {}% {} {}", | |
645 | self.chunk_offset, | |
646 | chunk_size, | |
647 | (compressed_size * 100) / (chunk_size as u64), | |
648 | is_duplicate, | |
649 | proxmox::tools::digest_to_hex(&digest) | |
650 | ); | |
976595e1 | 651 | self.index.add_chunk(self.chunk_offset as u64, &digest)?; |
5e7a09be | 652 | self.chunk_buffer.truncate(0); |
62ee2eb4 | 653 | Ok(()) |
5e7a09be DM |
654 | } |
655 | Err(err) => { | |
656 | self.chunk_buffer.truncate(0); | |
62ee2eb4 | 657 | Err(err) |
5e7a09be DM |
658 | } |
659 | } | |
5e7a09be | 660 | } |
0433db19 DM |
661 | } |
662 | ||
976595e1 | 663 | impl Write for DynamicChunkWriter { |
0433db19 | 664 | fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> { |
0433db19 DM |
665 | let chunker = &mut self.chunker; |
666 | ||
667 | let pos = chunker.scan(data); | |
668 | ||
669 | if pos > 0 { | |
4ee8f53d | 670 | self.chunk_buffer.extend_from_slice(&data[0..pos]); |
0433db19 DM |
671 | self.chunk_offset += pos; |
672 | ||
976595e1 | 673 | if let Err(err) = self.write_chunk_buffer() { |
f569acc5 WB |
674 | return Err(std::io::Error::new( |
675 | std::io::ErrorKind::Other, | |
676 | err.to_string(), | |
677 | )); | |
976595e1 | 678 | } |
5e7a09be | 679 | Ok(pos) |
0433db19 DM |
680 | } else { |
681 | self.chunk_offset += data.len(); | |
4ee8f53d | 682 | self.chunk_buffer.extend_from_slice(data); |
5e7a09be | 683 | Ok(data.len()) |
0433db19 DM |
684 | } |
685 | } | |
686 | ||
687 | fn flush(&mut self) -> std::result::Result<(), std::io::Error> { | |
f569acc5 WB |
688 | Err(std::io::Error::new( |
689 | std::io::ErrorKind::Other, | |
690 | "please use close() instead of flush()", | |
691 | )) | |
0433db19 DM |
692 | } |
693 | } |