]>
Commit | Line | Data |
---|---|---|
0433db19 | 1 | use std::fs::File; |
4cf0ced9 | 2 | use std::io::{self, BufWriter, Seek, SeekFrom, Write}; |
c443f58b | 3 | use std::ops::Range; |
0433db19 | 4 | use std::os::unix::io::AsRawFd; |
367f002e | 5 | use std::path::{Path, PathBuf}; |
4cf0ced9 | 6 | use std::sync::{Arc, Mutex}; |
a6f87283 | 7 | use std::task::Context; |
4cf0ced9 | 8 | use std::pin::Pin; |
367f002e | 9 | |
f7d4e4b5 | 10 | use anyhow::{bail, format_err, Error}; |
0433db19 | 11 | |
9b2b627f | 12 | use proxmox::tools::io::ReadExt; |
f569acc5 | 13 | use proxmox::tools::uuid::Uuid; |
f35197f4 | 14 | use proxmox::tools::vec; |
57e50fb9 | 15 | use proxmox::tools::mmap::Mmap; |
a6f87283 | 16 | use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation}; |
8ea3b1d1 | 17 | |
367f002e WB |
18 | use super::chunk_stat::ChunkStat; |
19 | use super::chunk_store::ChunkStore; | |
c443f58b | 20 | use super::index::ChunkReadInfo; |
367f002e | 21 | use super::read_chunk::ReadChunk; |
f569acc5 WB |
22 | use super::Chunker; |
23 | use super::IndexFile; | |
4ee8f53d | 24 | use super::{DataBlob, DataChunkBuilder}; |
e693818a | 25 | use crate::tools::{self, epoch_now_u64}; |
f98ac774 | 26 | |
e5064ba6 | 27 | /// Header format definition for dynamic index files (`.didx`) |
0433db19 | 28 | #[repr(C)] |
93d5d779 | 29 | pub struct DynamicIndexHeader { |
a7dd4830 | 30 | pub magic: [u8; 8], |
0433db19 DM |
31 | pub uuid: [u8; 16], |
32 | pub ctime: u64, | |
16ff6b7c DM |
33 | /// Sha256 over the index ``SHA256(offset1||digest1||offset2||digest2||...)`` |
34 | pub index_csum: [u8; 32], | |
990b930f | 35 | reserved: [u8; 4032], // overall size is one page (4096 bytes) |
0433db19 | 36 | } |
9ea4bce4 | 37 | proxmox::static_assert_size!(DynamicIndexHeader, 4096); |
990b930f WB |
38 | // TODO: Once non-Copy unions are stabilized, use: |
39 | // union DynamicIndexHeader { | |
40 | // reserved: [u8; 4096], | |
41 | // pub data: DynamicIndexHeaderData, | |
42 | // } | |
77703d95 | 43 | |
57e50fb9 WB |
44 | #[derive(Clone, Debug)] |
45 | #[repr(C)] | |
46 | pub struct DynamicEntry { | |
7a6b5492 | 47 | end_le: u64, |
57e50fb9 WB |
48 | digest: [u8; 32], |
49 | } | |
50 | ||
7a6b5492 WB |
51 | impl DynamicEntry { |
52 | #[inline] | |
53 | pub fn end(&self) -> u64 { | |
54 | u64::from_le(self.end_le) | |
55 | } | |
56 | } | |
57 | ||
93d5d779 | 58 | pub struct DynamicIndexReader { |
728797d0 | 59 | _file: File, |
9f49fe1d | 60 | pub size: usize, |
57e50fb9 | 61 | index: Mmap<DynamicEntry>, |
9f49fe1d DM |
62 | pub uuid: [u8; 16], |
63 | pub ctime: u64, | |
16ff6b7c | 64 | pub index_csum: [u8; 32], |
77703d95 DM |
65 | } |
66 | ||
93d5d779 | 67 | impl DynamicIndexReader { |
d48a9955 | 68 | pub fn open(path: &Path) -> Result<Self, Error> { |
0f0a35b3 DM |
69 | File::open(path) |
70 | .map_err(Error::from) | |
57e50fb9 | 71 | .and_then(Self::new) |
0f0a35b3 | 72 | .map_err(|err| format_err!("Unable to open dynamic index {:?} - {}", path, err)) |
d48a9955 DM |
73 | } |
74 | ||
75 | pub fn new(mut file: std::fs::File) -> Result<Self, Error> { | |
f569acc5 WB |
76 | if let Err(err) = |
77 | nix::fcntl::flock(file.as_raw_fd(), nix::fcntl::FlockArg::LockSharedNonblock) | |
78 | { | |
0f0a35b3 DM |
79 | bail!("unable to get shared lock - {}", err); |
80 | } | |
81 | ||
57e50fb9 | 82 | // FIXME: This is NOT OUR job! Check the callers of this method and remove this! |
d48a9955 DM |
83 | file.seek(SeekFrom::Start(0))?; |
84 | ||
93d5d779 | 85 | let header_size = std::mem::size_of::<DynamicIndexHeader>(); |
77703d95 | 86 | |
ba5e6747 | 87 | let header: Box<DynamicIndexHeader> = unsafe { file.read_host_value_boxed()? }; |
77703d95 | 88 | |
a7dd4830 | 89 | if header.magic != super::DYNAMIC_SIZED_CHUNK_INDEX_1_0 { |
d48a9955 | 90 | bail!("got unknown magic number"); |
77703d95 DM |
91 | } |
92 | ||
77703d95 DM |
93 | let ctime = u64::from_le(header.ctime); |
94 | ||
95 | let rawfd = file.as_raw_fd(); | |
96 | ||
d48a9955 | 97 | let stat = nix::sys::stat::fstat(rawfd)?; |
77703d95 DM |
98 | |
99 | let size = stat.st_size as usize; | |
100 | ||
ddbdf80d | 101 | let index_size = size - header_size; |
57e50fb9 WB |
102 | let index_count = index_size / 40; |
103 | if index_count * 40 != index_size { | |
d48a9955 | 104 | bail!("got unexpected file size"); |
77703d95 DM |
105 | } |
106 | ||
57e50fb9 WB |
107 | let index = unsafe { |
108 | Mmap::map_fd( | |
109 | rawfd, | |
110 | header_size as u64, | |
111 | index_count, | |
f569acc5 WB |
112 | nix::sys::mman::ProtFlags::PROT_READ, |
113 | nix::sys::mman::MapFlags::MAP_PRIVATE, | |
57e50fb9 WB |
114 | )? |
115 | }; | |
77703d95 | 116 | |
77703d95 | 117 | Ok(Self { |
728797d0 | 118 | _file: file, |
77703d95 | 119 | size, |
57e50fb9 | 120 | index, |
77703d95 DM |
121 | ctime, |
122 | uuid: header.uuid, | |
16ff6b7c | 123 | index_csum: header.index_csum, |
77703d95 DM |
124 | }) |
125 | } | |
126 | ||
39c6bd86 | 127 | #[inline] |
9fe2f639 | 128 | #[allow(clippy::cast_ptr_alignment)] |
39c6bd86 | 129 | fn chunk_end(&self, pos: usize) -> u64 { |
57e50fb9 | 130 | if pos >= self.index.len() { |
39c6bd86 DM |
131 | panic!("chunk index out of range"); |
132 | } | |
7a6b5492 | 133 | self.index[pos].end() |
39c6bd86 DM |
134 | } |
135 | ||
136 | #[inline] | |
f98ac774 | 137 | fn chunk_digest(&self, pos: usize) -> &[u8; 32] { |
57e50fb9 | 138 | if pos >= self.index.len() { |
39c6bd86 DM |
139 | panic!("chunk index out of range"); |
140 | } | |
57e50fb9 | 141 | &self.index[pos].digest |
39c6bd86 DM |
142 | } |
143 | ||
82c85a21 DM |
144 | /// Compute checksum and data size |
145 | pub fn compute_csum(&self) -> ([u8; 32], u64) { | |
82c85a21 | 146 | let mut csum = openssl::sha::Sha256::new(); |
57e50fb9 | 147 | for entry in &self.index { |
7a6b5492 | 148 | csum.update(&entry.end_le.to_ne_bytes()); |
57e50fb9 | 149 | csum.update(&entry.digest); |
82c85a21 DM |
150 | } |
151 | let csum = csum.finish(); | |
152 | ||
57e50fb9 WB |
153 | ( |
154 | csum, | |
155 | self.index | |
156 | .last() | |
7a6b5492 WB |
157 | .map(|entry| entry.end()) |
158 | .unwrap_or(0) | |
159 | ) | |
96df2fb4 | 160 | } |
39c6bd86 | 161 | |
57e50fb9 | 162 | // TODO: can we use std::slice::binary_search with Mmap now? |
39c6bd86 DM |
163 | fn binary_search( |
164 | &self, | |
165 | start_idx: usize, | |
166 | start: u64, | |
167 | end_idx: usize, | |
168 | end: u64, | |
f569acc5 | 169 | offset: u64, |
39c6bd86 | 170 | ) -> Result<usize, Error> { |
39c6bd86 DM |
171 | if (offset >= end) || (offset < start) { |
172 | bail!("offset out of range"); | |
173 | } | |
174 | ||
175 | if end_idx == start_idx { | |
176 | return Ok(start_idx); // found | |
177 | } | |
f569acc5 | 178 | let middle_idx = (start_idx + end_idx) / 2; |
39c6bd86 DM |
179 | let middle_end = self.chunk_end(middle_idx); |
180 | ||
181 | if offset < middle_end { | |
62ee2eb4 | 182 | self.binary_search(start_idx, start, middle_idx, middle_end, offset) |
39c6bd86 | 183 | } else { |
62ee2eb4 | 184 | self.binary_search(middle_idx + 1, middle_end, end_idx, end, offset) |
39c6bd86 DM |
185 | } |
186 | } | |
187 | } | |
188 | ||
7bc1d727 WB |
189 | impl IndexFile for DynamicIndexReader { |
190 | fn index_count(&self) -> usize { | |
57e50fb9 | 191 | self.index.len() |
7bc1d727 WB |
192 | } |
193 | ||
194 | fn index_digest(&self, pos: usize) -> Option<&[u8; 32]> { | |
57e50fb9 | 195 | if pos >= self.index.len() { |
7bc1d727 WB |
196 | None |
197 | } else { | |
f569acc5 | 198 | Some(unsafe { std::mem::transmute(self.chunk_digest(pos).as_ptr()) }) |
7bc1d727 WB |
199 | } |
200 | } | |
a660978c DM |
201 | |
202 | fn index_bytes(&self) -> u64 { | |
57e50fb9 | 203 | if self.index.is_empty() { |
a660978c DM |
204 | 0 |
205 | } else { | |
57e50fb9 | 206 | self.chunk_end(self.index.len() - 1) |
a660978c DM |
207 | } |
208 | } | |
fdaab0df DM |
209 | |
210 | #[allow(clippy::cast_ptr_alignment)] | |
211 | fn chunk_info(&self, pos: usize) -> Option<ChunkReadInfo> { | |
212 | if pos >= self.index.len() { | |
213 | return None; | |
214 | } | |
215 | let start = if pos == 0 { 0 } else { self.index[pos - 1].end() }; | |
216 | ||
217 | let end = self.index[pos].end(); | |
218 | ||
219 | Some(ChunkReadInfo { | |
220 | range: start..end, | |
221 | digest: self.index[pos].digest.clone(), | |
222 | }) | |
223 | } | |
7bc1d727 WB |
224 | } |
225 | ||
c443f58b WB |
226 | struct CachedChunk { |
227 | range: Range<u64>, | |
228 | data: Vec<u8>, | |
229 | } | |
230 | ||
231 | impl CachedChunk { | |
232 | /// Perform sanity checks on the range and data size: | |
233 | pub fn new(range: Range<u64>, data: Vec<u8>) -> Result<Self, Error> { | |
234 | if data.len() as u64 != range.end - range.start { | |
235 | bail!( | |
236 | "read chunk with wrong size ({} != {})", | |
237 | data.len(), | |
238 | range.end - range.start, | |
239 | ); | |
240 | } | |
241 | Ok(Self { range, data }) | |
242 | } | |
243 | } | |
244 | ||
d48a9955 DM |
245 | pub struct BufferedDynamicReader<S> { |
246 | store: S, | |
93d5d779 | 247 | index: DynamicIndexReader, |
39c6bd86 DM |
248 | archive_size: u64, |
249 | read_buffer: Vec<u8>, | |
250 | buffered_chunk_idx: usize, | |
251 | buffered_chunk_start: u64, | |
252 | read_offset: u64, | |
c443f58b | 253 | lru_cache: crate::tools::lru_cache::LruCache<usize, CachedChunk>, |
536683e7 CE |
254 | } |
255 | ||
256 | struct ChunkCacher<'a, S> { | |
257 | store: &'a mut S, | |
258 | index: &'a DynamicIndexReader, | |
259 | } | |
260 | ||
c443f58b WB |
261 | impl<'a, S: ReadChunk> crate::tools::lru_cache::Cacher<usize, CachedChunk> for ChunkCacher<'a, S> { |
262 | fn fetch(&mut self, index: usize) -> Result<Option<CachedChunk>, Error> { | |
fdaab0df DM |
263 | let info = match self.index.chunk_info(index) { |
264 | Some(info) => info, | |
265 | None => bail!("chunk index out of range"), | |
266 | }; | |
c443f58b WB |
267 | let range = info.range; |
268 | let data = self.store.read_chunk(&info.digest)?; | |
269 | CachedChunk::new(range, data).map(Some) | |
536683e7 | 270 | } |
77703d95 DM |
271 | } |
272 | ||
f569acc5 | 273 | impl<S: ReadChunk> BufferedDynamicReader<S> { |
d48a9955 | 274 | pub fn new(index: DynamicIndexReader, store: S) -> Self { |
57e50fb9 | 275 | let archive_size = index.index_bytes(); |
39c6bd86 | 276 | Self { |
d48a9955 | 277 | store, |
653b1ca1 WB |
278 | index, |
279 | archive_size, | |
f569acc5 | 280 | read_buffer: Vec::with_capacity(1024 * 1024), |
39c6bd86 DM |
281 | buffered_chunk_idx: 0, |
282 | buffered_chunk_start: 0, | |
283 | read_offset: 0, | |
536683e7 | 284 | lru_cache: crate::tools::lru_cache::LruCache::new(32), |
39c6bd86 DM |
285 | } |
286 | } | |
287 | ||
f569acc5 WB |
288 | pub fn archive_size(&self) -> u64 { |
289 | self.archive_size | |
290 | } | |
39c6bd86 | 291 | |
0a72e267 | 292 | fn buffer_chunk(&mut self, idx: usize) -> Result<(), Error> { |
c443f58b WB |
293 | //let (start, end, data) = self.lru_cache.access( |
294 | let cached_chunk = self.lru_cache.access( | |
536683e7 CE |
295 | idx, |
296 | &mut ChunkCacher { | |
297 | store: &mut self.store, | |
298 | index: &self.index, | |
299 | }, | |
300 | )?.ok_or_else(|| format_err!("chunk not found by cacher"))?; | |
301 | ||
536683e7 | 302 | // fixme: avoid copy |
f98ac774 | 303 | self.read_buffer.clear(); |
c443f58b | 304 | self.read_buffer.extend_from_slice(&cached_chunk.data); |
0a72e267 DM |
305 | |
306 | self.buffered_chunk_idx = idx; | |
d48a9955 | 307 | |
c443f58b | 308 | self.buffered_chunk_start = cached_chunk.range.start; |
0a72e267 DM |
309 | //println!("BUFFER {} {}", self.buffered_chunk_start, end); |
310 | Ok(()) | |
311 | } | |
312 | } | |
313 | ||
f569acc5 | 314 | impl<S: ReadChunk> crate::tools::BufferedRead for BufferedDynamicReader<S> { |
0a72e267 | 315 | fn buffered_read(&mut self, offset: u64) -> Result<&[u8], Error> { |
f569acc5 WB |
316 | if offset == self.archive_size { |
317 | return Ok(&self.read_buffer[0..0]); | |
318 | } | |
318564ac | 319 | |
39c6bd86 | 320 | let buffer_len = self.read_buffer.len(); |
0b05fd58 | 321 | let index = &self.index; |
39c6bd86 DM |
322 | |
323 | // optimization for sequential read | |
f569acc5 | 324 | if buffer_len > 0 |
57e50fb9 | 325 | && ((self.buffered_chunk_idx + 1) < index.index.len()) |
f569acc5 | 326 | && (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64))) |
39c6bd86 DM |
327 | { |
328 | let next_idx = self.buffered_chunk_idx + 1; | |
329 | let next_end = index.chunk_end(next_idx); | |
330 | if offset < next_end { | |
373ef4a5 | 331 | self.buffer_chunk(next_idx)?; |
39c6bd86 DM |
332 | let buffer_offset = (offset - self.buffered_chunk_start) as usize; |
333 | return Ok(&self.read_buffer[buffer_offset..]); | |
334 | } | |
335 | } | |
336 | ||
f569acc5 WB |
337 | if (buffer_len == 0) |
338 | || (offset < self.buffered_chunk_start) | |
339 | || (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64))) | |
39c6bd86 | 340 | { |
57e50fb9 | 341 | let end_idx = index.index.len() - 1; |
39c6bd86 DM |
342 | let end = index.chunk_end(end_idx); |
343 | let idx = index.binary_search(0, 0, end_idx, end, offset)?; | |
373ef4a5 | 344 | self.buffer_chunk(idx)?; |
f569acc5 | 345 | } |
39c6bd86 DM |
346 | |
347 | let buffer_offset = (offset - self.buffered_chunk_start) as usize; | |
348 | Ok(&self.read_buffer[buffer_offset..]) | |
349 | } | |
39c6bd86 | 350 | } |
77703d95 | 351 | |
f569acc5 | 352 | impl<S: ReadChunk> std::io::Read for BufferedDynamicReader<S> { |
4624fe29 | 353 | fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> { |
fded74d0 | 354 | use crate::tools::BufferedRead; |
f569acc5 | 355 | use std::io::{Error, ErrorKind}; |
4624fe29 DM |
356 | |
357 | let data = match self.buffered_read(self.read_offset) { | |
358 | Ok(v) => v, | |
359 | Err(err) => return Err(Error::new(ErrorKind::Other, err.to_string())), | |
360 | }; | |
361 | ||
f569acc5 WB |
362 | let n = if data.len() > buf.len() { |
363 | buf.len() | |
364 | } else { | |
365 | data.len() | |
366 | }; | |
4624fe29 | 367 | |
bde8e243 | 368 | buf[0..n].copy_from_slice(&data[0..n]); |
4624fe29 DM |
369 | |
370 | self.read_offset += n as u64; | |
371 | ||
62ee2eb4 | 372 | Ok(n) |
4624fe29 DM |
373 | } |
374 | } | |
375 | ||
f569acc5 | 376 | impl<S: ReadChunk> std::io::Seek for BufferedDynamicReader<S> { |
d48a9955 | 377 | fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> { |
34337050 | 378 | let new_offset = match pos { |
f569acc5 WB |
379 | SeekFrom::Start(start_offset) => start_offset as i64, |
380 | SeekFrom::End(end_offset) => (self.archive_size as i64) + end_offset, | |
34337050 DM |
381 | SeekFrom::Current(offset) => (self.read_offset as i64) + offset, |
382 | }; | |
383 | ||
ddbdf80d | 384 | use std::io::{Error, ErrorKind}; |
34337050 DM |
385 | if (new_offset < 0) || (new_offset > (self.archive_size as i64)) { |
386 | return Err(Error::new( | |
387 | ErrorKind::Other, | |
f569acc5 WB |
388 | format!( |
389 | "seek is out of range {} ([0..{}])", | |
390 | new_offset, self.archive_size | |
391 | ), | |
392 | )); | |
34337050 DM |
393 | } |
394 | self.read_offset = new_offset as u64; | |
395 | ||
396 | Ok(self.read_offset) | |
397 | } | |
398 | } | |
399 | ||
4cf0ced9 DC |
400 | /// This is a workaround until we have cleaned up the chunk/reader/... infrastructure for better |
401 | /// async use! | |
402 | /// | |
403 | /// Ideally BufferedDynamicReader gets replaced so the LruCache maps to `BroadcastFuture<Chunk>`, | |
404 | /// so that we can properly access it from multiple threads simultaneously while not issuing | |
405 | /// duplicate simultaneous reads over http. | |
406 | #[derive(Clone)] | |
407 | pub struct LocalDynamicReadAt<R: ReadChunk> { | |
408 | inner: Arc<Mutex<BufferedDynamicReader<R>>>, | |
409 | } | |
410 | ||
411 | impl<R: ReadChunk> LocalDynamicReadAt<R> { | |
412 | pub fn new(inner: BufferedDynamicReader<R>) -> Self { | |
413 | Self { | |
414 | inner: Arc::new(Mutex::new(inner)), | |
415 | } | |
416 | } | |
417 | } | |
418 | ||
a6f87283 WB |
419 | impl<R: ReadChunk> ReadAt for LocalDynamicReadAt<R> { |
420 | fn start_read_at<'a>( | |
421 | self: Pin<&'a Self>, | |
4cf0ced9 | 422 | _cx: &mut Context, |
a6f87283 | 423 | buf: &'a mut [u8], |
4cf0ced9 | 424 | offset: u64, |
a6f87283 | 425 | ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> { |
4cf0ced9 | 426 | use std::io::Read; |
a6f87283 | 427 | MaybeReady::Ready(tokio::task::block_in_place(move || { |
4cf0ced9 DC |
428 | let mut reader = self.inner.lock().unwrap(); |
429 | reader.seek(SeekFrom::Start(offset))?; | |
a6f87283 WB |
430 | Ok(reader.read(buf)?) |
431 | })) | |
432 | } | |
433 | ||
434 | fn poll_complete<'a>( | |
435 | self: Pin<&'a Self>, | |
436 | _op: ReadAtOperation<'a>, | |
437 | ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> { | |
438 | panic!("LocalDynamicReadAt::start_read_at returned Pending"); | |
4cf0ced9 DC |
439 | } |
440 | } | |
441 | ||
442 | ||
976595e1 | 443 | /// Create dynamic index files (`.didx`) |
93d5d779 | 444 | pub struct DynamicIndexWriter { |
1629d2ad | 445 | store: Arc<ChunkStore>, |
43b13033 | 446 | _lock: tools::ProcessLockSharedGuard, |
5032b57b | 447 | writer: BufWriter<File>, |
5e7a09be | 448 | closed: bool, |
0433db19 DM |
449 | filename: PathBuf, |
450 | tmp_filename: PathBuf, | |
16ff6b7c | 451 | csum: Option<openssl::sha::Sha256>, |
9f49fe1d DM |
452 | pub uuid: [u8; 16], |
453 | pub ctime: u64, | |
0433db19 DM |
454 | } |
455 | ||
93d5d779 | 456 | impl Drop for DynamicIndexWriter { |
1629d2ad DM |
457 | fn drop(&mut self) { |
458 | let _ = std::fs::remove_file(&self.tmp_filename); // ignore errors | |
459 | } | |
460 | } | |
461 | ||
93d5d779 | 462 | impl DynamicIndexWriter { |
976595e1 | 463 | pub fn create(store: Arc<ChunkStore>, path: &Path) -> Result<Self, Error> { |
43b13033 DM |
464 | let shared_lock = store.try_shared_lock()?; |
465 | ||
0433db19 DM |
466 | let full_path = store.relative_path(path); |
467 | let mut tmp_path = full_path.clone(); | |
93d5d779 | 468 | tmp_path.set_extension("tmp_didx"); |
0433db19 | 469 | |
ddbdf80d | 470 | let file = std::fs::OpenOptions::new() |
f569acc5 WB |
471 | .create(true) |
472 | .truncate(true) | |
0433db19 DM |
473 | .read(true) |
474 | .write(true) | |
475 | .open(&tmp_path)?; | |
476 | ||
f569acc5 | 477 | let mut writer = BufWriter::with_capacity(1024 * 1024, file); |
5032b57b | 478 | |
93d5d779 | 479 | let header_size = std::mem::size_of::<DynamicIndexHeader>(); |
0433db19 DM |
480 | |
481 | // todo: use static assertion when available in rust | |
f569acc5 WB |
482 | if header_size != 4096 { |
483 | panic!("got unexpected header size"); | |
484 | } | |
0433db19 | 485 | |
e693818a | 486 | let ctime = epoch_now_u64()?; |
0433db19 | 487 | |
f569acc5 | 488 | let uuid = Uuid::generate(); |
0433db19 | 489 | |
d21f8a5b | 490 | let mut buffer = vec::zeroed(header_size); |
93d5d779 | 491 | let header = crate::tools::map_struct_mut::<DynamicIndexHeader>(&mut buffer)?; |
0433db19 | 492 | |
a7dd4830 | 493 | header.magic = super::DYNAMIC_SIZED_CHUNK_INDEX_1_0; |
0433db19 DM |
494 | header.ctime = u64::to_le(ctime); |
495 | header.uuid = *uuid.as_bytes(); | |
496 | ||
16ff6b7c DM |
497 | header.index_csum = [0u8; 32]; |
498 | ||
5032b57b | 499 | writer.write_all(&buffer)?; |
0433db19 | 500 | |
16ff6b7c DM |
501 | let csum = Some(openssl::sha::Sha256::new()); |
502 | ||
0433db19 DM |
503 | Ok(Self { |
504 | store, | |
43b13033 | 505 | _lock: shared_lock, |
653b1ca1 | 506 | writer, |
5e7a09be | 507 | closed: false, |
0433db19 DM |
508 | filename: full_path, |
509 | tmp_filename: tmp_path, | |
510 | ctime, | |
511 | uuid: *uuid.as_bytes(), | |
16ff6b7c | 512 | csum, |
0433db19 DM |
513 | }) |
514 | } | |
5e7a09be | 515 | |
f98ac774 | 516 | // fixme: use add_chunk instead? |
4ee8f53d DM |
517 | pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> { |
518 | self.store.insert_chunk(chunk, digest) | |
976595e1 DM |
519 | } |
520 | ||
f569acc5 | 521 | pub fn close(&mut self) -> Result<[u8; 32], Error> { |
5e7a09be | 522 | if self.closed { |
f569acc5 WB |
523 | bail!( |
524 | "cannot close already closed archive index file {:?}", | |
525 | self.filename | |
526 | ); | |
5e7a09be DM |
527 | } |
528 | ||
529 | self.closed = true; | |
530 | ||
5032b57b | 531 | self.writer.flush()?; |
5e7a09be | 532 | |
9ea4bce4 | 533 | let csum_offset = proxmox::offsetof!(DynamicIndexHeader, index_csum); |
d48a9955 | 534 | self.writer.seek(SeekFrom::Start(csum_offset as u64))?; |
16ff6b7c DM |
535 | |
536 | let csum = self.csum.take().unwrap(); | |
537 | let index_csum = csum.finish(); | |
538 | ||
539 | self.writer.write_all(&index_csum)?; | |
540 | self.writer.flush()?; | |
541 | ||
5e7a09be DM |
542 | if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) { |
543 | bail!("Atomic rename file {:?} failed - {}", self.filename, err); | |
544 | } | |
545 | ||
16ff6b7c | 546 | Ok(index_csum) |
5e7a09be DM |
547 | } |
548 | ||
976595e1 DM |
549 | // fixme: rename to add_digest |
550 | pub fn add_chunk(&mut self, offset: u64, digest: &[u8; 32]) -> Result<(), Error> { | |
551 | if self.closed { | |
f569acc5 WB |
552 | bail!( |
553 | "cannot write to closed dynamic index file {:?}", | |
554 | self.filename | |
555 | ); | |
976595e1 | 556 | } |
16ff6b7c | 557 | |
f569acc5 | 558 | let offset_le: &[u8; 8] = unsafe { &std::mem::transmute::<u64, [u8; 8]>(offset.to_le()) }; |
16ff6b7c DM |
559 | |
560 | if let Some(ref mut csum) = self.csum { | |
561 | csum.update(offset_le); | |
562 | csum.update(digest); | |
563 | } | |
564 | ||
cd69d36b DM |
565 | self.writer.write_all(offset_le)?; |
566 | self.writer.write_all(digest)?; | |
976595e1 DM |
567 | Ok(()) |
568 | } | |
569 | } | |
570 | ||
571 | /// Writer which splits a binary stream into dynamic sized chunks | |
572 | /// | |
573 | /// The resulting chunk list is stored in the index file. | |
574 | pub struct DynamicChunkWriter { | |
575 | index: DynamicIndexWriter, | |
576 | closed: bool, | |
577 | chunker: Chunker, | |
578 | stat: ChunkStat, | |
579 | chunk_offset: usize, | |
580 | last_chunk: usize, | |
581 | chunk_buffer: Vec<u8>, | |
582 | } | |
583 | ||
584 | impl DynamicChunkWriter { | |
976595e1 DM |
585 | pub fn new(index: DynamicIndexWriter, chunk_size: usize) -> Self { |
586 | Self { | |
587 | index, | |
588 | closed: false, | |
589 | chunker: Chunker::new(chunk_size), | |
590 | stat: ChunkStat::new(0), | |
591 | chunk_offset: 0, | |
592 | last_chunk: 0, | |
f569acc5 | 593 | chunk_buffer: Vec::with_capacity(chunk_size * 4), |
976595e1 DM |
594 | } |
595 | } | |
596 | ||
7e336555 DM |
597 | pub fn stat(&self) -> &ChunkStat { |
598 | &self.stat | |
599 | } | |
600 | ||
f569acc5 | 601 | pub fn close(&mut self) -> Result<(), Error> { |
976595e1 DM |
602 | if self.closed { |
603 | return Ok(()); | |
604 | } | |
605 | ||
606 | self.closed = true; | |
607 | ||
608 | self.write_chunk_buffer()?; | |
609 | ||
610 | self.index.close()?; | |
611 | ||
612 | self.stat.size = self.chunk_offset as u64; | |
613 | ||
614 | // add size of index file | |
f569acc5 WB |
615 | self.stat.size += |
616 | (self.stat.chunk_count * 40 + std::mem::size_of::<DynamicIndexHeader>()) as u64; | |
976595e1 DM |
617 | |
618 | Ok(()) | |
619 | } | |
620 | ||
621 | fn write_chunk_buffer(&mut self) -> Result<(), Error> { | |
5e7a09be DM |
622 | let chunk_size = self.chunk_buffer.len(); |
623 | ||
f569acc5 WB |
624 | if chunk_size == 0 { |
625 | return Ok(()); | |
626 | } | |
5e7a09be DM |
627 | |
628 | let expected_chunk_size = self.chunk_offset - self.last_chunk; | |
629 | if expected_chunk_size != self.chunk_buffer.len() { | |
976595e1 | 630 | bail!("wrong chunk size {} != {}", expected_chunk_size, chunk_size); |
5e7a09be DM |
631 | } |
632 | ||
7e336555 | 633 | self.stat.chunk_count += 1; |
247cdbce | 634 | |
5e7a09be DM |
635 | self.last_chunk = self.chunk_offset; |
636 | ||
4ee8f53d | 637 | let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer) |
f98ac774 DM |
638 | .compress(true) |
639 | .build()?; | |
640 | ||
4ee8f53d | 641 | match self.index.insert_chunk(&chunk, &digest) { |
f98ac774 | 642 | Ok((is_duplicate, compressed_size)) => { |
7e336555 | 643 | self.stat.compressed_size += compressed_size; |
798f7fa0 | 644 | if is_duplicate { |
7e336555 | 645 | self.stat.duplicate_chunks += 1; |
798f7fa0 | 646 | } else { |
7e336555 | 647 | self.stat.disk_size += compressed_size; |
798f7fa0 DM |
648 | } |
649 | ||
f569acc5 WB |
650 | println!( |
651 | "ADD CHUNK {:016x} {} {}% {} {}", | |
652 | self.chunk_offset, | |
653 | chunk_size, | |
654 | (compressed_size * 100) / (chunk_size as u64), | |
655 | is_duplicate, | |
656 | proxmox::tools::digest_to_hex(&digest) | |
657 | ); | |
976595e1 | 658 | self.index.add_chunk(self.chunk_offset as u64, &digest)?; |
5e7a09be | 659 | self.chunk_buffer.truncate(0); |
62ee2eb4 | 660 | Ok(()) |
5e7a09be DM |
661 | } |
662 | Err(err) => { | |
663 | self.chunk_buffer.truncate(0); | |
62ee2eb4 | 664 | Err(err) |
5e7a09be DM |
665 | } |
666 | } | |
5e7a09be | 667 | } |
0433db19 DM |
668 | } |
669 | ||
976595e1 | 670 | impl Write for DynamicChunkWriter { |
0433db19 | 671 | fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> { |
0433db19 DM |
672 | let chunker = &mut self.chunker; |
673 | ||
674 | let pos = chunker.scan(data); | |
675 | ||
676 | if pos > 0 { | |
4ee8f53d | 677 | self.chunk_buffer.extend_from_slice(&data[0..pos]); |
0433db19 DM |
678 | self.chunk_offset += pos; |
679 | ||
976595e1 | 680 | if let Err(err) = self.write_chunk_buffer() { |
f569acc5 WB |
681 | return Err(std::io::Error::new( |
682 | std::io::ErrorKind::Other, | |
683 | err.to_string(), | |
684 | )); | |
976595e1 | 685 | } |
5e7a09be | 686 | Ok(pos) |
0433db19 DM |
687 | } else { |
688 | self.chunk_offset += data.len(); | |
4ee8f53d | 689 | self.chunk_buffer.extend_from_slice(data); |
5e7a09be | 690 | Ok(data.len()) |
0433db19 DM |
691 | } |
692 | } | |
693 | ||
694 | fn flush(&mut self) -> std::result::Result<(), std::io::Error> { | |
f569acc5 WB |
695 | Err(std::io::Error::new( |
696 | std::io::ErrorKind::Other, | |
697 | "please use close() instead of flush()", | |
698 | )) | |
0433db19 DM |
699 | } |
700 | } |