]>
Commit | Line | Data |
---|---|---|
0433db19 | 1 | use std::fs::File; |
4cf0ced9 | 2 | use std::io::{self, BufWriter, Seek, SeekFrom, Write}; |
c443f58b | 3 | use std::ops::Range; |
0433db19 | 4 | use std::os::unix::io::AsRawFd; |
367f002e | 5 | use std::path::{Path, PathBuf}; |
4cf0ced9 | 6 | use std::sync::{Arc, Mutex}; |
a6f87283 | 7 | use std::task::Context; |
4cf0ced9 | 8 | use std::pin::Pin; |
367f002e | 9 | |
f7d4e4b5 | 10 | use anyhow::{bail, format_err, Error}; |
0433db19 | 11 | |
9b2b627f | 12 | use proxmox::tools::io::ReadExt; |
f569acc5 | 13 | use proxmox::tools::uuid::Uuid; |
f35197f4 | 14 | use proxmox::tools::vec; |
57e50fb9 | 15 | use proxmox::tools::mmap::Mmap; |
a6f87283 | 16 | use pxar::accessor::{MaybeReady, ReadAt, ReadAtOperation}; |
8ea3b1d1 | 17 | |
367f002e WB |
18 | use super::chunk_stat::ChunkStat; |
19 | use super::chunk_store::ChunkStore; | |
c443f58b | 20 | use super::index::ChunkReadInfo; |
367f002e | 21 | use super::read_chunk::ReadChunk; |
f569acc5 WB |
22 | use super::Chunker; |
23 | use super::IndexFile; | |
4ee8f53d | 24 | use super::{DataBlob, DataChunkBuilder}; |
e693818a | 25 | use crate::tools::{self, epoch_now_u64}; |
f98ac774 | 26 | |
/// Header format definition for dynamic index files (`.didx`)
///
/// The header is exactly one page (4096 bytes) long; the chunk table starts
/// directly after it.
#[repr(C)]
pub struct DynamicIndexHeader {
    /// Magic number identifying the file format.
    pub magic: [u8; 8],
    /// UUID generated when the index is created.
    pub uuid: [u8; 16],
    /// Creation time, stored in little-endian byte order.
    pub ctime: u64,
    /// Sha256 over the index ``SHA256(offset1||digest1||offset2||digest2||...)``
    pub index_csum: [u8; 32],
    reserved: [u8; 4032], // overall size is one page (4096 bytes)
}
// Compile-time guarantee that the header really is one page in size.
proxmox::static_assert_size!(DynamicIndexHeader, 4096);
// TODO: Once non-Copy unions are stabilized, use:
// union DynamicIndexHeader {
//     reserved: [u8; 4096],
//     pub data: DynamicIndexHeaderData,
// }
77703d95 | 43 | |
/// One record of the chunk table: the end offset of a chunk and its digest.
///
/// The offset is kept in little-endian byte order; use [`DynamicEntry::end`]
/// to obtain it in host byte order.
#[derive(Clone, Debug)]
#[repr(C)]
pub struct DynamicEntry {
    end_le: u64,
    digest: [u8; 32],
}

impl DynamicEntry {
    /// Returns the end offset of the chunk in host byte order.
    #[inline]
    pub fn end(&self) -> u64 {
        // Reinterpret the in-memory bytes of the field as a little-endian value.
        let raw = self.end_le.to_ne_bytes();
        u64::from_le_bytes(raw)
    }
}
57 | ||
/// Read-only access to a dynamic index file.
pub struct DynamicIndexReader {
    // The opened index file (the shared lock is taken on it in `new`); it is
    // only stored here, never read again.
    _file: File,
    /// Size of the whole index file in bytes, as reported by `fstat`.
    pub size: usize,
    // Memory mapping of the chunk table that follows the header.
    index: Mmap<DynamicEntry>,
    /// UUID copied from the file header.
    pub uuid: [u8; 16],
    /// Creation time from the file header, converted from little-endian.
    pub ctime: u64,
    /// Index checksum as stored in the file header.
    pub index_csum: [u8; 32],
}
66 | ||
93d5d779 | 67 | impl DynamicIndexReader { |
d48a9955 | 68 | pub fn open(path: &Path) -> Result<Self, Error> { |
0f0a35b3 DM |
69 | File::open(path) |
70 | .map_err(Error::from) | |
57e50fb9 | 71 | .and_then(Self::new) |
0f0a35b3 | 72 | .map_err(|err| format_err!("Unable to open dynamic index {:?} - {}", path, err)) |
d48a9955 DM |
73 | } |
74 | ||
75 | pub fn new(mut file: std::fs::File) -> Result<Self, Error> { | |
f569acc5 WB |
76 | if let Err(err) = |
77 | nix::fcntl::flock(file.as_raw_fd(), nix::fcntl::FlockArg::LockSharedNonblock) | |
78 | { | |
0f0a35b3 DM |
79 | bail!("unable to get shared lock - {}", err); |
80 | } | |
81 | ||
57e50fb9 | 82 | // FIXME: This is NOT OUR job! Check the callers of this method and remove this! |
d48a9955 DM |
83 | file.seek(SeekFrom::Start(0))?; |
84 | ||
93d5d779 | 85 | let header_size = std::mem::size_of::<DynamicIndexHeader>(); |
77703d95 | 86 | |
ba5e6747 | 87 | let header: Box<DynamicIndexHeader> = unsafe { file.read_host_value_boxed()? }; |
77703d95 | 88 | |
a7dd4830 | 89 | if header.magic != super::DYNAMIC_SIZED_CHUNK_INDEX_1_0 { |
d48a9955 | 90 | bail!("got unknown magic number"); |
77703d95 DM |
91 | } |
92 | ||
77703d95 DM |
93 | let ctime = u64::from_le(header.ctime); |
94 | ||
95 | let rawfd = file.as_raw_fd(); | |
96 | ||
d48a9955 | 97 | let stat = nix::sys::stat::fstat(rawfd)?; |
77703d95 DM |
98 | |
99 | let size = stat.st_size as usize; | |
100 | ||
ddbdf80d | 101 | let index_size = size - header_size; |
57e50fb9 WB |
102 | let index_count = index_size / 40; |
103 | if index_count * 40 != index_size { | |
d48a9955 | 104 | bail!("got unexpected file size"); |
77703d95 DM |
105 | } |
106 | ||
57e50fb9 WB |
107 | let index = unsafe { |
108 | Mmap::map_fd( | |
109 | rawfd, | |
110 | header_size as u64, | |
111 | index_count, | |
f569acc5 WB |
112 | nix::sys::mman::ProtFlags::PROT_READ, |
113 | nix::sys::mman::MapFlags::MAP_PRIVATE, | |
57e50fb9 WB |
114 | )? |
115 | }; | |
77703d95 | 116 | |
77703d95 | 117 | Ok(Self { |
728797d0 | 118 | _file: file, |
77703d95 | 119 | size, |
57e50fb9 | 120 | index, |
77703d95 DM |
121 | ctime, |
122 | uuid: header.uuid, | |
16ff6b7c | 123 | index_csum: header.index_csum, |
77703d95 DM |
124 | }) |
125 | } | |
126 | ||
9fe2f639 | 127 | #[allow(clippy::cast_ptr_alignment)] |
c443f58b | 128 | pub fn chunk_info(&self, pos: usize) -> Result<ChunkReadInfo, Error> { |
57e50fb9 | 129 | if pos >= self.index.len() { |
40f4e198 DM |
130 | bail!("chunk index out of range"); |
131 | } | |
132 | let start = if pos == 0 { | |
133 | 0 | |
134 | } else { | |
7a6b5492 | 135 | self.index[pos - 1].end() |
40f4e198 DM |
136 | }; |
137 | ||
7a6b5492 | 138 | let end = self.index[pos].end(); |
5e58e1bb | 139 | |
c443f58b WB |
140 | Ok(ChunkReadInfo { |
141 | range: start..end, | |
57e50fb9 | 142 | digest: self.index[pos].digest.clone(), |
c443f58b | 143 | }) |
40f4e198 DM |
144 | } |
145 | ||
39c6bd86 | 146 | #[inline] |
9fe2f639 | 147 | #[allow(clippy::cast_ptr_alignment)] |
39c6bd86 | 148 | fn chunk_end(&self, pos: usize) -> u64 { |
57e50fb9 | 149 | if pos >= self.index.len() { |
39c6bd86 DM |
150 | panic!("chunk index out of range"); |
151 | } | |
7a6b5492 | 152 | self.index[pos].end() |
39c6bd86 DM |
153 | } |
154 | ||
155 | #[inline] | |
f98ac774 | 156 | fn chunk_digest(&self, pos: usize) -> &[u8; 32] { |
57e50fb9 | 157 | if pos >= self.index.len() { |
39c6bd86 DM |
158 | panic!("chunk index out of range"); |
159 | } | |
57e50fb9 | 160 | &self.index[pos].digest |
39c6bd86 DM |
161 | } |
162 | ||
82c85a21 DM |
163 | /// Compute checksum and data size |
164 | pub fn compute_csum(&self) -> ([u8; 32], u64) { | |
82c85a21 | 165 | let mut csum = openssl::sha::Sha256::new(); |
57e50fb9 | 166 | for entry in &self.index { |
7a6b5492 | 167 | csum.update(&entry.end_le.to_ne_bytes()); |
57e50fb9 | 168 | csum.update(&entry.digest); |
82c85a21 DM |
169 | } |
170 | let csum = csum.finish(); | |
171 | ||
57e50fb9 WB |
172 | ( |
173 | csum, | |
174 | self.index | |
175 | .last() | |
7a6b5492 WB |
176 | .map(|entry| entry.end()) |
177 | .unwrap_or(0) | |
178 | ) | |
96df2fb4 | 179 | } |
39c6bd86 | 180 | |
57e50fb9 | 181 | // TODO: can we use std::slice::binary_search with Mmap now? |
39c6bd86 DM |
182 | fn binary_search( |
183 | &self, | |
184 | start_idx: usize, | |
185 | start: u64, | |
186 | end_idx: usize, | |
187 | end: u64, | |
f569acc5 | 188 | offset: u64, |
39c6bd86 | 189 | ) -> Result<usize, Error> { |
39c6bd86 DM |
190 | if (offset >= end) || (offset < start) { |
191 | bail!("offset out of range"); | |
192 | } | |
193 | ||
194 | if end_idx == start_idx { | |
195 | return Ok(start_idx); // found | |
196 | } | |
f569acc5 | 197 | let middle_idx = (start_idx + end_idx) / 2; |
39c6bd86 DM |
198 | let middle_end = self.chunk_end(middle_idx); |
199 | ||
200 | if offset < middle_end { | |
62ee2eb4 | 201 | self.binary_search(start_idx, start, middle_idx, middle_end, offset) |
39c6bd86 | 202 | } else { |
62ee2eb4 | 203 | self.binary_search(middle_idx + 1, middle_end, end_idx, end, offset) |
39c6bd86 DM |
204 | } |
205 | } | |
206 | } | |
207 | ||
7bc1d727 WB |
208 | impl IndexFile for DynamicIndexReader { |
209 | fn index_count(&self) -> usize { | |
57e50fb9 | 210 | self.index.len() |
7bc1d727 WB |
211 | } |
212 | ||
213 | fn index_digest(&self, pos: usize) -> Option<&[u8; 32]> { | |
57e50fb9 | 214 | if pos >= self.index.len() { |
7bc1d727 WB |
215 | None |
216 | } else { | |
f569acc5 | 217 | Some(unsafe { std::mem::transmute(self.chunk_digest(pos).as_ptr()) }) |
7bc1d727 WB |
218 | } |
219 | } | |
a660978c DM |
220 | |
221 | fn index_bytes(&self) -> u64 { | |
57e50fb9 | 222 | if self.index.is_empty() { |
a660978c DM |
223 | 0 |
224 | } else { | |
57e50fb9 | 225 | self.chunk_end(self.index.len() - 1) |
a660978c DM |
226 | } |
227 | } | |
7bc1d727 WB |
228 | } |
229 | ||
c443f58b WB |
230 | struct CachedChunk { |
231 | range: Range<u64>, | |
232 | data: Vec<u8>, | |
233 | } | |
234 | ||
235 | impl CachedChunk { | |
236 | /// Perform sanity checks on the range and data size: | |
237 | pub fn new(range: Range<u64>, data: Vec<u8>) -> Result<Self, Error> { | |
238 | if data.len() as u64 != range.end - range.start { | |
239 | bail!( | |
240 | "read chunk with wrong size ({} != {})", | |
241 | data.len(), | |
242 | range.end - range.start, | |
243 | ); | |
244 | } | |
245 | Ok(Self { range, data }) | |
246 | } | |
247 | } | |
248 | ||
/// Reader over the data described by a dynamic index, fetching chunks from
/// `store` and keeping one chunk buffered at a time.
pub struct BufferedDynamicReader<S> {
    // Source the chunk data is read from.
    store: S,
    // Index describing the chunk layout.
    index: DynamicIndexReader,
    // Value of `index.index_bytes()` at construction time.
    archive_size: u64,
    // Copy of the data of the currently buffered chunk (empty until the first
    // chunk was buffered).
    read_buffer: Vec<u8>,
    // Index position of the currently buffered chunk.
    buffered_chunk_idx: usize,
    // Start offset of the currently buffered chunk.
    buffered_chunk_start: u64,
    // Current position used by the `Read`/`Seek` implementations.
    read_offset: u64,
    // Cache of recently accessed chunks, keyed by index position.
    lru_cache: crate::tools::lru_cache::LruCache<usize, CachedChunk>,
}
259 | ||
260 | struct ChunkCacher<'a, S> { | |
261 | store: &'a mut S, | |
262 | index: &'a DynamicIndexReader, | |
263 | } | |
264 | ||
c443f58b WB |
265 | impl<'a, S: ReadChunk> crate::tools::lru_cache::Cacher<usize, CachedChunk> for ChunkCacher<'a, S> { |
266 | fn fetch(&mut self, index: usize) -> Result<Option<CachedChunk>, Error> { | |
267 | let info = self.index.chunk_info(index)?; | |
268 | let range = info.range; | |
269 | let data = self.store.read_chunk(&info.digest)?; | |
270 | CachedChunk::new(range, data).map(Some) | |
536683e7 | 271 | } |
77703d95 DM |
272 | } |
273 | ||
f569acc5 | 274 | impl<S: ReadChunk> BufferedDynamicReader<S> { |
d48a9955 | 275 | pub fn new(index: DynamicIndexReader, store: S) -> Self { |
57e50fb9 | 276 | let archive_size = index.index_bytes(); |
39c6bd86 | 277 | Self { |
d48a9955 | 278 | store, |
653b1ca1 WB |
279 | index, |
280 | archive_size, | |
f569acc5 | 281 | read_buffer: Vec::with_capacity(1024 * 1024), |
39c6bd86 DM |
282 | buffered_chunk_idx: 0, |
283 | buffered_chunk_start: 0, | |
284 | read_offset: 0, | |
536683e7 | 285 | lru_cache: crate::tools::lru_cache::LruCache::new(32), |
39c6bd86 DM |
286 | } |
287 | } | |
288 | ||
f569acc5 WB |
289 | pub fn archive_size(&self) -> u64 { |
290 | self.archive_size | |
291 | } | |
39c6bd86 | 292 | |
0a72e267 | 293 | fn buffer_chunk(&mut self, idx: usize) -> Result<(), Error> { |
c443f58b WB |
294 | //let (start, end, data) = self.lru_cache.access( |
295 | let cached_chunk = self.lru_cache.access( | |
536683e7 CE |
296 | idx, |
297 | &mut ChunkCacher { | |
298 | store: &mut self.store, | |
299 | index: &self.index, | |
300 | }, | |
301 | )?.ok_or_else(|| format_err!("chunk not found by cacher"))?; | |
302 | ||
536683e7 | 303 | // fixme: avoid copy |
f98ac774 | 304 | self.read_buffer.clear(); |
c443f58b | 305 | self.read_buffer.extend_from_slice(&cached_chunk.data); |
0a72e267 DM |
306 | |
307 | self.buffered_chunk_idx = idx; | |
d48a9955 | 308 | |
c443f58b | 309 | self.buffered_chunk_start = cached_chunk.range.start; |
0a72e267 DM |
310 | //println!("BUFFER {} {}", self.buffered_chunk_start, end); |
311 | Ok(()) | |
312 | } | |
313 | } | |
314 | ||
f569acc5 | 315 | impl<S: ReadChunk> crate::tools::BufferedRead for BufferedDynamicReader<S> { |
0a72e267 | 316 | fn buffered_read(&mut self, offset: u64) -> Result<&[u8], Error> { |
f569acc5 WB |
317 | if offset == self.archive_size { |
318 | return Ok(&self.read_buffer[0..0]); | |
319 | } | |
318564ac | 320 | |
39c6bd86 | 321 | let buffer_len = self.read_buffer.len(); |
0b05fd58 | 322 | let index = &self.index; |
39c6bd86 DM |
323 | |
324 | // optimization for sequential read | |
f569acc5 | 325 | if buffer_len > 0 |
57e50fb9 | 326 | && ((self.buffered_chunk_idx + 1) < index.index.len()) |
f569acc5 | 327 | && (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64))) |
39c6bd86 DM |
328 | { |
329 | let next_idx = self.buffered_chunk_idx + 1; | |
330 | let next_end = index.chunk_end(next_idx); | |
331 | if offset < next_end { | |
373ef4a5 | 332 | self.buffer_chunk(next_idx)?; |
39c6bd86 DM |
333 | let buffer_offset = (offset - self.buffered_chunk_start) as usize; |
334 | return Ok(&self.read_buffer[buffer_offset..]); | |
335 | } | |
336 | } | |
337 | ||
f569acc5 WB |
338 | if (buffer_len == 0) |
339 | || (offset < self.buffered_chunk_start) | |
340 | || (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64))) | |
39c6bd86 | 341 | { |
57e50fb9 | 342 | let end_idx = index.index.len() - 1; |
39c6bd86 DM |
343 | let end = index.chunk_end(end_idx); |
344 | let idx = index.binary_search(0, 0, end_idx, end, offset)?; | |
373ef4a5 | 345 | self.buffer_chunk(idx)?; |
f569acc5 | 346 | } |
39c6bd86 DM |
347 | |
348 | let buffer_offset = (offset - self.buffered_chunk_start) as usize; | |
349 | Ok(&self.read_buffer[buffer_offset..]) | |
350 | } | |
39c6bd86 | 351 | } |
77703d95 | 352 | |
f569acc5 | 353 | impl<S: ReadChunk> std::io::Read for BufferedDynamicReader<S> { |
4624fe29 | 354 | fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> { |
fded74d0 | 355 | use crate::tools::BufferedRead; |
f569acc5 | 356 | use std::io::{Error, ErrorKind}; |
4624fe29 DM |
357 | |
358 | let data = match self.buffered_read(self.read_offset) { | |
359 | Ok(v) => v, | |
360 | Err(err) => return Err(Error::new(ErrorKind::Other, err.to_string())), | |
361 | }; | |
362 | ||
f569acc5 WB |
363 | let n = if data.len() > buf.len() { |
364 | buf.len() | |
365 | } else { | |
366 | data.len() | |
367 | }; | |
4624fe29 | 368 | |
bde8e243 | 369 | buf[0..n].copy_from_slice(&data[0..n]); |
4624fe29 DM |
370 | |
371 | self.read_offset += n as u64; | |
372 | ||
62ee2eb4 | 373 | Ok(n) |
4624fe29 DM |
374 | } |
375 | } | |
376 | ||
f569acc5 | 377 | impl<S: ReadChunk> std::io::Seek for BufferedDynamicReader<S> { |
d48a9955 | 378 | fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> { |
34337050 | 379 | let new_offset = match pos { |
f569acc5 WB |
380 | SeekFrom::Start(start_offset) => start_offset as i64, |
381 | SeekFrom::End(end_offset) => (self.archive_size as i64) + end_offset, | |
34337050 DM |
382 | SeekFrom::Current(offset) => (self.read_offset as i64) + offset, |
383 | }; | |
384 | ||
ddbdf80d | 385 | use std::io::{Error, ErrorKind}; |
34337050 DM |
386 | if (new_offset < 0) || (new_offset > (self.archive_size as i64)) { |
387 | return Err(Error::new( | |
388 | ErrorKind::Other, | |
f569acc5 WB |
389 | format!( |
390 | "seek is out of range {} ([0..{}])", | |
391 | new_offset, self.archive_size | |
392 | ), | |
393 | )); | |
34337050 DM |
394 | } |
395 | self.read_offset = new_offset as u64; | |
396 | ||
397 | Ok(self.read_offset) | |
398 | } | |
399 | } | |
400 | ||
4cf0ced9 DC |
401 | /// This is a workaround until we have cleaned up the chunk/reader/... infrastructure for better |
402 | /// async use! | |
403 | /// | |
404 | /// Ideally BufferedDynamicReader gets replaced so the LruCache maps to `BroadcastFuture<Chunk>`, | |
405 | /// so that we can properly access it from multiple threads simultaneously while not issuing | |
406 | /// duplicate simultaneous reads over http. | |
407 | #[derive(Clone)] | |
408 | pub struct LocalDynamicReadAt<R: ReadChunk> { | |
409 | inner: Arc<Mutex<BufferedDynamicReader<R>>>, | |
410 | } | |
411 | ||
412 | impl<R: ReadChunk> LocalDynamicReadAt<R> { | |
413 | pub fn new(inner: BufferedDynamicReader<R>) -> Self { | |
414 | Self { | |
415 | inner: Arc::new(Mutex::new(inner)), | |
416 | } | |
417 | } | |
418 | } | |
419 | ||
a6f87283 WB |
420 | impl<R: ReadChunk> ReadAt for LocalDynamicReadAt<R> { |
421 | fn start_read_at<'a>( | |
422 | self: Pin<&'a Self>, | |
4cf0ced9 | 423 | _cx: &mut Context, |
a6f87283 | 424 | buf: &'a mut [u8], |
4cf0ced9 | 425 | offset: u64, |
a6f87283 | 426 | ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> { |
4cf0ced9 | 427 | use std::io::Read; |
a6f87283 | 428 | MaybeReady::Ready(tokio::task::block_in_place(move || { |
4cf0ced9 DC |
429 | let mut reader = self.inner.lock().unwrap(); |
430 | reader.seek(SeekFrom::Start(offset))?; | |
a6f87283 WB |
431 | Ok(reader.read(buf)?) |
432 | })) | |
433 | } | |
434 | ||
435 | fn poll_complete<'a>( | |
436 | self: Pin<&'a Self>, | |
437 | _op: ReadAtOperation<'a>, | |
438 | ) -> MaybeReady<io::Result<usize>, ReadAtOperation<'a>> { | |
439 | panic!("LocalDynamicReadAt::start_read_at returned Pending"); | |
4cf0ced9 DC |
440 | } |
441 | } | |
442 | ||
443 | ||
/// Create dynamic index files (`.didx`)
///
/// Data is written to a temporary file, which `close()` renames to the final
/// file name.
pub struct DynamicIndexWriter {
    // Chunk store that `insert_chunk` forwards to.
    store: Arc<ChunkStore>,
    // Shared lock obtained from the store in `create`; only stored here.
    _lock: tools::ProcessLockSharedGuard,
    // Buffered writer on the temporary file.
    writer: BufWriter<File>,
    // Set once `close()` was called.
    closed: bool,
    // Final path of the index file.
    filename: PathBuf,
    // Path of the temporary file the index is written to.
    tmp_filename: PathBuf,
    // Running checksum over all added entries; taken out by `close()`.
    csum: Option<openssl::sha::Sha256>,
    /// UUID written to the file header.
    pub uuid: [u8; 16],
    /// Creation time written to the file header (host byte order here).
    pub ctime: u64,
}
456 | ||
93d5d779 | 457 | impl Drop for DynamicIndexWriter { |
1629d2ad DM |
458 | fn drop(&mut self) { |
459 | let _ = std::fs::remove_file(&self.tmp_filename); // ignore errors | |
460 | } | |
461 | } | |
462 | ||
93d5d779 | 463 | impl DynamicIndexWriter { |
976595e1 | 464 | pub fn create(store: Arc<ChunkStore>, path: &Path) -> Result<Self, Error> { |
43b13033 DM |
465 | let shared_lock = store.try_shared_lock()?; |
466 | ||
0433db19 DM |
467 | let full_path = store.relative_path(path); |
468 | let mut tmp_path = full_path.clone(); | |
93d5d779 | 469 | tmp_path.set_extension("tmp_didx"); |
0433db19 | 470 | |
ddbdf80d | 471 | let file = std::fs::OpenOptions::new() |
f569acc5 WB |
472 | .create(true) |
473 | .truncate(true) | |
0433db19 DM |
474 | .read(true) |
475 | .write(true) | |
476 | .open(&tmp_path)?; | |
477 | ||
f569acc5 | 478 | let mut writer = BufWriter::with_capacity(1024 * 1024, file); |
5032b57b | 479 | |
93d5d779 | 480 | let header_size = std::mem::size_of::<DynamicIndexHeader>(); |
0433db19 DM |
481 | |
482 | // todo: use static assertion when available in rust | |
f569acc5 WB |
483 | if header_size != 4096 { |
484 | panic!("got unexpected header size"); | |
485 | } | |
0433db19 | 486 | |
e693818a | 487 | let ctime = epoch_now_u64()?; |
0433db19 | 488 | |
f569acc5 | 489 | let uuid = Uuid::generate(); |
0433db19 | 490 | |
d21f8a5b | 491 | let mut buffer = vec::zeroed(header_size); |
93d5d779 | 492 | let header = crate::tools::map_struct_mut::<DynamicIndexHeader>(&mut buffer)?; |
0433db19 | 493 | |
a7dd4830 | 494 | header.magic = super::DYNAMIC_SIZED_CHUNK_INDEX_1_0; |
0433db19 DM |
495 | header.ctime = u64::to_le(ctime); |
496 | header.uuid = *uuid.as_bytes(); | |
497 | ||
16ff6b7c DM |
498 | header.index_csum = [0u8; 32]; |
499 | ||
5032b57b | 500 | writer.write_all(&buffer)?; |
0433db19 | 501 | |
16ff6b7c DM |
502 | let csum = Some(openssl::sha::Sha256::new()); |
503 | ||
0433db19 DM |
504 | Ok(Self { |
505 | store, | |
43b13033 | 506 | _lock: shared_lock, |
653b1ca1 | 507 | writer, |
5e7a09be | 508 | closed: false, |
0433db19 DM |
509 | filename: full_path, |
510 | tmp_filename: tmp_path, | |
511 | ctime, | |
512 | uuid: *uuid.as_bytes(), | |
16ff6b7c | 513 | csum, |
0433db19 DM |
514 | }) |
515 | } | |
5e7a09be | 516 | |
f98ac774 | 517 | // fixme: use add_chunk instead? |
4ee8f53d DM |
518 | pub fn insert_chunk(&self, chunk: &DataBlob, digest: &[u8; 32]) -> Result<(bool, u64), Error> { |
519 | self.store.insert_chunk(chunk, digest) | |
976595e1 DM |
520 | } |
521 | ||
f569acc5 | 522 | pub fn close(&mut self) -> Result<[u8; 32], Error> { |
5e7a09be | 523 | if self.closed { |
f569acc5 WB |
524 | bail!( |
525 | "cannot close already closed archive index file {:?}", | |
526 | self.filename | |
527 | ); | |
5e7a09be DM |
528 | } |
529 | ||
530 | self.closed = true; | |
531 | ||
5032b57b | 532 | self.writer.flush()?; |
5e7a09be | 533 | |
9ea4bce4 | 534 | let csum_offset = proxmox::offsetof!(DynamicIndexHeader, index_csum); |
d48a9955 | 535 | self.writer.seek(SeekFrom::Start(csum_offset as u64))?; |
16ff6b7c DM |
536 | |
537 | let csum = self.csum.take().unwrap(); | |
538 | let index_csum = csum.finish(); | |
539 | ||
540 | self.writer.write_all(&index_csum)?; | |
541 | self.writer.flush()?; | |
542 | ||
5e7a09be DM |
543 | if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) { |
544 | bail!("Atomic rename file {:?} failed - {}", self.filename, err); | |
545 | } | |
546 | ||
16ff6b7c | 547 | Ok(index_csum) |
5e7a09be DM |
548 | } |
549 | ||
976595e1 DM |
550 | // fixme: rename to add_digest |
551 | pub fn add_chunk(&mut self, offset: u64, digest: &[u8; 32]) -> Result<(), Error> { | |
552 | if self.closed { | |
f569acc5 WB |
553 | bail!( |
554 | "cannot write to closed dynamic index file {:?}", | |
555 | self.filename | |
556 | ); | |
976595e1 | 557 | } |
16ff6b7c | 558 | |
f569acc5 | 559 | let offset_le: &[u8; 8] = unsafe { &std::mem::transmute::<u64, [u8; 8]>(offset.to_le()) }; |
16ff6b7c DM |
560 | |
561 | if let Some(ref mut csum) = self.csum { | |
562 | csum.update(offset_le); | |
563 | csum.update(digest); | |
564 | } | |
565 | ||
cd69d36b DM |
566 | self.writer.write_all(offset_le)?; |
567 | self.writer.write_all(digest)?; | |
976595e1 DM |
568 | Ok(()) |
569 | } | |
570 | } | |
571 | ||
/// Writer which splits a binary stream into dynamic sized chunks
///
/// And store the resulting chunk list into the index file.
pub struct DynamicChunkWriter {
    // Index writer receiving the chunks and their index entries.
    index: DynamicIndexWriter,
    // Set once `close()` was called.
    closed: bool,
    // Scans the incoming data for chunk boundaries.
    chunker: Chunker,
    // Statistics, updated while chunks are written and in `close()`.
    stat: ChunkStat,
    // Total number of bytes accepted by `write` so far.
    chunk_offset: usize,
    // Value of `chunk_offset` when the last chunk was written out.
    last_chunk: usize,
    // Bytes collected for the chunk currently being assembled.
    chunk_buffer: Vec<u8>,
}
584 | ||
585 | impl DynamicChunkWriter { | |
976595e1 DM |
586 | pub fn new(index: DynamicIndexWriter, chunk_size: usize) -> Self { |
587 | Self { | |
588 | index, | |
589 | closed: false, | |
590 | chunker: Chunker::new(chunk_size), | |
591 | stat: ChunkStat::new(0), | |
592 | chunk_offset: 0, | |
593 | last_chunk: 0, | |
f569acc5 | 594 | chunk_buffer: Vec::with_capacity(chunk_size * 4), |
976595e1 DM |
595 | } |
596 | } | |
597 | ||
7e336555 DM |
598 | pub fn stat(&self) -> &ChunkStat { |
599 | &self.stat | |
600 | } | |
601 | ||
f569acc5 | 602 | pub fn close(&mut self) -> Result<(), Error> { |
976595e1 DM |
603 | if self.closed { |
604 | return Ok(()); | |
605 | } | |
606 | ||
607 | self.closed = true; | |
608 | ||
609 | self.write_chunk_buffer()?; | |
610 | ||
611 | self.index.close()?; | |
612 | ||
613 | self.stat.size = self.chunk_offset as u64; | |
614 | ||
615 | // add size of index file | |
f569acc5 WB |
616 | self.stat.size += |
617 | (self.stat.chunk_count * 40 + std::mem::size_of::<DynamicIndexHeader>()) as u64; | |
976595e1 DM |
618 | |
619 | Ok(()) | |
620 | } | |
621 | ||
622 | fn write_chunk_buffer(&mut self) -> Result<(), Error> { | |
5e7a09be DM |
623 | let chunk_size = self.chunk_buffer.len(); |
624 | ||
f569acc5 WB |
625 | if chunk_size == 0 { |
626 | return Ok(()); | |
627 | } | |
5e7a09be DM |
628 | |
629 | let expected_chunk_size = self.chunk_offset - self.last_chunk; | |
630 | if expected_chunk_size != self.chunk_buffer.len() { | |
976595e1 | 631 | bail!("wrong chunk size {} != {}", expected_chunk_size, chunk_size); |
5e7a09be DM |
632 | } |
633 | ||
7e336555 | 634 | self.stat.chunk_count += 1; |
247cdbce | 635 | |
5e7a09be DM |
636 | self.last_chunk = self.chunk_offset; |
637 | ||
4ee8f53d | 638 | let (chunk, digest) = DataChunkBuilder::new(&self.chunk_buffer) |
f98ac774 DM |
639 | .compress(true) |
640 | .build()?; | |
641 | ||
4ee8f53d | 642 | match self.index.insert_chunk(&chunk, &digest) { |
f98ac774 | 643 | Ok((is_duplicate, compressed_size)) => { |
7e336555 | 644 | self.stat.compressed_size += compressed_size; |
798f7fa0 | 645 | if is_duplicate { |
7e336555 | 646 | self.stat.duplicate_chunks += 1; |
798f7fa0 | 647 | } else { |
7e336555 | 648 | self.stat.disk_size += compressed_size; |
798f7fa0 DM |
649 | } |
650 | ||
f569acc5 WB |
651 | println!( |
652 | "ADD CHUNK {:016x} {} {}% {} {}", | |
653 | self.chunk_offset, | |
654 | chunk_size, | |
655 | (compressed_size * 100) / (chunk_size as u64), | |
656 | is_duplicate, | |
657 | proxmox::tools::digest_to_hex(&digest) | |
658 | ); | |
976595e1 | 659 | self.index.add_chunk(self.chunk_offset as u64, &digest)?; |
5e7a09be | 660 | self.chunk_buffer.truncate(0); |
62ee2eb4 | 661 | Ok(()) |
5e7a09be DM |
662 | } |
663 | Err(err) => { | |
664 | self.chunk_buffer.truncate(0); | |
62ee2eb4 | 665 | Err(err) |
5e7a09be DM |
666 | } |
667 | } | |
5e7a09be | 668 | } |
0433db19 DM |
669 | } |
670 | ||
976595e1 | 671 | impl Write for DynamicChunkWriter { |
0433db19 | 672 | fn write(&mut self, data: &[u8]) -> std::result::Result<usize, std::io::Error> { |
0433db19 DM |
673 | let chunker = &mut self.chunker; |
674 | ||
675 | let pos = chunker.scan(data); | |
676 | ||
677 | if pos > 0 { | |
4ee8f53d | 678 | self.chunk_buffer.extend_from_slice(&data[0..pos]); |
0433db19 DM |
679 | self.chunk_offset += pos; |
680 | ||
976595e1 | 681 | if let Err(err) = self.write_chunk_buffer() { |
f569acc5 WB |
682 | return Err(std::io::Error::new( |
683 | std::io::ErrorKind::Other, | |
684 | err.to_string(), | |
685 | )); | |
976595e1 | 686 | } |
5e7a09be | 687 | Ok(pos) |
0433db19 DM |
688 | } else { |
689 | self.chunk_offset += data.len(); | |
4ee8f53d | 690 | self.chunk_buffer.extend_from_slice(data); |
5e7a09be | 691 | Ok(data.len()) |
0433db19 DM |
692 | } |
693 | } | |
694 | ||
695 | fn flush(&mut self) -> std::result::Result<(), std::io::Error> { | |
f569acc5 WB |
696 | Err(std::io::Error::new( |
697 | std::io::ErrorKind::Other, | |
698 | "please use close() instead of flush()", | |
699 | )) | |
0433db19 DM |
700 | } |
701 | } |