]> git.proxmox.com Git - proxmox-backup.git/blob - src/backup/fixed_index.rs
src/backup/fixed_index.rs: new helper to compute checksum and file size
[proxmox-backup.git] / src / backup / fixed_index.rs
1 use failure::*;
2 use std::io::{Seek, SeekFrom};
3 use std::convert::TryInto;
4
5 use crate::tools;
6 use super::IndexFile;
7 use super::chunk_stat::*;
8 use super::chunk_store::*;
9
10 use std::sync::Arc;
11 use std::io::{Read, Write};
12 use std::fs::File;
13 use std::path::{Path, PathBuf};
14 use std::os::unix::io::AsRawFd;
15 use uuid::Uuid;
16 use chrono::{Local, TimeZone};
17
18 use super::ChunkInfo;
19 use super::read_chunk::*;
20
/// Header format definition for fixed index files (`.fidx`)
///
/// The header occupies exactly one page (4096 bytes); `reserved`
/// pads it to that size (verified at runtime by reader and writer).
/// All integer fields are stored little endian on disk.
#[repr(C)]
pub struct FixedIndexHeader {
    /// File magic, must equal `FIXED_SIZED_CHUNK_INDEX_1_0`
    pub magic: [u8; 8],
    pub uuid: [u8; 16],
    /// Creation time (seconds since the UNIX epoch)
    pub ctime: u64,
    /// Sha256 over the index ``SHA256(digest1||digest2||...)``
    pub index_csum: [u8; 32],
    /// Total image size in bytes
    pub size: u64,
    /// Fixed chunk size in bytes
    pub chunk_size: u64,
    reserved: [u8; 4016], // overall size is one page (4096 bytes)
}
33
34 // split image into fixed size chunks
35
/// Read-only access to a fixed size chunk index file (`.fidx`)
pub struct FixedIndexReader {
    // Kept open to hold the shared flock and back the mmap
    _file: File,
    pub chunk_size: usize,
    pub size: u64,
    // Number of chunk digests in the index
    index_length: usize,
    // mmap()ed digest table (index_length * 32 bytes), mapped PROT_READ
    index: *mut u8,
    pub uuid: [u8; 16],
    pub ctime: u64,
    pub index_csum: [u8; 32],
}
46
// `index` is mmap()ed which cannot be thread-local so should be sendable
// SAFETY: the mapping is created PROT_READ and only ever unmapped via
// `unmap(&mut self)`, so shared references cannot observe a data race.
unsafe impl Send for FixedIndexReader {}
unsafe impl Sync for FixedIndexReader {}
50
51 impl Drop for FixedIndexReader {
52
53 fn drop(&mut self) {
54 if let Err(err) = self.unmap() {
55 eprintln!("Unable to unmap file - {}", err);
56 }
57 }
58 }
59
60 impl FixedIndexReader {
61
62 pub fn open(path: &Path) -> Result<Self, Error> {
63
64 File::open(path)
65 .map_err(Error::from)
66 .and_then(|file| Self::new(file))
67 .map_err(|err| format_err!("Unable to open fixed index {:?} - {}", path, err))
68 }
69
70 pub fn new(mut file: std::fs::File) -> Result<Self, Error> {
71
72 if let Err(err) = nix::fcntl::flock(file.as_raw_fd(), nix::fcntl::FlockArg::LockSharedNonblock) {
73 bail!("unable to get shared lock - {}", err);
74 }
75
76 file.seek(SeekFrom::Start(0))?;
77
78 let header_size = std::mem::size_of::<FixedIndexHeader>();
79
80 // todo: use static assertion when available in rust
81 if header_size != 4096 { bail!("got unexpected header size"); }
82
83 let mut buffer = vec![0u8; header_size];
84 file.read_exact(&mut buffer)?;
85
86 let header = unsafe { &mut * (buffer.as_ptr() as *mut FixedIndexHeader) };
87
88 if header.magic != super::FIXED_SIZED_CHUNK_INDEX_1_0 {
89 bail!("got unknown magic number");
90 }
91
92 let size = u64::from_le(header.size);
93 let ctime = u64::from_le(header.ctime);
94 let chunk_size = u64::from_le(header.chunk_size);
95
96 let index_length = ((size + chunk_size - 1)/chunk_size) as usize;
97 let index_size = index_length*32;
98
99 let rawfd = file.as_raw_fd();
100
101 let stat = match nix::sys::stat::fstat(rawfd) {
102 Ok(stat) => stat,
103 Err(err) => bail!("fstat failed - {}", err),
104 };
105
106 let expected_index_size = (stat.st_size as usize) - header_size;
107 if index_size != expected_index_size {
108 bail!("got unexpected file size ({} != {})", index_size, expected_index_size);
109 }
110
111 let data = unsafe { nix::sys::mman::mmap(
112 std::ptr::null_mut(),
113 index_size,
114 nix::sys::mman::ProtFlags::PROT_READ,
115 nix::sys::mman::MapFlags::MAP_PRIVATE,
116 file.as_raw_fd(),
117 header_size as i64) }? as *mut u8;
118
119 Ok(Self {
120 _file: file,
121 chunk_size: chunk_size as usize,
122 size,
123 index_length,
124 index: data,
125 ctime,
126 uuid: header.uuid,
127 index_csum: header.index_csum,
128 })
129 }
130
131 fn unmap(&mut self) -> Result<(), Error> {
132
133 if self.index == std::ptr::null_mut() { return Ok(()); }
134
135 let index_size = self.index_length*32;
136
137 if let Err(err) = unsafe { nix::sys::mman::munmap(self.index as *mut std::ffi::c_void, index_size) } {
138 bail!("unmap file failed - {}", err);
139 }
140
141 self.index = std::ptr::null_mut();
142
143 Ok(())
144 }
145
146 pub fn chunk_info(&self, pos: usize) -> Result<(u64, u64, [u8; 32]), Error> {
147
148 if pos >= self.index_length {
149 bail!("chunk index out of range");
150 }
151 let start = (pos * self.chunk_size) as u64;
152 let mut end = start + self.chunk_size as u64;
153
154 if end > self.size {
155 end = self.size;
156 }
157
158 let mut digest = std::mem::MaybeUninit::<[u8; 32]>::uninit();
159 unsafe {
160 std::ptr::copy_nonoverlapping(
161 self.index.add(pos*32),
162 (*digest.as_mut_ptr()).as_mut_ptr(),
163 32,
164 );
165 }
166
167 Ok((start, end, unsafe { digest.assume_init() }))
168 }
169
170 #[inline]
171 fn chunk_digest(&self, pos: usize) -> &[u8; 32] {
172 if pos >= self.index_length {
173 panic!("chunk index out of range");
174 }
175 let slice = unsafe { std::slice::from_raw_parts(self.index.add(pos*32), 32) };
176 slice.try_into().unwrap()
177 }
178
179 #[inline]
180 fn chunk_end(&self, pos: usize) -> u64 {
181 if pos >= self.index_length {
182 panic!("chunk index out of range");
183 }
184
185 let end = ((pos+1) * self.chunk_size) as u64;
186 if end > self.size {
187 self.size
188 } else {
189 end
190 }
191 }
192
193 /// Compute checksum and data size
194 pub fn compute_csum(&self) -> ([u8; 32], u64) {
195
196 let mut csum = openssl::sha::Sha256::new();
197 let mut chunk_end = 0;
198 for pos in 0..self.index_length {
199 chunk_end = ((pos+1) * self.chunk_size) as u64;
200 let digest = self.chunk_digest(pos);
201 csum.update(&chunk_end.to_le_bytes());
202 csum.update(digest);
203 }
204 let csum = csum.finish();
205
206 (csum, chunk_end)
207 }
208
209 pub fn print_info(&self) {
210 println!("Size: {}", self.size);
211 println!("ChunkSize: {}", self.chunk_size);
212 println!("CTime: {}", Local.timestamp(self.ctime as i64, 0).format("%c"));
213 println!("UUID: {:?}", self.uuid);
214 }
215 }
216
217 impl IndexFile for FixedIndexReader {
218 fn index_count(&self) -> usize {
219 self.index_length
220 }
221
222 fn index_digest(&self, pos: usize) -> Option<&[u8; 32]> {
223 if pos >= self.index_length {
224 None
225 } else {
226 Some(unsafe { std::mem::transmute(self.index.add(pos*32)) })
227 }
228 }
229
230 fn index_bytes(&self) -> u64 {
231 self.size
232 }
233 }
234
/// Writer for fixed size chunk index files (`.fidx`)
pub struct FixedIndexWriter {
    store: Arc<ChunkStore>,
    // Temporary file backing the mmap; renamed into place by close()
    file: File,
    // Shared lock on the chunk store, held for the writer's lifetime
    _lock: tools::ProcessLockSharedGuard,
    // Final file name
    filename: PathBuf,
    // Temporary file name (".tmp_fidx"), removed again on drop
    tmp_filename: PathBuf,
    chunk_size: usize,
    size: usize,
    // Number of chunk digests in the index
    index_length: usize,
    // mmap()ed digest table (index_length * 32 bytes), read-write, MAP_SHARED
    index: *mut u8,
    pub uuid: [u8; 16],
    pub ctime: u64,
}
248
// `index` is mmap()ed which cannot be thread-local so should be sendable
// (only Send, not Sync: the writer mutates the mapping via add_digest)
unsafe impl Send for FixedIndexWriter {}
251
252 impl Drop for FixedIndexWriter {
253
254 fn drop(&mut self) {
255 let _ = std::fs::remove_file(&self.tmp_filename); // ignore errors
256 if let Err(err) = self.unmap() {
257 eprintln!("Unable to unmap file {:?} - {}", self.tmp_filename, err);
258 }
259 }
260 }
261
262 impl FixedIndexWriter {
263
264 pub fn create(store: Arc<ChunkStore>, path: &Path, size: usize, chunk_size: usize) -> Result<Self, Error> {
265
266 let shared_lock = store.try_shared_lock()?;
267
268 let full_path = store.relative_path(path);
269 let mut tmp_path = full_path.clone();
270 tmp_path.set_extension("tmp_fidx");
271
272 let mut file = std::fs::OpenOptions::new()
273 .create(true).truncate(true)
274 .read(true)
275 .write(true)
276 .open(&tmp_path)?;
277
278 let header_size = std::mem::size_of::<FixedIndexHeader>();
279
280 // todo: use static assertion when available in rust
281 if header_size != 4096 { panic!("got unexpected header size"); }
282
283 let ctime = std::time::SystemTime::now().duration_since(
284 std::time::SystemTime::UNIX_EPOCH)?.as_secs();
285
286 let uuid = Uuid::new_v4();
287
288 let buffer = vec![0u8; header_size];
289 let header = unsafe { &mut * (buffer.as_ptr() as *mut FixedIndexHeader) };
290
291 header.magic = super::FIXED_SIZED_CHUNK_INDEX_1_0;
292 header.ctime = u64::to_le(ctime);
293 header.size = u64::to_le(size as u64);
294 header.chunk_size = u64::to_le(chunk_size as u64);
295 header.uuid = *uuid.as_bytes();
296
297 header.index_csum = [0u8; 32];
298
299 file.write_all(&buffer)?;
300
301 let index_length = (size + chunk_size - 1)/chunk_size;
302 let index_size = index_length*32;
303 nix::unistd::ftruncate(file.as_raw_fd(), (header_size + index_size) as i64)?;
304
305 let data = unsafe { nix::sys::mman::mmap(
306 std::ptr::null_mut(),
307 index_size,
308 nix::sys::mman::ProtFlags::PROT_READ | nix::sys::mman::ProtFlags::PROT_WRITE,
309 nix::sys::mman::MapFlags::MAP_SHARED,
310 file.as_raw_fd(),
311 header_size as i64) }? as *mut u8;
312
313 Ok(Self {
314 store,
315 file,
316 _lock: shared_lock,
317 filename: full_path,
318 tmp_filename: tmp_path,
319 chunk_size,
320 size,
321 index_length,
322 index: data,
323 ctime,
324 uuid: *uuid.as_bytes(),
325 })
326 }
327
328 pub fn index_length(&self) -> usize {
329 self.index_length
330 }
331
332 fn unmap(&mut self) -> Result<(), Error> {
333
334 if self.index == std::ptr::null_mut() { return Ok(()); }
335
336 let index_size = self.index_length*32;
337
338 if let Err(err) = unsafe { nix::sys::mman::munmap(self.index as *mut std::ffi::c_void, index_size) } {
339 bail!("unmap file {:?} failed - {}", self.tmp_filename, err);
340 }
341
342 self.index = std::ptr::null_mut();
343
344 Ok(())
345 }
346
347 pub fn close(&mut self) -> Result<[u8; 32], Error> {
348
349 if self.index == std::ptr::null_mut() { bail!("cannot close already closed index file."); }
350
351 let index_size = self.index_length*32;
352 let data = unsafe { std::slice::from_raw_parts(self.index, index_size) };
353 let index_csum = openssl::sha::sha256(data);
354
355 self.unmap()?;
356
357 let csum_offset = proxmox::tools::offsetof!(FixedIndexHeader, index_csum);
358 self.file.seek(SeekFrom::Start(csum_offset as u64))?;
359 self.file.write_all(&index_csum)?;
360 self.file.flush()?;
361
362 if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
363 bail!("Atomic rename file {:?} failed - {}", self.filename, err);
364 }
365
366 Ok(index_csum)
367 }
368
369 pub fn check_chunk_alignment(&self, offset: usize, chunk_len: usize) -> Result<usize, Error> {
370
371 if offset < chunk_len {
372 bail!("got chunk with small offset ({} < {}", offset, chunk_len);
373 }
374
375 let pos = offset - chunk_len;
376
377 if offset > self.size {
378 bail!("chunk data exceeds size ({} >= {})", offset, self.size);
379 }
380
381 // last chunk can be smaller
382 if ((offset != self.size) && (chunk_len != self.chunk_size)) ||
383 (chunk_len > self.chunk_size) || (chunk_len == 0) {
384 bail!("chunk with unexpected length ({} != {}", chunk_len, self.chunk_size);
385 }
386
387 if pos & (self.chunk_size-1) != 0 {
388 bail!("got unaligned chunk (pos = {})", pos);
389 }
390
391 Ok(pos / self.chunk_size)
392 }
393
394 // Note: We want to add data out of order, so do not assume any order here.
395 pub fn add_chunk(&mut self, chunk_info: &ChunkInfo, stat: &mut ChunkStat) -> Result<(), Error> {
396
397 let chunk_len = chunk_info.chunk_len as usize;
398 let offset = chunk_info.offset as usize; // end of chunk
399
400 let idx = self.check_chunk_alignment(offset, chunk_len)?;
401
402 let (is_duplicate, compressed_size) = self.store.insert_chunk(&chunk_info.chunk)?;
403
404 stat.chunk_count += 1;
405 stat.compressed_size += compressed_size;
406
407 let digest = chunk_info.chunk.digest();
408
409 println!("ADD CHUNK {} {} {}% {} {}", idx, chunk_len,
410 (compressed_size*100)/(chunk_len as u64), is_duplicate, proxmox::tools::digest_to_hex(digest));
411
412 if is_duplicate {
413 stat.duplicate_chunks += 1;
414 } else {
415 stat.disk_size += compressed_size;
416 }
417
418 self.add_digest(idx, digest)
419 }
420
421 pub fn add_digest(&mut self, index: usize, digest: &[u8; 32]) -> Result<(), Error> {
422
423 if index >= self.index_length {
424 bail!("add digest failed - index out of range ({} >= {})", index, self.index_length);
425 }
426
427 if self.index == std::ptr::null_mut() { bail!("cannot write to closed index file."); }
428
429 let index_pos = index*32;
430 unsafe {
431 let dst = self.index.add(index_pos);
432 dst.copy_from_nonoverlapping(digest.as_ptr(), 32);
433 }
434
435 Ok(())
436 }
437 }
438
/// Buffered, seekable reader on top of a `FixedIndexReader`
pub struct BufferedFixedReader<S> {
    // Backend used to fetch chunk data by digest
    store: S,
    index: FixedIndexReader,
    // Total archive size in bytes (copied from index.size)
    archive_size: u64,
    // Data of the currently buffered chunk
    read_buffer: Vec<u8>,
    // Position of the currently buffered chunk in the index
    buffered_chunk_idx: usize,
    // Archive offset at which the buffered chunk starts
    buffered_chunk_start: u64,
    // Current read position (advanced by Read, set by Seek)
    read_offset: u64,
}
448
449 impl <S: ReadChunk> BufferedFixedReader<S> {
450
451 pub fn new(index: FixedIndexReader, store: S) -> Self {
452
453 let archive_size = index.size;
454 Self {
455 store,
456 index: index,
457 archive_size: archive_size,
458 read_buffer: Vec::with_capacity(1024*1024),
459 buffered_chunk_idx: 0,
460 buffered_chunk_start: 0,
461 read_offset: 0,
462 }
463 }
464
465 pub fn archive_size(&self) -> u64 { self.archive_size }
466
467 fn buffer_chunk(&mut self, idx: usize) -> Result<(), Error> {
468
469 let index = &self.index;
470 let (start, end, digest) = index.chunk_info(idx)?;
471
472 // fixme: avoid copy
473
474 let data = self.store.read_chunk(&digest)?;
475
476 if (end - start) != data.len() as u64 {
477 bail!("read chunk with wrong size ({} != {}", (end - start), data.len());
478 }
479
480 self.read_buffer.clear();
481 self.read_buffer.extend_from_slice(&data);
482
483 self.buffered_chunk_idx = idx;
484
485 self.buffered_chunk_start = start as u64;
486 //println!("BUFFER {} {}", self.buffered_chunk_start, end);
487 Ok(())
488 }
489 }
490
impl <S: ReadChunk> crate::tools::BufferedRead for BufferedFixedReader<S> {

    /// Return a slice of buffered data starting at `offset`, first
    /// loading the containing chunk into the buffer if necessary.
    /// Returns an empty slice exactly at the end of the archive.
    fn buffered_read(&mut self, offset: u64) -> Result<&[u8], Error> {

        if offset == self.archive_size { return Ok(&self.read_buffer[0..0]); }

        let buffer_len = self.read_buffer.len();
        let index = &self.index;

        // optimization for sequential read
        if buffer_len > 0 &&
            ((self.buffered_chunk_idx + 1) < index.index_length) &&
            (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64)))
        {
            let next_idx = self.buffered_chunk_idx + 1;
            let next_end = index.chunk_end(next_idx);
            if offset < next_end {
                // `offset` falls inside the next chunk - buffer it directly,
                // skipping the offset-to-index division below
                self.buffer_chunk(next_idx)?;
                let buffer_offset = (offset - self.buffered_chunk_start) as usize;
                return Ok(&self.read_buffer[buffer_offset..]);
            }
        }

        // random access: derive the chunk index from the offset when the
        // buffer is empty or `offset` lies outside the buffered range
        if (buffer_len == 0) ||
            (offset < self.buffered_chunk_start) ||
            (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64)))
        {
            let idx = (offset / index.chunk_size as u64) as usize;
            self.buffer_chunk(idx)?;
        }

        let buffer_offset = (offset - self.buffered_chunk_start) as usize;
        Ok(&self.read_buffer[buffer_offset..])
    }
}
526
527 impl <S: ReadChunk> std::io::Read for BufferedFixedReader<S> {
528
529 fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
530
531 use std::io::{Error, ErrorKind};
532 use crate::tools::BufferedRead;
533
534 let data = match self.buffered_read(self.read_offset) {
535 Ok(v) => v,
536 Err(err) => return Err(Error::new(ErrorKind::Other, err.to_string())),
537 };
538
539 let n = if data.len() > buf.len() { buf.len() } else { data.len() };
540
541 unsafe { std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr(), n); }
542
543 self.read_offset += n as u64;
544
545 return Ok(n);
546 }
547 }
548
549 impl <S: ReadChunk> Seek for BufferedFixedReader<S> {
550
551 fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> {
552
553 let new_offset = match pos {
554 SeekFrom::Start(start_offset) => start_offset as i64,
555 SeekFrom::End(end_offset) => (self.archive_size as i64)+ end_offset,
556 SeekFrom::Current(offset) => (self.read_offset as i64) + offset,
557 };
558
559 use std::io::{Error, ErrorKind};
560 if (new_offset < 0) || (new_offset > (self.archive_size as i64)) {
561 return Err(Error::new(
562 ErrorKind::Other,
563 format!("seek is out of range {} ([0..{}])", new_offset, self.archive_size)));
564 }
565 self.read_offset = new_offset as u64;
566
567 Ok(self.read_offset)
568 }
569 }