]> git.proxmox.com Git - proxmox-backup.git/blame - src/backup/fixed_index.rs
drop src/storage/futures.rs
[proxmox-backup.git] / src / backup / fixed_index.rs
CommitLineData
606ce64b 1use failure::*;
afb4cd28 2use std::io::{Seek, SeekFrom};
606ce64b 3
22968600 4use crate::tools;
7bc1d727 5use super::IndexFile;
7e336555 6use super::chunk_stat::*;
606ce64b
DM
7use super::chunk_store::*;
8
150f1bd8 9use std::sync::Arc;
4818c8b6 10use std::io::{Read, Write};
10eea49d 11use std::fs::File;
606ce64b
DM
12use std::path::{Path, PathBuf};
13use std::os::unix::io::AsRawFd;
d13e3745 14use uuid::Uuid;
4818c8b6 15use chrono::{Local, TimeZone};
afb4cd28 16
f98ac774 17use super::ChunkInfo;
afb4cd28 18use super::read_chunk::*;
606ce64b 19
/// On-disk header of a fixed index file (`.fidx`).
///
/// The layout is `#[repr(C)]` and padded by `reserved` so that the whole
/// header occupies exactly one page (4096 bytes). The reader and writer in
/// this file verify that size at run time and convert the integer fields
/// with `u64::from_le` / `u64::to_le`.
#[repr(C)]
pub struct FixedIndexHeader {
    /// Magic number identifying the file format.
    pub magic: [u8; 8],
    /// Random UUID assigned when the index is created.
    pub uuid: [u8; 16],
    /// Creation time in seconds since the Unix epoch.
    pub ctime: u64,
    /// Sha256 over the index ``SHA256(digest1||digest2||...)``
    pub index_csum: [u8; 32],
    /// Total size of the indexed image.
    pub size: u64,
    /// Size of a single chunk; only the last chunk may be shorter.
    pub chunk_size: u64,
    /// Padding: overall size is one page (4096 bytes).
    reserved: [u8; 4016],
}
606ce64b
DM
32
33// split image into fixed size chunks
34
/// Read-only view of a fixed index file.
///
/// The digest table that follows the header is accessed through a memory
/// mapping (`index`); the mapping is released again when the reader is
/// dropped.
pub struct FixedIndexReader {
    /// Keeps the underlying file open for as long as the reader exists.
    _file: File,
    /// Size of one chunk as recorded in the header.
    pub chunk_size: usize,
    /// Total image size as recorded in the header.
    pub size: u64,
    /// Number of 32 byte digests in the mapping.
    index_length: usize,
    /// Start of the mapped digest table, or null once unmapped.
    index: *mut u8,
    /// UUID copied from the header.
    pub uuid: [u8; 16],
    /// Creation time copied from the header.
    pub ctime: u64,
    /// Index checksum copied from the header.
    pub index_csum: [u8; 32],
}
45
5be4065b
WB
46// `index` is mmap()ed which cannot be thread-local so should be sendable
47unsafe impl Send for FixedIndexReader {}
48
91a905b6 49impl Drop for FixedIndexReader {
4818c8b6
DM
50
51 fn drop(&mut self) {
52 if let Err(err) = self.unmap() {
a7c72ad9 53 eprintln!("Unable to unmap file - {}", err);
4818c8b6
DM
54 }
55 }
56}
57
91a905b6 58impl FixedIndexReader {
4818c8b6 59
a7c72ad9 60 pub fn open(path: &Path) -> Result<Self, Error> {
4818c8b6 61
a7c72ad9
DM
62 File::open(path)
63 .map_err(Error::from)
64 .and_then(|file| Self::new(file))
65 .map_err(|err| format_err!("Unable to open fixed index {:?} - {}", path, err))
66 }
4818c8b6 67
a7c72ad9 68 pub fn new(mut file: std::fs::File) -> Result<Self, Error> {
4818c8b6 69
c597a92c 70 if let Err(err) = nix::fcntl::flock(file.as_raw_fd(), nix::fcntl::FlockArg::LockSharedNonblock) {
a7c72ad9 71 bail!("unable to get shared lock - {}", err);
c597a92c
DM
72 }
73
afb4cd28
DM
74 file.seek(SeekFrom::Start(0))?;
75
91a905b6 76 let header_size = std::mem::size_of::<FixedIndexHeader>();
4818c8b6
DM
77
78 // todo: use static assertion when available in rust
a7c72ad9 79 if header_size != 4096 { bail!("got unexpected header size"); }
4818c8b6
DM
80
81 let mut buffer = vec![0u8; header_size];
82 file.read_exact(&mut buffer)?;
83
91a905b6 84 let header = unsafe { &mut * (buffer.as_ptr() as *mut FixedIndexHeader) };
4818c8b6 85
a7dd4830 86 if header.magic != super::FIXED_SIZED_CHUNK_INDEX_1_0 {
a7c72ad9 87 bail!("got unknown magic number");
a360f6fa
DM
88 }
89
b46c3fad 90 let size = u64::from_le(header.size);
48d0d356 91 let ctime = u64::from_le(header.ctime);
b46c3fad 92 let chunk_size = u64::from_le(header.chunk_size);
4818c8b6 93
b46c3fad 94 let index_length = ((size + chunk_size - 1)/chunk_size) as usize;
e1225de4 95 let index_size = index_length*32;
4818c8b6 96
0b8e75ed
DM
97 let rawfd = file.as_raw_fd();
98
99 let stat = match nix::sys::stat::fstat(rawfd) {
100 Ok(stat) => stat,
a7c72ad9 101 Err(err) => bail!("fstat failed - {}", err),
0b8e75ed
DM
102 };
103
ddbdf80d 104 let expected_index_size = (stat.st_size as usize) - header_size;
0b8e75ed 105 if index_size != expected_index_size {
a7c72ad9 106 bail!("got unexpected file size ({} != {})", index_size, expected_index_size);
0b8e75ed 107 }
4818c8b6
DM
108
109 let data = unsafe { nix::sys::mman::mmap(
110 std::ptr::null_mut(),
111 index_size,
112 nix::sys::mman::ProtFlags::PROT_READ,
113 nix::sys::mman::MapFlags::MAP_PRIVATE,
114 file.as_raw_fd(),
115 header_size as i64) }? as *mut u8;
116
117 Ok(Self {
10eea49d 118 _file: file,
b46c3fad 119 chunk_size: chunk_size as usize,
4818c8b6 120 size,
e1225de4 121 index_length,
4818c8b6
DM
122 index: data,
123 ctime,
124 uuid: header.uuid,
9335d74e 125 index_csum: header.index_csum,
4818c8b6
DM
126 })
127 }
128
129 fn unmap(&mut self) -> Result<(), Error> {
130
131 if self.index == std::ptr::null_mut() { return Ok(()); }
132
e1225de4 133 let index_size = self.index_length*32;
4818c8b6
DM
134
135 if let Err(err) = unsafe { nix::sys::mman::munmap(self.index as *mut std::ffi::c_void, index_size) } {
a7c72ad9 136 bail!("unmap file failed - {}", err);
4818c8b6
DM
137 }
138
139 self.index = std::ptr::null_mut();
140
141 Ok(())
142 }
143
afb4cd28
DM
144 pub fn chunk_info(&self, pos: usize) -> Result<(u64, u64, [u8; 32]), Error> {
145
146 if pos >= self.index_length {
147 bail!("chunk index out of range");
148 }
149 let start = (pos * self.chunk_size) as u64;
150 let mut end = start + self.chunk_size as u64;
151
152 if end > self.size {
153 end = self.size;
154 }
155
5e58e1bb
WB
156 let mut digest = std::mem::MaybeUninit::<[u8; 32]>::uninit();
157 unsafe {
158 std::ptr::copy_nonoverlapping(
159 self.index.add(pos*32),
160 (*digest.as_mut_ptr()).as_mut_ptr(),
161 32,
162 );
163 }
afb4cd28 164
5e58e1bb 165 Ok((start, end, unsafe { digest.assume_init() }))
afb4cd28
DM
166 }
167
168 #[inline]
169 fn chunk_end(&self, pos: usize) -> u64 {
170 if pos >= self.index_length {
171 panic!("chunk index out of range");
172 }
173
174 let end = ((pos+1) * self.chunk_size) as u64;
175 if end > self.size {
176 self.size
177 } else {
178 end
179 }
180 }
181
4818c8b6 182 pub fn print_info(&self) {
4818c8b6
DM
183 println!("Size: {}", self.size);
184 println!("ChunkSize: {}", self.chunk_size);
185 println!("CTime: {}", Local.timestamp(self.ctime as i64, 0).format("%c"));
186 println!("UUID: {:?}", self.uuid);
187 }
188}
189
7bc1d727
WB
190impl IndexFile for FixedIndexReader {
191 fn index_count(&self) -> usize {
e1225de4 192 self.index_length
7bc1d727
WB
193 }
194
195 fn index_digest(&self, pos: usize) -> Option<&[u8; 32]> {
e1225de4 196 if pos >= self.index_length {
7bc1d727
WB
197 None
198 } else {
199 Some(unsafe { std::mem::transmute(self.index.add(pos*32)) })
200 }
201 }
a660978c
DM
202
203 fn index_bytes(&self) -> u64 {
b46c3fad 204 self.size
a660978c 205 }
7bc1d727
WB
206}
207
91a905b6 208pub struct FixedIndexWriter {
150f1bd8 209 store: Arc<ChunkStore>,
9335d74e 210 file: File,
43b13033 211 _lock: tools::ProcessLockSharedGuard,
4fbb72a8
DM
212 filename: PathBuf,
213 tmp_filename: PathBuf,
606ce64b
DM
214 chunk_size: usize,
215 size: usize,
e1225de4 216 index_length: usize,
606ce64b 217 index: *mut u8,
9f49fe1d
DM
218 pub uuid: [u8; 16],
219 pub ctime: u64,
606ce64b
DM
220}
221
c3bb97e5
WB
222// `index` is mmap()ed which cannot be thread-local so should be sendable
223unsafe impl Send for FixedIndexWriter {}
224
91a905b6 225impl Drop for FixedIndexWriter {
4fbb72a8
DM
226
227 fn drop(&mut self) {
228 let _ = std::fs::remove_file(&self.tmp_filename); // ignore errors
229 if let Err(err) = self.unmap() {
0cd9d420 230 eprintln!("Unable to unmap file {:?} - {}", self.tmp_filename, err);
4fbb72a8
DM
231 }
232 }
233}
234
91a905b6 235impl FixedIndexWriter {
606ce64b 236
150f1bd8 237 pub fn create(store: Arc<ChunkStore>, path: &Path, size: usize, chunk_size: usize) -> Result<Self, Error> {
606ce64b 238
43b13033
DM
239 let shared_lock = store.try_shared_lock()?;
240
606ce64b 241 let full_path = store.relative_path(path);
4fbb72a8 242 let mut tmp_path = full_path.clone();
91a905b6 243 tmp_path.set_extension("tmp_fidx");
606ce64b
DM
244
245 let mut file = std::fs::OpenOptions::new()
d13e3745 246 .create(true).truncate(true)
606ce64b
DM
247 .read(true)
248 .write(true)
4fbb72a8 249 .open(&tmp_path)?;
606ce64b 250
91a905b6 251 let header_size = std::mem::size_of::<FixedIndexHeader>();
d13e3745
DM
252
253 // todo: use static assertion when available in rust
254 if header_size != 4096 { panic!("got unexpected header size"); }
255
256 let ctime = std::time::SystemTime::now().duration_since(
5e5b7f1c 257 std::time::SystemTime::UNIX_EPOCH)?.as_secs();
d13e3745
DM
258
259 let uuid = Uuid::new_v4();
260
0cd9d420 261 let buffer = vec![0u8; header_size];
91a905b6 262 let header = unsafe { &mut * (buffer.as_ptr() as *mut FixedIndexHeader) };
d13e3745 263
a7dd4830 264 header.magic = super::FIXED_SIZED_CHUNK_INDEX_1_0;
48d0d356
DM
265 header.ctime = u64::to_le(ctime);
266 header.size = u64::to_le(size as u64);
267 header.chunk_size = u64::to_le(chunk_size as u64);
d13e3745
DM
268 header.uuid = *uuid.as_bytes();
269
9335d74e
DM
270 header.index_csum = [0u8; 32];
271
5e5b7f1c 272 file.write_all(&buffer)?;
d13e3745 273
e1225de4
DM
274 let index_length = (size + chunk_size - 1)/chunk_size;
275 let index_size = index_length*32;
d13e3745
DM
276 nix::unistd::ftruncate(file.as_raw_fd(), (header_size + index_size) as i64)?;
277
606ce64b
DM
278 let data = unsafe { nix::sys::mman::mmap(
279 std::ptr::null_mut(),
280 index_size,
281 nix::sys::mman::ProtFlags::PROT_READ | nix::sys::mman::ProtFlags::PROT_WRITE,
282 nix::sys::mman::MapFlags::MAP_SHARED,
283 file.as_raw_fd(),
d13e3745
DM
284 header_size as i64) }? as *mut u8;
285
606ce64b
DM
286 Ok(Self {
287 store,
9335d74e 288 file,
43b13033 289 _lock: shared_lock,
4fbb72a8
DM
290 filename: full_path,
291 tmp_filename: tmp_path,
606ce64b
DM
292 chunk_size,
293 size,
e1225de4 294 index_length,
606ce64b 295 index: data,
d13e3745
DM
296 ctime,
297 uuid: *uuid.as_bytes(),
606ce64b
DM
298 })
299 }
300
006f3ff4
DM
301 pub fn index_length(&self) -> usize {
302 self.index_length
303 }
304
4fbb72a8
DM
305 fn unmap(&mut self) -> Result<(), Error> {
306
307 if self.index == std::ptr::null_mut() { return Ok(()); }
308
e1225de4 309 let index_size = self.index_length*32;
4fbb72a8
DM
310
311 if let Err(err) = unsafe { nix::sys::mman::munmap(self.index as *mut std::ffi::c_void, index_size) } {
0cd9d420 312 bail!("unmap file {:?} failed - {}", self.tmp_filename, err);
4fbb72a8
DM
313 }
314
315 self.index = std::ptr::null_mut();
316
317 Ok(())
318 }
319
9335d74e 320 pub fn close(&mut self) -> Result<[u8; 32], Error> {
4fbb72a8
DM
321
322 if self.index == std::ptr::null_mut() { bail!("cannot close already closed index file."); }
323
9335d74e
DM
324 let index_size = self.index_length*32;
325 let data = unsafe { std::slice::from_raw_parts(self.index, index_size) };
326 let index_csum = openssl::sha::sha256(data);
327
4fbb72a8
DM
328 self.unmap()?;
329
9335d74e 330 let csum_offset = proxmox::tools::offsetof!(FixedIndexHeader, index_csum);
afb4cd28 331 self.file.seek(SeekFrom::Start(csum_offset as u64))?;
9335d74e
DM
332 self.file.write_all(&index_csum)?;
333 self.file.flush()?;
334
4fbb72a8
DM
335 if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
336 bail!("Atomic rename file {:?} failed - {}", self.filename, err);
337 }
338
9335d74e 339 Ok(index_csum)
4fbb72a8
DM
340 }
341
5e04ec70 342 pub fn check_chunk_alignment(&self, offset: usize, chunk_len: usize) -> Result<usize, Error> {
f98ac774 343
5e04ec70
DM
344 if offset < chunk_len {
345 bail!("got chunk with small offset ({} < {}", offset, chunk_len);
f98ac774
DM
346 }
347
5e04ec70 348 let pos = offset - chunk_len;
606ce64b 349
5e04ec70
DM
350 if offset > self.size {
351 bail!("chunk data exceeds size ({} >= {})", offset, self.size);
606ce64b
DM
352 }
353
354 // last chunk can be smaller
5e04ec70 355 if ((offset != self.size) && (chunk_len != self.chunk_size)) ||
f98ac774 356 (chunk_len > self.chunk_size) || (chunk_len == 0) {
5e04ec70 357 bail!("chunk with unexpected length ({} != {}", chunk_len, self.chunk_size);
606ce64b
DM
358 }
359
5e04ec70
DM
360 if pos & (self.chunk_size-1) != 0 {
361 bail!("got unaligned chunk (pos = {})", pos);
f98ac774
DM
362 }
363
5e04ec70
DM
364 Ok(pos / self.chunk_size)
365 }
366
367 // Note: We want to add data out of order, so do not assume any order here.
368 pub fn add_chunk(&mut self, chunk_info: &ChunkInfo, stat: &mut ChunkStat) -> Result<(), Error> {
369
370 let chunk_len = chunk_info.chunk_len as usize;
371 let offset = chunk_info.offset as usize; // end of chunk
372
373 let idx = self.check_chunk_alignment(offset, chunk_len)?;
374
f98ac774 375 let (is_duplicate, compressed_size) = self.store.insert_chunk(&chunk_info.chunk)?;
798f7fa0 376
cb0708dd
DM
377 stat.chunk_count += 1;
378 stat.compressed_size += compressed_size;
606ce64b 379
f98ac774
DM
380 let digest = chunk_info.chunk.digest();
381
5e04ec70 382 println!("ADD CHUNK {} {} {}% {} {}", idx, chunk_len,
bffd40d6 383 (compressed_size*100)/(chunk_len as u64), is_duplicate, proxmox::tools::digest_to_hex(digest));
798f7fa0
DM
384
385 if is_duplicate {
cb0708dd 386 stat.duplicate_chunks += 1;
798f7fa0 387 } else {
cb0708dd 388 stat.disk_size += compressed_size;
798f7fa0 389 }
606ce64b 390
5e04ec70 391 self.add_digest(idx, digest)
e3062f87
WB
392 }
393
394 pub fn add_digest(&mut self, index: usize, digest: &[u8; 32]) -> Result<(), Error> {
01af11f3 395
fc14b849
DM
396 if index >= self.index_length {
397 bail!("add digest failed - index out of range ({} >= {})", index, self.index_length);
398 }
399
01af11f3
DM
400 if self.index == std::ptr::null_mut() { bail!("cannot write to closed index file."); }
401
e3062f87 402 let index_pos = index*32;
606ce64b
DM
403 unsafe {
404 let dst = self.index.add(index_pos);
405 dst.copy_from_nonoverlapping(digest.as_ptr(), 32);
406 }
407
408 Ok(())
409 }
410}
afb4cd28
DM
411
412pub struct BufferedFixedReader<S> {
413 store: S,
414 index: FixedIndexReader,
415 archive_size: u64,
416 read_buffer: Vec<u8>,
417 buffered_chunk_idx: usize,
418 buffered_chunk_start: u64,
419 read_offset: u64,
420}
421
422impl <S: ReadChunk> BufferedFixedReader<S> {
423
424 pub fn new(index: FixedIndexReader, store: S) -> Self {
425
426 let archive_size = index.size;
427 Self {
428 store,
429 index: index,
430 archive_size: archive_size,
431 read_buffer: Vec::with_capacity(1024*1024),
432 buffered_chunk_idx: 0,
433 buffered_chunk_start: 0,
434 read_offset: 0,
435 }
436 }
437
438 pub fn archive_size(&self) -> u64 { self.archive_size }
439
440 fn buffer_chunk(&mut self, idx: usize) -> Result<(), Error> {
441
442 let index = &self.index;
443 let (start, end, digest) = index.chunk_info(idx)?;
444
445 // fixme: avoid copy
446
447 let data = self.store.read_chunk(&digest)?;
448
449 if (end - start) != data.len() as u64 {
450 bail!("read chunk with wrong size ({} != {}", (end - start), data.len());
451 }
452
453 self.read_buffer.clear();
454 self.read_buffer.extend_from_slice(&data);
455
456 self.buffered_chunk_idx = idx;
457
458 self.buffered_chunk_start = start as u64;
459 //println!("BUFFER {} {}", self.buffered_chunk_start, end);
460 Ok(())
461 }
462}
463
464impl <S: ReadChunk> crate::tools::BufferedRead for BufferedFixedReader<S> {
465
466 fn buffered_read(&mut self, offset: u64) -> Result<&[u8], Error> {
467
468 if offset == self.archive_size { return Ok(&self.read_buffer[0..0]); }
469
470 let buffer_len = self.read_buffer.len();
471 let index = &self.index;
472
473 // optimization for sequential read
474 if buffer_len > 0 &&
475 ((self.buffered_chunk_idx + 1) < index.index_length) &&
476 (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64)))
477 {
478 let next_idx = self.buffered_chunk_idx + 1;
479 let next_end = index.chunk_end(next_idx);
480 if offset < next_end {
481 self.buffer_chunk(next_idx)?;
482 let buffer_offset = (offset - self.buffered_chunk_start) as usize;
483 return Ok(&self.read_buffer[buffer_offset..]);
484 }
485 }
486
487 if (buffer_len == 0) ||
488 (offset < self.buffered_chunk_start) ||
489 (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64)))
490 {
491 let idx = (offset / index.chunk_size as u64) as usize;
492 self.buffer_chunk(idx)?;
493 }
494
495 let buffer_offset = (offset - self.buffered_chunk_start) as usize;
496 Ok(&self.read_buffer[buffer_offset..])
497 }
498}
499
500impl <S: ReadChunk> std::io::Read for BufferedFixedReader<S> {
501
502 fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
503
504 use std::io::{Error, ErrorKind};
505 use crate::tools::BufferedRead;
506
507 let data = match self.buffered_read(self.read_offset) {
508 Ok(v) => v,
509 Err(err) => return Err(Error::new(ErrorKind::Other, err.to_string())),
510 };
511
512 let n = if data.len() > buf.len() { buf.len() } else { data.len() };
513
514 unsafe { std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr(), n); }
515
516 self.read_offset += n as u64;
517
518 return Ok(n);
519 }
520}
521
522impl <S: ReadChunk> Seek for BufferedFixedReader<S> {
523
524 fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> {
525
526 let new_offset = match pos {
527 SeekFrom::Start(start_offset) => start_offset as i64,
528 SeekFrom::End(end_offset) => (self.archive_size as i64)+ end_offset,
529 SeekFrom::Current(offset) => (self.read_offset as i64) + offset,
530 };
531
532 use std::io::{Error, ErrorKind};
533 if (new_offset < 0) || (new_offset > (self.archive_size as i64)) {
534 return Err(Error::new(
535 ErrorKind::Other,
536 format!("seek is out of range {} ([0..{}])", new_offset, self.archive_size)));
537 }
538 self.read_offset = new_offset as u64;
539
540 Ok(self.read_offset)
541 }
542}