2 use std
::io
::{Seek, SeekFrom}
;
3 use std
::convert
::TryInto
;
7 use super::chunk_stat
::*;
8 use super::chunk_store
::*;
11 use std
::io
::{Read, Write}
;
13 use std
::path
::{Path, PathBuf}
;
14 use std
::os
::unix
::io
::AsRawFd
;
16 use chrono
::{Local, TimeZone}
;
19 use super::read_chunk
::*;
/// Header format definition for fixed index files (`.fidx`)
///
/// The header occupies exactly one page (4096 bytes); the chunk digest
/// list follows directly after it (see the mmap offset in `new()`).
// NOTE(review): this definition was garbled in the source; the field set
// and order were reconstructed from the accessors used in `new()` and
// `create()` (magic, uuid, ctime, index_csum, size, chunk_size) — verify
// against the on-disk format before relying on the exact layout.
#[repr(C)] // cast directly from/to raw file bytes, so the layout must be stable
pub struct FixedIndexHeader {
    pub magic: [u8; 8],
    pub uuid: [u8; 16],
    /// Creation time (seconds since the UNIX epoch; little endian on disk)
    pub ctime: u64,
    /// Sha256 over the index ``SHA256(digest1||digest2||...)``
    pub index_csum: [u8; 32],
    /// Total image size in bytes (little endian on disk)
    pub size: u64,
    /// Fixed chunk size in bytes (little endian on disk)
    pub chunk_size: u64,
    reserved: [u8; 4016], // overall size is one page (4096 bytes)
}
// split image into fixed size chunks

/// Reader for fixed size chunk index files (`.fidx`)
// NOTE(review): this struct definition appears truncated in this chunk —
// the impl below also uses `index`, `index_length`, `size`, `ctime` and
// `uuid` fields that are not visible here.
pub struct FixedIndexReader {
    // chunk size in bytes, taken from the file header on open
    pub chunk_size: usize,
    /// Sha256 over the index, as stored in the file header
    pub index_csum: [u8; 32],
// `index` is mmap()ed which cannot be thread-local so should be sendable
// NOTE(review): soundness of these impls rests on the mapping being
// PROT_READ / MAP_PRIVATE (see `new()`), i.e. strictly read-only shared
// access — confirm before ever adding a mutating method to the reader.
unsafe impl Send for FixedIndexReader {}
unsafe impl Sync for FixedIndexReader {}
51 impl Drop
for FixedIndexReader
{
54 if let Err(err
) = self.unmap() {
55 eprintln
!("Unable to unmap file - {}", err
);
impl FixedIndexReader {
    /// Open a fixed index file by path and wrap it in a reader.
    // NOTE(review): the expression that actually opens the file (presumably
    // a `File::open(path)` call this chain hangs off) is not visible in
    // this chunk of the file.
    pub fn open(path: &Path) -> Result<Self, Error> {
        .and_then(|file| Self::new(file))
        // attach the path to any open/parse error for context
        .map_err(|err| format_err!("Unable to open fixed index {:?} - {}", path, err))
    /// Construct a reader from an already-opened file: take a shared lock,
    /// parse and validate the header, then mmap() the digest index.
    pub fn new(mut file: std::fs::File) -> Result<Self, Error> {
        // shared (read) lock, non-blocking — a writer holds an exclusive lock
        if let Err(err) = nix::fcntl::flock(file.as_raw_fd(), nix::fcntl::FlockArg::LockSharedNonblock) {
            bail!("unable to get shared lock - {}", err);
        }
        file.seek(SeekFrom::Start(0))?;
        let header_size = std::mem::size_of::<FixedIndexHeader>();
        // todo: use static assertion when available in rust
        if header_size != 4096 { bail!("got unexpected header size"); }
        let mut buffer = vec![0u8; header_size];
        file.read_exact(&mut buffer)?;
        // reinterpret the raw page as a header; size was checked above
        let header = unsafe { &mut * (buffer.as_ptr() as *mut FixedIndexHeader) };
        if header.magic != super::FIXED_SIZED_CHUNK_INDEX_1_0 {
            bail!("got unknown magic number");
        // all multi-byte header fields are stored little endian on disk
        let size = u64::from_le(header.size);
        let ctime = u64::from_le(header.ctime);
        let chunk_size = u64::from_le(header.chunk_size);
        // number of chunks = ceil(size / chunk_size); one 32-byte sha256 each
        let index_length = ((size + chunk_size - 1)/chunk_size) as usize;
        let index_size = index_length*32;
        let rawfd = file.as_raw_fd();
        // cross-check the computed index size against the real file size
        let stat = match nix::sys::stat::fstat(rawfd) {
            Err(err) => bail!("fstat failed - {}", err),
        let expected_index_size = (stat.st_size as usize) - header_size;
        if index_size != expected_index_size {
            bail!("got unexpected file size ({} != {})", index_size, expected_index_size);
        // map the digest list read-only/private; released again in unmap()
        let data = unsafe { nix::sys::mman::mmap(
            std::ptr::null_mut(),
            nix::sys::mman::ProtFlags::PROT_READ,
            nix::sys::mman::MapFlags::MAP_PRIVATE,
            header_size as i64) }? as *mut u8;
        // NOTE(review): several lines (the Ok match arm of fstat, the mmap
        // length/fd arguments, and the final Ok(Self { .. }) construction)
        // are not visible in this chunk of the file.
        chunk_size: chunk_size as usize,
        index_csum: header.index_csum,
131 fn unmap(&mut self) -> Result
<(), Error
> {
133 if self.index
== std
::ptr
::null_mut() { return Ok(()); }
135 let index_size
= self.index_length
*32;
137 if let Err(err
) = unsafe { nix::sys::mman::munmap(self.index as *mut std::ffi::c_void, index_size) }
{
138 bail
!("unmap file failed - {}", err
);
141 self.index
= std
::ptr
::null_mut();
/// Return `(start, end, digest)` for chunk number `pos`.
///
/// `start`/`end` are byte offsets within the image; `digest` is the
/// chunk's sha256 copied out of the mmap()ed index.
pub fn chunk_info(&self, pos: usize) -> Result<(u64, u64, [u8; 32]), Error> {
    if pos >= self.index_length {
        bail!("chunk index out of range");
    let start = (pos * self.chunk_size) as u64;
    let mut end = start + self.chunk_size as u64;
    // NOTE(review): the lines that presumably clamp `end` to the image size
    // for the (smaller) last chunk are not visible in this chunk of the file.
    let mut digest = std::mem::MaybeUninit::<[u8; 32]>::uninit();
    // copy the 32 digest bytes out of the mapped index into `digest`
    std::ptr::copy_nonoverlapping(
        self.index.add(pos*32),
        (*digest.as_mut_ptr()).as_mut_ptr(),
    // NOTE(review): the copy length argument and the enclosing unsafe block
    // are not visible here.
    Ok((start, end, unsafe { digest.assume_init() }))
171 fn chunk_digest(&self, pos
: usize) -> &[u8; 32] {
172 if pos
>= self.index_length
{
173 panic
!("chunk index out of range");
175 let slice
= unsafe { std::slice::from_raw_parts(self.index.add(pos*32), 32) }
;
176 slice
.try_into().unwrap()
/// Byte offset of the end of chunk `pos` within the image.
/// Panics when `pos` is out of range.
fn chunk_end(&self, pos: usize) -> u64 {
    if pos >= self.index_length {
        panic!("chunk index out of range");
    let end = ((pos+1) * self.chunk_size) as u64;
    // NOTE(review): the function tail (presumably clamping `end` to the
    // image size for the last chunk, and returning it) is not visible in
    // this chunk of the file.
/// Compute checksum and data size
// NOTE(review): part of the loop body and the return expression are not
// visible in this chunk. The visible part feeds each chunk-end offset into
// the sha256 state; whether the chunk digests themselves are hashed as
// well cannot be confirmed from this excerpt — verify against the header
// doc ``SHA256(digest1||digest2||...)``.
pub fn compute_csum(&self) -> ([u8; 32], u64) {
    let mut csum = openssl::sha::Sha256::new();
    let mut chunk_end = 0;
    for pos in 0..self.index_length {
        chunk_end = ((pos+1) * self.chunk_size) as u64;
        let digest = self.chunk_digest(pos);
        csum.update(&chunk_end.to_le_bytes());
    let csum = csum.finish();
209 pub fn print_info(&self) {
210 println
!("Size: {}", self.size
);
211 println
!("ChunkSize: {}", self.chunk_size
);
212 println
!("CTime: {}", Local
.timestamp(self.ctime
as i64, 0).format("%c"));
213 println
!("UUID: {:?}", self.uuid
);
impl IndexFile for FixedIndexReader {
    /// Number of chunk slots in this index.
    // NOTE(review): the body (presumably `self.index_length`) is not
    // visible in this chunk of the file.
    fn index_count(&self) -> usize {
    /// Borrow the digest of chunk `pos`, or `None` when out of range.
    fn index_digest(&self, pos: usize) -> Option<&[u8; 32]> {
        if pos >= self.index_length {
        // NOTE(review): the out-of-range return (presumably `None`) is not
        // visible here.
        // reinterpret the raw index pointer as a &[u8; 32] reference
        Some(unsafe { std::mem::transmute(self.index.add(pos*32)) })
    /// Total number of bytes covered by the index.
    // NOTE(review): the body (presumably `self.size`) is not visible here.
    fn index_bytes(&self) -> u64 {
/// Writer for fixed size chunk index files (`.fidx`)
// NOTE(review): the definition appears truncated in this chunk — the impl
// below also uses `file`, `filename`, `chunk_size`, `size`,
// `index_length`, `index`, `ctime` and `uuid` fields not visible here.
pub struct FixedIndexWriter {
    // chunk store the data chunks are inserted into
    store: Arc<ChunkStore>,
    // shared lock on the store (from `store.try_shared_lock()`), held for
    // the writer's lifetime — presumably guards against store removal
    _lock: tools::ProcessLockSharedGuard,
    // temporary file name (`*.tmp_fidx`); renamed to the final name in close()
    tmp_filename: PathBuf,
// `index` is mmap()ed which cannot be thread-local so should be sendable
// NOTE(review): unlike the reader, only `Send` is implemented (no `Sync`) —
// consistent with the writer's PROT_WRITE/MAP_SHARED mapping (see create()).
unsafe impl Send for FixedIndexWriter {}
252 impl Drop
for FixedIndexWriter
{
255 let _
= std
::fs
::remove_file(&self.tmp_filename
); // ignore errors
256 if let Err(err
) = self.unmap() {
257 eprintln
!("Unable to unmap file {:?} - {}", self.tmp_filename
, err
);
impl FixedIndexWriter {
    /// Create a new `.fidx` writer: write the header, size the file for
    /// header + index, and mmap() the index region read/write.
    // NOTE(review): several lines (the rest of the OpenOptions chain, the
    // ftruncate/mmap length and fd arguments, and the final
    // Ok(Self { .. }) construction) are not visible in this chunk.
    pub fn create(store: Arc<ChunkStore>, path: &Path, size: usize, chunk_size: usize) -> Result<Self, Error> {
        let shared_lock = store.try_shared_lock()?;
        let full_path = store.relative_path(path);
        // write to a temp name first; close() renames it into place
        let mut tmp_path = full_path.clone();
        tmp_path.set_extension("tmp_fidx");
        let mut file = std::fs::OpenOptions::new()
            .create(true).truncate(true)
        let header_size = std::mem::size_of::<FixedIndexHeader>();
        // todo: use static assertion when available in rust
        if header_size != 4096 { panic!("got unexpected header size"); }
        let ctime = std::time::SystemTime::now().duration_since(
            std::time::SystemTime::UNIX_EPOCH)?.as_secs();
        let uuid = Uuid::new_v4();
        // build the header inside a zeroed page-sized buffer
        let buffer = vec![0u8; header_size];
        let header = unsafe { &mut * (buffer.as_ptr() as *mut FixedIndexHeader) };
        header.magic = super::FIXED_SIZED_CHUNK_INDEX_1_0;
        // multi-byte fields are stored little endian on disk
        header.ctime = u64::to_le(ctime);
        header.size = u64::to_le(size as u64);
        header.chunk_size = u64::to_le(chunk_size as u64);
        header.uuid = *uuid.as_bytes();
        // checksum is filled in later, by close()
        header.index_csum = [0u8; 32];
        file.write_all(&buffer)?;
        // pre-size the file for the header plus one 32-byte digest per chunk
        let index_length = (size + chunk_size - 1)/chunk_size;
        let index_size = index_length*32;
        nix::unistd::ftruncate(file.as_raw_fd(), (header_size + index_size) as i64)?;
        // map the index read/write and shared so digest writes land in the file
        let data = unsafe { nix::sys::mman::mmap(
            std::ptr::null_mut(),
            nix::sys::mman::ProtFlags::PROT_READ | nix::sys::mman::ProtFlags::PROT_WRITE,
            nix::sys::mman::MapFlags::MAP_SHARED,
            header_size as i64) }? as *mut u8;
        tmp_filename: tmp_path,
        uuid: *uuid.as_bytes(),
    /// Number of chunk slots in the index (`ceil(size / chunk_size)`, see create()).
    // NOTE(review): the body is not visible in this chunk of the file.
    pub fn index_length(&self) -> usize {
332 fn unmap(&mut self) -> Result
<(), Error
> {
334 if self.index
== std
::ptr
::null_mut() { return Ok(()); }
336 let index_size
= self.index_length
*32;
338 if let Err(err
) = unsafe { nix::sys::mman::munmap(self.index as *mut std::ffi::c_void, index_size) }
{
339 bail
!("unmap file {:?} failed - {}", self.tmp_filename
, err
);
342 self.index
= std
::ptr
::null_mut();
/// Finish the index: compute the checksum over the digest array, patch it
/// into the on-disk header, and atomically rename the temp file into place.
/// Fails when the writer was already closed.
pub fn close(&mut self) -> Result<[u8; 32], Error> {
    if self.index == std::ptr::null_mut() { bail!("cannot close already closed index file."); }
    let index_size = self.index_length*32;
    // hash the complete digest array out of the shared mapping
    let data = unsafe { std::slice::from_raw_parts(self.index, index_size) };
    let index_csum = openssl::sha::sha256(data);
    // NOTE(review): intermediate lines (presumably storing the csum and
    // unmapping the index) are not visible in this chunk of the file.
    // patch the checksum into the already-written header on disk
    let csum_offset = proxmox::tools::offsetof!(FixedIndexHeader, index_csum);
    self.file.seek(SeekFrom::Start(csum_offset as u64))?;
    self.file.write_all(&index_csum)?;
    if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
        bail!("Atomic rename file {:?} failed - {}", self.filename, err);
    // NOTE(review): the return expression (presumably Ok(index_csum)) is
    // not visible here.
369 pub fn check_chunk_alignment(&self, offset
: usize, chunk_len
: usize) -> Result
<usize, Error
> {
371 if offset
< chunk_len
{
372 bail
!("got chunk with small offset ({} < {}", offset
, chunk_len
);
375 let pos
= offset
- chunk_len
;
377 if offset
> self.size
{
378 bail
!("chunk data exceeds size ({} >= {})", offset
, self.size
);
381 // last chunk can be smaller
382 if ((offset
!= self.size
) && (chunk_len
!= self.chunk_size
)) ||
383 (chunk_len
> self.chunk_size
) || (chunk_len
== 0) {
384 bail
!("chunk with unexpected length ({} != {}", chunk_len
, self.chunk_size
);
387 if pos
& (self.chunk_size
-1) != 0 {
388 bail
!("got unaligned chunk (pos = {})", pos
);
391 Ok(pos
/ self.chunk_size
)
// Note: We want to add data out of order, so do not assume any order here.
/// Insert the chunk's data into the store and record its digest at the
/// index slot derived from its end offset. Updates `stat` counters.
pub fn add_chunk(&mut self, chunk_info: &ChunkInfo, stat: &mut ChunkStat) -> Result<(), Error> {
    let chunk_len = chunk_info.chunk_len as usize;
    let offset = chunk_info.offset as usize; // end of chunk
    // validate offset/length and derive the index slot
    let idx = self.check_chunk_alignment(offset, chunk_len)?;
    let (is_duplicate, compressed_size) = self.store.insert_chunk(&chunk_info.chunk)?;
    stat.chunk_count += 1;
    stat.compressed_size += compressed_size;
    let digest = chunk_info.chunk.digest();
    // NOTE(review): unconditional debug output on every chunk — consider
    // removing or demoting to a proper log level.
    println!("ADD CHUNK {} {} {}% {} {}", idx, chunk_len,
        (compressed_size*100)/(chunk_len as u64), is_duplicate, proxmox::tools::digest_to_hex(digest));
    // NOTE(review): the branch framing around the two stat updates below
    // (presumably `if is_duplicate { .. } else { .. }`) is not visible in
    // this chunk of the file.
    stat.duplicate_chunks += 1;
    stat.disk_size += compressed_size;
    self.add_digest(idx, digest)
/// Write `digest` into slot `index` of the mmap()ed index.
/// Fails when the slot is out of range or the writer is already closed.
pub fn add_digest(&mut self, index: usize, digest: &[u8; 32]) -> Result<(), Error> {
    if index >= self.index_length {
        bail!("add digest failed - index out of range ({} >= {})", index, self.index_length);
    if self.index == std::ptr::null_mut() { bail!("cannot write to closed index file."); }
    let index_pos = index*32;
    // copy the 32 digest bytes straight into the shared mapping
    let dst = self.index.add(index_pos);
    dst.copy_from_nonoverlapping(digest.as_ptr(), 32);
    // NOTE(review): the enclosing unsafe block and the final Ok(()) are not
    // visible in this chunk of the file.
/// Buffered, `Read`/`Seek`-capable view over a fixed index plus a chunk
/// store, keeping one chunk's data cached at a time.
// NOTE(review): the definition appears truncated in this chunk — the impls
// below also use `store`, `archive_size` and `read_offset` fields that are
// not visible here.
pub struct BufferedFixedReader<S> {
    // the fixed index describing chunk layout and digests
    index: FixedIndexReader,
    // data of the currently buffered chunk
    read_buffer: Vec<u8>,
    // index slot of the currently buffered chunk
    buffered_chunk_idx: usize,
    // archive byte offset where the buffered chunk starts
    buffered_chunk_start: u64,
impl <S: ReadChunk> BufferedFixedReader<S> {
    /// Wrap `index` and `store` in a buffered reader positioned at offset 0.
    // NOTE(review): parts of the struct literal (e.g. the `store` and
    // read-offset fields) are not visible in this chunk of the file.
    pub fn new(index: FixedIndexReader, store: S) -> Self {
        let archive_size = index.size;
        archive_size: archive_size,
        // pre-size the buffer for a typical chunk (1 MiB)
        read_buffer: Vec::with_capacity(1024*1024),
        buffered_chunk_idx: 0,
        buffered_chunk_start: 0,
    /// Total archive size in bytes (taken from the index on construction).
    pub fn archive_size(&self) -> u64 { self.archive_size }
467 fn buffer_chunk(&mut self, idx
: usize) -> Result
<(), Error
> {
469 let index
= &self.index
;
470 let (start
, end
, digest
) = index
.chunk_info(idx
)?
;
474 let data
= self.store
.read_chunk(&digest
)?
;
476 if (end
- start
) != data
.len() as u64 {
477 bail
!("read chunk with wrong size ({} != {}", (end
- start
), data
.len());
480 self.read_buffer
.clear();
481 self.read_buffer
.extend_from_slice(&data
);
483 self.buffered_chunk_idx
= idx
;
485 self.buffered_chunk_start
= start
as u64;
486 //println!("BUFFER {} {}", self.buffered_chunk_start, end);
impl <S: ReadChunk> crate::tools::BufferedRead for BufferedFixedReader<S> {
    /// Return a slice of buffered data starting at archive `offset`,
    /// (re)filling the chunk buffer as needed. At end of archive an empty
    /// slice is returned.
    // NOTE(review): the `if` framing of the two multi-line conditions below
    // (and their closing braces) is not visible in this chunk of the file.
    fn buffered_read(&mut self, offset: u64) -> Result<&[u8], Error> {
        if offset == self.archive_size { return Ok(&self.read_buffer[0..0]); }
        let buffer_len = self.read_buffer.len();
        let index = &self.index;
        // optimization for sequential read
        ((self.buffered_chunk_idx + 1) < index.index_length) &&
        (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64)))
        let next_idx = self.buffered_chunk_idx + 1;
        let next_end = index.chunk_end(next_idx);
        if offset < next_end {
            self.buffer_chunk(next_idx)?;
            let buffer_offset = (offset - self.buffered_chunk_start) as usize;
            return Ok(&self.read_buffer[buffer_offset..]);
        // fall back to a direct chunk lookup when the offset is outside the
        // currently buffered range
        if (buffer_len == 0) ||
        (offset < self.buffered_chunk_start) ||
        (offset >= (self.buffered_chunk_start + (self.read_buffer.len() as u64)))
        let idx = (offset / index.chunk_size as u64) as usize;
        self.buffer_chunk(idx)?;
        let buffer_offset = (offset - self.buffered_chunk_start) as usize;
        Ok(&self.read_buffer[buffer_offset..])
impl <S: ReadChunk> std::io::Read for BufferedFixedReader<S> {
    /// `Read` adapter over `buffered_read`: copies at most `buf.len()`
    /// bytes from the current position and advances `read_offset`.
    // NOTE(review): the `Ok` match arm and the final return (presumably
    // `Ok(n)`) are not visible in this chunk of the file.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
        use std::io::{Error, ErrorKind};
        use crate::tools::BufferedRead;
        let data = match self.buffered_read(self.read_offset) {
            Err(err) => return Err(Error::new(ErrorKind::Other, err.to_string())),
        // copy as much as fits into the caller's buffer
        let n = if data.len() > buf.len() { buf.len() } else { data.len() };
        // SAFETY: `n` is bounded by both slice lengths, and the source and
        // destination buffers cannot overlap
        unsafe { std::ptr::copy_nonoverlapping(data.as_ptr(), buf.as_mut_ptr(), n); }
        self.read_offset += n as u64;
impl <S: ReadChunk> Seek for BufferedFixedReader<S> {
    /// Seek within the archive; the resulting offset must stay inside
    /// `[0, archive_size]`, otherwise an error is returned.
    // NOTE(review): the ErrorKind argument of Error::new and the final
    // return (presumably `Ok(self.read_offset)`) are not visible in this
    // chunk of the file.
    fn seek(&mut self, pos: SeekFrom) -> Result<u64, std::io::Error> {
        // compute the target as a signed offset so out-of-range is detectable
        let new_offset = match pos {
            SeekFrom::Start(start_offset) => start_offset as i64,
            SeekFrom::End(end_offset) => (self.archive_size as i64)+ end_offset,
            SeekFrom::Current(offset) => (self.read_offset as i64) + offset,
        use std::io::{Error, ErrorKind};
        if (new_offset < 0) || (new_offset > (self.archive_size as i64)) {
            return Err(Error::new(
                format!("seek is out of range {} ([0..{}])", new_offset, self.archive_size)));
        self.read_offset = new_offset as u64;