Return the compressed chunk size from insert_chunk() so that we can generate better statistics.
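The patch threads the lz4-compressed on-disk size of each chunk through insert_chunk()'s return tuple, so the index writers can report compression and deduplication statistics. A minimal sketch of the bookkeeping the writers do with it (variable names hypothetical, not part of this patch):

    // Accumulate per-chunk results into running totals.
    let (is_duplicate, _digest, compressed_size) = store.insert_chunk(&chunk)?;
    compressed_total += compressed_size; // every chunk, duplicates included
    if !is_duplicate {
        disk_total += compressed_size; // only chunks actually written to disk
    }
    // Integer percentages relative to the uncompressed size (assumes size > 0):
    let compression = (compressed_total * 100) / size;
    let disk_rate = (disk_total * 100) / size;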
Ok(())
}
- pub fn insert_chunk(&self, chunk: &[u8]) -> Result<(bool, [u8; 32]), Error> {
+ pub fn insert_chunk(&self, chunk: &[u8]) -> Result<(bool, [u8; 32], u64), Error> {
// fixme: use Sha512/256 when available
let mut hasher = sha::Sha256::new();
if let Ok(metadata) = std::fs::metadata(&chunk_path) {
if metadata.is_file() {
- return Ok((true, digest));
+ return Ok((true, digest, metadata.len()));
} else {
bail!("Got unexpected file type on store '{}' for chunk {}", self.name, digest_str);
}
let mut encoder = lz4::EncoderBuilder::new().level(1).build(f)?;
encoder.write_all(chunk)?;
- let (_, encode_result) = encoder.finish();
+ let (f, encode_result) = encoder.finish();
encode_result?;
if let Err(err) = std::fs::rename(&tmp_path, &chunk_path) {
);
}
+ // fixme: is there a better way to get the compressed size?
+ let stat = nix::sys::stat::fstat(f.as_raw_fd())?;
+ let compressed_size = stat.st_size as u64;
+
//println!("PATH {:?}", chunk_path);
drop(lock);
- Ok((false, digest))
+ Ok((false, digest, compressed_size))
}
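Regarding the fixme above: one possible alternative (an assumption, not something this patch does) is to ask the File handle itself, which avoids the raw fstat(2) call and the std::os::unix::io::AsRawFd import that f.as_raw_fd() needs in scope:

    // Hypothetical replacement for the nix::sys::stat::fstat() call:
    let compressed_size = f.metadata()?.len();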
pub fn relative_path(&self, path: &Path) -> PathBuf {
assert!(chunk_store.is_err());
let chunk_store = ChunkStore::create("test", &path).unwrap();
- let (exists, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap();
+ let (exists, _, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap();
assert!(!exists);
- let (exists, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap();
+ let (exists, _, _) = chunk_store.insert_chunk(&[0u8, 1u8]).unwrap();
assert!(exists);
pub uuid: [u8; 16],
pub ctime: u64,
+ compressed_size: u64,
+ disk_size: u64,
chunk_count: usize,
chunk_offset: usize,
last_chunk: usize,
ctime,
uuid: *uuid.as_bytes(),
+ compressed_size: 0,
+ disk_size: 0,
chunk_count: 0,
chunk_offset: 0,
last_chunk: 0,
self.writer.flush()?;
- let avg = ((self.chunk_offset as f64)/(self.chunk_count as f64)) as usize;
- println!("Average chunk size {}", avg);
+ let size = self.chunk_offset;
+ let avg = ((size as f64)/(self.chunk_count as f64)) as usize;
+ let compression = (self.compressed_size*100)/(size as u64);
+ let rate = (self.disk_size*100)/(size as u64);
+ println!("Size: {}, average chunk size: {}, compression rate: {}%, disk_size: {} ({}%)",
+ size, avg, compression, self.disk_size, rate);
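As a worked example of this line: a 4 MiB stream (size = 4194304) with compressed_size = 1048576 and disk_size = 524288 prints a compression rate of 25% and a disk size of 524288 (12%), the latter rounded down by the integer division.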
// fixme:
if let Err(err) = std::fs::rename(&self.tmp_filename, &self.filename) {
self.last_chunk = self.chunk_offset;
match self.store.insert_chunk(&self.chunk_buffer) {
- Ok((is_duplicate, digest)) => {
- println!("ADD CHUNK {:016x} {} {} {}", self.chunk_offset, chunk_size, is_duplicate, tools::digest_to_hex(&digest));
+ Ok((is_duplicate, digest, compressed_size)) => {
+
+ self.compressed_size += compressed_size;
+ if !is_duplicate {
+ self.disk_size += compressed_size;
+ }
+
+ println!("ADD CHUNK {:016x} {} {}% {} {}", self.chunk_offset, chunk_size,
+ (compressed_size*100)/(chunk_size as u64), is_duplicate, tools::digest_to_hex(&digest));
self.writer.write(unsafe { &std::mem::transmute::<u64, [u8;8]>(self.chunk_offset as u64) })?;
self.writer.write(&digest)?;
self.chunk_buffer.truncate(0);
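A side note on the unchanged context lines above: the unsafe transmute has a safe standard-library equivalent since Rust 1.32, and write_all() additionally guards against short writes (a sketch, not part of this patch):

    // u64::to_ne_bytes() yields the same native-endian bytes as the transmute.
    self.writer.write_all(&(self.chunk_offset as u64).to_ne_bytes())?;
    self.writer.write_all(&digest)?;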
tmp_filename: PathBuf,
chunk_size: usize,
duplicate_chunks: usize,
+ disk_size: u64,
size: usize,
+ compressed_size: u64,
index: *mut u8,
pub uuid: [u8; 16],
pub ctime: u64,
chunk_size,
duplicate_chunks: 0,
size,
+ compressed_size: 0,
+ disk_size: 0,
index: data,
ctime,
uuid: *uuid.as_bytes(),
self.index = std::ptr::null_mut();
- println!("Original size: {} Compressed size: {} Deduplicated size: {}",
- self.size, self.size, self.size - (self.duplicate_chunks*self.chunk_size));
+ let compression = (self.compressed_size*100)/(self.size as u64);
+ let rate = (self.disk_size*100)/(self.size as u64);
+
+ println!("Original size: {}, compression rate: {}%, deduplicated size: {}, disk size: {} ({}%)",
+ self.size, compression, self.size - (self.duplicate_chunks*self.chunk_size), self.disk_size, rate);
Ok(())
}
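For example, with size = 1048576, chunk_size = 65536 and duplicate_chunks = 4, the deduplicated size printed above is 1048576 - 4*65536 = 786432 bytes; this accounting assumes every duplicate chunk is a full chunk_size long.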
if pos & (self.chunk_size-1) != 0 { bail!("add unaligned chunk (pos = {})", pos); }
- let (is_duplicate, digest) = self.store.insert_chunk(chunk)?;
+ let (is_duplicate, digest, compressed_size) = self.store.insert_chunk(chunk)?;
+
+ self.compressed_size += compressed_size;
- println!("ADD CHUNK {} {} {} {}", pos, chunk.len(), is_duplicate, tools::digest_to_hex(&digest));
+ println!("ADD CHUNK {} {} {}% {} {}", pos, chunk.len(),
+ (compressed_size*100)/(chunk.len() as u64), is_duplicate, tools::digest_to_hex(&digest));
+
+ if is_duplicate {
+ self.duplicate_chunks += 1;
+ } else {
+ self.disk_size += compressed_size;
+ }
- if is_duplicate { self.duplicate_chunks += 1; }
-
let index_pos = (pos/self.chunk_size)*32;
unsafe {
let dst = self.index.add(index_pos);