-use failure::*;
-use futures::*;
+use std::collections::HashMap;
+use std::ops::Range;
+use std::pin::Pin;
+use std::task::{Context, Poll};
+
use bytes::{Bytes, BytesMut};
+use anyhow::{format_err, Error};
+use futures::*;
+
+pub struct ChunkReadInfo {
+ pub range: Range<u64>,
+ pub digest: [u8; 32],
+}
+
+impl ChunkReadInfo {
+ #[inline]
+ pub fn size(&self) -> u64 {
+ self.range.end - self.range.start
+ }
+}
/// Trait to get digest list from index files
///
/// To allow easy iteration over all used chunks.
-pub trait IndexFile: Send {
+pub trait IndexFile {
fn index_count(&self) -> usize;
fn index_digest(&self, pos: usize) -> Option<&[u8; 32]>;
fn index_bytes(&self) -> u64;
+
+ /// Returns most often used chunks
+ fn find_most_used_chunks(&self, max: usize) -> HashMap<[u8; 32], usize> {
+ let mut map = HashMap::new();
+
+ for pos in 0..self.index_count() {
+ let digest = self.index_digest(pos).unwrap();
+
+ let count = map.entry(*digest).or_insert(0);
+ *count += 1;
+ }
+
+ let mut most_used = Vec::new();
+
+ for (digest, count) in map {
+ if count <= 1 { continue; }
+ match most_used.binary_search_by_key(&count, |&(_digest, count)| count) {
+ Ok(p) => most_used.insert(p, (digest, count)),
+ Err(p) => most_used.insert(p, (digest, count)),
+ }
+
+ if most_used.len() > max { let _ = most_used.pop(); }
+ }
+
+ let mut map = HashMap::new();
+
+ for data in most_used {
+ map.insert(data.0, data.1);
+ }
+
+ map
+ }
}
/// Encode digest list from an `IndexFile` into a binary stream
///
/// The reader simply returns a birary stream of 32 byte digest values.
pub struct DigestListEncoder {
- index: Box<dyn IndexFile>,
+ index: Box<dyn IndexFile + Send + Sync>,
pos: usize,
count: usize,
}
impl DigestListEncoder {
- pub fn new(index: Box<dyn IndexFile>) -> Self {
+ pub fn new(index: Box<dyn IndexFile + Send + Sync>) -> Self {
let count = index.index_count();
Self { index, pos: 0, count }
}
}
impl std::io::Read for DigestListEncoder {
-
fn read(&mut self, buf: &mut [u8]) -> Result<usize, std::io::Error> {
- if buf.len() < 32 { panic!("read buffer too small"); }
+ if buf.len() < 32 {
+ panic!("read buffer too small");
+ }
+
if self.pos < self.count {
let mut written = 0;
loop {
let digest = self.index.index_digest(self.pos).unwrap();
- unsafe { std::ptr::copy_nonoverlapping(digest.as_ptr(), buf.as_mut_ptr().add(written), 32); }
+ buf[written..(written + 32)].copy_from_slice(digest);
self.pos += 1;
written += 32;
- if self.pos >= self.count { break; }
- if (written + 32) >= buf.len() { break; }
+ if self.pos >= self.count {
+ break;
+ }
+ if (written + 32) >= buf.len() {
+ break;
+ }
}
- return Ok(written);
+ Ok(written)
} else {
- return Ok(0);
+ Ok(0)
}
}
}
///
/// The reader simply returns a birary stream of 32 byte digest values.
-pub struct DigestListDecoder<S> {
+pub struct DigestListDecoder<S: Unpin> {
input: S,
buffer: BytesMut,
}
-impl <S> DigestListDecoder<S> {
-
+impl<S: Unpin> DigestListDecoder<S> {
pub fn new(input: S) -> Self {
Self { input, buffer: BytesMut::new() }
}
}
-impl <S> Stream for DigestListDecoder<S>
- where S: Stream<Item=Bytes>,
- S::Error: Into<Error>,
-{
- type Item = [u8; 32];
- type Error = Error;
+impl<S: Unpin> Unpin for DigestListDecoder<S> {}
- fn poll(&mut self) -> Result<Async<Option<Self::Item>>, Self::Error> {
- loop {
-
- if self.buffer.len() >= 32 {
+impl<S: Unpin, E> Stream for DigestListDecoder<S>
+where
+ S: Stream<Item=Result<Bytes, E>>,
+ E: Into<Error>,
+{
+ type Item = Result<[u8; 32], Error>;
- let left = self.buffer.split_to(32);
+ fn poll_next(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Option<Self::Item>> {
+ let this = self.get_mut();
- let mut digest: [u8; 32] = unsafe { std::mem::uninitialized() };
- unsafe { std::ptr::copy_nonoverlapping(left.as_ptr(), digest.as_mut_ptr(), 32); }
+ loop {
+ if this.buffer.len() >= 32 {
+ let left = this.buffer.split_to(32);
- return Ok(Async::Ready(Some(digest)));
+ let mut digest = std::mem::MaybeUninit::<[u8; 32]>::uninit();
+ unsafe {
+ (*digest.as_mut_ptr()).copy_from_slice(&left[..]);
+ return Poll::Ready(Some(Ok(digest.assume_init())));
+ }
}
- match self.input.poll() {
- Err(err) => {
- return Err(err.into());
+ match Pin::new(&mut this.input).poll_next(cx) {
+ Poll::Pending => {
+ return Poll::Pending;
}
- Ok(Async::NotReady) => {
- return Ok(Async::NotReady);
+ Poll::Ready(Some(Err(err))) => {
+ return Poll::Ready(Some(Err(err.into())));
}
- Ok(Async::Ready(None)) => {
- let rest = self.buffer.len();
- if rest == 0 { return Ok(Async::Ready(None)); }
- return Err(format_err!("got small digest ({} != 32).", rest));
- }
- Ok(Async::Ready(Some(data))) => {
- self.buffer.extend_from_slice(&data);
+ Poll::Ready(Some(Ok(data))) => {
+ this.buffer.extend_from_slice(&data);
// continue
}
+ Poll::Ready(None) => {
+ let rest = this.buffer.len();
+ if rest == 0 {
+ return Poll::Ready(None);
+ }
+ return Poll::Ready(Some(Err(format_err!(
+ "got small digest ({} != 32).",
+ rest,
+ ))));
+ }
}
}
}