]>
git.proxmox.com Git - ceph.git/blob - ceph/src/common/FastCDC.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
8 // Based on this paper:
9 // https://www.usenix.org/system/files/conference/atc16/atc16-paper-xia.pdf
12 // - window size fixed at 64 bytes (to match our word size)
13 // - use XOR instead of +
14 // - match mask instead of 0
15 // - use target mask when close to target size (instead of
16 // small/large mask). The idea here is to try to use a consistent (target)
17 // mask for most cut points if we can, and only resort to small/large mask
18 // when we are (very) small or (very) large.
20 // Note about the target_bits: The goal is an average chunk size of 1
21 // << target_bits. However, in reality the average is ~1.25x that
22 // because of the hard minimum chunk size.
24 class FastCDC
: public CDC
{
26 int target_bits
; ///< target chunk size bits (1 << target_bits)
27 int min_bits
; ///< hard minimum chunk size bits (1 << min_bits)
28 int max_bits
; ///< hard maximum chunk size bits (1 << max_bits)
30 uint64_t target_mask
; ///< maskA in the paper (target_bits set)
31 uint64_t small_mask
; ///< maskS in the paper (more bits set)
32 uint64_t large_mask
; ///< maskL in the paper (fewer bits set)
34 /// lookup table with pseudorandom values for each byte
37 /// window size in bytes
38 const size_t window
= sizeof(uint64_t)*8; // bits in uint64_t
40 void _setup(int target
, int window_bits
);
43 FastCDC(int target
= 18, int window_bits
= 0) {
44 _setup(target
, window_bits
);
47 void set_target_bits(int target
, int window_bits
) override
{
48 _setup(target
, window_bits
);
53 std::vector
<std::pair
<uint64_t, uint64_t>> *chunks
) const override
;