]> git.proxmox.com Git - ceph.git/blob - ceph/src/common/FastCDC.h
update ceph source to reef 18.2.0
[ceph.git] / ceph / src / common / FastCDC.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #pragma once
5
6 #include "CDC.h"
7
8 // Based on this paper:
9 // https://www.usenix.org/system/files/conference/atc16/atc16-paper-xia.pdf
10 //
11 // Changes:
12 // - window size fixed at 64 bytes (to match our word size)
13 // - use XOR instead of +
14 // - match mask instead of 0
15 // - use target mask when close to target size (instead of
16 // small/large mask). The idea here is to try to use a consistent (target)
17 // mask for most cut points if we can, and only resort to small/large mask
18 // when we are (very) small or (very) large.
19
20 // Note about the target_bits: The goal is an average chunk size of 1
21 // << target_bits. However, in reality the average is ~1.25x that
22 // because of the hard mininum chunk size.
23
24 class FastCDC : public CDC {
25 private:
26 int target_bits; ///< target chunk size bits (1 << target_bits)
27 int min_bits; ///< hard minimum chunk size bits (1 << min_bits)
28 int max_bits; ///< hard maximum chunk size bits (1 << max_bits)
29
30 uint64_t target_mask; ///< maskA in the paper (target_bits set)
31 uint64_t small_mask; ///< maskS in the paper (more bits set)
32 uint64_t large_mask; ///< maskL in the paper (fewer bits set)
33
34 /// lookup table with pseudorandom values for each byte
35 uint64_t table[256];
36
37 /// window size in bytes
38 const size_t window = sizeof(uint64_t)*8; // bits in uint64_t
39
40 void _setup(int target, int window_bits);
41
42 public:
43 FastCDC(int target = 18, int window_bits = 0) {
44 _setup(target, window_bits);
45 };
46
47 void set_target_bits(int target, int window_bits) override {
48 _setup(target, window_bits);
49 }
50
51 void calc_chunks(
52 const bufferlist& bl,
53 std::vector<std::pair<uint64_t, uint64_t>> *chunks) const override;
54 };