]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #pragma once | |
5 | ||
6 | #include "CDC.h" | |
7 | ||
8 | // Based on this paper: | |
9 | // https://www.usenix.org/system/files/conference/atc16/atc16-paper-xia.pdf | |
10 | // | |
11 | // Changes: | |
12 | // - window size fixed at 64 bytes (to match our word size) | |
13 | // - use XOR instead of + | |
14 | // - match mask instead of 0 | |
15 | // - use target mask when close to target size (instead of | |
16 | // small/large mask). The idea here is to try to use a consistent (target) | |
17 | // mask for most cut points if we can, and only resort to small/large mask | |
18 | // when we are (very) small or (very) large. | |
19 | ||
20 | // Note about the target_bits: The goal is an average chunk size of 1 | |
21 | // << target_bits. However, in reality the average is ~1.25x that | |
22 | // because of the hard mininum chunk size. | |
23 | ||
24 | class FastCDC : public CDC { | |
25 | private: | |
26 | int target_bits; ///< target chunk size bits (1 << target_bits) | |
27 | int min_bits; ///< hard minimum chunk size bits (1 << min_bits) | |
28 | int max_bits; ///< hard maximum chunk size bits (1 << max_bits) | |
29 | ||
30 | uint64_t target_mask; ///< maskA in the paper (target_bits set) | |
31 | uint64_t small_mask; ///< maskS in the paper (more bits set) | |
32 | uint64_t large_mask; ///< maskL in the paper (fewer bits set) | |
33 | ||
34 | /// lookup table with pseudorandom values for each byte | |
35 | uint64_t table[256]; | |
36 | ||
37 | /// window size in bytes | |
38 | const size_t window = sizeof(uint64_t)*8; // bits in uint64_t | |
39 | ||
40 | void _setup(int target, int window_bits); | |
41 | ||
42 | public: | |
43 | FastCDC(int target = 18, int window_bits = 0) { | |
44 | _setup(target, window_bits); | |
45 | }; | |
46 | ||
47 | void set_target_bits(int target, int window_bits) override { | |
48 | _setup(target, window_bits); | |
49 | } | |
50 | ||
51 | void calc_chunks( | |
52 | const bufferlist& bl, | |
53 | std::vector<std::pair<uint64_t, uint64_t>> *chunks) const override; | |
54 | }; |