]> git.proxmox.com Git - ceph.git/blame - ceph/src/common/FastCDC.h
import ceph quincy 17.2.6
[ceph.git] / ceph / src / common / FastCDC.h
CommitLineData
f67539c2
TL
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#pragma once
5
6#include "CDC.h"
7
8// Based on this paper:
9// https://www.usenix.org/system/files/conference/atc16/atc16-paper-xia.pdf
10//
11// Changes:
12// - window size fixed at 64 bytes (to match our word size)
13// - use XOR instead of +
14// - match mask instead of 0
15// - use target mask when close to target size (instead of
16// small/large mask). The idea here is to try to use a consistent (target)
17// mask for most cut points if we can, and only resort to small/large mask
18// when we are (very) small or (very) large.
19
20// Note about the target_bits: The goal is an average chunk size of 1
21// << target_bits. However, in reality the average is ~1.25x that
22// because of the hard mininum chunk size.
23
24class FastCDC : public CDC {
25private:
26 int target_bits; ///< target chunk size bits (1 << target_bits)
27 int min_bits; ///< hard minimum chunk size bits (1 << min_bits)
28 int max_bits; ///< hard maximum chunk size bits (1 << max_bits)
29
30 uint64_t target_mask; ///< maskA in the paper (target_bits set)
31 uint64_t small_mask; ///< maskS in the paper (more bits set)
32 uint64_t large_mask; ///< maskL in the paper (fewer bits set)
33
34 /// lookup table with pseudorandom values for each byte
35 uint64_t table[256];
36
37 /// window size in bytes
38 const size_t window = sizeof(uint64_t)*8; // bits in uint64_t
39
40 void _setup(int target, int window_bits);
41
42public:
43 FastCDC(int target = 18, int window_bits = 0) {
44 _setup(target, window_bits);
45 };
46
47 void set_target_bits(int target, int window_bits) override {
48 _setup(target, window_bits);
49 }
50
51 void calc_chunks(
52 const bufferlist& bl,
53 std::vector<std::pair<uint64_t, uint64_t>> *chunks) const override;
54};