]>
git.proxmox.com Git - ceph.git/blob - ceph/src/test/common/test_rabin_chunk.cc
dc33ba0fece73b049efb62fbca0ab30fa6b2813a
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
8 #include "include/types.h"
9 #include "include/buffer.h"
11 #include "common/rabin.h"
12 #include "gtest/gtest.h"
14 TEST(Rabin
, rabin_hash_simple
) {
15 uint64_t expected
= 680425538102669423;
18 unsigned int window_size
= 48;
19 char data
[window_size
+ 1];
21 memset(data
, 0, window_size
+ 1);
22 for (unsigned int i
= 0; i
< window_size
; ++i
) {
25 result
= rabin
.gen_rabin_hash(data
, 0);
26 ASSERT_EQ(expected
, result
);
29 TEST(Rabin
, chunk_check_min_max
) {
30 const char buf
[] = "0123456789";
34 for (int i
= 0; i
< 250; i
++) {
38 vector
<pair
<uint64_t, uint64_t>> chunks
;
39 size_t min_chunk
= 2000;
40 size_t max_chunk
= 8000;
42 rabin
.do_rabin_chunks(bl
, chunks
, min_chunk
, max_chunk
);
43 uint64_t chunk_size
= chunks
[0].second
;
44 ASSERT_GE(chunk_size
, min_chunk
);
45 ASSERT_LE(chunk_size
, max_chunk
);
48 TEST(Rabin
, test_cdc
) {
49 const char *base_str
= "123456789012345678901234567890123456789012345678";
50 bufferlist bl
, cmp_bl
;
51 for (int i
= 0; i
< 100; i
++) {
55 for (int i
= 0; i
< 100; i
++) {
56 cmp_bl
.append(base_str
);
60 vector
<pair
<uint64_t, uint64_t>> chunks
;
61 vector
<pair
<uint64_t, uint64_t>> cmp_chunks
;
62 size_t min_chunk
= 200;
63 size_t max_chunk
= 800;
64 rabin
.do_rabin_chunks(bl
, chunks
, min_chunk
, max_chunk
);
65 rabin
.do_rabin_chunks(cmp_bl
, cmp_chunks
, min_chunk
, max_chunk
);
66 // offset, len will be the same, except in the case of first offset
67 ASSERT_EQ(chunks
[4].first
+ 1, cmp_chunks
[4].first
);
68 ASSERT_EQ(chunks
[4].second
, cmp_chunks
[4].second
);
71 void generate_buffer(int size
, bufferlist
*outbl
)
74 outbl
->append_zero(size
);
75 char *b
= outbl
->c_str();
76 std::mt19937_64 engine
;
77 for (size_t i
= 0; i
< size
/ sizeof(uint64_t); ++i
) {
78 ((uint64_t*)b
)[i
] = engine();
87 rabin
.set_target_bits(18, 2);
89 for (int frontlen
= 1; frontlen
< 5; frontlen
++) {
91 generate_buffer(4*1024*1024, &bl1
);
92 generate_buffer(frontlen
, &bl2
);
96 vector
<pair
<uint64_t, uint64_t>> chunks1
, chunks2
;
97 rabin
.do_rabin_chunks(bl1
, chunks1
);
98 rabin
.do_rabin_chunks(bl2
, chunks2
);
99 cout
<< "1: " << chunks1
<< std::endl
;
100 cout
<< "2: " << chunks2
<< std::endl
;
102 ASSERT_GE(chunks2
.size(), chunks1
.size());
104 for (unsigned i
= 0; i
< chunks1
.size(); ++i
) {
105 unsigned j
= i
+ (chunks2
.size() - chunks1
.size());
106 if (chunks1
[i
].first
+ frontlen
== chunks2
[j
].first
&&
107 chunks1
[i
].second
== chunks2
[j
].second
) {
111 ASSERT_GE(match
, chunks1
.size() - 1);
116 void do_size_histogram(RabinChunk
& rabin
, bufferlist
& bl
,
119 vector
<pair
<uint64_t, uint64_t>> chunks
;
120 rabin
.do_rabin_chunks(bl
, chunks
);
121 for (auto& i
: chunks
) {
122 unsigned b
= i
.second
& 0xfffff000;
123 //unsigned b = 1 << cbits(i.second);
128 void print_histogram(map
<int,int>& h
)
130 cout
<< "size\tcount" << std::endl
;
132 cout
<< i
.first
<< "\t" << i
.second
<< std::endl
;
136 TEST(Rabin
, chunk_random
)
139 rabin
.set_target_bits(18, 2);
142 for (int i
= 0; i
< 8; ++i
) {
146 generate_buffer(16*1024*1024, &r
);
147 do_size_histogram(rabin
, r
, &h
);