]>
Commit | Line | Data |
---|---|---|
9f95a23c TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include <vector> | |
5 | #include <cstring> | |
f67539c2 | 6 | #include <random> |
9f95a23c TL |
7 | |
8 | #include "include/types.h" | |
9 | #include "include/buffer.h" | |
10 | ||
11 | #include "common/rabin.h" | |
12 | #include "gtest/gtest.h" | |
13 | ||
14 | TEST(Rabin, rabin_hash_simple) { | |
15 | uint64_t expected = 680425538102669423; | |
16 | uint64_t result; | |
17 | ||
18 | unsigned int window_size = 48; | |
19 | char data[window_size + 1]; | |
20 | RabinChunk rabin; | |
21 | memset(data, 0, window_size + 1); | |
22 | for (unsigned int i = 0; i < window_size; ++i) { | |
23 | data[i] = i; | |
24 | } | |
25 | result = rabin.gen_rabin_hash(data, 0); | |
26 | ASSERT_EQ(expected, result); | |
27 | } | |
28 | ||
29 | TEST(Rabin, chunk_check_min_max) { | |
30 | const char buf[] = "0123456789"; | |
31 | ||
32 | bufferlist bl; | |
33 | RabinChunk rabin; | |
34 | for (int i = 0; i < 250; i++) { | |
35 | bl.append(buf); | |
36 | } | |
37 | ||
38 | vector<pair<uint64_t, uint64_t>> chunks; | |
39 | size_t min_chunk = 2000; | |
40 | size_t max_chunk = 8000; | |
41 | ||
42 | rabin.do_rabin_chunks(bl, chunks, min_chunk, max_chunk); | |
43 | uint64_t chunk_size = chunks[0].second; | |
44 | ASSERT_GE(chunk_size , min_chunk); | |
45 | ASSERT_LE(chunk_size , max_chunk); | |
46 | } | |
47 | ||
48 | TEST(Rabin, test_cdc) { | |
49 | const char *base_str = "123456789012345678901234567890123456789012345678"; | |
50 | bufferlist bl, cmp_bl; | |
51 | for (int i = 0; i < 100; i++) { | |
52 | bl.append(base_str); | |
53 | } | |
54 | cmp_bl.append('a'); | |
55 | for (int i = 0; i < 100; i++) { | |
56 | cmp_bl.append(base_str); | |
57 | } | |
58 | ||
59 | RabinChunk rabin; | |
60 | vector<pair<uint64_t, uint64_t>> chunks; | |
61 | vector<pair<uint64_t, uint64_t>> cmp_chunks; | |
62 | size_t min_chunk = 200; | |
63 | size_t max_chunk = 800; | |
64 | rabin.do_rabin_chunks(bl, chunks, min_chunk, max_chunk); | |
65 | rabin.do_rabin_chunks(cmp_bl, cmp_chunks, min_chunk, max_chunk); | |
66 | // offset, len will be the same, except in the case of first offset | |
67 | ASSERT_EQ(chunks[4].first + 1, cmp_chunks[4].first); | |
68 | ASSERT_EQ(chunks[4].second, cmp_chunks[4].second); | |
69 | } | |
70 | ||
f67539c2 TL |
71 | void generate_buffer(int size, bufferlist *outbl) |
72 | { | |
73 | outbl->clear(); | |
74 | outbl->append_zero(size); | |
75 | char *b = outbl->c_str(); | |
76 | std::mt19937_64 engine; | |
77 | for (size_t i = 0; i < size / sizeof(uint64_t); ++i) { | |
78 | ((uint64_t*)b)[i] = engine(); | |
79 | } | |
80 | } | |
81 | ||
82 | #if 0 | |
83 | this fails! | |
84 | TEST(Rabin, shifts) | |
85 | { | |
86 | RabinChunk rabin; | |
87 | rabin.set_target_bits(18, 2); | |
88 | ||
89 | for (int frontlen = 1; frontlen < 5; frontlen++) { | |
90 | bufferlist bl1, bl2; | |
91 | generate_buffer(4*1024*1024, &bl1); | |
92 | generate_buffer(frontlen, &bl2); | |
93 | bl2.append(bl1); | |
94 | bl2.rebuild(); | |
95 | ||
96 | vector<pair<uint64_t, uint64_t>> chunks1, chunks2; | |
97 | rabin.do_rabin_chunks(bl1, chunks1); | |
98 | rabin.do_rabin_chunks(bl2, chunks2); | |
99 | cout << "1: " << chunks1 << std::endl; | |
100 | cout << "2: " << chunks2 << std::endl; | |
101 | ||
102 | ASSERT_GE(chunks2.size(), chunks1.size()); | |
103 | int match = 0; | |
104 | for (unsigned i = 0; i < chunks1.size(); ++i) { | |
105 | unsigned j = i + (chunks2.size() - chunks1.size()); | |
106 | if (chunks1[i].first + frontlen == chunks2[j].first && | |
107 | chunks1[i].second == chunks2[j].second) { | |
108 | match++; | |
109 | } | |
110 | } | |
111 | ASSERT_GE(match, chunks1.size() - 1); | |
112 | } | |
113 | } | |
114 | #endif | |
115 | ||
116 | void do_size_histogram(RabinChunk& rabin, bufferlist& bl, | |
117 | map<int,int> *h) | |
118 | { | |
119 | vector<pair<uint64_t, uint64_t>> chunks; | |
120 | rabin.do_rabin_chunks(bl, chunks); | |
121 | for (auto& i : chunks) { | |
122 | unsigned b = i.second & 0xfffff000; | |
123 | //unsigned b = 1 << cbits(i.second); | |
124 | (*h)[b]++; | |
125 | } | |
126 | } | |
127 | ||
128 | void print_histogram(map<int,int>& h) | |
129 | { | |
130 | cout << "size\tcount" << std::endl; | |
131 | for (auto i : h) { | |
132 | cout << i.first << "\t" << i.second << std::endl; | |
133 | } | |
134 | } | |
135 | ||
136 | TEST(Rabin, chunk_random) | |
137 | { | |
138 | RabinChunk rabin; | |
139 | rabin.set_target_bits(18, 2); | |
140 | ||
141 | map<int,int> h; | |
142 | for (int i = 0; i < 8; ++i) { | |
143 | cout << "."; | |
144 | cout.flush(); | |
145 | bufferlist r; | |
146 | generate_buffer(16*1024*1024, &r); | |
147 | do_size_histogram(rabin, r, &h); | |
148 | } | |
149 | cout << std::endl; | |
150 | print_histogram(h); | |
151 | } |