]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/table/block_based/partitioned_filter_block_test.cc
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / rocksdb / table / block_based / partitioned_filter_block_test.cc
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5
6#include <map>
7
8#include "rocksdb/filter_policy.h"
9
f67539c2
TL
10#include "table/block_based/block_based_table_reader.h"
11#include "table/block_based/partitioned_filter_block.h"
12#include "table/block_based/filter_policy_internal.h"
13
14#include "index_builder.h"
15#include "logging/logging.h"
16#include "test_util/testharness.h"
17#include "test_util/testutil.h"
7c673cae
FG
18#include "util/coding.h"
19#include "util/hash.h"
7c673cae 20
f67539c2 21namespace ROCKSDB_NAMESPACE {
7c673cae 22
f67539c2 23std::map<uint64_t, std::string> blooms;
7c673cae
FG
24
25class MockedBlockBasedTable : public BlockBasedTable {
26 public:
f67539c2
TL
27 MockedBlockBasedTable(Rep* rep, PartitionedIndexBuilder* pib)
28 : BlockBasedTable(rep, /*block_cache_tracer=*/nullptr) {
11fdf7f2 29 // Initialize what Open normally does as much as necessary for the test
f67539c2
TL
30 rep->index_key_includes_seq = pib->seperator_is_key_plus_seq();
31 rep->index_value_is_full = !pib->get_use_value_delta_encoding();
7c673cae 32 }
f67539c2 33};
11fdf7f2 34
f67539c2
TL
35class MyPartitionedFilterBlockReader : public PartitionedFilterBlockReader {
36 public:
37 MyPartitionedFilterBlockReader(BlockBasedTable* t,
38 CachableEntry<Block>&& filter_block)
39 : PartitionedFilterBlockReader(t, std::move(filter_block)) {
40 for (const auto& pair : blooms) {
41 const uint64_t offset = pair.first;
42 const std::string& bloom = pair.second;
43
44 assert(t);
45 assert(t->get_rep());
46 CachableEntry<ParsedFullFilterBlock> block(
47 new ParsedFullFilterBlock(
48 t->get_rep()->table_options.filter_policy.get(),
49 BlockContents(Slice(bloom))),
50 nullptr /* cache */, nullptr /* cache_handle */,
51 true /* own_value */);
52 filter_map_[offset] = std::move(block);
53 }
11fdf7f2 54 }
7c673cae
FG
55};
56
11fdf7f2
TL
57class PartitionedFilterBlockTest
58 : public testing::Test,
59 virtual public ::testing::WithParamInterface<uint32_t> {
7c673cae 60 public:
f67539c2
TL
61 Options options_;
62 ImmutableCFOptions ioptions_;
63 EnvOptions env_options_;
7c673cae 64 BlockBasedTableOptions table_options_;
f67539c2
TL
65 InternalKeyComparator icomp_;
66 std::unique_ptr<BlockBasedTable> table_;
67 std::shared_ptr<Cache> cache_;
68 int bits_per_key_;
69
70 PartitionedFilterBlockTest()
71 : ioptions_(options_),
72 env_options_(options_),
73 icomp_(options_.comparator),
74 bits_per_key_(10) {
75 table_options_.filter_policy.reset(
76 NewBloomFilterPolicy(bits_per_key_, false));
11fdf7f2
TL
77 table_options_.format_version = GetParam();
78 table_options_.index_block_restart_interval = 3;
7c673cae
FG
79 }
80
494da23a 81 ~PartitionedFilterBlockTest() override {}
7c673cae
FG
82
83 const std::string keys[4] = {"afoo", "bar", "box", "hello"};
84 const std::string missing_keys[2] = {"missing", "other"};
85
86 uint64_t MaxIndexSize() {
87 int num_keys = sizeof(keys) / sizeof(*keys);
88 uint64_t max_key_size = 0;
89 for (int i = 1; i < num_keys; i++) {
90 max_key_size = std::max(max_key_size, static_cast<uint64_t>(keys[i].size()));
91 }
92 uint64_t max_index_size = num_keys * (max_key_size + 8 /*handle*/);
93 return max_index_size;
94 }
95
11fdf7f2 96 uint64_t MaxFilterSize() {
11fdf7f2 97 int num_keys = sizeof(keys) / sizeof(*keys);
f67539c2
TL
98 // General, rough over-approximation
99 return num_keys * bits_per_key_ + (CACHE_LINE_SIZE * 8 + /*metadata*/ 5);
11fdf7f2
TL
100 }
101
f67539c2 102 uint64_t last_offset = 10;
7c673cae
FG
103 BlockHandle Write(const Slice& slice) {
104 BlockHandle bh(last_offset + 1, slice.size());
f67539c2 105 blooms[bh.offset()] = slice.ToString();
7c673cae
FG
106 last_offset += bh.size();
107 return bh;
108 }
109
110 PartitionedIndexBuilder* NewIndexBuilder() {
11fdf7f2
TL
111 const bool kValueDeltaEncoded = true;
112 return PartitionedIndexBuilder::CreateIndexBuilder(
f67539c2 113 &icomp_, !kValueDeltaEncoded, table_options_);
7c673cae
FG
114 }
115
116 PartitionedFilterBlockBuilder* NewBuilder(
11fdf7f2
TL
117 PartitionedIndexBuilder* const p_index_builder,
118 const SliceTransform* prefix_extractor = nullptr) {
119 assert(table_options_.block_size_deviation <= 100);
120 auto partition_size = static_cast<uint32_t>(
121 ((table_options_.metadata_block_size *
122 (100 - table_options_.block_size_deviation)) +
123 99) /
124 100);
125 partition_size = std::max(partition_size, static_cast<uint32_t>(1));
126 const bool kValueDeltaEncoded = true;
7c673cae 127 return new PartitionedFilterBlockBuilder(
11fdf7f2 128 prefix_extractor, table_options_.whole_key_filtering,
f67539c2
TL
129 BloomFilterPolicy::GetBuilderFromContext(
130 FilterBuildingContext(table_options_)),
11fdf7f2
TL
131 table_options_.index_block_restart_interval, !kValueDeltaEncoded,
132 p_index_builder, partition_size);
7c673cae
FG
133 }
134
7c673cae 135 PartitionedFilterBlockReader* NewReader(
f67539c2 136 PartitionedFilterBlockBuilder* builder, PartitionedIndexBuilder* pib) {
7c673cae
FG
137 BlockHandle bh;
138 Status status;
139 Slice slice;
140 do {
141 slice = builder->Finish(bh, &status);
142 bh = Write(slice);
143 } while (status.IsIncomplete());
f67539c2
TL
144
145 constexpr bool skip_filters = false;
146 constexpr int level = 0;
147 constexpr bool immortal_table = false;
148 table_.reset(new MockedBlockBasedTable(
149 new BlockBasedTable::Rep(ioptions_, env_options_, table_options_,
150 icomp_, skip_filters, level, immortal_table),
151 pib));
152 BlockContents contents(slice);
153 CachableEntry<Block> block(
154 new Block(std::move(contents), kDisableGlobalSequenceNumber,
155 0 /* read_amp_bytes_per_bit */, nullptr),
156 nullptr /* cache */, nullptr /* cache_handle */, true /* own_value */);
157 auto reader =
158 new MyPartitionedFilterBlockReader(table_.get(), std::move(block));
7c673cae
FG
159 return reader;
160 }
161
162 void VerifyReader(PartitionedFilterBlockBuilder* builder,
11fdf7f2
TL
163 PartitionedIndexBuilder* pib, bool empty = false,
164 const SliceTransform* prefix_extractor = nullptr) {
165 std::unique_ptr<PartitionedFilterBlockReader> reader(
f67539c2 166 NewReader(builder, pib));
7c673cae
FG
167 // Querying added keys
168 const bool no_io = true;
169 for (auto key : keys) {
170 auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
171 const Slice ikey_slice = Slice(*ikey.rep());
11fdf7f2 172 ASSERT_TRUE(reader->KeyMayMatch(key, prefix_extractor, kNotValid, !no_io,
f67539c2
TL
173 &ikey_slice, /*get_context=*/nullptr,
174 /*lookup_context=*/nullptr));
7c673cae
FG
175 }
176 {
177 // querying a key twice
178 auto ikey = InternalKey(keys[0], 0, ValueType::kTypeValue);
179 const Slice ikey_slice = Slice(*ikey.rep());
f67539c2
TL
180 ASSERT_TRUE(reader->KeyMayMatch(
181 keys[0], prefix_extractor, kNotValid, !no_io, &ikey_slice,
182 /*get_context=*/nullptr, /*lookup_context=*/nullptr));
7c673cae
FG
183 }
184 // querying missing keys
185 for (auto key : missing_keys) {
186 auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
187 const Slice ikey_slice = Slice(*ikey.rep());
188 if (empty) {
f67539c2
TL
189 ASSERT_TRUE(reader->KeyMayMatch(
190 key, prefix_extractor, kNotValid, !no_io, &ikey_slice,
191 /*get_context=*/nullptr, /*lookup_context=*/nullptr));
7c673cae
FG
192 } else {
193 // assuming a good hash function
f67539c2
TL
194 ASSERT_FALSE(reader->KeyMayMatch(
195 key, prefix_extractor, kNotValid, !no_io, &ikey_slice,
196 /*get_context=*/nullptr, /*lookup_context=*/nullptr));
7c673cae
FG
197 }
198 }
199 }
200
201 int TestBlockPerKey() {
202 std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
203 std::unique_ptr<PartitionedFilterBlockBuilder> builder(
204 NewBuilder(pib.get()));
205 int i = 0;
206 builder->Add(keys[i]);
207 CutABlock(pib.get(), keys[i], keys[i + 1]);
208 i++;
209 builder->Add(keys[i]);
210 CutABlock(pib.get(), keys[i], keys[i + 1]);
211 i++;
212 builder->Add(keys[i]);
213 builder->Add(keys[i]);
214 CutABlock(pib.get(), keys[i], keys[i + 1]);
215 i++;
216 builder->Add(keys[i]);
217 CutABlock(pib.get(), keys[i]);
218
11fdf7f2 219 VerifyReader(builder.get(), pib.get());
7c673cae
FG
220 return CountNumOfIndexPartitions(pib.get());
221 }
222
11fdf7f2 223 void TestBlockPerTwoKeys(const SliceTransform* prefix_extractor = nullptr) {
7c673cae
FG
224 std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
225 std::unique_ptr<PartitionedFilterBlockBuilder> builder(
11fdf7f2 226 NewBuilder(pib.get(), prefix_extractor));
7c673cae
FG
227 int i = 0;
228 builder->Add(keys[i]);
229 i++;
230 builder->Add(keys[i]);
231 CutABlock(pib.get(), keys[i], keys[i + 1]);
232 i++;
233 builder->Add(keys[i]);
234 builder->Add(keys[i]);
235 i++;
236 builder->Add(keys[i]);
237 CutABlock(pib.get(), keys[i]);
238
11fdf7f2 239 VerifyReader(builder.get(), pib.get(), prefix_extractor);
7c673cae
FG
240 }
241
242 void TestBlockPerAllKeys() {
243 std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
244 std::unique_ptr<PartitionedFilterBlockBuilder> builder(
245 NewBuilder(pib.get()));
246 int i = 0;
247 builder->Add(keys[i]);
248 i++;
249 builder->Add(keys[i]);
250 i++;
251 builder->Add(keys[i]);
252 builder->Add(keys[i]);
253 i++;
254 builder->Add(keys[i]);
255 CutABlock(pib.get(), keys[i]);
256
11fdf7f2 257 VerifyReader(builder.get(), pib.get());
7c673cae
FG
258 }
259
260 void CutABlock(PartitionedIndexBuilder* builder,
261 const std::string& user_key) {
262 // Assuming a block is cut, add an entry to the index
263 std::string key =
264 std::string(*InternalKey(user_key, 0, ValueType::kTypeValue).rep());
265 BlockHandle dont_care_block_handle(1, 1);
266 builder->AddIndexEntry(&key, nullptr, dont_care_block_handle);
267 }
268
269 void CutABlock(PartitionedIndexBuilder* builder, const std::string& user_key,
270 const std::string& next_user_key) {
271 // Assuming a block is cut, add an entry to the index
272 std::string key =
273 std::string(*InternalKey(user_key, 0, ValueType::kTypeValue).rep());
274 std::string next_key = std::string(
275 *InternalKey(next_user_key, 0, ValueType::kTypeValue).rep());
276 BlockHandle dont_care_block_handle(1, 1);
277 Slice slice = Slice(next_key.data(), next_key.size());
278 builder->AddIndexEntry(&key, &slice, dont_care_block_handle);
279 }
280
281 int CountNumOfIndexPartitions(PartitionedIndexBuilder* builder) {
282 IndexBuilder::IndexBlocks dont_care_ib;
283 BlockHandle dont_care_bh(10, 10);
284 Status s;
285 int cnt = 0;
286 do {
287 s = builder->Finish(&dont_care_ib, dont_care_bh);
288 cnt++;
289 } while (s.IsIncomplete());
290 return cnt - 1; // 1 is 2nd level index
291 }
292};
293
11fdf7f2
TL
294INSTANTIATE_TEST_CASE_P(FormatDef, PartitionedFilterBlockTest,
295 testing::Values(test::kDefaultFormatVersion));
296INSTANTIATE_TEST_CASE_P(FormatLatest, PartitionedFilterBlockTest,
297 testing::Values(test::kLatestFormatVersion));
298
299TEST_P(PartitionedFilterBlockTest, EmptyBuilder) {
7c673cae
FG
300 std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
301 std::unique_ptr<PartitionedFilterBlockBuilder> builder(NewBuilder(pib.get()));
302 const bool empty = true;
11fdf7f2 303 VerifyReader(builder.get(), pib.get(), empty);
7c673cae
FG
304}
305
11fdf7f2 306TEST_P(PartitionedFilterBlockTest, OneBlock) {
7c673cae
FG
307 uint64_t max_index_size = MaxIndexSize();
308 for (uint64_t i = 1; i < max_index_size + 1; i++) {
309 table_options_.metadata_block_size = i;
310 TestBlockPerAllKeys();
311 }
312}
313
11fdf7f2 314TEST_P(PartitionedFilterBlockTest, TwoBlocksPerKey) {
7c673cae
FG
315 uint64_t max_index_size = MaxIndexSize();
316 for (uint64_t i = 1; i < max_index_size + 1; i++) {
317 table_options_.metadata_block_size = i;
318 TestBlockPerTwoKeys();
319 }
320}
321
11fdf7f2
TL
322// This reproduces the bug that a prefix is the same among multiple consecutive
323// blocks but the bug would add it only to the first block.
324TEST_P(PartitionedFilterBlockTest, SamePrefixInMultipleBlocks) {
325 // some small number to cause partition cuts
326 table_options_.metadata_block_size = 1;
f67539c2
TL
327 std::unique_ptr<const SliceTransform> prefix_extractor(
328 ROCKSDB_NAMESPACE::NewFixedPrefixTransform(1));
11fdf7f2
TL
329 std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
330 std::unique_ptr<PartitionedFilterBlockBuilder> builder(
331 NewBuilder(pib.get(), prefix_extractor.get()));
f67539c2 332 const std::string pkeys[3] = {"p-key10", "p-key20", "p-key30"};
11fdf7f2
TL
333 builder->Add(pkeys[0]);
334 CutABlock(pib.get(), pkeys[0], pkeys[1]);
335 builder->Add(pkeys[1]);
336 CutABlock(pib.get(), pkeys[1], pkeys[2]);
337 builder->Add(pkeys[2]);
338 CutABlock(pib.get(), pkeys[2]);
339 std::unique_ptr<PartitionedFilterBlockReader> reader(
f67539c2 340 NewReader(builder.get(), pib.get()));
11fdf7f2
TL
341 for (auto key : pkeys) {
342 auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
343 const Slice ikey_slice = Slice(*ikey.rep());
f67539c2
TL
344 ASSERT_TRUE(reader->PrefixMayMatch(
345 prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid,
346 /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr,
347 /*lookup_context=*/nullptr));
348 }
349 // Non-existent keys but with the same prefix
350 const std::string pnonkeys[4] = {"p-key9", "p-key11", "p-key21", "p-key31"};
351 for (auto key : pnonkeys) {
352 auto ikey = InternalKey(key, 0, ValueType::kTypeValue);
353 const Slice ikey_slice = Slice(*ikey.rep());
354 ASSERT_TRUE(reader->PrefixMayMatch(
355 prefix_extractor->Transform(key), prefix_extractor.get(), kNotValid,
356 /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr,
357 /*lookup_context=*/nullptr));
358 }
359}
360
361// This reproduces the bug in format_version=3 that the seeking the prefix will
362// lead us to the partition before the one that has filter for the prefix.
363TEST_P(PartitionedFilterBlockTest, PrefixInWrongPartitionBug) {
364 // some small number to cause partition cuts
365 table_options_.metadata_block_size = 1;
366 std::unique_ptr<const SliceTransform> prefix_extractor(
367 ROCKSDB_NAMESPACE::NewFixedPrefixTransform(2));
368 std::unique_ptr<PartitionedIndexBuilder> pib(NewIndexBuilder());
369 std::unique_ptr<PartitionedFilterBlockBuilder> builder(
370 NewBuilder(pib.get(), prefix_extractor.get()));
371 // In the bug, searching for prefix "p3" on an index with format version 3,
372 // will give the key "p3" and the partition of the keys that are <= p3, i.e.,
373 // p2-keys, where the filter for prefix "p3" does not exist.
374 const std::string pkeys[] = {"p1-key1", "p2-key2", "p3-key3", "p4-key3",
375 "p5-key3"};
376 builder->Add(pkeys[0]);
377 CutABlock(pib.get(), pkeys[0], pkeys[1]);
378 builder->Add(pkeys[1]);
379 CutABlock(pib.get(), pkeys[1], pkeys[2]);
380 builder->Add(pkeys[2]);
381 CutABlock(pib.get(), pkeys[2], pkeys[3]);
382 builder->Add(pkeys[3]);
383 CutABlock(pib.get(), pkeys[3], pkeys[4]);
384 builder->Add(pkeys[4]);
385 CutABlock(pib.get(), pkeys[4]);
386 std::unique_ptr<PartitionedFilterBlockReader> reader(
387 NewReader(builder.get(), pib.get()));
388 for (auto key : pkeys) {
389 auto prefix = prefix_extractor->Transform(key);
390 auto ikey = InternalKey(prefix, 0, ValueType::kTypeValue);
391 const Slice ikey_slice = Slice(*ikey.rep());
392 ASSERT_TRUE(reader->PrefixMayMatch(
393 prefix, prefix_extractor.get(), kNotValid,
394 /*no_io=*/false, &ikey_slice, /*get_context=*/nullptr,
395 /*lookup_context=*/nullptr));
11fdf7f2
TL
396 }
397}
398
399TEST_P(PartitionedFilterBlockTest, OneBlockPerKey) {
7c673cae
FG
400 uint64_t max_index_size = MaxIndexSize();
401 for (uint64_t i = 1; i < max_index_size + 1; i++) {
402 table_options_.metadata_block_size = i;
403 TestBlockPerKey();
404 }
405}
406
11fdf7f2 407TEST_P(PartitionedFilterBlockTest, PartitionCount) {
7c673cae 408 int num_keys = sizeof(keys) / sizeof(*keys);
11fdf7f2
TL
409 table_options_.metadata_block_size =
410 std::max(MaxIndexSize(), MaxFilterSize());
7c673cae
FG
411 int partitions = TestBlockPerKey();
412 ASSERT_EQ(partitions, 1);
413 // A low number ensures cutting a block after each key
414 table_options_.metadata_block_size = 1;
415 partitions = TestBlockPerKey();
416 ASSERT_EQ(partitions, num_keys - 1 /* last two keys make one flush */);
417}
418
f67539c2 419} // namespace ROCKSDB_NAMESPACE
7c673cae
FG
420
421int main(int argc, char** argv) {
422 ::testing::InitGoogleTest(&argc, argv);
423 return RUN_ALL_TESTS();
424}