// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#if !defined(GFLAGS) || defined(ROCKSDB_LITE)
#include <cstdio>
int main() {
  fprintf(stderr, "filter_bench requires gflags and !ROCKSDB_LITE\n");
  return 1;
}
#else

#include <cinttypes>
#include <iostream>
#include <sstream>
#include <vector>

#include "memory/arena.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/cache.h"
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/table.h"
#include "table/block_based/filter_policy_internal.h"
#include "table/block_based/full_filter_block.h"
#include "table/block_based/mock_block_based_table.h"
#include "table/plain/plain_table_bloom.h"
#include "util/cast_util.h"
#include "util/gflags_compat.h"
#include "util/hash.h"
#include "util/random.h"
#include "util/stderr_logger.h"
#include "util/stop_watch.h"
#include "util/string_util.h"

using GFLAGS_NAMESPACE::ParseCommandLineFlags;
using GFLAGS_NAMESPACE::RegisterFlagValidator;
using GFLAGS_NAMESPACE::SetUsageMessage;

DEFINE_uint32(seed, 0, "Seed for random number generators");

DEFINE_double(working_mem_size_mb, 200,
              "MB of memory to get up to among all filters, unless "
              "m_keys_total_max is specified.");

DEFINE_uint32(average_keys_per_filter, 10000,
              "Average number of keys per filter");

DEFINE_double(vary_key_count_ratio, 0.4,
              "Vary number of keys by up to +/- vary_key_count_ratio * "
              "average_keys_per_filter.");

DEFINE_uint32(key_size, 24, "Average number of bytes for each key");

DEFINE_bool(vary_key_alignment, true,
            "Whether to vary key alignment (default: at least 32-bit "
            "alignment)");

DEFINE_uint32(vary_key_size_log2_interval, 5,
              "Use same key size 2^n times, then change. Key size varies from "
              "-2 to +2 bytes vs. average, unless n>=30 to fix key size.");

DEFINE_uint32(batch_size, 8, "Number of keys to group in each batch");

DEFINE_double(bits_per_key, 10.0, "Bits per key setting for filters");

DEFINE_double(m_queries, 200, "Millions of queries for each test mode");

DEFINE_double(m_keys_total_max, 0,
              "Maximum total keys added to filters, in millions. "
              "0 (default) disables. Non-zero overrides working_mem_size_mb "
              "option.");

DEFINE_bool(use_full_block_reader, false,
            "Use FullFilterBlockReader interface rather than FilterBitsReader");

DEFINE_bool(use_plain_table_bloom, false,
            "Use PlainTableBloom structure and interface rather than "
            "FilterBitsReader/FullFilterBlockReader");

DEFINE_bool(new_builder, false,
            "Whether to create a new builder for each new filter");

DEFINE_uint32(impl, 0,
              "Select filter implementation. Without -use_plain_table_bloom: "
              "0 = legacy full Bloom filter, "
              "1 = format_version 5 Bloom filter, 2 = Ribbon128 filter. With "
              "-use_plain_table_bloom: 0 = no locality, 1 = locality.");

DEFINE_bool(net_includes_hashing, false,
            "Whether query net ns/op times should include hashing. "
            "(if not, dry run will include hashing) "
            "(build times always include hashing)");

DEFINE_bool(optimize_filters_for_memory, false,
            "Setting for BlockBasedTableOptions::optimize_filters_for_memory");

DEFINE_bool(detect_filter_construct_corruption, false,
            "Setting for "
            "BlockBasedTableOptions::detect_filter_construct_corruption");

DEFINE_uint32(block_cache_capacity_MB, 8,
              "Setting for "
              "LRUCacheOptions::capacity");

DEFINE_bool(charge_filter_construction, false,
            "Setting for "
            "CacheEntryRoleOptions::charged of "
            "CacheEntryRole::kFilterConstruction");

DEFINE_bool(strict_capacity_limit, false,
            "Setting for "
            "LRUCacheOptions::strict_capacity_limit");

DEFINE_bool(quick, false, "Run more limited set of tests, fewer queries");

DEFINE_bool(best_case, false, "Run limited tests only for best-case");

DEFINE_bool(allow_bad_fp_rate, false, "Continue even if FP rate is bad");

DEFINE_bool(legend, false,
            "Print more information about interpreting results instead of "
            "running tests");

DEFINE_uint32(runs, 1, "Number of times to rebuild and run benchmark tests");

void _always_assert_fail(int line, const char *file, const char *expr) {
  fprintf(stderr, "%s: %d: Assertion %s failed\n", file, line, expr);
  abort();
}

#define ALWAYS_ASSERT(cond) \
  ((cond) ? (void)0 : ::_always_assert_fail(__LINE__, __FILE__, #cond))

#ifndef NDEBUG
// This could affect build times enough that we should not include it for
// accurate speed tests
#define PREDICT_FP_RATE
#endif

using ROCKSDB_NAMESPACE::Arena;
using ROCKSDB_NAMESPACE::BlockContents;
using ROCKSDB_NAMESPACE::BloomFilterPolicy;
using ROCKSDB_NAMESPACE::BloomHash;
using ROCKSDB_NAMESPACE::BloomLikeFilterPolicy;
using ROCKSDB_NAMESPACE::BuiltinFilterBitsBuilder;
using ROCKSDB_NAMESPACE::CachableEntry;
using ROCKSDB_NAMESPACE::Cache;
using ROCKSDB_NAMESPACE::CacheEntryRole;
using ROCKSDB_NAMESPACE::CacheEntryRoleOptions;
using ROCKSDB_NAMESPACE::EncodeFixed32;
using ROCKSDB_NAMESPACE::Env;
using ROCKSDB_NAMESPACE::FastRange32;
using ROCKSDB_NAMESPACE::FilterBitsReader;
using ROCKSDB_NAMESPACE::FilterBuildingContext;
using ROCKSDB_NAMESPACE::FilterPolicy;
using ROCKSDB_NAMESPACE::FullFilterBlockReader;
using ROCKSDB_NAMESPACE::GetSliceHash;
using ROCKSDB_NAMESPACE::GetSliceHash64;
using ROCKSDB_NAMESPACE::Lower32of64;
using ROCKSDB_NAMESPACE::LRUCacheOptions;
using ROCKSDB_NAMESPACE::ParsedFullFilterBlock;
using ROCKSDB_NAMESPACE::PlainTableBloomV1;
using ROCKSDB_NAMESPACE::Random32;
using ROCKSDB_NAMESPACE::Slice;
using ROCKSDB_NAMESPACE::static_cast_with_check;
using ROCKSDB_NAMESPACE::Status;
using ROCKSDB_NAMESPACE::StderrLogger;
using ROCKSDB_NAMESPACE::mock::MockBlockBasedTableTester;

struct KeyMaker {
  KeyMaker(size_t avg_size)
      : smallest_size_(avg_size -
                       (FLAGS_vary_key_size_log2_interval >= 30 ? 2 : 0)),
        buf_size_(avg_size + 11),  // pad to vary key size and alignment
        buf_(new char[buf_size_]) {
    memset(buf_.get(), 0, buf_size_);
    assert(smallest_size_ > 8);
  }
  size_t smallest_size_;
  size_t buf_size_;
  std::unique_ptr<char[]> buf_;

  // Returns a unique(-ish) key based on the given parameter values. Each
  // call returns a Slice from the same buffer so previously returned
  // Slices should be considered invalidated.
  Slice Get(uint32_t filter_num, uint32_t val_num) {
    size_t start = FLAGS_vary_key_alignment ? val_num % 4 : 0;
    size_t len = smallest_size_;
    if (FLAGS_vary_key_size_log2_interval < 30) {
      // To get range [avg_size - 2, avg_size + 2]
      // use range [smallest_size, smallest_size + 4]
      len += FastRange32(
          (val_num >> FLAGS_vary_key_size_log2_interval) * 1234567891, 5);
    }
    char *data = buf_.get() + start;
    // Populate key data such that all data makes it into a key of at
    // least 8 bytes. We also don't want all the within-filter key
    // variance confined to a contiguous 32 bits, because then a 32 bit
    // hash function can "cheat" the false positive rate by
    // approximating a perfect hash.
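    // For example (illustrative): filter_num=3, val_num=7 encodes the
    // little-endian 32-bit values 7, 10, and 0 at offsets 0, 4, and 8
    // below, and the returned key is the first `len` bytes at `data`.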
    EncodeFixed32(data, val_num);
    EncodeFixed32(data + 4, filter_num + val_num);
    // ensure clearing leftovers from different alignment
    EncodeFixed32(data + 8, 0);
    return Slice(data, len);
  }
};

void PrintWarnings() {
#if defined(__GNUC__) && !defined(__OPTIMIZE__)
  fprintf(stdout,
          "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n");
#endif
#ifndef NDEBUG
  fprintf(stdout,
          "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
#endif
}

void PrintError(const char *error) { fprintf(stderr, "ERROR: %s\n", error); }

struct FilterInfo {
  uint32_t filter_id_ = 0;
  std::unique_ptr<const char[]> owner_;
  Slice filter_;
  Status filter_construction_status = Status::OK();
  uint32_t keys_added_ = 0;
  std::unique_ptr<FilterBitsReader> reader_;
  std::unique_ptr<FullFilterBlockReader> full_block_reader_;
  std::unique_ptr<PlainTableBloomV1> plain_table_bloom_;
  uint64_t outside_queries_ = 0;
  uint64_t false_positives_ = 0;
};

enum TestMode {
  kSingleFilter,
  kBatchPrepared,
  kBatchUnprepared,
  kFiftyOneFilter,
  kEightyTwentyFilter,
  kRandomFilter,
};

static const std::vector<TestMode> allTestModes = {
    kSingleFilter,   kBatchPrepared,      kBatchUnprepared,
    kFiftyOneFilter, kEightyTwentyFilter, kRandomFilter,
};

static const std::vector<TestMode> quickTestModes = {
    kSingleFilter,
    kRandomFilter,
};

static const std::vector<TestMode> bestCaseTestModes = {
    kSingleFilter,
};

const char *TestModeToString(TestMode tm) {
  switch (tm) {
    case kSingleFilter:
      return "Single filter";
    case kBatchPrepared:
      return "Batched, prepared";
    case kBatchUnprepared:
      return "Batched, unprepared";
    case kFiftyOneFilter:
      return "Skewed 50% in 1%";
    case kEightyTwentyFilter:
      return "Skewed 80% in 20%";
    case kRandomFilter:
      return "Random filter";
  }
  return "Bad TestMode";
}

// Do just enough to keep some data dependence for the
// compiler / CPU
static uint32_t DryRunNoHash(Slice &s) {
  uint32_t sz = static_cast<uint32_t>(s.size());
  if (sz >= 4) {
    return sz + s.data()[3];
  } else {
    return sz;
  }
}

static uint32_t DryRunHash32(Slice &s) {
  // Same perf characteristics as GetSliceHash()
  return BloomHash(s);
}

static uint32_t DryRunHash64(Slice &s) {
  return Lower32of64(GetSliceHash64(s));
}

const std::shared_ptr<const FilterPolicy> &GetPolicy() {
  static std::shared_ptr<const FilterPolicy> policy;
  if (!policy) {
    policy = BloomLikeFilterPolicy::Create(
        BloomLikeFilterPolicy::GetAllFixedImpls().at(FLAGS_impl),
        FLAGS_bits_per_key);
  }
  return policy;
}

struct FilterBench : public MockBlockBasedTableTester {
  std::vector<KeyMaker> kms_;
  std::vector<FilterInfo> infos_;
  Random32 random_;
  std::ostringstream fp_rate_report_;
  Arena arena_;
  double m_queries_;
  StderrLogger stderr_logger_;

  FilterBench()
      : MockBlockBasedTableTester(GetPolicy()),
        random_(FLAGS_seed),
        m_queries_(0) {
    for (uint32_t i = 0; i < FLAGS_batch_size; ++i) {
      kms_.emplace_back(FLAGS_key_size < 8 ? 8 : FLAGS_key_size);
    }
    ioptions_.logger = &stderr_logger_;
    table_options_.optimize_filters_for_memory =
        FLAGS_optimize_filters_for_memory;
    table_options_.detect_filter_construct_corruption =
        FLAGS_detect_filter_construct_corruption;
    table_options_.cache_usage_options.options_overrides.insert(
        {CacheEntryRole::kFilterConstruction,
         {/*.charged = */ FLAGS_charge_filter_construction
              ? CacheEntryRoleOptions::Decision::kEnabled
              : CacheEntryRoleOptions::Decision::kDisabled}});
    if (FLAGS_charge_filter_construction) {
      table_options_.no_block_cache = false;
      LRUCacheOptions lo;
      lo.capacity = FLAGS_block_cache_capacity_MB * 1024 * 1024;
      lo.num_shard_bits = 0;  // 2^0 shard
      lo.strict_capacity_limit = FLAGS_strict_capacity_limit;
      std::shared_ptr<Cache> cache(NewLRUCache(lo));
      table_options_.block_cache = cache;
    }
  }

  void Go();

  double RandomQueryTest(uint32_t inside_threshold, bool dry_run,
                         TestMode mode);
};

void FilterBench::Go() {
  if (FLAGS_use_plain_table_bloom && FLAGS_use_full_block_reader) {
    throw std::runtime_error(
        "Can't combine -use_plain_table_bloom and -use_full_block_reader");
  }
  if (FLAGS_use_plain_table_bloom) {
    if (FLAGS_impl > 1) {
      throw std::runtime_error(
          "-impl must currently be >= 0 and <= 1 for Plain table");
    }
  } else {
    if (FLAGS_impl > 2) {
      throw std::runtime_error(
          "-impl must currently be >= 0 and <= 2 for Block-based table");
    }
  }

  if (FLAGS_vary_key_count_ratio < 0.0 || FLAGS_vary_key_count_ratio > 1.0) {
    throw std::runtime_error("-vary_key_count_ratio must be >= 0.0 and <= 1.0");
  }

  // For example, average_keys_per_filter = 100, vary_key_count_ratio = 0.1.
  // Varies up to +/- 10 keys. variance_range = 21 (generating values 0..20).
  // variance_offset = 10, so the average of (value - variance_offset) is 0.
  const uint32_t variance_range =
      1 + 2 * static_cast<uint32_t>(FLAGS_vary_key_count_ratio *
                                    FLAGS_average_keys_per_filter);
  const uint32_t variance_offset = variance_range / 2;

  const std::vector<TestMode> &testModes = FLAGS_best_case ? bestCaseTestModes
                                           : FLAGS_quick   ? quickTestModes
                                                           : allTestModes;

  m_queries_ = FLAGS_m_queries;
  double working_mem_size_mb = FLAGS_working_mem_size_mb;
  if (FLAGS_quick) {
    m_queries_ /= 7.0;
  } else if (FLAGS_best_case) {
    m_queries_ /= 3.0;
    working_mem_size_mb /= 10.0;
  }

  std::cout << "Building..." << std::endl;

  std::unique_ptr<BuiltinFilterBitsBuilder> builder;

  size_t total_memory_used = 0;
  size_t total_size = 0;
  size_t total_keys_added = 0;
#ifdef PREDICT_FP_RATE
  double weighted_predicted_fp_rate = 0.0;
#endif
  size_t max_total_keys;
  size_t max_mem;
  if (FLAGS_m_keys_total_max > 0) {
    max_total_keys = static_cast<size_t>(1000000 * FLAGS_m_keys_total_max);
    max_mem = SIZE_MAX;
  } else {
    max_total_keys = SIZE_MAX;
    max_mem = static_cast<size_t>(1024 * 1024 * working_mem_size_mb);
  }

  ROCKSDB_NAMESPACE::StopWatchNano timer(
      ROCKSDB_NAMESPACE::SystemClock::Default().get(), true);

  infos_.clear();
  while ((working_mem_size_mb == 0 || total_size < max_mem) &&
         total_keys_added < max_total_keys) {
    uint32_t filter_id = random_.Next();
    uint32_t keys_to_add = FLAGS_average_keys_per_filter +
                           FastRange32(random_.Next(), variance_range) -
                           variance_offset;
    if (max_total_keys - total_keys_added < keys_to_add) {
      keys_to_add = static_cast<uint32_t>(max_total_keys - total_keys_added);
    }
    infos_.emplace_back();
    FilterInfo &info = infos_.back();
    info.filter_id_ = filter_id;
    info.keys_added_ = keys_to_add;
    if (FLAGS_use_plain_table_bloom) {
      info.plain_table_bloom_.reset(new PlainTableBloomV1());
      info.plain_table_bloom_->SetTotalBits(
          &arena_, static_cast<uint32_t>(keys_to_add * FLAGS_bits_per_key),
          FLAGS_impl, 0 /*huge_page*/, nullptr /*logger*/);
      for (uint32_t i = 0; i < keys_to_add; ++i) {
        uint32_t hash = GetSliceHash(kms_[0].Get(filter_id, i));
        info.plain_table_bloom_->AddHash(hash);
      }
      info.filter_ = info.plain_table_bloom_->GetRawData();
    } else {
      if (!builder) {
        builder.reset(
            static_cast_with_check<BuiltinFilterBitsBuilder>(GetBuilder()));
      }
      for (uint32_t i = 0; i < keys_to_add; ++i) {
        builder->AddKey(kms_[0].Get(filter_id, i));
      }
      info.filter_ =
          builder->Finish(&info.owner_, &info.filter_construction_status);
      if (info.filter_construction_status.ok()) {
        info.filter_construction_status =
            builder->MaybePostVerify(info.filter_);
      }
      if (!info.filter_construction_status.ok()) {
        PrintError(info.filter_construction_status.ToString().c_str());
      }
#ifdef PREDICT_FP_RATE
      weighted_predicted_fp_rate +=
          keys_to_add *
          builder->EstimatedFpRate(keys_to_add, info.filter_.size());
#endif
      if (FLAGS_new_builder) {
        builder.reset();
      }
      info.reader_.reset(
          table_options_.filter_policy->GetFilterBitsReader(info.filter_));
      CachableEntry<ParsedFullFilterBlock> block(
          new ParsedFullFilterBlock(table_options_.filter_policy.get(),
                                    BlockContents(info.filter_)),
          nullptr /* cache */, nullptr /* cache_handle */,
          true /* own_value */);
      info.full_block_reader_.reset(
          new FullFilterBlockReader(table_.get(), std::move(block)));
    }
    total_size += info.filter_.size();
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
    total_memory_used +=
        malloc_usable_size(const_cast<char *>(info.filter_.data()));
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
    total_keys_added += keys_to_add;
  }

  uint64_t elapsed_nanos = timer.ElapsedNanos();
  double ns = double(elapsed_nanos) / total_keys_added;
  std::cout << "Build avg ns/key: " << ns << std::endl;
  std::cout << "Number of filters: " << infos_.size() << std::endl;
  std::cout << "Total size (MB): " << total_size / 1024.0 / 1024.0 << std::endl;
  if (total_memory_used > 0) {
    std::cout << "Reported total allocated memory (MB): "
              << total_memory_used / 1024.0 / 1024.0 << std::endl;
    std::cout << "Reported internal fragmentation: "
              << (total_memory_used - total_size) * 100.0 / total_size << "%"
              << std::endl;
  }

  double bpk = total_size * 8.0 / total_keys_added;
  std::cout << "Bits/key stored: " << bpk << std::endl;
#ifdef PREDICT_FP_RATE
  std::cout << "Predicted FP rate %: "
            << 100.0 * (weighted_predicted_fp_rate / total_keys_added)
            << std::endl;
#endif
  if (!FLAGS_quick && !FLAGS_best_case) {
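    // Heuristic sanity bound (not an exact theoretical result): tolerate an
    // FP rate somewhat above the information-theoretic minimum of 2^-bpk,
    // with proportionally more slack as bits/key grows.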
    double tolerable_rate = std::pow(2.0, -(bpk - 1.0) / (1.4 + bpk / 50.0));
    std::cout << "Best possible FP rate %: " << 100.0 * std::pow(2.0, -bpk)
              << std::endl;
    std::cout << "Tolerable FP rate %: " << 100.0 * tolerable_rate << std::endl;

    std::cout << "----------------------------" << std::endl;
    std::cout << "Verifying..." << std::endl;

    uint32_t outside_q_per_f =
        static_cast<uint32_t>(m_queries_ * 1000000 / infos_.size());
    uint64_t fps = 0;
    for (uint32_t i = 0; i < infos_.size(); ++i) {
      FilterInfo &info = infos_[i];
      for (uint32_t j = 0; j < info.keys_added_; ++j) {
        if (FLAGS_use_plain_table_bloom) {
          uint32_t hash = GetSliceHash(kms_[0].Get(info.filter_id_, j));
          ALWAYS_ASSERT(info.plain_table_bloom_->MayContainHash(hash));
        } else {
          ALWAYS_ASSERT(
              info.reader_->MayMatch(kms_[0].Get(info.filter_id_, j)));
        }
      }
      for (uint32_t j = 0; j < outside_q_per_f; ++j) {
        if (FLAGS_use_plain_table_bloom) {
          uint32_t hash =
              GetSliceHash(kms_[0].Get(info.filter_id_, j | 0x80000000));
          fps += info.plain_table_bloom_->MayContainHash(hash);
        } else {
          fps += info.reader_->MayMatch(
              kms_[0].Get(info.filter_id_, j | 0x80000000));
        }
      }
    }
    std::cout << " No FNs :)" << std::endl;
    double prelim_rate = double(fps) / outside_q_per_f / infos_.size();
    std::cout << " Prelim FP rate %: " << (100.0 * prelim_rate) << std::endl;

    if (!FLAGS_allow_bad_fp_rate) {
      ALWAYS_ASSERT(prelim_rate < tolerable_rate);
    }
  }

  std::cout << "----------------------------" << std::endl;
  std::cout << "Mixed inside/outside queries..." << std::endl;
  // 50% each inside and outside
  uint32_t inside_threshold = UINT32_MAX / 2;
  for (TestMode tm : testModes) {
    random_.Seed(FLAGS_seed + 1);
    double f = RandomQueryTest(inside_threshold, /*dry_run*/ false, tm);
    random_.Seed(FLAGS_seed + 1);
    double d = RandomQueryTest(inside_threshold, /*dry_run*/ true, tm);
    std::cout << " " << TestModeToString(tm) << " net ns/op: " << (f - d)
              << std::endl;
  }

  if (!FLAGS_quick) {
    std::cout << "----------------------------" << std::endl;
    std::cout << "Inside queries (mostly)..." << std::endl;
    // Do about 95% inside queries rather than 100% so that branch predictor
    // can't give itself an artificially crazy advantage.
    inside_threshold = UINT32_MAX / 20 * 19;
    for (TestMode tm : testModes) {
      random_.Seed(FLAGS_seed + 1);
      double f = RandomQueryTest(inside_threshold, /*dry_run*/ false, tm);
      random_.Seed(FLAGS_seed + 1);
      double d = RandomQueryTest(inside_threshold, /*dry_run*/ true, tm);
      std::cout << " " << TestModeToString(tm) << " net ns/op: " << (f - d)
                << std::endl;
    }

    std::cout << "----------------------------" << std::endl;
    std::cout << "Outside queries (mostly)..." << std::endl;
    // Do about 95% outside queries rather than 100% so that branch predictor
    // can't give itself an artificially crazy advantage.
    inside_threshold = UINT32_MAX / 20;
    for (TestMode tm : testModes) {
      random_.Seed(FLAGS_seed + 2);
      double f = RandomQueryTest(inside_threshold, /*dry_run*/ false, tm);
      random_.Seed(FLAGS_seed + 2);
      double d = RandomQueryTest(inside_threshold, /*dry_run*/ true, tm);
      std::cout << " " << TestModeToString(tm) << " net ns/op: " << (f - d)
                << std::endl;
    }
  }
  std::cout << fp_rate_report_.str();

  std::cout << "----------------------------" << std::endl;
  std::cout << "Done. (For more info, run with -legend or -help.)" << std::endl;
}

double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run,
                                    TestMode mode) {
  for (auto &info : infos_) {
    info.outside_queries_ = 0;
    info.false_positives_ = 0;
  }

  auto dry_run_hash_fn = DryRunNoHash;
  if (!FLAGS_net_includes_hashing) {
    if (FLAGS_impl == 0 || FLAGS_use_plain_table_bloom) {
      dry_run_hash_fn = DryRunHash32;
    } else {
      dry_run_hash_fn = DryRunHash64;
    }
  }
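  // Note: with the default -net_includes_hashing=false, the dry run also
  // hashes each key (a 32-bit hash for legacy Bloom / plain table, 64-bit
  // otherwise), so subtracting dry-run time from gross time largely cancels
  // hashing cost out of the reported "net" ns/op.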

  uint32_t num_infos = static_cast<uint32_t>(infos_.size());
  uint32_t dry_run_hash = 0;
  uint64_t max_queries = static_cast<uint64_t>(m_queries_ * 1000000 + 0.50);
  // Some filters may be considered secondary in order to implement skewed
  // queries. num_primary_filters is the number that are to be treated as
  // equal, and any remainder will be treated as secondary.
  uint32_t num_primary_filters = num_infos;
  // The proportion (when divided by 2^32 - 1) of filter queries going to
  // the primary filters (default = all). The remainder of queries are
  // against secondary filters.
  uint32_t primary_filter_threshold = 0xffffffff;
  if (mode == kSingleFilter) {
    // 100% of queries to 1 filter
    num_primary_filters = 1;
  } else if (mode == kFiftyOneFilter) {
    if (num_infos < 50) {
      return 0.0;  // skip
    }
    // 50% of queries
    primary_filter_threshold /= 2;
    // to 1% of filters
    num_primary_filters = (num_primary_filters + 99) / 100;
  } else if (mode == kEightyTwentyFilter) {
    if (num_infos < 5) {
      return 0.0;  // skip
    }
    // 80% of queries
    primary_filter_threshold = primary_filter_threshold / 5 * 4;
    // to 20% of filters
    num_primary_filters = (num_primary_filters + 4) / 5;
  } else if (mode == kRandomFilter) {
    if (num_infos == 1) {
      return 0.0;  // skip
    }
  }
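  // For example (illustrative): with 1000 filters in kFiftyOneFilter mode,
  // num_primary_filters becomes 10 and about half of the queries go to those
  // 10 filters, with the rest spread over the remaining 990.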
  uint32_t batch_size = 1;
  std::unique_ptr<Slice[]> batch_slices;
  std::unique_ptr<Slice *[]> batch_slice_ptrs;
  std::unique_ptr<bool[]> batch_results;
  if (mode == kBatchPrepared || mode == kBatchUnprepared) {
    batch_size = static_cast<uint32_t>(kms_.size());
  }

  batch_slices.reset(new Slice[batch_size]);
  batch_slice_ptrs.reset(new Slice *[batch_size]);
  batch_results.reset(new bool[batch_size]);
  for (uint32_t i = 0; i < batch_size; ++i) {
    batch_results[i] = false;
    batch_slice_ptrs[i] = &batch_slices[i];
  }

  ROCKSDB_NAMESPACE::StopWatchNano timer(
      ROCKSDB_NAMESPACE::SystemClock::Default().get(), true);

  for (uint64_t q = 0; q < max_queries; q += batch_size) {
    bool inside_this_time = random_.Next() <= inside_threshold;

    uint32_t filter_index;
    if (random_.Next() <= primary_filter_threshold) {
      filter_index = random_.Uniformish(num_primary_filters);
    } else {
      // secondary
      filter_index = num_primary_filters +
                     random_.Uniformish(num_infos - num_primary_filters);
    }
    FilterInfo &info = infos_[filter_index];
    for (uint32_t i = 0; i < batch_size; ++i) {
      if (inside_this_time) {
        batch_slices[i] =
            kms_[i].Get(info.filter_id_, random_.Uniformish(info.keys_added_));
      } else {
        batch_slices[i] =
            kms_[i].Get(info.filter_id_, random_.Uniformish(info.keys_added_) |
                                             uint32_t{0x80000000});
        info.outside_queries_++;
      }
    }
    // TODO: implement batched interface to full block reader
    // TODO: implement batched interface to plain table bloom
    if (mode == kBatchPrepared && !FLAGS_use_full_block_reader &&
        !FLAGS_use_plain_table_bloom) {
      for (uint32_t i = 0; i < batch_size; ++i) {
        batch_results[i] = false;
      }
      if (dry_run) {
        for (uint32_t i = 0; i < batch_size; ++i) {
          batch_results[i] = true;
          dry_run_hash += dry_run_hash_fn(batch_slices[i]);
        }
      } else {
        info.reader_->MayMatch(batch_size, batch_slice_ptrs.get(),
                               batch_results.get());
      }
      for (uint32_t i = 0; i < batch_size; ++i) {
        if (inside_this_time) {
          ALWAYS_ASSERT(batch_results[i]);
        } else {
          info.false_positives_ += batch_results[i];
        }
      }
    } else {
      for (uint32_t i = 0; i < batch_size; ++i) {
        bool may_match;
        if (FLAGS_use_plain_table_bloom) {
          if (dry_run) {
            dry_run_hash += dry_run_hash_fn(batch_slices[i]);
            may_match = true;
          } else {
            uint32_t hash = GetSliceHash(batch_slices[i]);
            may_match = info.plain_table_bloom_->MayContainHash(hash);
          }
        } else if (FLAGS_use_full_block_reader) {
          if (dry_run) {
            dry_run_hash += dry_run_hash_fn(batch_slices[i]);
            may_match = true;
          } else {
            may_match = info.full_block_reader_->KeyMayMatch(
                batch_slices[i],
                /*no_io=*/false, /*const_ikey_ptr=*/nullptr,
                /*get_context=*/nullptr,
                /*lookup_context=*/nullptr, Env::IO_TOTAL);
          }
        } else {
          if (dry_run) {
            dry_run_hash += dry_run_hash_fn(batch_slices[i]);
            may_match = true;
          } else {
            may_match = info.reader_->MayMatch(batch_slices[i]);
          }
        }
        if (inside_this_time) {
          ALWAYS_ASSERT(may_match);
        } else {
          info.false_positives_ += may_match;
        }
      }
    }
  }

  uint64_t elapsed_nanos = timer.ElapsedNanos();
  double ns = double(elapsed_nanos) / max_queries;

  if (!FLAGS_quick) {
    if (dry_run) {
      // Printing part of hash prevents dry run components from being optimized
      // away by compiler
      std::cout << " Dry run (" << std::hex << (dry_run_hash & 0xfffff)
                << std::dec << ") ";
    } else {
      std::cout << " Gross filter ";
    }
    std::cout << "ns/op: " << ns << std::endl;
  }

  if (!dry_run) {
    fp_rate_report_.str("");
    uint64_t q = 0;
    uint64_t fp = 0;
    double worst_fp_rate = 0.0;
    double best_fp_rate = 1.0;
    for (auto &info : infos_) {
      q += info.outside_queries_;
      fp += info.false_positives_;
      if (info.outside_queries_ > 0) {
        double fp_rate = double(info.false_positives_) / info.outside_queries_;
        worst_fp_rate = std::max(worst_fp_rate, fp_rate);
        best_fp_rate = std::min(best_fp_rate, fp_rate);
      }
    }
    fp_rate_report_ << " Average FP rate %: " << 100.0 * fp / q << std::endl;
    if (!FLAGS_quick && !FLAGS_best_case) {
      fp_rate_report_ << " Worst FP rate %: " << 100.0 * worst_fp_rate
                      << std::endl;
      fp_rate_report_ << " Best FP rate %: " << 100.0 * best_fp_rate
                      << std::endl;
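      // log2(1/FP rate) is the information-theoretic lower bound on bits/key
      // for any filter achieving the measured aggregate FP rate, hence
      // "best possible" in the line below.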
      fp_rate_report_ << " Best possible bits/key: "
                      << -std::log(double(fp) / q) / std::log(2.0) << std::endl;
    }
  }
  return ns;
}

int main(int argc, char **argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) +
                  " [-quick] [OTHER OPTIONS]...");
  ParseCommandLineFlags(&argc, &argv, true);

  PrintWarnings();

  if (FLAGS_legend) {
    std::cout
        << "Legend:" << std::endl
        << " \"Inside\" - key that was added to filter" << std::endl
        << " \"Outside\" - key that was not added to filter" << std::endl
        << " \"FN\" - false negative query (must not happen)" << std::endl
        << " \"FP\" - false positive query (OK at low rate)" << std::endl
        << " \"Dry run\" - cost of testing and hashing overhead." << std::endl
        << " \"Gross filter\" - cost of filter queries including testing "
        << "\n and hashing overhead." << std::endl
        << " \"net\" - best estimate of time in filter operation, without "
        << "\n testing and hashing overhead (gross filter - dry run)"
        << std::endl
        << " \"ns/op\" - nanoseconds per operation (key query or add)"
        << std::endl
        << " \"Single filter\" - essentially minimum cost, assuming filter"
        << "\n fits easily in L1 CPU cache." << std::endl
        << " \"Batched, prepared\" - several queries at once against a"
        << "\n randomly chosen filter, using multi-query interface."
        << std::endl
        << " \"Batched, unprepared\" - similar, but using serial calls"
        << "\n to single query interface." << std::endl
        << " \"Random filter\" - a filter is chosen at random as target"
        << "\n of each query." << std::endl
        << " \"Skewed X% in Y%\" - like \"Random filter\" except Y% of"
        << "\n the filters are designated as \"hot\" and receive X%"
        << "\n of queries." << std::endl;
  } else {
    FilterBench b;
    for (uint32_t i = 0; i < FLAGS_runs; ++i) {
      b.Go();
      FLAGS_seed += 100;
      b.random_.Seed(FLAGS_seed);
    }
  }

  return 0;
}

#endif  // !defined(GFLAGS) || defined(ROCKSDB_LITE)