// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#if !defined(GFLAGS) || defined(ROCKSDB_LITE)
#include <cstdio>
int main() {
  fprintf(stderr, "filter_bench requires gflags and !ROCKSDB_LITE\n");
  return 1;
}
#else

#include <cinttypes>
#include <iostream>
#include <sstream>
#include <vector>

#include "memory/arena.h"
#include "port/port.h"
#include "port/stack_trace.h"
#include "rocksdb/cache.h"
#include "rocksdb/env.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/table.h"
#include "table/block_based/filter_policy_internal.h"
#include "table/block_based/full_filter_block.h"
#include "table/block_based/mock_block_based_table.h"
#include "table/plain/plain_table_bloom.h"
#include "util/cast_util.h"
#include "util/gflags_compat.h"
#include "util/hash.h"
#include "util/random.h"
#include "util/stderr_logger.h"
#include "util/stop_watch.h"
#include "util/string_util.h"

using GFLAGS_NAMESPACE::ParseCommandLineFlags;
using GFLAGS_NAMESPACE::RegisterFlagValidator;
using GFLAGS_NAMESPACE::SetUsageMessage;

DEFINE_uint32(seed, 0, "Seed for random number generators");

DEFINE_double(working_mem_size_mb, 200,
              "MB of memory to get up to among all filters, unless "
              "m_keys_total_max is specified.");

DEFINE_uint32(average_keys_per_filter, 10000,
              "Average number of keys per filter");

DEFINE_double(vary_key_count_ratio, 0.4,
              "Vary number of keys by up to +/- vary_key_count_ratio * "
              "average_keys_per_filter.");

DEFINE_uint32(key_size, 24, "Average number of bytes for each key");

DEFINE_bool(vary_key_alignment, true,
            "Whether to vary key alignment (default: at least 32-bit "
            "alignment)");

DEFINE_uint32(vary_key_size_log2_interval, 5,
              "Use same key size 2^n times, then change. Key size varies from "
              "-2 to +2 bytes vs. average, unless n>=30 to fix key size.");

DEFINE_uint32(batch_size, 8, "Number of keys to group in each batch");

DEFINE_double(bits_per_key, 10.0, "Bits per key setting for filters");

DEFINE_double(m_queries, 200, "Millions of queries for each test mode");

DEFINE_double(m_keys_total_max, 0,
              "Maximum total keys added to filters, in millions. "
              "0 (default) disables. Non-zero overrides working_mem_size_mb "
              "option.");

DEFINE_bool(use_full_block_reader, false,
            "Use FullFilterBlockReader interface rather than FilterBitsReader");

DEFINE_bool(use_plain_table_bloom, false,
            "Use PlainTableBloom structure and interface rather than "
            "FilterBitsReader/FullFilterBlockReader");

DEFINE_bool(new_builder, false,
            "Whether to create a new builder for each new filter");

DEFINE_uint32(impl, 0,
              "Select filter implementation. Without -use_plain_table_bloom: "
              "0 = legacy full Bloom filter, "
              "1 = format_version 5 Bloom filter, 2 = Ribbon128 filter. With "
              "-use_plain_table_bloom: 0 = no locality, 1 = locality.");

DEFINE_bool(net_includes_hashing, false,
            "Whether query net ns/op times should include hashing. "
            "(if not, dry run will include hashing) "
            "(build times always include hashing)");

DEFINE_bool(optimize_filters_for_memory, false,
            "Setting for BlockBasedTableOptions::optimize_filters_for_memory");

DEFINE_bool(detect_filter_construct_corruption, false,
            "Setting for "
            "BlockBasedTableOptions::detect_filter_construct_corruption");

DEFINE_uint32(block_cache_capacity_MB, 8,
              "Setting for "
              "LRUCacheOptions::capacity");

DEFINE_bool(charge_filter_construction, false,
            "Setting for "
            "CacheEntryRoleOptions::charged of "
            "CacheEntryRole::kFilterConstruction");

DEFINE_bool(strict_capacity_limit, false,
            "Setting for "
            "LRUCacheOptions::strict_capacity_limit");

DEFINE_bool(quick, false, "Run more limited set of tests, fewer queries");

DEFINE_bool(best_case, false, "Run limited tests only for best-case");

DEFINE_bool(allow_bad_fp_rate, false, "Continue even if FP rate is bad");

DEFINE_bool(legend, false,
            "Print more information about interpreting results instead of "
            "running tests");

DEFINE_uint32(runs, 1, "Number of times to rebuild and run benchmark tests");

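// Example invocation (flag values are illustrative, not recommendations):
//   ./filter_bench -impl=2 -quick -net_includes_hashing
// benchmarks Ribbon128 filters with a reduced set of test modes, counting
// hashing time in the reported net ns/op.
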
void _always_assert_fail(int line, const char *file, const char *expr) {
  fprintf(stderr, "%s: %d: Assertion %s failed\n", file, line, expr);
  abort();
}

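// Unlike assert(), ALWAYS_ASSERT is not compiled out in optimized (NDEBUG)
// builds, so correctness checks still fire during timed benchmark runs.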
#define ALWAYS_ASSERT(cond) \
  ((cond) ? (void)0 : ::_always_assert_fail(__LINE__, __FILE__, #cond))

#ifndef NDEBUG
// This could affect build times enough that we should not include it for
// accurate speed tests
#define PREDICT_FP_RATE
#endif

using ROCKSDB_NAMESPACE::Arena;
using ROCKSDB_NAMESPACE::BlockContents;
using ROCKSDB_NAMESPACE::BloomFilterPolicy;
using ROCKSDB_NAMESPACE::BloomHash;
using ROCKSDB_NAMESPACE::BloomLikeFilterPolicy;
using ROCKSDB_NAMESPACE::BuiltinFilterBitsBuilder;
using ROCKSDB_NAMESPACE::CachableEntry;
using ROCKSDB_NAMESPACE::Cache;
using ROCKSDB_NAMESPACE::CacheEntryRole;
using ROCKSDB_NAMESPACE::CacheEntryRoleOptions;
using ROCKSDB_NAMESPACE::EncodeFixed32;
using ROCKSDB_NAMESPACE::Env;
using ROCKSDB_NAMESPACE::FastRange32;
using ROCKSDB_NAMESPACE::FilterBitsReader;
using ROCKSDB_NAMESPACE::FilterBuildingContext;
using ROCKSDB_NAMESPACE::FilterPolicy;
using ROCKSDB_NAMESPACE::FullFilterBlockReader;
using ROCKSDB_NAMESPACE::GetSliceHash;
using ROCKSDB_NAMESPACE::GetSliceHash64;
using ROCKSDB_NAMESPACE::Lower32of64;
using ROCKSDB_NAMESPACE::LRUCacheOptions;
using ROCKSDB_NAMESPACE::ParsedFullFilterBlock;
using ROCKSDB_NAMESPACE::PlainTableBloomV1;
using ROCKSDB_NAMESPACE::Random32;
using ROCKSDB_NAMESPACE::Slice;
using ROCKSDB_NAMESPACE::static_cast_with_check;
using ROCKSDB_NAMESPACE::Status;
using ROCKSDB_NAMESPACE::StderrLogger;
using ROCKSDB_NAMESPACE::mock::MockBlockBasedTableTester;

struct KeyMaker {
  KeyMaker(size_t avg_size)
      : smallest_size_(avg_size -
                       (FLAGS_vary_key_size_log2_interval >= 30 ? 2 : 0)),
        buf_size_(avg_size + 11),  // pad to vary key size and alignment
        buf_(new char[buf_size_]) {
    memset(buf_.get(), 0, buf_size_);
    assert(smallest_size_ > 8);
  }
  size_t smallest_size_;
  size_t buf_size_;
  std::unique_ptr<char[]> buf_;

  // Returns a unique(-ish) key based on the given parameter values. Each
  // call returns a Slice from the same buffer so previously returned
  // Slices should be considered invalidated.
  Slice Get(uint32_t filter_num, uint32_t val_num) {
    size_t start = FLAGS_vary_key_alignment ? val_num % 4 : 0;
    size_t len = smallest_size_;
    if (FLAGS_vary_key_size_log2_interval < 30) {
      // To get range [avg_size - 2, avg_size + 2]
      // use range [smallest_size, smallest_size + 4]
      len += FastRange32(
          (val_num >> FLAGS_vary_key_size_log2_interval) * 1234567891, 5);
    }
    char *data = buf_.get() + start;
    // Populate key data such that all data makes it into a key of at
    // least 8 bytes. We also don't want all the within-filter key
    // variance confined to a contiguous 32 bits, because then a 32 bit
    // hash function can "cheat" the false positive rate by
    // approximating a perfect hash.
    EncodeFixed32(data, val_num);
    EncodeFixed32(data + 4, filter_num + val_num);
    // ensure clearing leftovers from different alignment
    EncodeFixed32(data + 8, 0);
    return Slice(data, len);
  }
};
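// Note on KeyMaker::Get: bytes 0..3 of each key hold val_num and bytes 4..7
// hold filter_num + val_num, so any two distinct (filter_num, val_num) pairs
// differ somewhere within the first 8 bytes; remaining bytes are zero.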
void PrintWarnings() {
#if defined(__GNUC__) && !defined(__OPTIMIZE__)
  fprintf(stdout,
          "WARNING: Optimization is disabled: benchmarks unnecessarily slow\n");
#endif
#ifndef NDEBUG
  fprintf(stdout,
          "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
#endif
}

void PrintError(const char *error) { fprintf(stderr, "ERROR: %s\n", error); }

struct FilterInfo {
  uint32_t filter_id_ = 0;
  std::unique_ptr<const char[]> owner_;
  Slice filter_;
  Status filter_construction_status = Status::OK();
  uint32_t keys_added_ = 0;
  std::unique_ptr<FilterBitsReader> reader_;
  std::unique_ptr<FullFilterBlockReader> full_block_reader_;
  std::unique_ptr<PlainTableBloomV1> plain_table_bloom_;
  uint64_t outside_queries_ = 0;
  uint64_t false_positives_ = 0;
};

enum TestMode {
  kSingleFilter,
  kBatchPrepared,
  kBatchUnprepared,
  kFiftyOneFilter,
  kEightyTwentyFilter,
  kRandomFilter,
};

static const std::vector<TestMode> allTestModes = {
    kSingleFilter,   kBatchPrepared,      kBatchUnprepared,
    kFiftyOneFilter, kEightyTwentyFilter, kRandomFilter,
};

static const std::vector<TestMode> quickTestModes = {
    kSingleFilter,
    kRandomFilter,
};

static const std::vector<TestMode> bestCaseTestModes = {
    kSingleFilter,
};

const char *TestModeToString(TestMode tm) {
  switch (tm) {
    case kSingleFilter:
      return "Single filter";
    case kBatchPrepared:
      return "Batched, prepared";
    case kBatchUnprepared:
      return "Batched, unprepared";
    case kFiftyOneFilter:
      return "Skewed 50% in 1%";
    case kEightyTwentyFilter:
      return "Skewed 80% in 20%";
    case kRandomFilter:
      return "Random filter";
  }
  return "Bad TestMode";
}

// Do just enough to keep some data dependence for the
// compiler / CPU
static uint32_t DryRunNoHash(Slice &s) {
  uint32_t sz = static_cast<uint32_t>(s.size());
  if (sz >= 4) {
    return sz + s.data()[3];
  } else {
    return sz;
  }
}

static uint32_t DryRunHash32(Slice &s) {
  // Same perf characteristics as GetSliceHash()
  return BloomHash(s);
}

static uint32_t DryRunHash64(Slice &s) {
  return Lower32of64(GetSliceHash64(s));
}

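// The dry-run variants are timed separately and subtracted from gross query
// time, so the reported "net ns/op" approximates the filter probe itself,
// with hashing charged to the dry run or the filter per -net_includes_hashing.
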
const std::shared_ptr<const FilterPolicy> &GetPolicy() {
  static std::shared_ptr<const FilterPolicy> policy;
  if (!policy) {
    policy = BloomLikeFilterPolicy::Create(
        BloomLikeFilterPolicy::GetAllFixedImpls().at(FLAGS_impl),
        FLAGS_bits_per_key);
  }
  return policy;
}

struct FilterBench : public MockBlockBasedTableTester {
  std::vector<KeyMaker> kms_;
  std::vector<FilterInfo> infos_;
  Random32 random_;
  std::ostringstream fp_rate_report_;
  Arena arena_;
  double m_queries_;
  StderrLogger stderr_logger_;

  FilterBench()
      : MockBlockBasedTableTester(GetPolicy()),
        random_(FLAGS_seed),
        m_queries_(0) {
    for (uint32_t i = 0; i < FLAGS_batch_size; ++i) {
      kms_.emplace_back(FLAGS_key_size < 8 ? 8 : FLAGS_key_size);
    }
    ioptions_.logger = &stderr_logger_;
    table_options_.optimize_filters_for_memory =
        FLAGS_optimize_filters_for_memory;
    table_options_.detect_filter_construct_corruption =
        FLAGS_detect_filter_construct_corruption;
    table_options_.cache_usage_options.options_overrides.insert(
        {CacheEntryRole::kFilterConstruction,
         {/*.charged = */ FLAGS_charge_filter_construction
              ? CacheEntryRoleOptions::Decision::kEnabled
              : CacheEntryRoleOptions::Decision::kDisabled}});
    if (FLAGS_charge_filter_construction) {
      table_options_.no_block_cache = false;
      LRUCacheOptions lo;
      lo.capacity = FLAGS_block_cache_capacity_MB * 1024 * 1024;
      lo.num_shard_bits = 0;  // 2^0 shard
      lo.strict_capacity_limit = FLAGS_strict_capacity_limit;
      std::shared_ptr<Cache> cache(NewLRUCache(lo));
      table_options_.block_cache = cache;
    }
  }

  void Go();

  double RandomQueryTest(uint32_t inside_threshold, bool dry_run,
                         TestMode mode);
};

void FilterBench::Go() {
  if (FLAGS_use_plain_table_bloom && FLAGS_use_full_block_reader) {
    throw std::runtime_error(
        "Can't combine -use_plain_table_bloom and -use_full_block_reader");
  }
  if (FLAGS_use_plain_table_bloom) {
    if (FLAGS_impl > 1) {
      throw std::runtime_error(
          "-impl must currently be >= 0 and <= 1 for Plain table");
    }
  } else {
    if (FLAGS_impl > 2) {
      throw std::runtime_error(
          "-impl must currently be >= 0 and <= 2 for Block-based table");
    }
  }

  if (FLAGS_vary_key_count_ratio < 0.0 || FLAGS_vary_key_count_ratio > 1.0) {
    throw std::runtime_error("-vary_key_count_ratio must be >= 0.0 and <= 1.0");
  }

  // For example, average_keys_per_filter = 100, vary_key_count_ratio = 0.1.
  // Varies up to +/- 10 keys. variance_range = 21 (generating value 0..20).
  // variance_offset = 10, so the average of (value - variance_offset) is 0.
  const uint32_t variance_range =
      1 + 2 * static_cast<uint32_t>(FLAGS_vary_key_count_ratio *
                                    FLAGS_average_keys_per_filter);
  const uint32_t variance_offset = variance_range / 2;

  const std::vector<TestMode> &testModes = FLAGS_best_case ? bestCaseTestModes
                                           : FLAGS_quick   ? quickTestModes
                                                           : allTestModes;

  m_queries_ = FLAGS_m_queries;
  double working_mem_size_mb = FLAGS_working_mem_size_mb;
  if (FLAGS_quick) {
    m_queries_ /= 7.0;
  } else if (FLAGS_best_case) {
    m_queries_ /= 3.0;
    working_mem_size_mb /= 10.0;
  }

  std::cout << "Building..." << std::endl;

  std::unique_ptr<BuiltinFilterBitsBuilder> builder;

  size_t total_memory_used = 0;
  size_t total_size = 0;
  size_t total_keys_added = 0;
#ifdef PREDICT_FP_RATE
  double weighted_predicted_fp_rate = 0.0;
#endif
  size_t max_total_keys;
  size_t max_mem;
  if (FLAGS_m_keys_total_max > 0) {
    max_total_keys = static_cast<size_t>(1000000 * FLAGS_m_keys_total_max);
    max_mem = SIZE_MAX;
  } else {
    max_total_keys = SIZE_MAX;
    max_mem = static_cast<size_t>(1024 * 1024 * working_mem_size_mb);
  }

  ROCKSDB_NAMESPACE::StopWatchNano timer(
      ROCKSDB_NAMESPACE::SystemClock::Default().get(), true);

  infos_.clear();
  while ((working_mem_size_mb == 0 || total_size < max_mem) &&
         total_keys_added < max_total_keys) {
    uint32_t filter_id = random_.Next();
    uint32_t keys_to_add = FLAGS_average_keys_per_filter +
                           FastRange32(random_.Next(), variance_range) -
                           variance_offset;
    if (max_total_keys - total_keys_added < keys_to_add) {
      keys_to_add = static_cast<uint32_t>(max_total_keys - total_keys_added);
    }
    infos_.emplace_back();
    FilterInfo &info = infos_.back();
    info.filter_id_ = filter_id;
    info.keys_added_ = keys_to_add;
    if (FLAGS_use_plain_table_bloom) {
      info.plain_table_bloom_.reset(new PlainTableBloomV1());
      info.plain_table_bloom_->SetTotalBits(
          &arena_, static_cast<uint32_t>(keys_to_add * FLAGS_bits_per_key),
          FLAGS_impl, 0 /*huge_page*/, nullptr /*logger*/);
      for (uint32_t i = 0; i < keys_to_add; ++i) {
        uint32_t hash = GetSliceHash(kms_[0].Get(filter_id, i));
        info.plain_table_bloom_->AddHash(hash);
      }
      info.filter_ = info.plain_table_bloom_->GetRawData();
    } else {
      if (!builder) {
        builder.reset(
            static_cast_with_check<BuiltinFilterBitsBuilder>(GetBuilder()));
      }
      for (uint32_t i = 0; i < keys_to_add; ++i) {
        builder->AddKey(kms_[0].Get(filter_id, i));
      }
      info.filter_ =
          builder->Finish(&info.owner_, &info.filter_construction_status);
      if (info.filter_construction_status.ok()) {
        info.filter_construction_status =
            builder->MaybePostVerify(info.filter_);
      }
      if (!info.filter_construction_status.ok()) {
        PrintError(info.filter_construction_status.ToString().c_str());
      }
#ifdef PREDICT_FP_RATE
      weighted_predicted_fp_rate +=
          keys_to_add *
          builder->EstimatedFpRate(keys_to_add, info.filter_.size());
#endif
      if (FLAGS_new_builder) {
        builder.reset();
      }
      info.reader_.reset(
          table_options_.filter_policy->GetFilterBitsReader(info.filter_));
      CachableEntry<ParsedFullFilterBlock> block(
          new ParsedFullFilterBlock(table_options_.filter_policy.get(),
                                    BlockContents(info.filter_)),
          nullptr /* cache */, nullptr /* cache_handle */,
          true /* own_value */);
      info.full_block_reader_.reset(
          new FullFilterBlockReader(table_.get(), std::move(block)));
    }
    total_size += info.filter_.size();
#ifdef ROCKSDB_MALLOC_USABLE_SIZE
    total_memory_used +=
        malloc_usable_size(const_cast<char *>(info.filter_.data()));
#endif  // ROCKSDB_MALLOC_USABLE_SIZE
    total_keys_added += keys_to_add;
  }

  uint64_t elapsed_nanos = timer.ElapsedNanos();
  double ns = double(elapsed_nanos) / total_keys_added;
  std::cout << "Build avg ns/key: " << ns << std::endl;
  std::cout << "Number of filters: " << infos_.size() << std::endl;
  std::cout << "Total size (MB): " << total_size / 1024.0 / 1024.0 << std::endl;
  if (total_memory_used > 0) {
    std::cout << "Reported total allocated memory (MB): "
              << total_memory_used / 1024.0 / 1024.0 << std::endl;
    std::cout << "Reported internal fragmentation: "
              << (total_memory_used - total_size) * 100.0 / total_size << "%"
              << std::endl;
  }

  double bpk = total_size * 8.0 / total_keys_added;
  std::cout << "Bits/key stored: " << bpk << std::endl;
#ifdef PREDICT_FP_RATE
  std::cout << "Predicted FP rate %: "
            << 100.0 * (weighted_predicted_fp_rate / total_keys_added)
            << std::endl;
#endif
  if (!FLAGS_quick && !FLAGS_best_case) {
    double tolerable_rate = std::pow(2.0, -(bpk - 1.0) / (1.4 + bpk / 50.0));
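    // Heuristic slack vs. the information-theoretic optimum of 2^-bpk:
    // e.g. at bpk = 10 this tolerates 2^(-9/1.6) ~= 2.0% FP, while the
    // ideal is about 0.098%.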
    std::cout << "Best possible FP rate %: " << 100.0 * std::pow(2.0, -bpk)
              << std::endl;
    std::cout << "Tolerable FP rate %: " << 100.0 * tolerable_rate << std::endl;

    std::cout << "----------------------------" << std::endl;
    std::cout << "Verifying..." << std::endl;

    uint32_t outside_q_per_f =
        static_cast<uint32_t>(m_queries_ * 1000000 / infos_.size());
    uint64_t fps = 0;
    for (uint32_t i = 0; i < infos_.size(); ++i) {
      FilterInfo &info = infos_[i];
      for (uint32_t j = 0; j < info.keys_added_; ++j) {
        if (FLAGS_use_plain_table_bloom) {
          uint32_t hash = GetSliceHash(kms_[0].Get(info.filter_id_, j));
          ALWAYS_ASSERT(info.plain_table_bloom_->MayContainHash(hash));
        } else {
          ALWAYS_ASSERT(
              info.reader_->MayMatch(kms_[0].Get(info.filter_id_, j)));
        }
      }
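      // Outside keys: setting the top bit of val_num guarantees (for key
      // counts below 2^31) that these keys were never added, so every match
      // counted here is a false positive.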
      for (uint32_t j = 0; j < outside_q_per_f; ++j) {
        if (FLAGS_use_plain_table_bloom) {
          uint32_t hash =
              GetSliceHash(kms_[0].Get(info.filter_id_, j | 0x80000000));
          fps += info.plain_table_bloom_->MayContainHash(hash);
        } else {
          fps += info.reader_->MayMatch(
              kms_[0].Get(info.filter_id_, j | 0x80000000));
        }
      }
    }
    std::cout << "  No FNs :)" << std::endl;
    double prelim_rate = double(fps) / outside_q_per_f / infos_.size();
    std::cout << "  Prelim FP rate %: " << (100.0 * prelim_rate) << std::endl;

    if (!FLAGS_allow_bad_fp_rate) {
      ALWAYS_ASSERT(prelim_rate < tolerable_rate);
    }
  }

  std::cout << "----------------------------" << std::endl;
  std::cout << "Mixed inside/outside queries..." << std::endl;
  // 50% each inside and outside
  uint32_t inside_threshold = UINT32_MAX / 2;
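  // For each mode, time a real pass and a dry-run pass over the identical
  // query sequence (same seed), and report the difference as net ns/op.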
  for (TestMode tm : testModes) {
    random_.Seed(FLAGS_seed + 1);
    double f = RandomQueryTest(inside_threshold, /*dry_run*/ false, tm);
    random_.Seed(FLAGS_seed + 1);
    double d = RandomQueryTest(inside_threshold, /*dry_run*/ true, tm);
    std::cout << "  " << TestModeToString(tm) << " net ns/op: " << (f - d)
              << std::endl;
  }

  if (!FLAGS_quick) {
    std::cout << "----------------------------" << std::endl;
    std::cout << "Inside queries (mostly)..." << std::endl;
    // Do about 95% inside queries rather than 100% so that branch predictor
    // can't give itself an artificially crazy advantage.
    inside_threshold = UINT32_MAX / 20 * 19;
    for (TestMode tm : testModes) {
      random_.Seed(FLAGS_seed + 1);
      double f = RandomQueryTest(inside_threshold, /*dry_run*/ false, tm);
      random_.Seed(FLAGS_seed + 1);
      double d = RandomQueryTest(inside_threshold, /*dry_run*/ true, tm);
      std::cout << "  " << TestModeToString(tm) << " net ns/op: " << (f - d)
                << std::endl;
    }

    std::cout << "----------------------------" << std::endl;
    std::cout << "Outside queries (mostly)..." << std::endl;
    // Do about 95% outside queries rather than 100% so that branch predictor
    // can't give itself an artificially crazy advantage.
    inside_threshold = UINT32_MAX / 20;
    for (TestMode tm : testModes) {
      random_.Seed(FLAGS_seed + 2);
      double f = RandomQueryTest(inside_threshold, /*dry_run*/ false, tm);
      random_.Seed(FLAGS_seed + 2);
      double d = RandomQueryTest(inside_threshold, /*dry_run*/ true, tm);
      std::cout << "  " << TestModeToString(tm) << " net ns/op: " << (f - d)
                << std::endl;
    }
  }
  std::cout << fp_rate_report_.str();

  std::cout << "----------------------------" << std::endl;
  std::cout << "Done. (For more info, run with -legend or -help.)" << std::endl;
}

double FilterBench::RandomQueryTest(uint32_t inside_threshold, bool dry_run,
                                    TestMode mode) {
  for (auto &info : infos_) {
    info.outside_queries_ = 0;
    info.false_positives_ = 0;
  }
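  // Choose a dry-run baseline matching the hash the real query path uses:
  // legacy Bloom (-impl=0) and plain table Bloom consume a 32-bit hash,
  // while format_version 5 Bloom and Ribbon consume a 64-bit hash.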
  auto dry_run_hash_fn = DryRunNoHash;
  if (!FLAGS_net_includes_hashing) {
    if (FLAGS_impl == 0 || FLAGS_use_plain_table_bloom) {
      dry_run_hash_fn = DryRunHash32;
    } else {
      dry_run_hash_fn = DryRunHash64;
    }
  }

  uint32_t num_infos = static_cast<uint32_t>(infos_.size());
  uint32_t dry_run_hash = 0;
  uint64_t max_queries = static_cast<uint64_t>(m_queries_ * 1000000 + 0.50);
  // Some filters may be considered secondary in order to implement skewed
  // queries. num_primary_filters is the number that are to be treated as
  // equal, and any remainder will be treated as secondary.
  uint32_t num_primary_filters = num_infos;
  // The proportion (when divided by 2^32 - 1) of filter queries going to
  // the primary filters (default = all). The remainder of queries are
  // against secondary filters.
  uint32_t primary_filter_threshold = 0xffffffff;
  if (mode == kSingleFilter) {
    // 100% of queries to 1 filter
    num_primary_filters = 1;
  } else if (mode == kFiftyOneFilter) {
    if (num_infos < 50) {
      return 0.0;  // skip
    }
    // 50% of queries
    primary_filter_threshold /= 2;
    // to 1% of filters
    num_primary_filters = (num_primary_filters + 99) / 100;
  } else if (mode == kEightyTwentyFilter) {
    if (num_infos < 5) {
      return 0.0;  // skip
    }
    // 80% of queries
    primary_filter_threshold = primary_filter_threshold / 5 * 4;
    // to 20% of filters
    num_primary_filters = (num_primary_filters + 4) / 5;
  } else if (mode == kRandomFilter) {
    if (num_infos == 1) {
      return 0.0;  // skip
    }
  }
  uint32_t batch_size = 1;
  std::unique_ptr<Slice[]> batch_slices;
  std::unique_ptr<Slice *[]> batch_slice_ptrs;
  std::unique_ptr<bool[]> batch_results;
  if (mode == kBatchPrepared || mode == kBatchUnprepared) {
    batch_size = static_cast<uint32_t>(kms_.size());
  }

  batch_slices.reset(new Slice[batch_size]);
  batch_slice_ptrs.reset(new Slice *[batch_size]);
  batch_results.reset(new bool[batch_size]);
  for (uint32_t i = 0; i < batch_size; ++i) {
    batch_results[i] = false;
    batch_slice_ptrs[i] = &batch_slices[i];
  }

  ROCKSDB_NAMESPACE::StopWatchNano timer(
      ROCKSDB_NAMESPACE::SystemClock::Default().get(), true);

  for (uint64_t q = 0; q < max_queries; q += batch_size) {
    bool inside_this_time = random_.Next() <= inside_threshold;

    uint32_t filter_index;
    if (random_.Next() <= primary_filter_threshold) {
      filter_index = random_.Uniformish(num_primary_filters);
    } else {
      // secondary
      filter_index = num_primary_filters +
                     random_.Uniformish(num_infos - num_primary_filters);
    }
    FilterInfo &info = infos_[filter_index];
    for (uint32_t i = 0; i < batch_size; ++i) {
      if (inside_this_time) {
        batch_slices[i] =
            kms_[i].Get(info.filter_id_, random_.Uniformish(info.keys_added_));
      } else {
        batch_slices[i] =
            kms_[i].Get(info.filter_id_, random_.Uniformish(info.keys_added_) |
                                             uint32_t{0x80000000});
        info.outside_queries_++;
      }
    }
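    // kBatchPrepared exercises the multi-key MayMatch overload below, which
    // can amortize hashing and memory access latency across the batch.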
    // TODO: implement batched interface to full block reader
    // TODO: implement batched interface to plain table bloom
    if (mode == kBatchPrepared && !FLAGS_use_full_block_reader &&
        !FLAGS_use_plain_table_bloom) {
      for (uint32_t i = 0; i < batch_size; ++i) {
        batch_results[i] = false;
      }
      if (dry_run) {
        for (uint32_t i = 0; i < batch_size; ++i) {
          batch_results[i] = true;
          dry_run_hash += dry_run_hash_fn(batch_slices[i]);
        }
      } else {
        info.reader_->MayMatch(batch_size, batch_slice_ptrs.get(),
                               batch_results.get());
      }
      for (uint32_t i = 0; i < batch_size; ++i) {
        if (inside_this_time) {
          ALWAYS_ASSERT(batch_results[i]);
        } else {
          info.false_positives_ += batch_results[i];
        }
      }
    } else {
      for (uint32_t i = 0; i < batch_size; ++i) {
        bool may_match;
        if (FLAGS_use_plain_table_bloom) {
          if (dry_run) {
            dry_run_hash += dry_run_hash_fn(batch_slices[i]);
            may_match = true;
          } else {
            uint32_t hash = GetSliceHash(batch_slices[i]);
            may_match = info.plain_table_bloom_->MayContainHash(hash);
          }
        } else if (FLAGS_use_full_block_reader) {
          if (dry_run) {
            dry_run_hash += dry_run_hash_fn(batch_slices[i]);
            may_match = true;
          } else {
            may_match = info.full_block_reader_->KeyMayMatch(
                batch_slices[i],
                /*no_io=*/false, /*const_ikey_ptr=*/nullptr,
                /*get_context=*/nullptr,
                /*lookup_context=*/nullptr, Env::IO_TOTAL);
          }
        } else {
          if (dry_run) {
            dry_run_hash += dry_run_hash_fn(batch_slices[i]);
            may_match = true;
          } else {
            may_match = info.reader_->MayMatch(batch_slices[i]);
          }
        }
        if (inside_this_time) {
          ALWAYS_ASSERT(may_match);
        } else {
          info.false_positives_ += may_match;
        }
      }
    }
  }

  uint64_t elapsed_nanos = timer.ElapsedNanos();
  double ns = double(elapsed_nanos) / max_queries;

  if (!FLAGS_quick) {
    if (dry_run) {
      // Printing part of hash prevents dry run components from being optimized
      // away by compiler
      std::cout << "  Dry run (" << std::hex << (dry_run_hash & 0xfffff)
                << std::dec << ") ";
    } else {
      std::cout << "  Gross filter ";
    }
    std::cout << "ns/op: " << ns << std::endl;
  }

  if (!dry_run) {
    fp_rate_report_.str("");
    uint64_t q = 0;
    uint64_t fp = 0;
    double worst_fp_rate = 0.0;
    double best_fp_rate = 1.0;
    for (auto &info : infos_) {
      q += info.outside_queries_;
      fp += info.false_positives_;
      if (info.outside_queries_ > 0) {
        double fp_rate = double(info.false_positives_) / info.outside_queries_;
        worst_fp_rate = std::max(worst_fp_rate, fp_rate);
        best_fp_rate = std::min(best_fp_rate, fp_rate);
      }
    }
    fp_rate_report_ << "  Average FP rate %: " << 100.0 * fp / q << std::endl;
    if (!FLAGS_quick && !FLAGS_best_case) {
      fp_rate_report_ << "  Worst FP rate %: " << 100.0 * worst_fp_rate
                      << std::endl;
      fp_rate_report_ << "  Best FP rate %: " << 100.0 * best_fp_rate
                      << std::endl;
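      // An ideal filter achieving the observed FP rate p would need
      // log2(1/p) bits per key, i.e. the floor implied by the measurement.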
      fp_rate_report_ << "  Best possible bits/key: "
                      << -std::log(double(fp) / q) / std::log(2.0) << std::endl;
    }
  }
  return ns;
}

int main(int argc, char **argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  SetUsageMessage(std::string("\nUSAGE:\n") + std::string(argv[0]) +
                  " [-quick] [OTHER OPTIONS]...");
  ParseCommandLineFlags(&argc, &argv, true);

  PrintWarnings();

  if (FLAGS_legend) {
    std::cout
        << "Legend:" << std::endl
        << "  \"Inside\" - key that was added to filter" << std::endl
        << "  \"Outside\" - key that was not added to filter" << std::endl
        << "  \"FN\" - false negative query (must not happen)" << std::endl
        << "  \"FP\" - false positive query (OK at low rate)" << std::endl
        << "  \"Dry run\" - cost of testing and hashing overhead." << std::endl
        << "  \"Gross filter\" - cost of filter queries including testing "
        << "\n    and hashing overhead." << std::endl
        << "  \"net\" - best estimate of time in filter operation, without "
        << "\n    testing and hashing overhead (gross filter - dry run)"
        << std::endl
        << "  \"ns/op\" - nanoseconds per operation (key query or add)"
        << std::endl
        << "  \"Single filter\" - essentially minimum cost, assuming filter"
        << "\n    fits easily in L1 CPU cache." << std::endl
        << "  \"Batched, prepared\" - several queries at once against a"
        << "\n    randomly chosen filter, using multi-query interface."
        << std::endl
        << "  \"Batched, unprepared\" - similar, but using serial calls"
        << "\n    to single query interface." << std::endl
        << "  \"Random filter\" - a filter is chosen at random as target"
        << "\n    of each query." << std::endl
        << "  \"Skewed X% in Y%\" - like \"Random filter\" except Y% of"
        << "\n    the filters are designated as \"hot\" and receive X%"
        << "\n    of queries." << std::endl;
  } else {
    FilterBench b;
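    // Advance the seed between runs so each rebuild draws fresh keys and
    // query sequences.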
    for (uint32_t i = 0; i < FLAGS_runs; ++i) {
      b.Go();
      FLAGS_seed += 100;
      b.random_.Seed(FLAGS_seed);
    }
  }

  return 0;
}

#endif  // !defined(GFLAGS) || defined(ROCKSDB_LITE)