// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.
#include <cstdlib>
#include <functional>
#include <memory>
#include <unordered_set>

#include "cache/cache_entry_roles.h"
#include "cache/cache_key.h"
#include "cache/lru_cache.h"
#include "db/column_family.h"
#include "db/db_impl/db_impl.h"
#include "db/db_test_util.h"
#include "env/unique_id_gen.h"
#include "port/stack_trace.h"
#include "rocksdb/persistent_cache.h"
#include "rocksdb/statistics.h"
#include "rocksdb/table.h"
#include "rocksdb/table_properties.h"
#include "table/block_based/block_based_table_reader.h"
#include "table/unique_id_impl.h"
#include "util/compression.h"
#include "util/defer.h"
#include "util/hash.h"
#include "util/math.h"
#include "util/random.h"
#include "utilities/fault_injection_fs.h"

namespace ROCKSDB_NAMESPACE {

class DBBlockCacheTest : public DBTestBase {
 private:
  size_t miss_count_ = 0;
  size_t hit_count_ = 0;
  size_t insert_count_ = 0;
  size_t failure_count_ = 0;
  size_t compression_dict_miss_count_ = 0;
  size_t compression_dict_hit_count_ = 0;
  size_t compression_dict_insert_count_ = 0;
  size_t compressed_miss_count_ = 0;
  size_t compressed_hit_count_ = 0;
  size_t compressed_insert_count_ = 0;
  size_t compressed_failure_count_ = 0;

 public:
  const size_t kNumBlocks = 10;
  const size_t kValueSize = 100;

  DBBlockCacheTest()
      : DBTestBase("db_block_cache_test", /*env_do_fsync=*/true) {}

  BlockBasedTableOptions GetTableOptions() {
    BlockBasedTableOptions table_options;
    // Set a small enough block size so that each key-value gets its own block.
    table_options.block_size = 1;
    return table_options;
  }

  Options GetOptions(const BlockBasedTableOptions& table_options) {
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.avoid_flush_during_recovery = false;
    // options.compression = kNoCompression;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    return options;
  }

  void InitTable(const Options& /*options*/) {
    std::string value(kValueSize, 'a');
    for (size_t i = 0; i < kNumBlocks; i++) {
      ASSERT_OK(Put(std::to_string(i), value.c_str()));
    }
  }

  void RecordCacheCounters(const Options& options) {
    miss_count_ = TestGetTickerCount(options, BLOCK_CACHE_MISS);
    hit_count_ = TestGetTickerCount(options, BLOCK_CACHE_HIT);
    insert_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    failure_count_ = TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES);
    compressed_miss_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS);
    compressed_hit_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT);
    compressed_insert_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD);
    compressed_failure_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD_FAILURES);
  }

  void RecordCacheCountersForCompressionDict(const Options& options) {
    compression_dict_miss_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS);
    compression_dict_hit_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_HIT);
    compression_dict_insert_count_ =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_ADD);
  }

  void CheckCacheCounters(const Options& options, size_t expected_misses,
                          size_t expected_hits, size_t expected_inserts,
                          size_t expected_failures) {
    size_t new_miss_count = TestGetTickerCount(options, BLOCK_CACHE_MISS);
    size_t new_hit_count = TestGetTickerCount(options, BLOCK_CACHE_HIT);
    size_t new_insert_count = TestGetTickerCount(options, BLOCK_CACHE_ADD);
    size_t new_failure_count =
        TestGetTickerCount(options, BLOCK_CACHE_ADD_FAILURES);
    ASSERT_EQ(miss_count_ + expected_misses, new_miss_count);
    ASSERT_EQ(hit_count_ + expected_hits, new_hit_count);
    ASSERT_EQ(insert_count_ + expected_inserts, new_insert_count);
    ASSERT_EQ(failure_count_ + expected_failures, new_failure_count);
    miss_count_ = new_miss_count;
    hit_count_ = new_hit_count;
    insert_count_ = new_insert_count;
    failure_count_ = new_failure_count;
  }

  void CheckCacheCountersForCompressionDict(
      const Options& options, size_t expected_compression_dict_misses,
      size_t expected_compression_dict_hits,
      size_t expected_compression_dict_inserts) {
    size_t new_compression_dict_miss_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS);
    size_t new_compression_dict_hit_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_HIT);
    size_t new_compression_dict_insert_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_ADD);
    ASSERT_EQ(compression_dict_miss_count_ + expected_compression_dict_misses,
              new_compression_dict_miss_count);
    ASSERT_EQ(compression_dict_hit_count_ + expected_compression_dict_hits,
              new_compression_dict_hit_count);
    ASSERT_EQ(
        compression_dict_insert_count_ + expected_compression_dict_inserts,
        new_compression_dict_insert_count);
    compression_dict_miss_count_ = new_compression_dict_miss_count;
    compression_dict_hit_count_ = new_compression_dict_hit_count;
    compression_dict_insert_count_ = new_compression_dict_insert_count;
  }

  void CheckCompressedCacheCounters(const Options& options,
                                    size_t expected_misses,
                                    size_t expected_hits,
                                    size_t expected_inserts,
                                    size_t expected_failures) {
    size_t new_miss_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS);
    size_t new_hit_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT);
    size_t new_insert_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD);
    size_t new_failure_count =
        TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_ADD_FAILURES);
    ASSERT_EQ(compressed_miss_count_ + expected_misses, new_miss_count);
    ASSERT_EQ(compressed_hit_count_ + expected_hits, new_hit_count);
    ASSERT_EQ(compressed_insert_count_ + expected_inserts, new_insert_count);
    ASSERT_EQ(compressed_failure_count_ + expected_failures, new_failure_count);
    compressed_miss_count_ = new_miss_count;
    compressed_hit_count_ = new_hit_count;
    compressed_insert_count_ = new_insert_count;
    compressed_failure_count_ = new_failure_count;
  }

#ifndef ROCKSDB_LITE
  const std::array<size_t, kNumCacheEntryRoles> GetCacheEntryRoleCountsBg() {
    // Verify in cache entry role stats
    std::array<size_t, kNumCacheEntryRoles> cache_entry_role_counts;
    std::map<std::string, std::string> values;
    EXPECT_TRUE(db_->GetMapProperty(DB::Properties::kFastBlockCacheEntryStats,
                                    &values));
    for (size_t i = 0; i < kNumCacheEntryRoles; ++i) {
      auto role = static_cast<CacheEntryRole>(i);
      cache_entry_role_counts[i] =
          ParseSizeT(values[BlockCacheEntryStatsMapKeys::EntryCount(role)]);
    }
    return cache_entry_role_counts;
  }
#endif  // ROCKSDB_LITE
};

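// The tests below use the helpers above in a snapshot/delta pattern: call
// RecordCacheCounters() (and the compression-dict variant) to capture ticker
// baselines after (re)opening the DB, then CheckCacheCounters() after each
// operation to assert on the per-operation deltas.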
TEST_F(DBBlockCacheTest, IteratorBlockCacheUsage) {
  ReadOptions read_options;
  read_options.fill_cache = false;
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  InitTable(options);

  LRUCacheOptions co;
  co.capacity = 0;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  // Needed not to count entry stats collector
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  RecordCacheCounters(options);

  std::vector<std::unique_ptr<Iterator>> iterators(kNumBlocks - 1);
  Iterator* iter = nullptr;

  ASSERT_EQ(0, cache->GetUsage());
  iter = db_->NewIterator(read_options);
  iter->Seek(std::to_string(0));
  ASSERT_LT(0, cache->GetUsage());
  delete iter;
  iter = nullptr;
  ASSERT_EQ(0, cache->GetUsage());
}

TEST_F(DBBlockCacheTest, TestWithoutCompressedBlockCache) {
  ReadOptions read_options;
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  InitTable(options);

  LRUCacheOptions co;
  co.capacity = 0;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  // Needed not to count entry stats collector
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  RecordCacheCounters(options);

  std::vector<std::unique_ptr<Iterator>> iterators(kNumBlocks - 1);
  Iterator* iter = nullptr;

  // Load blocks into cache.
  for (size_t i = 0; i + 1 < kNumBlocks; i++) {
    iter = db_->NewIterator(read_options);
    iter->Seek(std::to_string(i));
    ASSERT_OK(iter->status());
    CheckCacheCounters(options, 1, 0, 1, 0);
    iterators[i].reset(iter);
  }
  size_t usage = cache->GetUsage();
  ASSERT_LT(0, usage);
  cache->SetCapacity(usage);
  ASSERT_EQ(usage, cache->GetPinnedUsage());

  // Test with strict capacity limit.
  cache->SetStrictCapacityLimit(true);
  iter = db_->NewIterator(read_options);
  iter->Seek(std::to_string(kNumBlocks - 1));
  ASSERT_TRUE(iter->status().IsMemoryLimit());
  CheckCacheCounters(options, 1, 0, 0, 1);
  delete iter;
  iter = nullptr;

  // Release iterators and access cache again.
  for (size_t i = 0; i + 1 < kNumBlocks; i++) {
    iterators[i].reset();
    CheckCacheCounters(options, 0, 0, 0, 0);
  }
  ASSERT_EQ(0, cache->GetPinnedUsage());
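  // With every iterator released, nothing is pinned anymore; the blocks loaded
  // above are still resident in the cache, so re-seeking each key below should
  // be a pure cache hit (no miss, no insert).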
  for (size_t i = 0; i + 1 < kNumBlocks; i++) {
    iter = db_->NewIterator(read_options);
    iter->Seek(std::to_string(i));
    ASSERT_OK(iter->status());
    CheckCacheCounters(options, 0, 1, 0, 0);
    iterators[i].reset(iter);
  }
}

#ifdef SNAPPY
TEST_F(DBBlockCacheTest, TestWithCompressedBlockCache) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.no_block_cache = true;
  table_options.block_cache_compressed = nullptr;
  table_options.block_size = 1;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  table_options.cache_index_and_filter_blocks = false;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  options.compression = CompressionType::kSnappyCompression;

  DestroyAndReopen(options);

  std::string value(kValueSize, 'a');
  for (size_t i = 0; i < kNumBlocks; i++) {
    ASSERT_OK(Put(std::to_string(i), value));
    ASSERT_OK(Flush());
  }

  ReadOptions read_options;
  std::shared_ptr<Cache> compressed_cache = NewLRUCache(1 << 25, 0, false);
  LRUCacheOptions co;
  co.capacity = 0;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  // Needed not to count entry stats collector
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  table_options.no_block_cache = false;
  table_options.block_cache_compressed = compressed_cache;
  table_options.max_auto_readahead_size = 0;
  table_options.cache_index_and_filter_blocks = false;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  RecordCacheCounters(options);

  // Load blocks into cache.
  for (size_t i = 0; i < kNumBlocks - 1; i++) {
    ASSERT_EQ(value, Get(std::to_string(i)));
    CheckCacheCounters(options, 1, 0, 1, 0);
    CheckCompressedCacheCounters(options, 1, 0, 1, 0);
  }

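  // Note: the uncompressed block cache was created with zero capacity, so the
  // blocks inserted by the Gets above are dropped as soon as they are
  // released; only the compressed cache is expected to retain entries here.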
  size_t usage = cache->GetUsage();
  ASSERT_EQ(0, usage);
  ASSERT_EQ(usage, cache->GetPinnedUsage());
  size_t compressed_usage = compressed_cache->GetUsage();
  ASSERT_LT(0, compressed_usage);
  // Compressed block cache cannot be pinned.
  ASSERT_EQ(0, compressed_cache->GetPinnedUsage());

  // Set strict capacity limit flag. Now blocks will only load into the
  // compressed block cache.
  cache->SetCapacity(usage);
  cache->SetStrictCapacityLimit(true);
  ASSERT_EQ(usage, cache->GetPinnedUsage());

  // Load last key block.
  ASSERT_EQ(
      "Operation aborted: Memory limit reached: Insert failed due to LRU cache "
      "being full.",
      Get(std::to_string(kNumBlocks - 1)));
  // Failure will also record the miss counter.
  CheckCacheCounters(options, 1, 0, 0, 1);
  CheckCompressedCacheCounters(options, 1, 0, 1, 0);

  // Clear strict capacity limit flag. This time we shall hit the compressed
  // block cache and load into the block cache.
  cache->SetStrictCapacityLimit(false);
  // Load last key block.
  ASSERT_EQ(value, Get(std::to_string(kNumBlocks - 1)));
  CheckCacheCounters(options, 1, 0, 1, 0);
  CheckCompressedCacheCounters(options, 0, 1, 0, 0);
}

namespace {
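// Test-only adapter that exposes a regular Cache through the PersistentCache
// interface, so the key-space sharing checks in TestWithSameCompressed can
// also be exercised against persistent_cache.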
class PersistentCacheFromCache : public PersistentCache {
 public:
  PersistentCacheFromCache(std::shared_ptr<Cache> cache, bool read_only)
      : cache_(cache), read_only_(read_only) {}

  Status Insert(const Slice& key, const char* data,
                const size_t size) override {
    if (read_only_) {
      return Status::NotSupported();
    }
    std::unique_ptr<char[]> copy{new char[size]};
    std::copy_n(data, size, copy.get());
    Status s = cache_->Insert(
        key, copy.get(), size,
        GetCacheEntryDeleterForRole<char[], CacheEntryRole::kMisc>());
    if (s.ok()) {
      copy.release();
    }
    return s;
  }

  Status Lookup(const Slice& key, std::unique_ptr<char[]>* data,
                size_t* size) override {
    auto handle = cache_->Lookup(key);
    if (handle) {
      char* ptr = static_cast<char*>(cache_->Value(handle));
      *size = cache_->GetCharge(handle);
      data->reset(new char[*size]);
      std::copy_n(ptr, *size, data->get());
      cache_->Release(handle);
      return Status::OK();
    } else {
      return Status::NotFound();
    }
  }

  bool IsCompressed() override { return false; }

  StatsType Stats() override { return StatsType(); }

  std::string GetPrintableOptions() const override { return ""; }

  uint64_t NewId() override { return cache_->NewId(); }

 private:
  std::shared_ptr<Cache> cache_;
  bool read_only_;
};

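// Cache wrapper whose Insert override always returns Status::NotSupported(),
// used below to exercise the "read-only" corner cases of the cache sharing
// validation.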
class ReadOnlyCacheWrapper : public CacheWrapper {
  using CacheWrapper::CacheWrapper;

  using Cache::Insert;
  Status Insert(const Slice& /*key*/, void* /*value*/, size_t /*charge*/,
                void (*)(const Slice& key, void* value) /*deleter*/,
                Handle** /*handle*/, Priority /*priority*/) override {
    return Status::NotSupported();
  }
};

}  // anonymous namespace

TEST_F(DBBlockCacheTest, TestWithSameCompressed) {
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  InitTable(options);

  std::shared_ptr<Cache> rw_cache{NewLRUCache(1000000)};
  std::shared_ptr<PersistentCacheFromCache> rw_pcache{
      new PersistentCacheFromCache(rw_cache, /*read_only*/ false)};
  // Exercise some obscure behavior with read-only wrappers
  std::shared_ptr<Cache> ro_cache{new ReadOnlyCacheWrapper(rw_cache)};
  std::shared_ptr<PersistentCacheFromCache> ro_pcache{
      new PersistentCacheFromCache(rw_cache, /*read_only*/ true)};

  // Simple same pointer
  table_options.block_cache = rw_cache;
  table_options.block_cache_compressed = rw_cache;
  table_options.persistent_cache.reset();
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  ASSERT_EQ(TryReopen(options).ToString(),
            "Invalid argument: block_cache same as block_cache_compressed not "
            "currently supported, and would be bad for performance anyway");

  // Other cases
  table_options.block_cache = ro_cache;
  table_options.block_cache_compressed = rw_cache;
  table_options.persistent_cache.reset();
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  ASSERT_EQ(TryReopen(options).ToString(),
            "Invalid argument: block_cache and block_cache_compressed share "
            "the same key space, which is not supported");

  table_options.block_cache = rw_cache;
  table_options.block_cache_compressed = ro_cache;
  table_options.persistent_cache.reset();
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  ASSERT_EQ(TryReopen(options).ToString(),
            "Invalid argument: block_cache_compressed and block_cache share "
            "the same key space, which is not supported");

  table_options.block_cache = ro_cache;
  table_options.block_cache_compressed.reset();
  table_options.persistent_cache = rw_pcache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  ASSERT_EQ(TryReopen(options).ToString(),
            "Invalid argument: block_cache and persistent_cache share the same "
            "key space, which is not supported");

  table_options.block_cache = rw_cache;
  table_options.block_cache_compressed.reset();
  table_options.persistent_cache = ro_pcache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  ASSERT_EQ(TryReopen(options).ToString(),
            "Invalid argument: persistent_cache and block_cache share the same "
            "key space, which is not supported");

  table_options.block_cache.reset();
  table_options.no_block_cache = true;
  table_options.block_cache_compressed = ro_cache;
  table_options.persistent_cache = rw_pcache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  ASSERT_EQ(TryReopen(options).ToString(),
            "Invalid argument: block_cache_compressed and persistent_cache "
            "share the same key space, which is not supported");

  table_options.block_cache.reset();
  table_options.no_block_cache = true;
  table_options.block_cache_compressed = rw_cache;
  table_options.persistent_cache = ro_pcache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  ASSERT_EQ(TryReopen(options).ToString(),
            "Invalid argument: persistent_cache and block_cache_compressed "
            "share the same key space, which is not supported");
}
#endif  // SNAPPY

#ifndef ROCKSDB_LITE

// Make sure that when options.block_cache is set, after a new table is
// created its index/filter blocks are added to block cache.
TEST_F(DBBlockCacheTest, IndexAndFilterBlocksOfNewTableAddedToCache) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = true;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "key", "val"));
  // Create a new table.
  ASSERT_OK(Flush(1));

  // index/filter blocks added to block cache right after table creation.
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(2, /* only index/filter were added */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));
  ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
  uint64_t int_num;
  ASSERT_TRUE(
      dbfull()->GetIntProperty("rocksdb.estimate-table-readers-mem", &int_num));
  ASSERT_EQ(int_num, 0U);

  // Make sure filter block is in cache.
  std::string value;
  ReadOptions ropt;
  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);

  // Miss count should remain the same.
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));

  db_->KeyMayExist(ReadOptions(), handles_[1], "key", &value);
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
  ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_HIT));

  // Make sure index block is in cache.
  auto index_block_hit = TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT);
  value = Get(1, "key");
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(index_block_hit + 1,
            TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));

  value = Get(1, "key");
  ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
  ASSERT_EQ(index_block_hit + 2,
            TestGetTickerCount(options, BLOCK_CACHE_INDEX_HIT));
}

// With fill_cache = false, fill up the cache, then iterate over the entire DB
// to verify that the dummy entries inserted in
// `BlockBasedTable::NewDataBlockIterator` do not cause heap-use-after-free
// errors in COMPILE_WITH_ASAN=1 runs.
TEST_F(DBBlockCacheTest, FillCacheAndIterateDB) {
  ReadOptions read_options;
  read_options.fill_cache = false;
  auto table_options = GetTableOptions();
  auto options = GetOptions(table_options);
  InitTable(options);

  std::shared_ptr<Cache> cache = NewLRUCache(10, 0, true);
  table_options.block_cache = cache;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  Reopen(options);
  ASSERT_OK(Put("key1", "val1"));
  ASSERT_OK(Put("key2", "val2"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("key3", "val3"));
  ASSERT_OK(Put("key4", "val4"));
  ASSERT_OK(Flush());
  ASSERT_OK(Put("key5", "val5"));
  ASSERT_OK(Put("key6", "val6"));
  ASSERT_OK(Flush());

  Iterator* iter = nullptr;

  iter = db_->NewIterator(read_options);
  iter->Seek(std::to_string(0));
  while (iter->Valid()) {
    iter->Next();
  }
  delete iter;
  iter = nullptr;
}

TEST_F(DBBlockCacheTest, IndexAndFilterBlocksStats) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = true;
  LRUCacheOptions co;
  // 500 bytes are enough to hold the first two blocks
  co.capacity = 500;
  co.num_shard_bits = 0;
  co.strict_capacity_limit = false;
  co.metadata_charge_policy = kDontChargeCacheMetadata;
  std::shared_ptr<Cache> cache = NewLRUCache(co);
  table_options.block_cache = cache;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20, true));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "longer_key", "val"));
  // Create a new table
  ASSERT_OK(Flush(1));
  size_t index_bytes_insert =
      TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_INSERT);
  size_t filter_bytes_insert =
      TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_INSERT);
  ASSERT_GT(index_bytes_insert, 0);
  ASSERT_GT(filter_bytes_insert, 0);
  ASSERT_EQ(cache->GetUsage(), index_bytes_insert + filter_bytes_insert);
  // set the cache capacity to the current usage
  cache->SetCapacity(index_bytes_insert + filter_bytes_insert);
  // The index and filter eviction statistics were broken by the refactoring
  // that moved the readers out of the block cache. Disabling these until we
  // can bring the stats back.
  // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_EVICT), 0);
  // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_EVICT), 0);
  // Note that the second key needs to be no longer than the first one.
  // Otherwise the second index block may not fit in cache.
  ASSERT_OK(Put(1, "key", "val"));
  // Create a new table
  ASSERT_OK(Flush(1));
  // cache evicted old index and block entries
  ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_INSERT),
            index_bytes_insert);
  ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_INSERT),
            filter_bytes_insert);
  // The index and filter eviction statistics were broken by the refactoring
  // that moved the readers out of the block cache. Disabling these until we
  // can bring the stats back.
  // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_INDEX_BYTES_EVICT),
  //           index_bytes_insert);
  // ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_FILTER_BYTES_EVICT),
  //           filter_bytes_insert);
}

#if (defined OS_LINUX || defined OS_WIN)
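// With PrepopulateBlockCache::kFlushOnly, data blocks written by a flush are
// inserted into the block cache immediately, so the first read of each newly
// flushed key is expected to be a cache hit rather than a miss.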
TEST_F(DBBlockCacheTest, WarmCacheWithDataBlocksDuringFlush) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1 << 25, 0, false);
  table_options.cache_index_and_filter_blocks = false;
  table_options.prepopulate_block_cache =
      BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  std::string value(kValueSize, 'a');
  for (size_t i = 1; i <= kNumBlocks; i++) {
    ASSERT_OK(Put(std::to_string(i), value));
    ASSERT_OK(Flush());
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
    ASSERT_EQ(value, Get(std::to_string(i)));
    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT));
  }
  // Verify compaction not counted
  ASSERT_OK(db_->CompactRange(CompactRangeOptions(), /*begin=*/nullptr,
                              /*end=*/nullptr));
  EXPECT_EQ(kNumBlocks,
            options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
}

// This test caches data, index, and filter blocks during flush.
class DBBlockCacheTest1 : public DBTestBase,
                          public ::testing::WithParamInterface<uint32_t> {
 public:
  const size_t kNumBlocks = 10;
  const size_t kValueSize = 100;
  DBBlockCacheTest1() : DBTestBase("db_block_cache_test1", true) {}
};

INSTANTIATE_TEST_CASE_P(DBBlockCacheTest1, DBBlockCacheTest1,
                        ::testing::Values(1, 2));

TEST_P(DBBlockCacheTest1, WarmCacheWithBlocksDuringFlush) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.disable_auto_compactions = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1 << 25, 0, false);

  uint32_t filter_type = GetParam();
  switch (filter_type) {
    case 1:  // partition_filter
      table_options.partition_filters = true;
      table_options.index_type =
          BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
      table_options.filter_policy.reset(NewBloomFilterPolicy(10));
      break;
    case 2:  // full filter
      table_options.filter_policy.reset(NewBloomFilterPolicy(10));
      break;
    default:
      assert(false);
  }

  table_options.cache_index_and_filter_blocks = true;
  table_options.prepopulate_block_cache =
      BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  std::string value(kValueSize, 'a');
  for (size_t i = 1; i <= kNumBlocks; i++) {
    ASSERT_OK(Put(std::to_string(i), value));
    ASSERT_OK(Flush());
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
    if (filter_type == 1) {
      ASSERT_EQ(2 * i,
                options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
      ASSERT_EQ(2 * i,
                options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
    } else {
      ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
      ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
    }
    ASSERT_EQ(value, Get(std::to_string(i)));

    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT));

    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(i * 3, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_HIT));
    if (filter_type == 1) {
      ASSERT_EQ(i * 3,
                options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT));
    } else {
      ASSERT_EQ(i * 2,
                options.statistics->getTickerCount(BLOCK_CACHE_FILTER_HIT));
    }
    ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_MISS));
  }

  // Verify compaction not counted
  CompactRangeOptions cro;
  // Ensure files are rewritten, not just trivially moved.
  cro.bottommost_level_compaction = BottommostLevelCompaction::kForceOptimized;
  ASSERT_OK(db_->CompactRange(cro, /*begin=*/nullptr, /*end=*/nullptr));
  EXPECT_EQ(kNumBlocks,
            options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
  // Index and filter blocks are automatically warmed when the new table file
  // is automatically opened at the end of compaction. This is not easily
  // disabled so results in the new index and filter blocks being warmed.
  if (filter_type == 1) {
    EXPECT_EQ(2 * (1 + kNumBlocks),
              options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
    EXPECT_EQ(2 * (1 + kNumBlocks),
              options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
  } else {
    EXPECT_EQ(1 + kNumBlocks,
              options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
    EXPECT_EQ(1 + kNumBlocks,
              options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
  }
}

TEST_F(DBBlockCacheTest, DynamicallyWarmCacheDuringFlush) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

  BlockBasedTableOptions table_options;
  table_options.block_cache = NewLRUCache(1 << 25, 0, false);
  table_options.cache_index_and_filter_blocks = false;
  table_options.prepopulate_block_cache =
      BlockBasedTableOptions::PrepopulateBlockCache::kFlushOnly;

  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);

  std::string value(kValueSize, 'a');

  for (size_t i = 1; i <= 5; i++) {
    ASSERT_OK(Put(std::to_string(i), value));
    ASSERT_OK(Flush());
    ASSERT_EQ(1,
              options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));

    ASSERT_EQ(value, Get(std::to_string(i)));
    ASSERT_EQ(0,
              options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
    ASSERT_EQ(
        0, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_EQ(1,
              options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT));
  }

  ASSERT_OK(dbfull()->SetOptions(
      {{"block_based_table_factory", "{prepopulate_block_cache=kDisable;}"}}));

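  // With prepopulation disabled via SetOptions, newly flushed data blocks are
  // no longer added to the cache, so the first read of each key below must
  // miss and load the block itself.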
  for (size_t i = 6; i <= kNumBlocks; i++) {
    ASSERT_OK(Put(std::to_string(i), value));
    ASSERT_OK(Flush());
    ASSERT_EQ(0,
              options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));

    ASSERT_EQ(value, Get(std::to_string(i)));
    ASSERT_EQ(1,
              options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
    ASSERT_EQ(
        1, options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_MISS));
    ASSERT_EQ(0,
              options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_HIT));
  }
}
#endif

namespace {

// A mock cache that wraps LRUCache and records how many entries have been
// inserted for each priority.
class MockCache : public LRUCache {
 public:
  static uint32_t high_pri_insert_count;
  static uint32_t low_pri_insert_count;

  MockCache()
      : LRUCache((size_t)1 << 25 /*capacity*/, 0 /*num_shard_bits*/,
                 false /*strict_capacity_limit*/, 0.0 /*high_pri_pool_ratio*/,
                 0.0 /*low_pri_pool_ratio*/) {}

  using ShardedCache::Insert;

  Status Insert(const Slice& key, void* value,
                const Cache::CacheItemHelper* helper_cb, size_t charge,
                Handle** handle, Priority priority) override {
    DeleterFn delete_cb = helper_cb->del_cb;
    if (priority == Priority::LOW) {
      low_pri_insert_count++;
    } else {
      high_pri_insert_count++;
    }
    return LRUCache::Insert(key, value, charge, delete_cb, handle, priority);
  }
};

uint32_t MockCache::high_pri_insert_count = 0;
uint32_t MockCache::low_pri_insert_count = 0;

}  // anonymous namespace

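// Note: MockCache's insert counters are process-wide statics, so tests that
// assert on them reset both counters before exercising the cache.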
TEST_F(DBBlockCacheTest, IndexAndFilterBlocksCachePriority) {
  for (auto priority : {Cache::Priority::LOW, Cache::Priority::HIGH}) {
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    BlockBasedTableOptions table_options;
    table_options.cache_index_and_filter_blocks = true;
    table_options.block_cache.reset(new MockCache());
    table_options.filter_policy.reset(NewBloomFilterPolicy(20));
    table_options.cache_index_and_filter_blocks_with_high_priority =
        priority == Cache::Priority::HIGH ? true : false;
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    DestroyAndReopen(options);

    MockCache::high_pri_insert_count = 0;
    MockCache::low_pri_insert_count = 0;

    // Create a new table.
    ASSERT_OK(Put("foo", "value"));
    ASSERT_OK(Put("bar", "value"));
    ASSERT_OK(Flush());
    ASSERT_EQ(1, NumTableFilesAtLevel(0));

    // index/filter blocks added to block cache right after table creation.
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(2, /* only index/filter were added */
              TestGetTickerCount(options, BLOCK_CACHE_ADD));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));
    if (priority == Cache::Priority::LOW) {
      ASSERT_EQ(0u, MockCache::high_pri_insert_count);
      ASSERT_EQ(2u, MockCache::low_pri_insert_count);
    } else {
      ASSERT_EQ(2u, MockCache::high_pri_insert_count);
      ASSERT_EQ(0u, MockCache::low_pri_insert_count);
    }

    // Access data block.
    ASSERT_EQ("value", Get("foo"));

    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
    ASSERT_EQ(3, /*adding data block*/
              TestGetTickerCount(options, BLOCK_CACHE_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_MISS));

    // Data block should be inserted with low priority.
    if (priority == Cache::Priority::LOW) {
      ASSERT_EQ(0u, MockCache::high_pri_insert_count);
      ASSERT_EQ(3u, MockCache::low_pri_insert_count);
    } else {
      ASSERT_EQ(2u, MockCache::high_pri_insert_count);
      ASSERT_EQ(1u, MockCache::low_pri_insert_count);
    }
  }
}

namespace {

// An LRUCache wrapper that can falsely report "not found" on Lookup.
// This allows us to manipulate BlockBasedTableReader into thinking
// another thread inserted the data in between Lookup and Insert,
// while mostly preserving the LRUCache interface/behavior.
class LookupLiarCache : public CacheWrapper {
  int nth_lookup_not_found_ = 0;

 public:
  explicit LookupLiarCache(std::shared_ptr<Cache> target)
      : CacheWrapper(std::move(target)) {}

  using Cache::Lookup;
  Handle* Lookup(const Slice& key, Statistics* stats) override {
    if (nth_lookup_not_found_ == 1) {
      nth_lookup_not_found_ = 0;
      return nullptr;
    }
    if (nth_lookup_not_found_ > 1) {
      --nth_lookup_not_found_;
    }
    return CacheWrapper::Lookup(key, stats);
  }

  // 1 == next lookup, 2 == after next, etc.
  void SetNthLookupNotFound(int n) { nth_lookup_not_found_ = n; }
};

}  // anonymous namespace

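// Forcing a lookup to "miss" via LookupLiarCache makes the table reader
// re-read and re-insert a block that is already cached; those duplicate
// insertions are what the BLOCK_CACHE_*_ADD_REDUNDANT tickers checked below
// are expected to count.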
TEST_F(DBBlockCacheTest, AddRedundantStats) {
  const size_t capacity = size_t{1} << 25;
  const int num_shard_bits = 0;  // 1 shard
  int iterations_tested = 0;
  for (std::shared_ptr<Cache> base_cache :
       {NewLRUCache(capacity, num_shard_bits),
        HyperClockCacheOptions(
            capacity,
            BlockBasedTableOptions().block_size /*estimated_value_size*/,
            num_shard_bits)
            .MakeSharedCache()}) {
    if (!base_cache) {
      // Skip clock cache when not supported
      continue;
    }
    ++iterations_tested;
    Options options = CurrentOptions();
    options.create_if_missing = true;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

    std::shared_ptr<LookupLiarCache> cache =
        std::make_shared<LookupLiarCache>(base_cache);

    BlockBasedTableOptions table_options;
    table_options.cache_index_and_filter_blocks = true;
    table_options.block_cache = cache;
    table_options.filter_policy.reset(NewBloomFilterPolicy(50));
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    DestroyAndReopen(options);

    // Create a new table.
    ASSERT_OK(Put("foo", "value"));
    ASSERT_OK(Put("bar", "value"));
    ASSERT_OK(Flush());
    ASSERT_EQ(1, NumTableFilesAtLevel(0));

    // Normal access filter+index+data.
    ASSERT_EQ("value", Get("foo"));

    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
    // --------
    ASSERT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
    // --------
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));

    // Again access filter+index+data, but force redundant load+insert on index
    cache->SetNthLookupNotFound(2);
    ASSERT_EQ("value", Get("bar"));

    ASSERT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
    // --------
    ASSERT_EQ(4, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
    ASSERT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
    // --------
    ASSERT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));

    // Access just filter (with high probability), and force redundant
    // load+insert
    cache->SetNthLookupNotFound(1);
    ASSERT_EQ("NOT_FOUND", Get("this key was not added"));

    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
    // --------
    EXPECT_EQ(5, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
    EXPECT_EQ(0, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
    // --------
    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));

    // Access just data, forcing redundant load+insert
    ReadOptions read_options;
    std::unique_ptr<Iterator> iter{db_->NewIterator(read_options)};
    cache->SetNthLookupNotFound(1);
    iter->SeekToFirst();
    ASSERT_TRUE(iter->Valid());
    ASSERT_EQ(iter->key(), "bar");

    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD));
    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD));
    EXPECT_EQ(2, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD));
    // --------
    EXPECT_EQ(6, TestGetTickerCount(options, BLOCK_CACHE_ADD));

    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_INDEX_ADD_REDUNDANT));
    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_FILTER_ADD_REDUNDANT));
    EXPECT_EQ(1, TestGetTickerCount(options, BLOCK_CACHE_DATA_ADD_REDUNDANT));
    // --------
    EXPECT_EQ(3, TestGetTickerCount(options, BLOCK_CACHE_ADD_REDUNDANT));
  }
  EXPECT_GE(iterations_tested, 1);
}

TEST_F(DBBlockCacheTest, ParanoidFileChecks) {
  Options options = CurrentOptions();
  options.create_if_missing = true;
  options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
  options.level0_file_num_compaction_trigger = 2;
  options.paranoid_file_checks = true;
  BlockBasedTableOptions table_options;
  table_options.cache_index_and_filter_blocks = false;
  table_options.filter_policy.reset(NewBloomFilterPolicy(20));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "1_key", "val"));
  ASSERT_OK(Put(1, "9_key", "val"));
  // Create a new table.
  ASSERT_OK(Flush(1));
  ASSERT_EQ(1, /* read and cache data block */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));

  ASSERT_OK(Put(1, "1_key2", "val2"));
  ASSERT_OK(Put(1, "9_key2", "val2"));
  // Create a new SST file. This will further trigger a compaction
  // and generate another file.
  ASSERT_OK(Flush(1));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(3, /* Totally 3 files created up to now */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));

  // After disabling options.paranoid_file_checks, NO further block
  // is added after generating a new file.
  ASSERT_OK(
      dbfull()->SetOptions(handles_[1], {{"paranoid_file_checks", "false"}}));

  ASSERT_OK(Put(1, "1_key3", "val3"));
  ASSERT_OK(Put(1, "9_key3", "val3"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(Put(1, "1_key4", "val4"));
  ASSERT_OK(Put(1, "9_key4", "val4"));
  ASSERT_OK(Flush(1));
  ASSERT_OK(dbfull()->TEST_WaitForCompact());
  ASSERT_EQ(3, /* Totally 3 files created up to now */
            TestGetTickerCount(options, BLOCK_CACHE_ADD));
}

TEST_F(DBBlockCacheTest, CompressedCache) {
  if (!Snappy_Supported()) {
    return;
  }
  int num_iter = 80;

  // Run this test for four iterations.
  // Iteration 1: only an uncompressed block cache
  // Iteration 2: only a compressed block cache
  // Iteration 3: both block cache and compressed cache
  // Iteration 4: both block cache and compressed cache, but DB is not
  // compressed
  for (int iter = 0; iter < 4; iter++) {
    Options options = CurrentOptions();
    options.write_buffer_size = 64 * 1024;  // small write buffer
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();

    BlockBasedTableOptions table_options;
    switch (iter) {
      case 0:
        // only uncompressed block cache
        table_options.block_cache = NewLRUCache(8 * 1024);
        table_options.block_cache_compressed = nullptr;
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
        break;
      case 1:
        // no block cache, only compressed cache
        table_options.no_block_cache = true;
        table_options.block_cache = nullptr;
        table_options.block_cache_compressed = NewLRUCache(8 * 1024);
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
        break;
      case 2:
        // both compressed and uncompressed block cache
        table_options.block_cache = NewLRUCache(1024);
        table_options.block_cache_compressed = NewLRUCache(8 * 1024);
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
        break;
      case 3:
        // both block cache and compressed cache, but DB is not compressed
        // also, make block cache sizes bigger, to trigger block cache hits
        table_options.block_cache = NewLRUCache(1024 * 1024);
        table_options.block_cache_compressed = NewLRUCache(8 * 1024 * 1024);
        options.table_factory.reset(NewBlockBasedTableFactory(table_options));
        options.compression = kNoCompression;
        break;
      default:
        FAIL();
    }
    CreateAndReopenWithCF({"pikachu"}, options);
    // default column family doesn't have block cache
    Options no_block_cache_opts;
    no_block_cache_opts.statistics = options.statistics;
    no_block_cache_opts = CurrentOptions(no_block_cache_opts);
    BlockBasedTableOptions table_options_no_bc;
    table_options_no_bc.no_block_cache = true;
    no_block_cache_opts.table_factory.reset(
        NewBlockBasedTableFactory(table_options_no_bc));
    ReopenWithColumnFamilies(
        {"default", "pikachu"},
        std::vector<Options>({no_block_cache_opts, options}));

    Random rnd(301);

    // Write ~80KB (80 values, each ~1KB)
    ASSERT_EQ(NumTableFilesAtLevel(0, 1), 0);
    std::vector<std::string> values;
    std::string str;
    for (int i = 0; i < num_iter; i++) {
      if (i % 4 == 0) {  // high compression ratio
        str = rnd.RandomString(1000);
      }
      values.push_back(str);
      ASSERT_OK(Put(1, Key(i), values[i]));
    }

    // flush all data from memtable so that reads are from block cache
    ASSERT_OK(Flush(1));

    for (int i = 0; i < num_iter; i++) {
      ASSERT_EQ(Get(1, Key(i)), values[i]);
    }

    // check that we triggered the appropriate code paths in the cache
    switch (iter) {
      case 0:
        // only uncompressed block cache
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
        break;
      case 1:
        // no block cache, only compressed cache
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
        break;
      case 2:
        // both compressed and uncompressed block cache
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
        break;
      case 3:
        // both compressed and uncompressed block cache
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_MISS), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_HIT), 0);
        ASSERT_GT(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_MISS), 0);
        // compressed doesn't have any hits since blocks are not compressed on
        // storage
        ASSERT_EQ(TestGetTickerCount(options, BLOCK_CACHE_COMPRESSED_HIT), 0);
        break;
      default:
        FAIL();
    }

    options.create_if_missing = true;
    DestroyAndReopen(options);
  }
}

TEST_F(DBBlockCacheTest, CacheCompressionDict) {
  const int kNumFiles = 4;
  const int kNumEntriesPerFile = 128;
  const int kNumBytesPerEntry = 1024;

  // Try all the available libraries that support dictionary compression
  std::vector<CompressionType> compression_types;
  if (Zlib_Supported()) {
    compression_types.push_back(kZlibCompression);
  }
  if (LZ4_Supported()) {
    compression_types.push_back(kLZ4Compression);
    compression_types.push_back(kLZ4HCCompression);
  }
  if (ZSTD_Supported()) {
    compression_types.push_back(kZSTD);
  } else if (ZSTDNotFinal_Supported()) {
    compression_types.push_back(kZSTDNotFinalCompression);
  }
  Random rnd(301);
  for (auto compression_type : compression_types) {
    Options options = CurrentOptions();
    options.bottommost_compression = compression_type;
    options.bottommost_compression_opts.max_dict_bytes = 4096;
    options.bottommost_compression_opts.enabled = true;
    options.create_if_missing = true;
    options.num_levels = 2;
    options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
    options.target_file_size_base = kNumEntriesPerFile * kNumBytesPerEntry;
    BlockBasedTableOptions table_options;
    table_options.cache_index_and_filter_blocks = true;
    table_options.block_cache.reset(new MockCache());
    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
    DestroyAndReopen(options);

    RecordCacheCountersForCompressionDict(options);

    for (int i = 0; i < kNumFiles; ++i) {
      ASSERT_EQ(i, NumTableFilesAtLevel(0, 0));
      for (int j = 0; j < kNumEntriesPerFile; ++j) {
        std::string value = rnd.RandomString(kNumBytesPerEntry);
        ASSERT_OK(Put(Key(j * kNumFiles + i), value.c_str()));
      }
      ASSERT_OK(Flush());
    }
    ASSERT_OK(dbfull()->TEST_WaitForCompact());
    ASSERT_EQ(0, NumTableFilesAtLevel(0));
    ASSERT_EQ(kNumFiles, NumTableFilesAtLevel(1));

    // Compression dictionary blocks are preloaded.
    CheckCacheCountersForCompressionDict(
        options, kNumFiles /* expected_compression_dict_misses */,
        0 /* expected_compression_dict_hits */,
        kNumFiles /* expected_compression_dict_inserts */);

    // Seek to a key in a file. It should cause the SST's dictionary meta-block
    // to be read.
    RecordCacheCounters(options);
    RecordCacheCountersForCompressionDict(options);
    ReadOptions read_options;
    ASSERT_NE("NOT_FOUND", Get(Key(kNumFiles * kNumEntriesPerFile - 1)));
    // Two block hits: index and dictionary since they are prefetched
    // One block missed/added: data block
    CheckCacheCounters(options, 1 /* expected_misses */, 2 /* expected_hits */,
                       1 /* expected_inserts */, 0 /* expected_failures */);
    CheckCacheCountersForCompressionDict(
        options, 0 /* expected_compression_dict_misses */,
        1 /* expected_compression_dict_hits */,
        0 /* expected_compression_dict_inserts */);
  }
}

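// Erase all block cache entries that belong to a known cache entry role while
// leaving the CacheEntryStatsCollector entry in place, so role-count checks
// can start from a clean cache.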
static void ClearCache(Cache* cache) {
  auto roles = CopyCacheDeleterRoleMap();
  std::deque<std::string> keys;
  Cache::ApplyToAllEntriesOptions opts;
  auto callback = [&](const Slice& key, void* /*value*/, size_t /*charge*/,
                      Cache::DeleterFn deleter) {
    if (roles.find(deleter) == roles.end()) {
      // Keep the stats collector
      return;
    }
    keys.push_back(key.ToString());
  };
  cache->ApplyToAllEntries(callback, opts);
  for (auto& k : keys) {
    cache->Erase(k);
  }
}

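// The cache entry role stats are produced by a collector that only re-scans
// the cache after a minimum interval has elapsed, so this test advances the
// mocked clock (MockSleepForSeconds) to either force or avoid a fresh scan.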
1295TEST_F(DBBlockCacheTest, CacheEntryRoleStats) {
1296 const size_t capacity = size_t{1} << 25;
1297 int iterations_tested = 0;
1298 for (bool partition : {false, true}) {
1299 for (std::shared_ptr<Cache> cache :
1300 {NewLRUCache(capacity),
1301 HyperClockCacheOptions(
1302 capacity,
1303 BlockBasedTableOptions().block_size /*estimated_value_size*/)
1304 .MakeSharedCache()}) {
1305 ++iterations_tested;
1306
1307 Options options = CurrentOptions();
1308 SetTimeElapseOnlySleepOnReopen(&options);
1309 options.create_if_missing = true;
1310 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1311 options.max_open_files = 13;
1312 options.table_cache_numshardbits = 0;
1313 // If this wakes up, it could interfere with test
1314 options.stats_dump_period_sec = 0;
1315
1316 BlockBasedTableOptions table_options;
1317 table_options.block_cache = cache;
1318 table_options.cache_index_and_filter_blocks = true;
1319 table_options.filter_policy.reset(NewBloomFilterPolicy(50));
1320 if (partition) {
1321 table_options.index_type = BlockBasedTableOptions::kTwoLevelIndexSearch;
1322 table_options.partition_filters = true;
1323 }
1324 table_options.metadata_cache_options.top_level_index_pinning =
1325 PinningTier::kNone;
1326 table_options.metadata_cache_options.partition_pinning =
1327 PinningTier::kNone;
1328 table_options.metadata_cache_options.unpartitioned_pinning =
1329 PinningTier::kNone;
1330 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1331 DestroyAndReopen(options);
1332
1333 // Create a new table.
1334 ASSERT_OK(Put("foo", "value"));
1335 ASSERT_OK(Put("bar", "value"));
1336 ASSERT_OK(Flush());
1337
1338 ASSERT_OK(Put("zfoo", "value"));
1339 ASSERT_OK(Put("zbar", "value"));
1340 ASSERT_OK(Flush());
1341
1342 ASSERT_EQ(2, NumTableFilesAtLevel(0));
1343
1344 // Fresh cache
1345 ClearCache(cache.get());
1346
1347 std::array<size_t, kNumCacheEntryRoles> expected{};
1348 // For CacheEntryStatsCollector
1349 expected[static_cast<size_t>(CacheEntryRole::kMisc)] = 1;
1350 EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
1351
1352 std::array<size_t, kNumCacheEntryRoles> prev_expected = expected;
1353
1354 // First access only filters
1355 ASSERT_EQ("NOT_FOUND", Get("different from any key added"));
1356 expected[static_cast<size_t>(CacheEntryRole::kFilterBlock)] += 2;
1357 if (partition) {
1358 expected[static_cast<size_t>(CacheEntryRole::kFilterMetaBlock)] += 2;
1359 }
1360 // Within some time window, we will get cached entry stats
1361 EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
1362 // Not enough to force a miss
1363 env_->MockSleepForSeconds(45);
1364 EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
1365 // Enough to force a miss
1366 env_->MockSleepForSeconds(601);
1367 EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
1368
1369 // Now access index and data block
1370 ASSERT_EQ("value", Get("foo"));
1371 expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
1372 if (partition) {
1373 // top-level
1374 expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
1375 }
1376 expected[static_cast<size_t>(CacheEntryRole::kDataBlock)]++;
1377 // Enough to force a miss
1378 env_->MockSleepForSeconds(601);
1379 // But inject a simulated long scan so that we need a longer
1380 // interval to force a miss next time.
1381 SyncPoint::GetInstance()->SetCallBack(
1382 "CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries",
1383 [this](void*) {
1384 // To spend no more than 0.2% of time scanning, we would need
1385 // interval of at least 10000s
1386 env_->MockSleepForSeconds(20);
1387 });
1388 SyncPoint::GetInstance()->EnableProcessing();
1389 EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
1390 prev_expected = expected;
1391 SyncPoint::GetInstance()->DisableProcessing();
1392 SyncPoint::GetInstance()->ClearAllCallBacks();
1393
1394 // The same for other file
1395 ASSERT_EQ("value", Get("zfoo"));
1396 expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
1397 if (partition) {
1398 // top-level
1399 expected[static_cast<size_t>(CacheEntryRole::kIndexBlock)]++;
1400 }
1401 expected[static_cast<size_t>(CacheEntryRole::kDataBlock)]++;
1402 // Because of the simulated long scan, this is not enough to force
1403 // a miss
1404 env_->MockSleepForSeconds(601);
1405 EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
1406 // But this is enough
1407 env_->MockSleepForSeconds(10000);
1408 EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
1409 prev_expected = expected;
1410
1411 // Also check the GetProperty interface
1412 std::map<std::string, std::string> values;
1413 ASSERT_TRUE(
1414 db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values));
1415
1416 for (size_t i = 0; i < kNumCacheEntryRoles; ++i) {
1417 auto role = static_cast<CacheEntryRole>(i);
1418 EXPECT_EQ(std::to_string(expected[i]),
1419 values[BlockCacheEntryStatsMapKeys::EntryCount(role)]);
1420 }
1421
1422 // Add one for kWriteBuffer
1423 {
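    // A WriteBufferManager constructed with a cache charges its memory
    // reservations to that cache, which is why ReserveMem below adds an
    // entry under CacheEntryRole::kWriteBuffer.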
1424 WriteBufferManager wbm(size_t{1} << 20, cache);
1425 wbm.ReserveMem(1024);
1426 expected[static_cast<size_t>(CacheEntryRole::kWriteBuffer)]++;
1427    // Now we check that the GetProperty interface is more aggressive about
1428 // re-scanning stats, but not totally aggressive.
1429 // Within some time window, we will get cached entry stats
1430 env_->MockSleepForSeconds(1);
1431 EXPECT_EQ(std::to_string(prev_expected[static_cast<size_t>(
1432 CacheEntryRole::kWriteBuffer)]),
1433 values[BlockCacheEntryStatsMapKeys::EntryCount(
1434 CacheEntryRole::kWriteBuffer)]);
1435 // Not enough for a "background" miss but enough for a "foreground" miss
1436 env_->MockSleepForSeconds(45);
1437
1438 ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats,
1439 &values));
1440 EXPECT_EQ(
1441 std::to_string(
1442 expected[static_cast<size_t>(CacheEntryRole::kWriteBuffer)]),
1443 values[BlockCacheEntryStatsMapKeys::EntryCount(
1444 CacheEntryRole::kWriteBuffer)]);
1445 }
1446 prev_expected = expected;
1447
1448 // With collector pinned in cache, we should be able to hit
1449 // even if the cache is full
1450 ClearCache(cache.get());
1451 Cache::Handle* h = nullptr;
1452 if (strcmp(cache->Name(), "LRUCache") == 0) {
1453 ASSERT_OK(cache->Insert("Fill-it-up", nullptr, capacity + 1,
1454 GetNoopDeleterForRole<CacheEntryRole::kMisc>(),
1455 &h, Cache::Priority::HIGH));
1456 } else {
1457 // For ClockCache we use a 16-byte key.
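    // (The clock-based caches here require fixed-size 16-byte keys, hence
    // the padded key name.)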
1458 ASSERT_OK(cache->Insert("Fill-it-up-xxxxx", nullptr, capacity + 1,
1459 GetNoopDeleterForRole<CacheEntryRole::kMisc>(),
1460 &h, Cache::Priority::HIGH));
1461 }
1462 ASSERT_GT(cache->GetUsage(), cache->GetCapacity());
1463 expected = {};
1464 // For CacheEntryStatsCollector
1465 expected[static_cast<size_t>(CacheEntryRole::kMisc)] = 1;
1466 // For Fill-it-up
1467 expected[static_cast<size_t>(CacheEntryRole::kMisc)]++;
1468 // Still able to hit on saved stats
1469 EXPECT_EQ(prev_expected, GetCacheEntryRoleCountsBg());
1470 // Enough to force a miss
1471 env_->MockSleepForSeconds(1000);
1472 EXPECT_EQ(expected, GetCacheEntryRoleCountsBg());
1473
1474 cache->Release(h);
1475
1476 // Now we test that the DB mutex is not held during scans, for the ways
1477    // we know how to (possibly) trigger them. Without a better way to
1478 // check this, we simply inject an acquire & release of the DB mutex
1479 // deep in the stat collection code. If we were already holding the
1480 // mutex, that is UB that would at least be found by TSAN.
1481 int scan_count = 0;
1482 SyncPoint::GetInstance()->SetCallBack(
1483 "CacheEntryStatsCollector::GetStats:AfterApplyToAllEntries",
1484 [this, &scan_count](void*) {
1485 dbfull()->TEST_LockMutex();
1486 dbfull()->TEST_UnlockMutex();
1487 ++scan_count;
1488 });
1489 SyncPoint::GetInstance()->EnableProcessing();
1490
1491 // Different things that might trigger a scan, with mock sleeps to
1492 // force a miss.
1493 env_->MockSleepForSeconds(10000);
1494 dbfull()->DumpStats();
1495 ASSERT_EQ(scan_count, 1);
1496
1497 env_->MockSleepForSeconds(60);
1498 ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kFastBlockCacheEntryStats,
1499 &values));
1500 ASSERT_EQ(scan_count, 1);
1501 ASSERT_TRUE(
1502 db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats, &values));
1503 ASSERT_EQ(scan_count, 2);
1504
1505 env_->MockSleepForSeconds(10000);
1506 ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kFastBlockCacheEntryStats,
1507 &values));
1508 ASSERT_EQ(scan_count, 3);
1509
1510 env_->MockSleepForSeconds(60);
1511 std::string value_str;
1512 ASSERT_TRUE(db_->GetProperty(DB::Properties::kFastBlockCacheEntryStats,
1513 &value_str));
1514 ASSERT_EQ(scan_count, 3);
1515 ASSERT_TRUE(
1516 db_->GetProperty(DB::Properties::kBlockCacheEntryStats, &value_str));
1517 ASSERT_EQ(scan_count, 4);
1518
1519 env_->MockSleepForSeconds(10000);
1520 ASSERT_TRUE(db_->GetProperty(DB::Properties::kFastBlockCacheEntryStats,
1521 &value_str));
1522 ASSERT_EQ(scan_count, 5);
1523
1524 ASSERT_TRUE(db_->GetProperty(DB::Properties::kCFStats, &value_str));
1525 // To match historical speed, querying this property no longer triggers
1526 // a scan, even if results are old. But periodic dump stats should keep
1527 // things reasonably updated.
1528 ASSERT_EQ(scan_count, /*unchanged*/ 5);
1529
1530 SyncPoint::GetInstance()->DisableProcessing();
1531 SyncPoint::GetInstance()->ClearAllCallBacks();
1532 }
1533 EXPECT_GE(iterations_tested, 1);
1534 }
1535}
1536
1537namespace {
1538
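// Pins dummy entries in `cache` until its capacity is reached, each with a
// charge of roughly `entry_size`, keeping a handle to every entry in
// `handles` so none of them can be evicted. Any previously pinned entries
// are released first.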
1539void DummyFillCache(Cache& cache, size_t entry_size,
1540 std::vector<CacheHandleGuard<void>>& handles) {
1541 // fprintf(stderr, "Entry size: %zu\n", entry_size);
1542 handles.clear();
1543 cache.EraseUnRefEntries();
1544 void* fake_value = &cache;
1545 size_t capacity = cache.GetCapacity();
1546 OffsetableCacheKey ck{"abc", "abc", 42};
1547 for (size_t my_usage = 0; my_usage < capacity;) {
1548 size_t charge = std::min(entry_size, capacity - my_usage);
1549 Cache::Handle* handle;
1550 Status st = cache.Insert(ck.WithOffset(my_usage).AsSlice(), fake_value,
1551 charge, /*deleter*/ nullptr, &handle);
1552 ASSERT_OK(st);
1553 handles.emplace_back(&cache, handle);
1554 my_usage += charge;
1555 }
1556}
1557
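// Logger that counts HyperClockCache-related messages by severity, so the
// test below can check what (if anything) gets reported by DumpStats.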
1558class CountingLogger : public Logger {
1559 public:
1560 ~CountingLogger() override {}
1561 using Logger::Logv;
1562 void Logv(const InfoLogLevel log_level, const char* format,
1563 va_list /*ap*/) override {
1564 if (std::strstr(format, "HyperClockCache") == nullptr) {
1565 // Not a match
1566 return;
1567 }
1568 // static StderrLogger debug;
1569 // debug.Logv(log_level, format, ap);
1570 if (log_level == InfoLogLevel::INFO_LEVEL) {
1571 ++info_count_;
1572 } else if (log_level == InfoLogLevel::WARN_LEVEL) {
1573 ++warn_count_;
1574 } else if (log_level == InfoLogLevel::ERROR_LEVEL) {
1575 ++error_count_;
1576 }
1577 }
1578
1579 std::array<int, 3> PopCounts() {
1580 std::array<int, 3> rv{{info_count_, warn_count_, error_count_}};
1581 info_count_ = warn_count_ = error_count_ = 0;
1582 return rv;
1583 }
1584
1585 private:
1586 int info_count_{};
1587 int warn_count_{};
1588 int error_count_{};
1589};
1590
1591} // namespace
1592
1593TEST_F(DBBlockCacheTest, HyperClockCacheReportProblems) {
1594 size_t capacity = 1024 * 1024;
1595 size_t value_size_est = 8 * 1024;
1596 HyperClockCacheOptions hcc_opts{capacity, value_size_est};
1597 hcc_opts.num_shard_bits = 2; // 4 shards
1598 hcc_opts.metadata_charge_policy = kDontChargeCacheMetadata;
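  // `value_size_est` is the estimated average entry charge the cache is
  // sized around; the checks below exercise what gets reported when the
  // actual average charge deviates from it. Metadata charging is disabled,
  // presumably so usage reflects only the charges DummyFillCache inserts.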
1599 std::shared_ptr<Cache> cache = hcc_opts.MakeSharedCache();
1600 std::shared_ptr<CountingLogger> logger = std::make_shared<CountingLogger>();
1601
1602 auto table_options = GetTableOptions();
1603 auto options = GetOptions(table_options);
1604 table_options.block_cache = cache;
1605 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1606 options.info_log = logger;
1607 // Going to sample more directly
1608 options.stats_dump_period_sec = 0;
1609 Reopen(options);
1610
1611 std::vector<CacheHandleGuard<void>> handles;
1612
1613 // Clear anything from DB startup
1614 logger->PopCounts();
1615
1616  // Fill cache based on expected size and check that we don't report
1617  // anything relevant in the periodic stats dump
1618 DummyFillCache(*cache, value_size_est, handles);
1619 dbfull()->DumpStats();
1620 EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{0, 0, 0}}));
1621
1622 // Same, within reasonable bounds
1623 DummyFillCache(*cache, value_size_est - value_size_est / 4, handles);
1624 dbfull()->DumpStats();
1625 EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{0, 0, 0}}));
1626
1627 DummyFillCache(*cache, value_size_est + value_size_est / 3, handles);
1628 dbfull()->DumpStats();
1629 EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{0, 0, 0}}));
1630
1631 // Estimate too high (value size too low) eventually reports ERROR
1632 DummyFillCache(*cache, value_size_est / 2, handles);
1633 dbfull()->DumpStats();
1634 EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{0, 1, 0}}));
1635
1636 DummyFillCache(*cache, value_size_est / 3, handles);
1637 dbfull()->DumpStats();
1638 EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{0, 0, 1}}));
1639
1640 // Estimate too low (value size too high) starts with INFO
1641 // and is only WARNING in the worst case
1642 DummyFillCache(*cache, value_size_est * 2, handles);
1643 dbfull()->DumpStats();
1644 EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{1, 0, 0}}));
1645
1646 DummyFillCache(*cache, value_size_est * 3, handles);
1647 dbfull()->DumpStats();
1648 EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{0, 1, 0}}));
1649
1650 DummyFillCache(*cache, value_size_est * 20, handles);
1651 dbfull()->DumpStats();
1652 EXPECT_EQ(logger->PopCounts(), (std::array<int, 3>{{0, 1, 0}}));
1653}
1654
1655#endif // ROCKSDB_LITE
1656
1657class DBBlockCacheKeyTest
1658 : public DBTestBase,
1659 public testing::WithParamInterface<std::tuple<bool, bool>> {
1660 public:
1661 DBBlockCacheKeyTest()
1662 : DBTestBase("db_block_cache_test", /*env_do_fsync=*/false) {}
1663
1664 void SetUp() override {
1665 use_compressed_cache_ = std::get<0>(GetParam());
1666 exclude_file_numbers_ = std::get<1>(GetParam());
1667 }
1668
1669 bool use_compressed_cache_;
1670 bool exclude_file_numbers_;
1671};
1672
1673// Disable LinkFile so that we can physically copy a DB using Checkpoint.
1674// Disable file GetUniqueId to enable stable cache keys.
1675class StableCacheKeyTestFS : public FaultInjectionTestFS {
1676 public:
1677 explicit StableCacheKeyTestFS(const std::shared_ptr<FileSystem>& base)
1678 : FaultInjectionTestFS(base) {
1679 SetFailGetUniqueId(true);
1680 }
1681
1682 virtual ~StableCacheKeyTestFS() override {}
1683
1684 IOStatus LinkFile(const std::string&, const std::string&, const IOOptions&,
1685 IODebugContext*) override {
1686 return IOStatus::NotSupported("Disabled");
1687 }
1688};
1689
1690TEST_P(DBBlockCacheKeyTest, StableCacheKeys) {
1691 std::shared_ptr<StableCacheKeyTestFS> test_fs{
1692 new StableCacheKeyTestFS(env_->GetFileSystem())};
1693 std::unique_ptr<CompositeEnvWrapper> test_env{
1694 new CompositeEnvWrapper(env_, test_fs)};
1695
1696 Options options = CurrentOptions();
1697 options.create_if_missing = true;
1698 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1699 options.env = test_env.get();
1700
1701 // Corrupting the table properties corrupts the unique id.
1702 // Ignore the unique id recorded in the manifest.
1703 options.verify_sst_unique_id_in_manifest = false;
1704
1705 BlockBasedTableOptions table_options;
1706
1707 int key_count = 0;
1708 uint64_t expected_stat = 0;
1709
1710 std::function<void()> verify_stats;
1711 if (use_compressed_cache_) {
1712 if (!Snappy_Supported()) {
1713 ROCKSDB_GTEST_SKIP("Compressed cache test requires snappy support");
1714 return;
1715 }
1716 options.compression = CompressionType::kSnappyCompression;
1717 table_options.no_block_cache = true;
1718 table_options.block_cache_compressed = NewLRUCache(1 << 25, 0, false);
1719 verify_stats = [&options, &expected_stat] {
1720 // One for ordinary SST file and one for external SST file
1721 ASSERT_EQ(expected_stat,
1722 options.statistics->getTickerCount(BLOCK_CACHE_COMPRESSED_ADD));
1723 };
1724 } else {
1725 table_options.cache_index_and_filter_blocks = true;
1726 table_options.block_cache = NewLRUCache(1 << 25, 0, false);
1727 verify_stats = [&options, &expected_stat] {
1728 ASSERT_EQ(expected_stat,
1729 options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
1730 ASSERT_EQ(expected_stat,
1731 options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
1732 ASSERT_EQ(expected_stat,
1733 options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
1734 };
1735 }
1736
1737 table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
1738 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1739 CreateAndReopenWithCF({"koko"}, options);
1740
1741 if (exclude_file_numbers_) {
1742 // Simulate something like old behavior without file numbers in properties.
1743 // This is a "control" side of the test that also ensures safely degraded
1744 // behavior on old files.
1745 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1746 "BlockBasedTableBuilder::BlockBasedTableBuilder:PreSetupBaseCacheKey",
1747 [&](void* arg) {
1748 TableProperties* props = reinterpret_cast<TableProperties*>(arg);
1749 props->orig_file_number = 0;
1750 });
1751 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1752 }
1753
1754 std::function<void()> perform_gets = [&key_count, &expected_stat, this]() {
1755 if (exclude_file_numbers_) {
1756 // No cache key reuse should happen, because we can't rely on current
1757 // file number being stable
1758 expected_stat += key_count;
1759 } else {
1760 // Cache keys should be stable
1761 expected_stat = key_count;
1762 }
1763 for (int i = 0; i < key_count; ++i) {
1764 ASSERT_EQ(Get(1, Key(i)), "abc");
1765 }
1766 };
1767
1768 // Ordinary SST files with same session id
1769 const std::string something_compressible(500U, 'x');
1770 for (int i = 0; i < 2; ++i) {
1771 ASSERT_OK(Put(1, Key(key_count), "abc"));
1772 ASSERT_OK(Put(1, Key(key_count) + "a", something_compressible));
1773 ASSERT_OK(Flush(1));
1774 ++key_count;
1775 }
1776
1777#ifndef ROCKSDB_LITE
1778 // Save an export of those ordinary SST files for later
1779 std::string export_files_dir = dbname_ + "/exported";
1780 ExportImportFilesMetaData* metadata_ptr_ = nullptr;
1781 Checkpoint* checkpoint;
1782 ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
1783 ASSERT_OK(checkpoint->ExportColumnFamily(handles_[1], export_files_dir,
1784 &metadata_ptr_));
1785 ASSERT_NE(metadata_ptr_, nullptr);
1786 delete checkpoint;
1787 checkpoint = nullptr;
1788
1789 // External SST files with same session id
1790 SstFileWriter sst_file_writer(EnvOptions(), options);
1791 std::vector<std::string> external;
1792 for (int i = 0; i < 2; ++i) {
1793 std::string f = dbname_ + "/external" + std::to_string(i) + ".sst";
1794 external.push_back(f);
1795 ASSERT_OK(sst_file_writer.Open(f));
1796 ASSERT_OK(sst_file_writer.Put(Key(key_count), "abc"));
1797 ASSERT_OK(
1798 sst_file_writer.Put(Key(key_count) + "a", something_compressible));
1799 ++key_count;
1800 ExternalSstFileInfo external_info;
1801 ASSERT_OK(sst_file_writer.Finish(&external_info));
1802 IngestExternalFileOptions ingest_opts;
1803 ASSERT_OK(db_->IngestExternalFile(handles_[1], {f}, ingest_opts));
1804 }
1805
1806 if (exclude_file_numbers_) {
1807 // FIXME(peterd): figure out where these extra ADDs are coming from
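    // (recordTick only adds, so adding 2^64 - 2 wraps around and effectively
    // subtracts 2 from the ticker.)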
1808 options.statistics->recordTick(BLOCK_CACHE_COMPRESSED_ADD,
1809 uint64_t{0} - uint64_t{2});
1810 }
1811#endif
1812
1813 perform_gets();
1814 verify_stats();
1815
1816 // Make sure we can cache hit after re-open
1817 ReopenWithColumnFamilies({"default", "koko"}, options);
1818
1819 perform_gets();
1820 verify_stats();
1821
1822 // Make sure we can cache hit even on a full copy of the DB. Using
1823 // StableCacheKeyTestFS, Checkpoint will resort to full copy not hard link.
1824 // (Checkpoint not available in LITE mode to test this.)
1825#ifndef ROCKSDB_LITE
1826 auto db_copy_name = dbname_ + "-copy";
1827 ASSERT_OK(Checkpoint::Create(db_, &checkpoint));
1828 ASSERT_OK(checkpoint->CreateCheckpoint(db_copy_name));
1829 delete checkpoint;
1830
1831 Close();
1832 Destroy(options);
1833
1834 // Switch to the DB copy
1835 SaveAndRestore<std::string> save_dbname(&dbname_, db_copy_name);
1836 ReopenWithColumnFamilies({"default", "koko"}, options);
1837
1838 perform_gets();
1839 verify_stats();
1840
1841 // And ensure that re-importing + ingesting the same files into a
1842 // different DB uses same cache keys
1843 DestroyAndReopen(options);
1844
1845 ColumnFamilyHandle* cfh = nullptr;
1846 ASSERT_OK(db_->CreateColumnFamilyWithImport(ColumnFamilyOptions(), "yoyo",
1847 ImportColumnFamilyOptions(),
1848 *metadata_ptr_, &cfh));
1849 ASSERT_NE(cfh, nullptr);
1850 delete cfh;
1851 cfh = nullptr;
1852 delete metadata_ptr_;
1853 metadata_ptr_ = nullptr;
1854
1855 ASSERT_OK(DestroyDB(export_files_dir, options));
1856
1857 ReopenWithColumnFamilies({"default", "yoyo"}, options);
1858
1859 IngestExternalFileOptions ingest_opts;
1860 ASSERT_OK(db_->IngestExternalFile(handles_[1], {external}, ingest_opts));
1861
1862 perform_gets();
1863 verify_stats();
1864#endif // !ROCKSDB_LITE
1865
1866 Close();
1867 Destroy(options);
1868 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
1869}
1870
1871class CacheKeyTest : public testing::Test {
1872 public:
1873 CacheKey GetBaseCacheKey() {
1874 CacheKey rv = GetOffsetableCacheKey(0, /*min file_number*/ 1).WithOffset(0);
1875 // Correct for file_number_ == 1
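    // (File numbers start at 1, and per the decoding logic below the file
    // number contributes ReverseBits(file_number) to this first word, so
    // XOR-ing that out gives a base key as if file_number were 0.)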
1876 *reinterpret_cast<uint64_t*>(&rv) ^= ReverseBits(uint64_t{1});
1877 return rv;
1878 }
1879 CacheKey GetCacheKey(uint64_t session_counter, uint64_t file_number,
1880 uint64_t offset) {
1881 OffsetableCacheKey offsetable =
1882 GetOffsetableCacheKey(session_counter, file_number);
1883 // * 4 to counteract optimization that strips lower 2 bits in encoding
1884 // the offset in BlockBasedTable::GetCacheKey (which we prefer to include
1885 // in unit tests to maximize functional coverage).
1886 EXPECT_GE(offset * 4, offset); // no overflow
1887 return BlockBasedTable::GetCacheKey(offsetable,
1888 BlockHandle(offset * 4, /*size*/ 5));
1889 }
1890
1891 protected:
1892 OffsetableCacheKey GetOffsetableCacheKey(uint64_t session_counter,
1893 uint64_t file_number) {
1894 // Like SemiStructuredUniqueIdGen::GenerateNext
1895 tp_.db_session_id = EncodeSessionId(base_session_upper_,
1896 base_session_lower_ ^ session_counter);
1897 tp_.db_id = std::to_string(db_id_);
1898 tp_.orig_file_number = file_number;
1899 bool is_stable;
1900 std::string cur_session_id = ""; // ignored
1901 uint64_t cur_file_number = 42; // ignored
1902 OffsetableCacheKey rv;
1903 BlockBasedTable::SetupBaseCacheKey(&tp_, cur_session_id, cur_file_number,
1904 &rv, &is_stable);
1905 EXPECT_TRUE(is_stable);
1906 EXPECT_TRUE(!rv.IsEmpty());
1907 // BEGIN some assertions in relation to SST unique IDs
1908 std::string external_unique_id_str;
1909 EXPECT_OK(GetUniqueIdFromTableProperties(tp_, &external_unique_id_str));
1910 UniqueId64x2 sst_unique_id = {};
1911 EXPECT_OK(DecodeUniqueIdBytes(external_unique_id_str, &sst_unique_id));
1912 ExternalUniqueIdToInternal(&sst_unique_id);
1913 OffsetableCacheKey ock =
1914 OffsetableCacheKey::FromInternalUniqueId(&sst_unique_id);
1915 EXPECT_EQ(rv.WithOffset(0).AsSlice(), ock.WithOffset(0).AsSlice());
1916 EXPECT_EQ(ock.ToInternalUniqueId(), sst_unique_id);
1917 // END some assertions in relation to SST unique IDs
1918 return rv;
1919 }
1920
1921 TableProperties tp_;
1922 uint64_t base_session_upper_ = 0;
1923 uint64_t base_session_lower_ = 0;
1924 uint64_t db_id_ = 0;
1925};
1926
1927TEST_F(CacheKeyTest, DBImplSessionIdStructure) {
1928 // We have to generate our own session IDs for simulation purposes in other
1929 // tests. Here we verify that the DBImpl implementation seems to match
1930 // our construction here, by using lowest XORed-in bits for "session
1931 // counter."
1932 std::string session_id1 = DBImpl::GenerateDbSessionId(/*env*/ nullptr);
1933 std::string session_id2 = DBImpl::GenerateDbSessionId(/*env*/ nullptr);
1934 uint64_t upper1, upper2, lower1, lower2;
1935 ASSERT_OK(DecodeSessionId(session_id1, &upper1, &lower1));
1936 ASSERT_OK(DecodeSessionId(session_id2, &upper2, &lower2));
1937 // Because generated in same process
1938 ASSERT_EQ(upper1, upper2);
1939 // Unless we generate > 4 billion session IDs in this process...
1940 ASSERT_EQ(Upper32of64(lower1), Upper32of64(lower2));
1941 // But they must be different somewhere
1942 ASSERT_NE(Lower32of64(lower1), Lower32of64(lower2));
1943}
1944
1945namespace {
1946// Deconstruct cache key, based on knowledge of implementation details.
1947void DeconstructNonemptyCacheKey(const CacheKey& key, uint64_t* file_num_etc64,
1948 uint64_t* offset_etc64) {
1949 *file_num_etc64 = *reinterpret_cast<const uint64_t*>(key.AsSlice().data());
1950 *offset_etc64 = *reinterpret_cast<const uint64_t*>(key.AsSlice().data() + 8);
1951 assert(*file_num_etc64 != 0);
1952 if (*offset_etc64 == 0) {
1953 std::swap(*file_num_etc64, *offset_etc64);
1954 }
1955 assert(*offset_etc64 != 0);
1956}
1957
1958// Make a bit mask of 0 to 64 bits
1959uint64_t MakeMask64(int bits) {
1960 if (bits >= 64) {
1961 return uint64_t{0} - 1;
1962 } else {
1963 return (uint64_t{1} << bits) - 1;
1964 }
1965}
1966
1967// See CacheKeyTest::Encodings
1968struct CacheKeyDecoder {
1969 // Inputs
1970 uint64_t base_file_num_etc64, base_offset_etc64;
1971 int session_counter_bits, file_number_bits, offset_bits;
1972
1973 // Derived
1974 uint64_t session_counter_mask, file_number_mask, offset_mask;
1975
1976 // Outputs
1977 uint64_t decoded_session_counter, decoded_file_num, decoded_offset;
1978
1979 void SetBaseCacheKey(const CacheKey& base) {
1980 DeconstructNonemptyCacheKey(base, &base_file_num_etc64, &base_offset_etc64);
1981 }
1982
1983 void SetRanges(int _session_counter_bits, int _file_number_bits,
1984 int _offset_bits) {
1985 session_counter_bits = _session_counter_bits;
1986 session_counter_mask = MakeMask64(session_counter_bits);
1987 file_number_bits = _file_number_bits;
1988 file_number_mask = MakeMask64(file_number_bits);
1989 offset_bits = _offset_bits;
1990 offset_mask = MakeMask64(offset_bits);
1991 }
1992
1993 void Decode(const CacheKey& key) {
1994 uint64_t file_num_etc64, offset_etc64;
1995 DeconstructNonemptyCacheKey(key, &file_num_etc64, &offset_etc64);
1996
1997 // First decode session counter
1998 if (offset_bits + session_counter_bits <= 64) {
1999 // fully recoverable from offset_etc64
2000 decoded_session_counter =
2001 ReverseBits((offset_etc64 ^ base_offset_etc64)) &
2002 session_counter_mask;
2003 } else if (file_number_bits + session_counter_bits <= 64) {
2004 // fully recoverable from file_num_etc64
2005 decoded_session_counter = DownwardInvolution(
2006 (file_num_etc64 ^ base_file_num_etc64) & session_counter_mask);
2007 } else {
2008 // Need to combine parts from each word.
2009 // Piece1 will contain some correct prefix of the bottom bits of
2010 // session counter.
2011 uint64_t piece1 =
2012 ReverseBits((offset_etc64 ^ base_offset_etc64) & ~offset_mask);
2013 int piece1_bits = 64 - offset_bits;
2014      // Piece2 will contain involuted bits that we can combine with piece1
2015 // to infer rest of session counter
2016 int piece2_bits = std::min(64 - file_number_bits, 64 - piece1_bits);
2017 ASSERT_LT(piece2_bits, 64);
2018 uint64_t piece2_mask = MakeMask64(piece2_bits);
2019 uint64_t piece2 = (file_num_etc64 ^ base_file_num_etc64) & piece2_mask;
2020
2021 // Cancel out the part of piece2 that we can infer from piece1
2022 // (DownwardInvolution distributes over xor)
2023 piece2 ^= DownwardInvolution(piece1) & piece2_mask;
2024
2025 // Now we need to solve for the unknown original bits in higher
2026 // positions than piece1 provides. We use Gaussian elimination
2027 // because we know that a piece2_bits X piece2_bits submatrix of
2028 // the matrix underlying DownwardInvolution times the vector of
2029 // unknown original bits equals piece2.
2030 //
2031 // Build an augmented row matrix for that submatrix, built column by
2032 // column.
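      // Row j lives in the low piece2_bits bits of aug_rows[j]; its
      // right-hand-side bit is stored at bit position piece2_bits.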
2033 std::array<uint64_t, 64> aug_rows{};
2034 for (int i = 0; i < piece2_bits; ++i) { // over columns
2035 uint64_t col_i = DownwardInvolution(uint64_t{1} << piece1_bits << i);
2036 ASSERT_NE(col_i & 1U, 0);
2037 for (int j = 0; j < piece2_bits; ++j) { // over rows
2038 aug_rows[j] |= (col_i & 1U) << i;
2039 col_i >>= 1;
2040 }
2041 }
2042 // Augment with right hand side
2043 for (int j = 0; j < piece2_bits; ++j) { // over rows
2044 aug_rows[j] |= (piece2 & 1U) << piece2_bits;
2045 piece2 >>= 1;
2046 }
2047 // Run Gaussian elimination
2048 for (int i = 0; i < piece2_bits; ++i) { // over columns
2049 // Find a row that can be used to cancel others
2050 uint64_t canceller = 0;
2051 // Note: Rows 0 through i-1 contain 1s in columns already eliminated
2052 for (int j = i; j < piece2_bits; ++j) { // over rows
2053 if (aug_rows[j] & (uint64_t{1} << i)) {
2054 // Swap into appropriate row
2055 std::swap(aug_rows[i], aug_rows[j]);
2056 // Keep a handy copy for row reductions
2057 canceller = aug_rows[i];
2058 break;
2059 }
2060 }
2061 ASSERT_NE(canceller, 0);
2062 for (int j = 0; j < piece2_bits; ++j) { // over rows
2063 if (i != j && ((aug_rows[j] >> i) & 1) != 0) {
2064 // Row reduction
2065 aug_rows[j] ^= canceller;
2066 }
2067 }
2068 }
2069 // Extract result
2070 decoded_session_counter = piece1;
2071 for (int j = 0; j < piece2_bits; ++j) { // over rows
2072 ASSERT_EQ(aug_rows[j] & piece2_mask, uint64_t{1} << j);
2073 decoded_session_counter |= aug_rows[j] >> piece2_bits << piece1_bits
2074 << j;
2075 }
2076 }
2077
2078 decoded_offset =
2079 offset_etc64 ^ base_offset_etc64 ^ ReverseBits(decoded_session_counter);
2080
2081 decoded_file_num = ReverseBits(file_num_etc64 ^ base_file_num_etc64 ^
2082 DownwardInvolution(decoded_session_counter));
2083 }
2084};
2085} // anonymous namespace
2086
2087TEST_F(CacheKeyTest, Encodings) {
2088 // This test primarily verifies this claim from cache_key.cc:
2089 // // In fact, if DB ids were not involved, we would be guaranteed unique
2090 // // cache keys for files generated in a single process until total bits for
2091 // // biggest session_id_counter, orig_file_number, and offset_in_file
2092 // // reach 128 bits.
2093 //
2094 // To demonstrate this, CacheKeyDecoder can reconstruct the structured inputs
2095 // to the cache key when provided an output cache key, the unstructured
2096 // inputs, and bounds on the structured inputs.
2097 //
2098 // See OffsetableCacheKey comments in cache_key.cc.
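  // For example, 20 bits of session counter + 40 bits of file number + 60
  // bits of offset is 120 <= 128 total bits, so every such combination below
  // decodes back to exactly its inputs.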
2099
2100 // We are going to randomly initialize some values that *should* not affect
2101  // the result
2102 Random64 r{std::random_device{}()};
2103
2104 CacheKeyDecoder decoder;
2105 db_id_ = r.Next();
2106 base_session_upper_ = r.Next();
2107 base_session_lower_ = r.Next();
2108 if (base_session_lower_ == 0) {
2109 base_session_lower_ = 1;
2110 }
2111
2112 decoder.SetBaseCacheKey(GetBaseCacheKey());
2113
2114 // Loop over configurations and test those
2115 for (int session_counter_bits = 0; session_counter_bits <= 64;
2116 ++session_counter_bits) {
2117 for (int file_number_bits = 1; file_number_bits <= 64; ++file_number_bits) {
2118 // 62 bits max because unoptimized offset will be 64 bits in that case
2119 for (int offset_bits = 0; offset_bits <= 62; ++offset_bits) {
2120 if (session_counter_bits + file_number_bits + offset_bits > 128) {
2121 break;
2122 }
2123
2124 decoder.SetRanges(session_counter_bits, file_number_bits, offset_bits);
2125
2126 uint64_t session_counter = r.Next() & decoder.session_counter_mask;
2127 uint64_t file_number = r.Next() & decoder.file_number_mask;
2128 if (file_number == 0) {
2129 // Minimum
2130 file_number = 1;
2131 }
2132 uint64_t offset = r.Next() & decoder.offset_mask;
2133 decoder.Decode(GetCacheKey(session_counter, file_number, offset));
2134
2135 EXPECT_EQ(decoder.decoded_session_counter, session_counter);
2136 EXPECT_EQ(decoder.decoded_file_num, file_number);
2137 EXPECT_EQ(decoder.decoded_offset, offset);
2138 }
2139 }
2140 }
2141}
2142
2143INSTANTIATE_TEST_CASE_P(DBBlockCacheKeyTest, DBBlockCacheKeyTest,
2144 ::testing::Combine(::testing::Bool(),
2145 ::testing::Bool()));
2146
2147class DBBlockCachePinningTest
2148 : public DBTestBase,
2149 public testing::WithParamInterface<
2150 std::tuple<bool, PinningTier, PinningTier, PinningTier>> {
2151 public:
2152 DBBlockCachePinningTest()
2153      : DBTestBase("db_block_cache_test", /*env_do_fsync=*/false) {}
2154
2155 void SetUp() override {
2156 partition_index_and_filters_ = std::get<0>(GetParam());
2157 top_level_index_pinning_ = std::get<1>(GetParam());
2158 partition_pinning_ = std::get<2>(GetParam());
2159 unpartitioned_pinning_ = std::get<3>(GetParam());
2160 }
2161
2162 bool partition_index_and_filters_;
2163 PinningTier top_level_index_pinning_;
2164 PinningTier partition_pinning_;
2165 PinningTier unpartitioned_pinning_;
2166};
2167
2168TEST_P(DBBlockCachePinningTest, TwoLevelDB) {
2169 // Creates one file in L0 and one file in L1. Both files have enough data that
2170 // their index and filter blocks are partitioned. The L1 file will also have
2171 // a compression dictionary (those are trained only during compaction), which
2172 // must be unpartitioned.
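  // Expectations for the reads below: kNone never pins, so those blocks
  // always reappear as cache misses; kFlushedAndSimilar pins only for the
  // flushed (L0) file, avoiding misses on the L0 read but not the L1 read;
  // kAll avoids misses in both cases.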
2173 const int kKeySize = 32;
2174 const int kBlockSize = 128;
2175 const int kNumBlocksPerFile = 128;
2176 const int kNumKeysPerFile = kBlockSize * kNumBlocksPerFile / kKeySize;
2177
2178 Options options = CurrentOptions();
2179 // `kNoCompression` makes the unit test more portable. But it relies on the
2180 // current behavior of persisting/accessing dictionary even when there's no
2181 // (de)compression happening, which seems fairly likely to change over time.
2182 options.compression = kNoCompression;
2183 options.compression_opts.max_dict_bytes = 4 << 10;
2184 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2185 BlockBasedTableOptions table_options;
2186 table_options.block_cache = NewLRUCache(1 << 20 /* capacity */);
2187 table_options.block_size = kBlockSize;
2188 table_options.metadata_block_size = kBlockSize;
2189 table_options.cache_index_and_filter_blocks = true;
2190 table_options.metadata_cache_options.top_level_index_pinning =
2191 top_level_index_pinning_;
2192 table_options.metadata_cache_options.partition_pinning = partition_pinning_;
2193 table_options.metadata_cache_options.unpartitioned_pinning =
2194 unpartitioned_pinning_;
2195 table_options.filter_policy.reset(
2196 NewBloomFilterPolicy(10 /* bits_per_key */));
2197 if (partition_index_and_filters_) {
2198 table_options.index_type =
2199 BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
2200 table_options.partition_filters = true;
2201 }
2202 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2203 Reopen(options);
2204
2205 Random rnd(301);
2206 for (int i = 0; i < 2; ++i) {
2207 for (int j = 0; j < kNumKeysPerFile; ++j) {
2208 ASSERT_OK(Put(Key(i * kNumKeysPerFile + j), rnd.RandomString(kKeySize)));
2209 }
2210 ASSERT_OK(Flush());
2211 if (i == 0) {
2212 // Prevent trivial move so file will be rewritten with dictionary and
2213 // reopened with L1's pinning settings.
2214 CompactRangeOptions cro;
2215 cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
2216 ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
2217 }
2218 }
2219
2220 // Clear all unpinned blocks so unpinned blocks will show up as cache misses
2221 // when reading a key from a file.
2222 table_options.block_cache->EraseUnRefEntries();
2223
2224 // Get base cache values
2225 uint64_t filter_misses = TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS);
2226 uint64_t index_misses = TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS);
2227 uint64_t compression_dict_misses =
2228 TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS);
2229
2230 // Read a key from the L0 file
2231 Get(Key(kNumKeysPerFile));
2232 uint64_t expected_filter_misses = filter_misses;
2233 uint64_t expected_index_misses = index_misses;
2234 uint64_t expected_compression_dict_misses = compression_dict_misses;
2235 if (partition_index_and_filters_) {
2236 if (top_level_index_pinning_ == PinningTier::kNone) {
2237 ++expected_filter_misses;
2238 ++expected_index_misses;
2239 }
2240 if (partition_pinning_ == PinningTier::kNone) {
2241 ++expected_filter_misses;
2242 ++expected_index_misses;
2243 }
2244 } else {
2245 if (unpartitioned_pinning_ == PinningTier::kNone) {
2246 ++expected_filter_misses;
2247 ++expected_index_misses;
2248 }
2249 }
2250 if (unpartitioned_pinning_ == PinningTier::kNone) {
2251 ++expected_compression_dict_misses;
2252 }
2253 ASSERT_EQ(expected_filter_misses,
2254 TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
2255 ASSERT_EQ(expected_index_misses,
2256 TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
2257 ASSERT_EQ(expected_compression_dict_misses,
2258 TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS));
2259
2260 // Clear all unpinned blocks so unpinned blocks will show up as cache misses
2261 // when reading a key from a file.
2262 table_options.block_cache->EraseUnRefEntries();
2263
2264 // Read a key from the L1 file
2265 Get(Key(0));
2266 if (partition_index_and_filters_) {
2267 if (top_level_index_pinning_ == PinningTier::kNone ||
2268 top_level_index_pinning_ == PinningTier::kFlushedAndSimilar) {
2269 ++expected_filter_misses;
2270 ++expected_index_misses;
2271 }
2272 if (partition_pinning_ == PinningTier::kNone ||
2273 partition_pinning_ == PinningTier::kFlushedAndSimilar) {
2274 ++expected_filter_misses;
2275 ++expected_index_misses;
2276 }
2277 } else {
2278 if (unpartitioned_pinning_ == PinningTier::kNone ||
2279 unpartitioned_pinning_ == PinningTier::kFlushedAndSimilar) {
2280 ++expected_filter_misses;
2281 ++expected_index_misses;
2282 }
2283 }
2284 if (unpartitioned_pinning_ == PinningTier::kNone ||
2285 unpartitioned_pinning_ == PinningTier::kFlushedAndSimilar) {
2286 ++expected_compression_dict_misses;
2287 }
2288 ASSERT_EQ(expected_filter_misses,
2289 TestGetTickerCount(options, BLOCK_CACHE_FILTER_MISS));
2290 ASSERT_EQ(expected_index_misses,
2291 TestGetTickerCount(options, BLOCK_CACHE_INDEX_MISS));
2292 ASSERT_EQ(expected_compression_dict_misses,
2293 TestGetTickerCount(options, BLOCK_CACHE_COMPRESSION_DICT_MISS));
2294}
2295
2296INSTANTIATE_TEST_CASE_P(
2297 DBBlockCachePinningTest, DBBlockCachePinningTest,
2298 ::testing::Combine(
2299 ::testing::Bool(),
2300 ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar,
2301 PinningTier::kAll),
2302 ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar,
2303 PinningTier::kAll),
2304 ::testing::Values(PinningTier::kNone, PinningTier::kFlushedAndSimilar,
2305 PinningTier::kAll)));
2306
2307 } // namespace ROCKSDB_NAMESPACE
2308
2309int main(int argc, char** argv) {
2310 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
2311 ::testing::InitGoogleTest(&argc, argv);
2312 return RUN_ALL_TESTS();
2313}