1 // Copyright (c) 2022, Meta Platforms, Inc. and affiliates. All rights
2 // reserved. This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
#include <cstdint>
#include <memory>
#include <string>
#include <utility>

#include "rocksdb/options.h"
#include "rocksdb/system_clock.h"
#include "rocksdb/table_reader_caller.h"
#include "rocksdb/trace_reader_writer.h"
#include "rocksdb/trace_record.h"
14 namespace ROCKSDB_NAMESPACE
{
15 // A record for block cache lookups/inserts. This is passed by the table
16 // reader to the BlockCacheTraceWriter for every block cache op.
17 struct BlockCacheTraceRecord
{
18 // Required fields for all accesses.
19 uint64_t access_timestamp
= 0;
21 // Info related to the block being looked up or inserted
23 // 1. The cache key for the block
24 std::string block_key
;
26 // 2. The type of block
27 TraceType block_type
= TraceType::kTraceMax
;
29 // 3. Size of the block
30 uint64_t block_size
= 0;
32 // Info about the SST file the block is in
34 // 1. Column family ID
37 // 2. Column family name
40 // 3. LSM level of the file
44 uint64_t sst_fd_number
= 0;
46 // Info about the calling context
48 // 1. The higher level request triggering the block cache request
49 TableReaderCaller caller
= TableReaderCaller::kMaxBlockCacheLookupCaller
;
51 // 2. Cache lookup hit/miss. Not relevant for inserts
52 bool is_cache_hit
= false;
54 // 3. Whether this request is a lookup
55 bool no_insert
= false;
57 // Get/MultiGet specific info
59 // 1. A unique ID for Get/MultiGet
60 uint64_t get_id
= kReservedGetId
;
62 // 2. Whether the Get/MultiGet is from a user-specified snapshot
63 bool get_from_user_specified_snapshot
= false;
65 // 3. The target user key in the block
66 std::string referenced_key
;
68 // Required fields for data block and user Get/Multi-Get only.
70 // 1. Size of te useful data in the block
71 uint64_t referenced_data_size
= 0;
73 // 2. Only for MultiGet, number of keys from the batch found in the block
74 uint64_t num_keys_in_block
= 0;
76 // 3. Whether the key was found in the block or not (false positive)
77 bool referenced_key_exist_in_block
= false;
79 static const uint64_t kReservedGetId
;
81 BlockCacheTraceRecord() {}
83 BlockCacheTraceRecord(uint64_t _access_timestamp
, std::string _block_key
,
84 TraceType _block_type
, uint64_t _block_size
,
85 uint64_t _cf_id
, std::string _cf_name
, uint32_t _level
,
86 uint64_t _sst_fd_number
, TableReaderCaller _caller
,
87 bool _is_cache_hit
, bool _no_insert
, uint64_t _get_id
,
88 bool _get_from_user_specified_snapshot
= false,
89 std::string _referenced_key
= "",
90 uint64_t _referenced_data_size
= 0,
91 uint64_t _num_keys_in_block
= 0,
92 bool _referenced_key_exist_in_block
= false)
93 : access_timestamp(_access_timestamp
),
94 block_key(_block_key
),
95 block_type(_block_type
),
96 block_size(_block_size
),
100 sst_fd_number(_sst_fd_number
),
102 is_cache_hit(_is_cache_hit
),
103 no_insert(_no_insert
),
105 get_from_user_specified_snapshot(_get_from_user_specified_snapshot
),
106 referenced_key(_referenced_key
),
107 referenced_data_size(_referenced_data_size
),
108 num_keys_in_block(_num_keys_in_block
),
109 referenced_key_exist_in_block(_referenced_key_exist_in_block
) {}
// Options for tracing block cache accesses
struct BlockCacheTraceOptions {
  // Specify trace sampling option, i.e. capture one per how many requests.
  // Default to 1 (capture every request).
  uint64_t sampling_frequency = 1;
};
// Options for the built-in implementation of BlockCacheTraceWriter
struct BlockCacheTraceWriterOptions {
  // Maximum size of the trace file. Defaults to 64 * 1024^3, i.e. 64 GiB
  // assuming the unit is bytes (the unit is not stated here). The first
  // factor is widened to uint64_t so the product is computed in 64 bits.
  uint64_t max_trace_file_size = uint64_t{64} * 1024 * 1024 * 1024;
};
124 // BlockCacheTraceWriter is an abstract class that captures all RocksDB block
125 // cache accesses. Every RocksDB operation is passed to WriteBlockAccess()
126 // with a BlockCacheTraceRecord.
127 class BlockCacheTraceWriter
{
129 virtual ~BlockCacheTraceWriter() {}
131 // Pass Slice references to avoid copy.
132 virtual Status
WriteBlockAccess(const BlockCacheTraceRecord
& record
,
133 const Slice
& block_key
, const Slice
& cf_name
,
134 const Slice
& referenced_key
) = 0;
136 // Write a trace header at the beginning, typically on initiating a trace,
137 // with some metadata like a magic number and RocksDB version.
138 virtual Status
WriteHeader() = 0;
141 // Allocate an instance of the built-in BlockCacheTraceWriter implementation,
142 // that traces all block cache accesses to a user-provided TraceWriter. Each
143 // access is traced to a file with a timestamp and type, followed by the
145 std::unique_ptr
<BlockCacheTraceWriter
> NewBlockCacheTraceWriter(
146 SystemClock
* clock
, const BlockCacheTraceWriterOptions
& trace_options
,
147 std::unique_ptr
<TraceWriter
>&& trace_writer
);
149 } // namespace ROCKSDB_NAMESPACE