]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/include/rocksdb/perf_context.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / include / rocksdb / perf_context.h
CommitLineData
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
7c673cae 5
11fdf7f2 6#pragma once
7c673cae
FG
7
8#include <stdint.h>
1e59de90 9
494da23a 10#include <map>
7c673cae
FG
11#include <string>
12
13#include "rocksdb/perf_level.h"
14
f67539c2 15namespace ROCKSDB_NAMESPACE {
7c673cae
FG
16
// A thread-local context for gathering performance counters efficiently
// and transparently.
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.
20
494da23a
TL
// Break down performance counters by level and store per-level perf context in
// PerfContextByLevel.
//
// Every counter carries a default member initializer of 0, so a freshly
// constructed PerfContextByLevel always starts from a clean slate.
struct PerfContextByLevel {
  // # of times bloom filter has avoided file reads, i.e., negatives.
  uint64_t bloom_filter_useful = 0;
  // # of times bloom FullFilter has not avoided the reads.
  uint64_t bloom_filter_full_positive = 0;
  // # of times bloom FullFilter has not avoided the reads and data actually
  // exist.
  uint64_t bloom_filter_full_true_positive = 0;

  // total number of user key returned (only include keys that are found, does
  // not include keys that are deleted or merged without a final put)
  uint64_t user_key_return_count = 0;

  // total nanos spent on reading data from SST files
  uint64_t get_from_table_nanos = 0;

  uint64_t block_cache_hit_count = 0;   // total number of block cache hits
  uint64_t block_cache_miss_count = 0;  // total number of block cache misses

  void Reset();  // reset all performance counters to zero
};
44
7c673cae 45struct PerfContext {
494da23a
TL
46 ~PerfContext();
47
48 PerfContext() {}
49
50 PerfContext(const PerfContext&);
51 PerfContext& operator=(const PerfContext&);
52 PerfContext(PerfContext&&) noexcept;
7c673cae 53
494da23a 54 void Reset(); // reset all performance counters to zero
7c673cae
FG
55
56 std::string ToString(bool exclude_zero_counters = false) const;
57
494da23a
TL
58 // enable per level perf context and allocate storage for PerfContextByLevel
59 void EnablePerLevelPerfContext();
60
1e59de90 61 // temporarily disable per level perf context by setting the flag to false
494da23a
TL
62 void DisablePerLevelPerfContext();
63
64 // free the space for PerfContextByLevel, also disable per level perf context
65 void ClearPerLevelPerfContext();
66
67 uint64_t user_key_comparison_count; // total number of user key comparisons
68 uint64_t block_cache_hit_count; // total number of block cache hits
69 uint64_t block_read_count; // total number of block reads (with IO)
70 uint64_t block_read_byte; // total number of bytes from block reads
71 uint64_t block_read_time; // total nanos spent on block reads
1e59de90
TL
72 uint64_t block_cache_index_hit_count; // total number of index block hits
73 // total number of standalone handles lookup from secondary cache
74 uint64_t block_cache_standalone_handle_count;
75 // total number of real handles lookup from secondary cache that are inserted
76 // into primary cache
77 uint64_t block_cache_real_handle_count;
494da23a
TL
78 uint64_t index_block_read_count; // total number of index block reads
79 uint64_t block_cache_filter_hit_count; // total number of filter block hits
80 uint64_t filter_block_read_count; // total number of filter block reads
81 uint64_t compression_dict_block_read_count; // total number of compression
82 // dictionary block reads
1e59de90
TL
83
84 uint64_t secondary_cache_hit_count; // total number of secondary cache hits
85 // total number of real handles inserted into secondary cache
86 uint64_t compressed_sec_cache_insert_real_count;
87 // total number of dummy handles inserted into secondary cache
88 uint64_t compressed_sec_cache_insert_dummy_count;
89 // bytes for vals before compression in secondary cache
90 uint64_t compressed_sec_cache_uncompressed_bytes;
91 // bytes for vals after compression in secondary cache
92 uint64_t compressed_sec_cache_compressed_bytes;
93
494da23a 94 uint64_t block_checksum_time; // total nanos spent on block checksum
7c673cae 95 uint64_t block_decompress_time; // total nanos spent on block decompression
11fdf7f2
TL
96
97 uint64_t get_read_bytes; // bytes for vals returned by Get
98 uint64_t multiget_read_bytes; // bytes for vals returned by MultiGet
99 uint64_t iter_read_bytes; // bytes for keys/vals decoded by iterator
100
1e59de90
TL
101 uint64_t blob_cache_hit_count; // total number of blob cache hits
102 uint64_t blob_read_count; // total number of blob reads (with IO)
103 uint64_t blob_read_byte; // total number of bytes from blob reads
104 uint64_t blob_read_time; // total nanos spent on blob reads
105 uint64_t blob_checksum_time; // total nanos spent on blob checksum
106 uint64_t blob_decompress_time; // total nanos spent on blob decompression
107
7c673cae
FG
108 // total number of internal keys skipped over during iteration.
109 // There are several reasons for it:
110 // 1. when calling Next(), the iterator is in the position of the previous
111 // key, so that we'll need to skip it. It means this counter will always
112 // be incremented in Next().
113 // 2. when calling Next(), we need to skip internal entries for the previous
114 // keys that are overwritten.
115 // 3. when calling Next(), Seek() or SeekToFirst(), after previous key
116 // before calling Next(), the seek key in Seek() or the beginning for
117 // SeekToFirst(), there may be one or more deleted keys before the next
118 // valid key that the operation should place the iterator to. We need
119 // to skip both of the tombstone and updates hidden by the tombstones. The
120 // tombstones are not included in this counter, while previous updates
121 // hidden by the tombstones will be included here.
122 // 4. symmetric cases for Prev() and SeekToLast()
123 // internal_recent_skipped_count is not included in this counter.
124 //
125 uint64_t internal_key_skipped_count;
126 // Total number of deletes and single deletes skipped over during iteration
127 // When calling Next(), Seek() or SeekToFirst(), after previous position
128 // before calling Next(), the seek key in Seek() or the beginning for
129 // SeekToFirst(), there may be one or more deleted keys before the next valid
130 // key. Every deleted key is counted once. We don't recount here if there are
131 // still older updates invalidated by the tombstones.
132 //
133 uint64_t internal_delete_skipped_count;
134 // How many times iterators skipped over internal keys that are more recent
135 // than the snapshot that iterator is using.
136 //
137 uint64_t internal_recent_skipped_count;
138 // How many values were fed into merge operator by iterators.
139 //
140 uint64_t internal_merge_count;
1e59de90
TL
141 // Number of times we reseeked inside a merging iterator, specifically to skip
142 // after or before a range of keys covered by a range deletion in a newer LSM
143 // component.
144 uint64_t internal_range_del_reseek_count;
7c673cae 145
494da23a
TL
146 uint64_t get_snapshot_time; // total nanos spent on getting snapshot
147 uint64_t get_from_memtable_time; // total nanos spent on querying memtables
148 uint64_t get_from_memtable_count; // number of mem tables queried
7c673cae
FG
149 // total nanos spent after Get() finds a key
150 uint64_t get_post_process_time;
151 uint64_t get_from_output_files_time; // total nanos reading from output files
152 // total nanos spent on seeking memtable
153 uint64_t seek_on_memtable_time;
154 // number of seeks issued on memtable
155 // (including SeekForPrev but not SeekToFirst and SeekToLast)
156 uint64_t seek_on_memtable_count;
157 // number of Next()s issued on memtable
158 uint64_t next_on_memtable_count;
159 // number of Prev()s issued on memtable
160 uint64_t prev_on_memtable_count;
161 // total nanos spent on seeking child iters
162 uint64_t seek_child_seek_time;
163 // number of seek issued in child iterators
164 uint64_t seek_child_seek_count;
165 uint64_t seek_min_heap_time; // total nanos spent on the merge min heap
166 uint64_t seek_max_heap_time; // total nanos spent on the merge max heap
167 // total nanos spent on seeking the internal entries
168 uint64_t seek_internal_seek_time;
169 // total nanos spent on iterating internal entries to find the next user entry
170 uint64_t find_next_user_entry_time;
171
11fdf7f2
TL
172 // This group of stats provide a breakdown of time spent by Write().
173 // May be inaccurate when 2PC, two_write_queues or enable_pipelined_write
174 // are enabled.
175 //
7c673cae
FG
176 // total nanos spent on writing to WAL
177 uint64_t write_wal_time;
178 // total nanos spent on writing to mem tables
179 uint64_t write_memtable_time;
11fdf7f2 180 // total nanos spent on delaying or throttling write
7c673cae 181 uint64_t write_delay_time;
11fdf7f2
TL
182 // total nanos spent on switching memtable/wal and scheduling
183 // flushes/compactions.
184 uint64_t write_scheduling_flushes_compactions_time;
185 // total nanos spent on writing a record, excluding the above four things
7c673cae
FG
186 uint64_t write_pre_and_post_process_time;
187
11fdf7f2
TL
188 // time spent waiting for other threads of the batch group
189 uint64_t write_thread_wait_nanos;
190
191 // time spent on acquiring DB mutex.
192 uint64_t db_mutex_lock_nanos;
7c673cae
FG
193 // Time spent on waiting with a condition variable created with DB mutex.
194 uint64_t db_condition_wait_nanos;
195 // Time spent on merge operator.
196 uint64_t merge_operator_time_nanos;
197
198 // Time spent on reading index block from block cache or SST file
199 uint64_t read_index_block_nanos;
200 // Time spent on reading filter block from block cache or SST file
201 uint64_t read_filter_block_nanos;
202 // Time spent on creating data block iterator
203 uint64_t new_table_block_iter_nanos;
204 // Time spent on creating a iterator of an SST file.
205 uint64_t new_table_iterator_nanos;
206 // Time spent on seeking a key in data/index blocks
207 uint64_t block_seek_nanos;
208 // Time spent on finding or creating a table reader
209 uint64_t find_table_nanos;
210 // total number of mem table bloom hits
211 uint64_t bloom_memtable_hit_count;
212 // total number of mem table bloom misses
213 uint64_t bloom_memtable_miss_count;
214 // total number of SST table bloom hits
215 uint64_t bloom_sst_hit_count;
216 // total number of SST table bloom misses
217 uint64_t bloom_sst_miss_count;
218
11fdf7f2
TL
219 // Time spent waiting on key locks in transaction lock manager.
220 uint64_t key_lock_wait_time;
221 // number of times acquiring a lock was blocked by another transaction.
222 uint64_t key_lock_wait_count;
223
7c673cae
FG
224 // Total time spent in Env filesystem operations. These are only populated
225 // when TimedEnv is used.
226 uint64_t env_new_sequential_file_nanos;
227 uint64_t env_new_random_access_file_nanos;
228 uint64_t env_new_writable_file_nanos;
229 uint64_t env_reuse_writable_file_nanos;
230 uint64_t env_new_random_rw_file_nanos;
231 uint64_t env_new_directory_nanos;
232 uint64_t env_file_exists_nanos;
233 uint64_t env_get_children_nanos;
234 uint64_t env_get_children_file_attributes_nanos;
235 uint64_t env_delete_file_nanos;
236 uint64_t env_create_dir_nanos;
237 uint64_t env_create_dir_if_missing_nanos;
238 uint64_t env_delete_dir_nanos;
239 uint64_t env_get_file_size_nanos;
240 uint64_t env_get_file_modification_time_nanos;
241 uint64_t env_rename_file_nanos;
242 uint64_t env_link_file_nanos;
243 uint64_t env_lock_file_nanos;
244 uint64_t env_unlock_file_nanos;
245 uint64_t env_new_logger_nanos;
494da23a
TL
246
247 uint64_t get_cpu_nanos;
248 uint64_t iter_next_cpu_nanos;
249 uint64_t iter_prev_cpu_nanos;
250 uint64_t iter_seek_cpu_nanos;
251
20effc67
TL
252 // Time spent in encrypting data. Populated when EncryptedEnv is used.
253 uint64_t encrypt_data_nanos;
254 // Time spent in decrypting data. Populated when EncryptedEnv is used.
255 uint64_t decrypt_data_nanos;
256
1e59de90
TL
257 uint64_t number_async_seek;
258
494da23a
TL
259 std::map<uint32_t, PerfContextByLevel>* level_to_perf_context = nullptr;
260 bool per_level_perf_context_enabled = false;
7c673cae
FG
261};
262
1e59de90
TL
263// If RocksDB is compiled with -DNPERF_CONTEXT, then a pointer to a global,
264// non-thread-local PerfContext object will be returned. Attempts to update
265// this object will be ignored, and reading from it will also be no-op.
266// Otherwise,
267// a) if thread-local is supported on the platform, then a pointer to
268// a thread-local PerfContext object will be returned.
269// b) if thread-local is NOT supported, then compilation will fail.
270//
271// This function never returns nullptr.
11fdf7f2 272PerfContext* get_perf_context();
7c673cae 273
f67539c2 274} // namespace ROCKSDB_NAMESPACE