]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/include/rocksdb/perf_context.h
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / include / rocksdb / perf_context.h
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae 5
11fdf7f2 6#pragma once
7c673cae
FG
7
8#include <stdint.h>
494da23a 9#include <map>
7c673cae
FG
10#include <string>
11
12#include "rocksdb/perf_level.h"
13
f67539c2 14namespace ROCKSDB_NAMESPACE {
7c673cae
FG
15
16// A thread-local context for gathering performance counters efficiently
17// and transparently.
18// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.
19
494da23a
TL
20// Break down performance counters by level and store per-level perf context in
21// PerfContextByLevel
22struct PerfContextByLevel {
23 // # of times bloom filter has avoided file reads, i.e., negatives.
24 uint64_t bloom_filter_useful = 0;
25 // # of times bloom FullFilter has not avoided the reads.
26 uint64_t bloom_filter_full_positive = 0;
27 // # of times bloom FullFilter has not avoided the reads and data actually
28 // exist.
29 uint64_t bloom_filter_full_true_positive = 0;
30
31 // total number of user key returned (only include keys that are found, does
32 // not include keys that are deleted or merged without a final put
20effc67 33 uint64_t user_key_return_count = 0;
494da23a
TL
34
35 // total nanos spent on reading data from SST files
20effc67 36 uint64_t get_from_table_nanos = 0;
494da23a
TL
37
38 uint64_t block_cache_hit_count = 0; // total number of block cache hits
39 uint64_t block_cache_miss_count = 0; // total number of block cache misses
40
41 void Reset(); // reset all performance counters to zero
42};
43
7c673cae 44struct PerfContext {
494da23a
TL
45 ~PerfContext();
46
47 PerfContext() {}
48
49 PerfContext(const PerfContext&);
50 PerfContext& operator=(const PerfContext&);
51 PerfContext(PerfContext&&) noexcept;
7c673cae 52
494da23a 53 void Reset(); // reset all performance counters to zero
7c673cae
FG
54
55 std::string ToString(bool exclude_zero_counters = false) const;
56
494da23a
TL
57 // enable per level perf context and allocate storage for PerfContextByLevel
58 void EnablePerLevelPerfContext();
59
60 // temporarily disable per level perf contxt by setting the flag to false
61 void DisablePerLevelPerfContext();
62
63 // free the space for PerfContextByLevel, also disable per level perf context
64 void ClearPerLevelPerfContext();
65
66 uint64_t user_key_comparison_count; // total number of user key comparisons
67 uint64_t block_cache_hit_count; // total number of block cache hits
68 uint64_t block_read_count; // total number of block reads (with IO)
69 uint64_t block_read_byte; // total number of bytes from block reads
70 uint64_t block_read_time; // total nanos spent on block reads
71 uint64_t block_cache_index_hit_count; // total number of index block hits
72 uint64_t index_block_read_count; // total number of index block reads
73 uint64_t block_cache_filter_hit_count; // total number of filter block hits
74 uint64_t filter_block_read_count; // total number of filter block reads
75 uint64_t compression_dict_block_read_count; // total number of compression
76 // dictionary block reads
77 uint64_t block_checksum_time; // total nanos spent on block checksum
7c673cae 78 uint64_t block_decompress_time; // total nanos spent on block decompression
11fdf7f2
TL
79
80 uint64_t get_read_bytes; // bytes for vals returned by Get
81 uint64_t multiget_read_bytes; // bytes for vals returned by MultiGet
82 uint64_t iter_read_bytes; // bytes for keys/vals decoded by iterator
83
7c673cae
FG
84 // total number of internal keys skipped over during iteration.
85 // There are several reasons for it:
86 // 1. when calling Next(), the iterator is in the position of the previous
87 // key, so that we'll need to skip it. It means this counter will always
88 // be incremented in Next().
89 // 2. when calling Next(), we need to skip internal entries for the previous
90 // keys that are overwritten.
91 // 3. when calling Next(), Seek() or SeekToFirst(), after previous key
92 // before calling Next(), the seek key in Seek() or the beginning for
93 // SeekToFirst(), there may be one or more deleted keys before the next
94 // valid key that the operation should place the iterator to. We need
95 // to skip both of the tombstone and updates hidden by the tombstones. The
96 // tombstones are not included in this counter, while previous updates
97 // hidden by the tombstones will be included here.
98 // 4. symmetric cases for Prev() and SeekToLast()
99 // internal_recent_skipped_count is not included in this counter.
100 //
101 uint64_t internal_key_skipped_count;
102 // Total number of deletes and single deletes skipped over during iteration
103 // When calling Next(), Seek() or SeekToFirst(), after previous position
104 // before calling Next(), the seek key in Seek() or the beginning for
105 // SeekToFirst(), there may be one or more deleted keys before the next valid
106 // key. Every deleted key is counted once. We don't recount here if there are
107 // still older updates invalidated by the tombstones.
108 //
109 uint64_t internal_delete_skipped_count;
110 // How many times iterators skipped over internal keys that are more recent
111 // than the snapshot that iterator is using.
112 //
113 uint64_t internal_recent_skipped_count;
114 // How many values were fed into merge operator by iterators.
115 //
116 uint64_t internal_merge_count;
117
494da23a
TL
118 uint64_t get_snapshot_time; // total nanos spent on getting snapshot
119 uint64_t get_from_memtable_time; // total nanos spent on querying memtables
120 uint64_t get_from_memtable_count; // number of mem tables queried
7c673cae
FG
121 // total nanos spent after Get() finds a key
122 uint64_t get_post_process_time;
123 uint64_t get_from_output_files_time; // total nanos reading from output files
124 // total nanos spent on seeking memtable
125 uint64_t seek_on_memtable_time;
126 // number of seeks issued on memtable
127 // (including SeekForPrev but not SeekToFirst and SeekToLast)
128 uint64_t seek_on_memtable_count;
129 // number of Next()s issued on memtable
130 uint64_t next_on_memtable_count;
131 // number of Prev()s issued on memtable
132 uint64_t prev_on_memtable_count;
133 // total nanos spent on seeking child iters
134 uint64_t seek_child_seek_time;
135 // number of seek issued in child iterators
136 uint64_t seek_child_seek_count;
137 uint64_t seek_min_heap_time; // total nanos spent on the merge min heap
138 uint64_t seek_max_heap_time; // total nanos spent on the merge max heap
139 // total nanos spent on seeking the internal entries
140 uint64_t seek_internal_seek_time;
141 // total nanos spent on iterating internal entries to find the next user entry
142 uint64_t find_next_user_entry_time;
143
11fdf7f2
TL
144 // This group of stats provide a breakdown of time spent by Write().
145 // May be inaccurate when 2PC, two_write_queues or enable_pipelined_write
146 // are enabled.
147 //
7c673cae
FG
148 // total nanos spent on writing to WAL
149 uint64_t write_wal_time;
150 // total nanos spent on writing to mem tables
151 uint64_t write_memtable_time;
11fdf7f2 152 // total nanos spent on delaying or throttling write
7c673cae 153 uint64_t write_delay_time;
11fdf7f2
TL
154 // total nanos spent on switching memtable/wal and scheduling
155 // flushes/compactions.
156 uint64_t write_scheduling_flushes_compactions_time;
157 // total nanos spent on writing a record, excluding the above four things
7c673cae
FG
158 uint64_t write_pre_and_post_process_time;
159
11fdf7f2
TL
160 // time spent waiting for other threads of the batch group
161 uint64_t write_thread_wait_nanos;
162
163 // time spent on acquiring DB mutex.
164 uint64_t db_mutex_lock_nanos;
7c673cae
FG
165 // Time spent on waiting with a condition variable created with DB mutex.
166 uint64_t db_condition_wait_nanos;
167 // Time spent on merge operator.
168 uint64_t merge_operator_time_nanos;
169
170 // Time spent on reading index block from block cache or SST file
171 uint64_t read_index_block_nanos;
172 // Time spent on reading filter block from block cache or SST file
173 uint64_t read_filter_block_nanos;
174 // Time spent on creating data block iterator
175 uint64_t new_table_block_iter_nanos;
176 // Time spent on creating a iterator of an SST file.
177 uint64_t new_table_iterator_nanos;
178 // Time spent on seeking a key in data/index blocks
179 uint64_t block_seek_nanos;
180 // Time spent on finding or creating a table reader
181 uint64_t find_table_nanos;
182 // total number of mem table bloom hits
183 uint64_t bloom_memtable_hit_count;
184 // total number of mem table bloom misses
185 uint64_t bloom_memtable_miss_count;
186 // total number of SST table bloom hits
187 uint64_t bloom_sst_hit_count;
188 // total number of SST table bloom misses
189 uint64_t bloom_sst_miss_count;
190
11fdf7f2
TL
191 // Time spent waiting on key locks in transaction lock manager.
192 uint64_t key_lock_wait_time;
193 // number of times acquiring a lock was blocked by another transaction.
194 uint64_t key_lock_wait_count;
195
7c673cae
FG
196 // Total time spent in Env filesystem operations. These are only populated
197 // when TimedEnv is used.
198 uint64_t env_new_sequential_file_nanos;
199 uint64_t env_new_random_access_file_nanos;
200 uint64_t env_new_writable_file_nanos;
201 uint64_t env_reuse_writable_file_nanos;
202 uint64_t env_new_random_rw_file_nanos;
203 uint64_t env_new_directory_nanos;
204 uint64_t env_file_exists_nanos;
205 uint64_t env_get_children_nanos;
206 uint64_t env_get_children_file_attributes_nanos;
207 uint64_t env_delete_file_nanos;
208 uint64_t env_create_dir_nanos;
209 uint64_t env_create_dir_if_missing_nanos;
210 uint64_t env_delete_dir_nanos;
211 uint64_t env_get_file_size_nanos;
212 uint64_t env_get_file_modification_time_nanos;
213 uint64_t env_rename_file_nanos;
214 uint64_t env_link_file_nanos;
215 uint64_t env_lock_file_nanos;
216 uint64_t env_unlock_file_nanos;
217 uint64_t env_new_logger_nanos;
494da23a
TL
218
219 uint64_t get_cpu_nanos;
220 uint64_t iter_next_cpu_nanos;
221 uint64_t iter_prev_cpu_nanos;
222 uint64_t iter_seek_cpu_nanos;
223
20effc67
TL
224 // Time spent in encrypting data. Populated when EncryptedEnv is used.
225 uint64_t encrypt_data_nanos;
226 // Time spent in decrypting data. Populated when EncryptedEnv is used.
227 uint64_t decrypt_data_nanos;
228
494da23a
TL
229 std::map<uint32_t, PerfContextByLevel>* level_to_perf_context = nullptr;
230 bool per_level_perf_context_enabled = false;
7c673cae
FG
231};
232
11fdf7f2
TL
233// Returns a pointer to the PerfContext object, which is thread-local
234// unless NPERF_CONTEXT is defined (then the pointer is not thread-local).
235PerfContext* get_perf_context();
7c673cae 236
f67539c2 237} // namespace ROCKSDB_NAMESPACE