// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <stdint.h>

#include <map>
#include <string>

#include "rocksdb/perf_level.h"

namespace ROCKSDB_NAMESPACE {

// A thread local context for gathering performance counter efficiently
// and transparently.
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.
494da23a TL |
21 | // Break down performance counters by level and store per-level perf context in |
22 | // PerfContextByLevel | |
23 | struct PerfContextByLevel { | |
24 | // # of times bloom filter has avoided file reads, i.e., negatives. | |
25 | uint64_t bloom_filter_useful = 0; | |
26 | // # of times bloom FullFilter has not avoided the reads. | |
27 | uint64_t bloom_filter_full_positive = 0; | |
28 | // # of times bloom FullFilter has not avoided the reads and data actually | |
29 | // exist. | |
30 | uint64_t bloom_filter_full_true_positive = 0; | |
31 | ||
32 | // total number of user key returned (only include keys that are found, does | |
33 | // not include keys that are deleted or merged without a final put | |
20effc67 | 34 | uint64_t user_key_return_count = 0; |
494da23a TL |
35 | |
36 | // total nanos spent on reading data from SST files | |
20effc67 | 37 | uint64_t get_from_table_nanos = 0; |
494da23a TL |
38 | |
39 | uint64_t block_cache_hit_count = 0; // total number of block cache hits | |
40 | uint64_t block_cache_miss_count = 0; // total number of block cache misses | |
41 | ||
42 | void Reset(); // reset all performance counters to zero | |
43 | }; | |
44 | ||
7c673cae | 45 | struct PerfContext { |
494da23a TL |
46 | ~PerfContext(); |
47 | ||
48 | PerfContext() {} | |
49 | ||
50 | PerfContext(const PerfContext&); | |
51 | PerfContext& operator=(const PerfContext&); | |
52 | PerfContext(PerfContext&&) noexcept; | |
7c673cae | 53 | |
494da23a | 54 | void Reset(); // reset all performance counters to zero |
7c673cae FG |
55 | |
56 | std::string ToString(bool exclude_zero_counters = false) const; | |
57 | ||
494da23a TL |
58 | // enable per level perf context and allocate storage for PerfContextByLevel |
59 | void EnablePerLevelPerfContext(); | |
60 | ||
1e59de90 | 61 | // temporarily disable per level perf context by setting the flag to false |
494da23a TL |
62 | void DisablePerLevelPerfContext(); |
63 | ||
64 | // free the space for PerfContextByLevel, also disable per level perf context | |
65 | void ClearPerLevelPerfContext(); | |
66 | ||
67 | uint64_t user_key_comparison_count; // total number of user key comparisons | |
68 | uint64_t block_cache_hit_count; // total number of block cache hits | |
69 | uint64_t block_read_count; // total number of block reads (with IO) | |
70 | uint64_t block_read_byte; // total number of bytes from block reads | |
71 | uint64_t block_read_time; // total nanos spent on block reads | |
1e59de90 TL |
72 | uint64_t block_cache_index_hit_count; // total number of index block hits |
73 | // total number of standalone handles lookup from secondary cache | |
74 | uint64_t block_cache_standalone_handle_count; | |
75 | // total number of real handles lookup from secondary cache that are inserted | |
76 | // into primary cache | |
77 | uint64_t block_cache_real_handle_count; | |
494da23a TL |
78 | uint64_t index_block_read_count; // total number of index block reads |
79 | uint64_t block_cache_filter_hit_count; // total number of filter block hits | |
80 | uint64_t filter_block_read_count; // total number of filter block reads | |
81 | uint64_t compression_dict_block_read_count; // total number of compression | |
82 | // dictionary block reads | |
1e59de90 TL |
83 | |
84 | uint64_t secondary_cache_hit_count; // total number of secondary cache hits | |
85 | // total number of real handles inserted into secondary cache | |
86 | uint64_t compressed_sec_cache_insert_real_count; | |
87 | // total number of dummy handles inserted into secondary cache | |
88 | uint64_t compressed_sec_cache_insert_dummy_count; | |
89 | // bytes for vals before compression in secondary cache | |
90 | uint64_t compressed_sec_cache_uncompressed_bytes; | |
91 | // bytes for vals after compression in secondary cache | |
92 | uint64_t compressed_sec_cache_compressed_bytes; | |
93 | ||
494da23a | 94 | uint64_t block_checksum_time; // total nanos spent on block checksum |
7c673cae | 95 | uint64_t block_decompress_time; // total nanos spent on block decompression |
11fdf7f2 TL |
96 | |
97 | uint64_t get_read_bytes; // bytes for vals returned by Get | |
98 | uint64_t multiget_read_bytes; // bytes for vals returned by MultiGet | |
99 | uint64_t iter_read_bytes; // bytes for keys/vals decoded by iterator | |
100 | ||
1e59de90 TL |
101 | uint64_t blob_cache_hit_count; // total number of blob cache hits |
102 | uint64_t blob_read_count; // total number of blob reads (with IO) | |
103 | uint64_t blob_read_byte; // total number of bytes from blob reads | |
104 | uint64_t blob_read_time; // total nanos spent on blob reads | |
105 | uint64_t blob_checksum_time; // total nanos spent on blob checksum | |
106 | uint64_t blob_decompress_time; // total nanos spent on blob decompression | |
107 | ||
7c673cae FG |
108 | // total number of internal keys skipped over during iteration. |
109 | // There are several reasons for it: | |
110 | // 1. when calling Next(), the iterator is in the position of the previous | |
111 | // key, so that we'll need to skip it. It means this counter will always | |
112 | // be incremented in Next(). | |
113 | // 2. when calling Next(), we need to skip internal entries for the previous | |
114 | // keys that are overwritten. | |
115 | // 3. when calling Next(), Seek() or SeekToFirst(), after previous key | |
116 | // before calling Next(), the seek key in Seek() or the beginning for | |
117 | // SeekToFirst(), there may be one or more deleted keys before the next | |
118 | // valid key that the operation should place the iterator to. We need | |
119 | // to skip both of the tombstone and updates hidden by the tombstones. The | |
120 | // tombstones are not included in this counter, while previous updates | |
121 | // hidden by the tombstones will be included here. | |
122 | // 4. symmetric cases for Prev() and SeekToLast() | |
123 | // internal_recent_skipped_count is not included in this counter. | |
124 | // | |
125 | uint64_t internal_key_skipped_count; | |
126 | // Total number of deletes and single deletes skipped over during iteration | |
127 | // When calling Next(), Seek() or SeekToFirst(), after previous position | |
128 | // before calling Next(), the seek key in Seek() or the beginning for | |
129 | // SeekToFirst(), there may be one or more deleted keys before the next valid | |
130 | // key. Every deleted key is counted once. We don't recount here if there are | |
131 | // still older updates invalidated by the tombstones. | |
132 | // | |
133 | uint64_t internal_delete_skipped_count; | |
134 | // How many times iterators skipped over internal keys that are more recent | |
135 | // than the snapshot that iterator is using. | |
136 | // | |
137 | uint64_t internal_recent_skipped_count; | |
138 | // How many values were fed into merge operator by iterators. | |
139 | // | |
140 | uint64_t internal_merge_count; | |
1e59de90 TL |
141 | // Number of times we reseeked inside a merging iterator, specifically to skip |
142 | // after or before a range of keys covered by a range deletion in a newer LSM | |
143 | // component. | |
144 | uint64_t internal_range_del_reseek_count; | |
7c673cae | 145 | |
494da23a TL |
146 | uint64_t get_snapshot_time; // total nanos spent on getting snapshot |
147 | uint64_t get_from_memtable_time; // total nanos spent on querying memtables | |
148 | uint64_t get_from_memtable_count; // number of mem tables queried | |
7c673cae FG |
149 | // total nanos spent after Get() finds a key |
150 | uint64_t get_post_process_time; | |
151 | uint64_t get_from_output_files_time; // total nanos reading from output files | |
152 | // total nanos spent on seeking memtable | |
153 | uint64_t seek_on_memtable_time; | |
154 | // number of seeks issued on memtable | |
155 | // (including SeekForPrev but not SeekToFirst and SeekToLast) | |
156 | uint64_t seek_on_memtable_count; | |
157 | // number of Next()s issued on memtable | |
158 | uint64_t next_on_memtable_count; | |
159 | // number of Prev()s issued on memtable | |
160 | uint64_t prev_on_memtable_count; | |
161 | // total nanos spent on seeking child iters | |
162 | uint64_t seek_child_seek_time; | |
163 | // number of seek issued in child iterators | |
164 | uint64_t seek_child_seek_count; | |
165 | uint64_t seek_min_heap_time; // total nanos spent on the merge min heap | |
166 | uint64_t seek_max_heap_time; // total nanos spent on the merge max heap | |
167 | // total nanos spent on seeking the internal entries | |
168 | uint64_t seek_internal_seek_time; | |
169 | // total nanos spent on iterating internal entries to find the next user entry | |
170 | uint64_t find_next_user_entry_time; | |
171 | ||
11fdf7f2 TL |
172 | // This group of stats provide a breakdown of time spent by Write(). |
173 | // May be inaccurate when 2PC, two_write_queues or enable_pipelined_write | |
174 | // are enabled. | |
175 | // | |
7c673cae FG |
176 | // total nanos spent on writing to WAL |
177 | uint64_t write_wal_time; | |
178 | // total nanos spent on writing to mem tables | |
179 | uint64_t write_memtable_time; | |
11fdf7f2 | 180 | // total nanos spent on delaying or throttling write |
7c673cae | 181 | uint64_t write_delay_time; |
11fdf7f2 TL |
182 | // total nanos spent on switching memtable/wal and scheduling |
183 | // flushes/compactions. | |
184 | uint64_t write_scheduling_flushes_compactions_time; | |
185 | // total nanos spent on writing a record, excluding the above four things | |
7c673cae FG |
186 | uint64_t write_pre_and_post_process_time; |
187 | ||
11fdf7f2 TL |
188 | // time spent waiting for other threads of the batch group |
189 | uint64_t write_thread_wait_nanos; | |
190 | ||
191 | // time spent on acquiring DB mutex. | |
192 | uint64_t db_mutex_lock_nanos; | |
7c673cae FG |
193 | // Time spent on waiting with a condition variable created with DB mutex. |
194 | uint64_t db_condition_wait_nanos; | |
195 | // Time spent on merge operator. | |
196 | uint64_t merge_operator_time_nanos; | |
197 | ||
198 | // Time spent on reading index block from block cache or SST file | |
199 | uint64_t read_index_block_nanos; | |
200 | // Time spent on reading filter block from block cache or SST file | |
201 | uint64_t read_filter_block_nanos; | |
202 | // Time spent on creating data block iterator | |
203 | uint64_t new_table_block_iter_nanos; | |
204 | // Time spent on creating a iterator of an SST file. | |
205 | uint64_t new_table_iterator_nanos; | |
206 | // Time spent on seeking a key in data/index blocks | |
207 | uint64_t block_seek_nanos; | |
208 | // Time spent on finding or creating a table reader | |
209 | uint64_t find_table_nanos; | |
210 | // total number of mem table bloom hits | |
211 | uint64_t bloom_memtable_hit_count; | |
212 | // total number of mem table bloom misses | |
213 | uint64_t bloom_memtable_miss_count; | |
214 | // total number of SST table bloom hits | |
215 | uint64_t bloom_sst_hit_count; | |
216 | // total number of SST table bloom misses | |
217 | uint64_t bloom_sst_miss_count; | |
218 | ||
11fdf7f2 TL |
219 | // Time spent waiting on key locks in transaction lock manager. |
220 | uint64_t key_lock_wait_time; | |
221 | // number of times acquiring a lock was blocked by another transaction. | |
222 | uint64_t key_lock_wait_count; | |
223 | ||
7c673cae FG |
224 | // Total time spent in Env filesystem operations. These are only populated |
225 | // when TimedEnv is used. | |
226 | uint64_t env_new_sequential_file_nanos; | |
227 | uint64_t env_new_random_access_file_nanos; | |
228 | uint64_t env_new_writable_file_nanos; | |
229 | uint64_t env_reuse_writable_file_nanos; | |
230 | uint64_t env_new_random_rw_file_nanos; | |
231 | uint64_t env_new_directory_nanos; | |
232 | uint64_t env_file_exists_nanos; | |
233 | uint64_t env_get_children_nanos; | |
234 | uint64_t env_get_children_file_attributes_nanos; | |
235 | uint64_t env_delete_file_nanos; | |
236 | uint64_t env_create_dir_nanos; | |
237 | uint64_t env_create_dir_if_missing_nanos; | |
238 | uint64_t env_delete_dir_nanos; | |
239 | uint64_t env_get_file_size_nanos; | |
240 | uint64_t env_get_file_modification_time_nanos; | |
241 | uint64_t env_rename_file_nanos; | |
242 | uint64_t env_link_file_nanos; | |
243 | uint64_t env_lock_file_nanos; | |
244 | uint64_t env_unlock_file_nanos; | |
245 | uint64_t env_new_logger_nanos; | |
494da23a TL |
246 | |
247 | uint64_t get_cpu_nanos; | |
248 | uint64_t iter_next_cpu_nanos; | |
249 | uint64_t iter_prev_cpu_nanos; | |
250 | uint64_t iter_seek_cpu_nanos; | |
251 | ||
20effc67 TL |
252 | // Time spent in encrypting data. Populated when EncryptedEnv is used. |
253 | uint64_t encrypt_data_nanos; | |
254 | // Time spent in decrypting data. Populated when EncryptedEnv is used. | |
255 | uint64_t decrypt_data_nanos; | |
256 | ||
1e59de90 TL |
257 | uint64_t number_async_seek; |
258 | ||
494da23a TL |
259 | std::map<uint32_t, PerfContextByLevel>* level_to_perf_context = nullptr; |
260 | bool per_level_perf_context_enabled = false; | |
7c673cae FG |
261 | }; |
262 | ||
1e59de90 TL |
263 | // If RocksDB is compiled with -DNPERF_CONTEXT, then a pointer to a global, |
264 | // non-thread-local PerfContext object will be returned. Attempts to update | |
265 | // this object will be ignored, and reading from it will also be no-op. | |
266 | // Otherwise, | |
267 | // a) if thread-local is supported on the platform, then a pointer to | |
268 | // a thread-local PerfContext object will be returned. | |
269 | // b) if thread-local is NOT supported, then compilation will fail. | |
270 | // | |
271 | // This function never returns nullptr. | |
11fdf7f2 | 272 | PerfContext* get_perf_context(); |
}  // namespace ROCKSDB_NAMESPACE