// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <stdint.h>
#include <map>
#include <string>

#include "rocksdb/perf_level.h"

namespace ROCKSDB_NAMESPACE {

// A thread-local context for gathering performance counters efficiently
// and transparently.
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.

// Break down performance counters by level and store per-level perf context in
// PerfContextByLevel
struct PerfContextByLevel {
  // # of times bloom filter has avoided file reads, i.e., negatives.
  uint64_t bloom_filter_useful = 0;
  // # of times bloom FullFilter has not avoided the reads.
  uint64_t bloom_filter_full_positive = 0;
  // # of times bloom FullFilter has not avoided the reads and the data
  // actually exists.
  uint64_t bloom_filter_full_true_positive = 0;

  // total number of user keys returned (only includes keys that are found;
  // does not include keys that are deleted or merged without a final put)
  uint64_t user_key_return_count = 0;

  // total nanos spent on reading data from SST files
  uint64_t get_from_table_nanos = 0;

  uint64_t block_cache_hit_count = 0;   // total number of block cache hits
  uint64_t block_cache_miss_count = 0;  // total number of block cache misses

  void Reset();  // reset all performance counters to zero
};

struct PerfContext {
  ~PerfContext();

  PerfContext() {}

  PerfContext(const PerfContext&);
  PerfContext& operator=(const PerfContext&);
  PerfContext(PerfContext&&) noexcept;

  void Reset();  // reset all performance counters to zero

  std::string ToString(bool exclude_zero_counters = false) const;

  // enable per level perf context and allocate storage for PerfContextByLevel
  void EnablePerLevelPerfContext();

  // temporarily disable per level perf context by setting the flag to false
  void DisablePerLevelPerfContext();

  // free the space for PerfContextByLevel, also disable per level perf context
  void ClearPerLevelPerfContext();
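  //
  // Example (a minimal sketch of using the per-level counters; assumes an
  // open DB handle `db` with bloom filters configured, neither of which is
  // part of this header):
  //
  //   SetPerfLevel(PerfLevel::kEnableCount);
  //   get_perf_context()->EnablePerLevelPerfContext();
  //   get_perf_context()->Reset();
  //   std::string value;
  //   db->Get(ReadOptions(), "key", &value);
  //   for (const auto& kv : *get_perf_context()->level_to_perf_context) {
  //     // kv.first is the LSM level, kv.second holds that level's counters
  //     uint64_t useful = kv.second.bloom_filter_useful;
  //   }
  //   get_perf_context()->ClearPerLevelPerfContext();
  //   SetPerfLevel(PerfLevel::kDisable);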

  uint64_t user_key_comparison_count;  // total number of user key comparisons
  uint64_t block_cache_hit_count;      // total number of block cache hits
  uint64_t block_read_count;           // total number of block reads (with IO)
  uint64_t block_read_byte;            // total number of bytes from block reads
  uint64_t block_read_time;            // total nanos spent on block reads
  uint64_t block_cache_index_hit_count;   // total number of index block hits
  uint64_t index_block_read_count;        // total number of index block reads
  uint64_t block_cache_filter_hit_count;  // total number of filter block hits
  uint64_t filter_block_read_count;       // total number of filter block reads
  uint64_t compression_dict_block_read_count;  // total number of compression
                                               // dictionary block reads
  uint64_t block_checksum_time;    // total nanos spent on block checksum
  uint64_t block_decompress_time;  // total nanos spent on block decompression

  uint64_t get_read_bytes;       // bytes for vals returned by Get
  uint64_t multiget_read_bytes;  // bytes for vals returned by MultiGet
  uint64_t iter_read_bytes;      // bytes for keys/vals decoded by iterator

  // total number of internal keys skipped over during iteration.
  // There are several reasons for it:
  // 1. when calling Next(), the iterator is positioned at the previous key,
  //    which needs to be skipped, so this counter is always incremented in
  //    Next().
  // 2. when calling Next(), we need to skip internal entries for previous
  //    keys that have been overwritten.
  // 3. when calling Next(), Seek() or SeekToFirst(), there may be one or more
  //    deleted keys between the starting position (the previous key for
  //    Next(), the seek key for Seek(), or the beginning for SeekToFirst())
  //    and the next valid key the operation should place the iterator at. We
  //    need to skip both the tombstones and the updates hidden by them. The
  //    tombstones are not included in this counter, while previous updates
  //    hidden by the tombstones are included here.
  // 4. symmetric cases for Prev() and SeekToLast()
  // internal_recent_skipped_count is not included in this counter.
  //
  uint64_t internal_key_skipped_count;
  // Total number of deletes and single deletes skipped over during iteration.
  // When calling Next(), Seek() or SeekToFirst(), there may be one or more
  // deleted keys between the starting position (the previous key for Next(),
  // the seek key for Seek(), or the beginning for SeekToFirst()) and the next
  // valid key. Every deleted key is counted once. We don't count again if
  // there are still older updates invalidated by the tombstones.
  //
  uint64_t internal_delete_skipped_count;
  // How many times iterators skipped over internal keys that are more recent
  // than the snapshot that the iterator is using.
  //
  uint64_t internal_recent_skipped_count;
  // How many values were fed into the merge operator by iterators.
  //
  uint64_t internal_merge_count;

  uint64_t get_snapshot_time;        // total nanos spent on getting snapshot
  uint64_t get_from_memtable_time;   // total nanos spent on querying memtables
  uint64_t get_from_memtable_count;  // number of mem tables queried
  // total nanos spent after Get() finds a key
  uint64_t get_post_process_time;
  uint64_t get_from_output_files_time;  // total nanos reading from output files
  // total nanos spent on seeking memtable
  uint64_t seek_on_memtable_time;
  // number of seeks issued on memtable
  // (including SeekForPrev but not SeekToFirst and SeekToLast)
  uint64_t seek_on_memtable_count;
  // number of Next()s issued on memtable
  uint64_t next_on_memtable_count;
  // number of Prev()s issued on memtable
  uint64_t prev_on_memtable_count;
  // total nanos spent on seeking child iters
  uint64_t seek_child_seek_time;
  // number of seeks issued in child iterators
  uint64_t seek_child_seek_count;
  uint64_t seek_min_heap_time;  // total nanos spent on the merge min heap
  uint64_t seek_max_heap_time;  // total nanos spent on the merge max heap
  // total nanos spent on seeking the internal entries
  uint64_t seek_internal_seek_time;
  // total nanos spent on iterating internal entries to find the next user
  // entry
  uint64_t find_next_user_entry_time;

  // This group of stats provides a breakdown of time spent by Write().
  // May be inaccurate when 2PC, two_write_queues or enable_pipelined_write
  // are enabled.
  //
  // total nanos spent on writing to WAL
  uint64_t write_wal_time;
  // total nanos spent on writing to mem tables
  uint64_t write_memtable_time;
  // total nanos spent on delaying or throttling write
  uint64_t write_delay_time;
  // total nanos spent on switching memtable/wal and scheduling
  // flushes/compactions.
  uint64_t write_scheduling_flushes_compactions_time;
  // total nanos spent on writing a record, excluding the above four things
  uint64_t write_pre_and_post_process_time;
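  //
  // Example (a minimal sketch of reading this breakdown; assumes an open DB
  // handle `db`, which is not part of this header):
  //
  //   SetPerfLevel(PerfLevel::kEnableTime);
  //   get_perf_context()->Reset();
  //   db->Put(WriteOptions(), "key", "value");
  //   uint64_t wal_nanos = get_perf_context()->write_wal_time;
  //   uint64_t memtable_nanos = get_perf_context()->write_memtable_time;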

  // time spent waiting for other threads of the batch group
  uint64_t write_thread_wait_nanos;

  // time spent on acquiring DB mutex.
  uint64_t db_mutex_lock_nanos;
  // Time spent on waiting with a condition variable created with DB mutex.
  uint64_t db_condition_wait_nanos;
  // Time spent on merge operator.
  uint64_t merge_operator_time_nanos;

  // Time spent on reading index block from block cache or SST file
  uint64_t read_index_block_nanos;
  // Time spent on reading filter block from block cache or SST file
  uint64_t read_filter_block_nanos;
  // Time spent on creating a data block iterator
  uint64_t new_table_block_iter_nanos;
  // Time spent on creating an iterator of an SST file.
  uint64_t new_table_iterator_nanos;
  // Time spent on seeking a key in data/index blocks
  uint64_t block_seek_nanos;
  // Time spent on finding or creating a table reader
  uint64_t find_table_nanos;
  // total number of mem table bloom hits
  uint64_t bloom_memtable_hit_count;
  // total number of mem table bloom misses
  uint64_t bloom_memtable_miss_count;
  // total number of SST table bloom hits
  uint64_t bloom_sst_hit_count;
  // total number of SST table bloom misses
  uint64_t bloom_sst_miss_count;

  // Time spent waiting on key locks in transaction lock manager.
  uint64_t key_lock_wait_time;
  // number of times acquiring a lock was blocked by another transaction.
  uint64_t key_lock_wait_count;

  // Total time spent in Env filesystem operations. These are only populated
  // when TimedEnv is used.
  uint64_t env_new_sequential_file_nanos;
  uint64_t env_new_random_access_file_nanos;
  uint64_t env_new_writable_file_nanos;
  uint64_t env_reuse_writable_file_nanos;
  uint64_t env_new_random_rw_file_nanos;
  uint64_t env_new_directory_nanos;
  uint64_t env_file_exists_nanos;
  uint64_t env_get_children_nanos;
  uint64_t env_get_children_file_attributes_nanos;
  uint64_t env_delete_file_nanos;
  uint64_t env_create_dir_nanos;
  uint64_t env_create_dir_if_missing_nanos;
  uint64_t env_delete_dir_nanos;
  uint64_t env_get_file_size_nanos;
  uint64_t env_get_file_modification_time_nanos;
  uint64_t env_rename_file_nanos;
  uint64_t env_link_file_nanos;
  uint64_t env_lock_file_nanos;
  uint64_t env_unlock_file_nanos;
  uint64_t env_new_logger_nanos;

  // CPU time spent in Get(), in nanos
  uint64_t get_cpu_nanos;
  // CPU time spent in Iterator::Next(), in nanos
  uint64_t iter_next_cpu_nanos;
  // CPU time spent in Iterator::Prev(), in nanos
  uint64_t iter_prev_cpu_nanos;
  // CPU time spent in iterator seek operations, in nanos
  uint64_t iter_seek_cpu_nanos;

  // Time spent in encrypting data. Populated when EncryptedEnv is used.
  uint64_t encrypt_data_nanos;
  // Time spent in decrypting data. Populated when EncryptedEnv is used.
  uint64_t decrypt_data_nanos;

  std::map<uint32_t, PerfContextByLevel>* level_to_perf_context = nullptr;
  bool per_level_perf_context_enabled = false;
};

// Get the thread-local PerfContext object pointer.
// If NPERF_CONTEXT is defined, the pointer is not thread-local.
PerfContext* get_perf_context();
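
// Example (a minimal sketch of typical usage; assumes an open DB handle
// `db`, which is not part of this header):
//
//   #include "rocksdb/db.h"
//   #include "rocksdb/perf_context.h"
//   #include "rocksdb/perf_level.h"
//
//   SetPerfLevel(PerfLevel::kEnableTime);  // enables counters and time stats
//   get_perf_context()->Reset();
//   std::string value;
//   db->Get(ReadOptions(), "key", &value);
//   // Inspect individual counters or dump everything at once.
//   uint64_t memtable_nanos = get_perf_context()->get_from_memtable_time;
//   std::string report = get_perf_context()->ToString();
//   SetPerfLevel(PerfLevel::kDisable);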

}  // namespace ROCKSDB_NAMESPACE