// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <stdint.h>
#include <map>
#include <string>

#include "rocksdb/perf_level.h"

namespace ROCKSDB_NAMESPACE {

// A thread-local context for gathering performance counters efficiently
// and transparently.
// Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats.

// Break down performance counters by level and store per-level perf context in
// PerfContextByLevel
struct PerfContextByLevel {
  // # of times bloom filter has avoided file reads, i.e., negatives.
  uint64_t bloom_filter_useful = 0;
  // # of times bloom FullFilter has not avoided the reads.
  uint64_t bloom_filter_full_positive = 0;
  // # of times bloom FullFilter has not avoided the reads and the data
  // actually exists.
  uint64_t bloom_filter_full_true_positive = 0;

  // total number of user keys returned (only includes keys that are found;
  // does not include keys that are deleted or merged without a final put)
  uint64_t user_key_return_count = 0;

  // total nanos spent on reading data from SST files
  uint64_t get_from_table_nanos = 0;

  uint64_t block_cache_hit_count = 0;   // total number of block cache hits
  uint64_t block_cache_miss_count = 0;  // total number of block cache misses

  void Reset();  // reset all performance counters to zero
};

struct PerfContext {
  ~PerfContext();

  PerfContext() {}

  PerfContext(const PerfContext&);
  PerfContext& operator=(const PerfContext&);
  PerfContext(PerfContext&&) noexcept;

  void Reset();  // reset all performance counters to zero

  std::string ToString(bool exclude_zero_counters = false) const;

  // enable per level perf context and allocate storage for PerfContextByLevel
  void EnablePerLevelPerfContext();

  // temporarily disable per level perf context by setting the flag to false
  void DisablePerLevelPerfContext();

  // free the space for PerfContextByLevel, also disable per level perf context
  void ClearPerLevelPerfContext();
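  //
  // Example (a minimal sketch of using the per-level counters; assumes an
  // open DB handle `db` with bloom filters configured, neither of which is
  // part of this header):
  //
  //   SetPerfLevel(PerfLevel::kEnableCount);
  //   get_perf_context()->EnablePerLevelPerfContext();
  //   get_perf_context()->Reset();
  //   std::string value;
  //   db->Get(ReadOptions(), "key", &value);
  //   for (const auto& kv : *get_perf_context()->level_to_perf_context) {
  //     // kv.first is the LSM level, kv.second holds that level's counters
  //     uint64_t useful = kv.second.bloom_filter_useful;
  //   }
  //   get_perf_context()->ClearPerLevelPerfContext();
  //   SetPerfLevel(PerfLevel::kDisable);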

  uint64_t user_key_comparison_count;  // total number of user key comparisons
  uint64_t block_cache_hit_count;      // total number of block cache hits
  uint64_t block_read_count;           // total number of block reads (with IO)
  uint64_t block_read_byte;            // total number of bytes from block reads
  uint64_t block_read_time;            // total nanos spent on block reads
  uint64_t block_cache_index_hit_count;   // total number of index block hits
  uint64_t index_block_read_count;        // total number of index block reads
  uint64_t block_cache_filter_hit_count;  // total number of filter block hits
  uint64_t filter_block_read_count;       // total number of filter block reads
  uint64_t compression_dict_block_read_count;  // total number of compression
                                               // dictionary block reads
  uint64_t block_checksum_time;    // total nanos spent on block checksum
  uint64_t block_decompress_time;  // total nanos spent on block decompression

  uint64_t get_read_bytes;       // bytes for vals returned by Get
  uint64_t multiget_read_bytes;  // bytes for vals returned by MultiGet
  uint64_t iter_read_bytes;      // bytes for keys/vals decoded by iterator

  // total number of internal keys skipped over during iteration.
  // There are several reasons for it:
  // 1. when calling Next(), the iterator is positioned at the previous key,
  //    which needs to be skipped, so this counter is always incremented in
  //    Next().
  // 2. when calling Next(), we need to skip internal entries for previous
  //    keys that have been overwritten.
  // 3. when calling Next(), Seek() or SeekToFirst(), there may be one or more
  //    deleted keys between the starting position (the previous key for
  //    Next(), the seek key for Seek(), or the beginning for SeekToFirst())
  //    and the next valid key the operation should place the iterator at. We
  //    need to skip both the tombstones and the updates hidden by them. The
  //    tombstones are not included in this counter, while previous updates
  //    hidden by the tombstones are included here.
  // 4. symmetric cases for Prev() and SeekToLast()
  // internal_recent_skipped_count is not included in this counter.
  //
  uint64_t internal_key_skipped_count;
  // Total number of deletes and single deletes skipped over during iteration.
  // When calling Next(), Seek() or SeekToFirst(), there may be one or more
  // deleted keys between the starting position (the previous key for Next(),
  // the seek key for Seek(), or the beginning for SeekToFirst()) and the next
  // valid key. Every deleted key is counted once. We don't count again if
  // there are still older updates invalidated by the tombstones.
  //
  uint64_t internal_delete_skipped_count;
  // How many times iterators skipped over internal keys that are more recent
  // than the snapshot that the iterator is using.
  //
  uint64_t internal_recent_skipped_count;
  // How many values were fed into the merge operator by iterators.
  //
  uint64_t internal_merge_count;

  uint64_t get_snapshot_time;        // total nanos spent on getting snapshot
  uint64_t get_from_memtable_time;   // total nanos spent on querying memtables
  uint64_t get_from_memtable_count;  // number of mem tables queried
  // total nanos spent after Get() finds a key
  uint64_t get_post_process_time;
  uint64_t get_from_output_files_time;  // total nanos reading from output files
  // total nanos spent on seeking memtable
  uint64_t seek_on_memtable_time;
  // number of seeks issued on memtable
  // (including SeekForPrev but not SeekToFirst and SeekToLast)
  uint64_t seek_on_memtable_count;
  // number of Next()s issued on memtable
  uint64_t next_on_memtable_count;
  // number of Prev()s issued on memtable
  uint64_t prev_on_memtable_count;
  // total nanos spent on seeking child iters
  uint64_t seek_child_seek_time;
  // number of seeks issued in child iterators
  uint64_t seek_child_seek_count;
  uint64_t seek_min_heap_time;  // total nanos spent on the merge min heap
  uint64_t seek_max_heap_time;  // total nanos spent on the merge max heap
  // total nanos spent on seeking the internal entries
  uint64_t seek_internal_seek_time;
  // total nanos spent on iterating internal entries to find the next user
  // entry
  uint64_t find_next_user_entry_time;

  // This group of stats provides a breakdown of time spent by Write().
  // May be inaccurate when 2PC, two_write_queues or enable_pipelined_write
  // are enabled.
  //
  // total nanos spent on writing to WAL
  uint64_t write_wal_time;
  // total nanos spent on writing to mem tables
  uint64_t write_memtable_time;
  // total nanos spent on delaying or throttling write
  uint64_t write_delay_time;
  // total nanos spent on switching memtable/wal and scheduling
  // flushes/compactions.
  uint64_t write_scheduling_flushes_compactions_time;
  // total nanos spent on writing a record, excluding the above four things
  uint64_t write_pre_and_post_process_time;
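  //
  // Example (a minimal sketch of reading this breakdown; assumes an open DB
  // handle `db`, which is not part of this header):
  //
  //   SetPerfLevel(PerfLevel::kEnableTime);
  //   get_perf_context()->Reset();
  //   db->Put(WriteOptions(), "key", "value");
  //   uint64_t wal_nanos = get_perf_context()->write_wal_time;
  //   uint64_t memtable_nanos = get_perf_context()->write_memtable_time;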

  // time spent waiting for other threads of the batch group
  uint64_t write_thread_wait_nanos;

  // time spent on acquiring DB mutex.
  uint64_t db_mutex_lock_nanos;
  // Time spent on waiting with a condition variable created with DB mutex.
  uint64_t db_condition_wait_nanos;
  // Time spent on merge operator.
  uint64_t merge_operator_time_nanos;

  // Time spent on reading index block from block cache or SST file
  uint64_t read_index_block_nanos;
  // Time spent on reading filter block from block cache or SST file
  uint64_t read_filter_block_nanos;
  // Time spent on creating a data block iterator
  uint64_t new_table_block_iter_nanos;
  // Time spent on creating an iterator of an SST file.
  uint64_t new_table_iterator_nanos;
  // Time spent on seeking a key in data/index blocks
  uint64_t block_seek_nanos;
  // Time spent on finding or creating a table reader
  uint64_t find_table_nanos;
  // total number of mem table bloom hits
  uint64_t bloom_memtable_hit_count;
  // total number of mem table bloom misses
  uint64_t bloom_memtable_miss_count;
  // total number of SST table bloom hits
  uint64_t bloom_sst_hit_count;
  // total number of SST table bloom misses
  uint64_t bloom_sst_miss_count;

  // Time spent waiting on key locks in transaction lock manager.
  uint64_t key_lock_wait_time;
  // number of times acquiring a lock was blocked by another transaction.
  uint64_t key_lock_wait_count;

  // Total time spent in Env filesystem operations. These are only populated
  // when TimedEnv is used.
  uint64_t env_new_sequential_file_nanos;
  uint64_t env_new_random_access_file_nanos;
  uint64_t env_new_writable_file_nanos;
  uint64_t env_reuse_writable_file_nanos;
  uint64_t env_new_random_rw_file_nanos;
  uint64_t env_new_directory_nanos;
  uint64_t env_file_exists_nanos;
  uint64_t env_get_children_nanos;
  uint64_t env_get_children_file_attributes_nanos;
  uint64_t env_delete_file_nanos;
  uint64_t env_create_dir_nanos;
  uint64_t env_create_dir_if_missing_nanos;
  uint64_t env_delete_dir_nanos;
  uint64_t env_get_file_size_nanos;
  uint64_t env_get_file_modification_time_nanos;
  uint64_t env_rename_file_nanos;
  uint64_t env_link_file_nanos;
  uint64_t env_lock_file_nanos;
  uint64_t env_unlock_file_nanos;
  uint64_t env_new_logger_nanos;

  // CPU time spent in Get(), in nanos
  uint64_t get_cpu_nanos;
  // CPU time spent in Iterator::Next(), in nanos
  uint64_t iter_next_cpu_nanos;
  // CPU time spent in Iterator::Prev(), in nanos
  uint64_t iter_prev_cpu_nanos;
  // CPU time spent in iterator seek operations, in nanos
  uint64_t iter_seek_cpu_nanos;

  // Time spent in encrypting data. Populated when EncryptedEnv is used.
  uint64_t encrypt_data_nanos;
  // Time spent in decrypting data. Populated when EncryptedEnv is used.
  uint64_t decrypt_data_nanos;

  std::map<uint32_t, PerfContextByLevel>* level_to_perf_context = nullptr;
  bool per_level_perf_context_enabled = false;
};

// Get the thread-local PerfContext object pointer.
// If NPERF_CONTEXT is defined, the pointer is not thread-local.
PerfContext* get_perf_context();
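
// Example (a minimal sketch of typical usage; assumes an open DB handle
// `db`, which is not part of this header):
//
//   #include "rocksdb/db.h"
//   #include "rocksdb/perf_context.h"
//   #include "rocksdb/perf_level.h"
//
//   SetPerfLevel(PerfLevel::kEnableTime);  // enables counters and time stats
//   get_perf_context()->Reset();
//   std::string value;
//   db->Get(ReadOptions(), "key", &value);
//   // Inspect individual counters or dump everything at once.
//   uint64_t memtable_nanos = get_perf_context()->get_from_memtable_time;
//   std::string report = get_perf_context()->ToString();
//   SetPerfLevel(PerfLevel::kDisable);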

}  // namespace ROCKSDB_NAMESPACE