]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/tools/trace_analyzer_tool.h
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / tools / trace_analyzer_tool.h
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #pragma once
7 #ifndef ROCKSDB_LITE
8
9 #include <list>
10 #include <map>
11 #include <queue>
12 #include <set>
13 #include <utility>
14 #include <vector>
15
16 #include "rocksdb/env.h"
17 #include "rocksdb/trace_reader_writer.h"
18 #include "rocksdb/trace_record.h"
19 #include "rocksdb/write_batch.h"
20 #include "trace_replay/trace_replay.h"
21
22 namespace ROCKSDB_NAMESPACE {
23
24 // Value sizes may be used as denominators. Replacing 0 value sizes with this
25 // positive integer avoids division error.
26 extern const size_t kShadowValueSize /* = 10*/;
27
28 enum TraceOperationType : int {
29 kGet = 0,
30 kPut = 1,
31 kDelete = 2,
32 kSingleDelete = 3,
33 kRangeDelete = 4,
34 kMerge = 5,
35 kIteratorSeek = 6,
36 kIteratorSeekForPrev = 7,
37 kMultiGet = 8,
38 kTaTypeNum = 9
39 };
40
41 struct TraceUnit {
42 uint64_t ts;
43 uint32_t type;
44 uint32_t cf_id;
45 size_t value_size;
46 std::string key;
47 };
48
49 struct TypeCorrelation {
50 uint64_t count;
51 uint64_t total_ts;
52 };
53
54 struct StatsUnit {
55 uint64_t key_id;
56 uint64_t access_count;
57 uint64_t latest_ts;
58 uint64_t succ_count; // current only used to count Get if key found
59 uint32_t cf_id;
60 size_t value_size;
61 std::vector<TypeCorrelation> v_correlation;
62 };
63
64 class AnalyzerOptions {
65 public:
66 std::vector<std::vector<int>> correlation_map;
67 std::vector<std::pair<int, int>> correlation_list;
68
69 AnalyzerOptions();
70
71 ~AnalyzerOptions();
72
73 void SparseCorrelationInput(const std::string& in_str);
74 };
75
76 // Note that, for the variable names in the trace_analyzer,
77 // Starting with 'a_' means the variable is used for 'accessed_keys'.
78 // Starting with 'w_' means it is used for 'the whole key space'.
79 // Ending with '_f' means a file write or reader pointer.
80 // For example, 'a_count' means 'accessed_keys_count',
81 // 'w_key_f' means 'whole_key_space_file'.
82
83 struct TraceStats {
84 uint32_t cf_id;
85 std::string cf_name;
86 uint64_t a_count;
87 uint64_t a_succ_count;
88 uint64_t a_key_id;
89 uint64_t a_key_size_sqsum;
90 uint64_t a_key_size_sum;
91 uint64_t a_key_mid;
92 uint64_t a_value_size_sqsum;
93 uint64_t a_value_size_sum;
94 uint64_t a_value_mid;
95 uint32_t a_peak_qps;
96 double a_ave_qps;
97 std::map<std::string, StatsUnit> a_key_stats;
98 std::map<uint64_t, uint64_t> a_count_stats;
99 std::map<uint64_t, uint64_t> a_key_size_stats;
100 std::map<uint64_t, uint64_t> a_value_size_stats;
101 std::map<uint32_t, uint32_t> a_qps_stats;
102 std::map<uint32_t, std::map<std::string, uint32_t>> a_qps_prefix_stats;
103 std::priority_queue<std::pair<uint64_t, std::string>,
104 std::vector<std::pair<uint64_t, std::string>>,
105 std::greater<std::pair<uint64_t, std::string>>>
106 top_k_queue;
107 std::priority_queue<std::pair<uint64_t, std::string>,
108 std::vector<std::pair<uint64_t, std::string>>,
109 std::greater<std::pair<uint64_t, std::string>>>
110 top_k_prefix_access;
111 std::priority_queue<std::pair<double, std::string>,
112 std::vector<std::pair<double, std::string>>,
113 std::greater<std::pair<double, std::string>>>
114 top_k_prefix_ave;
115 std::priority_queue<std::pair<uint32_t, uint32_t>,
116 std::vector<std::pair<uint32_t, uint32_t>>,
117 std::greater<std::pair<uint32_t, uint32_t>>>
118 top_k_qps_sec;
119 std::list<TraceUnit> time_series;
120 std::vector<std::pair<uint64_t, uint64_t>> correlation_output;
121 std::map<uint32_t, uint64_t> uni_key_num;
122
123 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> time_series_f;
124 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_key_f;
125 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_count_dist_f;
126 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_prefix_cut_f;
127 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_value_size_f;
128 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_key_size_f;
129 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_key_num_f;
130 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_qps_f;
131 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> a_top_qps_prefix_f;
132 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> w_key_f;
133 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> w_prefix_cut_f;
134
135 TraceStats();
136 ~TraceStats();
137 TraceStats(const TraceStats&) = delete;
138 TraceStats& operator=(const TraceStats&) = delete;
139 TraceStats(TraceStats&&) = default;
140 TraceStats& operator=(TraceStats&&) = default;
141 };
142
143 struct TypeUnit {
144 std::string type_name;
145 bool enabled;
146 uint64_t total_keys;
147 uint64_t total_access;
148 uint64_t total_succ_access;
149 uint32_t sample_count;
150 std::map<uint32_t, TraceStats> stats;
151 TypeUnit() = default;
152 ~TypeUnit() = default;
153 TypeUnit(const TypeUnit&) = delete;
154 TypeUnit& operator=(const TypeUnit&) = delete;
155 TypeUnit(TypeUnit&&) = default;
156 TypeUnit& operator=(TypeUnit&&) = default;
157 };
158
159 struct CfUnit {
160 uint32_t cf_id;
161 uint64_t w_count; // total keys in this cf if we use the whole key space
162 uint64_t a_count; // the total keys in this cf that are accessed
163 std::map<uint64_t, uint64_t> w_key_size_stats; // whole key space key size
164 // statistic this cf
165 std::map<uint32_t, uint32_t> cf_qps;
166 };
167
168 class TraceAnalyzer : private TraceRecord::Handler,
169 private WriteBatch::Handler {
170 public:
171 TraceAnalyzer(std::string& trace_path, std::string& output_path,
172 AnalyzerOptions _analyzer_opts);
173 ~TraceAnalyzer();
174
175 Status PrepareProcessing();
176
177 Status StartProcessing();
178
179 Status MakeStatistics();
180
181 Status ReProcessing();
182
183 Status EndProcessing();
184
185 Status WriteTraceUnit(TraceUnit& unit);
186
187 std::vector<TypeUnit>& GetTaVector() { return ta_; }
188
189 private:
190 using TraceRecord::Handler::Handle;
191 Status Handle(const WriteQueryTraceRecord& record,
192 std::unique_ptr<TraceRecordResult>* result) override;
193 Status Handle(const GetQueryTraceRecord& record,
194 std::unique_ptr<TraceRecordResult>* result) override;
195 Status Handle(const IteratorSeekQueryTraceRecord& record,
196 std::unique_ptr<TraceRecordResult>* result) override;
197 Status Handle(const MultiGetQueryTraceRecord& record,
198 std::unique_ptr<TraceRecordResult>* result) override;
199
200 using WriteBatch::Handler::PutCF;
201 Status PutCF(uint32_t column_family_id, const Slice& key,
202 const Slice& value) override;
203
204 using WriteBatch::Handler::DeleteCF;
205 Status DeleteCF(uint32_t column_family_id, const Slice& key) override;
206
207 using WriteBatch::Handler::SingleDeleteCF;
208 Status SingleDeleteCF(uint32_t column_family_id, const Slice& key) override;
209
210 using WriteBatch::Handler::DeleteRangeCF;
211 Status DeleteRangeCF(uint32_t column_family_id, const Slice& begin_key,
212 const Slice& end_key) override;
213
214 using WriteBatch::Handler::MergeCF;
215 Status MergeCF(uint32_t column_family_id, const Slice& key,
216 const Slice& value) override;
217
218 // The following hanlders are not implemented, return Status::OK() to avoid
219 // the running time assertion and other irrelevant falures.
220 using WriteBatch::Handler::PutBlobIndexCF;
221 Status PutBlobIndexCF(uint32_t /*column_family_id*/, const Slice& /*key*/,
222 const Slice& /*value*/) override {
223 return Status::OK();
224 }
225
226 // The default implementation of LogData does nothing.
227 using WriteBatch::Handler::LogData;
228 void LogData(const Slice& /*blob*/) override {}
229
230 using WriteBatch::Handler::MarkBeginPrepare;
231 Status MarkBeginPrepare(bool = false) override { return Status::OK(); }
232
233 using WriteBatch::Handler::MarkEndPrepare;
234 Status MarkEndPrepare(const Slice& /*xid*/) override { return Status::OK(); }
235
236 using WriteBatch::Handler::MarkNoop;
237 Status MarkNoop(bool /*empty_batch*/) override { return Status::OK(); }
238
239 using WriteBatch::Handler::MarkRollback;
240 Status MarkRollback(const Slice& /*xid*/) override { return Status::OK(); }
241
242 using WriteBatch::Handler::MarkCommit;
243 Status MarkCommit(const Slice& /*xid*/) override { return Status::OK(); }
244
245 using WriteBatch::Handler::MarkCommitWithTimestamp;
246 Status MarkCommitWithTimestamp(const Slice& /*xid*/,
247 const Slice& /*commit_ts*/) override {
248 return Status::OK();
249 }
250
251 // Process each trace operation and output the analysis result to
252 // stdout/files.
253 Status OutputAnalysisResult(TraceOperationType op_type, uint64_t timestamp,
254 std::vector<uint32_t> cf_ids,
255 std::vector<Slice> keys,
256 std::vector<size_t> value_sizes);
257
258 Status OutputAnalysisResult(TraceOperationType op_type, uint64_t timestamp,
259 uint32_t cf_id, const Slice& key,
260 size_t value_size);
261
262 ROCKSDB_NAMESPACE::Env* env_;
263 EnvOptions env_options_;
264 std::unique_ptr<TraceReader> trace_reader_;
265 size_t offset_;
266 char buffer_[1024];
267 // Timestamp of a WriteBatch, used in its iteration.
268 uint64_t write_batch_ts_;
269 std::string trace_name_;
270 std::string output_path_;
271 AnalyzerOptions analyzer_opts_;
272 uint64_t total_requests_;
273 uint64_t total_access_keys_;
274 uint64_t total_gets_;
275 uint64_t total_writes_;
276 uint64_t total_seeks_;
277 uint64_t total_seek_prevs_;
278 uint64_t total_multigets_;
279 uint64_t trace_create_time_;
280 uint64_t begin_time_;
281 uint64_t end_time_;
282 uint64_t time_series_start_;
283 uint32_t sample_max_;
284 uint32_t cur_time_sec_;
285 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile>
286 trace_sequence_f_; // readable trace
287 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile> qps_f_; // overall qps
288 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile>
289 cf_qps_f_; // The qps of each CF>
290 std::vector<TypeUnit> ta_; // The main statistic collecting data structure
291 std::map<uint32_t, CfUnit> cfs_; // All the cf_id appears in this trace;
292 std::vector<uint32_t> qps_peak_;
293 std::vector<double> qps_ave_;
294
295 Status ReadTraceHeader(Trace* header);
296 Status ReadTraceFooter(Trace* footer);
297 Status ReadTraceRecord(Trace* trace);
298 Status KeyStatsInsertion(const uint32_t& type, const uint32_t& cf_id,
299 const std::string& key, const size_t value_size,
300 const uint64_t ts);
301 Status StatsUnitCorrelationUpdate(StatsUnit& unit, const uint32_t& type,
302 const uint64_t& ts, const std::string& key);
303 Status OpenStatsOutputFiles(const std::string& type, TraceStats& new_stats);
304 Status CreateOutputFile(
305 const std::string& type, const std::string& cf_name,
306 const std::string& ending,
307 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile>* f_ptr);
308 Status CloseOutputFiles();
309
310 void PrintStatistics();
311 Status TraceUnitWriter(
312 std::unique_ptr<ROCKSDB_NAMESPACE::WritableFile>& f_ptr, TraceUnit& unit);
313 Status WriteTraceSequence(const uint32_t& type, const uint32_t& cf_id,
314 const Slice& key, const size_t value_size,
315 const uint64_t ts);
316 Status MakeStatisticKeyStatsOrPrefix(TraceStats& stats);
317 Status MakeStatisticCorrelation(TraceStats& stats, StatsUnit& unit);
318 Status MakeStatisticQPS();
319 int db_version_;
320 };
321
322 int trace_analyzer_tool(int argc, char** argv);
323
324 } // namespace ROCKSDB_NAMESPACE
325
326 #endif // ROCKSDB_LITE