1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
16 #include "rocksdb/env.h"
17 #include "rocksdb/trace_reader_writer.h"
18 #include "rocksdb/trace_record.h"
19 #include "rocksdb/write_batch.h"
20 #include "trace_replay/trace_replay.h"
22 namespace ROCKSDB_NAMESPACE
{
24 // Value sizes may be used as denominators. Replacing 0 value sizes with this
25 // positive integer avoids division-by-zero errors.
// Only declared here (`extern`); the definition is provided elsewhere. The
// inline comment below records the expected value.
26 extern const size_t kShadowValueSize
/* = 10*/;
28 enum TraceOperationType
: int {
36 kIteratorSeekForPrev
= 7,
49 struct TypeCorrelation
{
56 uint64_t access_count
;
58 uint64_t succ_count
; // currently only used to count Gets where the key was found
61 std::vector
<TypeCorrelation
> v_correlation
;
64 class AnalyzerOptions
{
66 std::vector
<std::vector
<int>> correlation_map
;
67 std::vector
<std::pair
<int, int>> correlation_list
;
73 void SparseCorrelationInput(const std::string
& in_str
);
76 // Note that, for the variable names in the trace_analyzer,
77 // Starting with 'a_' means the variable is used for 'accessed_keys'.
78 // Starting with 'w_' means it is used for 'the whole key space'.
79 // Ending with '_f' means a file writer or reader pointer.
80 // For example, 'a_count' means 'accessed_keys_count',
81 // 'w_key_f' means 'whole_key_space_file'.
87 uint64_t a_succ_count
;
89 uint64_t a_key_size_sqsum
;
90 uint64_t a_key_size_sum
;
92 uint64_t a_value_size_sqsum
;
93 uint64_t a_value_size_sum
;
97 std::map
<std::string
, StatsUnit
> a_key_stats
;
98 std::map
<uint64_t, uint64_t> a_count_stats
;
99 std::map
<uint64_t, uint64_t> a_key_size_stats
;
100 std::map
<uint64_t, uint64_t> a_value_size_stats
;
101 std::map
<uint32_t, uint32_t> a_qps_stats
;
102 std::map
<uint32_t, std::map
<std::string
, uint32_t>> a_qps_prefix_stats
;
103 std::priority_queue
<std::pair
<uint64_t, std::string
>,
104 std::vector
<std::pair
<uint64_t, std::string
>>,
105 std::greater
<std::pair
<uint64_t, std::string
>>>
107 std::priority_queue
<std::pair
<uint64_t, std::string
>,
108 std::vector
<std::pair
<uint64_t, std::string
>>,
109 std::greater
<std::pair
<uint64_t, std::string
>>>
111 std::priority_queue
<std::pair
<double, std::string
>,
112 std::vector
<std::pair
<double, std::string
>>,
113 std::greater
<std::pair
<double, std::string
>>>
115 std::priority_queue
<std::pair
<uint32_t, uint32_t>,
116 std::vector
<std::pair
<uint32_t, uint32_t>>,
117 std::greater
<std::pair
<uint32_t, uint32_t>>>
119 std::list
<TraceUnit
> time_series
;
120 std::vector
<std::pair
<uint64_t, uint64_t>> correlation_output
;
121 std::map
<uint32_t, uint64_t> uni_key_num
;
123 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
> time_series_f
;
124 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
> a_key_f
;
125 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
> a_count_dist_f
;
126 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
> a_prefix_cut_f
;
127 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
> a_value_size_f
;
128 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
> a_key_size_f
;
129 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
> a_key_num_f
;
130 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
> a_qps_f
;
131 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
> a_top_qps_prefix_f
;
132 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
> w_key_f
;
133 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
> w_prefix_cut_f
;
// Copying is disabled: the copy constructor and copy assignment operator are
// both deleted.
137 TraceStats(const TraceStats
&) = delete;
138 TraceStats
& operator=(const TraceStats
&) = delete;
// Moving is allowed and uses the compiler-generated move constructor and move
// assignment operator.
139 TraceStats(TraceStats
&&) = default;
140 TraceStats
& operator=(TraceStats
&&) = default;
144 std::string type_name
;
147 uint64_t total_access
;
148 uint64_t total_succ_access
;
149 uint32_t sample_count
;
150 std::map
<uint32_t, TraceStats
> stats
;
// Default construction and destruction use the compiler-generated versions.
151 TypeUnit() = default;
152 ~TypeUnit() = default;
// Copying is disabled: the copy constructor and copy assignment operator are
// both deleted.
153 TypeUnit(const TypeUnit
&) = delete;
154 TypeUnit
& operator=(const TypeUnit
&) = delete;
// Moving is allowed and uses the compiler-generated move operations.
155 TypeUnit(TypeUnit
&&) = default;
156 TypeUnit
& operator=(TypeUnit
&&) = default;
161 uint64_t w_count
; // total keys in this cf if we use the whole key space
162 uint64_t a_count
; // the total keys in this cf that are accessed
163 std::map
<uint64_t, uint64_t> w_key_size_stats
; // whole key space key size
165 std::map
<uint32_t, uint32_t> cf_qps
;
168 class TraceAnalyzer
: private TraceRecord::Handler
,
169 private WriteBatch::Handler
{
// Constructor declaration. `trace_path` and `output_path` are taken by
// non-const lvalue reference, so callers must pass named std::string objects;
// `_analyzer_opts` is taken by value.
// NOTE(review): presumably `trace_path` is the trace file to read and
// `output_path` is where result files are written -- confirm against the
// definition.
171 TraceAnalyzer(std::string
& trace_path
, std::string
& output_path
,
172 AnalyzerOptions _analyzer_opts
);
175 Status
PrepareProcessing();
177 Status
StartProcessing();
179 Status
MakeStatistics();
181 Status
ReProcessing();
183 Status
EndProcessing();
185 Status
WriteTraceUnit(TraceUnit
& unit
);
// Returns a mutable reference to the internal `ta_` vector, so callers can
// read or modify the collected TypeUnit entries in place.
187 std::vector
<TypeUnit
>& GetTaVector() { return ta_
; }
// Keep any base-class Handle overloads that are not overridden in this class
// from being hidden by the overrides declared here.
190 using TraceRecord::Handler::Handle
;
// Handler override for WriteQueryTraceRecord. `result` is a pointer to a
// std::unique_ptr<TraceRecordResult>. Declaration only; the definition is not
// in this header section.
191 Status
Handle(const WriteQueryTraceRecord
& record
,
192 std::unique_ptr
<TraceRecordResult
>* result
) override
;
// Handler override for GetQueryTraceRecord. `result` is a pointer to a
// std::unique_ptr<TraceRecordResult>. Declaration only.
193 Status
Handle(const GetQueryTraceRecord
& record
,
194 std::unique_ptr
<TraceRecordResult
>* result
) override
;
// Handler override for IteratorSeekQueryTraceRecord. `result` is a pointer to
// a std::unique_ptr<TraceRecordResult>. Declaration only.
195 Status
Handle(const IteratorSeekQueryTraceRecord
& record
,
196 std::unique_ptr
<TraceRecordResult
>* result
) override
;
// Handler override for MultiGetQueryTraceRecord. `result` is a pointer to a
// std::unique_ptr<TraceRecordResult>. Declaration only.
197 Status
Handle(const MultiGetQueryTraceRecord
& record
,
198 std::unique_ptr
<TraceRecordResult
>* result
) override
;
// The using-declaration keeps any other base-class PutCF overloads from being
// hidden by the override declared below.
200 using WriteBatch::Handler::PutCF
;
// WriteBatch::Handler override taking a column family id, a key and a value.
// Declaration only.
201 Status
PutCF(uint32_t column_family_id
, const Slice
& key
,
202 const Slice
& value
) override
;
// The using-declaration keeps any other base-class DeleteCF overloads from
// being hidden by the override declared below.
204 using WriteBatch::Handler::DeleteCF
;
// WriteBatch::Handler override taking a column family id and a key.
// Declaration only.
205 Status
DeleteCF(uint32_t column_family_id
, const Slice
& key
) override
;
// The using-declaration keeps any other base-class SingleDeleteCF overloads
// from being hidden by the override declared below.
207 using WriteBatch::Handler::SingleDeleteCF
;
// WriteBatch::Handler override taking a column family id and a key.
// Declaration only.
208 Status
SingleDeleteCF(uint32_t column_family_id
, const Slice
& key
) override
;
// The using-declaration keeps any other base-class DeleteRangeCF overloads
// from being hidden by the override declared below.
210 using WriteBatch::Handler::DeleteRangeCF
;
// WriteBatch::Handler override taking a column family id and the begin/end
// keys of the range. Declaration only.
211 Status
DeleteRangeCF(uint32_t column_family_id
, const Slice
& begin_key
,
212 const Slice
& end_key
) override
;
// The using-declaration keeps any other base-class MergeCF overloads from
// being hidden by the override declared below.
214 using WriteBatch::Handler::MergeCF
;
// WriteBatch::Handler override taking a column family id, a key and a value.
// Declaration only.
215 Status
MergeCF(uint32_t column_family_id
, const Slice
& key
,
216 const Slice
& value
) override
;
218 // The following handlers are not implemented, return Status::OK() to avoid
219 // the runtime assertion and other irrelevant failures.
220 using WriteBatch::Handler::PutBlobIndexCF
;
221 Status
PutBlobIndexCF(uint32_t /*column_family_id*/, const Slice
& /*key*/,
222 const Slice
& /*value*/) override
{
226 // The default implementation of LogData does nothing.
// This override also has an empty body, so the blob argument is ignored.
227 using WriteBatch::Handler::LogData
;
228 void LogData(const Slice
& /*blob*/) override
{}
// No-op override: the (unnamed) bool argument is unused and the function
// always returns Status::OK().
230 using WriteBatch::Handler::MarkBeginPrepare
;
231 Status
MarkBeginPrepare(bool = false) override
{ return Status::OK(); }
// No-op override: the xid argument is unused and the function always returns
// Status::OK().
233 using WriteBatch::Handler::MarkEndPrepare
;
234 Status
MarkEndPrepare(const Slice
& /*xid*/) override
{ return Status::OK(); }
// No-op override: the empty_batch argument is unused and the function always
// returns Status::OK().
236 using WriteBatch::Handler::MarkNoop
;
237 Status
MarkNoop(bool /*empty_batch*/) override
{ return Status::OK(); }
// No-op override: the xid argument is unused and the function always returns
// Status::OK().
239 using WriteBatch::Handler::MarkRollback
;
240 Status
MarkRollback(const Slice
& /*xid*/) override
{ return Status::OK(); }
// No-op override: the xid argument is unused and the function always returns
// Status::OK().
242 using WriteBatch::Handler::MarkCommit
;
243 Status
MarkCommit(const Slice
& /*xid*/) override
{ return Status::OK(); }
245 using WriteBatch::Handler::MarkCommitWithTimestamp
;
246 Status
MarkCommitWithTimestamp(const Slice
& /*xid*/,
247 const Slice
& /*commit_ts*/) override
{
251 // Process each trace operation and output the analysis result to
253 Status
OutputAnalysisResult(TraceOperationType op_type
, uint64_t timestamp
,
254 std::vector
<uint32_t> cf_ids
,
255 std::vector
<Slice
> keys
,
256 std::vector
<size_t> value_sizes
);
258 Status
OutputAnalysisResult(TraceOperationType op_type
, uint64_t timestamp
,
259 uint32_t cf_id
, const Slice
& key
,
262 ROCKSDB_NAMESPACE::Env
* env_
;
263 EnvOptions env_options_
;
264 std::unique_ptr
<TraceReader
> trace_reader_
;
267 // Timestamp of a WriteBatch, used in its iteration.
268 uint64_t write_batch_ts_
;
269 std::string trace_name_
;
270 std::string output_path_
;
271 AnalyzerOptions analyzer_opts_
;
272 uint64_t total_requests_
;
273 uint64_t total_access_keys_
;
274 uint64_t total_gets_
;
275 uint64_t total_writes_
;
276 uint64_t total_seeks_
;
277 uint64_t total_seek_prevs_
;
278 uint64_t total_multigets_
;
279 uint64_t trace_create_time_
;
280 uint64_t begin_time_
;
282 uint64_t time_series_start_
;
283 uint32_t sample_max_
;
284 uint32_t cur_time_sec_
;
285 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
>
286 trace_sequence_f_
; // readable trace
287 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
> qps_f_
; // overall qps
288 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
>
289 cf_qps_f_
; // The qps of each CF
290 std::vector
<TypeUnit
> ta_
; // The main statistic collecting data structure
291 std::map
<uint32_t, CfUnit
> cfs_
; // All the cf_ids that appear in this trace
292 std::vector
<uint32_t> qps_peak_
;
293 std::vector
<double> qps_ave_
;
295 Status
ReadTraceHeader(Trace
* header
);
296 Status
ReadTraceFooter(Trace
* footer
);
297 Status
ReadTraceRecord(Trace
* trace
);
298 Status
KeyStatsInsertion(const uint32_t& type
, const uint32_t& cf_id
,
299 const std::string
& key
, const size_t value_size
,
301 Status
StatsUnitCorrelationUpdate(StatsUnit
& unit
, const uint32_t& type
,
302 const uint64_t& ts
, const std::string
& key
);
303 Status
OpenStatsOutputFiles(const std::string
& type
, TraceStats
& new_stats
);
304 Status
CreateOutputFile(
305 const std::string
& type
, const std::string
& cf_name
,
306 const std::string
& ending
,
307 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
>* f_ptr
);
308 Status
CloseOutputFiles();
310 void PrintStatistics();
311 Status
TraceUnitWriter(
312 std::unique_ptr
<ROCKSDB_NAMESPACE::WritableFile
>& f_ptr
, TraceUnit
& unit
);
313 Status
WriteTraceSequence(const uint32_t& type
, const uint32_t& cf_id
,
314 const Slice
& key
, const size_t value_size
,
316 Status
MakeStatisticKeyStatsOrPrefix(TraceStats
& stats
);
317 Status
MakeStatisticCorrelation(TraceStats
& stats
, StatsUnit
& unit
);
318 Status
MakeStatisticQPS();
// Declaration of the trace analyzer tool's entry function, which takes
// main()-style `argc`/`argv` arguments.
// NOTE(review): presumably the return value is a process exit code -- confirm
// against the definition.
322 int trace_analyzer_tool(int argc
, char** argv
);
324 } // namespace ROCKSDB_NAMESPACE
326 #endif // ROCKSDB_LITE