1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5
6 #ifndef ROCKSDB_LITE
7 #ifdef GFLAGS
8 #include "tools/block_cache_analyzer/block_cache_trace_analyzer.h"
9
10 #include <algorithm>
11 #include <cinttypes>
12 #include <cstdio>
13 #include <cstdlib>
14 #include <fstream>
15 #include <iomanip>
16 #include <iostream>
17 #include <memory>
18 #include <random>
19 #include <sstream>
20
21 #include "monitoring/histogram.h"
22 #include "util/gflags_compat.h"
23 #include "util/string_util.h"
24
25 using GFLAGS_NAMESPACE::ParseCommandLineFlags;
26
27 DEFINE_string(block_cache_trace_path, "", "The trace file path.");
28 DEFINE_bool(is_block_cache_human_readable_trace, false,
29 "Is the trace file provided for analysis generated by running "
30 "block_cache_trace_analyzer with "
31 "FLAGS_human_readable_trace_file_path is specified.");
32 DEFINE_string(
33 block_cache_sim_config_path, "",
34 "The config file path. One cache configuration per line. The format of a "
35 "cache configuration is "
36 "cache_name,num_shard_bits,ghost_capacity,cache_capacity_1,...,cache_"
37 "capacity_N. Supported cache names are lru, lru_priority, lru_hybrid, and "
38 "lru_hybrid_no_insert_on_row_miss. User may also add a prefix 'ghost_' to "
39 "a cache_name to add a ghost cache in front of the real cache. "
40 "ghost_capacity and cache_capacity can be xK, xM or xG where x is a "
41 "positive number.");
42 DEFINE_int32(block_cache_trace_downsample_ratio, 1,
43 "The trace collected accesses on one in every "
44 "block_cache_trace_downsample_ratio blocks. We scale "
45 "down the simulated cache size by this ratio.");
46 DEFINE_bool(print_block_size_stats, false,
47 "Print block size distribution and the distribution break down by "
48 "block type and column family.");
49 DEFINE_bool(print_access_count_stats, false,
50 "Print access count distribution and the distribution break down "
51 "by block type and column family.");
52 DEFINE_bool(print_data_block_access_count_stats, false,
53 "Print data block accesses by user Get and Multi-Get.");
54 DEFINE_int32(cache_sim_warmup_seconds, 0,
55 "The number of seconds to warmup simulated caches. The hit/miss "
56 "counters are reset after the warmup completes.");
57 DEFINE_int32(analyze_bottom_k_access_count_blocks, 0,
58 "Print out detailed access information for blocks with their "
59 "number of accesses are the bottom k among all blocks.");
60 DEFINE_int32(analyze_top_k_access_count_blocks, 0,
61 "Print out detailed access information for blocks with their "
62 "number of accesses are the top k among all blocks.");
63 DEFINE_string(block_cache_analysis_result_dir, "",
64 "The directory that saves block cache analysis results.");
65 DEFINE_string(
66 timeline_labels, "",
67 "Group the number of accesses per block per second using these labels. "
68 "Possible labels are a combination of the following: cf (column family), "
69 "sst, level, bt (block type), caller, block. For example, label \"cf_bt\" "
70 "means the number of access per second is grouped by unique pairs of "
71 "\"cf_bt\". A label \"all\" contains the aggregated number of accesses per "
72 "second across all possible labels.");
73 DEFINE_string(reuse_distance_labels, "",
74 "Group the reuse distance of a block using these labels. Reuse "
75 "distance is defined as the cumulated size of unique blocks read "
76 "between two consecutive accesses on the same block.");
77 DEFINE_string(
78 reuse_distance_buckets, "",
79 "Group blocks by their reuse distances given these buckets. For "
80 "example, if 'reuse_distance_buckets' is '1K,1M,1G', we will "
81 "create four buckets. The first three buckets contain the number of "
82 "blocks with reuse distance less than 1KB, between 1K and 1M, between 1M "
83 "and 1G, respectively. The last bucket contains the number of blocks with "
84 "reuse distance larger than 1G. ");
85 DEFINE_string(
86 reuse_interval_labels, "",
87 "Group the reuse interval of a block using these labels. Reuse "
88 "interval is defined as the time between two consecutive accesses "
89 "on the same block.");
90 DEFINE_string(
91 reuse_interval_buckets, "",
92 "Group blocks by their reuse interval given these buckets. For "
93 "example, if 'reuse_distance_buckets' is '1,10,100', we will "
94 "create four buckets. The first three buckets contain the number of "
95 "blocks with reuse interval less than 1 second, between 1 second and 10 "
96 "seconds, between 10 seconds and 100 seconds, respectively. The last "
97 "bucket contains the number of blocks with reuse interval longer than 100 "
98 "seconds.");
99 DEFINE_string(
100 reuse_lifetime_labels, "",
101 "Group the reuse lifetime of a block using these labels. Reuse "
102 "lifetime is defined as the time interval between the first access on a "
103 "block and the last access on the same block. For blocks that are only "
104 "accessed once, its lifetime is set to kMaxUint64.");
105 DEFINE_string(
106 reuse_lifetime_buckets, "",
107 "Group blocks by their reuse lifetime given these buckets. For "
108 "example, if 'reuse_lifetime_buckets' is '1,10,100', we will "
109 "create four buckets. The first three buckets contain the number of "
110 "blocks with reuse lifetime less than 1 second, between 1 second and 10 "
111 "seconds, between 10 seconds and 100 seconds, respectively. The last "
112 "bucket contains the number of blocks with reuse lifetime longer than 100 "
113 "seconds.");
114 DEFINE_string(
115 analyze_callers, "",
116 "The list of callers to perform a detailed analysis on. If speicfied, the "
117 "analyzer will output a detailed percentage of accesses for each caller "
118 "break down by column family, level, and block type. A list of available "
119 "callers are: Get, MultiGet, Iterator, ApproximateSize, VerifyChecksum, "
120 "SSTDumpTool, ExternalSSTIngestion, Repair, Prefetch, Compaction, "
121 "CompactionRefill, Flush, SSTFileReader, Uncategorized.");
122 DEFINE_string(access_count_buckets, "",
123 "Group number of blocks by their access count given these "
124 "buckets. If specified, the analyzer will output a detailed "
125 "analysis on the number of blocks grouped by their access count "
126 "break down by block type and column family.");
127 DEFINE_int32(analyze_blocks_reuse_k_reuse_window, 0,
128 "Analyze the percentage of blocks that are accessed in the "
129 "[k, 2*k] seconds are accessed again in the next [2*k, 3*k], "
130 "[3*k, 4*k],...,[k*(n-1), k*n] seconds. ");
131 DEFINE_string(analyze_get_spatial_locality_labels, "",
132 "Group data blocks using these labels.");
133 DEFINE_string(analyze_get_spatial_locality_buckets, "",
134 "Group data blocks by their statistics using these buckets.");
135 DEFINE_string(skew_labels, "",
136 "Group the access count of a block using these labels.");
137 DEFINE_string(skew_buckets, "", "Group the skew labels using these buckets.");
138 DEFINE_bool(mrc_only, false,
139 "Evaluate alternative cache policies only. When this flag is true, "
140 "the analyzer does NOT maintain states of each block in memory for "
141 "analysis. It only feeds the accesses into the cache simulators.");
142 DEFINE_string(
143 analyze_correlation_coefficients_labels, "",
144 "Analyze the correlation coefficients of features such as number of past "
145 "accesses with regard to the number of accesses till the next access.");
146 DEFINE_int32(analyze_correlation_coefficients_max_number_of_values, 1000000,
147 "The maximum number of values for a feature. If the number of "
148 "values for a feature is larger than this max, it randomly "
149 "selects 'max' number of values.");
150 DEFINE_string(human_readable_trace_file_path, "",
151 "The filt path that saves human readable access records.");
152
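// Illustrative invocation of the analyzer (binary name, paths, and flag
// values here are hypothetical examples, not taken from this file):
//
//   ./block_cache_trace_analyzer \
//     --block_cache_trace_path=/tmp/block_cache_trace \
//     --block_cache_analysis_result_dir=/tmp/block_cache_analysis \
//     --block_cache_sim_config_path=/tmp/cache_configs \
//     --print_block_size_stats --print_access_count_stats
//
// All flags above are defined in this file; the trace itself is produced by
// a DB run with block cache tracing enabled.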
153 namespace ROCKSDB_NAMESPACE {
154 namespace {
155
156 const std::string kMissRatioCurveFileName = "mrc";
157 const std::string kGroupbyBlock = "block";
158 const std::string kGroupbyTable = "table";
159 const std::string kGroupbyColumnFamily = "cf";
160 const std::string kGroupbySSTFile = "sst";
161 const std::string kGroupbyBlockType = "bt";
162 const std::string kGroupbyCaller = "caller";
163 const std::string kGroupbyLevel = "level";
164 const std::string kGroupbyAll = "all";
165 const std::set<std::string> kGroupbyLabels{
166 kGroupbyBlock, kGroupbyColumnFamily, kGroupbySSTFile, kGroupbyLevel,
167 kGroupbyBlockType, kGroupbyCaller, kGroupbyAll};
168 const std::string kSupportedCacheNames =
169 " lru ghost_lru lru_priority ghost_lru_priority lru_hybrid "
170 "ghost_lru_hybrid lru_hybrid_no_insert_on_row_miss "
171 "ghost_lru_hybrid_no_insert_on_row_miss ";
172
173 // The suffix for the generated csv files.
174 const std::string kFileNameSuffixMissRatioTimeline = "miss_ratio_timeline";
175 const std::string kFileNameSuffixMissTimeline = "miss_timeline";
176 const std::string kFileNameSuffixSkew = "skewness";
177 const std::string kFileNameSuffixAccessTimeline = "access_timeline";
178 const std::string kFileNameSuffixCorrelation = "correlation_input";
179 const std::string kFileNameSuffixAvgReuseIntervalNaccesses =
180 "avg_reuse_interval_naccesses";
181 const std::string kFileNameSuffixAvgReuseInterval = "avg_reuse_interval";
182 const std::string kFileNameSuffixReuseInterval = "access_reuse_interval";
183 const std::string kFileNameSuffixReuseLifetime = "reuse_lifetime";
184 const std::string kFileNameSuffixAccessReuseBlocksTimeline =
185 "reuse_blocks_timeline";
186 const std::string kFileNameSuffixPercentOfAccessSummary =
187 "percentage_of_accesses_summary";
188 const std::string kFileNameSuffixPercentRefKeys = "percent_ref_keys";
189 const std::string kFileNameSuffixPercentDataSizeOnRefKeys =
190 "percent_data_size_on_ref_keys";
191 const std::string kFileNameSuffixPercentAccessesOnRefKeys =
192 "percent_accesses_on_ref_keys";
193 const std::string kFileNameSuffixAccessCountSummary = "access_count_summary";
194
195 std::string block_type_to_string(TraceType type) {
196 switch (type) {
197 case kBlockTraceFilterBlock:
198 return "Filter";
199 case kBlockTraceDataBlock:
200 return "Data";
201 case kBlockTraceIndexBlock:
202 return "Index";
203 case kBlockTraceRangeDeletionBlock:
204 return "RangeDeletion";
205 case kBlockTraceUncompressionDictBlock:
206 return "UncompressionDict";
207 default:
208 break;
209 }
210 // This cannot happen.
211 return "InvalidType";
212 }
213
214 std::string caller_to_string(TableReaderCaller caller) {
215 switch (caller) {
216 case kUserGet:
217 return "Get";
218 case kUserMultiGet:
219 return "MultiGet";
220 case kUserIterator:
221 return "Iterator";
222 case kUserApproximateSize:
223 return "ApproximateSize";
224 case kUserVerifyChecksum:
225 return "VerifyChecksum";
226 case kSSTDumpTool:
227 return "SSTDumpTool";
228 case kExternalSSTIngestion:
229 return "ExternalSSTIngestion";
230 case kRepair:
231 return "Repair";
232 case kPrefetch:
233 return "Prefetch";
234 case kCompaction:
235 return "Compaction";
236 case kCompactionRefill:
237 return "CompactionRefill";
238 case kFlush:
239 return "Flush";
240 case kSSTFileReader:
241 return "SSTFileReader";
242 case kUncategorized:
243 return "Uncategorized";
244 default:
245 break;
246 }
247 // This cannot happen.
248 return "InvalidCaller";
249 }
250
251 TableReaderCaller string_to_caller(std::string caller_str) {
252 if (caller_str == "Get") {
253 return kUserGet;
254 } else if (caller_str == "MultiGet") {
255 return kUserMultiGet;
256 } else if (caller_str == "Iterator") {
257 return kUserIterator;
258 } else if (caller_str == "ApproximateSize") {
259 return kUserApproximateSize;
260 } else if (caller_str == "VerifyChecksum") {
261 return kUserVerifyChecksum;
262 } else if (caller_str == "SSTDumpTool") {
263 return kSSTDumpTool;
264 } else if (caller_str == "ExternalSSTIngestion") {
265 return kExternalSSTIngestion;
266 } else if (caller_str == "Repair") {
267 return kRepair;
268 } else if (caller_str == "Prefetch") {
269 return kPrefetch;
270 } else if (caller_str == "Compaction") {
271 return kCompaction;
272 } else if (caller_str == "CompactionRefill") {
273 return kCompactionRefill;
274 } else if (caller_str == "Flush") {
275 return kFlush;
276 } else if (caller_str == "SSTFileReader") {
277 return kSSTFileReader;
278 } else if (caller_str == "Uncategorized") {
279 return kUncategorized;
280 }
281 return TableReaderCaller::kMaxBlockCacheLookupCaller;
282 }
283
284 bool is_user_access(TableReaderCaller caller) {
285 switch (caller) {
286 case kUserGet:
287 case kUserMultiGet:
288 case kUserIterator:
289 case kUserApproximateSize:
290 case kUserVerifyChecksum:
291 return true;
292 default:
293 break;
294 }
295 return false;
296 }
297
298 const char kBreakLine[] =
299 "***************************************************************\n";
300
301 void print_break_lines(uint32_t num_break_lines) {
302 for (uint32_t i = 0; i < num_break_lines; i++) {
303 fprintf(stdout, kBreakLine);
304 }
305 }
306
307 double percent(uint64_t numerator, uint64_t denominator) {
308 if (denominator == 0) {
309 return -1;
310 }
311 return static_cast<double>(numerator * 100.0 / denominator);
312 }
313
314 std::map<uint64_t, uint64_t> adjust_time_unit(
315 const std::map<uint64_t, uint64_t>& time_stats, uint64_t time_unit) {
316 if (time_unit == 1) {
317 return time_stats;
318 }
319 std::map<uint64_t, uint64_t> adjusted_time_stats;
320 for (auto const& time : time_stats) {
321 adjusted_time_stats[static_cast<uint64_t>(time.first / time_unit)] +=
322 time.second;
323 }
324 return adjusted_time_stats;
325 }
326 } // namespace
327
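// Writes one CSV file named <trace_duration>_<total_accesses>_mrc under
// output_dir_, with a row per simulated cache configuration and capacity:
// cache_name, num_shard_bits, ghost_capacity, capacity, miss_ratio (fixed,
// 4 decimal places), and total_accesses.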
328 void BlockCacheTraceAnalyzer::WriteMissRatioCurves() const {
329 if (!cache_simulator_) {
330 return;
331 }
332 if (output_dir_.empty()) {
333 return;
334 }
335 uint64_t trace_duration =
336 trace_end_timestamp_in_seconds_ - trace_start_timestamp_in_seconds_;
337 uint64_t total_accesses = access_sequence_number_;
338 const std::string output_miss_ratio_curve_path =
339 output_dir_ + "/" + std::to_string(trace_duration) + "_" +
340 std::to_string(total_accesses) + "_" + kMissRatioCurveFileName;
341 std::ofstream out(output_miss_ratio_curve_path);
342 if (!out.is_open()) {
343 return;
344 }
345 // Write header.
346 const std::string header =
347 "cache_name,num_shard_bits,ghost_capacity,capacity,miss_ratio,total_"
348 "accesses";
349 out << header << std::endl;
350 for (auto const& config_caches : cache_simulator_->sim_caches()) {
351 const CacheConfiguration& config = config_caches.first;
352 for (uint32_t i = 0; i < config.cache_capacities.size(); i++) {
353 double miss_ratio =
354 config_caches.second[i]->miss_ratio_stats().miss_ratio();
355 // Write the body.
356 out << config.cache_name;
357 out << ",";
358 out << config.num_shard_bits;
359 out << ",";
360 out << config.ghost_cache_capacity;
361 out << ",";
362 out << config.cache_capacities[i];
363 out << ",";
364 out << std::fixed << std::setprecision(4) << miss_ratio;
365 out << ",";
366 out << config_caches.second[i]->miss_ratio_stats().total_accesses();
367 out << std::endl;
368 }
369 }
370 out.close();
371 }
372
373 void BlockCacheTraceAnalyzer::UpdateFeatureVectors(
374 const std::vector<uint64_t>& access_sequence_number_timeline,
375 const std::vector<uint64_t>& access_timeline, const std::string& label,
376 std::map<std::string, Features>* label_features,
377 std::map<std::string, Predictions>* label_predictions) const {
378 if (access_sequence_number_timeline.empty() || access_timeline.empty()) {
379 return;
380 }
381 assert(access_timeline.size() == access_sequence_number_timeline.size());
382 uint64_t prev_access_sequence_number = access_sequence_number_timeline[0];
383 uint64_t prev_access_timestamp = access_timeline[0];
384 for (uint32_t i = 0; i < access_sequence_number_timeline.size(); i++) {
385 uint64_t num_accesses_since_last_access =
386 access_sequence_number_timeline[i] - prev_access_sequence_number;
387 uint64_t elapsed_time_since_last_access =
388 access_timeline[i] - prev_access_timestamp;
389 prev_access_sequence_number = access_sequence_number_timeline[i];
390 prev_access_timestamp = access_timeline[i];
391 if (i < access_sequence_number_timeline.size() - 1) {
392 (*label_features)[label].num_accesses_since_last_access.push_back(
393 num_accesses_since_last_access);
394 (*label_features)[label].num_past_accesses.push_back(i);
395 (*label_features)[label].elapsed_time_since_last_access.push_back(
396 elapsed_time_since_last_access);
397 }
398 if (i >= 1) {
399 (*label_predictions)[label].num_accesses_till_next_access.push_back(
400 num_accesses_since_last_access);
401 (*label_predictions)[label].elapsed_time_till_next_access.push_back(
402 elapsed_time_since_last_access);
403 }
404 }
405 }
406
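// For each simulated cache capacity (plus a pseudo-capacity kMaxUint64 that
// represents the trace itself), writes a CSV where the header row lists time
// buckets of time_unit seconds and each subsequent row gives a cache
// configuration's miss ratio in that bucket.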
407 void BlockCacheTraceAnalyzer::WriteMissRatioTimeline(uint64_t time_unit) const {
408 if (!cache_simulator_ || output_dir_.empty()) {
409 return;
410 }
411 std::map<uint64_t, std::map<std::string, std::map<uint64_t, double>>>
412 cs_name_timeline;
413 uint64_t start_time = port::kMaxUint64;
414 uint64_t end_time = 0;
415 const std::map<uint64_t, uint64_t>& trace_num_misses =
416 adjust_time_unit(miss_ratio_stats_.num_misses_timeline(), time_unit);
417 const std::map<uint64_t, uint64_t>& trace_num_accesses =
418 adjust_time_unit(miss_ratio_stats_.num_accesses_timeline(), time_unit);
419 assert(trace_num_misses.size() == trace_num_accesses.size());
420 for (auto const& num_miss : trace_num_misses) {
421 uint64_t time = num_miss.first;
422 start_time = std::min(start_time, time);
423 end_time = std::max(end_time, time);
424 uint64_t miss = num_miss.second;
425 auto it = trace_num_accesses.find(time);
426 assert(it != trace_num_accesses.end());
427 uint64_t access = it->second;
428 cs_name_timeline[port::kMaxUint64]["trace"][time] = percent(miss, access);
429 }
430 for (auto const& config_caches : cache_simulator_->sim_caches()) {
431 const CacheConfiguration& config = config_caches.first;
432 std::string cache_label = config.cache_name + "-" +
433 std::to_string(config.num_shard_bits) + "-" +
434 std::to_string(config.ghost_cache_capacity);
435 for (uint32_t i = 0; i < config.cache_capacities.size(); i++) {
436 const std::map<uint64_t, uint64_t>& num_misses = adjust_time_unit(
437 config_caches.second[i]->miss_ratio_stats().num_misses_timeline(),
438 time_unit);
439 const std::map<uint64_t, uint64_t>& num_accesses = adjust_time_unit(
440 config_caches.second[i]->miss_ratio_stats().num_accesses_timeline(),
441 time_unit);
442 assert(num_misses.size() == num_accesses.size());
443 for (auto const& num_miss : num_misses) {
444 uint64_t time = num_miss.first;
445 start_time = std::min(start_time, time);
446 end_time = std::max(end_time, time);
447 uint64_t miss = num_miss.second;
448 auto it = num_accesses.find(time);
449 assert(it != num_accesses.end());
450 uint64_t access = it->second;
451 cs_name_timeline[config.cache_capacities[i]][cache_label][time] =
452 percent(miss, access);
453 }
454 }
455 }
456 for (auto const& it : cs_name_timeline) {
457 const std::string output_miss_ratio_timeline_path =
458 output_dir_ + "/" + std::to_string(it.first) + "_" +
459 std::to_string(time_unit) + "_" + kFileNameSuffixMissRatioTimeline;
460 std::ofstream out(output_miss_ratio_timeline_path);
461 if (!out.is_open()) {
462 return;
463 }
464 std::string header("time");
465 for (uint64_t now = start_time; now <= end_time; now++) {
466 header += ",";
467 header += std::to_string(now);
468 }
469 out << header << std::endl;
470 for (auto const& label : it.second) {
471 std::string row(label.first);
472 for (uint64_t now = start_time; now <= end_time; now++) {
473 auto misses = label.second.find(now);
474 row += ",";
475 if (misses != label.second.end()) {
476 row += std::to_string(misses->second);
477 } else {
478 row += "0";
479 }
480 }
481 out << row << std::endl;
482 }
483 out.close();
484 }
485 }
486
487 void BlockCacheTraceAnalyzer::WriteMissTimeline(uint64_t time_unit) const {
488 if (!cache_simulator_ || output_dir_.empty()) {
489 return;
490 }
491 std::map<uint64_t, std::map<std::string, std::map<uint64_t, uint64_t>>>
492 cs_name_timeline;
493 uint64_t start_time = port::kMaxUint64;
494 uint64_t end_time = 0;
495 const std::map<uint64_t, uint64_t>& trace_num_misses =
496 adjust_time_unit(miss_ratio_stats_.num_misses_timeline(), time_unit);
497 for (auto const& num_miss : trace_num_misses) {
498 uint64_t time = num_miss.first;
499 start_time = std::min(start_time, time);
500 end_time = std::max(end_time, time);
501 uint64_t miss = num_miss.second;
502 cs_name_timeline[port::kMaxUint64]["trace"][time] = miss;
503 }
504 for (auto const& config_caches : cache_simulator_->sim_caches()) {
505 const CacheConfiguration& config = config_caches.first;
506 std::string cache_label = config.cache_name + "-" +
507 std::to_string(config.num_shard_bits) + "-" +
508 std::to_string(config.ghost_cache_capacity);
509 for (uint32_t i = 0; i < config.cache_capacities.size(); i++) {
510 const std::map<uint64_t, uint64_t>& num_misses = adjust_time_unit(
511 config_caches.second[i]->miss_ratio_stats().num_misses_timeline(),
512 time_unit);
513 for (auto const& num_miss : num_misses) {
514 uint64_t time = num_miss.first;
515 start_time = std::min(start_time, time);
516 end_time = std::max(end_time, time);
517 uint64_t miss = num_miss.second;
518 cs_name_timeline[config.cache_capacities[i]][cache_label][time] = miss;
519 }
520 }
521 }
522 for (auto const& it : cs_name_timeline) {
523 const std::string output_miss_timeline_path =
524 output_dir_ + "/" + std::to_string(it.first) + "_" +
525 std::to_string(time_unit) + "_" + kFileNameSuffixMissTimeline;
526 std::ofstream out(output_miss_timeline_path);
527 if (!out.is_open()) {
528 return;
529 }
530 std::string header("time");
531 for (uint64_t now = start_time; now <= end_time; now++) {
532 header += ",";
533 header += std::to_string(now);
534 }
535 out << header << std::endl;
536 for (auto const& label : it.second) {
537 std::string row(label.first);
538 for (uint64_t now = start_time; now <= end_time; now++) {
539 auto misses = label.second.find(now);
540 row += ",";
541 if (misses != label.second.end()) {
542 row += std::to_string(misses->second);
543 } else {
544 row += "0";
545 }
546 }
547 out << row << std::endl;
548 }
549 out.close();
550 }
551 }
552
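// Measures access skew for the given grouping: labels are sorted by access
// count in descending order, and each percent bucket accumulates the number
// of accesses contributed by the top 'percent' of labels. The result is
// written via WriteStatsToFile as percentages of total accesses.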
553 void BlockCacheTraceAnalyzer::WriteSkewness(
554 const std::string& label_str, const std::vector<uint64_t>& percent_buckets,
555 TraceType target_block_type) const {
556 std::set<std::string> labels = ParseLabelStr(label_str);
557 std::map<std::string, uint64_t> label_naccesses;
558 uint64_t total_naccesses = 0;
559 auto block_callback = [&](const std::string& cf_name, uint64_t fd,
560 uint32_t level, TraceType type,
561 const std::string& /*block_key*/, uint64_t block_id,
562 const BlockAccessInfo& block) {
563 if (target_block_type != TraceType::kTraceMax &&
564 target_block_type != type) {
565 return;
566 }
567 const std::string label = BuildLabel(
568 labels, cf_name, fd, level, type,
569 TableReaderCaller::kMaxBlockCacheLookupCaller, block_id, block);
570 label_naccesses[label] += block.num_accesses;
571 total_naccesses += block.num_accesses;
572 };
573 TraverseBlocks(block_callback, &labels);
574 std::map<std::string, std::map<uint64_t, uint64_t>> label_bucket_naccesses;
575 std::vector<std::pair<std::string, uint64_t>> pairs;
576 for (auto const& itr : label_naccesses) {
577 pairs.push_back(itr);
578 }
579 // Sort in descending order.
580 sort(pairs.begin(), pairs.end(),
581 [](const std::pair<std::string, uint64_t>& a,
582 const std::pair<std::string, uint64_t>& b) {
583 return b.second < a.second;
584 });
585
586 size_t prev_start_index = 0;
587 for (auto const& percent : percent_buckets) {
588 label_bucket_naccesses[label_str][percent] = 0;
589 size_t end_index = 0;
590 if (percent == port::kMaxUint64) {
591 end_index = label_naccesses.size();
592 } else {
593 end_index = percent * label_naccesses.size() / 100;
594 }
595 for (size_t i = prev_start_index; i < end_index; i++) {
596 label_bucket_naccesses[label_str][percent] += pairs[i].second;
597 }
598 prev_start_index = end_index;
599 }
600 std::string filename_suffix;
601 if (target_block_type != TraceType::kTraceMax) {
602 filename_suffix = block_type_to_string(target_block_type);
603 filename_suffix += "_";
604 }
605 filename_suffix += kFileNameSuffixSkew;
606 WriteStatsToFile(label_str, percent_buckets, filename_suffix,
607 label_bucket_naccesses, total_naccesses);
608 }
609
610 void BlockCacheTraceAnalyzer::WriteCorrelationFeatures(
611 const std::string& label_str, uint32_t max_number_of_values) const {
612 std::set<std::string> labels = ParseLabelStr(label_str);
613 std::map<std::string, Features> label_features;
614 std::map<std::string, Predictions> label_predictions;
615 auto block_callback =
616 [&](const std::string& cf_name, uint64_t fd, uint32_t level,
617 TraceType block_type, const std::string& /*block_key*/,
618 uint64_t /*block_key_id*/, const BlockAccessInfo& block) {
619 if (block.table_id == 0 && labels.find(kGroupbyTable) != labels.end()) {
620 // We only know table id information for get requests.
621 return;
622 }
623 if (labels.find(kGroupbyCaller) != labels.end()) {
624 // Group by caller.
625 for (auto const& caller_map : block.caller_access_timeline) {
626 const std::string label =
627 BuildLabel(labels, cf_name, fd, level, block_type,
628 caller_map.first, /*block_id=*/0, block);
629 auto it = block.caller_access_sequence__number_timeline.find(
630 caller_map.first);
631 assert(it != block.caller_access_sequence__number_timeline.end());
632 UpdateFeatureVectors(it->second, caller_map.second, label,
633 &label_features, &label_predictions);
634 }
635 return;
636 }
637 const std::string label =
638 BuildLabel(labels, cf_name, fd, level, block_type,
639 TableReaderCaller::kMaxBlockCacheLookupCaller,
640 /*block_id=*/0, block);
641 UpdateFeatureVectors(block.access_sequence_number_timeline,
642 block.access_timeline, label, &label_features,
643 &label_predictions);
644 };
645 TraverseBlocks(block_callback, &labels);
646 WriteCorrelationFeaturesToFile(label_str, label_features, label_predictions,
647 max_number_of_values);
648 }
649
650 void BlockCacheTraceAnalyzer::WriteCorrelationFeaturesToFile(
651 const std::string& label,
652 const std::map<std::string, Features>& label_features,
653 const std::map<std::string, Predictions>& label_predictions,
654 uint32_t max_number_of_values) const {
655 for (auto const& label_feature_vectors : label_features) {
656 const Features& past = label_feature_vectors.second;
657 auto it = label_predictions.find(label_feature_vectors.first);
658 assert(it != label_predictions.end());
659 const Predictions& future = it->second;
660 const std::string output_path = output_dir_ + "/" + label + "_" +
661 label_feature_vectors.first + "_" +
662 kFileNameSuffixCorrelation;
663 std::ofstream out(output_path);
664 if (!out.is_open()) {
665 return;
666 }
667 std::string header(
668 "num_accesses_since_last_access,elapsed_time_since_last_access,num_"
669 "past_accesses,num_accesses_till_next_access,elapsed_time_till_next_"
670 "access");
671 out << header << std::endl;
672 std::vector<uint32_t> indexes;
673 for (uint32_t i = 0; i < past.num_accesses_since_last_access.size(); i++) {
674 indexes.push_back(i);
675 }
676 RandomShuffle(indexes.begin(), indexes.end());
677 for (uint32_t i = 0; i < max_number_of_values && i < indexes.size(); i++) {
678 uint32_t rand_index = indexes[i];
679 out << std::to_string(past.num_accesses_since_last_access[rand_index])
680 << ",";
681 out << std::to_string(past.elapsed_time_since_last_access[rand_index])
682 << ",";
683 out << std::to_string(past.num_past_accesses[rand_index]) << ",";
684 out << std::to_string(future.num_accesses_till_next_access[rand_index])
685 << ",";
686 out << std::to_string(future.elapsed_time_till_next_access[rand_index])
687 << std::endl;
688 }
689 out.close();
690 }
691 }
692
693 void BlockCacheTraceAnalyzer::WriteCorrelationFeaturesForGet(
694 uint32_t max_number_of_values) const {
695 std::string label = "GetKeyInfo";
696 std::map<std::string, Features> label_features;
697 std::map<std::string, Predictions> label_predictions;
698 for (auto const& get_info : get_key_info_map_) {
699 const GetKeyInfo& info = get_info.second;
700 UpdateFeatureVectors(info.access_sequence_number_timeline,
701 info.access_timeline, label, &label_features,
702 &label_predictions);
703 }
704 WriteCorrelationFeaturesToFile(label, label_features, label_predictions,
705 max_number_of_values);
706 }
707
708 std::set<std::string> BlockCacheTraceAnalyzer::ParseLabelStr(
709 const std::string& label_str) const {
710 std::stringstream ss(label_str);
711 std::set<std::string> labels;
712 // label_str is in the form of "label1_label2_label3", e.g., cf_bt.
713 while (ss.good()) {
714 std::string label_name;
715 getline(ss, label_name, '_');
716 if (kGroupbyLabels.find(label_name) == kGroupbyLabels.end()) {
717 // Unknown label name.
718 fprintf(stderr, "Unknown label name %s, label string %s\n",
719 label_name.c_str(), label_str.c_str());
720 return {};
721 }
722 labels.insert(label_name);
723 }
724 return labels;
725 }
726
727 std::string BlockCacheTraceAnalyzer::BuildLabel(
728 const std::set<std::string>& labels, const std::string& cf_name,
729 uint64_t fd, uint32_t level, TraceType type, TableReaderCaller caller,
730 uint64_t block_key, const BlockAccessInfo& block) const {
731 std::map<std::string, std::string> label_value_map;
732 label_value_map[kGroupbyAll] = kGroupbyAll;
733 label_value_map[kGroupbyLevel] = std::to_string(level);
734 label_value_map[kGroupbyCaller] = caller_to_string(caller);
735 label_value_map[kGroupbySSTFile] = std::to_string(fd);
736 label_value_map[kGroupbyBlockType] = block_type_to_string(type);
737 label_value_map[kGroupbyColumnFamily] = cf_name;
738 label_value_map[kGroupbyBlock] = std::to_string(block_key);
739 label_value_map[kGroupbyTable] = std::to_string(block.table_id);
740 // Concatenate the label values.
741 std::string label;
742 for (auto const& l : labels) {
743 label += label_value_map[l];
744 label += "-";
745 }
746 if (!label.empty()) {
747 label.pop_back();
748 }
749 return label;
750 }
751
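// Applies block_callback to every block access aggregate, iterating column
// families, then SST files, then block types, then blocks. If 'labels'
// requires grouping by table and a block without table id information is
// encountered (table ids are only known for Get requests), the traversal
// stops.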
752 void BlockCacheTraceAnalyzer::TraverseBlocks(
753 std::function<void(const std::string& /*cf_name*/, uint64_t /*fd*/,
754 uint32_t /*level*/, TraceType /*block_type*/,
755 const std::string& /*block_key*/,
756 uint64_t /*block_key_id*/,
757 const BlockAccessInfo& /*block_access_info*/)>
758 block_callback,
759 std::set<std::string>* labels) const {
760 for (auto const& cf_aggregates : cf_aggregates_map_) {
761 // Stats per column family.
762 const std::string& cf_name = cf_aggregates.first;
763 for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) {
764 // Stats per SST file.
765 const uint64_t fd = file_aggregates.first;
766 const uint32_t level = file_aggregates.second.level;
767 for (auto const& block_type_aggregates :
768 file_aggregates.second.block_type_aggregates_map) {
769 // Stats per block type.
770 const TraceType type = block_type_aggregates.first;
771 for (auto const& block_access_info :
772 block_type_aggregates.second.block_access_info_map) {
773 // Stats per block.
774 if (labels && block_access_info.second.table_id == 0 &&
775 labels->find(kGroupbyTable) != labels->end()) {
776 // We only know table id information for get requests.
777 return;
778 }
779 block_callback(cf_name, fd, level, type, block_access_info.first,
780 block_access_info.second.block_id,
781 block_access_info.second);
782 }
783 }
784 }
785 }
786 }
787
788 void BlockCacheTraceAnalyzer::WriteGetSpatialLocality(
789 const std::string& label_str,
790 const std::vector<uint64_t>& percent_buckets) const {
791 std::set<std::string> labels = ParseLabelStr(label_str);
792 std::map<std::string, std::map<uint64_t, uint64_t>> label_pnrefkeys_nblocks;
793 std::map<std::string, std::map<uint64_t, uint64_t>> label_pnrefs_nblocks;
794 std::map<std::string, std::map<uint64_t, uint64_t>> label_pndatasize_nblocks;
795 uint64_t nblocks = 0;
796 auto block_callback = [&](const std::string& cf_name, uint64_t fd,
797 uint32_t level, TraceType /*block_type*/,
798 const std::string& /*block_key*/,
799 uint64_t /*block_key_id*/,
800 const BlockAccessInfo& block) {
801 if (block.num_keys == 0) {
802 return;
803 }
804 uint64_t naccesses = 0;
805 for (auto const& key_access : block.key_num_access_map) {
806 for (auto const& caller_access : key_access.second) {
807 if (caller_access.first == TableReaderCaller::kUserGet) {
808 naccesses += caller_access.second;
809 }
810 }
811 }
812 const std::string label =
813 BuildLabel(labels, cf_name, fd, level, TraceType::kBlockTraceDataBlock,
814 TableReaderCaller::kUserGet, /*block_id=*/0, block);
815
816 const uint64_t percent_referenced_for_existing_keys =
817 static_cast<uint64_t>(std::max(
818 percent(block.key_num_access_map.size(), block.num_keys), 0.0));
819 const uint64_t percent_accesses_for_existing_keys =
820 static_cast<uint64_t>(std::max(
821 percent(block.num_referenced_key_exist_in_block, naccesses), 0.0));
822 const uint64_t percent_referenced_data_size = static_cast<uint64_t>(
823 std::max(percent(block.referenced_data_size, block.block_size), 0.0));
824 if (label_pnrefkeys_nblocks.find(label) == label_pnrefkeys_nblocks.end()) {
825 for (auto const& percent_bucket : percent_buckets) {
826 label_pnrefkeys_nblocks[label][percent_bucket] = 0;
827 label_pnrefs_nblocks[label][percent_bucket] = 0;
828 label_pndatasize_nblocks[label][percent_bucket] = 0;
829 }
830 }
831 label_pnrefkeys_nblocks[label]
832 .upper_bound(percent_referenced_for_existing_keys)
833 ->second += 1;
834 label_pnrefs_nblocks[label]
835 .upper_bound(percent_accesses_for_existing_keys)
836 ->second += 1;
837 label_pndatasize_nblocks[label]
838 .upper_bound(percent_referenced_data_size)
839 ->second += 1;
840 nblocks += 1;
841 };
842 TraverseBlocks(block_callback, &labels);
843 WriteStatsToFile(label_str, percent_buckets, kFileNameSuffixPercentRefKeys,
844 label_pnrefkeys_nblocks, nblocks);
845 WriteStatsToFile(label_str, percent_buckets,
846 kFileNameSuffixPercentAccessesOnRefKeys,
847 label_pnrefs_nblocks, nblocks);
848 WriteStatsToFile(label_str, percent_buckets,
849 kFileNameSuffixPercentDataSizeOnRefKeys,
850 label_pndatasize_nblocks, nblocks);
851 }
852
853 void BlockCacheTraceAnalyzer::WriteAccessTimeline(const std::string& label_str,
854 uint64_t time_unit,
855 bool user_access_only) const {
856 std::set<std::string> labels = ParseLabelStr(label_str);
857 uint64_t start_time = port::kMaxUint64;
858 uint64_t end_time = 0;
859 std::map<std::string, std::map<uint64_t, uint64_t>> label_access_timeline;
860 std::map<uint64_t, std::vector<std::string>> access_count_block_id_map;
861
862 auto block_callback = [&](const std::string& cf_name, uint64_t fd,
863 uint32_t level, TraceType type,
864 const std::string& /*block_key*/, uint64_t block_id,
865 const BlockAccessInfo& block) {
866 uint64_t naccesses = 0;
867 for (auto const& timeline : block.caller_num_accesses_timeline) {
868 const TableReaderCaller caller = timeline.first;
869 if (user_access_only && !is_user_access(caller)) {
870 continue;
871 }
872 const std::string label =
873 BuildLabel(labels, cf_name, fd, level, type, caller, block_id, block);
874 for (auto const& naccess : timeline.second) {
875 const uint64_t timestamp = naccess.first / time_unit;
876 const uint64_t num = naccess.second;
877 label_access_timeline[label][timestamp] += num;
878 start_time = std::min(start_time, timestamp);
879 end_time = std::max(end_time, timestamp);
880 naccesses += num;
881 }
882 }
883 if (naccesses > 0) {
884 access_count_block_id_map[naccesses].push_back(std::to_string(block_id));
885 }
886 };
887 TraverseBlocks(block_callback, &labels);
888
889 // We have label_access_timeline now. Write them into a file.
890 const std::string user_access_prefix =
891 user_access_only ? "user_access_only_" : "all_access_";
892 const std::string output_path = output_dir_ + "/" + user_access_prefix +
893 label_str + "_" + std::to_string(time_unit) +
894 "_" + kFileNameSuffixAccessTimeline;
895 std::ofstream out(output_path);
896 if (!out.is_open()) {
897 return;
898 }
899 std::string header("time");
900 if (labels.find("block") != labels.end()) {
901 for (uint64_t now = start_time; now <= end_time; now++) {
902 header += ",";
903 header += std::to_string(now);
904 }
905 out << header << std::endl;
906 // Write the most frequently accessed blocks first.
907 for (auto naccess_it = access_count_block_id_map.rbegin();
908 naccess_it != access_count_block_id_map.rend(); naccess_it++) {
909 for (auto& block_id_it : naccess_it->second) {
910 std::string row(block_id_it);
911 for (uint64_t now = start_time; now <= end_time; now++) {
912 auto it = label_access_timeline[block_id_it].find(now);
913 row += ",";
914 if (it != label_access_timeline[block_id_it].end()) {
915 row += std::to_string(it->second);
916 } else {
917 row += "0";
918 }
919 }
920 out << row << std::endl;
921 }
922 }
923 out.close();
924 return;
925 }
926 for (uint64_t now = start_time; now <= end_time; now++) {
927 header += ",";
928 header += std::to_string(now);
929 }
930 out << header << std::endl;
931 for (auto const& label : label_access_timeline) {
932 std::string row(label.first);
933 for (uint64_t now = start_time; now <= end_time; now++) {
934 auto it = label.second.find(now);
935 row += ",";
936 if (it != label.second.end()) {
937 row += std::to_string(it->second);
938 } else {
939 row += "0";
940 }
941 }
942 out << row << std::endl;
943 }
944
945 out.close();
946 }
947
948 void BlockCacheTraceAnalyzer::WriteReuseDistance(
949 const std::string& label_str,
950 const std::vector<uint64_t>& distance_buckets) const {
951 std::set<std::string> labels = ParseLabelStr(label_str);
952 std::map<std::string, std::map<uint64_t, uint64_t>> label_distance_num_reuses;
953 uint64_t total_num_reuses = 0;
954 auto block_callback = [&](const std::string& cf_name, uint64_t fd,
955 uint32_t level, TraceType type,
956 const std::string& /*block_key*/, uint64_t block_id,
957 const BlockAccessInfo& block) {
958 const std::string label = BuildLabel(
959 labels, cf_name, fd, level, type,
960 TableReaderCaller::kMaxBlockCacheLookupCaller, block_id, block);
961 if (label_distance_num_reuses.find(label) ==
962 label_distance_num_reuses.end()) {
963 // The first time we encounter this label.
964 for (auto const& distance_bucket : distance_buckets) {
965 label_distance_num_reuses[label][distance_bucket] = 0;
966 }
967 }
968 for (auto const& reuse_distance : block.reuse_distance_count) {
969 label_distance_num_reuses[label]
970 .upper_bound(reuse_distance.first)
971 ->second += reuse_distance.second;
972 total_num_reuses += reuse_distance.second;
973 }
974 };
975 TraverseBlocks(block_callback, &labels);
976 // We have label_distance_num_reuses now. Write the reuse distance
977 // histograms into a file.
978 const std::string output_path =
979 output_dir_ + "/" + label_str + "_reuse_distance";
980 std::ofstream out(output_path);
981 if (!out.is_open()) {
982 return;
983 }
984 std::string header("bucket");
985 for (auto const& label_it : label_distance_num_reuses) {
986 header += ",";
987 header += label_it.first;
988 }
989 out << header << std::endl;
990 for (auto const& bucket : distance_buckets) {
991 std::string row(std::to_string(bucket));
992 for (auto const& label_it : label_distance_num_reuses) {
993 auto const& it = label_it.second.find(bucket);
994 assert(it != label_it.second.end());
995 row += ",";
996 row += std::to_string(percent(it->second, total_num_reuses));
997 }
998 out << row << std::endl;
999 }
1000 out.close();
1001 }
1002
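// Buckets the reuse intervals of one block's (flattened) access timeline
// into the per-label histogram: the gap between consecutive timestamps
// counts as one reuse, and multiple accesses recorded at the same timestamp
// count as reuses with an interval of zero (i.e., within one second).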
1003 void BlockCacheTraceAnalyzer::UpdateReuseIntervalStats(
1004 const std::string& label, const std::vector<uint64_t>& time_buckets,
1005 const std::map<uint64_t, uint64_t> timeline,
1006 std::map<std::string, std::map<uint64_t, uint64_t>>* label_time_num_reuses,
1007 uint64_t* total_num_reuses) const {
1008 assert(label_time_num_reuses);
1009 assert(total_num_reuses);
1010 if (label_time_num_reuses->find(label) == label_time_num_reuses->end()) {
1011 // The first time we encounter this label.
1012 for (auto const& time_bucket : time_buckets) {
1013 (*label_time_num_reuses)[label][time_bucket] = 0;
1014 }
1015 }
1016 auto it = timeline.begin();
1017 uint64_t prev_timestamp = it->first;
1018 const uint64_t prev_num = it->second;
1019 it++;
1020 // Reused within one second.
1021 if (prev_num > 1) {
1022 (*label_time_num_reuses)[label].upper_bound(0)->second += prev_num - 1;
1023 *total_num_reuses += prev_num - 1;
1024 }
1025 while (it != timeline.end()) {
1026 const uint64_t timestamp = it->first;
1027 const uint64_t num = it->second;
1028 const uint64_t reuse_interval = timestamp - prev_timestamp;
1029 (*label_time_num_reuses)[label].upper_bound(reuse_interval)->second += 1;
1030 if (num > 1) {
1031 (*label_time_num_reuses)[label].upper_bound(0)->second += num - 1;
1032 }
1033 prev_timestamp = timestamp;
1034 *total_num_reuses += num;
1035 it++;
1036 }
1037 }
1038
1039 void BlockCacheTraceAnalyzer::WriteStatsToFile(
1040 const std::string& label_str, const std::vector<uint64_t>& time_buckets,
1041 const std::string& filename_suffix,
1042 const std::map<std::string, std::map<uint64_t, uint64_t>>& label_data,
1043 uint64_t ntotal) const {
1044 const std::string output_path =
1045 output_dir_ + "/" + label_str + "_" + filename_suffix;
1046 std::ofstream out(output_path);
1047 if (!out.is_open()) {
1048 return;
1049 }
1050 std::string header("bucket");
1051 for (auto const& label_it : label_data) {
1052 header += ",";
1053 header += label_it.first;
1054 }
1055 out << header << std::endl;
1056 for (auto const& bucket : time_buckets) {
1057 std::string row(std::to_string(bucket));
1058 for (auto const& label_it : label_data) {
1059 auto const& it = label_it.second.find(bucket);
1060 assert(it != label_it.second.end());
1061 row += ",";
1062 row += std::to_string(percent(it->second, ntotal));
1063 }
1064 out << row << std::endl;
1065 }
1066 out.close();
1067 }
1068
1069 void BlockCacheTraceAnalyzer::WriteReuseInterval(
1070 const std::string& label_str,
1071 const std::vector<uint64_t>& time_buckets) const {
1072 std::set<std::string> labels = ParseLabelStr(label_str);
1073 std::map<std::string, std::map<uint64_t, uint64_t>> label_time_num_reuses;
1074 std::map<std::string, std::map<uint64_t, uint64_t>> label_avg_reuse_nblocks;
1075 std::map<std::string, std::map<uint64_t, uint64_t>> label_avg_reuse_naccesses;
1076
1077 uint64_t total_num_reuses = 0;
1078 uint64_t total_nblocks = 0;
1079 uint64_t total_accesses = 0;
1080 auto block_callback = [&](const std::string& cf_name, uint64_t fd,
1081 uint32_t level, TraceType type,
1082 const std::string& /*block_key*/, uint64_t block_id,
1083 const BlockAccessInfo& block) {
1084 total_nblocks++;
1085 total_accesses += block.num_accesses;
1086 uint64_t avg_reuse_interval = 0;
1087 if (block.num_accesses > 1) {
1088 avg_reuse_interval = ((block.last_access_time - block.first_access_time) /
1089 kMicrosInSecond) /
1090 block.num_accesses;
1091 } else {
1092 avg_reuse_interval = port::kMaxUint64 - 1;
1093 }
1094 if (labels.find(kGroupbyCaller) != labels.end()) {
1095 for (auto const& timeline : block.caller_num_accesses_timeline) {
1096 const TableReaderCaller caller = timeline.first;
1097 const std::string label = BuildLabel(labels, cf_name, fd, level, type,
1098 caller, block_id, block);
1099 UpdateReuseIntervalStats(label, time_buckets, timeline.second,
1100 &label_time_num_reuses, &total_num_reuses);
1101 }
1102 return;
1103 }
1104 // Does not group by caller so we need to flatten the access timeline.
1105 const std::string label = BuildLabel(
1106 labels, cf_name, fd, level, type,
1107 TableReaderCaller::kMaxBlockCacheLookupCaller, block_id, block);
1108 std::map<uint64_t, uint64_t> timeline;
1109 for (auto const& caller_timeline : block.caller_num_accesses_timeline) {
1110 for (auto const& time_naccess : caller_timeline.second) {
1111 timeline[time_naccess.first] += time_naccess.second;
1112 }
1113 }
1114 UpdateReuseIntervalStats(label, time_buckets, timeline,
1115 &label_time_num_reuses, &total_num_reuses);
1116 if (label_avg_reuse_nblocks.find(label) == label_avg_reuse_nblocks.end()) {
1117 for (auto const& time_bucket : time_buckets) {
1118 label_avg_reuse_nblocks[label][time_bucket] = 0;
1119 label_avg_reuse_naccesses[label][time_bucket] = 0;
1120 }
1121 }
1122 label_avg_reuse_nblocks[label].upper_bound(avg_reuse_interval)->second += 1;
1123 label_avg_reuse_naccesses[label].upper_bound(avg_reuse_interval)->second +=
1124 block.num_accesses;
1125 };
1126 TraverseBlocks(block_callback, &labels);
1127
1128 // Write the stats into files.
1129 WriteStatsToFile(label_str, time_buckets, kFileNameSuffixReuseInterval,
1130 label_time_num_reuses, total_num_reuses);
1131 WriteStatsToFile(label_str, time_buckets, kFileNameSuffixAvgReuseInterval,
1132 label_avg_reuse_nblocks, total_nblocks);
1133 WriteStatsToFile(label_str, time_buckets,
1134 kFileNameSuffixAvgReuseIntervalNaccesses,
1135 label_avg_reuse_naccesses, total_accesses);
1136 }
1137
1138 void BlockCacheTraceAnalyzer::WriteReuseLifetime(
1139 const std::string& label_str,
1140 const std::vector<uint64_t>& time_buckets) const {
1141 std::set<std::string> labels = ParseLabelStr(label_str);
1142 std::map<std::string, std::map<uint64_t, uint64_t>> label_lifetime_nblocks;
1143 uint64_t total_nblocks = 0;
1144 auto block_callback = [&](const std::string& cf_name, uint64_t fd,
1145 uint32_t level, TraceType type,
1146 const std::string& /*block_key*/, uint64_t block_id,
1147 const BlockAccessInfo& block) {
1148 uint64_t lifetime = 0;
1149 if (block.num_accesses > 1) {
1150 lifetime =
1151 (block.last_access_time - block.first_access_time) / kMicrosInSecond;
1152 } else {
1153 lifetime = port::kMaxUint64 - 1;
1154 }
1155 const std::string label = BuildLabel(
1156 labels, cf_name, fd, level, type,
1157 TableReaderCaller::kMaxBlockCacheLookupCaller, block_id, block);
1158
1159 if (label_lifetime_nblocks.find(label) == label_lifetime_nblocks.end()) {
1160 // The first time we encounter this label.
1161 for (auto const& time_bucket : time_buckets) {
1162 label_lifetime_nblocks[label][time_bucket] = 0;
1163 }
1164 }
1165 label_lifetime_nblocks[label].upper_bound(lifetime)->second += 1;
1166 total_nblocks += 1;
1167 };
1168 TraverseBlocks(block_callback, &labels);
1169 WriteStatsToFile(label_str, time_buckets, kFileNameSuffixReuseLifetime,
1170 label_lifetime_nblocks, total_nblocks);
1171 }
1172
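// Splits the trace duration into windows of reuse_window seconds and, for
// every pair of windows (i, j) with j >= i, reports the percentage of blocks
// accessed in window i that are accessed again in window j. Cells with j < i
// are filled with 100.0 as placeholders.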
1173 void BlockCacheTraceAnalyzer::WriteBlockReuseTimeline(
1174 const uint64_t reuse_window, bool user_access_only, TraceType block_type) const {
1175 // A map from block key to an array of bools that states whether a block is
1176 // accessed in a time window.
1177 std::map<uint64_t, std::vector<bool>> block_accessed;
1178 const uint64_t trace_duration =
1179 trace_end_timestamp_in_seconds_ - trace_start_timestamp_in_seconds_;
1180 const uint64_t reuse_vector_size = (trace_duration / reuse_window);
1181 if (reuse_vector_size < 2) {
1182 // There are fewer than two reuse windows. We cannot calculate the
1183 // reused percentage of blocks.
1184 return;
1185 }
1186 auto block_callback = [&](const std::string& /*cf_name*/, uint64_t /*fd*/,
1187 uint32_t /*level*/, TraceType /*type*/,
1188 const std::string& /*block_key*/, uint64_t block_id,
1189 const BlockAccessInfo& block) {
1190 if (block_accessed.find(block_id) == block_accessed.end()) {
1191 block_accessed[block_id].resize(reuse_vector_size);
1192 for (uint64_t i = 0; i < reuse_vector_size; i++) {
1193 block_accessed[block_id][i] = false;
1194 }
1195 }
1196 for (auto const& caller_num : block.caller_num_accesses_timeline) {
1197 const TableReaderCaller caller = caller_num.first;
1198 for (auto const& timeline : caller_num.second) {
1199 const uint64_t timestamp = timeline.first;
1200 const uint64_t elapsed_time =
1201 timestamp - trace_start_timestamp_in_seconds_;
1202 if (!user_access_only || is_user_access(caller)) {
1203 uint64_t index =
1204 std::min(elapsed_time / reuse_window, reuse_vector_size - 1);
1205 block_accessed[block_id][index] = true;
1206 }
1207 }
1208 }
1209 };
1210 TraverseBlocks(block_callback);
1211
1212 // A cell is the number of blocks accessed in a reuse window.
1213 std::unique_ptr<uint64_t[]> reuse_table(new uint64_t[reuse_vector_size * reuse_vector_size]);
1214 for (uint64_t start_time = 0; start_time < reuse_vector_size; start_time++) {
1215 // Initialize the reuse_table.
1216 for (uint64_t i = 0; i < reuse_vector_size; i++) {
1217 reuse_table[start_time * reuse_vector_size + i] = 0;
1218 }
1219 // Examine all blocks.
1220 for (auto const& block : block_accessed) {
1221 for (uint64_t i = start_time; i < reuse_vector_size; i++) {
1222 if (block.second[start_time] && block.second[i]) {
1223 // This block is accessed at start time and at the current time. We
1224 // increment reuse_table[start_time][i] since it is reused at the ith
1225 // window.
1226 reuse_table[start_time * reuse_vector_size + i]++;
1227 }
1228 }
1229 }
1230 }
1231 const std::string user_access_prefix =
1232 user_access_only ? "_user_access_only_" : "_all_access_";
1233 const std::string output_path =
1234 output_dir_ + "/" + block_type_to_string(block_type) +
1235 user_access_prefix + std::to_string(reuse_window) + "_" +
1236 kFileNameSuffixAccessReuseBlocksTimeline;
1237 std::ofstream out(output_path);
1238 if (!out.is_open()) {
1239 return;
1240 }
1241 std::string header("start_time");
1242 for (uint64_t start_time = 0; start_time < reuse_vector_size; start_time++) {
1243 header += ",";
1244 header += std::to_string(start_time);
1245 }
1246 out << header << std::endl;
1247 for (uint64_t start_time = 0; start_time < reuse_vector_size; start_time++) {
1248 std::string row(std::to_string(start_time * reuse_window));
1249 for (uint64_t j = 0; j < reuse_vector_size; j++) {
1250 row += ",";
1251 if (j < start_time) {
1252 row += "100.0";
1253 } else {
1254 row += std::to_string(percent(reuse_table[start_time * reuse_vector_size + j],
1255 reuse_table[start_time * reuse_vector_size + start_time]));
1256 }
1257 }
1258 out << row << std::endl;
1259 }
1260 out.close();
1261 }
1262
1263 std::string BlockCacheTraceAnalyzer::OutputPercentAccessStats(
1264 uint64_t total_accesses,
1265 const std::map<std::string, uint64_t>& cf_access_count) const {
1266 std::string row;
1267 for (auto const& cf_aggregates : cf_aggregates_map_) {
1268 const std::string& cf_name = cf_aggregates.first;
1269 const auto& naccess = cf_access_count.find(cf_name);
1270 row += ",";
1271 if (naccess != cf_access_count.end()) {
1272 row += std::to_string(percent(naccess->second, total_accesses));
1273 } else {
1274 row += "0";
1275 }
1276 }
1277 return row;
1278 }
1279
1280 void BlockCacheTraceAnalyzer::WritePercentAccessSummaryStats() const {
1281 std::map<TableReaderCaller, std::map<std::string, uint64_t>>
1282 caller_cf_accesses;
1283 uint64_t total_accesses = 0;
1284 auto block_callback =
1285 [&](const std::string& cf_name, uint64_t /*fd*/, uint32_t /*level*/,
1286 TraceType /*type*/, const std::string& /*block_key*/,
1287 uint64_t /*block_id*/, const BlockAccessInfo& block) {
1288 for (auto const& caller_num : block.caller_num_access_map) {
1289 const TableReaderCaller caller = caller_num.first;
1290 const uint64_t naccess = caller_num.second;
1291 caller_cf_accesses[caller][cf_name] += naccess;
1292 total_accesses += naccess;
1293 }
1294 };
1295 TraverseBlocks(block_callback);
1296
1297 const std::string output_path =
1298 output_dir_ + "/" + kFileNameSuffixPercentOfAccessSummary;
1299 std::ofstream out(output_path);
1300 if (!out.is_open()) {
1301 return;
1302 }
1303 std::string header("caller");
1304 for (auto const& cf_name : cf_aggregates_map_) {
1305 header += ",";
1306 header += cf_name.first;
1307 }
1308 out << header << std::endl;
1309 for (auto const& cf_naccess_it : caller_cf_accesses) {
1310 const TableReaderCaller caller = cf_naccess_it.first;
1311 std::string row;
1312 row += caller_to_string(caller);
1313 row += OutputPercentAccessStats(total_accesses, cf_naccess_it.second);
1314 out << row << std::endl;
1315 }
1316 out.close();
1317 }
1318
1319 void BlockCacheTraceAnalyzer::WriteDetailedPercentAccessSummaryStats(
1320 TableReaderCaller analyzing_caller) const {
1321 std::map<uint32_t, std::map<std::string, uint64_t>> level_cf_accesses;
1322 std::map<TraceType, std::map<std::string, uint64_t>> bt_cf_accesses;
1323 uint64_t total_accesses = 0;
1324 auto block_callback =
1325 [&](const std::string& cf_name, uint64_t /*fd*/, uint32_t level,
1326 TraceType type, const std::string& /*block_key*/,
1327 uint64_t /*block_id*/, const BlockAccessInfo& block) {
1328 for (auto const& caller_num : block.caller_num_access_map) {
1329 const TableReaderCaller caller = caller_num.first;
1330 if (caller == analyzing_caller) {
1331 const uint64_t naccess = caller_num.second;
1332 level_cf_accesses[level][cf_name] += naccess;
1333 bt_cf_accesses[type][cf_name] += naccess;
1334 total_accesses += naccess;
1335 }
1336 }
1337 };
1338 TraverseBlocks(block_callback);
1339 {
1340 const std::string output_path =
1341 output_dir_ + "/" + caller_to_string(analyzing_caller) + "_level_" +
1342 kFileNameSuffixPercentOfAccessSummary;
1343 std::ofstream out(output_path);
1344 if (!out.is_open()) {
1345 return;
1346 }
1347 std::string header("level");
1348 for (auto const& cf_name : cf_aggregates_map_) {
1349 header += ",";
1350 header += cf_name.first;
1351 }
1352 out << header << std::endl;
1353 for (auto const& level_naccess_it : level_cf_accesses) {
1354 const uint32_t level = level_naccess_it.first;
1355 std::string row;
1356 row += std::to_string(level);
1357 row += OutputPercentAccessStats(total_accesses, level_naccess_it.second);
1358 out << row << std::endl;
1359 }
1360 out.close();
1361 }
1362 {
1363 const std::string output_path =
1364 output_dir_ + "/" + caller_to_string(analyzing_caller) + "_bt_" +
1365 kFileNameSuffixPercentOfAccessSummary;
1366 std::ofstream out(output_path);
1367 if (!out.is_open()) {
1368 return;
1369 }
1370 std::string header("bt");
1371 for (auto const& cf_name : cf_aggregates_map_) {
1372 header += ",";
1373 header += cf_name.first;
1374 }
1375 out << header << std::endl;
1376 for (auto const& bt_naccess_it : bt_cf_accesses) {
1377 const TraceType bt = bt_naccess_it.first;
1378 std::string row;
1379 row += block_type_to_string(bt);
1380 row += OutputPercentAccessStats(total_accesses, bt_naccess_it.second);
1381 out << row << std::endl;
1382 }
1383 out.close();
1384 }
1385 }
1386
1387 void BlockCacheTraceAnalyzer::WriteAccessCountSummaryStats(
1388 const std::vector<uint64_t>& access_count_buckets,
1389 bool user_access_only) const {
1390 // x: buckets.
1391 // y: # of accesses.
1392 std::map<std::string, std::map<uint64_t, uint64_t>> bt_access_nblocks;
1393 std::map<std::string, std::map<uint64_t, uint64_t>> cf_access_nblocks;
1394 uint64_t total_nblocks = 0;
1395 auto block_callback =
1396 [&](const std::string& cf_name, uint64_t /*fd*/, uint32_t /*level*/,
1397 TraceType type, const std::string& /*block_key*/,
1398 uint64_t /*block_id*/, const BlockAccessInfo& block) {
1399 const std::string type_str = block_type_to_string(type);
1400 if (cf_access_nblocks.find(cf_name) == cf_access_nblocks.end()) {
1401 // initialize.
1402 for (auto& access : access_count_buckets) {
1403 cf_access_nblocks[cf_name][access] = 0;
1404 }
1405 }
1406 if (bt_access_nblocks.find(type_str) == bt_access_nblocks.end()) {
1407 // initialize.
1408 for (auto& access : access_count_buckets) {
1409 bt_access_nblocks[type_str][access] = 0;
1410 }
1411 }
1412 uint64_t naccesses = 0;
1413 for (auto const& caller_access : block.caller_num_access_map) {
1414 if (!user_access_only || is_user_access(caller_access.first)) {
1415 naccesses += caller_access.second;
1416 }
1417 }
1418 if (naccesses == 0) {
1419 return;
1420 }
1421 total_nblocks += 1;
1422 bt_access_nblocks[type_str].upper_bound(naccesses)->second += 1;
1423 cf_access_nblocks[cf_name].upper_bound(naccesses)->second += 1;
1424 };
1425 TraverseBlocks(block_callback);
1426 const std::string user_access_prefix =
1427 user_access_only ? "user_access_only_" : "all_access_";
1428 WriteStatsToFile("cf", access_count_buckets,
1429 user_access_prefix + kFileNameSuffixAccessCountSummary,
1430 cf_access_nblocks, total_nblocks);
1431 WriteStatsToFile("bt", access_count_buckets,
1432 user_access_prefix + kFileNameSuffixAccessCountSummary,
1433 bt_access_nblocks, total_nblocks);
1434 }
1435
1436 BlockCacheTraceAnalyzer::BlockCacheTraceAnalyzer(
1437 const std::string& trace_file_path, const std::string& output_dir,
1438 const std::string& human_readable_trace_file_path,
1439 bool compute_reuse_distance, bool mrc_only,
1440 bool is_human_readable_trace_file,
1441 std::unique_ptr<BlockCacheTraceSimulator>&& cache_simulator)
1442 : env_(ROCKSDB_NAMESPACE::Env::Default()),
1443 trace_file_path_(trace_file_path),
1444 output_dir_(output_dir),
1445 human_readable_trace_file_path_(human_readable_trace_file_path),
1446 compute_reuse_distance_(compute_reuse_distance),
1447 mrc_only_(mrc_only),
1448 is_human_readable_trace_file_(is_human_readable_trace_file),
1449 cache_simulator_(std::move(cache_simulator)) {}
1450
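// The reuse distance of a block is approximated as the total size of the
// unique blocks accessed since this block's last access. The distance is
// accumulated into reuse_distance_count, and the set of unique blocks is
// cleared so it can be rebuilt until the next access.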
1451 void BlockCacheTraceAnalyzer::ComputeReuseDistance(
1452 BlockAccessInfo* info) const {
1453 assert(info);
1454 if (info->num_accesses == 0) {
1455 return;
1456 }
1457 uint64_t reuse_distance = 0;
1458 for (auto const& block_key : info->unique_blocks_since_last_access) {
1459 auto const& it = block_info_map_.find(block_key);
1460 // This block must exist.
1461 assert(it != block_info_map_.end());
1462 reuse_distance += it->second->block_size;
1463 }
1464 info->reuse_distance_count[reuse_distance] += 1;
1465 // Clear the hash set; it will accumulate unique blocks seen until the next access to this block.
1466 info->unique_blocks_since_last_access.clear();
1467 }
1468
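// Folds a single trace record into the aggregation hierarchy
// column family -> SST file -> block type -> block, assigning a unique block
// id on first sight. For user Get accesses with a valid get id, per-key
// statistics are tracked as well. Optionally updates reuse distances and
// finally forwards the record to the human readable trace writer.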
1469 Status BlockCacheTraceAnalyzer::RecordAccess(
1470 const BlockCacheTraceRecord& access) {
1471 ColumnFamilyAccessInfoAggregate& cf_aggr = cf_aggregates_map_[access.cf_name];
1472 SSTFileAccessInfoAggregate& file_aggr =
1473 cf_aggr.fd_aggregates_map[access.sst_fd_number];
1474 file_aggr.level = access.level;
1475 BlockTypeAccessInfoAggregate& block_type_aggr =
1476 file_aggr.block_type_aggregates_map[access.block_type];
1477 if (block_type_aggr.block_access_info_map.find(access.block_key) ==
1478 block_type_aggr.block_access_info_map.end()) {
1479 block_type_aggr.block_access_info_map[access.block_key].block_id =
1480 unique_block_id_;
1481 unique_block_id_++;
1482 }
1483 BlockAccessInfo& block_access_info =
1484 block_type_aggr.block_access_info_map[access.block_key];
1485 if (compute_reuse_distance_) {
1486 ComputeReuseDistance(&block_access_info);
1487 }
1488 block_access_info.AddAccess(access, access_sequence_number_);
1489 block_info_map_[access.block_key] = &block_access_info;
1490 uint64_t get_key_id = 0;
1491 if (access.caller == TableReaderCaller::kUserGet &&
1492 access.get_id != BlockCacheTraceHelper::kReservedGetId) {
1493 std::string user_key = ExtractUserKey(access.referenced_key).ToString();
1494 if (get_key_info_map_.find(user_key) == get_key_info_map_.end()) {
1495 get_key_info_map_[user_key].key_id = unique_get_key_id_;
1496 unique_get_key_id_++;
1497 }
1498 get_key_id = get_key_info_map_[user_key].key_id;
1499 get_key_info_map_[user_key].AddAccess(access, access_sequence_number_);
1500 }
1501
1502 if (compute_reuse_distance_) {
1503 // Record this block's key in every existing block's set of unique blocks seen since its last access.
1504 for (auto& cf_aggregates : cf_aggregates_map_) {
1505 for (auto& file_aggregates : cf_aggregates.second.fd_aggregates_map) {
1506 for (auto& block_type_aggregates :
1507 file_aggregates.second.block_type_aggregates_map) {
1508 for (auto& existing_block :
1509 block_type_aggregates.second.block_access_info_map) {
1510 existing_block.second.unique_blocks_since_last_access.insert(
1511 access.block_key);
1512 }
1513 }
1514 }
1515 }
1516 }
1517 return human_readable_trace_writer_.WriteHumanReadableTraceRecord(
1518 access, block_access_info.block_id, get_key_id);
1519 }
1520
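// Main analysis loop: opens the trace (binary or human readable), replays
// every access through RecordAccess (skipped when only the miss ratio curve
// is requested) and through the optional cache simulator, and periodically
// prints progress together with the observed miss ratio.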
1521 Status BlockCacheTraceAnalyzer::Analyze() {
1522 std::unique_ptr<BlockCacheTraceReader> reader;
1523 Status s = Status::OK();
1524 if (is_human_readable_trace_file_) {
1525 reader.reset(new BlockCacheHumanReadableTraceReader(trace_file_path_));
1526 } else {
1527 std::unique_ptr<TraceReader> trace_reader;
1528 s = NewFileTraceReader(env_, EnvOptions(), trace_file_path_, &trace_reader);
1529 if (!s.ok()) {
1530 return s;
1531 }
1532 reader.reset(new BlockCacheTraceReader(std::move(trace_reader)));
1533 s = reader->ReadHeader(&header_);
1534 if (!s.ok()) {
1535 return s;
1536 }
1537 }
1538 if (!human_readable_trace_file_path_.empty()) {
1539 s = human_readable_trace_writer_.NewWritableFile(
1540 human_readable_trace_file_path_, env_);
1541 if (!s.ok()) {
1542 return s;
1543 }
1544 }
1545 uint64_t start = env_->NowMicros();
1546 uint64_t time_interval = 0;
1547 while (s.ok()) {
1548 BlockCacheTraceRecord access;
1549 s = reader->ReadAccess(&access);
1550 if (!s.ok()) {
1551 break;
1552 }
1553 if (!mrc_only_) {
1554 s = RecordAccess(access);
1555 if (!s.ok()) {
1556 break;
1557 }
1558 }
1559 if (trace_start_timestamp_in_seconds_ == 0) {
1560 trace_start_timestamp_in_seconds_ =
1561 access.access_timestamp / kMicrosInSecond;
1562 }
1563 trace_end_timestamp_in_seconds_ = access.access_timestamp / kMicrosInSecond;
1564 miss_ratio_stats_.UpdateMetrics(access.access_timestamp,
1565 is_user_access(access.caller),
1566 access.is_cache_hit == Boolean::kFalse);
1567 if (cache_simulator_) {
1568 cache_simulator_->Access(access);
1569 }
1570 access_sequence_number_++;
1571 uint64_t now = env_->NowMicros();
1572 uint64_t duration = (now - start) / kMicrosInSecond;
1573 if (duration > 10 * time_interval) {
1574 uint64_t trace_duration =
1575 trace_end_timestamp_in_seconds_ - trace_start_timestamp_in_seconds_;
1576 fprintf(stdout,
1577 "Running for %" PRIu64 " seconds: Processed %" PRIu64
1578 " records/second. Trace duration %" PRIu64
1579 " seconds. Observed miss ratio %.2f\n",
1580 duration, duration > 0 ? access_sequence_number_ / duration : 0,
1581 trace_duration, miss_ratio_stats_.miss_ratio());
1582 time_interval++;
1583 }
1584 }
1585 uint64_t now = env_->NowMicros();
1586 uint64_t duration = (now - start) / kMicrosInSecond;
1587 uint64_t trace_duration =
1588 trace_end_timestamp_in_seconds_ - trace_start_timestamp_in_seconds_;
1589 fprintf(stdout,
1590 "Running for %" PRIu64 " seconds: Processed %" PRIu64
1591 " records/second. Trace duration %" PRIu64
1592 " seconds. Observed miss ratio %.2f\n",
1593 duration, duration > 0 ? access_sequence_number_ / duration : 0,
1594 trace_duration, miss_ratio_stats_.miss_ratio());
1595 return s;
1596 }
1597
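// Prints block size histograms, overall as well as broken down by block type
// and by (column family, block type). Blocks with an unknown (zero) size are
// skipped.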
1598 void BlockCacheTraceAnalyzer::PrintBlockSizeStats() const {
1599 HistogramStat bs_stats;
1600 std::map<TraceType, HistogramStat> bt_stats_map;
1601 std::map<std::string, std::map<TraceType, HistogramStat>> cf_bt_stats_map;
1602 auto block_callback =
1603 [&](const std::string& cf_name, uint64_t /*fd*/, uint32_t /*level*/,
1604 TraceType type, const std::string& /*block_key*/,
1605 uint64_t /*block_id*/, const BlockAccessInfo& block) {
1606 if (block.block_size == 0) {
1607 // Block size may be 0 when 1) a compaction observes a cache miss and
1608 // does not insert the missing block into the cache again, or 2) a
1609 // filter block is fetched from an SST file at the last level.
1610 return;
1611 }
1612 bs_stats.Add(block.block_size);
1613 bt_stats_map[type].Add(block.block_size);
1614 cf_bt_stats_map[cf_name][type].Add(block.block_size);
1615 };
1616 TraverseBlocks(block_callback);
1617 fprintf(stdout, "Block size stats: \n%s", bs_stats.ToString().c_str());
1618 for (auto const& bt_stats : bt_stats_map) {
1619 print_break_lines(/*num_break_lines=*/1);
1620 fprintf(stdout, "Block size stats for block type %s: \n%s",
1621 block_type_to_string(bt_stats.first).c_str(),
1622 bt_stats.second.ToString().c_str());
1623 }
1624 for (auto const& cf_bt_stats : cf_bt_stats_map) {
1625 const std::string& cf_name = cf_bt_stats.first;
1626 for (auto const& bt_stats : cf_bt_stats.second) {
1627 print_break_lines(/*num_break_lines=*/1);
1628 fprintf(stdout,
1629 "Block size stats for column family %s and block type %s: \n%s",
1630 cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(),
1631 bt_stats.second.ToString().c_str());
1632 }
1633 }
1634 }
1635
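// Prints histograms of per-block access counts (overall, by block type and by
// column family), plus detailed caller and key statistics for the data blocks
// with the bottom-k and top-k access counts.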
1636 void BlockCacheTraceAnalyzer::PrintAccessCountStats(bool user_access_only,
1637 uint32_t bottom_k,
1638 uint32_t top_k) const {
1639 HistogramStat access_stats;
1640 std::map<TraceType, HistogramStat> bt_stats_map;
1641 std::map<std::string, std::map<TraceType, HistogramStat>> cf_bt_stats_map;
1642 std::map<uint64_t, std::vector<std::string>> access_count_blocks;
1643 auto block_callback = [&](const std::string& cf_name, uint64_t /*fd*/,
1644 uint32_t /*level*/, TraceType type,
1645 const std::string& block_key, uint64_t /*block_id*/,
1646 const BlockAccessInfo& block) {
1647 uint64_t naccesses = 0;
1648 for (auto const& caller_access : block.caller_num_access_map) {
1649 if (!user_access_only || is_user_access(caller_access.first)) {
1650 naccesses += caller_access.second;
1651 }
1652 }
1653 if (naccesses == 0) {
1654 return;
1655 }
1656 if (type == TraceType::kBlockTraceDataBlock) {
1657 access_count_blocks[naccesses].push_back(block_key);
1658 }
1659 access_stats.Add(naccesses);
1660 bt_stats_map[type].Add(naccesses);
1661 cf_bt_stats_map[cf_name][type].Add(naccesses);
1662 };
1663 TraverseBlocks(block_callback);
1664 fprintf(stdout,
1665 "Block access count stats: The number of accesses per block. %s\n%s",
1666 user_access_only ? "User accesses only" : "All accesses",
1667 access_stats.ToString().c_str());
1668 uint32_t bottom_k_index = 0;
1669 for (auto naccess_it = access_count_blocks.begin();
1670 naccess_it != access_count_blocks.end(); naccess_it++) {
1671 bottom_k_index++;
1672 if (bottom_k_index > bottom_k) {
1673 break;
1674 }
1675 std::map<TableReaderCaller, uint64_t> caller_naccesses;
1676 uint64_t naccesses = 0;
1677 for (auto const& block_id : naccess_it->second) {
1678 BlockAccessInfo* block = block_info_map_.find(block_id)->second;
1679 for (auto const& caller_access : block->caller_num_access_map) {
1680 if (!user_access_only || is_user_access(caller_access.first)) {
1681 caller_naccesses[caller_access.first] += caller_access.second;
1682 naccesses += caller_access.second;
1683 }
1684 }
1685 }
1686 std::string statistics("Caller:");
1687 for (auto const& caller_naccessess_it : caller_naccesses) {
1688 statistics += caller_to_string(caller_naccessess_it.first);
1689 statistics += ":";
1690 statistics +=
1691 std::to_string(percent(caller_naccessess_it.second, naccesses));
1692 statistics += ",";
1693 }
1694 fprintf(stdout,
1695 "Bottom %" PRIu32 " access count. Access count=%" PRIu64
1696 " nblocks=%" ROCKSDB_PRIszt " %s\n",
1697 bottom_k, naccess_it->first, naccess_it->second.size(),
1698 statistics.c_str());
1699 }
1700
1701 uint32_t top_k_index = 0;
1702 for (auto naccess_it = access_count_blocks.rbegin();
1703 naccess_it != access_count_blocks.rend(); naccess_it++) {
1704 top_k_index++;
1705 if (top_k_index > top_k) {
1706 break;
1707 }
1708 for (auto const& block_id : naccess_it->second) {
1709 BlockAccessInfo* block = block_info_map_.find(block_id)->second;
1710 std::string statistics("Caller:");
1711 uint64_t naccesses = 0;
1712 for (auto const& caller_access : block->caller_num_access_map) {
1713 if (!user_access_only || is_user_access(caller_access.first)) {
1714 naccesses += caller_access.second;
1715 }
1716 }
1717 assert(naccesses > 0);
1718 for (auto const& caller_access : block->caller_num_access_map) {
1719 if (!user_access_only || is_user_access(caller_access.first)) {
1720 statistics += ",";
1721 statistics += caller_to_string(caller_access.first);
1722 statistics += ":";
1723 statistics +=
1724 std::to_string(percent(caller_access.second, naccesses));
1725 }
1726 }
1727 uint64_t ref_keys_accesses = 0;
1728 uint64_t ref_keys_does_not_exist_accesses = 0;
1729 for (auto const& ref_key_caller_access : block->key_num_access_map) {
1730 for (auto const& caller_access : ref_key_caller_access.second) {
1731 if (!user_access_only || is_user_access(caller_access.first)) {
1732 ref_keys_accesses += caller_access.second;
1733 }
1734 }
1735 }
1736 for (auto const& ref_key_caller_access :
1737 block->non_exist_key_num_access_map) {
1738 for (auto const& caller_access : ref_key_caller_access.second) {
1739 if (!user_access_only || is_user_access(caller_access.first)) {
1740 ref_keys_does_not_exist_accesses += caller_access.second;
1741 }
1742 }
1743 }
1744 statistics += ",nkeys=";
1745 statistics += std::to_string(block->num_keys);
1746 statistics += ",block_size=";
1747 statistics += std::to_string(block->block_size);
1748 statistics += ",num_ref_keys=";
1749 statistics += std::to_string(block->key_num_access_map.size());
1750 statistics += ",percent_access_ref_keys=";
1751 statistics += std::to_string(percent(ref_keys_accesses, naccesses));
1752 statistics += ",num_ref_keys_does_not_exist=";
1753 statistics += std::to_string(block->non_exist_key_num_access_map.size());
1754 statistics += ",percent_access_ref_keys_does_not_exist=";
1755 statistics +=
1756 std::to_string(percent(ref_keys_does_not_exist_accesses, naccesses));
1757 statistics += ",ref_data_size=";
1758 statistics += std::to_string(block->referenced_data_size);
1759 fprintf(stdout,
1760 "Top %" PRIu32 " access count blocks access_count=%" PRIu64
1761 " %s\n",
1762 top_k, naccess_it->first, statistics.c_str());
1763 }
1764 }
1765
1766 for (auto const& bt_stats : bt_stats_map) {
1767 print_break_lines(/*num_break_lines=*/1);
1768 fprintf(stdout, "Break down by block type %s: \n%s",
1769 block_type_to_string(bt_stats.first).c_str(),
1770 bt_stats.second.ToString().c_str());
1771 }
1772 for (auto const& cf_bt_stats : cf_bt_stats_map) {
1773 const std::string& cf_name = cf_bt_stats.first;
1774 for (auto const& bt_stats : cf_bt_stats.second) {
1775 print_break_lines(/*num_break_lines=*/1);
1776 fprintf(stdout,
1777 "Break down by column family %s and block type "
1778 "%s: \n%s",
1779 cf_name.c_str(), block_type_to_string(bt_stats.first).c_str(),
1780 bt_stats.second.ToString().c_str());
1781 }
1782 }
1783 }
1784
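// For each data block, reports the fraction of keys that were referenced, the
// fraction of referenced keys that do not exist in the block, the fraction of
// accesses that hit existing keys, and the average/standard deviation of the
// number of accesses per key, each as a histogram across blocks.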
1785 void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const {
1786 HistogramStat existing_keys_stats;
1787 std::map<std::string, HistogramStat> cf_existing_keys_stats_map;
1788 HistogramStat non_existing_keys_stats;
1789 std::map<std::string, HistogramStat> cf_non_existing_keys_stats_map;
1790 HistogramStat block_access_stats;
1791 std::map<std::string, HistogramStat> cf_block_access_info;
1792 HistogramStat percent_referenced_bytes;
1793 std::map<std::string, HistogramStat> cf_percent_referenced_bytes;
1794 // Total number of accesses in a data block / number of keys in a data block.
1795 HistogramStat avg_naccesses_per_key_in_a_data_block;
1796 std::map<std::string, HistogramStat> cf_avg_naccesses_per_key_in_a_data_block;
1797 // The standard deviation on the number of accesses of a key in a data block.
1798 HistogramStat stdev_naccesses_per_key_in_a_data_block;
1799 std::map<std::string, HistogramStat>
1800 cf_stdev_naccesses_per_key_in_a_data_block;
1801 auto block_callback =
1802 [&](const std::string& cf_name, uint64_t /*fd*/, uint32_t /*level*/,
1803 TraceType /*type*/, const std::string& /*block_key*/,
1804 uint64_t /*block_id*/, const BlockAccessInfo& block) {
1805 if (block.num_keys == 0) {
1806 return;
1807 }
1808 // Scale ratios by 10000 so four decimal digits survive the integer cast.
1809 uint64_t percent_referenced_for_existing_keys = (uint64_t)(
1810 ((double)block.key_num_access_map.size() / (double)block.num_keys) *
1811 10000.0);
1812 uint64_t percent_referenced_for_non_existing_keys =
1813 (uint64_t)(((double)block.non_exist_key_num_access_map.size() /
1814 (double)block.num_keys) *
1815 10000.0);
1816 uint64_t percent_accesses_for_existing_keys =
1817 (uint64_t)(((double)block.num_referenced_key_exist_in_block /
1818 (double)block.num_accesses) *
1819 10000.0);
1820
1821 HistogramStat hist_naccess_per_key;
1822 for (auto const& key_access : block.key_num_access_map) {
1823 for (auto const& caller_access : key_access.second) {
1824 hist_naccess_per_key.Add(caller_access.second);
1825 }
1826 }
1827 uint64_t avg_accesses =
1828 static_cast<uint64_t>(hist_naccess_per_key.Average());
1829 uint64_t stdev_accesses =
1830 static_cast<uint64_t>(hist_naccess_per_key.StandardDeviation());
1831 avg_naccesses_per_key_in_a_data_block.Add(avg_accesses);
1832 cf_avg_naccesses_per_key_in_a_data_block[cf_name].Add(avg_accesses);
1833 stdev_naccesses_per_key_in_a_data_block.Add(stdev_accesses);
1834 cf_stdev_naccesses_per_key_in_a_data_block[cf_name].Add(stdev_accesses);
1835
1836 existing_keys_stats.Add(percent_referenced_for_existing_keys);
1837 cf_existing_keys_stats_map[cf_name].Add(
1838 percent_referenced_for_existing_keys);
1839 non_existing_keys_stats.Add(percent_referenced_for_non_existing_keys);
1840 cf_non_existing_keys_stats_map[cf_name].Add(
1841 percent_referenced_for_non_existing_keys);
1842 block_access_stats.Add(percent_accesses_for_existing_keys);
1843 cf_block_access_info[cf_name].Add(percent_accesses_for_existing_keys);
1844 };
1845 TraverseBlocks(block_callback);
1846 fprintf(stdout,
1847 "Histogram on the number of referenced keys existing in a block over "
1848 "the total number of keys in a block: \n%s",
1849 existing_keys_stats.ToString().c_str());
1850 for (auto const& cf_stats : cf_existing_keys_stats_map) {
1851 print_break_lines(/*num_break_lines=*/1);
1852 fprintf(stdout, "Break down by column family %s: \n%s",
1853 cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
1854 }
1855 print_break_lines(/*num_break_lines=*/1);
1856 fprintf(
1857 stdout,
1858 "Histogram on the number of referenced keys DO NOT exist in a block over "
1859 "the total number of keys in a block: \n%s",
1860 non_existing_keys_stats.ToString().c_str());
1861 for (auto const& cf_stats : cf_non_existing_keys_stats_map) {
1862 print_break_lines(/*num_break_lines=*/1);
1863 fprintf(stdout, "Break down by column family %s: \n%s",
1864 cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
1865 }
1866 print_break_lines(/*num_break_lines=*/1);
1867 fprintf(stdout,
1868 "Histogram on the number of accesses on keys exist in a block over "
1869 "the total number of accesses in a block: \n%s",
1870 block_access_stats.ToString().c_str());
1871 for (auto const& cf_stats : cf_block_access_info) {
1872 print_break_lines(/*num_break_lines=*/1);
1873 fprintf(stdout, "Break down by column family %s: \n%s",
1874 cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
1875 }
1876 print_break_lines(/*num_break_lines=*/1);
1877 fprintf(
1878 stdout,
1879 "Histogram on the average number of accesses per key in a block: \n%s",
1880 avg_naccesses_per_key_in_a_data_block.ToString().c_str());
1881 for (auto const& cf_stats : cf_avg_naccesses_per_key_in_a_data_block) {
1882 fprintf(stdout, "Break down by column family %s: \n%s",
1883 cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
1884 }
1885 print_break_lines(/*num_break_lines=*/1);
1886 fprintf(stdout,
1887 "Histogram on the standard deviation of the number of accesses per "
1888 "key in a block: \n%s",
1889 stdev_naccesses_per_key_in_a_data_block.ToString().c_str());
1890 for (auto const& cf_stats : cf_stdev_naccesses_per_key_in_a_data_block) {
1891 fprintf(stdout, "Break down by column family %s: \n%s",
1892 cf_stats.first.c_str(), cf_stats.second.ToString().c_str());
1893 }
1894 }
1895
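// Prints the number of files, blocks and accesses per column family and
// overall, with breakdowns by block type, caller, LSM level and SST file.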
1896 void BlockCacheTraceAnalyzer::PrintStatsSummary() const {
1897 uint64_t total_num_files = 0;
1898 uint64_t total_num_blocks = 0;
1899 uint64_t total_num_accesses = 0;
1900 std::map<TraceType, uint64_t> bt_num_blocks_map;
1901 std::map<TableReaderCaller, uint64_t> caller_num_access_map;
1902 std::map<TableReaderCaller, std::map<TraceType, uint64_t>>
1903 caller_bt_num_access_map;
1904 std::map<TableReaderCaller, std::map<uint32_t, uint64_t>>
1905 caller_level_num_access_map;
1906 for (auto const& cf_aggregates : cf_aggregates_map_) {
1907 // Stats per column family.
1908 const std::string& cf_name = cf_aggregates.first;
1909 uint64_t cf_num_files = 0;
1910 uint64_t cf_num_blocks = 0;
1911 std::map<TraceType, uint64_t> cf_bt_blocks;
1912 uint64_t cf_num_accesses = 0;
1913 std::map<TableReaderCaller, uint64_t> cf_caller_num_accesses_map;
1914 std::map<TableReaderCaller, std::map<uint64_t, uint64_t>>
1915 cf_caller_level_num_accesses_map;
1916 std::map<TableReaderCaller, std::map<uint64_t, uint64_t>>
1917 cf_caller_file_num_accesses_map;
1918 std::map<TableReaderCaller, std::map<TraceType, uint64_t>>
1919 cf_caller_bt_num_accesses_map;
1920 total_num_files += cf_aggregates.second.fd_aggregates_map.size();
1921 for (auto const& file_aggregates : cf_aggregates.second.fd_aggregates_map) {
1922 // Stats per SST file.
1923 const uint64_t fd = file_aggregates.first;
1924 const uint32_t level = file_aggregates.second.level;
1925 cf_num_files++;
1926 for (auto const& block_type_aggregates :
1927 file_aggregates.second.block_type_aggregates_map) {
1928 // Stats per block type.
1929 const TraceType type = block_type_aggregates.first;
1930 cf_bt_blocks[type] +=
1931 block_type_aggregates.second.block_access_info_map.size();
1932 total_num_blocks +=
1933 block_type_aggregates.second.block_access_info_map.size();
1934 bt_num_blocks_map[type] +=
1935 block_type_aggregates.second.block_access_info_map.size();
1936 for (auto const& block_access_info :
1937 block_type_aggregates.second.block_access_info_map) {
1938 // Stats per block.
1939 cf_num_blocks++;
1940 for (auto const& stats :
1941 block_access_info.second.caller_num_access_map) {
1942 // Stats per caller.
1943 const TableReaderCaller caller = stats.first;
1944 const uint64_t num_accesses = stats.second;
1945 // Overall stats.
1946 total_num_accesses += num_accesses;
1947 caller_num_access_map[caller] += num_accesses;
1948 caller_bt_num_access_map[caller][type] += num_accesses;
1949 caller_level_num_access_map[caller][level] += num_accesses;
1950 // Column Family stats.
1951 cf_num_accesses += num_accesses;
1952 cf_caller_num_accesses_map[caller] += num_accesses;
1953 cf_caller_level_num_accesses_map[caller][level] += num_accesses;
1954 cf_caller_file_num_accesses_map[caller][fd] += num_accesses;
1955 cf_caller_bt_num_accesses_map[caller][type] += num_accesses;
1956 }
1957 }
1958 }
1959 }
1960
1961 // Print stats.
1962 print_break_lines(/*num_break_lines=*/3);
1963 fprintf(stdout, "Statistics for column family %s:\n", cf_name.c_str());
1964 fprintf(stdout,
1965 " Number of files:%" PRIu64 " Number of blocks: %" PRIu64
1966 " Number of accesses: %" PRIu64 "\n",
1967 cf_num_files, cf_num_blocks, cf_num_accesses);
1968 for (auto block_type : cf_bt_blocks) {
1969 fprintf(stdout, "Number of %s blocks: %" PRIu64 " Percent: %.2f\n",
1970 block_type_to_string(block_type.first).c_str(), block_type.second,
1971 percent(block_type.second, cf_num_blocks));
1972 }
1973 for (auto caller : cf_caller_num_accesses_map) {
1974 const uint64_t naccesses = caller.second;
1975 print_break_lines(/*num_break_lines=*/1);
1976 fprintf(stdout,
1977 "Caller %s: Number of accesses %" PRIu64 " Percent: %.2f\n",
1978 caller_to_string(caller.first).c_str(), naccesses,
1979 percent(naccesses, cf_num_accesses));
1980 fprintf(stdout, "Caller %s: Number of accesses per level break down\n",
1981 caller_to_string(caller.first).c_str());
1982 for (auto naccess_level :
1983 cf_caller_level_num_accesses_map[caller.first]) {
1984 fprintf(stdout,
1985 "\t Level %" PRIu64 ": Number of accesses: %" PRIu64
1986 " Percent: %.2f\n",
1987 naccess_level.first, naccess_level.second,
1988 percent(naccess_level.second, naccesses));
1989 }
1990 fprintf(stdout, "Caller %s: Number of accesses per file break down\n",
1991 caller_to_string(caller.first).c_str());
1992 for (auto naccess_file : cf_caller_file_num_accesses_map[caller.first]) {
1993 fprintf(stdout,
1994 "\t File %" PRIu64 ": Number of accesses: %" PRIu64
1995 " Percent: %.2f\n",
1996 naccess_file.first, naccess_file.second,
1997 percent(naccess_file.second, naccesses));
1998 }
1999 fprintf(stdout,
2000 "Caller %s: Number of accesses per block type break down\n",
2001 caller_to_string(caller.first).c_str());
2002 for (auto naccess_type : cf_caller_bt_num_accesses_map[caller.first]) {
2003 fprintf(stdout,
2004 "\t Block Type %s: Number of accesses: %" PRIu64
2005 " Percent: %.2f\n",
2006 block_type_to_string(naccess_type.first).c_str(),
2007 naccess_type.second, percent(naccess_type.second, naccesses));
2008 }
2009 }
2010 }
2011 print_break_lines(/*num_break_lines=*/3);
2012 fprintf(stdout, "Overall statistics:\n");
2013 fprintf(stdout,
2014 "Number of files: %" PRIu64 " Number of blocks: %" PRIu64
2015 " Number of accesses: %" PRIu64 "\n",
2016 total_num_files, total_num_blocks, total_num_accesses);
2017 for (auto block_type : bt_num_blocks_map) {
2018 fprintf(stdout, "Number of %s blocks: %" PRIu64 " Percent: %.2f\n",
2019 block_type_to_string(block_type.first).c_str(), block_type.second,
2020 percent(block_type.second, total_num_blocks));
2021 }
2022 for (auto caller : caller_num_access_map) {
2023 print_break_lines(/*num_break_lines=*/1);
2024 uint64_t naccesses = caller.second;
2025 fprintf(stdout, "Caller %s: Number of accesses %" PRIu64 " Percent: %.2f\n",
2026 caller_to_string(caller.first).c_str(), naccesses,
2027 percent(naccesses, total_num_accesses));
2028 fprintf(stdout, "Caller %s: Number of accesses per level break down\n",
2029 caller_to_string(caller.first).c_str());
2030 for (auto naccess_level : caller_level_num_access_map[caller.first]) {
2031 fprintf(stdout,
2032 "\t Level %d: Number of accesses: %" PRIu64 " Percent: %.2f\n",
2033 naccess_level.first, naccess_level.second,
2034 percent(naccess_level.second, naccesses));
2035 }
2036 fprintf(stdout, "Caller %s: Number of accesses per block type break down\n",
2037 caller_to_string(caller.first).c_str());
2038 for (auto naccess_type : caller_bt_num_access_map[caller.first]) {
2039 fprintf(stdout,
2040 "\t Block Type %s: Number of accesses: %" PRIu64
2041 " Percent: %.2f\n",
2042 block_type_to_string(naccess_type.first).c_str(),
2043 naccess_type.second, percent(naccess_type.second, naccesses));
2044 }
2045 }
2046 }
2047
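// Parses the cache simulator configuration file; each line is expected to be
// "cache_name,num_shard_bits,ghost_capacity,cache_capacity_1,...", e.g. an
// illustrative line could be "lru,6,0,1K,1M,1G". Malformed lines or
// unsupported cache names terminate the tool.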
2048 std::vector<CacheConfiguration> parse_cache_config_file(
2049 const std::string& config_path) {
2050 std::ifstream file(config_path);
2051 if (!file.is_open()) {
2052 return {};
2053 }
2054 std::vector<CacheConfiguration> configs;
2055 std::string line;
2056 while (getline(file, line)) {
2057 CacheConfiguration cache_config;
2058 std::stringstream ss(line);
2059 std::vector<std::string> config_strs;
2060 while (ss.good()) {
2061 std::string substr;
2062 getline(ss, substr, ',');
2063 config_strs.push_back(substr);
2064 }
2065 // Sanity checks.
2066 if (config_strs.size() < 4) {
2067 fprintf(stderr, "Invalid cache simulator configuration %s\n",
2068 line.c_str());
2069 exit(1);
2070 }
2071 if (kSupportedCacheNames.find(" " + config_strs[0] + " ") ==
2072 std::string::npos) {
2073 fprintf(stderr, "Invalid cache name %s. Supported cache names are %s\n",
2074 line.c_str(), kSupportedCacheNames.c_str());
2075 exit(1);
2076 }
2077 cache_config.cache_name = config_strs[0];
2078 cache_config.num_shard_bits = ParseUint32(config_strs[1]);
2079 cache_config.ghost_cache_capacity = ParseUint64(config_strs[2]);
2080 for (uint32_t i = 3; i < config_strs.size(); i++) {
2081 uint64_t capacity = ParseUint64(config_strs[i]);
2082 if (capacity == 0) {
2083 fprintf(stderr, "Invalid cache capacity %s, %s\n",
2084 config_strs[i].c_str(), line.c_str());
2085 exit(1);
2086 }
2087 cache_config.cache_capacities.push_back(capacity);
2088 }
2089 configs.push_back(cache_config);
2090 }
2091 file.close();
2092 return configs;
2093 }
2094
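// Parses a comma separated list of bucket upper bounds, e.g. "1,10,100", and
// appends a catch-all bucket of kMaxUint64.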
2095 std::vector<uint64_t> parse_buckets(const std::string& bucket_str) {
2096 std::vector<uint64_t> buckets;
2097 std::stringstream ss(bucket_str);
2098 while (ss.good()) {
2099 std::string bucket;
2100 getline(ss, bucket, ',');
2101 buckets.push_back(ParseUint64(bucket));
2102 }
2103 buckets.push_back(port::kMaxUint64);
2104 return buckets;
2105 }
2106
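// Tool entry point: parses flags, builds the cache simulator from the config
// file if one is given, replays the trace, and then emits the requested
// reports (miss ratio curves and timelines, summary statistics, access
// timelines, reuse/locality/skew analyses) according to the command line
// flags. An illustrative invocation (binary name assumed):
//   block_cache_trace_analyzer --block_cache_trace_path=<trace> \
//     --block_cache_analysis_result_dir=<dir>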
2107 int block_cache_trace_analyzer_tool(int argc, char** argv) {
2108 ParseCommandLineFlags(&argc, &argv, true);
2109 if (FLAGS_block_cache_trace_path.empty()) {
2110 fprintf(stderr, "block cache trace path is empty\n");
2111 exit(1);
2112 }
2113 uint64_t warmup_seconds =
2114 FLAGS_cache_sim_warmup_seconds > 0 ? FLAGS_cache_sim_warmup_seconds : 0;
2115 uint32_t downsample_ratio = FLAGS_block_cache_trace_downsample_ratio > 0
2116 ? FLAGS_block_cache_trace_downsample_ratio
2117 : 0;
2118 std::vector<CacheConfiguration> cache_configs =
2119 parse_cache_config_file(FLAGS_block_cache_sim_config_path);
2120 std::unique_ptr<BlockCacheTraceSimulator> cache_simulator;
2121 if (!cache_configs.empty()) {
2122 cache_simulator.reset(new BlockCacheTraceSimulator(
2123 warmup_seconds, downsample_ratio, cache_configs));
2124 Status s = cache_simulator->InitializeCaches();
2125 if (!s.ok()) {
2126 fprintf(stderr, "Cannot initialize cache simulators %s\n",
2127 s.ToString().c_str());
2128 exit(1);
2129 }
2130 }
2131 BlockCacheTraceAnalyzer analyzer(
2132 FLAGS_block_cache_trace_path, FLAGS_block_cache_analysis_result_dir,
2133 FLAGS_human_readable_trace_file_path,
2134 !FLAGS_reuse_distance_labels.empty(), FLAGS_mrc_only,
2135 FLAGS_is_block_cache_human_readable_trace, std::move(cache_simulator));
2136 Status s = analyzer.Analyze();
2137 if (!s.IsIncomplete() && !s.ok()) {
2138 // An Incomplete status means the reader reached the end of the trace file.
2139 fprintf(stderr, "Cannot process the trace %s\n", s.ToString().c_str());
2140 exit(1);
2141 }
2142 fprintf(stdout, "Status: %s\n", s.ToString().c_str());
2143 analyzer.WriteMissRatioCurves();
2144 analyzer.WriteMissRatioTimeline(1);
2145 analyzer.WriteMissRatioTimeline(kSecondInMinute);
2146 analyzer.WriteMissRatioTimeline(kSecondInHour);
2147 analyzer.WriteMissTimeline(1);
2148 analyzer.WriteMissTimeline(kSecondInMinute);
2149 analyzer.WriteMissTimeline(kSecondInHour);
2150
2151 if (FLAGS_mrc_only) {
2152 fprintf(stdout,
2153 "Skipping the analysis statistics since the user wants to compute "
2154 "MRC only");
2155 return 0;
2156 }
2157
2158 analyzer.PrintStatsSummary();
2159 if (FLAGS_print_access_count_stats) {
2160 print_break_lines(/*num_break_lines=*/3);
2161 analyzer.PrintAccessCountStats(
2162 /*user_access_only=*/false, FLAGS_analyze_bottom_k_access_count_blocks,
2163 FLAGS_analyze_top_k_access_count_blocks);
2164 print_break_lines(/*num_break_lines=*/3);
2165 analyzer.PrintAccessCountStats(
2166 /*user_access_only=*/true, FLAGS_analyze_bottom_k_access_count_blocks,
2167 FLAGS_analyze_top_k_access_count_blocks);
2168 }
2169 if (FLAGS_print_block_size_stats) {
2170 print_break_lines(/*num_break_lines=*/3);
2171 analyzer.PrintBlockSizeStats();
2172 }
2173 if (FLAGS_print_data_block_access_count_stats) {
2174 print_break_lines(/*num_break_lines=*/3);
2175 analyzer.PrintDataBlockAccessStats();
2176 }
2177 print_break_lines(/*num_break_lines=*/3);
2178
2179 if (!FLAGS_timeline_labels.empty()) {
2180 std::stringstream ss(FLAGS_timeline_labels);
2181 while (ss.good()) {
2182 std::string label;
2183 getline(ss, label, ',');
2184 if (label.find("block") != std::string::npos) {
2185 analyzer.WriteAccessTimeline(label, kSecondInMinute, true);
2186 analyzer.WriteAccessTimeline(label, kSecondInMinute, false);
2187 analyzer.WriteAccessTimeline(label, kSecondInHour, true);
2188 analyzer.WriteAccessTimeline(label, kSecondInHour, false);
2189 } else {
2190 analyzer.WriteAccessTimeline(label, kSecondInMinute, false);
2191 analyzer.WriteAccessTimeline(label, kSecondInHour, false);
2192 }
2193 }
2194 }
2195
2196 if (!FLAGS_analyze_callers.empty()) {
2197 analyzer.WritePercentAccessSummaryStats();
2198 std::stringstream ss(FLAGS_analyze_callers);
2199 while (ss.good()) {
2200 std::string caller;
2201 getline(ss, caller, ',');
2202 analyzer.WriteDetailedPercentAccessSummaryStats(string_to_caller(caller));
2203 }
2204 }
2205
2206 if (!FLAGS_access_count_buckets.empty()) {
2207 std::vector<uint64_t> buckets = parse_buckets(FLAGS_access_count_buckets);
2208 analyzer.WriteAccessCountSummaryStats(buckets, /*user_access_only=*/true);
2209 analyzer.WriteAccessCountSummaryStats(buckets, /*user_access_only=*/false);
2210 }
2211
2212 if (!FLAGS_reuse_distance_labels.empty() &&
2213 !FLAGS_reuse_distance_buckets.empty()) {
2214 std::vector<uint64_t> buckets = parse_buckets(FLAGS_reuse_distance_buckets);
2215 std::stringstream ss(FLAGS_reuse_distance_labels);
2216 while (ss.good()) {
2217 std::string label;
2218 getline(ss, label, ',');
2219 analyzer.WriteReuseDistance(label, buckets);
2220 }
2221 }
2222
2223 if (!FLAGS_reuse_interval_labels.empty() &&
2224 !FLAGS_reuse_interval_buckets.empty()) {
2225 std::vector<uint64_t> buckets = parse_buckets(FLAGS_reuse_interval_buckets);
2226 std::stringstream ss(FLAGS_reuse_interval_labels);
2227 while (ss.good()) {
2228 std::string label;
2229 getline(ss, label, ',');
2230 analyzer.WriteReuseInterval(label, buckets);
2231 }
2232 }
2233
2234 if (!FLAGS_reuse_lifetime_labels.empty() &&
2235 !FLAGS_reuse_lifetime_buckets.empty()) {
2236 std::vector<uint64_t> buckets = parse_buckets(FLAGS_reuse_lifetime_buckets);
2237 std::stringstream ss(FLAGS_reuse_lifetime_labels);
2238 while (ss.good()) {
2239 std::string label;
2240 getline(ss, label, ',');
2241 analyzer.WriteReuseLifetime(label, buckets);
2242 }
2243 }
2244
2245 if (FLAGS_analyze_blocks_reuse_k_reuse_window != 0) {
2246 std::vector<TraceType> block_types{TraceType::kBlockTraceIndexBlock,
2247 TraceType::kBlockTraceDataBlock,
2248 TraceType::kBlockTraceFilterBlock};
2249 for (auto block_type : block_types) {
2250 analyzer.WriteBlockReuseTimeline(
2251 FLAGS_analyze_blocks_reuse_k_reuse_window,
2252 /*user_access_only=*/true, block_type);
2253 analyzer.WriteBlockReuseTimeline(
2254 FLAGS_analyze_blocks_reuse_k_reuse_window,
2255 /*user_access_only=*/false, block_type);
2256 }
2257 }
2258
2259 if (!FLAGS_analyze_get_spatial_locality_labels.empty() &&
2260 !FLAGS_analyze_get_spatial_locality_buckets.empty()) {
2261 std::vector<uint64_t> buckets =
2262 parse_buckets(FLAGS_analyze_get_spatial_locality_buckets);
2263 std::stringstream ss(FLAGS_analyze_get_spatial_locality_labels);
2264 while (ss.good()) {
2265 std::string label;
2266 getline(ss, label, ',');
2267 analyzer.WriteGetSpatialLocality(label, buckets);
2268 }
2269 }
2270
2271 if (!FLAGS_analyze_correlation_coefficients_labels.empty()) {
2272 std::stringstream ss(FLAGS_analyze_correlation_coefficients_labels);
2273 while (ss.good()) {
2274 std::string label;
2275 getline(ss, label, ',');
2276 analyzer.WriteCorrelationFeatures(
2277 label, FLAGS_analyze_correlation_coefficients_max_number_of_values);
2278 }
2279 analyzer.WriteCorrelationFeaturesForGet(
2280 FLAGS_analyze_correlation_coefficients_max_number_of_values);
2281 }
2282
2283 if (!FLAGS_skew_labels.empty() && !FLAGS_skew_buckets.empty()) {
2284 std::vector<uint64_t> buckets = parse_buckets(FLAGS_skew_buckets);
2285 std::stringstream ss(FLAGS_skew_labels);
2286 while (ss.good()) {
2287 std::string label;
2288 getline(ss, label, ',');
2289 if (label.find("block") != std::string::npos) {
2290 analyzer.WriteSkewness(label, buckets,
2291 TraceType::kBlockTraceIndexBlock);
2292 analyzer.WriteSkewness(label, buckets,
2293 TraceType::kBlockTraceFilterBlock);
2294 analyzer.WriteSkewness(label, buckets, TraceType::kBlockTraceDataBlock);
2295 analyzer.WriteSkewness(label, buckets, TraceType::kTraceMax);
2296 } else {
2297 analyzer.WriteSkewness(label, buckets, TraceType::kTraceMax);
2298 }
2299 }
2300 }
2301 return 0;
2302 }
2303
2304 } // namespace ROCKSDB_NAMESPACE
2305
2306 #endif // GFLAGS
2307 #endif // ROCKSDB_LITE