// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
#include "tools/block_cache_analyzer/block_cache_trace_analyzer.h"

#include <algorithm>
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <random>
#include <set>
#include <sstream>
#include <string>
#include <vector>

#include "monitoring/histogram.h"
#include "util/gflags_compat.h"
#include "util/string_util.h"
25 using GFLAGS_NAMESPACE::ParseCommandLineFlags
;
27 DEFINE_string(block_cache_trace_path
, "", "The trace file path.");
28 DEFINE_bool(is_block_cache_human_readable_trace
, false,
29 "Is the trace file provided for analysis generated by running "
30 "block_cache_trace_analyzer with "
31 "FLAGS_human_readable_trace_file_path is specified.");
33 block_cache_sim_config_path
, "",
34 "The config file path. One cache configuration per line. The format of a "
35 "cache configuration is "
36 "cache_name,num_shard_bits,ghost_capacity,cache_capacity_1,...,cache_"
37 "capacity_N. Supported cache names are lru, lru_priority, lru_hybrid, and "
38 "lru_hybrid_no_insert_on_row_miss. User may also add a prefix 'ghost_' to "
39 "a cache_name to add a ghost cache in front of the real cache. "
40 "ghost_capacity and cache_capacity can be xK, xM or xG where x is a "
42 DEFINE_int32(block_cache_trace_downsample_ratio
, 1,
43 "The trace collected accesses on one in every "
44 "block_cache_trace_downsample_ratio blocks. We scale "
45 "down the simulated cache size by this ratio.");
46 DEFINE_bool(print_block_size_stats
, false,
47 "Print block size distribution and the distribution break down by "
48 "block type and column family.");
49 DEFINE_bool(print_access_count_stats
, false,
50 "Print access count distribution and the distribution break down "
51 "by block type and column family.");
52 DEFINE_bool(print_data_block_access_count_stats
, false,
53 "Print data block accesses by user Get and Multi-Get.");
54 DEFINE_int32(cache_sim_warmup_seconds
, 0,
55 "The number of seconds to warmup simulated caches. The hit/miss "
56 "counters are reset after the warmup completes.");
57 DEFINE_int32(analyze_bottom_k_access_count_blocks
, 0,
58 "Print out detailed access information for blocks with their "
59 "number of accesses are the bottom k among all blocks.");
60 DEFINE_int32(analyze_top_k_access_count_blocks
, 0,
61 "Print out detailed access information for blocks with their "
62 "number of accesses are the top k among all blocks.");
63 DEFINE_string(block_cache_analysis_result_dir
, "",
64 "The directory that saves block cache analysis results.");
67 "Group the number of accesses per block per second using these labels. "
68 "Possible labels are a combination of the following: cf (column family), "
69 "sst, level, bt (block type), caller, block. For example, label \"cf_bt\" "
70 "means the number of access per second is grouped by unique pairs of "
71 "\"cf_bt\". A label \"all\" contains the aggregated number of accesses per "
72 "second across all possible labels.");
73 DEFINE_string(reuse_distance_labels
, "",
74 "Group the reuse distance of a block using these labels. Reuse "
75 "distance is defined as the cumulated size of unique blocks read "
76 "between two consecutive accesses on the same block.");
78 reuse_distance_buckets
, "",
79 "Group blocks by their reuse distances given these buckets. For "
80 "example, if 'reuse_distance_buckets' is '1K,1M,1G', we will "
81 "create four buckets. The first three buckets contain the number of "
82 "blocks with reuse distance less than 1KB, between 1K and 1M, between 1M "
83 "and 1G, respectively. The last bucket contains the number of blocks with "
84 "reuse distance larger than 1G. ");
86 reuse_interval_labels
, "",
87 "Group the reuse interval of a block using these labels. Reuse "
88 "interval is defined as the time between two consecutive accesses "
89 "on the same block.");
91 reuse_interval_buckets
, "",
92 "Group blocks by their reuse interval given these buckets. For "
93 "example, if 'reuse_distance_buckets' is '1,10,100', we will "
94 "create four buckets. The first three buckets contain the number of "
95 "blocks with reuse interval less than 1 second, between 1 second and 10 "
96 "seconds, between 10 seconds and 100 seconds, respectively. The last "
97 "bucket contains the number of blocks with reuse interval longer than 100 "
100 reuse_lifetime_labels
, "",
101 "Group the reuse lifetime of a block using these labels. Reuse "
102 "lifetime is defined as the time interval between the first access on a "
103 "block and the last access on the same block. For blocks that are only "
104 "accessed once, its lifetime is set to kMaxUint64.");
106 reuse_lifetime_buckets
, "",
107 "Group blocks by their reuse lifetime given these buckets. For "
108 "example, if 'reuse_lifetime_buckets' is '1,10,100', we will "
109 "create four buckets. The first three buckets contain the number of "
110 "blocks with reuse lifetime less than 1 second, between 1 second and 10 "
111 "seconds, between 10 seconds and 100 seconds, respectively. The last "
112 "bucket contains the number of blocks with reuse lifetime longer than 100 "
116 "The list of callers to perform a detailed analysis on. If speicfied, the "
117 "analyzer will output a detailed percentage of accesses for each caller "
118 "break down by column family, level, and block type. A list of available "
119 "callers are: Get, MultiGet, Iterator, ApproximateSize, VerifyChecksum, "
120 "SSTDumpTool, ExternalSSTIngestion, Repair, Prefetch, Compaction, "
121 "CompactionRefill, Flush, SSTFileReader, Uncategorized.");
122 DEFINE_string(access_count_buckets
, "",
123 "Group number of blocks by their access count given these "
124 "buckets. If specified, the analyzer will output a detailed "
125 "analysis on the number of blocks grouped by their access count "
126 "break down by block type and column family.");
127 DEFINE_int32(analyze_blocks_reuse_k_reuse_window
, 0,
128 "Analyze the percentage of blocks that are accessed in the "
129 "[k, 2*k] seconds are accessed again in the next [2*k, 3*k], "
130 "[3*k, 4*k],...,[k*(n-1), k*n] seconds. ");
131 DEFINE_string(analyze_get_spatial_locality_labels
, "",
132 "Group data blocks using these labels.");
133 DEFINE_string(analyze_get_spatial_locality_buckets
, "",
134 "Group data blocks by their statistics using these buckets.");
135 DEFINE_string(skew_labels
, "",
136 "Group the access count of a block using these labels.");
137 DEFINE_string(skew_buckets
, "", "Group the skew labels using these buckets.");
138 DEFINE_bool(mrc_only
, false,
139 "Evaluate alternative cache policies only. When this flag is true, "
140 "the analyzer does NOT maintain states of each block in memory for "
141 "analysis. It only feeds the accesses into the cache simulators.");
143 analyze_correlation_coefficients_labels
, "",
144 "Analyze the correlation coefficients of features such as number of past "
145 "accesses with regard to the number of accesses till the next access.");
146 DEFINE_int32(analyze_correlation_coefficients_max_number_of_values
, 1000000,
147 "The maximum number of values for a feature. If the number of "
148 "values for a feature is larger than this max, it randomly "
149 "selects 'max' number of values.");
150 DEFINE_string(human_readable_trace_file_path
, "",
151 "The filt path that saves human readable access records.");
153 namespace ROCKSDB_NAMESPACE
{
156 const std::string kMissRatioCurveFileName
= "mrc";
157 const std::string kGroupbyBlock
= "block";
158 const std::string kGroupbyTable
= "table";
159 const std::string kGroupbyColumnFamily
= "cf";
160 const std::string kGroupbySSTFile
= "sst";
161 const std::string kGroupbyBlockType
= "bt";
162 const std::string kGroupbyCaller
= "caller";
163 const std::string kGroupbyLevel
= "level";
164 const std::string kGroupbyAll
= "all";
165 const std::set
<std::string
> kGroupbyLabels
{
166 kGroupbyBlock
, kGroupbyColumnFamily
, kGroupbySSTFile
, kGroupbyLevel
,
167 kGroupbyBlockType
, kGroupbyCaller
, kGroupbyAll
};
168 const std::string kSupportedCacheNames
=
169 " lru ghost_lru lru_priority ghost_lru_priority lru_hybrid "
170 "ghost_lru_hybrid lru_hybrid_no_insert_on_row_miss "
171 "ghost_lru_hybrid_no_insert_on_row_miss ";
173 // The suffix for the generated csv files.
174 const std::string kFileNameSuffixMissRatioTimeline
= "miss_ratio_timeline";
175 const std::string kFileNameSuffixMissTimeline
= "miss_timeline";
176 const std::string kFileNameSuffixSkew
= "skewness";
177 const std::string kFileNameSuffixAccessTimeline
= "access_timeline";
178 const std::string kFileNameSuffixCorrelation
= "correlation_input";
179 const std::string kFileNameSuffixAvgReuseIntervalNaccesses
=
180 "avg_reuse_interval_naccesses";
181 const std::string kFileNameSuffixAvgReuseInterval
= "avg_reuse_interval";
182 const std::string kFileNameSuffixReuseInterval
= "access_reuse_interval";
183 const std::string kFileNameSuffixReuseLifetime
= "reuse_lifetime";
184 const std::string kFileNameSuffixAccessReuseBlocksTimeline
=
185 "reuse_blocks_timeline";
186 const std::string kFileNameSuffixPercentOfAccessSummary
=
187 "percentage_of_accesses_summary";
188 const std::string kFileNameSuffixPercentRefKeys
= "percent_ref_keys";
189 const std::string kFileNameSuffixPercentDataSizeOnRefKeys
=
190 "percent_data_size_on_ref_keys";
191 const std::string kFileNameSuffixPercentAccessesOnRefKeys
=
192 "percent_accesses_on_ref_keys";
193 const std::string kFileNameSuffixAccessCountSummary
= "access_count_summary";
195 std::string
block_type_to_string(TraceType type
) {
197 case kBlockTraceFilterBlock
:
199 case kBlockTraceDataBlock
:
201 case kBlockTraceIndexBlock
:
203 case kBlockTraceRangeDeletionBlock
:
204 return "RangeDeletion";
205 case kBlockTraceUncompressionDictBlock
:
206 return "UncompressionDict";
210 // This cannot happen.
211 return "InvalidType";
214 std::string
caller_to_string(TableReaderCaller caller
) {
222 case kUserApproximateSize
:
223 return "ApproximateSize";
224 case kUserVerifyChecksum
:
225 return "VerifyChecksum";
227 return "SSTDumpTool";
228 case kExternalSSTIngestion
:
229 return "ExternalSSTIngestion";
236 case kCompactionRefill
:
237 return "CompactionRefill";
241 return "SSTFileReader";
243 return "Uncategorized";
247 // This cannot happen.
248 return "InvalidCaller";
251 TableReaderCaller
string_to_caller(std::string caller_str
) {
252 if (caller_str
== "Get") {
254 } else if (caller_str
== "MultiGet") {
255 return kUserMultiGet
;
256 } else if (caller_str
== "Iterator") {
257 return kUserIterator
;
258 } else if (caller_str
== "ApproximateSize") {
259 return kUserApproximateSize
;
260 } else if (caller_str
== "VerifyChecksum") {
261 return kUserVerifyChecksum
;
262 } else if (caller_str
== "SSTDumpTool") {
264 } else if (caller_str
== "ExternalSSTIngestion") {
265 return kExternalSSTIngestion
;
266 } else if (caller_str
== "Repair") {
268 } else if (caller_str
== "Prefetch") {
270 } else if (caller_str
== "Compaction") {
272 } else if (caller_str
== "CompactionRefill") {
273 return kCompactionRefill
;
274 } else if (caller_str
== "Flush") {
276 } else if (caller_str
== "SSTFileReader") {
277 return kSSTFileReader
;
278 } else if (caller_str
== "Uncategorized") {
279 return kUncategorized
;
281 return TableReaderCaller::kMaxBlockCacheLookupCaller
;
284 bool is_user_access(TableReaderCaller caller
) {
289 case kUserApproximateSize
:
290 case kUserVerifyChecksum
:
// Visual separator printed between report sections.
const char kBreakLine[] =
    "***************************************************************\n";
301 void print_break_lines(uint32_t num_break_lines
) {
302 for (uint32_t i
= 0; i
< num_break_lines
; i
++) {
303 fprintf(stdout
, kBreakLine
);
// Returns numerator / denomenator as a percentage, or -1 when the
// denominator is zero (callers treat a negative value as "undefined").
double percent(uint64_t numerator, uint64_t denomenator) {
  if (denomenator == 0) {
    return -1;
  }
  return static_cast<double>(numerator * 100.0 / denomenator);
}
// Re-buckets a per-second timeline into buckets of `time_unit` seconds by
// integer-dividing each timestamp; counts falling into the same bucket are
// summed. A time_unit of 1 returns the input unchanged.
std::map<uint64_t, uint64_t> adjust_time_unit(
    const std::map<uint64_t, uint64_t>& time_stats, uint64_t time_unit) {
  if (time_unit == 1) {
    return time_stats;
  }
  std::map<uint64_t, uint64_t> adjusted_time_stats;
  for (auto const& time : time_stats) {
    adjusted_time_stats[static_cast<uint64_t>(time.first / time_unit)] +=
        time.second;
  }
  return adjusted_time_stats;
}
328 void BlockCacheTraceAnalyzer::WriteMissRatioCurves() const {
329 if (!cache_simulator_
) {
332 if (output_dir_
.empty()) {
335 uint64_t trace_duration
=
336 trace_end_timestamp_in_seconds_
- trace_start_timestamp_in_seconds_
;
337 uint64_t total_accesses
= access_sequence_number_
;
338 const std::string output_miss_ratio_curve_path
=
339 output_dir_
+ "/" + std::to_string(trace_duration
) + "_" +
340 std::to_string(total_accesses
) + "_" + kMissRatioCurveFileName
;
341 std::ofstream
out(output_miss_ratio_curve_path
);
342 if (!out
.is_open()) {
346 const std::string header
=
347 "cache_name,num_shard_bits,ghost_capacity,capacity,miss_ratio,total_"
349 out
<< header
<< std::endl
;
350 for (auto const& config_caches
: cache_simulator_
->sim_caches()) {
351 const CacheConfiguration
& config
= config_caches
.first
;
352 for (uint32_t i
= 0; i
< config
.cache_capacities
.size(); i
++) {
354 config_caches
.second
[i
]->miss_ratio_stats().miss_ratio();
356 out
<< config
.cache_name
;
358 out
<< config
.num_shard_bits
;
360 out
<< config
.ghost_cache_capacity
;
362 out
<< config
.cache_capacities
[i
];
364 out
<< std::fixed
<< std::setprecision(4) << miss_ratio
;
366 out
<< config_caches
.second
[i
]->miss_ratio_stats().total_accesses();
373 void BlockCacheTraceAnalyzer::UpdateFeatureVectors(
374 const std::vector
<uint64_t>& access_sequence_number_timeline
,
375 const std::vector
<uint64_t>& access_timeline
, const std::string
& label
,
376 std::map
<std::string
, Features
>* label_features
,
377 std::map
<std::string
, Predictions
>* label_predictions
) const {
378 if (access_sequence_number_timeline
.empty() || access_timeline
.empty()) {
381 assert(access_timeline
.size() == access_sequence_number_timeline
.size());
382 uint64_t prev_access_sequence_number
= access_sequence_number_timeline
[0];
383 uint64_t prev_access_timestamp
= access_timeline
[0];
384 for (uint32_t i
= 0; i
< access_sequence_number_timeline
.size(); i
++) {
385 uint64_t num_accesses_since_last_access
=
386 access_sequence_number_timeline
[i
] - prev_access_sequence_number
;
387 uint64_t elapsed_time_since_last_access
=
388 access_timeline
[i
] - prev_access_timestamp
;
389 prev_access_sequence_number
= access_sequence_number_timeline
[i
];
390 prev_access_timestamp
= access_timeline
[i
];
391 if (i
< access_sequence_number_timeline
.size() - 1) {
392 (*label_features
)[label
].num_accesses_since_last_access
.push_back(
393 num_accesses_since_last_access
);
394 (*label_features
)[label
].num_past_accesses
.push_back(i
);
395 (*label_features
)[label
].elapsed_time_since_last_access
.push_back(
396 elapsed_time_since_last_access
);
399 (*label_predictions
)[label
].num_accesses_till_next_access
.push_back(
400 num_accesses_since_last_access
);
401 (*label_predictions
)[label
].elapsed_time_till_next_access
.push_back(
402 elapsed_time_since_last_access
);
407 void BlockCacheTraceAnalyzer::WriteMissRatioTimeline(uint64_t time_unit
) const {
408 if (!cache_simulator_
|| output_dir_
.empty()) {
411 std::map
<uint64_t, std::map
<std::string
, std::map
<uint64_t, double>>>
413 uint64_t start_time
= port::kMaxUint64
;
414 uint64_t end_time
= 0;
415 const std::map
<uint64_t, uint64_t>& trace_num_misses
=
416 adjust_time_unit(miss_ratio_stats_
.num_misses_timeline(), time_unit
);
417 const std::map
<uint64_t, uint64_t>& trace_num_accesses
=
418 adjust_time_unit(miss_ratio_stats_
.num_accesses_timeline(), time_unit
);
419 assert(trace_num_misses
.size() == trace_num_accesses
.size());
420 for (auto const& num_miss
: trace_num_misses
) {
421 uint64_t time
= num_miss
.first
;
422 start_time
= std::min(start_time
, time
);
423 end_time
= std::max(end_time
, time
);
424 uint64_t miss
= num_miss
.second
;
425 auto it
= trace_num_accesses
.find(time
);
426 assert(it
!= trace_num_accesses
.end());
427 uint64_t access
= it
->second
;
428 cs_name_timeline
[port::kMaxUint64
]["trace"][time
] = percent(miss
, access
);
430 for (auto const& config_caches
: cache_simulator_
->sim_caches()) {
431 const CacheConfiguration
& config
= config_caches
.first
;
432 std::string cache_label
= config
.cache_name
+ "-" +
433 std::to_string(config
.num_shard_bits
) + "-" +
434 std::to_string(config
.ghost_cache_capacity
);
435 for (uint32_t i
= 0; i
< config
.cache_capacities
.size(); i
++) {
436 const std::map
<uint64_t, uint64_t>& num_misses
= adjust_time_unit(
437 config_caches
.second
[i
]->miss_ratio_stats().num_misses_timeline(),
439 const std::map
<uint64_t, uint64_t>& num_accesses
= adjust_time_unit(
440 config_caches
.second
[i
]->miss_ratio_stats().num_accesses_timeline(),
442 assert(num_misses
.size() == num_accesses
.size());
443 for (auto const& num_miss
: num_misses
) {
444 uint64_t time
= num_miss
.first
;
445 start_time
= std::min(start_time
, time
);
446 end_time
= std::max(end_time
, time
);
447 uint64_t miss
= num_miss
.second
;
448 auto it
= num_accesses
.find(time
);
449 assert(it
!= num_accesses
.end());
450 uint64_t access
= it
->second
;
451 cs_name_timeline
[config
.cache_capacities
[i
]][cache_label
][time
] =
452 percent(miss
, access
);
456 for (auto const& it
: cs_name_timeline
) {
457 const std::string output_miss_ratio_timeline_path
=
458 output_dir_
+ "/" + std::to_string(it
.first
) + "_" +
459 std::to_string(time_unit
) + "_" + kFileNameSuffixMissRatioTimeline
;
460 std::ofstream
out(output_miss_ratio_timeline_path
);
461 if (!out
.is_open()) {
464 std::string
header("time");
465 for (uint64_t now
= start_time
; now
<= end_time
; now
++) {
467 header
+= std::to_string(now
);
469 out
<< header
<< std::endl
;
470 for (auto const& label
: it
.second
) {
471 std::string
row(label
.first
);
472 for (uint64_t now
= start_time
; now
<= end_time
; now
++) {
473 auto misses
= label
.second
.find(now
);
475 if (misses
!= label
.second
.end()) {
476 row
+= std::to_string(misses
->second
);
481 out
<< row
<< std::endl
;
487 void BlockCacheTraceAnalyzer::WriteMissTimeline(uint64_t time_unit
) const {
488 if (!cache_simulator_
|| output_dir_
.empty()) {
491 std::map
<uint64_t, std::map
<std::string
, std::map
<uint64_t, uint64_t>>>
493 uint64_t start_time
= port::kMaxUint64
;
494 uint64_t end_time
= 0;
495 const std::map
<uint64_t, uint64_t>& trace_num_misses
=
496 adjust_time_unit(miss_ratio_stats_
.num_misses_timeline(), time_unit
);
497 for (auto const& num_miss
: trace_num_misses
) {
498 uint64_t time
= num_miss
.first
;
499 start_time
= std::min(start_time
, time
);
500 end_time
= std::max(end_time
, time
);
501 uint64_t miss
= num_miss
.second
;
502 cs_name_timeline
[port::kMaxUint64
]["trace"][time
] = miss
;
504 for (auto const& config_caches
: cache_simulator_
->sim_caches()) {
505 const CacheConfiguration
& config
= config_caches
.first
;
506 std::string cache_label
= config
.cache_name
+ "-" +
507 std::to_string(config
.num_shard_bits
) + "-" +
508 std::to_string(config
.ghost_cache_capacity
);
509 for (uint32_t i
= 0; i
< config
.cache_capacities
.size(); i
++) {
510 const std::map
<uint64_t, uint64_t>& num_misses
= adjust_time_unit(
511 config_caches
.second
[i
]->miss_ratio_stats().num_misses_timeline(),
513 for (auto const& num_miss
: num_misses
) {
514 uint64_t time
= num_miss
.first
;
515 start_time
= std::min(start_time
, time
);
516 end_time
= std::max(end_time
, time
);
517 uint64_t miss
= num_miss
.second
;
518 cs_name_timeline
[config
.cache_capacities
[i
]][cache_label
][time
] = miss
;
522 for (auto const& it
: cs_name_timeline
) {
523 const std::string output_miss_ratio_timeline_path
=
524 output_dir_
+ "/" + std::to_string(it
.first
) + "_" +
525 std::to_string(time_unit
) + "_" + kFileNameSuffixMissTimeline
;
526 std::ofstream
out(output_miss_ratio_timeline_path
);
527 if (!out
.is_open()) {
530 std::string
header("time");
531 for (uint64_t now
= start_time
; now
<= end_time
; now
++) {
533 header
+= std::to_string(now
);
535 out
<< header
<< std::endl
;
536 for (auto const& label
: it
.second
) {
537 std::string
row(label
.first
);
538 for (uint64_t now
= start_time
; now
<= end_time
; now
++) {
539 auto misses
= label
.second
.find(now
);
541 if (misses
!= label
.second
.end()) {
542 row
+= std::to_string(misses
->second
);
547 out
<< row
<< std::endl
;
553 void BlockCacheTraceAnalyzer::WriteSkewness(
554 const std::string
& label_str
, const std::vector
<uint64_t>& percent_buckets
,
555 TraceType target_block_type
) const {
556 std::set
<std::string
> labels
= ParseLabelStr(label_str
);
557 std::map
<std::string
, uint64_t> label_naccesses
;
558 uint64_t total_naccesses
= 0;
559 auto block_callback
= [&](const std::string
& cf_name
, uint64_t fd
,
560 uint32_t level
, TraceType type
,
561 const std::string
& /*block_key*/, uint64_t block_id
,
562 const BlockAccessInfo
& block
) {
563 if (target_block_type
!= TraceType::kTraceMax
&&
564 target_block_type
!= type
) {
567 const std::string label
= BuildLabel(
568 labels
, cf_name
, fd
, level
, type
,
569 TableReaderCaller::kMaxBlockCacheLookupCaller
, block_id
, block
);
570 label_naccesses
[label
] += block
.num_accesses
;
571 total_naccesses
+= block
.num_accesses
;
573 TraverseBlocks(block_callback
, &labels
);
574 std::map
<std::string
, std::map
<uint64_t, uint64_t>> label_bucket_naccesses
;
575 std::vector
<std::pair
<std::string
, uint64_t>> pairs
;
576 for (auto const& itr
: label_naccesses
) {
577 pairs
.push_back(itr
);
579 // Sort in descending order.
580 sort(pairs
.begin(), pairs
.end(),
581 [](const std::pair
<std::string
, uint64_t>& a
,
582 const std::pair
<std::string
, uint64_t>& b
) {
583 return b
.second
< a
.second
;
586 size_t prev_start_index
= 0;
587 for (auto const& percent
: percent_buckets
) {
588 label_bucket_naccesses
[label_str
][percent
] = 0;
589 size_t end_index
= 0;
590 if (percent
== port::kMaxUint64
) {
591 end_index
= label_naccesses
.size();
593 end_index
= percent
* label_naccesses
.size() / 100;
595 for (size_t i
= prev_start_index
; i
< end_index
; i
++) {
596 label_bucket_naccesses
[label_str
][percent
] += pairs
[i
].second
;
598 prev_start_index
= end_index
;
600 std::string filename_suffix
;
601 if (target_block_type
!= TraceType::kTraceMax
) {
602 filename_suffix
= block_type_to_string(target_block_type
);
603 filename_suffix
+= "_";
605 filename_suffix
+= kFileNameSuffixSkew
;
606 WriteStatsToFile(label_str
, percent_buckets
, filename_suffix
,
607 label_bucket_naccesses
, total_naccesses
);
610 void BlockCacheTraceAnalyzer::WriteCorrelationFeatures(
611 const std::string
& label_str
, uint32_t max_number_of_values
) const {
612 std::set
<std::string
> labels
= ParseLabelStr(label_str
);
613 std::map
<std::string
, Features
> label_features
;
614 std::map
<std::string
, Predictions
> label_predictions
;
615 auto block_callback
=
616 [&](const std::string
& cf_name
, uint64_t fd
, uint32_t level
,
617 TraceType block_type
, const std::string
& /*block_key*/,
618 uint64_t /*block_key_id*/, const BlockAccessInfo
& block
) {
619 if (block
.table_id
== 0 && labels
.find(kGroupbyTable
) != labels
.end()) {
620 // We only know table id information for get requests.
623 if (labels
.find(kGroupbyCaller
) != labels
.end()) {
625 for (auto const& caller_map
: block
.caller_access_timeline
) {
626 const std::string label
=
627 BuildLabel(labels
, cf_name
, fd
, level
, block_type
,
628 caller_map
.first
, /*block_id=*/0, block
);
629 auto it
= block
.caller_access_sequence__number_timeline
.find(
631 assert(it
!= block
.caller_access_sequence__number_timeline
.end());
632 UpdateFeatureVectors(it
->second
, caller_map
.second
, label
,
633 &label_features
, &label_predictions
);
637 const std::string label
=
638 BuildLabel(labels
, cf_name
, fd
, level
, block_type
,
639 TableReaderCaller::kMaxBlockCacheLookupCaller
,
640 /*block_id=*/0, block
);
641 UpdateFeatureVectors(block
.access_sequence_number_timeline
,
642 block
.access_timeline
, label
, &label_features
,
645 TraverseBlocks(block_callback
, &labels
);
646 WriteCorrelationFeaturesToFile(label_str
, label_features
, label_predictions
,
647 max_number_of_values
);
650 void BlockCacheTraceAnalyzer::WriteCorrelationFeaturesToFile(
651 const std::string
& label
,
652 const std::map
<std::string
, Features
>& label_features
,
653 const std::map
<std::string
, Predictions
>& label_predictions
,
654 uint32_t max_number_of_values
) const {
655 for (auto const& label_feature_vectors
: label_features
) {
656 const Features
& past
= label_feature_vectors
.second
;
657 auto it
= label_predictions
.find(label_feature_vectors
.first
);
658 assert(it
!= label_predictions
.end());
659 const Predictions
& future
= it
->second
;
660 const std::string output_path
= output_dir_
+ "/" + label
+ "_" +
661 label_feature_vectors
.first
+ "_" +
662 kFileNameSuffixCorrelation
;
663 std::ofstream
out(output_path
);
664 if (!out
.is_open()) {
668 "num_accesses_since_last_access,elapsed_time_since_last_access,num_"
669 "past_accesses,num_accesses_till_next_access,elapsed_time_till_next_"
671 out
<< header
<< std::endl
;
672 std::vector
<uint32_t> indexes
;
673 for (uint32_t i
= 0; i
< past
.num_accesses_since_last_access
.size(); i
++) {
674 indexes
.push_back(i
);
676 RandomShuffle(indexes
.begin(), indexes
.end());
677 for (uint32_t i
= 0; i
< max_number_of_values
&& i
< indexes
.size(); i
++) {
678 uint32_t rand_index
= indexes
[i
];
679 out
<< std::to_string(past
.num_accesses_since_last_access
[rand_index
])
681 out
<< std::to_string(past
.elapsed_time_since_last_access
[rand_index
])
683 out
<< std::to_string(past
.num_past_accesses
[rand_index
]) << ",";
684 out
<< std::to_string(future
.num_accesses_till_next_access
[rand_index
])
686 out
<< std::to_string(future
.elapsed_time_till_next_access
[rand_index
])
693 void BlockCacheTraceAnalyzer::WriteCorrelationFeaturesForGet(
694 uint32_t max_number_of_values
) const {
695 std::string label
= "GetKeyInfo";
696 std::map
<std::string
, Features
> label_features
;
697 std::map
<std::string
, Predictions
> label_predictions
;
698 for (auto const& get_info
: get_key_info_map_
) {
699 const GetKeyInfo
& info
= get_info
.second
;
700 UpdateFeatureVectors(info
.access_sequence_number_timeline
,
701 info
.access_timeline
, label
, &label_features
,
704 WriteCorrelationFeaturesToFile(label
, label_features
, label_predictions
,
705 max_number_of_values
);
708 std::set
<std::string
> BlockCacheTraceAnalyzer::ParseLabelStr(
709 const std::string
& label_str
) const {
710 std::stringstream
ss(label_str
);
711 std::set
<std::string
> labels
;
712 // label_str is in the form of "label1_label2_label3", e.g., cf_bt.
714 std::string label_name
;
715 getline(ss
, label_name
, '_');
716 if (kGroupbyLabels
.find(label_name
) == kGroupbyLabels
.end()) {
717 // Unknown label name.
718 fprintf(stderr
, "Unknown label name %s, label string %s\n",
719 label_name
.c_str(), label_str
.c_str());
722 labels
.insert(label_name
);
727 std::string
BlockCacheTraceAnalyzer::BuildLabel(
728 const std::set
<std::string
>& labels
, const std::string
& cf_name
,
729 uint64_t fd
, uint32_t level
, TraceType type
, TableReaderCaller caller
,
730 uint64_t block_key
, const BlockAccessInfo
& block
) const {
731 std::map
<std::string
, std::string
> label_value_map
;
732 label_value_map
[kGroupbyAll
] = kGroupbyAll
;
733 label_value_map
[kGroupbyLevel
] = std::to_string(level
);
734 label_value_map
[kGroupbyCaller
] = caller_to_string(caller
);
735 label_value_map
[kGroupbySSTFile
] = std::to_string(fd
);
736 label_value_map
[kGroupbyBlockType
] = block_type_to_string(type
);
737 label_value_map
[kGroupbyColumnFamily
] = cf_name
;
738 label_value_map
[kGroupbyBlock
] = std::to_string(block_key
);
739 label_value_map
[kGroupbyTable
] = std::to_string(block
.table_id
);
740 // Concatenate the label values.
742 for (auto const& l
: labels
) {
743 label
+= label_value_map
[l
];
746 if (!label
.empty()) {
752 void BlockCacheTraceAnalyzer::TraverseBlocks(
753 std::function
<void(const std::string
& /*cf_name*/, uint64_t /*fd*/,
754 uint32_t /*level*/, TraceType
/*block_type*/,
755 const std::string
& /*block_key*/,
756 uint64_t /*block_key_id*/,
757 const BlockAccessInfo
& /*block_access_info*/)>
759 std::set
<std::string
>* labels
) const {
760 for (auto const& cf_aggregates
: cf_aggregates_map_
) {
761 // Stats per column family.
762 const std::string
& cf_name
= cf_aggregates
.first
;
763 for (auto const& file_aggregates
: cf_aggregates
.second
.fd_aggregates_map
) {
764 // Stats per SST file.
765 const uint64_t fd
= file_aggregates
.first
;
766 const uint32_t level
= file_aggregates
.second
.level
;
767 for (auto const& block_type_aggregates
:
768 file_aggregates
.second
.block_type_aggregates_map
) {
769 // Stats per block type.
770 const TraceType type
= block_type_aggregates
.first
;
771 for (auto const& block_access_info
:
772 block_type_aggregates
.second
.block_access_info_map
) {
774 if (labels
&& block_access_info
.second
.table_id
== 0 &&
775 labels
->find(kGroupbyTable
) != labels
->end()) {
776 // We only know table id information for get requests.
779 block_callback(cf_name
, fd
, level
, type
, block_access_info
.first
,
780 block_access_info
.second
.block_id
,
781 block_access_info
.second
);
788 void BlockCacheTraceAnalyzer::WriteGetSpatialLocality(
789 const std::string
& label_str
,
790 const std::vector
<uint64_t>& percent_buckets
) const {
791 std::set
<std::string
> labels
= ParseLabelStr(label_str
);
792 std::map
<std::string
, std::map
<uint64_t, uint64_t>> label_pnrefkeys_nblocks
;
793 std::map
<std::string
, std::map
<uint64_t, uint64_t>> label_pnrefs_nblocks
;
794 std::map
<std::string
, std::map
<uint64_t, uint64_t>> label_pndatasize_nblocks
;
795 uint64_t nblocks
= 0;
796 auto block_callback
= [&](const std::string
& cf_name
, uint64_t fd
,
797 uint32_t level
, TraceType
/*block_type*/,
798 const std::string
& /*block_key*/,
799 uint64_t /*block_key_id*/,
800 const BlockAccessInfo
& block
) {
801 if (block
.num_keys
== 0) {
804 uint64_t naccesses
= 0;
805 for (auto const& key_access
: block
.key_num_access_map
) {
806 for (auto const& caller_access
: key_access
.second
) {
807 if (caller_access
.first
== TableReaderCaller::kUserGet
) {
808 naccesses
+= caller_access
.second
;
812 const std::string label
=
813 BuildLabel(labels
, cf_name
, fd
, level
, TraceType::kBlockTraceDataBlock
,
814 TableReaderCaller::kUserGet
, /*block_id=*/0, block
);
816 const uint64_t percent_referenced_for_existing_keys
=
817 static_cast<uint64_t>(std::max(
818 percent(block
.key_num_access_map
.size(), block
.num_keys
), 0.0));
819 const uint64_t percent_accesses_for_existing_keys
=
820 static_cast<uint64_t>(std::max(
821 percent(block
.num_referenced_key_exist_in_block
, naccesses
), 0.0));
822 const uint64_t percent_referenced_data_size
= static_cast<uint64_t>(
823 std::max(percent(block
.referenced_data_size
, block
.block_size
), 0.0));
824 if (label_pnrefkeys_nblocks
.find(label
) == label_pnrefkeys_nblocks
.end()) {
825 for (auto const& percent_bucket
: percent_buckets
) {
826 label_pnrefkeys_nblocks
[label
][percent_bucket
] = 0;
827 label_pnrefs_nblocks
[label
][percent_bucket
] = 0;
828 label_pndatasize_nblocks
[label
][percent_bucket
] = 0;
831 label_pnrefkeys_nblocks
[label
]
832 .upper_bound(percent_referenced_for_existing_keys
)
834 label_pnrefs_nblocks
[label
]
835 .upper_bound(percent_accesses_for_existing_keys
)
837 label_pndatasize_nblocks
[label
]
838 .upper_bound(percent_referenced_data_size
)
842 TraverseBlocks(block_callback
, &labels
);
843 WriteStatsToFile(label_str
, percent_buckets
, kFileNameSuffixPercentRefKeys
,
844 label_pnrefkeys_nblocks
, nblocks
);
845 WriteStatsToFile(label_str
, percent_buckets
,
846 kFileNameSuffixPercentAccessesOnRefKeys
,
847 label_pnrefs_nblocks
, nblocks
);
848 WriteStatsToFile(label_str
, percent_buckets
,
849 kFileNameSuffixPercentDataSizeOnRefKeys
,
850 label_pndatasize_nblocks
, nblocks
);
853 void BlockCacheTraceAnalyzer::WriteAccessTimeline(const std::string
& label_str
,
855 bool user_access_only
) const {
856 std::set
<std::string
> labels
= ParseLabelStr(label_str
);
857 uint64_t start_time
= port::kMaxUint64
;
858 uint64_t end_time
= 0;
859 std::map
<std::string
, std::map
<uint64_t, uint64_t>> label_access_timeline
;
860 std::map
<uint64_t, std::vector
<std::string
>> access_count_block_id_map
;
862 auto block_callback
= [&](const std::string
& cf_name
, uint64_t fd
,
863 uint32_t level
, TraceType type
,
864 const std::string
& /*block_key*/, uint64_t block_id
,
865 const BlockAccessInfo
& block
) {
866 uint64_t naccesses
= 0;
867 for (auto const& timeline
: block
.caller_num_accesses_timeline
) {
868 const TableReaderCaller caller
= timeline
.first
;
869 if (user_access_only
&& !is_user_access(caller
)) {
872 const std::string label
=
873 BuildLabel(labels
, cf_name
, fd
, level
, type
, caller
, block_id
, block
);
874 for (auto const& naccess
: timeline
.second
) {
875 const uint64_t timestamp
= naccess
.first
/ time_unit
;
876 const uint64_t num
= naccess
.second
;
877 label_access_timeline
[label
][timestamp
] += num
;
878 start_time
= std::min(start_time
, timestamp
);
879 end_time
= std::max(end_time
, timestamp
);
884 access_count_block_id_map
[naccesses
].push_back(std::to_string(block_id
));
887 TraverseBlocks(block_callback
, &labels
);
889 // We have label_access_timeline now. Write them into a file.
890 const std::string user_access_prefix
=
891 user_access_only
? "user_access_only_" : "all_access_";
892 const std::string output_path
= output_dir_
+ "/" + user_access_prefix
+
893 label_str
+ "_" + std::to_string(time_unit
) +
894 "_" + kFileNameSuffixAccessTimeline
;
895 std::ofstream
out(output_path
);
896 if (!out
.is_open()) {
899 std::string
header("time");
900 if (labels
.find("block") != labels
.end()) {
901 for (uint64_t now
= start_time
; now
<= end_time
; now
++) {
903 header
+= std::to_string(now
);
905 out
<< header
<< std::endl
;
906 // Write the most frequently accessed blocks first.
907 for (auto naccess_it
= access_count_block_id_map
.rbegin();
908 naccess_it
!= access_count_block_id_map
.rend(); naccess_it
++) {
909 for (auto& block_id_it
: naccess_it
->second
) {
910 std::string
row(block_id_it
);
911 for (uint64_t now
= start_time
; now
<= end_time
; now
++) {
912 auto it
= label_access_timeline
[block_id_it
].find(now
);
914 if (it
!= label_access_timeline
[block_id_it
].end()) {
915 row
+= std::to_string(it
->second
);
920 out
<< row
<< std::endl
;
926 for (uint64_t now
= start_time
; now
<= end_time
; now
++) {
928 header
+= std::to_string(now
);
930 out
<< header
<< std::endl
;
931 for (auto const& label
: label_access_timeline
) {
932 std::string
row(label
.first
);
933 for (uint64_t now
= start_time
; now
<= end_time
; now
++) {
934 auto it
= label
.second
.find(now
);
936 if (it
!= label
.second
.end()) {
937 row
+= std::to_string(it
->second
);
942 out
<< row
<< std::endl
;
948 void BlockCacheTraceAnalyzer::WriteReuseDistance(
949 const std::string
& label_str
,
950 const std::vector
<uint64_t>& distance_buckets
) const {
951 std::set
<std::string
> labels
= ParseLabelStr(label_str
);
952 std::map
<std::string
, std::map
<uint64_t, uint64_t>> label_distance_num_reuses
;
953 uint64_t total_num_reuses
= 0;
954 auto block_callback
= [&](const std::string
& cf_name
, uint64_t fd
,
955 uint32_t level
, TraceType type
,
956 const std::string
& /*block_key*/, uint64_t block_id
,
957 const BlockAccessInfo
& block
) {
958 const std::string label
= BuildLabel(
959 labels
, cf_name
, fd
, level
, type
,
960 TableReaderCaller::kMaxBlockCacheLookupCaller
, block_id
, block
);
961 if (label_distance_num_reuses
.find(label
) ==
962 label_distance_num_reuses
.end()) {
963 // The first time we encounter this label.
964 for (auto const& distance_bucket
: distance_buckets
) {
965 label_distance_num_reuses
[label
][distance_bucket
] = 0;
968 for (auto const& reuse_distance
: block
.reuse_distance_count
) {
969 label_distance_num_reuses
[label
]
970 .upper_bound(reuse_distance
.first
)
971 ->second
+= reuse_distance
.second
;
972 total_num_reuses
+= reuse_distance
.second
;
975 TraverseBlocks(block_callback
, &labels
);
976 // We have label_naccesses and label_distance_num_reuses now. Write them into
978 const std::string output_path
=
979 output_dir_
+ "/" + label_str
+ "_reuse_distance";
980 std::ofstream
out(output_path
);
981 if (!out
.is_open()) {
984 std::string
header("bucket");
985 for (auto const& label_it
: label_distance_num_reuses
) {
987 header
+= label_it
.first
;
989 out
<< header
<< std::endl
;
990 for (auto const& bucket
: distance_buckets
) {
991 std::string
row(std::to_string(bucket
));
992 for (auto const& label_it
: label_distance_num_reuses
) {
993 auto const& it
= label_it
.second
.find(bucket
);
994 assert(it
!= label_it
.second
.end());
996 row
+= std::to_string(percent(it
->second
, total_num_reuses
));
998 out
<< row
<< std::endl
;
1003 void BlockCacheTraceAnalyzer::UpdateReuseIntervalStats(
1004 const std::string
& label
, const std::vector
<uint64_t>& time_buckets
,
1005 const std::map
<uint64_t, uint64_t> timeline
,
1006 std::map
<std::string
, std::map
<uint64_t, uint64_t>>* label_time_num_reuses
,
1007 uint64_t* total_num_reuses
) const {
1008 assert(label_time_num_reuses
);
1009 assert(total_num_reuses
);
1010 if (label_time_num_reuses
->find(label
) == label_time_num_reuses
->end()) {
1011 // The first time we encounter this label.
1012 for (auto const& time_bucket
: time_buckets
) {
1013 (*label_time_num_reuses
)[label
][time_bucket
] = 0;
1016 auto it
= timeline
.begin();
1017 uint64_t prev_timestamp
= it
->first
;
1018 const uint64_t prev_num
= it
->second
;
1020 // Reused within one second.
1022 (*label_time_num_reuses
)[label
].upper_bound(0)->second
+= prev_num
- 1;
1023 *total_num_reuses
+= prev_num
- 1;
1025 while (it
!= timeline
.end()) {
1026 const uint64_t timestamp
= it
->first
;
1027 const uint64_t num
= it
->second
;
1028 const uint64_t reuse_interval
= timestamp
- prev_timestamp
;
1029 (*label_time_num_reuses
)[label
].upper_bound(reuse_interval
)->second
+= 1;
1031 (*label_time_num_reuses
)[label
].upper_bound(0)->second
+= num
- 1;
1033 prev_timestamp
= timestamp
;
1034 *total_num_reuses
+= num
;
1039 void BlockCacheTraceAnalyzer::WriteStatsToFile(
1040 const std::string
& label_str
, const std::vector
<uint64_t>& time_buckets
,
1041 const std::string
& filename_suffix
,
1042 const std::map
<std::string
, std::map
<uint64_t, uint64_t>>& label_data
,
1043 uint64_t ntotal
) const {
1044 const std::string output_path
=
1045 output_dir_
+ "/" + label_str
+ "_" + filename_suffix
;
1046 std::ofstream
out(output_path
);
1047 if (!out
.is_open()) {
1050 std::string
header("bucket");
1051 for (auto const& label_it
: label_data
) {
1053 header
+= label_it
.first
;
1055 out
<< header
<< std::endl
;
1056 for (auto const& bucket
: time_buckets
) {
1057 std::string
row(std::to_string(bucket
));
1058 for (auto const& label_it
: label_data
) {
1059 auto const& it
= label_it
.second
.find(bucket
);
1060 assert(it
!= label_it
.second
.end());
1062 row
+= std::to_string(percent(it
->second
, ntotal
));
1064 out
<< row
<< std::endl
;
1069 void BlockCacheTraceAnalyzer::WriteReuseInterval(
1070 const std::string
& label_str
,
1071 const std::vector
<uint64_t>& time_buckets
) const {
1072 std::set
<std::string
> labels
= ParseLabelStr(label_str
);
1073 std::map
<std::string
, std::map
<uint64_t, uint64_t>> label_time_num_reuses
;
1074 std::map
<std::string
, std::map
<uint64_t, uint64_t>> label_avg_reuse_nblocks
;
1075 std::map
<std::string
, std::map
<uint64_t, uint64_t>> label_avg_reuse_naccesses
;
1077 uint64_t total_num_reuses
= 0;
1078 uint64_t total_nblocks
= 0;
1079 uint64_t total_accesses
= 0;
1080 auto block_callback
= [&](const std::string
& cf_name
, uint64_t fd
,
1081 uint32_t level
, TraceType type
,
1082 const std::string
& /*block_key*/, uint64_t block_id
,
1083 const BlockAccessInfo
& block
) {
1085 total_accesses
+= block
.num_accesses
;
1086 uint64_t avg_reuse_interval
= 0;
1087 if (block
.num_accesses
> 1) {
1088 avg_reuse_interval
= ((block
.last_access_time
- block
.first_access_time
) /
1092 avg_reuse_interval
= port::kMaxUint64
- 1;
1094 if (labels
.find(kGroupbyCaller
) != labels
.end()) {
1095 for (auto const& timeline
: block
.caller_num_accesses_timeline
) {
1096 const TableReaderCaller caller
= timeline
.first
;
1097 const std::string label
= BuildLabel(labels
, cf_name
, fd
, level
, type
,
1098 caller
, block_id
, block
);
1099 UpdateReuseIntervalStats(label
, time_buckets
, timeline
.second
,
1100 &label_time_num_reuses
, &total_num_reuses
);
1104 // Does not group by caller so we need to flatten the access timeline.
1105 const std::string label
= BuildLabel(
1106 labels
, cf_name
, fd
, level
, type
,
1107 TableReaderCaller::kMaxBlockCacheLookupCaller
, block_id
, block
);
1108 std::map
<uint64_t, uint64_t> timeline
;
1109 for (auto const& caller_timeline
: block
.caller_num_accesses_timeline
) {
1110 for (auto const& time_naccess
: caller_timeline
.second
) {
1111 timeline
[time_naccess
.first
] += time_naccess
.second
;
1114 UpdateReuseIntervalStats(label
, time_buckets
, timeline
,
1115 &label_time_num_reuses
, &total_num_reuses
);
1116 if (label_avg_reuse_nblocks
.find(label
) == label_avg_reuse_nblocks
.end()) {
1117 for (auto const& time_bucket
: time_buckets
) {
1118 label_avg_reuse_nblocks
[label
][time_bucket
] = 0;
1119 label_avg_reuse_naccesses
[label
][time_bucket
] = 0;
1122 label_avg_reuse_nblocks
[label
].upper_bound(avg_reuse_interval
)->second
+= 1;
1123 label_avg_reuse_naccesses
[label
].upper_bound(avg_reuse_interval
)->second
+=
1126 TraverseBlocks(block_callback
, &labels
);
1128 // Write the stats into files.
1129 WriteStatsToFile(label_str
, time_buckets
, kFileNameSuffixReuseInterval
,
1130 label_time_num_reuses
, total_num_reuses
);
1131 WriteStatsToFile(label_str
, time_buckets
, kFileNameSuffixAvgReuseInterval
,
1132 label_avg_reuse_nblocks
, total_nblocks
);
1133 WriteStatsToFile(label_str
, time_buckets
,
1134 kFileNameSuffixAvgReuseIntervalNaccesses
,
1135 label_avg_reuse_naccesses
, total_accesses
);
1138 void BlockCacheTraceAnalyzer::WriteReuseLifetime(
1139 const std::string
& label_str
,
1140 const std::vector
<uint64_t>& time_buckets
) const {
1141 std::set
<std::string
> labels
= ParseLabelStr(label_str
);
1142 std::map
<std::string
, std::map
<uint64_t, uint64_t>> label_lifetime_nblocks
;
1143 uint64_t total_nblocks
= 0;
1144 auto block_callback
= [&](const std::string
& cf_name
, uint64_t fd
,
1145 uint32_t level
, TraceType type
,
1146 const std::string
& /*block_key*/, uint64_t block_id
,
1147 const BlockAccessInfo
& block
) {
1148 uint64_t lifetime
= 0;
1149 if (block
.num_accesses
> 1) {
1151 (block
.last_access_time
- block
.first_access_time
) / kMicrosInSecond
;
1153 lifetime
= port::kMaxUint64
- 1;
1155 const std::string label
= BuildLabel(
1156 labels
, cf_name
, fd
, level
, type
,
1157 TableReaderCaller::kMaxBlockCacheLookupCaller
, block_id
, block
);
1159 if (label_lifetime_nblocks
.find(label
) == label_lifetime_nblocks
.end()) {
1160 // The first time we encounter this label.
1161 for (auto const& time_bucket
: time_buckets
) {
1162 label_lifetime_nblocks
[label
][time_bucket
] = 0;
1165 label_lifetime_nblocks
[label
].upper_bound(lifetime
)->second
+= 1;
1168 TraverseBlocks(block_callback
, &labels
);
1169 WriteStatsToFile(label_str
, time_buckets
, kFileNameSuffixReuseLifetime
,
1170 label_lifetime_nblocks
, total_nblocks
);
1173 void BlockCacheTraceAnalyzer::WriteBlockReuseTimeline(
1174 const uint64_t reuse_window
, bool user_access_only
, TraceType block_type
) const {
1175 // A map from block key to an array of bools that states whether a block is
1176 // accessed in a time window.
1177 std::map
<uint64_t, std::vector
<bool>> block_accessed
;
1178 const uint64_t trace_duration
=
1179 trace_end_timestamp_in_seconds_
- trace_start_timestamp_in_seconds_
;
1180 const uint64_t reuse_vector_size
= (trace_duration
/ reuse_window
);
1181 if (reuse_vector_size
< 2) {
1182 // The reuse window is less than 2. We cannot calculate the reused
1183 // percentage of blocks.
1186 auto block_callback
= [&](const std::string
& /*cf_name*/, uint64_t /*fd*/,
1187 uint32_t /*level*/, TraceType
/*type*/,
1188 const std::string
& /*block_key*/, uint64_t block_id
,
1189 const BlockAccessInfo
& block
) {
1190 if (block_accessed
.find(block_id
) == block_accessed
.end()) {
1191 block_accessed
[block_id
].resize(reuse_vector_size
);
1192 for (uint64_t i
= 0; i
< reuse_vector_size
; i
++) {
1193 block_accessed
[block_id
][i
] = false;
1196 for (auto const& caller_num
: block
.caller_num_accesses_timeline
) {
1197 const TableReaderCaller caller
= caller_num
.first
;
1198 for (auto const& timeline
: caller_num
.second
) {
1199 const uint64_t timestamp
= timeline
.first
;
1200 const uint64_t elapsed_time
=
1201 timestamp
- trace_start_timestamp_in_seconds_
;
1202 if (!user_access_only
|| is_user_access(caller
)) {
1204 std::min(elapsed_time
/ reuse_window
, reuse_vector_size
- 1);
1205 block_accessed
[block_id
][index
] = true;
1210 TraverseBlocks(block_callback
);
1212 // A cell is the number of blocks accessed in a reuse window.
1213 std::unique_ptr
<uint64_t[]> reuse_table(new uint64_t[reuse_vector_size
* reuse_vector_size
]);
1214 for (uint64_t start_time
= 0; start_time
< reuse_vector_size
; start_time
++) {
1215 // Initialize the reuse_table.
1216 for (uint64_t i
= 0; i
< reuse_vector_size
; i
++) {
1217 reuse_table
[start_time
* reuse_vector_size
+ i
] = 0;
1219 // Examine all blocks.
1220 for (auto const& block
: block_accessed
) {
1221 for (uint64_t i
= start_time
; i
< reuse_vector_size
; i
++) {
1222 if (block
.second
[start_time
] && block
.second
[i
]) {
1223 // This block is accessed at start time and at the current time. We
1224 // increment reuse_table[start_time][i] since it is reused at the ith
1226 reuse_table
[start_time
* reuse_vector_size
+ i
]++;
1231 const std::string user_access_prefix
=
1232 user_access_only
? "_user_access_only_" : "_all_access_";
1233 const std::string output_path
=
1234 output_dir_
+ "/" + block_type_to_string(block_type
) +
1235 user_access_prefix
+ std::to_string(reuse_window
) + "_" +
1236 kFileNameSuffixAccessReuseBlocksTimeline
;
1237 std::ofstream
out(output_path
);
1238 if (!out
.is_open()) {
1241 std::string
header("start_time");
1242 for (uint64_t start_time
= 0; start_time
< reuse_vector_size
; start_time
++) {
1244 header
+= std::to_string(start_time
);
1246 out
<< header
<< std::endl
;
1247 for (uint64_t start_time
= 0; start_time
< reuse_vector_size
; start_time
++) {
1248 std::string
row(std::to_string(start_time
* reuse_window
));
1249 for (uint64_t j
= 0; j
< reuse_vector_size
; j
++) {
1251 if (j
< start_time
) {
1254 row
+= std::to_string(percent(reuse_table
[start_time
* reuse_vector_size
+ j
],
1255 reuse_table
[start_time
* reuse_vector_size
+ start_time
]));
1258 out
<< row
<< std::endl
;
1263 std::string
BlockCacheTraceAnalyzer::OutputPercentAccessStats(
1264 uint64_t total_accesses
,
1265 const std::map
<std::string
, uint64_t>& cf_access_count
) const {
1267 for (auto const& cf_aggregates
: cf_aggregates_map_
) {
1268 const std::string
& cf_name
= cf_aggregates
.first
;
1269 const auto& naccess
= cf_access_count
.find(cf_name
);
1271 if (naccess
!= cf_access_count
.end()) {
1272 row
+= std::to_string(percent(naccess
->second
, total_accesses
));
1280 void BlockCacheTraceAnalyzer::WritePercentAccessSummaryStats() const {
1281 std::map
<TableReaderCaller
, std::map
<std::string
, uint64_t>>
1283 uint64_t total_accesses
= 0;
1284 auto block_callback
=
1285 [&](const std::string
& cf_name
, uint64_t /*fd*/, uint32_t /*level*/,
1286 TraceType
/*type*/, const std::string
& /*block_key*/,
1287 uint64_t /*block_id*/, const BlockAccessInfo
& block
) {
1288 for (auto const& caller_num
: block
.caller_num_access_map
) {
1289 const TableReaderCaller caller
= caller_num
.first
;
1290 const uint64_t naccess
= caller_num
.second
;
1291 caller_cf_accesses
[caller
][cf_name
] += naccess
;
1292 total_accesses
+= naccess
;
1295 TraverseBlocks(block_callback
);
1297 const std::string output_path
=
1298 output_dir_
+ "/" + kFileNameSuffixPercentOfAccessSummary
;
1299 std::ofstream
out(output_path
);
1300 if (!out
.is_open()) {
1303 std::string
header("caller");
1304 for (auto const& cf_name
: cf_aggregates_map_
) {
1306 header
+= cf_name
.first
;
1308 out
<< header
<< std::endl
;
1309 for (auto const& cf_naccess_it
: caller_cf_accesses
) {
1310 const TableReaderCaller caller
= cf_naccess_it
.first
;
1312 row
+= caller_to_string(caller
);
1313 row
+= OutputPercentAccessStats(total_accesses
, cf_naccess_it
.second
);
1314 out
<< row
<< std::endl
;
1319 void BlockCacheTraceAnalyzer::WriteDetailedPercentAccessSummaryStats(
1320 TableReaderCaller analyzing_caller
) const {
1321 std::map
<uint32_t, std::map
<std::string
, uint64_t>> level_cf_accesses
;
1322 std::map
<TraceType
, std::map
<std::string
, uint64_t>> bt_cf_accesses
;
1323 uint64_t total_accesses
= 0;
1324 auto block_callback
=
1325 [&](const std::string
& cf_name
, uint64_t /*fd*/, uint32_t level
,
1326 TraceType type
, const std::string
& /*block_key*/,
1327 uint64_t /*block_id*/, const BlockAccessInfo
& block
) {
1328 for (auto const& caller_num
: block
.caller_num_access_map
) {
1329 const TableReaderCaller caller
= caller_num
.first
;
1330 if (caller
== analyzing_caller
) {
1331 const uint64_t naccess
= caller_num
.second
;
1332 level_cf_accesses
[level
][cf_name
] += naccess
;
1333 bt_cf_accesses
[type
][cf_name
] += naccess
;
1334 total_accesses
+= naccess
;
1338 TraverseBlocks(block_callback
);
1340 const std::string output_path
=
1341 output_dir_
+ "/" + caller_to_string(analyzing_caller
) + "_level_" +
1342 kFileNameSuffixPercentOfAccessSummary
;
1343 std::ofstream
out(output_path
);
1344 if (!out
.is_open()) {
1347 std::string
header("level");
1348 for (auto const& cf_name
: cf_aggregates_map_
) {
1350 header
+= cf_name
.first
;
1352 out
<< header
<< std::endl
;
1353 for (auto const& level_naccess_it
: level_cf_accesses
) {
1354 const uint32_t level
= level_naccess_it
.first
;
1356 row
+= std::to_string(level
);
1357 row
+= OutputPercentAccessStats(total_accesses
, level_naccess_it
.second
);
1358 out
<< row
<< std::endl
;
1363 const std::string output_path
=
1364 output_dir_
+ "/" + caller_to_string(analyzing_caller
) + "_bt_" +
1365 kFileNameSuffixPercentOfAccessSummary
;
1366 std::ofstream
out(output_path
);
1367 if (!out
.is_open()) {
1370 std::string
header("bt");
1371 for (auto const& cf_name
: cf_aggregates_map_
) {
1373 header
+= cf_name
.first
;
1375 out
<< header
<< std::endl
;
1376 for (auto const& bt_naccess_it
: bt_cf_accesses
) {
1377 const TraceType bt
= bt_naccess_it
.first
;
1379 row
+= block_type_to_string(bt
);
1380 row
+= OutputPercentAccessStats(total_accesses
, bt_naccess_it
.second
);
1381 out
<< row
<< std::endl
;
1387 void BlockCacheTraceAnalyzer::WriteAccessCountSummaryStats(
1388 const std::vector
<uint64_t>& access_count_buckets
,
1389 bool user_access_only
) const {
1391 // y: # of accesses.
1392 std::map
<std::string
, std::map
<uint64_t, uint64_t>> bt_access_nblocks
;
1393 std::map
<std::string
, std::map
<uint64_t, uint64_t>> cf_access_nblocks
;
1394 uint64_t total_nblocks
= 0;
1395 auto block_callback
=
1396 [&](const std::string
& cf_name
, uint64_t /*fd*/, uint32_t /*level*/,
1397 TraceType type
, const std::string
& /*block_key*/,
1398 uint64_t /*block_id*/, const BlockAccessInfo
& block
) {
1399 const std::string type_str
= block_type_to_string(type
);
1400 if (cf_access_nblocks
.find(cf_name
) == cf_access_nblocks
.end()) {
1402 for (auto& access
: access_count_buckets
) {
1403 cf_access_nblocks
[cf_name
][access
] = 0;
1406 if (bt_access_nblocks
.find(type_str
) == bt_access_nblocks
.end()) {
1408 for (auto& access
: access_count_buckets
) {
1409 bt_access_nblocks
[type_str
][access
] = 0;
1412 uint64_t naccesses
= 0;
1413 for (auto const& caller_access
: block
.caller_num_access_map
) {
1414 if (!user_access_only
|| is_user_access(caller_access
.first
)) {
1415 naccesses
+= caller_access
.second
;
1418 if (naccesses
== 0) {
1422 bt_access_nblocks
[type_str
].upper_bound(naccesses
)->second
+= 1;
1423 cf_access_nblocks
[cf_name
].upper_bound(naccesses
)->second
+= 1;
1425 TraverseBlocks(block_callback
);
1426 const std::string user_access_prefix
=
1427 user_access_only
? "user_access_only_" : "all_access_";
1428 WriteStatsToFile("cf", access_count_buckets
,
1429 user_access_prefix
+ kFileNameSuffixAccessCountSummary
,
1430 cf_access_nblocks
, total_nblocks
);
1431 WriteStatsToFile("bt", access_count_buckets
,
1432 user_access_prefix
+ kFileNameSuffixAccessCountSummary
,
1433 bt_access_nblocks
, total_nblocks
);
1436 BlockCacheTraceAnalyzer::BlockCacheTraceAnalyzer(
1437 const std::string
& trace_file_path
, const std::string
& output_dir
,
1438 const std::string
& human_readable_trace_file_path
,
1439 bool compute_reuse_distance
, bool mrc_only
,
1440 bool is_human_readable_trace_file
,
1441 std::unique_ptr
<BlockCacheTraceSimulator
>&& cache_simulator
)
1442 : env_(ROCKSDB_NAMESPACE::Env::Default()),
1443 trace_file_path_(trace_file_path
),
1444 output_dir_(output_dir
),
1445 human_readable_trace_file_path_(human_readable_trace_file_path
),
1446 compute_reuse_distance_(compute_reuse_distance
),
1447 mrc_only_(mrc_only
),
1448 is_human_readable_trace_file_(is_human_readable_trace_file
),
1449 cache_simulator_(std::move(cache_simulator
)) {}
1451 void BlockCacheTraceAnalyzer::ComputeReuseDistance(
1452 BlockAccessInfo
* info
) const {
1454 if (info
->num_accesses
== 0) {
1457 uint64_t reuse_distance
= 0;
1458 for (auto const& block_key
: info
->unique_blocks_since_last_access
) {
1459 auto const& it
= block_info_map_
.find(block_key
);
1460 // This block must exist.
1461 assert(it
!= block_info_map_
.end());
1462 reuse_distance
+= it
->second
->block_size
;
1464 info
->reuse_distance_count
[reuse_distance
] += 1;
1465 // We clear this hash set since this is the second access on this block.
1466 info
->unique_blocks_since_last_access
.clear();
1469 Status
BlockCacheTraceAnalyzer::RecordAccess(
1470 const BlockCacheTraceRecord
& access
) {
1471 ColumnFamilyAccessInfoAggregate
& cf_aggr
= cf_aggregates_map_
[access
.cf_name
];
1472 SSTFileAccessInfoAggregate
& file_aggr
=
1473 cf_aggr
.fd_aggregates_map
[access
.sst_fd_number
];
1474 file_aggr
.level
= access
.level
;
1475 BlockTypeAccessInfoAggregate
& block_type_aggr
=
1476 file_aggr
.block_type_aggregates_map
[access
.block_type
];
1477 if (block_type_aggr
.block_access_info_map
.find(access
.block_key
) ==
1478 block_type_aggr
.block_access_info_map
.end()) {
1479 block_type_aggr
.block_access_info_map
[access
.block_key
].block_id
=
1483 BlockAccessInfo
& block_access_info
=
1484 block_type_aggr
.block_access_info_map
[access
.block_key
];
1485 if (compute_reuse_distance_
) {
1486 ComputeReuseDistance(&block_access_info
);
1488 block_access_info
.AddAccess(access
, access_sequence_number_
);
1489 block_info_map_
[access
.block_key
] = &block_access_info
;
1490 uint64_t get_key_id
= 0;
1491 if (access
.caller
== TableReaderCaller::kUserGet
&&
1492 access
.get_id
!= BlockCacheTraceHelper::kReservedGetId
) {
1493 std::string user_key
= ExtractUserKey(access
.referenced_key
).ToString();
1494 if (get_key_info_map_
.find(user_key
) == get_key_info_map_
.end()) {
1495 get_key_info_map_
[user_key
].key_id
= unique_get_key_id_
;
1496 unique_get_key_id_
++;
1498 get_key_id
= get_key_info_map_
[user_key
].key_id
;
1499 get_key_info_map_
[user_key
].AddAccess(access
, access_sequence_number_
);
1502 if (compute_reuse_distance_
) {
1503 // Add this block to all existing blocks.
1504 for (auto& cf_aggregates
: cf_aggregates_map_
) {
1505 for (auto& file_aggregates
: cf_aggregates
.second
.fd_aggregates_map
) {
1506 for (auto& block_type_aggregates
:
1507 file_aggregates
.second
.block_type_aggregates_map
) {
1508 for (auto& existing_block
:
1509 block_type_aggregates
.second
.block_access_info_map
) {
1510 existing_block
.second
.unique_blocks_since_last_access
.insert(
1517 return human_readable_trace_writer_
.WriteHumanReadableTraceRecord(
1518 access
, block_access_info
.block_id
, get_key_id
);
1521 Status
BlockCacheTraceAnalyzer::Analyze() {
1522 std::unique_ptr
<BlockCacheTraceReader
> reader
;
1523 Status s
= Status::OK();
1524 if (is_human_readable_trace_file_
) {
1525 reader
.reset(new BlockCacheHumanReadableTraceReader(trace_file_path_
));
1527 std::unique_ptr
<TraceReader
> trace_reader
;
1528 s
= NewFileTraceReader(env_
, EnvOptions(), trace_file_path_
, &trace_reader
);
1532 reader
.reset(new BlockCacheTraceReader(std::move(trace_reader
)));
1533 s
= reader
->ReadHeader(&header_
);
1538 if (!human_readable_trace_file_path_
.empty()) {
1539 s
= human_readable_trace_writer_
.NewWritableFile(
1540 human_readable_trace_file_path_
, env_
);
1545 uint64_t start
= env_
->NowMicros();
1546 uint64_t time_interval
= 0;
1548 BlockCacheTraceRecord access
;
1549 s
= reader
->ReadAccess(&access
);
1554 s
= RecordAccess(access
);
1559 if (trace_start_timestamp_in_seconds_
== 0) {
1560 trace_start_timestamp_in_seconds_
=
1561 access
.access_timestamp
/ kMicrosInSecond
;
1563 trace_end_timestamp_in_seconds_
= access
.access_timestamp
/ kMicrosInSecond
;
1564 miss_ratio_stats_
.UpdateMetrics(access
.access_timestamp
,
1565 is_user_access(access
.caller
),
1566 access
.is_cache_hit
== Boolean::kFalse
);
1567 if (cache_simulator_
) {
1568 cache_simulator_
->Access(access
);
1570 access_sequence_number_
++;
1571 uint64_t now
= env_
->NowMicros();
1572 uint64_t duration
= (now
- start
) / kMicrosInSecond
;
1573 if (duration
> 10 * time_interval
) {
1574 uint64_t trace_duration
=
1575 trace_end_timestamp_in_seconds_
- trace_start_timestamp_in_seconds_
;
1577 "Running for %" PRIu64
" seconds: Processed %" PRIu64
1578 " records/second. Trace duration %" PRIu64
1579 " seconds. Observed miss ratio %.2f\n",
1580 duration
, duration
> 0 ? access_sequence_number_
/ duration
: 0,
1581 trace_duration
, miss_ratio_stats_
.miss_ratio());
1585 uint64_t now
= env_
->NowMicros();
1586 uint64_t duration
= (now
- start
) / kMicrosInSecond
;
1587 uint64_t trace_duration
=
1588 trace_end_timestamp_in_seconds_
- trace_start_timestamp_in_seconds_
;
1590 "Running for %" PRIu64
" seconds: Processed %" PRIu64
1591 " records/second. Trace duration %" PRIu64
1592 " seconds. Observed miss ratio %.2f\n",
1593 duration
, duration
> 0 ? access_sequence_number_
/ duration
: 0,
1594 trace_duration
, miss_ratio_stats_
.miss_ratio());
1598 void BlockCacheTraceAnalyzer::PrintBlockSizeStats() const {
1599 HistogramStat bs_stats
;
1600 std::map
<TraceType
, HistogramStat
> bt_stats_map
;
1601 std::map
<std::string
, std::map
<TraceType
, HistogramStat
>> cf_bt_stats_map
;
1602 auto block_callback
=
1603 [&](const std::string
& cf_name
, uint64_t /*fd*/, uint32_t /*level*/,
1604 TraceType type
, const std::string
& /*block_key*/,
1605 uint64_t /*block_id*/, const BlockAccessInfo
& block
) {
1606 if (block
.block_size
== 0) {
1607 // Block size may be 0 when 1) compaction observes a cache miss and
1608 // does not insert the missing block into the cache again. 2)
1609 // fetching filter blocks in SST files at the last level.
1612 bs_stats
.Add(block
.block_size
);
1613 bt_stats_map
[type
].Add(block
.block_size
);
1614 cf_bt_stats_map
[cf_name
][type
].Add(block
.block_size
);
1616 TraverseBlocks(block_callback
);
1617 fprintf(stdout
, "Block size stats: \n%s", bs_stats
.ToString().c_str());
1618 for (auto const& bt_stats
: bt_stats_map
) {
1619 print_break_lines(/*num_break_lines=*/1);
1620 fprintf(stdout
, "Block size stats for block type %s: \n%s",
1621 block_type_to_string(bt_stats
.first
).c_str(),
1622 bt_stats
.second
.ToString().c_str());
1624 for (auto const& cf_bt_stats
: cf_bt_stats_map
) {
1625 const std::string
& cf_name
= cf_bt_stats
.first
;
1626 for (auto const& bt_stats
: cf_bt_stats
.second
) {
1627 print_break_lines(/*num_break_lines=*/1);
1629 "Block size stats for column family %s and block type %s: \n%s",
1630 cf_name
.c_str(), block_type_to_string(bt_stats
.first
).c_str(),
1631 bt_stats
.second
.ToString().c_str());
1636 void BlockCacheTraceAnalyzer::PrintAccessCountStats(bool user_access_only
,
1638 uint32_t top_k
) const {
1639 HistogramStat access_stats
;
1640 std::map
<TraceType
, HistogramStat
> bt_stats_map
;
1641 std::map
<std::string
, std::map
<TraceType
, HistogramStat
>> cf_bt_stats_map
;
1642 std::map
<uint64_t, std::vector
<std::string
>> access_count_blocks
;
1643 auto block_callback
= [&](const std::string
& cf_name
, uint64_t /*fd*/,
1644 uint32_t /*level*/, TraceType type
,
1645 const std::string
& block_key
, uint64_t /*block_id*/,
1646 const BlockAccessInfo
& block
) {
1647 uint64_t naccesses
= 0;
1648 for (auto const& caller_access
: block
.caller_num_access_map
) {
1649 if (!user_access_only
|| is_user_access(caller_access
.first
)) {
1650 naccesses
+= caller_access
.second
;
1653 if (naccesses
== 0) {
1656 if (type
== TraceType::kBlockTraceDataBlock
) {
1657 access_count_blocks
[naccesses
].push_back(block_key
);
1659 access_stats
.Add(naccesses
);
1660 bt_stats_map
[type
].Add(naccesses
);
1661 cf_bt_stats_map
[cf_name
][type
].Add(naccesses
);
1663 TraverseBlocks(block_callback
);
1665 "Block access count stats: The number of accesses per block. %s\n%s",
1666 user_access_only
? "User accesses only" : "All accesses",
1667 access_stats
.ToString().c_str());
1668 uint32_t bottom_k_index
= 0;
1669 for (auto naccess_it
= access_count_blocks
.begin();
1670 naccess_it
!= access_count_blocks
.end(); naccess_it
++) {
1672 if (bottom_k_index
>= bottom_k
) {
1675 std::map
<TableReaderCaller
, uint64_t> caller_naccesses
;
1676 uint64_t naccesses
= 0;
1677 for (auto const& block_id
: naccess_it
->second
) {
1678 BlockAccessInfo
* block
= block_info_map_
.find(block_id
)->second
;
1679 for (auto const& caller_access
: block
->caller_num_access_map
) {
1680 if (!user_access_only
|| is_user_access(caller_access
.first
)) {
1681 caller_naccesses
[caller_access
.first
] += caller_access
.second
;
1682 naccesses
+= caller_access
.second
;
1686 std::string
statistics("Caller:");
1687 for (auto const& caller_naccessess_it
: caller_naccesses
) {
1688 statistics
+= caller_to_string(caller_naccessess_it
.first
);
1691 std::to_string(percent(caller_naccessess_it
.second
, naccesses
));
1695 "Bottom %" PRIu32
" access count. Access count=%" PRIu64
1696 " nblocks=%" ROCKSDB_PRIszt
" %s\n",
1697 bottom_k
, naccess_it
->first
, naccess_it
->second
.size(),
1698 statistics
.c_str());
1701 uint32_t top_k_index
= 0;
1702 for (auto naccess_it
= access_count_blocks
.rbegin();
1703 naccess_it
!= access_count_blocks
.rend(); naccess_it
++) {
1705 if (top_k_index
>= top_k
) {
1708 for (auto const& block_id
: naccess_it
->second
) {
1709 BlockAccessInfo
* block
= block_info_map_
.find(block_id
)->second
;
1710 std::string
statistics("Caller:");
1711 uint64_t naccesses
= 0;
1712 for (auto const& caller_access
: block
->caller_num_access_map
) {
1713 if (!user_access_only
|| is_user_access(caller_access
.first
)) {
1714 naccesses
+= caller_access
.second
;
1717 assert(naccesses
> 0);
1718 for (auto const& caller_access
: block
->caller_num_access_map
) {
1719 if (!user_access_only
|| is_user_access(caller_access
.first
)) {
1721 statistics
+= caller_to_string(caller_access
.first
);
1724 std::to_string(percent(caller_access
.second
, naccesses
));
1727 uint64_t ref_keys_accesses
= 0;
1728 uint64_t ref_keys_does_not_exist_accesses
= 0;
1729 for (auto const& ref_key_caller_access
: block
->key_num_access_map
) {
1730 for (auto const& caller_access
: ref_key_caller_access
.second
) {
1731 if (!user_access_only
|| is_user_access(caller_access
.first
)) {
1732 ref_keys_accesses
+= caller_access
.second
;
1736 for (auto const& ref_key_caller_access
:
1737 block
->non_exist_key_num_access_map
) {
1738 for (auto const& caller_access
: ref_key_caller_access
.second
) {
1739 if (!user_access_only
|| is_user_access(caller_access
.first
)) {
1740 ref_keys_does_not_exist_accesses
+= caller_access
.second
;
1744 statistics
+= ",nkeys=";
1745 statistics
+= std::to_string(block
->num_keys
);
1746 statistics
+= ",block_size=";
1747 statistics
+= std::to_string(block
->block_size
);
1748 statistics
+= ",num_ref_keys=";
1749 statistics
+= std::to_string(block
->key_num_access_map
.size());
1750 statistics
+= ",percent_access_ref_keys=";
1751 statistics
+= std::to_string(percent(ref_keys_accesses
, naccesses
));
1752 statistics
+= ",num_ref_keys_does_not_exist=";
1753 statistics
+= std::to_string(block
->non_exist_key_num_access_map
.size());
1754 statistics
+= ",percent_access_ref_keys_does_not_exist=";
1756 std::to_string(percent(ref_keys_does_not_exist_accesses
, naccesses
));
1757 statistics
+= ",ref_data_size=";
1758 statistics
+= std::to_string(block
->referenced_data_size
);
1760 "Top %" PRIu32
" access count blocks access_count=%" PRIu64
1762 top_k
, naccess_it
->first
, statistics
.c_str());
1766 for (auto const& bt_stats
: bt_stats_map
) {
1767 print_break_lines(/*num_break_lines=*/1);
1768 fprintf(stdout
, "Break down by block type %s: \n%s",
1769 block_type_to_string(bt_stats
.first
).c_str(),
1770 bt_stats
.second
.ToString().c_str());
1772 for (auto const& cf_bt_stats
: cf_bt_stats_map
) {
1773 const std::string
& cf_name
= cf_bt_stats
.first
;
1774 for (auto const& bt_stats
: cf_bt_stats
.second
) {
1775 print_break_lines(/*num_break_lines=*/1);
1777 "Break down by column family %s and block type "
1779 cf_name
.c_str(), block_type_to_string(bt_stats
.first
).c_str(),
1780 bt_stats
.second
.ToString().c_str());
1785 void BlockCacheTraceAnalyzer::PrintDataBlockAccessStats() const {
1786 HistogramStat existing_keys_stats
;
1787 std::map
<std::string
, HistogramStat
> cf_existing_keys_stats_map
;
1788 HistogramStat non_existing_keys_stats
;
1789 std::map
<std::string
, HistogramStat
> cf_non_existing_keys_stats_map
;
1790 HistogramStat block_access_stats
;
1791 std::map
<std::string
, HistogramStat
> cf_block_access_info
;
1792 HistogramStat percent_referenced_bytes
;
1793 std::map
<std::string
, HistogramStat
> cf_percent_referenced_bytes
;
1794 // Total number of accesses in a data block / number of keys in a data block.
1795 HistogramStat avg_naccesses_per_key_in_a_data_block
;
1796 std::map
<std::string
, HistogramStat
> cf_avg_naccesses_per_key_in_a_data_block
;
1797 // The standard deviation on the number of accesses of a key in a data block.
1798 HistogramStat stdev_naccesses_per_key_in_a_data_block
;
1799 std::map
<std::string
, HistogramStat
>
1800 cf_stdev_naccesses_per_key_in_a_data_block
;
1801 auto block_callback
=
1802 [&](const std::string
& cf_name
, uint64_t /*fd*/, uint32_t /*level*/,
1803 TraceType
/*type*/, const std::string
& /*block_key*/,
1804 uint64_t /*block_id*/, const BlockAccessInfo
& block
) {
1805 if (block
.num_keys
== 0) {
1808 // Use four decimal points.
1809 uint64_t percent_referenced_for_existing_keys
= (uint64_t)(
1810 ((double)block
.key_num_access_map
.size() / (double)block
.num_keys
) *
1812 uint64_t percent_referenced_for_non_existing_keys
=
1813 (uint64_t)(((double)block
.non_exist_key_num_access_map
.size() /
1814 (double)block
.num_keys
) *
1816 uint64_t percent_accesses_for_existing_keys
=
1817 (uint64_t)(((double)block
.num_referenced_key_exist_in_block
/
1818 (double)block
.num_accesses
) *
1821 HistogramStat hist_naccess_per_key
;
1822 for (auto const& key_access
: block
.key_num_access_map
) {
1823 for (auto const& caller_access
: key_access
.second
) {
1824 hist_naccess_per_key
.Add(caller_access
.second
);
1827 uint64_t avg_accesses
=
1828 static_cast<uint64_t>(hist_naccess_per_key
.Average());
1829 uint64_t stdev_accesses
=
1830 static_cast<uint64_t>(hist_naccess_per_key
.StandardDeviation());
1831 avg_naccesses_per_key_in_a_data_block
.Add(avg_accesses
);
1832 cf_avg_naccesses_per_key_in_a_data_block
[cf_name
].Add(avg_accesses
);
1833 stdev_naccesses_per_key_in_a_data_block
.Add(stdev_accesses
);
1834 cf_stdev_naccesses_per_key_in_a_data_block
[cf_name
].Add(stdev_accesses
);
1836 existing_keys_stats
.Add(percent_referenced_for_existing_keys
);
1837 cf_existing_keys_stats_map
[cf_name
].Add(
1838 percent_referenced_for_existing_keys
);
1839 non_existing_keys_stats
.Add(percent_referenced_for_non_existing_keys
);
1840 cf_non_existing_keys_stats_map
[cf_name
].Add(
1841 percent_referenced_for_non_existing_keys
);
1842 block_access_stats
.Add(percent_accesses_for_existing_keys
);
1843 cf_block_access_info
[cf_name
].Add(percent_accesses_for_existing_keys
);
1845 TraverseBlocks(block_callback
);
1847 "Histogram on the number of referenced keys existing in a block over "
1848 "the total number of keys in a block: \n%s",
1849 existing_keys_stats
.ToString().c_str());
1850 for (auto const& cf_stats
: cf_existing_keys_stats_map
) {
1851 print_break_lines(/*num_break_lines=*/1);
1852 fprintf(stdout
, "Break down by column family %s: \n%s",
1853 cf_stats
.first
.c_str(), cf_stats
.second
.ToString().c_str());
1855 print_break_lines(/*num_break_lines=*/1);
1858 "Histogram on the number of referenced keys DO NOT exist in a block over "
1859 "the total number of keys in a block: \n%s",
1860 non_existing_keys_stats
.ToString().c_str());
1861 for (auto const& cf_stats
: cf_non_existing_keys_stats_map
) {
1862 print_break_lines(/*num_break_lines=*/1);
1863 fprintf(stdout
, "Break down by column family %s: \n%s",
1864 cf_stats
.first
.c_str(), cf_stats
.second
.ToString().c_str());
1866 print_break_lines(/*num_break_lines=*/1);
1868 "Histogram on the number of accesses on keys exist in a block over "
1869 "the total number of accesses in a block: \n%s",
1870 block_access_stats
.ToString().c_str());
1871 for (auto const& cf_stats
: cf_block_access_info
) {
1872 print_break_lines(/*num_break_lines=*/1);
1873 fprintf(stdout
, "Break down by column family %s: \n%s",
1874 cf_stats
.first
.c_str(), cf_stats
.second
.ToString().c_str());
1876 print_break_lines(/*num_break_lines=*/1);
1879 "Histogram on the average number of accesses per key in a block: \n%s",
1880 avg_naccesses_per_key_in_a_data_block
.ToString().c_str());
1881 for (auto const& cf_stats
: cf_avg_naccesses_per_key_in_a_data_block
) {
1882 fprintf(stdout
, "Break down by column family %s: \n%s",
1883 cf_stats
.first
.c_str(), cf_stats
.second
.ToString().c_str());
1885 print_break_lines(/*num_break_lines=*/1);
1887 "Histogram on the standard deviation of the number of accesses per "
1888 "key in a block: \n%s",
1889 stdev_naccesses_per_key_in_a_data_block
.ToString().c_str());
1890 for (auto const& cf_stats
: cf_stdev_naccesses_per_key_in_a_data_block
) {
1891 fprintf(stdout
, "Break down by column family %s: \n%s",
1892 cf_stats
.first
.c_str(), cf_stats
.second
.ToString().c_str());
1896 void BlockCacheTraceAnalyzer::PrintStatsSummary() const {
1897 uint64_t total_num_files
= 0;
1898 uint64_t total_num_blocks
= 0;
1899 uint64_t total_num_accesses
= 0;
1900 std::map
<TraceType
, uint64_t> bt_num_blocks_map
;
1901 std::map
<TableReaderCaller
, uint64_t> caller_num_access_map
;
1902 std::map
<TableReaderCaller
, std::map
<TraceType
, uint64_t>>
1903 caller_bt_num_access_map
;
1904 std::map
<TableReaderCaller
, std::map
<uint32_t, uint64_t>>
1905 caller_level_num_access_map
;
1906 for (auto const& cf_aggregates
: cf_aggregates_map_
) {
1907 // Stats per column family.
1908 const std::string
& cf_name
= cf_aggregates
.first
;
1909 uint64_t cf_num_files
= 0;
1910 uint64_t cf_num_blocks
= 0;
1911 std::map
<TraceType
, uint64_t> cf_bt_blocks
;
1912 uint64_t cf_num_accesses
= 0;
1913 std::map
<TableReaderCaller
, uint64_t> cf_caller_num_accesses_map
;
1914 std::map
<TableReaderCaller
, std::map
<uint64_t, uint64_t>>
1915 cf_caller_level_num_accesses_map
;
1916 std::map
<TableReaderCaller
, std::map
<uint64_t, uint64_t>>
1917 cf_caller_file_num_accesses_map
;
1918 std::map
<TableReaderCaller
, std::map
<TraceType
, uint64_t>>
1919 cf_caller_bt_num_accesses_map
;
1920 total_num_files
+= cf_aggregates
.second
.fd_aggregates_map
.size();
1921 for (auto const& file_aggregates
: cf_aggregates
.second
.fd_aggregates_map
) {
1922 // Stats per SST file.
1923 const uint64_t fd
= file_aggregates
.first
;
1924 const uint32_t level
= file_aggregates
.second
.level
;
1926 for (auto const& block_type_aggregates
:
1927 file_aggregates
.second
.block_type_aggregates_map
) {
1928 // Stats per block type.
1929 const TraceType type
= block_type_aggregates
.first
;
1930 cf_bt_blocks
[type
] +=
1931 block_type_aggregates
.second
.block_access_info_map
.size();
1933 block_type_aggregates
.second
.block_access_info_map
.size();
1934 bt_num_blocks_map
[type
] +=
1935 block_type_aggregates
.second
.block_access_info_map
.size();
1936 for (auto const& block_access_info
:
1937 block_type_aggregates
.second
.block_access_info_map
) {
1940 for (auto const& stats
:
1941 block_access_info
.second
.caller_num_access_map
) {
1942 // Stats per caller.
1943 const TableReaderCaller caller
= stats
.first
;
1944 const uint64_t num_accesses
= stats
.second
;
1946 total_num_accesses
+= num_accesses
;
1947 caller_num_access_map
[caller
] += num_accesses
;
1948 caller_bt_num_access_map
[caller
][type
] += num_accesses
;
1949 caller_level_num_access_map
[caller
][level
] += num_accesses
;
1950 // Column Family stats.
1951 cf_num_accesses
+= num_accesses
;
1952 cf_caller_num_accesses_map
[caller
] += num_accesses
;
1953 cf_caller_level_num_accesses_map
[caller
][level
] += num_accesses
;
1954 cf_caller_file_num_accesses_map
[caller
][fd
] += num_accesses
;
1955 cf_caller_bt_num_accesses_map
[caller
][type
] += num_accesses
;
1962 print_break_lines(/*num_break_lines=*/3);
1963 fprintf(stdout
, "Statistics for column family %s:\n", cf_name
.c_str());
1965 " Number of files:%" PRIu64
" Number of blocks: %" PRIu64
1966 " Number of accesses: %" PRIu64
"\n",
1967 cf_num_files
, cf_num_blocks
, cf_num_accesses
);
1968 for (auto block_type
: cf_bt_blocks
) {
1969 fprintf(stdout
, "Number of %s blocks: %" PRIu64
" Percent: %.2f\n",
1970 block_type_to_string(block_type
.first
).c_str(), block_type
.second
,
1971 percent(block_type
.second
, cf_num_blocks
));
1973 for (auto caller
: cf_caller_num_accesses_map
) {
1974 const uint64_t naccesses
= caller
.second
;
1975 print_break_lines(/*num_break_lines=*/1);
1977 "Caller %s: Number of accesses %" PRIu64
" Percent: %.2f\n",
1978 caller_to_string(caller
.first
).c_str(), naccesses
,
1979 percent(naccesses
, cf_num_accesses
));
1980 fprintf(stdout
, "Caller %s: Number of accesses per level break down\n",
1981 caller_to_string(caller
.first
).c_str());
1982 for (auto naccess_level
:
1983 cf_caller_level_num_accesses_map
[caller
.first
]) {
1985 "\t Level %" PRIu64
": Number of accesses: %" PRIu64
1987 naccess_level
.first
, naccess_level
.second
,
1988 percent(naccess_level
.second
, naccesses
));
1990 fprintf(stdout
, "Caller %s: Number of accesses per file break down\n",
1991 caller_to_string(caller
.first
).c_str());
1992 for (auto naccess_file
: cf_caller_file_num_accesses_map
[caller
.first
]) {
1994 "\t File %" PRIu64
": Number of accesses: %" PRIu64
1996 naccess_file
.first
, naccess_file
.second
,
1997 percent(naccess_file
.second
, naccesses
));
2000 "Caller %s: Number of accesses per block type break down\n",
2001 caller_to_string(caller
.first
).c_str());
2002 for (auto naccess_type
: cf_caller_bt_num_accesses_map
[caller
.first
]) {
2004 "\t Block Type %s: Number of accesses: %" PRIu64
2006 block_type_to_string(naccess_type
.first
).c_str(),
2007 naccess_type
.second
, percent(naccess_type
.second
, naccesses
));
2011 print_break_lines(/*num_break_lines=*/3);
2012 fprintf(stdout
, "Overall statistics:\n");
2014 "Number of files: %" PRIu64
" Number of blocks: %" PRIu64
2015 " Number of accesses: %" PRIu64
"\n",
2016 total_num_files
, total_num_blocks
, total_num_accesses
);
2017 for (auto block_type
: bt_num_blocks_map
) {
2018 fprintf(stdout
, "Number of %s blocks: %" PRIu64
" Percent: %.2f\n",
2019 block_type_to_string(block_type
.first
).c_str(), block_type
.second
,
2020 percent(block_type
.second
, total_num_blocks
));
2022 for (auto caller
: caller_num_access_map
) {
2023 print_break_lines(/*num_break_lines=*/1);
2024 uint64_t naccesses
= caller
.second
;
2025 fprintf(stdout
, "Caller %s: Number of accesses %" PRIu64
" Percent: %.2f\n",
2026 caller_to_string(caller
.first
).c_str(), naccesses
,
2027 percent(naccesses
, total_num_accesses
));
2028 fprintf(stdout
, "Caller %s: Number of accesses per level break down\n",
2029 caller_to_string(caller
.first
).c_str());
2030 for (auto naccess_level
: caller_level_num_access_map
[caller
.first
]) {
2032 "\t Level %d: Number of accesses: %" PRIu64
" Percent: %.2f\n",
2033 naccess_level
.first
, naccess_level
.second
,
2034 percent(naccess_level
.second
, naccesses
));
2036 fprintf(stdout
, "Caller %s: Number of accesses per block type break down\n",
2037 caller_to_string(caller
.first
).c_str());
2038 for (auto naccess_type
: caller_bt_num_access_map
[caller
.first
]) {
2040 "\t Block Type %s: Number of accesses: %" PRIu64
2042 block_type_to_string(naccess_type
.first
).c_str(),
2043 naccess_type
.second
, percent(naccess_type
.second
, naccesses
));
2048 std::vector
<CacheConfiguration
> parse_cache_config_file(
2049 const std::string
& config_path
) {
2050 std::ifstream
file(config_path
);
2051 if (!file
.is_open()) {
2054 std::vector
<CacheConfiguration
> configs
;
2056 while (getline(file
, line
)) {
2057 CacheConfiguration cache_config
;
2058 std::stringstream
ss(line
);
2059 std::vector
<std::string
> config_strs
;
2062 getline(ss
, substr
, ',');
2063 config_strs
.push_back(substr
);
2066 if (config_strs
.size() < 4) {
2067 fprintf(stderr
, "Invalid cache simulator configuration %s\n",
2071 if (kSupportedCacheNames
.find(" " + config_strs
[0] + " ") ==
2072 std::string::npos
) {
2073 fprintf(stderr
, "Invalid cache name %s. Supported cache names are %s\n",
2074 line
.c_str(), kSupportedCacheNames
.c_str());
2077 cache_config
.cache_name
= config_strs
[0];
2078 cache_config
.num_shard_bits
= ParseUint32(config_strs
[1]);
2079 cache_config
.ghost_cache_capacity
= ParseUint64(config_strs
[2]);
2080 for (uint32_t i
= 3; i
< config_strs
.size(); i
++) {
2081 uint64_t capacity
= ParseUint64(config_strs
[i
]);
2082 if (capacity
== 0) {
2083 fprintf(stderr
, "Invalid cache capacity %s, %s\n",
2084 config_strs
[i
].c_str(), line
.c_str());
2087 cache_config
.cache_capacities
.push_back(capacity
);
2089 configs
.push_back(cache_config
);
2095 std::vector
<uint64_t> parse_buckets(const std::string
& bucket_str
) {
2096 std::vector
<uint64_t> buckets
;
2097 std::stringstream
ss(bucket_str
);
2100 getline(ss
, bucket
, ',');
2101 buckets
.push_back(ParseUint64(bucket
));
2103 buckets
.push_back(port::kMaxUint64
);
2107 int block_cache_trace_analyzer_tool(int argc
, char** argv
) {
2108 ParseCommandLineFlags(&argc
, &argv
, true);
2109 if (FLAGS_block_cache_trace_path
.empty()) {
2110 fprintf(stderr
, "block cache trace path is empty\n");
2113 uint64_t warmup_seconds
=
2114 FLAGS_cache_sim_warmup_seconds
> 0 ? FLAGS_cache_sim_warmup_seconds
: 0;
2115 uint32_t downsample_ratio
= FLAGS_block_cache_trace_downsample_ratio
> 0
2116 ? FLAGS_block_cache_trace_downsample_ratio
2118 std::vector
<CacheConfiguration
> cache_configs
=
2119 parse_cache_config_file(FLAGS_block_cache_sim_config_path
);
2120 std::unique_ptr
<BlockCacheTraceSimulator
> cache_simulator
;
2121 if (!cache_configs
.empty()) {
2122 cache_simulator
.reset(new BlockCacheTraceSimulator(
2123 warmup_seconds
, downsample_ratio
, cache_configs
));
2124 Status s
= cache_simulator
->InitializeCaches();
2126 fprintf(stderr
, "Cannot initialize cache simulators %s\n",
2127 s
.ToString().c_str());
2131 BlockCacheTraceAnalyzer
analyzer(
2132 FLAGS_block_cache_trace_path
, FLAGS_block_cache_analysis_result_dir
,
2133 FLAGS_human_readable_trace_file_path
,
2134 !FLAGS_reuse_distance_labels
.empty(), FLAGS_mrc_only
,
2135 FLAGS_is_block_cache_human_readable_trace
, std::move(cache_simulator
));
2136 Status s
= analyzer
.Analyze();
2137 if (!s
.IsIncomplete() && !s
.ok()) {
2139 fprintf(stderr
, "Cannot process the trace %s\n", s
.ToString().c_str());
2142 fprintf(stdout
, "Status: %s\n", s
.ToString().c_str());
2143 analyzer
.WriteMissRatioCurves();
2144 analyzer
.WriteMissRatioTimeline(1);
2145 analyzer
.WriteMissRatioTimeline(kSecondInMinute
);
2146 analyzer
.WriteMissRatioTimeline(kSecondInHour
);
2147 analyzer
.WriteMissTimeline(1);
2148 analyzer
.WriteMissTimeline(kSecondInMinute
);
2149 analyzer
.WriteMissTimeline(kSecondInHour
);
2151 if (FLAGS_mrc_only
) {
2153 "Skipping the analysis statistics since the user wants to compute "
2158 analyzer
.PrintStatsSummary();
2159 if (FLAGS_print_access_count_stats
) {
2160 print_break_lines(/*num_break_lines=*/3);
2161 analyzer
.PrintAccessCountStats(
2162 /*user_access_only=*/false, FLAGS_analyze_bottom_k_access_count_blocks
,
2163 FLAGS_analyze_top_k_access_count_blocks
);
2164 print_break_lines(/*num_break_lines=*/3);
2165 analyzer
.PrintAccessCountStats(
2166 /*user_access_only=*/true, FLAGS_analyze_bottom_k_access_count_blocks
,
2167 FLAGS_analyze_top_k_access_count_blocks
);
2169 if (FLAGS_print_block_size_stats
) {
2170 print_break_lines(/*num_break_lines=*/3);
2171 analyzer
.PrintBlockSizeStats();
2173 if (FLAGS_print_data_block_access_count_stats
) {
2174 print_break_lines(/*num_break_lines=*/3);
2175 analyzer
.PrintDataBlockAccessStats();
2177 print_break_lines(/*num_break_lines=*/3);
2179 if (!FLAGS_timeline_labels
.empty()) {
2180 std::stringstream
ss(FLAGS_timeline_labels
);
2183 getline(ss
, label
, ',');
2184 if (label
.find("block") != std::string::npos
) {
2185 analyzer
.WriteAccessTimeline(label
, kSecondInMinute
, true);
2186 analyzer
.WriteAccessTimeline(label
, kSecondInMinute
, false);
2187 analyzer
.WriteAccessTimeline(label
, kSecondInHour
, true);
2188 analyzer
.WriteAccessTimeline(label
, kSecondInHour
, false);
2190 analyzer
.WriteAccessTimeline(label
, kSecondInMinute
, false);
2191 analyzer
.WriteAccessTimeline(label
, kSecondInHour
, false);
2196 if (!FLAGS_analyze_callers
.empty()) {
2197 analyzer
.WritePercentAccessSummaryStats();
2198 std::stringstream
ss(FLAGS_analyze_callers
);
2201 getline(ss
, caller
, ',');
2202 analyzer
.WriteDetailedPercentAccessSummaryStats(string_to_caller(caller
));
2206 if (!FLAGS_access_count_buckets
.empty()) {
2207 std::vector
<uint64_t> buckets
= parse_buckets(FLAGS_access_count_buckets
);
2208 analyzer
.WriteAccessCountSummaryStats(buckets
, /*user_access_only=*/true);
2209 analyzer
.WriteAccessCountSummaryStats(buckets
, /*user_access_only=*/false);
2212 if (!FLAGS_reuse_distance_labels
.empty() &&
2213 !FLAGS_reuse_distance_buckets
.empty()) {
2214 std::vector
<uint64_t> buckets
= parse_buckets(FLAGS_reuse_distance_buckets
);
2215 std::stringstream
ss(FLAGS_reuse_distance_labels
);
2218 getline(ss
, label
, ',');
2219 analyzer
.WriteReuseDistance(label
, buckets
);
2223 if (!FLAGS_reuse_interval_labels
.empty() &&
2224 !FLAGS_reuse_interval_buckets
.empty()) {
2225 std::vector
<uint64_t> buckets
= parse_buckets(FLAGS_reuse_interval_buckets
);
2226 std::stringstream
ss(FLAGS_reuse_interval_labels
);
2229 getline(ss
, label
, ',');
2230 analyzer
.WriteReuseInterval(label
, buckets
);
2234 if (!FLAGS_reuse_lifetime_labels
.empty() &&
2235 !FLAGS_reuse_lifetime_buckets
.empty()) {
2236 std::vector
<uint64_t> buckets
= parse_buckets(FLAGS_reuse_lifetime_buckets
);
2237 std::stringstream
ss(FLAGS_reuse_lifetime_labels
);
2240 getline(ss
, label
, ',');
2241 analyzer
.WriteReuseLifetime(label
, buckets
);
2245 if (FLAGS_analyze_blocks_reuse_k_reuse_window
!= 0) {
2246 std::vector
<TraceType
> block_types
{TraceType::kBlockTraceIndexBlock
,
2247 TraceType::kBlockTraceDataBlock
,
2248 TraceType::kBlockTraceFilterBlock
};
2249 for (auto block_type
: block_types
) {
2250 analyzer
.WriteBlockReuseTimeline(
2251 FLAGS_analyze_blocks_reuse_k_reuse_window
,
2252 /*user_access_only=*/true, block_type
);
2253 analyzer
.WriteBlockReuseTimeline(
2254 FLAGS_analyze_blocks_reuse_k_reuse_window
,
2255 /*user_access_only=*/false, block_type
);
2259 if (!FLAGS_analyze_get_spatial_locality_labels
.empty() &&
2260 !FLAGS_analyze_get_spatial_locality_buckets
.empty()) {
2261 std::vector
<uint64_t> buckets
=
2262 parse_buckets(FLAGS_analyze_get_spatial_locality_buckets
);
2263 std::stringstream
ss(FLAGS_analyze_get_spatial_locality_labels
);
2266 getline(ss
, label
, ',');
2267 analyzer
.WriteGetSpatialLocality(label
, buckets
);
2271 if (!FLAGS_analyze_correlation_coefficients_labels
.empty()) {
2272 std::stringstream
ss(FLAGS_analyze_correlation_coefficients_labels
);
2275 getline(ss
, label
, ',');
2276 analyzer
.WriteCorrelationFeatures(
2277 label
, FLAGS_analyze_correlation_coefficients_max_number_of_values
);
2279 analyzer
.WriteCorrelationFeaturesForGet(
2280 FLAGS_analyze_correlation_coefficients_max_number_of_values
);
2283 if (!FLAGS_skew_labels
.empty() && !FLAGS_skew_buckets
.empty()) {
2284 std::vector
<uint64_t> buckets
= parse_buckets(FLAGS_skew_buckets
);
2285 std::stringstream
ss(FLAGS_skew_labels
);
2288 getline(ss
, label
, ',');
2289 if (label
.find("block") != std::string::npos
) {
2290 analyzer
.WriteSkewness(label
, buckets
,
2291 TraceType::kBlockTraceIndexBlock
);
2292 analyzer
.WriteSkewness(label
, buckets
,
2293 TraceType::kBlockTraceFilterBlock
);
2294 analyzer
.WriteSkewness(label
, buckets
, TraceType::kBlockTraceDataBlock
);
2295 analyzer
.WriteSkewness(label
, buckets
, TraceType::kTraceMax
);
2297 analyzer
.WriteSkewness(label
, buckets
, TraceType::kTraceMax
);
2304 } // namespace ROCKSDB_NAMESPACE
2307 #endif // ROCKSDB_LITE