]>
git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/tools/advisor/advisor/db_bench_runner.py
1 # Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 # This source code is licensed under both the GPLv2 (found in the
3 # COPYING file in the root directory) and Apache 2.0 License
4 # (found in the LICENSE.Apache file in the root directory).
10 from advisor
.bench_runner
import BenchmarkRunner
11 from advisor
.db_log_parser
import DatabaseLogs
, DataSource
, NO_COL_FAMILY
12 from advisor
.db_stats_fetcher
import (
20 NOTE: This is not thread-safe, because the output file is simply overwritten.
24 class DBBenchRunner(BenchmarkRunner
):
25 OUTPUT_FILE
= "temp/dbbench_out.tmp"
26 ERROR_FILE
= "temp/dbbench_err.tmp"
28 THROUGHPUT
= "ops/sec"
29 PERF_CON
= " PERF_CONTEXT:"
32 def is_metric_better(new_metric
, old_metric
):
33 # for db_bench 'throughput' is the metric returned by run_experiment
34 return new_metric
>= old_metric
37 def get_opt_args_str(misc_options_dict
):
38 # given a dictionary of options and their values, return a string
39 # that can be appended as command-line arguments
40 optional_args_str
= ""
41 for option_name
, option_value
in misc_options_dict
.items():
43 optional_args_str
+= " --" + option_name
+ "=" + str(option_value
)
44 return optional_args_str
46 def __init__(self
, positional_args
, ods_args
=None):
47 # parse positional_args list appropriately
48 self
.db_bench_binary
= positional_args
[0]
49 self
.benchmark
= positional_args
[1]
50 self
.db_bench_args
= None
51 if len(positional_args
) > 2:
52 # options list with each option given as "<option>=<value>"
53 self
.db_bench_args
= positional_args
[2:]
54 # save ods_args, if provided
55 self
.ods_args
= ods_args
57 def _parse_output(self
, get_perf_context
=False):
59 Sample db_bench output after running 'readwhilewriting' benchmark:
60 DB path: [/tmp/rocksdbtest-155919/dbbench]\n
61 readwhilewriting : 16.582 micros/op 60305 ops/sec; 4.2 MB/s (3433828\
64 user_key_comparison_count = 500466712, block_cache_hit_count = ...\n
66 output
= {self
.THROUGHPUT
: None, self
.DB_PATH
: None, self
.PERF_CON
: None}
67 perf_context_begins
= False
68 with
open(self
.OUTPUT_FILE
, "r") as fp
:
70 if line
.startswith(self
.benchmark
):
71 # line from sample output:
72 # readwhilewriting : 16.582 micros/op 60305 ops/sec; \
73 # 4.2 MB/s (3433828 of 5427999 found)\n
74 print(line
) # print output of the benchmark run
75 token_list
= line
.strip().split()
76 for ix
, token
in enumerate(token_list
):
77 if token
.startswith(self
.THROUGHPUT
):
78 # in above example, throughput = 60305 ops/sec
79 output
[self
.THROUGHPUT
] = float(token_list
[ix
- 1])
81 elif get_perf_context
and line
.startswith(self
.PERF_CON
):
82 # the following lines in the output contain perf context
83 # statistics (refer example above)
84 perf_context_begins
= True
85 elif get_perf_context
and perf_context_begins
:
86 # Sample perf_context output:
87 # user_key_comparison_count = 500, block_cache_hit_count =\
88 # 468, block_read_count = 580, block_read_byte = 445, ...
89 token_list
= line
.strip().split(",")
90 # token_list = ['user_key_comparison_count = 500',
91 # 'block_cache_hit_count = 468','block_read_count = 580'...
93 tk
.split("=")[0].strip(): tk
.split("=")[1].strip()
97 # TODO(poojam23): this is a hack and should be replaced
98 # with the timestamp that db_bench will provide per printed
100 timestamp
= int(time
.time())
102 for stat
in perf_context
.keys():
103 perf_context_ts
[stat
] = {timestamp
: int(perf_context
[stat
])}
104 output
[self
.PERF_CON
] = perf_context_ts
105 perf_context_begins
= False
106 elif line
.startswith(self
.DB_PATH
):
107 # line from sample output:
108 # DB path: [/tmp/rocksdbtest-155919/dbbench]\n
109 output
[self
.DB_PATH
] = line
.split("[")[1].split("]")[0]
112 def get_log_options(self
, db_options
, db_path
):
113 # get the location of the LOG file and the frequency at which stats are
114 # dumped in the LOG file
116 stats_freq_sec
= None
117 logs_file_prefix
= None
119 # fetch frequency at which the stats are dumped in the Rocksdb logs
120 dump_period
= "DBOptions.stats_dump_period_sec"
121 # fetch the directory, if specified, in which the Rocksdb logs are
122 # dumped, by default logs are dumped in same location as database
123 log_dir
= "DBOptions.db_log_dir"
124 log_options
= db_options
.get_options([dump_period
, log_dir
])
125 if dump_period
in log_options
:
126 stats_freq_sec
= int(log_options
[dump_period
][NO_COL_FAMILY
])
127 if log_dir
in log_options
:
128 log_dir_path
= log_options
[log_dir
][NO_COL_FAMILY
]
130 log_file_name
= DBBenchRunner
.get_info_log_file_name(log_dir_path
, db_path
)
133 log_dir_path
= db_path
134 if not log_dir_path
.endswith("/"):
137 logs_file_prefix
= log_dir_path
+ log_file_name
138 return (logs_file_prefix
, stats_freq_sec
)
140 def _get_options_command_line_args_str(self
, curr_options
):
142 This method uses the provided Rocksdb OPTIONS to create a string of
143 command-line arguments for db_bench.
144 The --options_file argument is always given and the options that are
145 not supported by the OPTIONS file are given as separate arguments.
147 optional_args_str
= DBBenchRunner
.get_opt_args_str(
148 curr_options
.get_misc_options()
150 # generate an options configuration file
151 options_file
= curr_options
.generate_options_config(nonce
="12345")
152 optional_args_str
+= " --options_file=" + options_file
153 return optional_args_str
155 def _setup_db_before_experiment(self
, curr_options
, db_path
):
156 # remove destination directory if it already exists
158 shutil
.rmtree(db_path
, ignore_errors
=True)
160 print("Error: rmdir " + e
.filename
+ " " + e
.strerror
)
161 # setup database with a million keys using the fillrandom benchmark
162 command
= "%s --benchmarks=fillrandom --db=%s --num=1000000" % (
163 self
.db_bench_binary
,
166 args_str
= self
._get
_options
_command
_line
_args
_str
(curr_options
)
168 self
._run
_command
(command
)
170 def _build_experiment_command(self
, curr_options
, db_path
):
171 command
= "%s --benchmarks=%s --statistics --perf_level=3 --db=%s" % (
172 self
.db_bench_binary
,
176 # fetch the command-line arguments string for providing Rocksdb options
177 args_str
= self
._get
_options
_command
_line
_args
_str
(curr_options
)
178 # handle the command-line args passed in the constructor, these
179 # arguments are specific to db_bench
180 for cmd_line_arg
in self
.db_bench_args
:
181 args_str
+= " --" + cmd_line_arg
185 def _run_command(self
, command
):
186 out_file
= open(self
.OUTPUT_FILE
, "w+")
187 err_file
= open(self
.ERROR_FILE
, "w+")
188 print("executing... - " + command
)
189 subprocess
.call(command
, shell
=True, stdout
=out_file
, stderr
=err_file
)
193 def run_experiment(self
, db_options
, db_path
):
194 # setup the Rocksdb database before running experiment
195 self
._setup
_db
_before
_experiment
(db_options
, db_path
)
196 # get the command to run the experiment
197 command
= self
._build
_experiment
_command
(db_options
, db_path
)
198 experiment_start_time
= int(time
.time())
200 self
._run
_command
(command
)
201 experiment_end_time
= int(time
.time())
202 # parse the db_bench experiment output
203 parsed_output
= self
._parse
_output
(get_perf_context
=True)
205 # get the log files path prefix and frequency at which Rocksdb stats
206 # are dumped in the logs
207 logs_file_prefix
, stats_freq_sec
= self
.get_log_options(
208 db_options
, parsed_output
[self
.DB_PATH
]
210 # create the Rocksbd LOGS object
211 db_logs
= DatabaseLogs(logs_file_prefix
, db_options
.get_column_families())
212 # Create the Log STATS object
213 db_log_stats
= LogStatsParser(logs_file_prefix
, stats_freq_sec
)
214 # Create the PerfContext STATS object
215 db_perf_context
= DatabasePerfContext(parsed_output
[self
.PERF_CON
], 0, False)
216 # create the data-sources dictionary
218 DataSource
.Type
.DB_OPTIONS
: [db_options
],
219 DataSource
.Type
.LOG
: [db_logs
],
220 DataSource
.Type
.TIME_SERIES
: [db_log_stats
, db_perf_context
],
222 # Create the ODS STATS object
225 if "key_prefix" in self
.ods_args
:
226 key_prefix
= self
.ods_args
["key_prefix"]
227 data_sources
[DataSource
.Type
.TIME_SERIES
].append(
229 self
.ods_args
["client_script"],
230 self
.ods_args
["entity"],
231 experiment_start_time
,
236 # return the experiment's data-sources and throughput
237 return data_sources
, parsed_output
[self
.THROUGHPUT
]