]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/tools/advisor/advisor/db_bench_runner.py
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / rocksdb / tools / advisor / advisor / db_bench_runner.py
CommitLineData
11fdf7f2
TL
1# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2# This source code is licensed under both the GPLv2 (found in the
3# COPYING file in the root directory) and Apache 2.0 License
4# (found in the LICENSE.Apache file in the root directory).
5
11fdf7f2
TL
6import shutil
7import subprocess
8import time
9
1e59de90
TL
10from advisor.bench_runner import BenchmarkRunner
11from advisor.db_log_parser import DatabaseLogs, DataSource, NO_COL_FAMILY
12from advisor.db_stats_fetcher import (
13 DatabasePerfContext,
14 LogStatsParser,
15 OdsStatsFetcher,
16)
17
11fdf7f2 18
1e59de90 19"""
11fdf7f2 20NOTE: This is not thread-safe, because the output file is simply overwritten.
1e59de90 21"""
11fdf7f2
TL
22
23
class DBBenchRunner(BenchmarkRunner):
    """Benchmark runner that drives the ``db_bench`` binary.

    The runner shells out to ``db_bench``, redirects its stdout/stderr to
    ``OUTPUT_FILE``/``ERROR_FILE`` and then parses the captured stdout to get
    the throughput, the database path and (optionally) the perf context.

    NOTE: not thread-safe; every command overwrites the same output files.
    """

    # Files that receive stdout / stderr of the most recent command.
    OUTPUT_FILE = "temp/dbbench_out.tmp"
    ERROR_FILE = "temp/dbbench_err.tmp"
    # Markers searched for in the db_bench output; they double as the keys of
    # the dictionary returned by _parse_output().
    DB_PATH = "DB path"
    THROUGHPUT = "ops/sec"
    PERF_CON = " PERF_CONTEXT:"

    @staticmethod
    def is_metric_better(new_metric, old_metric):
        """Return True if ``new_metric`` is at least as good as ``old_metric``.

        For db_bench the metric is the throughput returned by
        run_experiment(), so a larger (or equal) value is considered better.
        """
        return new_metric >= old_metric

    @staticmethod
    def get_opt_args_str(misc_options_dict):
        """Convert an options dictionary into a command-line arguments string.

        Each entry with a truthy value becomes `` --<name>=<value>``; entries
        whose value is falsy (None, "", 0, False, ...) are left out.
        Returns the empty string when nothing is emitted.
        """
        return "".join(
            " --" + option_name + "=" + str(option_value)
            for option_name, option_value in misc_options_dict.items()
            if option_value
        )

    def __init__(self, positional_args, ods_args=None):
        """Store the db_bench invocation details.

        positional_args: list of the form
            [<db_bench binary>, <benchmark name>, "<option>=<value>", ...];
            the trailing "<option>=<value>" entries are optional.
        ods_args: optional mapping; run_experiment() reads the keys
            "client_script", "entity" and (optionally) "key_prefix" from it.
        """
        # parse positional_args list appropriately
        self.db_bench_binary = positional_args[0]
        self.benchmark = positional_args[1]
        # stays None when no extra db_bench options were supplied
        self.db_bench_args = None
        if len(positional_args) > 2:
            # options list with each option given as "<option>=<value>"
            self.db_bench_args = positional_args[2:]
        # save ods_args, if provided
        self.ods_args = ods_args

    def _parse_output(self, get_perf_context=False):
        """Parse OUTPUT_FILE written by the last db_bench command.

        Sample db_bench output after running 'readwhilewriting' benchmark:
            DB path: [/tmp/rocksdbtest-155919/dbbench]
            readwhilewriting : 16.582 micros/op 60305 ops/sec; 4.2 MB/s
            (3433828 of 5427999 found)
             PERF_CONTEXT:
            user_key_comparison_count = 500466712, block_cache_hit_count = ...

        Returns a dictionary with the keys THROUGHPUT (float), DB_PATH (str)
        and PERF_CON ({<stat>: {<timestamp>: <int value>}}); a key keeps the
        value None if the matching line was not found. The perf context is
        only parsed when ``get_perf_context`` is True.
        """
        output = {self.THROUGHPUT: None, self.DB_PATH: None, self.PERF_CON: None}
        perf_context_begins = False
        with open(self.OUTPUT_FILE, "r") as fp:
            for line in fp:
                if line.startswith(self.benchmark):
                    # line from sample output:
                    # readwhilewriting : 16.582 micros/op 60305 ops/sec; ...
                    print(line)  # print output of the benchmark run
                    token_list = line.strip().split()
                    for ix, token in enumerate(token_list):
                        if token.startswith(self.THROUGHPUT):
                            # the number precedes the unit, i.e. in the above
                            # example throughput = 60305 ops/sec
                            output[self.THROUGHPUT] = float(token_list[ix - 1])
                            break
                elif get_perf_context and line.startswith(self.PERF_CON):
                    # the line following this marker holds the perf context
                    # statistics (refer example above)
                    perf_context_begins = True
                elif get_perf_context and perf_context_begins:
                    # Sample perf_context output:
                    # user_key_comparison_count = 500, block_cache_hit_count =
                    # 468, block_read_count = 580, block_read_byte = 445, ...
                    # TODO(poojam23): this is a hack and should be replaced
                    # with the timestamp that db_bench will provide per printed
                    # perf_context
                    timestamp = int(time.time())
                    perf_context_ts = {}
                    for tk in line.strip().split(","):
                        stat, separator, value = tk.partition("=")
                        if not separator:
                            # empty / whitespace-only token or a fragment that
                            # is not a "<name> = <value>" pair: nothing to
                            # record, and indexing it would raise
                            continue
                        try:
                            stat_value = int(value.strip())
                        except ValueError:
                            # only plain integer statistics can be stored in
                            # the time series; skip anything else instead of
                            # aborting the whole experiment
                            continue
                        perf_context_ts[stat.strip()] = {timestamp: stat_value}
                    output[self.PERF_CON] = perf_context_ts
                    perf_context_begins = False
                elif line.startswith(self.DB_PATH):
                    # line from sample output:
                    # DB path: [/tmp/rocksdbtest-155919/dbbench]
                    output[self.DB_PATH] = line.split("[")[1].split("]")[0]
        return output

    def get_log_options(self, db_options, db_path):
        """Return ``(logs_file_prefix, stats_freq_sec)`` for the database.

        logs_file_prefix: the log directory joined with the info log file
            name obtained from get_info_log_file_name().
        stats_freq_sec: value of DBOptions.stats_dump_period_sec as an int,
            or None if ``db_options`` does not provide it.
        """
        log_dir_path = None
        stats_freq_sec = None

        # fetch frequency at which the stats are dumped in the Rocksdb logs
        dump_period = "DBOptions.stats_dump_period_sec"
        # fetch the directory, if specified, in which the Rocksdb logs are
        # dumped, by default logs are dumped in same location as database
        log_dir = "DBOptions.db_log_dir"
        log_options = db_options.get_options([dump_period, log_dir])
        if dump_period in log_options:
            stats_freq_sec = int(log_options[dump_period][NO_COL_FAMILY])
        if log_dir in log_options:
            log_dir_path = log_options[log_dir][NO_COL_FAMILY]

        # the file name is computed before falling back to db_path, so the
        # helper sees the log directory exactly as configured (possibly None)
        log_file_name = DBBenchRunner.get_info_log_file_name(log_dir_path, db_path)

        if not log_dir_path:
            log_dir_path = db_path
        if not log_dir_path.endswith("/"):
            log_dir_path += "/"

        return (log_dir_path + log_file_name, stats_freq_sec)

    def _get_options_command_line_args_str(self, curr_options):
        """
        This method uses the provided Rocksdb OPTIONS to create a string of
        command-line arguments for db_bench.
        The --options_file argument is always given and the options that are
        not supported by the OPTIONS file are given as separate arguments.
        """
        optional_args_str = DBBenchRunner.get_opt_args_str(
            curr_options.get_misc_options()
        )
        # generate an options configuration file
        options_file = curr_options.generate_options_config(nonce="12345")
        optional_args_str += " --options_file=" + options_file
        return optional_args_str

    def _setup_db_before_experiment(self, curr_options, db_path):
        """Recreate the database at ``db_path`` using the fillrandom benchmark."""
        # remove destination directory if it already exists
        try:
            shutil.rmtree(db_path, ignore_errors=True)
        except OSError as e:
            # best effort only; %-formatting tolerates a missing filename or
            # strerror, which string concatenation would not
            print("Error: rmdir %s %s" % (e.filename, e.strerror))
        # setup database with a million keys using the fillrandom benchmark
        command = "%s --benchmarks=fillrandom --db=%s --num=1000000" % (
            self.db_bench_binary,
            db_path,
        )
        command += self._get_options_command_line_args_str(curr_options)
        self._run_command(command)

    def _build_experiment_command(self, curr_options, db_path):
        """Return the full db_bench command line for the configured benchmark."""
        command = "%s --benchmarks=%s --statistics --perf_level=3 --db=%s" % (
            self.db_bench_binary,
            self.benchmark,
            db_path,
        )
        # fetch the command-line arguments string for providing Rocksdb options
        args_str = self._get_options_command_line_args_str(curr_options)
        # handle the command-line args passed in the constructor, these
        # arguments are specific to db_bench; db_bench_args is None when the
        # constructor received no extra options, so fall back to an empty
        # sequence instead of iterating over None
        args_str += "".join(
            " --" + cmd_line_arg for cmd_line_arg in (self.db_bench_args or ())
        )
        return command + args_str

    def _run_command(self, command):
        """Run ``command`` through the shell, capturing stdout and stderr.

        stdout is written to OUTPUT_FILE and stderr to ERROR_FILE; both files
        are truncated first. The exit status of the command is not checked.
        """
        # NOTE(review): the command string is executed with shell=True, so the
        # binary path, db path and options must come from a trusted source.
        with open(self.OUTPUT_FILE, "w+") as out_file, open(
            self.ERROR_FILE, "w+"
        ) as err_file:
            print("executing... - " + command)
            subprocess.call(command, shell=True, stdout=out_file, stderr=err_file)

    def run_experiment(self, db_options, db_path):
        """Run the benchmark and gather the data sources describing the run.

        Returns a tuple ``(data_sources, throughput)`` where ``data_sources``
        maps DataSource.Type values to lists of data-source objects and
        ``throughput`` is the ops/sec value parsed from the db_bench output
        (None if it could not be found).
        """
        # setup the Rocksdb database before running experiment
        self._setup_db_before_experiment(db_options, db_path)
        # get the command to run the experiment
        command = self._build_experiment_command(db_options, db_path)
        experiment_start_time = int(time.time())
        # run experiment
        self._run_command(command)
        experiment_end_time = int(time.time())
        # parse the db_bench experiment output
        parsed_output = self._parse_output(get_perf_context=True)

        # get the log files path prefix and frequency at which Rocksdb stats
        # are dumped in the logs
        logs_file_prefix, stats_freq_sec = self.get_log_options(
            db_options, parsed_output[self.DB_PATH]
        )
        # create the Rocksdb LOGS object
        db_logs = DatabaseLogs(logs_file_prefix, db_options.get_column_families())
        # Create the Log STATS object
        db_log_stats = LogStatsParser(logs_file_prefix, stats_freq_sec)
        # Create the PerfContext STATS object
        db_perf_context = DatabasePerfContext(parsed_output[self.PERF_CON], 0, False)
        # create the data-sources dictionary
        data_sources = {
            DataSource.Type.DB_OPTIONS: [db_options],
            DataSource.Type.LOG: [db_logs],
            DataSource.Type.TIME_SERIES: [db_log_stats, db_perf_context],
        }
        # Create the ODS STATS object
        if self.ods_args:
            key_prefix = ""
            if "key_prefix" in self.ods_args:
                key_prefix = self.ods_args["key_prefix"]
            data_sources[DataSource.Type.TIME_SERIES].append(
                OdsStatsFetcher(
                    self.ods_args["client_script"],
                    self.ods_args["entity"],
                    experiment_start_time,
                    experiment_end_time,
                    key_prefix,
                )
            )
        # return the experiment's data-sources and throughput
        return data_sources, parsed_output[self.THROUGHPUT]