]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | # Copyright (c) 2011-present, Facebook, Inc. All rights reserved. |
2 | # This source code is licensed under both the GPLv2 (found in the | |
3 | # COPYING file in the root directory) and Apache 2.0 License | |
4 | # (found in the LICENSE.Apache file in the root directory). | |
5 | ||
11fdf7f2 TL |
6 | import shutil |
7 | import subprocess | |
8 | import time | |
9 | ||
1e59de90 TL |
10 | from advisor.bench_runner import BenchmarkRunner |
11 | from advisor.db_log_parser import DatabaseLogs, DataSource, NO_COL_FAMILY | |
12 | from advisor.db_stats_fetcher import ( | |
13 | DatabasePerfContext, | |
14 | LogStatsParser, | |
15 | OdsStatsFetcher, | |
16 | ) | |
17 | ||
11fdf7f2 | 18 | |
1e59de90 | 19 | """ |
11fdf7f2 | 20 | NOTE: This is not thread-safe, because the output file is simply overwritten. |
1e59de90 | 21 | """ |
11fdf7f2 TL |
22 | |
23 | ||
class DBBenchRunner(BenchmarkRunner):
    """BenchmarkRunner that drives the db_bench binary through the shell.

    Each experiment (re)creates the database with the 'fillrandom' benchmark,
    runs the requested benchmark, parses the captured db_bench output and
    returns the data sources built from it together with the throughput.

    NOTE: This is not thread-safe, because every command that is run simply
    overwrites the same OUTPUT_FILE and ERROR_FILE.
    """

    # stdout / stderr of every command run by _run_command are redirected here
    OUTPUT_FILE = "temp/dbbench_out.tmp"
    ERROR_FILE = "temp/dbbench_err.tmp"
    # markers searched for in the db_bench output; they are also the keys of
    # the dictionary returned by _parse_output
    DB_PATH = "DB path"
    THROUGHPUT = "ops/sec"
    PERF_CON = " PERF_CONTEXT:"

    @staticmethod
    def is_metric_better(new_metric, old_metric):
        """Return True if new_metric is at least as good as old_metric.

        For db_bench, 'throughput' is the metric returned by run_experiment,
        so a larger (or equal) value counts as better.
        """
        return new_metric >= old_metric

    @staticmethod
    def get_opt_args_str(misc_options_dict):
        """Return the given options as a string of command-line arguments.

        Every option with a truthy value is rendered as " --<name>=<value>";
        options whose value is falsy (None, "", 0, ...) are left out.
        """
        return "".join(
            " --" + option_name + "=" + str(option_value)
            for option_name, option_value in misc_options_dict.items()
            if option_value
        )

    def __init__(self, positional_args, ods_args=None):
        """Store the db_bench invocation details.

        positional_args: [<db_bench binary>, <benchmark>, <option>=<value>...]
            where the trailing "<option>=<value>" entries are optional.
        ods_args: optional mapping; run_experiment reads its "client_script",
            "entity" and (optionally) "key_prefix" entries.
        """
        self.db_bench_binary = positional_args[0]
        self.benchmark = positional_args[1]
        # options list with each option given as "<option>=<value>"; stays
        # None when no extra options were supplied
        self.db_bench_args = None
        if len(positional_args) > 2:
            self.db_bench_args = positional_args[2:]
        # save ods_args, if provided
        self.ods_args = ods_args

    @staticmethod
    def _parse_perf_context_line(line):
        """Parse one line of perf context statistics.

        Sample perf_context line:
            user_key_comparison_count = 500, block_cache_hit_count = 468, ...
        Returns {<stat name>: {<timestamp>: <int value>}}. Tokens that do not
        contain '=' (e.g. the empty remainder after a trailing comma) are
        skipped.
        """
        # TODO(poojam23): this is a hack and should be replaced with the
        # timestamp that db_bench will provide per printed perf_context
        timestamp = int(time.time())
        perf_context_ts = {}
        for token in line.strip().split(","):
            parts = token.split("=")
            if len(parts) < 2:
                continue
            perf_context_ts[parts[0].strip()] = {timestamp: int(parts[1].strip())}
        return perf_context_ts

    def _parse_output(self, get_perf_context=False):
        """Parse OUTPUT_FILE as written by the last db_bench run.

        Sample db_bench output after running 'readwhilewriting' benchmark:
            DB path: [/tmp/rocksdbtest-155919/dbbench]
            readwhilewriting : 16.582 micros/op 60305 ops/sec; 4.2 MB/s
                (3433828 of 5427999 found)
             PERF_CONTEXT:
            user_key_comparison_count = 500466712, block_cache_hit_count = ...

        Returns a dictionary keyed by THROUGHPUT, DB_PATH and PERF_CON; a
        value stays None when the corresponding line was not found. The perf
        context is only collected when get_perf_context is True.
        """
        output = {self.THROUGHPUT: None, self.DB_PATH: None, self.PERF_CON: None}
        perf_context_begins = False
        with open(self.OUTPUT_FILE, "r") as fp:
            for line in fp:
                if line.startswith(self.benchmark):
                    # line from sample output:
                    # readwhilewriting : 16.582 micros/op 60305 ops/sec; ...
                    print(line)  # print output of the benchmark run
                    token_list = line.strip().split()
                    for ix, token in enumerate(token_list):
                        if token.startswith(self.THROUGHPUT):
                            # in above example, throughput = 60305 ops/sec
                            output[self.THROUGHPUT] = float(token_list[ix - 1])
                            break
                elif get_perf_context and line.startswith(self.PERF_CON):
                    # the line following this marker contains the perf
                    # context statistics (refer example above)
                    perf_context_begins = True
                elif get_perf_context and perf_context_begins:
                    output[self.PERF_CON] = self._parse_perf_context_line(line)
                    perf_context_begins = False
                elif line.startswith(self.DB_PATH):
                    # line from sample output:
                    # DB path: [/tmp/rocksdbtest-155919/dbbench]
                    output[self.DB_PATH] = line.split("[")[1].split("]")[0]
        return output

    def get_log_options(self, db_options, db_path):
        """Return (logs_file_prefix, stats_freq_sec) for the given database.

        logs_file_prefix is the log directory joined with the info log file
        name; stats_freq_sec is the value of
        DBOptions.stats_dump_period_sec as an int, or None when db_options
        does not provide it.
        """
        # frequency at which the stats are dumped in the Rocksdb logs
        dump_period = "DBOptions.stats_dump_period_sec"
        # directory, if specified, in which the Rocksdb logs are dumped; by
        # default logs are dumped in the same location as the database
        log_dir = "DBOptions.db_log_dir"
        log_options = db_options.get_options([dump_period, log_dir])

        stats_freq_sec = None
        if dump_period in log_options:
            stats_freq_sec = int(log_options[dump_period][NO_COL_FAMILY])
        log_dir_path = None
        if log_dir in log_options:
            log_dir_path = log_options[log_dir][NO_COL_FAMILY]

        # get_info_log_file_name is not defined in this class; presumably it
        # is inherited from BenchmarkRunner -- verify against the base class
        log_file_name = DBBenchRunner.get_info_log_file_name(log_dir_path, db_path)

        if not log_dir_path:
            log_dir_path = db_path
        if not log_dir_path.endswith("/"):
            log_dir_path += "/"

        return (log_dir_path + log_file_name, stats_freq_sec)

    def _get_options_command_line_args_str(self, curr_options):
        """Create the db_bench command-line arguments for curr_options.

        The --options_file argument is always given and the options that are
        not supported by the OPTIONS file are given as separate arguments.
        """
        optional_args_str = DBBenchRunner.get_opt_args_str(
            curr_options.get_misc_options()
        )
        # generate an options configuration file
        options_file = curr_options.generate_options_config(nonce="12345")
        return optional_args_str + " --options_file=" + options_file

    def _setup_db_before_experiment(self, curr_options, db_path):
        """Recreate the database at db_path using the fillrandom benchmark."""
        # remove destination directory if it already exists; this is
        # best-effort: with ignore_errors=True removal failures are ignored,
        # so no exception handler is needed around the call
        shutil.rmtree(db_path, ignore_errors=True)
        # setup database with a million keys using the fillrandom benchmark
        command = "%s --benchmarks=fillrandom --db=%s --num=1000000" % (
            self.db_bench_binary,
            db_path,
        )
        command += self._get_options_command_line_args_str(curr_options)
        self._run_command(command)

    def _build_experiment_command(self, curr_options, db_path):
        """Return the shell command that runs self.benchmark on db_path."""
        command = "%s --benchmarks=%s --statistics --perf_level=3 --db=%s" % (
            self.db_bench_binary,
            self.benchmark,
            db_path,
        )
        # fetch the command-line arguments string for providing Rocksdb options
        args_str = self._get_options_command_line_args_str(curr_options)
        # handle the command-line args passed in the constructor, these
        # arguments are specific to db_bench; db_bench_args is None when the
        # constructor received no extra options
        for cmd_line_arg in self.db_bench_args or []:
            args_str += " --" + cmd_line_arg
        return command + args_str

    def _run_command(self, command):
        """Run command in a shell, capturing stdout/stderr in the temp files.

        OUTPUT_FILE and ERROR_FILE are truncated on every call. The exit
        status of the command is not checked.
        """
        with open(self.OUTPUT_FILE, "w+") as out_file, open(
            self.ERROR_FILE, "w+"
        ) as err_file:
            print("executing... - " + command)
            # NOTE(review): the command string is built by concatenation and
            # run with shell=True, so its parts must come from a trusted
            # source
            subprocess.call(command, shell=True, stdout=out_file, stderr=err_file)

    def run_experiment(self, db_options, db_path):
        """Run the benchmark and return (data_sources, throughput).

        data_sources maps DataSource.Type values to lists of data-source
        objects built from the experiment: the options, the Rocksdb LOG, and
        the time-series statistics (LOG stats, perf context and, when
        ods_args was provided, ODS stats).
        """
        # setup the Rocksdb database before running experiment
        self._setup_db_before_experiment(db_options, db_path)
        # get the command to run the experiment
        command = self._build_experiment_command(db_options, db_path)
        experiment_start_time = int(time.time())
        # run experiment
        self._run_command(command)
        experiment_end_time = int(time.time())
        # parse the db_bench experiment output
        parsed_output = self._parse_output(get_perf_context=True)

        # get the log files path prefix and frequency at which Rocksdb stats
        # are dumped in the logs
        logs_file_prefix, stats_freq_sec = self.get_log_options(
            db_options, parsed_output[self.DB_PATH]
        )
        # create the Rocksdb LOGS object
        db_logs = DatabaseLogs(logs_file_prefix, db_options.get_column_families())
        # Create the Log STATS object
        db_log_stats = LogStatsParser(logs_file_prefix, stats_freq_sec)
        # Create the PerfContext STATS object
        db_perf_context = DatabasePerfContext(parsed_output[self.PERF_CON], 0, False)
        # create the data-sources dictionary
        data_sources = {
            DataSource.Type.DB_OPTIONS: [db_options],
            DataSource.Type.LOG: [db_logs],
            DataSource.Type.TIME_SERIES: [db_log_stats, db_perf_context],
        }
        # Create the ODS STATS object
        if self.ods_args:
            key_prefix = ""
            if "key_prefix" in self.ods_args:
                key_prefix = self.ods_args["key_prefix"]
            data_sources[DataSource.Type.TIME_SERIES].append(
                OdsStatsFetcher(
                    self.ods_args["client_script"],
                    self.ods_args["entity"],
                    experiment_start_time,
                    experiment_end_time,
                    key_prefix,
                )
            )
        # return the experiment's data-sources and throughput
        return data_sources, parsed_output[self.THROUGHPUT]