ceph/src/rocksdb/tools/advisor/advisor/db_bench_runner.py

   1 # Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
   2 #  This source code is licensed under both the GPLv2 (found in the
   3 #  COPYING file in the root directory) and Apache 2.0 License
   4 #  (found in the LICENSE.Apache file in the root directory).
   5
   6 import shutil
   7 import subprocess
   8 import time
   9
  10 from advisor.bench_runner import BenchmarkRunner
  11 from advisor.db_log_parser import DatabaseLogs, DataSource, NO_COL_FAMILY
  12 from advisor.db_stats_fetcher import (
  13     DatabasePerfContext,
  14     LogStatsParser,
  15     OdsStatsFetcher,
  16 )
  17
  18
  19 """
  20 NOTE: This is not thread-safe, because the output file is simply overwritten.
  21 """
  22
  23
  24 class DBBenchRunner(BenchmarkRunner):
  25     OUTPUT_FILE = "temp/dbbench_out.tmp"
  26     ERROR_FILE = "temp/dbbench_err.tmp"
  27     DB_PATH = "DB path"
  28     THROUGHPUT = "ops/sec"
  29     PERF_CON = " PERF_CONTEXT:"
  30
  31     @staticmethod
  32     def is_metric_better(new_metric, old_metric):
  33         # for db_bench 'throughput' is the metric returned by run_experiment
  34         return new_metric >= old_metric
  35
  36     @staticmethod
  37     def get_opt_args_str(misc_options_dict):
  38         # given a dictionary of options and their values, return a string
  39         # that can be appended as command-line arguments
  40         optional_args_str = ""
  41         for option_name, option_value in misc_options_dict.items():
  42             if option_value:
  43                 optional_args_str += " --" + option_name + "=" + str(option_value)
  44         return optional_args_str
  45
  46     def __init__(self, positional_args, ods_args=None):
  47         # parse positional_args list appropriately
  48         self.db_bench_binary = positional_args[0]
  49         self.benchmark = positional_args[1]
  50         self.db_bench_args = None
  51         if len(positional_args) > 2:
  52             # options list with each option given as "<option>=<value>"
  53             self.db_bench_args = positional_args[2:]
  54         # save ods_args, if provided
  55         self.ods_args = ods_args
  56
  57     def _parse_output(self, get_perf_context=False):
  58         """
  59         Sample db_bench output after running 'readwhilewriting' benchmark:
  60         DB path: [/tmp/rocksdbtest-155919/dbbench]\n
  61         readwhilewriting : 16.582 micros/op 60305 ops/sec; 4.2 MB/s (3433828\
  62         of 5427999 found)\n
  63         PERF_CONTEXT:\n
  64         user_key_comparison_count = 500466712, block_cache_hit_count = ...\n
  65         """
  66         output = {self.THROUGHPUT: None, self.DB_PATH: None, self.PERF_CON: None}
  67         perf_context_begins = False
  68         with open(self.OUTPUT_FILE, "r") as fp:
  69             for line in fp:
  70                 if line.startswith(self.benchmark):
  71                     # line from sample output:
  72                     # readwhilewriting : 16.582 micros/op 60305 ops/sec; \
  73                     # 4.2 MB/s (3433828 of 5427999 found)\n
  74                     print(line)  # print output of the benchmark run
  75                     token_list = line.strip().split()
  76                     for ix, token in enumerate(token_list):
  77                         if token.startswith(self.THROUGHPUT):
  78                             # in above example, throughput = 60305 ops/sec
  79                             output[self.THROUGHPUT] = float(token_list[ix - 1])
  80                             break
  81                 elif get_perf_context and line.startswith(self.PERF_CON):
  82                     # the following lines in the output contain perf context
  83                     # statistics (refer example above)
  84                     perf_context_begins = True
  85                 elif get_perf_context and perf_context_begins:
  86                     # Sample perf_context output:
  87                     # user_key_comparison_count = 500, block_cache_hit_count =\
  88                     # 468, block_read_count = 580, block_read_byte = 445, ...
  89                     token_list = line.strip().split(",")
  90                     # token_list = ['user_key_comparison_count = 500',
  91                     # 'block_cache_hit_count = 468','block_read_count = 580'...
  92                     perf_context = {
  93                         tk.split("=")[0].strip(): tk.split("=")[1].strip()
  94                         for tk in token_list
  95                         if tk
  96                     }
  97                     # TODO(poojam23): this is a hack and should be replaced
  98                     # with the timestamp that db_bench will provide per printed
  99                     # perf_context
 100                     timestamp = int(time.time())
 101                     perf_context_ts = {}
 102                     for stat in perf_context.keys():
 103                         perf_context_ts[stat] = {timestamp: int(perf_context[stat])}
 104                     output[self.PERF_CON] = perf_context_ts
 105                     perf_context_begins = False
 106                 elif line.startswith(self.DB_PATH):
 107                     # line from sample output:
 108                     # DB path: [/tmp/rocksdbtest-155919/dbbench]\n
 109                     output[self.DB_PATH] = line.split("[")[1].split("]")[0]
 110         return output
 111
 112     def get_log_options(self, db_options, db_path):
 113         # get the location of the LOG file and the frequency at which stats are
 114         # dumped in the LOG file
 115         log_dir_path = None
 116         stats_freq_sec = None
 117         logs_file_prefix = None
 118
 119         # fetch frequency at which the stats are dumped in the Rocksdb logs
 120         dump_period = "DBOptions.stats_dump_period_sec"
 121         # fetch the directory, if specified, in which the Rocksdb logs are
 122         # dumped, by default logs are dumped in same location as database
 123         log_dir = "DBOptions.db_log_dir"
 124         log_options = db_options.get_options([dump_period, log_dir])
 125         if dump_period in log_options:
 126             stats_freq_sec = int(log_options[dump_period][NO_COL_FAMILY])
 127         if log_dir in log_options:
 128             log_dir_path = log_options[log_dir][NO_COL_FAMILY]
 129
 130         log_file_name = DBBenchRunner.get_info_log_file_name(log_dir_path, db_path)
 131
 132         if not log_dir_path:
 133             log_dir_path = db_path
 134         if not log_dir_path.endswith("/"):
 135             log_dir_path += "/"
 136
 137         logs_file_prefix = log_dir_path + log_file_name
 138         return (logs_file_prefix, stats_freq_sec)
 139
 140     def _get_options_command_line_args_str(self, curr_options):
 141         """
 142         This method uses the provided Rocksdb OPTIONS to create a string of
 143         command-line arguments for db_bench.
 144         The --options_file argument is always given and the options that are
 145         not supported by the OPTIONS file are given as separate arguments.
 146         """
 147         optional_args_str = DBBenchRunner.get_opt_args_str(
 148             curr_options.get_misc_options()
 149         )
 150         # generate an options configuration file
 151         options_file = curr_options.generate_options_config(nonce="12345")
 152         optional_args_str += " --options_file=" + options_file
 153         return optional_args_str
 154
 155     def _setup_db_before_experiment(self, curr_options, db_path):
 156         # remove destination directory if it already exists
 157         try:
 158             shutil.rmtree(db_path, ignore_errors=True)
 159         except OSError as e:
 160             print("Error: rmdir " + e.filename + " " + e.strerror)
 161         # setup database with a million keys using the fillrandom benchmark
 162         command = "%s --benchmarks=fillrandom --db=%s --num=1000000" % (
 163             self.db_bench_binary,
 164             db_path,
 165         )
 166         args_str = self._get_options_command_line_args_str(curr_options)
 167         command += args_str
 168         self._run_command(command)
 169
 170     def _build_experiment_command(self, curr_options, db_path):
 171         command = "%s --benchmarks=%s --statistics --perf_level=3 --db=%s" % (
 172             self.db_bench_binary,
 173             self.benchmark,
 174             db_path,
 175         )
 176         # fetch the command-line arguments string for providing Rocksdb options
 177         args_str = self._get_options_command_line_args_str(curr_options)
 178         # handle the command-line args passed in the constructor, these
 179         # arguments are specific to db_bench
 180         for cmd_line_arg in self.db_bench_args:
 181             args_str += " --" + cmd_line_arg
 182         command += args_str
 183         return command
 184
 185     def _run_command(self, command):
 186         out_file = open(self.OUTPUT_FILE, "w+")
 187         err_file = open(self.ERROR_FILE, "w+")
 188         print("executing... - " + command)
 189         subprocess.call(command, shell=True, stdout=out_file, stderr=err_file)
 190         out_file.close()
 191         err_file.close()
 192
 193     def run_experiment(self, db_options, db_path):
 194         # setup the Rocksdb database before running experiment
 195         self._setup_db_before_experiment(db_options, db_path)
 196         # get the command to run the experiment
 197         command = self._build_experiment_command(db_options, db_path)
 198         experiment_start_time = int(time.time())
 199         # run experiment
 200         self._run_command(command)
 201         experiment_end_time = int(time.time())
 202         # parse the db_bench experiment output
 203         parsed_output = self._parse_output(get_perf_context=True)
 204
 205         # get the log files path prefix and frequency at which Rocksdb stats
 206         # are dumped in the logs
 207         logs_file_prefix, stats_freq_sec = self.get_log_options(
 208             db_options, parsed_output[self.DB_PATH]
 209         )
 210         # create the Rocksbd LOGS object
 211         db_logs = DatabaseLogs(logs_file_prefix, db_options.get_column_families())
 212         # Create the Log STATS object
 213         db_log_stats = LogStatsParser(logs_file_prefix, stats_freq_sec)
 214         # Create the PerfContext STATS object
 215         db_perf_context = DatabasePerfContext(parsed_output[self.PERF_CON], 0, False)
 216         # create the data-sources dictionary
 217         data_sources = {
 218             DataSource.Type.DB_OPTIONS: [db_options],
 219             DataSource.Type.LOG: [db_logs],
 220             DataSource.Type.TIME_SERIES: [db_log_stats, db_perf_context],
 221         }
 222         # Create the ODS STATS object
 223         if self.ods_args:
 224             key_prefix = ""
 225             if "key_prefix" in self.ods_args:
 226                 key_prefix = self.ods_args["key_prefix"]
 227             data_sources[DataSource.Type.TIME_SERIES].append(
 228                 OdsStatsFetcher(
 229                     self.ods_args["client_script"],
 230                     self.ods_args["entity"],
 231                     experiment_start_time,
 232                     experiment_end_time,
 233                     key_prefix,
 234                 )
 235             )
 236         # return the experiment's data-sources and throughput
 237         return data_sources, parsed_output[self.THROUGHPUT]