[ceph.git] / ceph / src / rocksdb / tools / advisor / advisor / db_bench_runner.py

# Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
#  This source code is licensed under both the GPLv2 (found in the
#  COPYING file in the root directory) and Apache 2.0 License
#  (found in the LICENSE.Apache file in the root directory).

import shutil
import subprocess
import time

from advisor.bench_runner import BenchmarkRunner
from advisor.db_log_parser import DatabaseLogs, DataSource, NO_COL_FAMILY
from advisor.db_stats_fetcher import (
    DatabasePerfContext,
    LogStatsParser,
    OdsStatsFetcher,
)


"""
NOTE: This is not thread-safe, because the output file is simply overwritten.
"""


class DBBenchRunner(BenchmarkRunner):
    """
    Benchmark runner that drives the RocksDB `db_bench` tool.

    Commands are executed through the shell with stdout/stderr redirected to
    OUTPUT_FILE / ERROR_FILE, and the captured stdout is parsed afterwards.
    Both files are reopened for writing on every command, so concurrent runs
    would overwrite each other's output.
    """

    # files that receive db_bench's stdout / stderr (relative paths, so they
    # are resolved against the current working directory)
    OUTPUT_FILE = "temp/dbbench_out.tmp"
    ERROR_FILE = "temp/dbbench_err.tmp"
    # prefix of the db_bench output line that reports the database location;
    # also the key under which _parse_output stores that location
    DB_PATH = "DB path"
    # token that follows the throughput value on the benchmark result line;
    # also the key under which _parse_output stores the throughput
    THROUGHPUT = "ops/sec"
    # prefix of the line that announces the perf-context statistics; also the
    # key under which _parse_output stores them
    # NOTE(review): the leading space is significant because lines are matched
    # with str.startswith -- confirm against real db_bench output
    PERF_CON = " PERF_CONTEXT:"

    @staticmethod
    def is_metric_better(new_metric, old_metric):
        # for db_bench 'throughput' is the metric returned by run_experiment
        return new_metric >= old_metric

    @staticmethod
    def get_opt_args_str(misc_options_dict):
        # given a dictionary of options and their values, return a string
        # that can be appended as command-line arguments
        optional_args_str = ""
        for option_name, option_value in misc_options_dict.items():
            if option_value:
                optional_args_str += " --" + option_name + "=" + str(option_value)
        return optional_args_str

    def __init__(self, positional_args, ods_args=None):
        # parse positional_args list appropriately
        self.db_bench_binary = positional_args[0]
        self.benchmark = positional_args[1]
        self.db_bench_args = None
        if len(positional_args) > 2:
            # options list with each option given as "<option>=<value>"
            self.db_bench_args = positional_args[2:]
        # save ods_args, if provided
        self.ods_args = ods_args

    def _parse_output(self, get_perf_context=False):
        """
        Sample db_bench output after running 'readwhilewriting' benchmark:
        DB path: [/tmp/rocksdbtest-155919/dbbench]\n
        readwhilewriting : 16.582 micros/op 60305 ops/sec; 4.2 MB/s (3433828\
        of 5427999 found)\n
        PERF_CONTEXT:\n
        user_key_comparison_count = 500466712, block_cache_hit_count = ...\n
        """
        output = {self.THROUGHPUT: None, self.DB_PATH: None, self.PERF_CON: None}
        perf_context_begins = False
        with open(self.OUTPUT_FILE, "r") as fp:
            for line in fp:
                if line.startswith(self.benchmark):
                    # line from sample output:
                    # readwhilewriting : 16.582 micros/op 60305 ops/sec; \
                    # 4.2 MB/s (3433828 of 5427999 found)\n
                    print(line)  # print output of the benchmark run
                    token_list = line.strip().split()
                    for ix, token in enumerate(token_list):
                        if token.startswith(self.THROUGHPUT):
                            # in above example, throughput = 60305 ops/sec
                            output[self.THROUGHPUT] = float(token_list[ix - 1])
                            break
                elif get_perf_context and line.startswith(self.PERF_CON):
                    # the following lines in the output contain perf context
                    # statistics (refer example above)
                    perf_context_begins = True
                elif get_perf_context and perf_context_begins:
                    # Sample perf_context output:
                    # user_key_comparison_count = 500, block_cache_hit_count =\
                    # 468, block_read_count = 580, block_read_byte = 445, ...
                    token_list = line.strip().split(",")
                    # token_list = ['user_key_comparison_count = 500',
                    # 'block_cache_hit_count = 468','block_read_count = 580'...
                    perf_context = {
                        tk.split("=")[0].strip(): tk.split("=")[1].strip()
                        for tk in token_list
                        if tk
                    }
                    # TODO(poojam23): this is a hack and should be replaced
                    # with the timestamp that db_bench will provide per printed
                    # perf_context
                    timestamp = int(time.time())
                    perf_context_ts = {}
                    for stat in perf_context.keys():
                        perf_context_ts[stat] = {timestamp: int(perf_context[stat])}
                    output[self.PERF_CON] = perf_context_ts
                    perf_context_begins = False
                elif line.startswith(self.DB_PATH):
                    # line from sample output:
                    # DB path: [/tmp/rocksdbtest-155919/dbbench]\n
                    output[self.DB_PATH] = line.split("[")[1].split("]")[0]
        return output

    def get_log_options(self, db_options, db_path):
        """
        Work out where the Rocksdb LOG files are and how often stats are
        dumped into them.

        db_options: options object queried through get_options() for
            "DBOptions.stats_dump_period_sec" and "DBOptions.db_log_dir".
        db_path: location of the database; used as the log directory when no
            (non-empty) log directory is configured.

        Returns the tuple (logs_file_prefix, stats_freq_sec), where
        logs_file_prefix is "<log directory>/<info log file name>" and
        stats_freq_sec is an int, or None when the dump period is not set.
        """
        dump_period = "DBOptions.stats_dump_period_sec"
        log_dir = "DBOptions.db_log_dir"
        log_options = db_options.get_options([dump_period, log_dir])

        # frequency at which the stats are dumped in the Rocksdb logs
        stats_freq_sec = (
            int(log_options[dump_period][NO_COL_FAMILY])
            if dump_period in log_options
            else None
        )
        # directory, if specified, in which the Rocksdb logs are dumped
        log_dir_path = (
            log_options[log_dir][NO_COL_FAMILY] if log_dir in log_options else None
        )

        # the file name is computed from the configured log directory (which
        # may still be unset at this point) together with the database path
        log_file_name = DBBenchRunner.get_info_log_file_name(log_dir_path, db_path)

        # by default logs are dumped in the same location as the database
        directory = log_dir_path if log_dir_path else db_path
        if not directory.endswith("/"):
            directory += "/"

        return (directory + log_file_name, stats_freq_sec)

    def _get_options_command_line_args_str(self, curr_options):
        """
        This method uses the provided Rocksdb OPTIONS to create a string of
        command-line arguments for db_bench.
        The --options_file argument is always given and the options that are
        not supported by the OPTIONS file are given as separate arguments.
        """
        optional_args_str = DBBenchRunner.get_opt_args_str(
            curr_options.get_misc_options()
        )
        # generate an options configuration file
        options_file = curr_options.generate_options_config(nonce="12345")
        optional_args_str += " --options_file=" + options_file
        return optional_args_str

    def _setup_db_before_experiment(self, curr_options, db_path):
        """
        Recreate the database at `db_path` and fill it with a million keys.

        Any existing directory at `db_path` is removed first, then db_bench's
        'fillrandom' benchmark is run with the given options so that the
        experiment starts from a freshly populated database.

        curr_options: options object used to build the db_bench arguments.
        db_path: directory of the database; inserted into the shell command
            as-is (no quoting is applied).
        """
        # remove destination directory if it already exists; with
        # ignore_errors=True rmtree is best-effort (a missing directory or a
        # failed removal is skipped silently), so no exception handler is
        # needed around the call
        shutil.rmtree(db_path, ignore_errors=True)
        # setup database with a million keys using the fillrandom benchmark
        command = "%s --benchmarks=fillrandom --db=%s --num=1000000" % (
            self.db_bench_binary,
            db_path,
        )
        command += self._get_options_command_line_args_str(curr_options)
        self._run_command(command)

    def _build_experiment_command(self, curr_options, db_path):
        command = "%s --benchmarks=%s --statistics --perf_level=3 --db=%s" % (
            self.db_bench_binary,
            self.benchmark,
            db_path,
        )
        # fetch the command-line arguments string for providing Rocksdb options
        args_str = self._get_options_command_line_args_str(curr_options)
        # handle the command-line args passed in the constructor, these
        # arguments are specific to db_bench
        for cmd_line_arg in self.db_bench_args:
            args_str += " --" + cmd_line_arg
        command += args_str
        return command

    def _run_command(self, command):
        out_file = open(self.OUTPUT_FILE, "w+")
        err_file = open(self.ERROR_FILE, "w+")
        print("executing... - " + command)
        subprocess.call(command, shell=True, stdout=out_file, stderr=err_file)
        out_file.close()
        err_file.close()

    def run_experiment(self, db_options, db_path):
        """
        Run one db_bench experiment and collect its data sources.

        The database at `db_path` is recreated and populated, the configured
        benchmark is executed, and the captured output is parsed.

        db_options: options object describing the Rocksdb configuration.
        db_path: directory in which the database is created.

        Returns the tuple (data_sources, throughput): data_sources maps each
        DataSource.Type to a list of sources (the options, the LOG files and
        the time-series stats), and throughput is the ops/sec value parsed
        from the benchmark output (None when it could not be found).
        """
        # setup the Rocksdb database before running experiment
        self._setup_db_before_experiment(db_options, db_path)
        experiment_command = self._build_experiment_command(db_options, db_path)
        # bracket the run with timestamps; they are only consumed by the ODS
        # stats fetcher below
        started_at = int(time.time())
        self._run_command(experiment_command)
        finished_at = int(time.time())
        # parse the db_bench experiment output
        bench_result = self._parse_output(get_perf_context=True)

        # get the log files path prefix and frequency at which Rocksdb stats
        # are dumped in the logs
        logs_file_prefix, stats_freq_sec = self.get_log_options(
            db_options, bench_result[self.DB_PATH]
        )
        # the Rocksdb LOGS object
        db_logs = DatabaseLogs(logs_file_prefix, db_options.get_column_families())
        # time-series sources: stats dumped in the LOG, then the perf context
        # NOTE(review): the meaning of the trailing (0, False) arguments is
        # defined by DatabasePerfContext -- confirm there
        time_series_sources = [
            LogStatsParser(logs_file_prefix, stats_freq_sec),
            DatabasePerfContext(bench_result[self.PERF_CON], 0, False),
        ]
        # the ODS STATS object is added only when ODS arguments were provided
        if self.ods_args:
            if "key_prefix" in self.ods_args:
                key_prefix = self.ods_args["key_prefix"]
            else:
                key_prefix = ""
            ods_stats = OdsStatsFetcher(
                self.ods_args["client_script"],
                self.ods_args["entity"],
                started_at,
                finished_at,
                key_prefix,
            )
            time_series_sources.append(ods_stats)

        data_sources = {
            DataSource.Type.DB_OPTIONS: [db_options],
            DataSource.Type.LOG: [db_logs],
            DataSource.Type.TIME_SERIES: time_series_sources,
        }
        # return the experiment's data-sources and throughput
        return data_sources, bench_result[self.THROUGHPUT]
Commit	Line	Data
11fdf7f2 TL	1	# Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
	2	# This source code is licensed under both the GPLv2 (found in the
	3	# COPYING file in the root directory) and Apache 2.0 License
	4	# (found in the LICENSE.Apache file in the root directory).
	5
11fdf7f2 TL	6	import shutil
	7	import subprocess
	8	import time
	9
1e59de90 TL	10	from advisor.bench_runner import BenchmarkRunner
	11	from advisor.db_log_parser import DatabaseLogs, DataSource, NO_COL_FAMILY
	12	from advisor.db_stats_fetcher import (
	13	DatabasePerfContext,
	14	LogStatsParser,
	15	OdsStatsFetcher,
	16	)
	17
11fdf7f2	18
1e59de90	19	"""
11fdf7f2	20	NOTE: This is not thread-safe, because the output file is simply overwritten.
1e59de90	21	"""
11fdf7f2 TL	22
	23
	24	class DBBenchRunner(BenchmarkRunner):
	25	OUTPUT_FILE = "temp/dbbench_out.tmp"
	26	ERROR_FILE = "temp/dbbench_err.tmp"
	27	DB_PATH = "DB path"
	28	THROUGHPUT = "ops/sec"
	29	PERF_CON = " PERF_CONTEXT:"
	30
	31	@staticmethod
	32	def is_metric_better(new_metric, old_metric):
	33	# for db_bench 'throughput' is the metric returned by run_experiment
	34	return new_metric >= old_metric
	35
	36	@staticmethod
	37	def get_opt_args_str(misc_options_dict):
	38	# given a dictionary of options and their values, return a string
	39	# that can be appended as command-line arguments
	40	optional_args_str = ""
	41	for option_name, option_value in misc_options_dict.items():
	42	if option_value:
1e59de90	43	optional_args_str += " --" + option_name + "=" + str(option_value)
11fdf7f2 TL	44	return optional_args_str
	45
	46	def __init__(self, positional_args, ods_args=None):
	47	# parse positional_args list appropriately
	48	self.db_bench_binary = positional_args[0]
	49	self.benchmark = positional_args[1]
	50	self.db_bench_args = None
	51	if len(positional_args) > 2:
	52	# options list with each option given as "<option>=<value>"
	53	self.db_bench_args = positional_args[2:]
	54	# save ods_args, if provided
	55	self.ods_args = ods_args
	56
	57	def _parse_output(self, get_perf_context=False):
1e59de90	58	"""
11fdf7f2 TL	59	Sample db_bench output after running 'readwhilewriting' benchmark:
	60	DB path: [/tmp/rocksdbtest-155919/dbbench]\n
	61	readwhilewriting : 16.582 micros/op 60305 ops/sec; 4.2 MB/s (3433828\
	62	of 5427999 found)\n
	63	PERF_CONTEXT:\n
	64	user_key_comparison_count = 500466712, block_cache_hit_count = ...\n
1e59de90 TL	65	"""
1e59de90 TL	66	output = {self.THROUGHPUT: None, self.DB_PATH: None, self.PERF_CON: None}
11fdf7f2	67	perf_context_begins = False
1e59de90	68	with open(self.OUTPUT_FILE, "r") as fp:
11fdf7f2 TL	69	for line in fp:
	70	if line.startswith(self.benchmark):
	71	# line from sample output:
	72	# readwhilewriting : 16.582 micros/op 60305 ops/sec; \
	73	# 4.2 MB/s (3433828 of 5427999 found)\n
	74	print(line) # print output of the benchmark run
	75	token_list = line.strip().split()
	76	for ix, token in enumerate(token_list):
	77	if token.startswith(self.THROUGHPUT):
	78	# in above example, throughput = 60305 ops/sec
1e59de90	79	output[self.THROUGHPUT] = float(token_list[ix - 1])
11fdf7f2 TL	80	break
	81	elif get_perf_context and line.startswith(self.PERF_CON):
	82	# the following lines in the output contain perf context
	83	# statistics (refer example above)
	84	perf_context_begins = True
	85	elif get_perf_context and perf_context_begins:
	86	# Sample perf_context output:
	87	# user_key_comparison_count = 500, block_cache_hit_count =\
	88	# 468, block_read_count = 580, block_read_byte = 445, ...
1e59de90	89	token_list = line.strip().split(",")
11fdf7f2 TL	90	# token_list = ['user_key_comparison_count = 500',
	91	# 'block_cache_hit_count = 468','block_read_count = 580'...
	92	perf_context = {
1e59de90	93	tk.split("=")[0].strip(): tk.split("=")[1].strip()
11fdf7f2 TL	94	for tk in token_list
	95	if tk
	96	}
	97	# TODO(poojam23): this is a hack and should be replaced
	98	# with the timestamp that db_bench will provide per printed
	99	# perf_context
	100	timestamp = int(time.time())
	101	perf_context_ts = {}
	102	for stat in perf_context.keys():
1e59de90	103	perf_context_ts[stat] = {timestamp: int(perf_context[stat])}
11fdf7f2 TL	104	output[self.PERF_CON] = perf_context_ts
	105	perf_context_begins = False
	106	elif line.startswith(self.DB_PATH):
	107	# line from sample output:
	108	# DB path: [/tmp/rocksdbtest-155919/dbbench]\n
1e59de90	109	output[self.DB_PATH] = line.split("[")[1].split("]")[0]
11fdf7f2 TL	110	return output
	111
	112	def get_log_options(self, db_options, db_path):
	113	# get the location of the LOG file and the frequency at which stats are
	114	# dumped in the LOG file
	115	log_dir_path = None
	116	stats_freq_sec = None
	117	logs_file_prefix = None
	118
	119	# fetch frequency at which the stats are dumped in the Rocksdb logs
1e59de90	120	dump_period = "DBOptions.stats_dump_period_sec"
11fdf7f2 TL	121	# fetch the directory, if specified, in which the Rocksdb logs are
11fdf7f2 TL	122	# dumped, by default logs are dumped in same location as database
1e59de90	123	log_dir = "DBOptions.db_log_dir"
11fdf7f2 TL	124	log_options = db_options.get_options([dump_period, log_dir])
	125	if dump_period in log_options:
	126	stats_freq_sec = int(log_options[dump_period][NO_COL_FAMILY])
	127	if log_dir in log_options:
	128	log_dir_path = log_options[log_dir][NO_COL_FAMILY]
	129
1e59de90	130	log_file_name = DBBenchRunner.get_info_log_file_name(log_dir_path, db_path)
11fdf7f2 TL	131
	132	if not log_dir_path:
	133	log_dir_path = db_path
1e59de90 TL	134	if not log_dir_path.endswith("/"):
1e59de90 TL	135	log_dir_path += "/"
11fdf7f2 TL	136
	137	logs_file_prefix = log_dir_path + log_file_name
	138	return (logs_file_prefix, stats_freq_sec)
	139
	140	def _get_options_command_line_args_str(self, curr_options):
1e59de90	141	"""
11fdf7f2 TL	142	This method uses the provided Rocksdb OPTIONS to create a string of
	143	command-line arguments for db_bench.
	144	The --options_file argument is always given and the options that are
	145	not supported by the OPTIONS file are given as separate arguments.
1e59de90	146	"""
11fdf7f2 TL	147	optional_args_str = DBBenchRunner.get_opt_args_str(
	148	curr_options.get_misc_options()
	149	)
	150	# generate an options configuration file
1e59de90	151	options_file = curr_options.generate_options_config(nonce="12345")
11fdf7f2 TL	152	optional_args_str += " --options_file=" + options_file
	153	return optional_args_str
	154
	155	def _setup_db_before_experiment(self, curr_options, db_path):
	156	# remove destination directory if it already exists
	157	try:
	158	shutil.rmtree(db_path, ignore_errors=True)
	159	except OSError as e:
1e59de90	160	print("Error: rmdir " + e.filename + " " + e.strerror)
11fdf7f2 TL	161	# setup database with a million keys using the fillrandom benchmark
11fdf7f2 TL	162	command = "%s --benchmarks=fillrandom --db=%s --num=1000000" % (
1e59de90 TL	163	self.db_bench_binary,
1e59de90 TL	164	db_path,
11fdf7f2 TL	165	)
	166	args_str = self._get_options_command_line_args_str(curr_options)
	167	command += args_str
	168	self._run_command(command)
	169
	170	def _build_experiment_command(self, curr_options, db_path):
	171	command = "%s --benchmarks=%s --statistics --perf_level=3 --db=%s" % (
1e59de90 TL	172	self.db_bench_binary,
	173	self.benchmark,
	174	db_path,
11fdf7f2 TL	175	)
	176	# fetch the command-line arguments string for providing Rocksdb options
	177	args_str = self._get_options_command_line_args_str(curr_options)
	178	# handle the command-line args passed in the constructor, these
	179	# arguments are specific to db_bench
	180	for cmd_line_arg in self.db_bench_args:
1e59de90	181	args_str += " --" + cmd_line_arg
11fdf7f2 TL	182	command += args_str
	183	return command
	184
	185	def _run_command(self, command):
	186	out_file = open(self.OUTPUT_FILE, "w+")
	187	err_file = open(self.ERROR_FILE, "w+")
1e59de90	188	print("executing... - " + command)
11fdf7f2 TL	189	subprocess.call(command, shell=True, stdout=out_file, stderr=err_file)
	190	out_file.close()
	191	err_file.close()
	192
	193	def run_experiment(self, db_options, db_path):
	194	# setup the Rocksdb database before running experiment
	195	self._setup_db_before_experiment(db_options, db_path)
	196	# get the command to run the experiment
	197	command = self._build_experiment_command(db_options, db_path)
	198	experiment_start_time = int(time.time())
	199	# run experiment
	200	self._run_command(command)
	201	experiment_end_time = int(time.time())
	202	# parse the db_bench experiment output
	203	parsed_output = self._parse_output(get_perf_context=True)
	204
	205	# get the log files path prefix and frequency at which Rocksdb stats
	206	# are dumped in the logs
	207	logs_file_prefix, stats_freq_sec = self.get_log_options(
	208	db_options, parsed_output[self.DB_PATH]
	209	)
	210	# create the Rocksbd LOGS object
1e59de90	211	db_logs = DatabaseLogs(logs_file_prefix, db_options.get_column_families())
11fdf7f2 TL	212	# Create the Log STATS object
	213	db_log_stats = LogStatsParser(logs_file_prefix, stats_freq_sec)
	214	# Create the PerfContext STATS object
1e59de90	215	db_perf_context = DatabasePerfContext(parsed_output[self.PERF_CON], 0, False)
11fdf7f2 TL	216	# create the data-sources dictionary
	217	data_sources = {
	218	DataSource.Type.DB_OPTIONS: [db_options],
	219	DataSource.Type.LOG: [db_logs],
1e59de90	220	DataSource.Type.TIME_SERIES: [db_log_stats, db_perf_context],
11fdf7f2 TL	221	}
	222	# Create the ODS STATS object
	223	if self.ods_args:
1e59de90 TL	224	key_prefix = ""
	225	if "key_prefix" in self.ods_args:
	226	key_prefix = self.ods_args["key_prefix"]
	227	data_sources[DataSource.Type.TIME_SERIES].append(
	228	OdsStatsFetcher(
	229	self.ods_args["client_script"],
	230	self.ods_args["entity"],
	231	experiment_start_time,
	232	experiment_end_time,
	233	key_prefix,
	234	)
	235	)
11fdf7f2 TL	236	# return the experiment's data-sources and throughput
11fdf7f2 TL	237	return data_sources, parsed_output[self.THROUGHPUT]