[ceph.git] / ceph / src / rocksdb / tools / regression_test.sh

#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
# The RocksDB regression test script.
# REQUIREMENT: must be able to run make db_bench in the current directory
#
# This script will do the following things in order:
#
# 1. check out the specified rocksdb commit.
# 2. build db_bench using the specified commit
# 3. setup test directory $TEST_PATH.  If not specified, then the test directory
#    will be "/tmp/rocksdb/regression_test"
# 4. run set of benchmarks on the specified host
#    (can be either locally or remotely)
# 5. generate report in the $RESULT_PATH.  If RESULT_PATH is not specified,
#    RESULT_PATH will be set to $TEST_PATH/results/current_time
#
# = Examples =
# * Run the regression test using rocksdb commit abcdef that outputs results
#   and temp files in "/my/output/dir"
#
#   TEST_PATH=/my/output/dir COMMIT_ID=abcdef ./tools/regression_test.sh
#
# * Run the regression test on a remote host under "/my/output/dir" directory
#   and stores the result locally in "/my/benchmark/results" using commit
#   abcdef and with the rocksdb options specified in /my/path/to/OPTIONS-012345
#   with 1000000000 keys in each benchmark in the regression test where each
#   key and value are 100 and 900 bytes respectively:
#
#   REMOTE_USER_AT_HOST=yhchiang@my.remote.host \
#       TEST_PATH=/my/output/dir \
#       RESULT_PATH=/my/benchmark/results \
#       COMMIT_ID=abcdef \
#       OPTIONS_FILE=/my/path/to/OPTIONS-012345 \
#       NUM_KEYS=1000000000 \
#       KEY_SIZE=100 \
#       VALUE_SIZE=900 \
#       ./tools/regression_test.sh
#
# = Regression test environmental parameters =
#   DEBUG: If true, then the script will not build db_bench if db_bench already
#       exists
#       Default: 0
#   TEST_MODE: If 1, build the base DB with fillseq,compactall (unless it
#       already exists) and then run the benchmarks
#       if 0, only build the base DB
#       if 2, only run benchmarks
#       Default: 1
#   TEST_PATH: the root directory of the regression test.
#       Default: "/tmp/rocksdb/regression_test"
#       !!! NOTE !!! - a DB will also be saved in $TEST_PATH/../db
#   RESULT_PATH: the directory where the regression results will be generated.
#       Default: "$TEST_PATH/results/current_time"
#   REMOTE_USER_AT_HOST: If set, then test will run on the specified host under
#       TEST_PATH directory and outputs test results locally in RESULT_PATH
#       The REMOTE_USER_AT_HOST should follow the format user-id@host.name
#   DB_PATH: the path where the rocksdb database will be created during the
#       regression test.  Default:  $TEST_PATH/db
#   WAL_PATH: the path where the rocksdb WAL will be written.
#       Default:  empty (db_bench is then run with an empty --wal_dir)
#   OPTIONS_FILE:  If specified, then the regression test will use the specified
#       file to initialize the RocksDB options in its benchmarks.  Note that
#       this feature only works for commits after 88acd93 or rocksdb version
#       later than 4.9.
#   DELETE_TEST_PATH: If true, then the test directory will be deleted
#       after the script ends.
#       Default: 0
#
# = db_bench parameters =
#   NUM_THREADS:  The number of concurrent foreground threads that will issue
#       database operations in the benchmark.  Default: 16.
#   NUM_KEYS:  The key range that will be used in the entire regression test.
#       Default: 1G.
#   NUM_OPS:  The number of operations (reads, writes, or deletes) that will
#       be issued in EACH thread.
#       Default: $NUM_KEYS / $NUM_THREADS
#   KEY_SIZE:  The size of each key in bytes in db_bench.  Default: 100.
#   VALUE_SIZE:  The size of each value in bytes in db_bench.  Default: 900.
#   CACHE_SIZE:  The size of RocksDB block cache used in db_bench.  Default: 1G
#   STATISTICS:  If 1, then statistics is on in db_bench.  Default: 0.
#   COMPRESSION_RATIO:  The compression ratio of the key generated in db_bench.
#       Default: 0.5.
#   HISTOGRAM:  If 1, then the histogram feature of db_bench is on.
#       Default: 1.
#   STATS_PER_INTERVAL:  If 1, then the statistics will be reported for every
#       STATS_INTERVAL_SECONDS seconds.  Default 1.
#   STATS_INTERVAL_SECONDS:  If STATS_PER_INTERVAL is set to 1, then statistics
#       will be reported for every STATS_INTERVAL_SECONDS.  Default 600.
#   MAX_BACKGROUND_FLUSHES:  The maximum number of concurrent flushes in
#       db_bench.  Default: 4.
#   MAX_BACKGROUND_COMPACTIONS:  The maximum number of concurrent compactions
#       in db_bench.  Default: 16.
#   NUM_HIGH_PRI_THREADS:  The number of high-pri threads available for
#       concurrent flushes in db_bench.  Default: 4.
#   NUM_LOW_PRI_THREADS:  The number of low-pri threads available for
#       concurrent compactions in db_bench.  Default: 16.
#   SEEK_NEXTS:  Controls how many Next() will be called after seek.
#       Default: 10.
#   SEED:  random seed that controls the randomness of the benchmark.
#       Default: $( date +%s )

#==============================================================================
#  CONSTANT
#==============================================================================
# Layout of SUMMARY.csv: 23 comma-separated, right-aligned columns.
# TITLE_FORMAT prints the header row (every column is a string).
TITLE_FORMAT='%40s,%25s,%30s,%7s,%9s,%8s,'\
'%10s,%13s,%14s,%11s,%12s,'\
'%7s,%11s,'\
'%9s,%10s,%10s,%10s,%10s,%10s,%5s,'\
'%5s,%5s,%5s\n'  # the last three columns are the time measurements

# DATA_FORMAT prints one result row with the same widths as TITLE_FORMAT;
# the numeric columns use %.0f, so they are written without decimals.
DATA_FORMAT='%40s,%25s,%30s,%7s,%9s,%8s,'\
'%10s,%13.0f,%14s,%11s,%12s,'\
'%7s,%11s,'\
'%9.0f,%10.0f,%10.0f,%10.0f,%10.0f,%10.0f,%5.0f,'\
'%5.0f,%5.0f,%5.0f\n'  # the last three columns are the time measurements

# Regex capturing the ops/sec figure of a db_bench result line.  The "$1"
# prefix is expanded here, at top level, so it is the script's first
# command-line argument (empty when the script is run without arguments).
MAIN_PATTERN="${1}"'[[:blank:]]+:.*[[:blank:]]+([0-9\.]+)[[:blank:]]+ops/sec'
# Regex capturing the five percentile values of a "Percentiles:" line.
PERC_PATTERN='Percentiles: P50: ([0-9\.]+) P75: ([0-9\.]+) '\
'P99: ([0-9\.]+) P99.9: ([0-9\.]+) P99.99: ([0-9\.]+)'
#==============================================================================

function main {
  # Resolve the test root and load every tunable before doing any work.
  TEST_ROOT_DIR=${TEST_PATH:-"/tmp/rocksdb/regression_test"}
  init_arguments $TEST_ROOT_DIR

  build_db_bench_and_ldb
  setup_test_directory

  # TEST_MODE 0 and 1: build the reusable base DB unless $ORIGIN_PATH
  # already exists on the target host.
  if [ "$TEST_MODE" -le 1 ] && ! test_remote "test -d $ORIGIN_PATH"; then
    echo "Building DB..."
    # compactall alone will not print ops or threads, which will fail update_report
    run_db_bench "fillseq,compactall" $NUM_KEYS 1 0 0
    # only save for future use on success
    test_remote "mv $DB_PATH $ORIGIN_PATH"
  fi

  # TEST_MODE 1 and 2: run every benchmark against a checkpoint of the base DB.
  if [ "$TEST_MODE" -ge 1 ]; then
    build_checkpoint
    local bench
    for bench in readrandom readwhilewriting deleterandom seekrandom \
        seekrandomwhilewriting multireadrandom; do
      run_db_bench "$bench"
    done
  fi

  cleanup_test_directory $TEST_ROOT_DIR
  echo ""
  echo "Benchmark completed!  Results are available in $RESULT_PATH"
}

############################################################################
# Resolves every tunable of the regression test into a global variable.
# A value already present in the environment is kept; otherwise the default
# shown on the corresponding line applies.
#
# $1 --- root directory of the regression test
# $2 --- unused
# $3 --- (optional) DB path.  Falls back to $DB_PATH, then "$1/db"
# $4 --- (optional) WAL path.  Falls back to $WAL_PATH, then empty
# $5 --- (optional) directory holding db_bench and ldb.  Default: "." for a
#        local run, "$1/db_bench" when REMOTE_USER_AT_HOST is set
function init_arguments {
  K=1024
  M=$((1024 * K))
  G=$((1024 * M))

  current_time=$(date +"%F-%H:%M:%S")
  RESULT_PATH=${RESULT_PATH:-"$1/results/$current_time"}
  # Always derived from the working tree (hg first, then git); a COMMIT_ID
  # supplied in the environment is overwritten.
  COMMIT_ID=$(hg id -i 2>/dev/null || git rev-parse HEAD 2>/dev/null || echo 'unknown')
  SUMMARY_FILE="$RESULT_PATH/SUMMARY.csv"

  # DB_PATH and WAL_PATH are documented as environment parameters, so honor
  # the environment when the positional argument is not given.
  DB_PATH=${3:-${DB_PATH:-"$1/db"}}
  # The reusable base DB lives two directory levels above DB_PATH, in "db".
  ORIGIN_PATH=${ORIGIN_PATH:-"$(dirname "$(dirname "$DB_PATH")")/db"}
  WAL_PATH=${4:-${WAL_PATH:-""}}
  if [ -z "$REMOTE_USER_AT_HOST" ]; then
    DB_BENCH_DIR=${5:-"."}
  else
    DB_BENCH_DIR=${5:-"$1/db_bench"}
  fi

  DEBUG=${DEBUG:-0}
  TEST_MODE=${TEST_MODE:-1}
  SCP=${SCP:-"scp"}
  SSH=${SSH:-"ssh"}
  NUM_THREADS=${NUM_THREADS:-16}
  NUM_KEYS=${NUM_KEYS:-$((1 * G))}  # key range
  NUM_OPS=${NUM_OPS:-$((NUM_KEYS / NUM_THREADS))}
  KEY_SIZE=${KEY_SIZE:-100}
  VALUE_SIZE=${VALUE_SIZE:-900}
  CACHE_SIZE=${CACHE_SIZE:-$((1 * G))}
  STATISTICS=${STATISTICS:-0}
  COMPRESSION_RATIO=${COMPRESSION_RATIO:-0.5}
  HISTOGRAM=${HISTOGRAM:-1}
  NUM_MULTI_DB=${NUM_MULTI_DB:-1}
  STATS_PER_INTERVAL=${STATS_PER_INTERVAL:-1}
  STATS_INTERVAL_SECONDS=${STATS_INTERVAL_SECONDS:-600}
  MAX_BACKGROUND_FLUSHES=${MAX_BACKGROUND_FLUSHES:-4}
  MAX_BACKGROUND_COMPACTIONS=${MAX_BACKGROUND_COMPACTIONS:-16}
  NUM_HIGH_PRI_THREADS=${NUM_HIGH_PRI_THREADS:-4}
  NUM_LOW_PRI_THREADS=${NUM_LOW_PRI_THREADS:-16}
  DELETE_TEST_PATH=${DELETE_TEST_PATH:-0}
  SEEK_NEXTS=${SEEK_NEXTS:-10}
  SEED=${SEED:-$( date +%s )}
  MULTIREAD_BATCH_SIZE=${MULTIREAD_BATCH_SIZE:-128}
  MULTIREAD_STRIDE=${MULTIREAD_STRIDE:-12}
  PERF_LEVEL=${PERF_LEVEL:-1}
}

# Runs one db_bench benchmark (locally, or through $SSH when
# REMOTE_USER_AT_HOST is set), appends its output to "$RESULT_PATH/$1" and
# optionally adds a row to the summary report.
#
# $1 --- benchmark name
# $2 --- number of operations per thread.  Default: $NUM_OPS
# $3 --- number of threads.  Default $NUM_THREADS
# $4 --- use_existing_db.  Default: 1
# $5 --- update_report. Default: 1
function run_db_bench {
  # Make sure no other db_bench is running. (Make sure command succeeds if pidof
  # command exists but finds nothing.)
  pids_cmd='pidof db_bench || pidof --version > /dev/null'
  # But first, make best effort to kill any db_bench that have run for more
  # than 12 hours, as that indicates a hung or runaway process.
  kill_old_cmd='for PID in $(pidof db_bench); do [ "$(($(stat -c %Y /proc/$PID) + 43200))" -lt "$(date +%s)" ] && echo "Killing old db_bench $PID" && kill $PID && sleep 5 && kill -9 $PID && sleep 5; done; pidof --version > /dev/null'
  if [ -n "$REMOTE_USER_AT_HOST" ]; then
    pids_cmd="$SSH $REMOTE_USER_AT_HOST '$pids_cmd'"
    kill_old_cmd="$SSH $REMOTE_USER_AT_HOST '$kill_old_cmd'"
  fi

  # Quote the eval argument so the command text reaches eval untouched by
  # word splitting and pathname expansion.
  eval "$kill_old_cmd"
  exit_on_error $? "$kill_old_cmd"

  pids_output="$(eval "$pids_cmd")"
  exit_on_error $? "$pids_cmd"

  if [ "$pids_output" != "" ]; then
    echo "Stopped regression_test.sh as there're still recent db_bench "
    echo "processes running: $pids_output"
    echo "Clean up test directory"
    cleanup_test_directory $TEST_ROOT_DIR
    exit 2
  fi

  # Build db_bench command
  ops=${2:-$NUM_OPS}
  threads=${3:-$NUM_THREADS}
  USE_EXISTING_DB=${4:-1}
  UPDATE_REPORT=${5:-1}
  echo ""
  echo "======================================================================="
  echo "Benchmark $1"
  echo "======================================================================="
  echo ""
  local options_rc
  options_file_arg=$(setup_options_file)
  options_rc=$?
  # setup_options_file runs in a subshell here, so an `exit` raised inside it
  # (e.g. by a failed copy of the options file) only ends that subshell.
  # Surface whatever it printed and stop instead of running db_bench with the
  # error text spliced into its command line.
  if [ "$options_rc" -ne 0 ]; then
    echo "$options_file_arg"
    exit "$options_rc"
  fi
  echo "$options_file_arg"
  # use `which time` to avoid using bash's internal time command
  db_bench_cmd="\$(which time) -p $DB_BENCH_DIR/db_bench \
      --benchmarks=$1 --db=$DB_PATH --wal_dir=$WAL_PATH \
      --use_existing_db=$USE_EXISTING_DB \
      --perf_level=$PERF_LEVEL \
      --disable_auto_compactions \
      --threads=$threads \
      --num=$NUM_KEYS \
      --reads=$ops \
      --writes=$ops \
      --deletes=$ops \
      --key_size=$KEY_SIZE \
      --value_size=$VALUE_SIZE \
      --cache_size=$CACHE_SIZE \
      --statistics=$STATISTICS \
      $options_file_arg \
      --compression_ratio=$COMPRESSION_RATIO \
      --histogram=$HISTOGRAM \
      --seek_nexts=$SEEK_NEXTS \
      --stats_per_interval=$STATS_PER_INTERVAL \
      --stats_interval_seconds=$STATS_INTERVAL_SECONDS \
      --max_background_flushes=$MAX_BACKGROUND_FLUSHES \
      --num_multi_db=$NUM_MULTI_DB \
      --max_background_compactions=$MAX_BACKGROUND_COMPACTIONS \
      --num_high_pri_threads=$NUM_HIGH_PRI_THREADS \
      --num_low_pri_threads=$NUM_LOW_PRI_THREADS \
      --seed=$SEED \
      --multiread_batched=true \
      --batch_size=$MULTIREAD_BATCH_SIZE \
      --multiread_stride=$MULTIREAD_STRIDE 2>&1"
  if [ -n "$REMOTE_USER_AT_HOST" ]; then
    echo "Running benchmark remotely on $REMOTE_USER_AT_HOST"
    db_bench_cmd="$SSH $REMOTE_USER_AT_HOST '$db_bench_cmd'"
  fi
  echo db_bench_cmd="$db_bench_cmd"

  # Run the db_bench command.  PIPESTATUS[0] is the status of db_bench itself
  # rather than that of tee.
  eval "$db_bench_cmd" | tee -a "$RESULT_PATH/$1"
  exit_on_error ${PIPESTATUS[0]} db_bench
  if [ "$UPDATE_REPORT" -ne 0 ]; then
    update_report "$1" "$RESULT_PATH/$1" "$ops" "$threads"
  fi
}

function build_checkpoint {
    # Every command below runs locally, or through ssh on the remote host
    # when REMOTE_USER_AT_HOST is set.
    local -a runner=()
    if [ -n "$REMOTE_USER_AT_HOST" ]; then
        runner=($SSH $REMOTE_USER_AT_HOST)
    fi

    if [ "$NUM_MULTI_DB" -gt 1 ]; then
        # One checkpoint per directory that find reports under $ORIGIN_PATH
        # with exactly two hard links; its base name is reused under $DB_PATH.
        local leaf idx
        for leaf in $("${runner[@]}" find $ORIGIN_PATH -type d -links 2); do
            idx=$(basename $leaf)
            echo "Building checkpoints: $ORIGIN_PATH/$idx -> $DB_PATH/$idx ..."
            "${runner[@]}" $DB_BENCH_DIR/ldb checkpoint \
                --checkpoint_dir=$DB_PATH/$idx \
                --db=$ORIGIN_PATH/$idx --try_load_options 2>&1
            exit_on_error $?
        done
    else
        # checkpoint cannot build in directory already exists
        "${runner[@]}" rm -rf $DB_PATH
        echo "Building checkpoint: $ORIGIN_PATH -> $DB_PATH ..."
        "${runner[@]}" $DB_BENCH_DIR/ldb checkpoint \
            --checkpoint_dir=$DB_PATH \
            --db=$ORIGIN_PATH --try_load_options 2>&1
        exit_on_error $?
    fi
}

# Prints the product of $1 and $2.  The arithmetic is delegated to bc, so
# operands with a fractional part are accepted.
function multiply {
  bc <<< "$1 * $2"
}

# Extracts the results of one benchmark from its db_bench log and appends
# them as one row to $SUMMARY_FILE (layout given by $DATA_FORMAT).
#
# $1 --- name of the benchmark
# $2 --- the filename of the output log of db_bench
# $3 --- number of operations per thread used for the run
# $4 --- number of threads used for the run
function update_report {
  local main_result perc_statement
  main_result=$(grep -- "$1" "$2")
  exit_on_error $?
  perc_statement=$(grep Percentile "$2")
  exit_on_error $?

  # Obtain ops / sec
  [[ $main_result =~ $MAIN_PATTERN ]]
  local ops_per_s=${BASH_REMATCH[1]}

  # Obtain percentile information: p50, p75, p99, p99.9, p99.99
  [[ $perc_statement =~ $PERC_PATTERN ]]
  local -a perc=("${BASH_REMATCH[@]:1:5}")

  # Parse the output of the time command (expected in the last three lines)
  local real_sec user_sec sys_sec
  real_sec=$(tail -3 "$2" | awk '/real/ {print $2}')
  user_sec=$(tail -3 "$2" | awk '/user/ {print $2}')
  sys_sec=$(tail -3 "$2" | awk '/sys/ {print $2}')

  # The ratio is written multiplied by 100 and each percentile multiplied
  # by 1000.
  local compress_pct
  compress_pct=$(multiply "$COMPRESSION_RATIO" 100)
  local -a perc_scaled=()
  local i
  for i in 0 1 2 3 4; do
    perc_scaled[i]=$(multiply "${perc[i]}" 1000)
  done

  # Every argument is quoted so that an empty value (REMOTE_USER_AT_HOST is
  # empty for a local run) still occupies its own column instead of shifting
  # all later columns to the left.  Numeric columns fall back to 0, which is
  # what printf prints for a missing argument.
  printf "$DATA_FORMAT" \
    "$COMMIT_ID" "$1" "$REMOTE_USER_AT_HOST" "$NUM_MULTI_DB" "$NUM_KEYS" \
    "$KEY_SIZE" "$VALUE_SIZE" \
    "${compress_pct:-0}" \
    "$3" "$4" "$CACHE_SIZE" \
    "$MAX_BACKGROUND_FLUSHES" "$MAX_BACKGROUND_COMPACTIONS" \
    "${ops_per_s:-0}" \
    "${perc_scaled[0]:-0}" \
    "${perc_scaled[1]:-0}" \
    "${perc_scaled[2]:-0}" \
    "${perc_scaled[3]:-0}" \
    "${perc_scaled[4]:-0}" \
    "${DEBUG:-0}" \
    "${real_sec:-0}" \
    "${user_sec:-0}" \
    "${sys_sec:-0}" \
    >> "$SUMMARY_FILE"
  exit_on_error $?
}

# Terminates the script when a previous command failed.
#
# $1 --- exit status to check; any value other than 0 is a failure and
#        becomes the exit status of the script
# $2 --- (optional) the command that failed, printed to help diagnosis
function exit_on_error {
  if [ "$1" -ne 0 ]; then
    echo ""
    echo "ERROR: Benchmark did not complete successfully."
    if [ -n "$2" ]; then
      echo "Failure command: $2"
    fi
    echo "Partial results are output to $RESULT_PATH"
    # Mark the failure in the summary, but only when its directory exists:
    # a failure can happen before the result directory has been created, and
    # the redirect would then only add a second, unrelated error message.
    if [ -n "$SUMMARY_FILE" ] && [ -d "$(dirname "$SUMMARY_FILE")" ]; then
      echo "ERROR" >> "$SUMMARY_FILE"
    fi
    exit "$1"
  fi
}

# Rebuilds db_bench and ldb from scratch in the current directory; a failing
# make step is handed to exit_on_error.
function build_db_bench_and_ldb {
  echo "Building db_bench & ldb ..."

  local status=0
  make clean || status=$?
  exit_on_error $status

  # DEBUG_LEVEL=0 is placed in make's environment for this invocation only.
  status=0
  DEBUG_LEVEL=0 make db_bench ldb -j32 || status=$?
  exit_on_error $status
}

# Runs command string $1 through test_remote and hands its exit status,
# together with the command text, to exit_on_error.
function run_remote {
  local remote_cmd=$1
  local status=0
  test_remote "$remote_cmd" || status=$?
  exit_on_error $status "$remote_cmd"
}

# Evaluates command string $1 and returns its exit status without aborting.
# When REMOTE_USER_AT_HOST is set, the command is wrapped in single quotes
# and passed to $SSH for that host; otherwise it is evaluated in this shell.
function test_remote {
  cmd="$1"
  if [ -n "$REMOTE_USER_AT_HOST" ]; then
    cmd="$SSH $REMOTE_USER_AT_HOST '$cmd'"
  fi
  eval "$cmd"
}

# Evaluates command string $1 in the current shell and hands its exit
# status, together with the command text, to exit_on_error.
function run_local {
  local status=0
  eval "$1" || status=$?
  exit_on_error $status "$1"
}

# Prints the "--options_file=..." argument for db_bench, followed by an empty
# line; prints only the empty line when OPTIONS_FILE is not set.  For a remote
# run the options file is first copied with $SCP to
# "$DB_BENCH_DIR/OPTIONS_FILE" on the remote host and that path is printed.
function setup_options_file {
  if [ -n "$OPTIONS_FILE" ]; then
    options_file="$OPTIONS_FILE"
    if [ -n "$REMOTE_USER_AT_HOST" ]; then
      options_file="$DB_BENCH_DIR/OPTIONS_FILE"
      run_local "$SCP $OPTIONS_FILE $REMOTE_USER_AT_HOST:$options_file"
    fi
    printf '%s\n' "--options_file=$options_file"
  fi
  printf '\n'
}

# Recreates the directories used by a run and starts a new summary file:
# wipes and recreates DB_PATH, the WAL directory (when WAL_PATH is set) and
# the db_bench directory on the target host, wipes and recreates RESULT_PATH
# locally, copies db_bench and ldb to the remote host for remote runs, and
# appends the header row to $SUMMARY_FILE.
function setup_test_directory {
  echo "Deleting old regression test directories and creating new ones"

  # Stop (through run_local) rather than remove the current directory.
  run_local 'test "$DB_PATH" != "."'
  run_remote "rm -rf $DB_PATH"

  # "." is the tool directory of a local run and must be kept.
  [ "$DB_BENCH_DIR" = "." ] || run_remote "rm -rf $DB_BENCH_DIR"

  run_local 'test "$RESULT_PATH" != "."'
  run_local "rm -rf $RESULT_PATH"

  if [ -n "$WAL_PATH" ]; then
    local wal_step
    for wal_step in "rm -rf" "mkdir -p"; do
      run_remote "$wal_step $WAL_PATH"
    done
  fi

  run_remote "mkdir -p $DB_PATH"

  run_remote "mkdir -p $DB_BENCH_DIR"
  run_remote "ls -l $DB_BENCH_DIR"

  if [ -n "$REMOTE_USER_AT_HOST" ]; then
    local tool
    for tool in db_bench ldb; do
      run_local "$SCP ./$tool $REMOTE_USER_AT_HOST:$DB_BENCH_DIR/$tool"
    done
  fi

  run_local "mkdir -p $RESULT_PATH"

  # Header row; one title per column of TITLE_FORMAT.
  local -a titles=(
    "commit id" "benchmark" "user@host" "num-dbs" "key-range" "key-size"
    "value-size" "compress-rate" "ops-per-thread" "num-threads" "cache-size"
    "flushes" "compactions"
    "ops-per-s" "p50" "p75" "p99" "p99.9" "p99.99" "debug"
    "real-sec" "user-sec" "sys-sec"
  )
  (printf $TITLE_FORMAT "${titles[@]}" >> $SUMMARY_FILE)
  exit_on_error $?
}

# Final cleanup.  When DELETE_TEST_PATH is non-zero, removes DB_PATH,
# WAL_PATH, the db_bench directory (remote runs only) and finally the test
# root directory $1 on the target host; otherwise only prints where the DB
# and the WAL were left.
#
# $1 --- root directory of the regression test
function cleanup_test_directory {
  if [ "$DELETE_TEST_PATH" -ne 0 ]; then
    echo "Clear old regression test directories and creating new ones"
    local -a doomed=("$DB_PATH" "$WAL_PATH")
    if [ -n "$REMOTE_USER_AT_HOST" ]; then
      doomed+=("$DB_BENCH_DIR")
    fi
    doomed+=("$1")
    local target
    for target in "${doomed[@]}"; do
      run_remote "rm -rf $target"
    done
  else
    printf '%s\n' \
      "------------ DEBUG MODE ------------" \
      "DB  PATH: $DB_PATH" \
      "WAL PATH: $WAL_PATH"
  fi
}

############################################################################

# Entry point: pass the script's arguments through unchanged.  "$@" keeps
# each argument as one word, so no shellcheck suppression is needed.
main "$@"
Commit	Line	Data
11fdf7f2	1	#!/usr/bin/env bash
f67539c2	2	# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
7c673cae FG	3	# The RocksDB regression test script.
	4	# REQUIREMENT: must be able to run make db_bench in the current directory
	5	#
	6	# This script will do the following things in order:
	7	#
	8	# 1. check out the specified rocksdb commit.
	9	# 2. build db_bench using the specified commit
	10	# 3. setup test directory $TEST_PATH. If not specified, then the test directory
	11	# will be "/tmp/rocksdb/regression_test"
	12	# 4. run set of benchmarks on the specified host
	13	# (can be either locally or remotely)
	14	# 5. generate report in the $RESULT_PATH. If RESULT_PATH is not specified,
	15	# RESULT_PATH will be set to $TEST_PATH/current_time
	16	#
	17	# = Examples =
	18	# * Run the regression test using rocksdb commit abcdef that outputs results
	19	# and temp files in "/my/output/dir"
	20	#r
	21	# TEST_PATH=/my/output/dir COMMIT_ID=abcdef ./tools/regression_test.sh
	22	#
	23	# * Run the regression test on a remost host under "/my/output/dir" directory
	24	# and stores the result locally in "/my/benchmark/results" using commit
	25	# abcdef and with the rocksdb options specified in /my/path/to/OPTIONS-012345
	26	# with 1000000000 keys in each benchmark in the regression test where each
	27	# key and value are 100 and 900 bytes respectively:
	28	#
	29	# REMOTE_USER_AT_HOST=yhchiang@my.remote.host \
	30	# TEST_PATH=/my/output/dir \
	31	# RESULT_PATH=/my/benchmark/results \
	32	# COMMIT_ID=abcdef \
	33	# OPTIONS_FILE=/my/path/to/OPTIONS-012345 \
	34	# NUM_KEYS=1000000000 \
	35	# KEY_SIZE=100 \
	36	# VALUE_SIZE=900 \
	37	# ./tools/regression_test.sh
	38	#
	39	# = Regression test environmental parameters =
1e59de90 TL	40	# DEBUG: If true, then the script will not build db_bench if db_bench already
1e59de90 TL	41	# exists
7c673cae FG	42	# Default: 0
	43	# TEST_MODE: If 1, run fillseqdeterminstic and benchmarks both
	44	# if 0, only run fillseqdeterministc
	45	# if 2, only run benchmarks
	46	# Default: 1
	47	# TEST_PATH: the root directory of the regression test.
	48	# Default: "/tmp/rocksdb/regression_test"
1e59de90	49	# !!! NOTE !!! - a DB will also be saved in $TEST_PATH/../db
7c673cae FG	50	# RESULT_PATH: the directory where the regression results will be generated.
	51	# Default: "$TEST_PATH/current_time"
	52	# REMOTE_USER_AT_HOST: If set, then test will run on the specified host under
	53	# TEST_PATH directory and outputs test results locally in RESULT_PATH
	54	# The REMOTE_USER_AT_HOST should follow the format user-id@host.name
	55	# DB_PATH: the path where the rocksdb database will be created during the
	56	# regression test. Default: $TEST_PATH/db
	57	# WAL_PATH: the path where the rocksdb WAL will be outputed.
	58	# Default: $TEST_PATH/wal
	59	# OPTIONS_FILE: If specified, then the regression test will use the specified
	60	# file to initialize the RocksDB options in its benchmarks. Note that
	61	# this feature only work for commits after 88acd93 or rocksdb version
	62	# later than 4.9.
	63	# DELETE_TEST_PATH: If true, then the test directory will be deleted
	64	# after the script ends.
	65	# Default: 0
	66	#
	67	# = db_bench parameters =
	68	# NUM_THREADS: The number of concurrent foreground threads that will issue
	69	# database operations in the benchmark. Default: 16.
	70	# NUM_KEYS: The key range that will be used in the entire regression test.
	71	# Default: 1G.
	72	# NUM_OPS: The number of operations (reads, writes, or deletes) that will
	73	# be issued in EACH thread.
	74	# Default: $NUM_KEYS / $NUM_THREADS
	75	# KEY_SIZE: The size of each key in bytes in db_bench. Default: 100.
	76	# VALUE_SIZE: The size of each value in bytes in db_bench. Default: 900.
	77	# CACHE_SIZE: The size of RocksDB block cache used in db_bench. Default: 1G
	78	# STATISTICS: If 1, then statistics is on in db_bench. Default: 0.
	79	# COMPRESSION_RATIO: The compression ratio of the key generated in db_bench.
	80	# Default: 0.5.
	81	# HISTOGRAM: If 1, then the histogram feature on performance feature is on.
	82	# STATS_PER_INTERVAL: If 1, then the statistics will be reported for every
	83	# STATS_INTERVAL_SECONDS seconds. Default 1.
	84	# STATS_INTERVAL_SECONDS: If STATS_PER_INTERVAL is set to 1, then statistics
	85	# will be reported for every STATS_INTERVAL_SECONDS. Default 60.
	86	# MAX_BACKGROUND_FLUSHES: The maxinum number of concurrent flushes in
	87	# db_bench. Default: 4.
	88	# MAX_BACKGROUND_COMPACTIONS: The maximum number of concurrent compactions
	89	# in db_bench. Default: 16.
11fdf7f2 TL	90	# NUM_HIGH_PRI_THREADS: The number of high-pri threads available for
	91	# concurrent flushes in db_bench. Default: 4.
	92	# NUM_LOW_PRI_THREADS: The number of low-pri threads available for
	93	# concurrent compactions in db_bench. Default: 16.
7c673cae FG	94	# SEEK_NEXTS: Controls how many Next() will be called after seek.
	95	# Default: 10.
	96	# SEED: random seed that controls the randomness of the benchmark.
	97	# Default: $( date +%s )
	98
	99	#==============================================================================
	100	# CONSTANT
	101	#==============================================================================
	102	TITLE_FORMAT="%40s,%25s,%30s,%7s,%9s,%8s,"
	103	TITLE_FORMAT+="%10s,%13s,%14s,%11s,%12s,"
	104	TITLE_FORMAT+="%7s,%11s,"
	105	TITLE_FORMAT+="%9s,%10s,%10s,%10s,%10s,%10s,%5s,"
	106	TITLE_FORMAT+="%5s,%5s,%5s" # time
	107	TITLE_FORMAT+="\n"
	108
	109	DATA_FORMAT="%40s,%25s,%30s,%7s,%9s,%8s,"
	110	DATA_FORMAT+="%10s,%13.0f,%14s,%11s,%12s,"
	111	DATA_FORMAT+="%7s,%11s,"
	112	DATA_FORMAT+="%9.0f,%10.0f,%10.0f,%10.0f,%10.0f,%10.0f,%5.0f,"
	113	DATA_FORMAT+="%5.0f,%5.0f,%5.0f" # time
	114	DATA_FORMAT+="\n"
	115
	116	MAIN_PATTERN="$1""[[:blank:]]+:.*[[:blank:]]+([0-9\.]+)[[:blank:]]+ops/sec"
	117	PERC_PATTERN="Percentiles: P50: ([0-9\.]+) P75: ([0-9\.]+) "
	118	PERC_PATTERN+="P99: ([0-9\.]+) P99.9: ([0-9\.]+) P99.99: ([0-9\.]+)"
	119	#==============================================================================
	120
	121	function main {
11fdf7f2 TL	122	TEST_ROOT_DIR=${TEST_PATH:-"/tmp/rocksdb/regression_test"}
	123	init_arguments $TEST_ROOT_DIR
	124
	125	build_db_bench_and_ldb
7c673cae FG	126
	127	setup_test_directory
	128	if [ $TEST_MODE -le 1 ]; then
1e59de90	129	test_remote "test -d $ORIGIN_PATH"
11fdf7f2	130	if [[ $? -ne 0 ]]; then
7c673cae	131	echo "Building DB..."
11fdf7f2 TL	132	# compactall alone will not print ops or threads, which will fail update_report
11fdf7f2 TL	133	run_db_bench "fillseq,compactall" $NUM_KEYS 1 0 0
1e59de90 TL	134	# only save for future use on success
1e59de90 TL	135	test_remote "mv $DB_PATH $ORIGIN_PATH"
7c673cae	136	fi
7c673cae FG	137	fi
	138	if [ $TEST_MODE -ge 1 ]; then
	139	build_checkpoint
	140	run_db_bench "readrandom"
	141	run_db_bench "readwhilewriting"
1e59de90	142	run_db_bench "deleterandom"
7c673cae FG	143	run_db_bench "seekrandom"
7c673cae FG	144	run_db_bench "seekrandomwhilewriting"
1e59de90	145	run_db_bench "multireadrandom"
7c673cae FG	146	fi
7c673cae FG	147
11fdf7f2	148	cleanup_test_directory $TEST_ROOT_DIR
7c673cae FG	149	echo ""
	150	echo "Benchmark completed! Results are available in $RESULT_PATH"
	151	}
	152
	153	############################################################################
	154	function init_arguments {
	155	K=1024
	156	M=$((1024 * K))
	157	G=$((1024 * M))
	158
	159	current_time=$(date +"%F-%H:%M:%S")
	160	RESULT_PATH=${RESULT_PATH:-"$1/results/$current_time"}
1e59de90	161	COMMIT_ID=`hg id -i 2>/dev/null \|\| git rev-parse HEAD 2>/dev/null \|\| echo 'unknown'`
7c673cae FG	162	SUMMARY_FILE="$RESULT_PATH/SUMMARY.csv"
	163
	164	DB_PATH=${3:-"$1/db"}
	165	ORIGIN_PATH=${ORIGIN_PATH:-"$(dirname $(dirname $DB_PATH))/db"}
	166	WAL_PATH=${4:-""}
	167	if [ -z "$REMOTE_USER_AT_HOST" ]; then
	168	DB_BENCH_DIR=${5:-"."}
	169	else
	170	DB_BENCH_DIR=${5:-"$1/db_bench"}
	171	fi
	172
	173	DEBUG=${DEBUG:-0}
	174	TEST_MODE=${TEST_MODE:-1}
	175	SCP=${SCP:-"scp"}
	176	SSH=${SSH:-"ssh"}
	177	NUM_THREADS=${NUM_THREADS:-16}
	178	NUM_KEYS=${NUM_KEYS:-$((1 * G))} # key range
	179	NUM_OPS=${NUM_OPS:-$(($NUM_KEYS / $NUM_THREADS))}
	180	KEY_SIZE=${KEY_SIZE:-100}
	181	VALUE_SIZE=${VALUE_SIZE:-900}
	182	CACHE_SIZE=${CACHE_SIZE:-$((1 * G))}
	183	STATISTICS=${STATISTICS:-0}
	184	COMPRESSION_RATIO=${COMPRESSION_RATIO:-0.5}
	185	HISTOGRAM=${HISTOGRAM:-1}
	186	NUM_MULTI_DB=${NUM_MULTI_DB:-1}
	187	STATS_PER_INTERVAL=${STATS_PER_INTERVAL:-1}
	188	STATS_INTERVAL_SECONDS=${STATS_INTERVAL_SECONDS:-600}
	189	MAX_BACKGROUND_FLUSHES=${MAX_BACKGROUND_FLUSHES:-4}
	190	MAX_BACKGROUND_COMPACTIONS=${MAX_BACKGROUND_COMPACTIONS:-16}
11fdf7f2 TL	191	NUM_HIGH_PRI_THREADS=${NUM_HIGH_PRI_THREADS:-4}
11fdf7f2 TL	192	NUM_LOW_PRI_THREADS=${NUM_LOW_PRI_THREADS:-16}
7c673cae FG	193	DELETE_TEST_PATH=${DELETE_TEST_PATH:-0}
	194	SEEK_NEXTS=${SEEK_NEXTS:-10}
	195	SEED=${SEED:-$( date +%s )}
1e59de90 TL	196	MULTIREAD_BATCH_SIZE=${MULTIREAD_BATCH_SIZE:-128}
	197	MULTIREAD_STRIDE=${MULTIREAD_STRIDE:-12}
	198	PERF_LEVEL=${PERF_LEVEL:-1}
7c673cae FG	199	}
	200
	201	# $1 --- benchmark name
	202	# $2 --- number of operations. Default: $NUM_KEYS
	203	# $3 --- number of threads. Default $NUM_THREADS
	204	# $4 --- use_existing_db. Default: 1
11fdf7f2	205	# $5 --- update_report. Default: 1
7c673cae	206	function run_db_bench {
1e59de90 TL	207	# Make sure no other db_bench is running. (Make sure command succeeds if pidof
	208	# command exists but finds nothing.)
	209	pids_cmd='pidof db_bench \|\| pidof --version > /dev/null'
	210	# But first, make best effort to kill any db_bench that have run for more
	211	# than 12 hours, as that indicates a hung or runaway process.
	212	kill_old_cmd='for PID in $(pidof db_bench); do [ "$(($(stat -c %Y /proc/$PID) + 43200))" -lt "$(date +%s)" ] && echo "Killing old db_bench $PID" && kill $PID && sleep 5 && kill -9 $PID && sleep 5; done; pidof --version > /dev/null'
	213	if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
	214	pids_cmd="$SSH $REMOTE_USER_AT_HOST '$pids_cmd'"
	215	kill_old_cmd="$SSH $REMOTE_USER_AT_HOST '$kill_old_cmd'"
	216	fi
	217
	218	eval $kill_old_cmd
	219	exit_on_error $? "$kill_old_cmd"
	220
	221	pids_output="$(eval $pids_cmd)"
	222	exit_on_error $? "$pids_cmd"
7c673cae	223
1e59de90 TL	224	if [ "$pids_output" != "" ]; then
	225	echo "Stopped regression_test.sh as there're still recent db_bench "
	226	echo "processes running: $pids_output"
	227	echo "Clean up test directory"
	228	cleanup_test_directory $TEST_ROOT_DIR
	229	exit 2
	230	fi
	231
	232	# Build db_bench command
7c673cae FG	233	ops=${2:-$NUM_OPS}
7c673cae FG	234	threads=${3:-$NUM_THREADS}
11fdf7f2 TL	235	USE_EXISTING_DB=${4:-1}
11fdf7f2 TL	236	UPDATE_REPORT=${5:-1}
7c673cae FG	237	echo ""
	238	echo "======================================================================="
	239	echo "Benchmark $1"
	240	echo "======================================================================="
	241	echo ""
	242	db_bench_error=0
	243	options_file_arg=$(setup_options_file)
	244	echo "$options_file_arg"
	245	# use `which time` to avoid using bash's internal time command
1e59de90	246	db_bench_cmd="\$(which time) -p $DB_BENCH_DIR/db_bench \
7c673cae FG	247	--benchmarks=$1 --db=$DB_PATH --wal_dir=$WAL_PATH \
7c673cae FG	248	--use_existing_db=$USE_EXISTING_DB \
1e59de90	249	--perf_level=$PERF_LEVEL \
7c673cae FG	250	--disable_auto_compactions \
	251	--threads=$threads \
	252	--num=$NUM_KEYS \
	253	--reads=$ops \
	254	--writes=$ops \
	255	--deletes=$ops \
	256	--key_size=$KEY_SIZE \
	257	--value_size=$VALUE_SIZE \
	258	--cache_size=$CACHE_SIZE \
	259	--statistics=$STATISTICS \
	260	$options_file_arg \
	261	--compression_ratio=$COMPRESSION_RATIO \
	262	--histogram=$HISTOGRAM \
	263	--seek_nexts=$SEEK_NEXTS \
	264	--stats_per_interval=$STATS_PER_INTERVAL \
	265	--stats_interval_seconds=$STATS_INTERVAL_SECONDS \
	266	--max_background_flushes=$MAX_BACKGROUND_FLUSHES \
	267	--num_multi_db=$NUM_MULTI_DB \
	268	--max_background_compactions=$MAX_BACKGROUND_COMPACTIONS \
11fdf7f2 TL	269	--num_high_pri_threads=$NUM_HIGH_PRI_THREADS \
11fdf7f2 TL	270	--num_low_pri_threads=$NUM_LOW_PRI_THREADS \
1e59de90 TL	271	--seed=$SEED \
	272	--multiread_batched=true \
	273	--batch_size=$MULTIREAD_BATCH_SIZE \
	274	--multiread_stride=$MULTIREAD_STRIDE 2>&1"
7c673cae FG	275	if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
7c673cae FG	276	echo "Running benchmark remotely on $REMOTE_USER_AT_HOST"
1e59de90	277	db_bench_cmd="$SSH $REMOTE_USER_AT_HOST '$db_bench_cmd'"
7c673cae	278	fi
1e59de90	279	echo db_bench_cmd="$db_bench_cmd"
7c673cae	280
1e59de90 TL	281	# Run the db_bench command
	282	eval $db_bench_cmd \| tee -a "$RESULT_PATH/$1"
	283	exit_on_error ${PIPESTATUS[0]} db_bench
11fdf7f2 TL	284	if [ $UPDATE_REPORT -ne 0 ]; then
	285	update_report "$1" "$RESULT_PATH/$1" $ops $threads
	286	fi
7c673cae FG	287	}
	288
	289	function build_checkpoint {
	290	cmd_prefix=""
	291	if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
	292	cmd_prefix="$SSH $REMOTE_USER_AT_HOST "
	293	fi
11fdf7f2 TL	294	if [ $NUM_MULTI_DB -gt 1 ]; then
	295	dirs=$($cmd_prefix find $ORIGIN_PATH -type d -links 2)
	296	for dir in $dirs; do
	297	db_index=$(basename $dir)
	298	echo "Building checkpoints: $ORIGIN_PATH/$db_index -> $DB_PATH/$db_index ..."
	299	$cmd_prefix $DB_BENCH_DIR/ldb checkpoint --checkpoint_dir=$DB_PATH/$db_index \
1e59de90 TL	300	--db=$ORIGIN_PATH/$db_index --try_load_options 2>&1
1e59de90 TL	301	exit_on_error $?
11fdf7f2 TL	302	done
	303	else
	304	# checkpoint cannot build in directory already exists
	305	$cmd_prefix rm -rf $DB_PATH
	306	echo "Building checkpoint: $ORIGIN_PATH -> $DB_PATH ..."
	307	$cmd_prefix $DB_BENCH_DIR/ldb checkpoint --checkpoint_dir=$DB_PATH \
1e59de90 TL	308	--db=$ORIGIN_PATH --try_load_options 2>&1
1e59de90 TL	309	exit_on_error $?
11fdf7f2	310	fi
7c673cae FG	311	}
	312
	313	function multiply {
	314	echo "$1 * $2" \| bc
	315	}
	316
	317	# $1 --- name of the benchmark
	318	# $2 --- the filename of the output log of db_bench
	319	function update_report {
	320	main_result=`cat $2 \| grep $1`
	321	exit_on_error $?
	322	perc_statement=`cat $2 \| grep Percentile`
	323	exit_on_error $?
	324
	325	# Obtain micros / op
	326
	327	[[ $main_result =~ $MAIN_PATTERN ]]
	328	ops_per_s=${BASH_REMATCH[1]}
	329
	330	# Obtain percentile information
	331	[[ $perc_statement =~ $PERC_PATTERN ]]
	332	perc[0]=${BASH_REMATCH[1]} # p50
	333	perc[1]=${BASH_REMATCH[2]} # p75
	334	perc[2]=${BASH_REMATCH[3]} # p99
	335	perc[3]=${BASH_REMATCH[4]} # p99.9
	336	perc[4]=${BASH_REMATCH[5]} # p99.99
	337
	338	# Parse the output of the time command
	339	real_sec=`tail -3 $2 \| grep real \| awk '{print $2}'`
	340	user_sec=`tail -3 $2 \| grep user \| awk '{print $2}'`
	341	sys_sec=`tail -3 $2 \| grep sys \| awk '{print $2}'`
	342
	343	(printf "$DATA_FORMAT" \
	344	$COMMIT_ID $1 $REMOTE_USER_AT_HOST $NUM_MULTI_DB $NUM_KEYS $KEY_SIZE $VALUE_SIZE \
	345	$(multiply $COMPRESSION_RATIO 100) \
	346	$3 $4 $CACHE_SIZE \
	347	$MAX_BACKGROUND_FLUSHES $MAX_BACKGROUND_COMPACTIONS \
	348	$ops_per_s \
	349	$(multiply ${perc[0]} 1000) \
	350	$(multiply ${perc[1]} 1000) \
	351	$(multiply ${perc[2]} 1000) \
	352	$(multiply ${perc[3]} 1000) \
	353	$(multiply ${perc[4]} 1000) \
	354	$DEBUG \
	355	$real_sec \
	356	$user_sec \
	357	$sys_sec \
	358	>> $SUMMARY_FILE)
	359	exit_on_error $?
	360	}
	361
	362	function exit_on_error {
	363	if [ $1 -ne 0 ]; then
	364	echo ""
	365	echo "ERROR: Benchmark did not complete successfully."
	366	if ! [ -z "$2" ]; then
	367	echo "Failure command: $2"
	368	fi
	369	echo "Partial results are output to $RESULT_PATH"
	370	echo "ERROR" >> $SUMMARY_FILE
	371	exit $1
	372	fi
	373	}
	374
7c673cae FG	375	function build_db_bench_and_ldb {
	376	echo "Building db_bench & ldb ..."
	377
	378	make clean
	379	exit_on_error $?
	380
1e59de90	381	DEBUG_LEVEL=0 make db_bench ldb -j32
7c673cae FG	382	exit_on_error $?
	383	}
	384
	385	function run_remote {
	386	test_remote "$1"
	387	exit_on_error $? "$1"
	388	}
	389
	390	function test_remote {
	391	if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
	392	cmd="$SSH $REMOTE_USER_AT_HOST '$1'"
	393	else
	394	cmd="$1"
	395	fi
	396	eval "$cmd"
	397	}
	398
	399	function run_local {
	400	eval "$1"
1e59de90	401	exit_on_error $? "$1"
7c673cae FG	402	}
	403
	404	function setup_options_file {
	405	if ! [ -z "$OPTIONS_FILE" ]; then
	406	if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
	407	options_file="$DB_BENCH_DIR/OPTIONS_FILE"
	408	run_local "$SCP $OPTIONS_FILE $REMOTE_USER_AT_HOST:$options_file"
	409	else
	410	options_file="$OPTIONS_FILE"
	411	fi
	412	echo "--options_file=$options_file"
	413	fi
	414	echo ""
	415	}
	416
	417	function setup_test_directory {
	418	echo "Deleting old regression test directories and creating new ones"
	419
1e59de90	420	run_local 'test "$DB_PATH" != "."'
7c673cae	421	run_remote "rm -rf $DB_PATH"
1e59de90 TL	422
	423	if [ "$DB_BENCH_DIR" != "." ]; then
	424	run_remote "rm -rf $DB_BENCH_DIR"
	425	fi
	426
	427	run_local 'test "$RESULT_PATH" != "."'
7c673cae FG	428	run_local "rm -rf $RESULT_PATH"
	429
	430	if ! [ -z "$WAL_PATH" ]; then
	431	run_remote "rm -rf $WAL_PATH"
	432	run_remote "mkdir -p $WAL_PATH"
	433	fi
	434
	435	run_remote "mkdir -p $DB_PATH"
	436
	437	run_remote "mkdir -p $DB_BENCH_DIR"
	438	run_remote "ls -l $DB_BENCH_DIR"
	439
	440	if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
	441	run_local "$SCP ./db_bench $REMOTE_USER_AT_HOST:$DB_BENCH_DIR/db_bench"
	442	run_local "$SCP ./ldb $REMOTE_USER_AT_HOST:$DB_BENCH_DIR/ldb"
	443	fi
	444
	445	run_local "mkdir -p $RESULT_PATH"
	446
	447	(printf $TITLE_FORMAT \
	448	"commit id" "benchmark" "user@host" "num-dbs" "key-range" "key-size" \
	449	"value-size" "compress-rate" "ops-per-thread" "num-threads" "cache-size" \
	450	"flushes" "compactions" \
	451	"ops-per-s" "p50" "p75" "p99" "p99.9" "p99.99" "debug" \
	452	"real-sec" "user-sec" "sys-sec" \
	453	>> $SUMMARY_FILE)
	454	exit_on_error $?
	455	}
	456
	457	function cleanup_test_directory {
	458
	459	if [ $DELETE_TEST_PATH -ne 0 ]; then
	460	echo "Clear old regression test directories and creating new ones"
	461	run_remote "rm -rf $DB_PATH"
	462	run_remote "rm -rf $WAL_PATH"
	463	if ! [ -z "$REMOTE_USER_AT_HOST" ]; then
	464	run_remote "rm -rf $DB_BENCH_DIR"
	465	fi
	466	run_remote "rm -rf $1"
	467	else
	468	echo "------------ DEBUG MODE ------------"
	469	echo "DB PATH: $DB_PATH"
	470	echo "WAL PATH: $WAL_PATH"
	471	fi
	472	}
	473
	474	############################################################################
	475
11fdf7f2	476	# shellcheck disable=SC2068
7c673cae	477	main $@