ceph/src/rocksdb/tools/benchmark.sh

   1 #!/usr/bin/env bash
   2 # REQUIRE: db_bench binary exists in the current directory
   3
   4 if [ $# -ne 1 ]; then
   5   echo -n "./benchmark.sh [bulkload/fillseq/overwrite/filluniquerandom/"
   6   echo    "readrandom/readwhilewriting/readwhilemerging/updaterandom/"
   7   echo    "mergerandom/randomtransaction/compact]"
   8   exit 0
   9 fi
  10
  11 # Make it easier to run only the compaction test. Getting valid data requires
  12 # a number of iterations and having an ability to run the test separately from
  13 # rest of the benchmarks helps.
  14 if [ "$COMPACTION_TEST" == "1" -a "$1" != "universal_compaction" ]; then
  15   echo "Skipping $1 because it's not a compaction test."
  16   exit 0
  17 fi
  18
  19 # size constants
  20 K=1024
  21 M=$((1024 * K))
  22 G=$((1024 * M))
  23 T=$((1024 * T))
  24
  25 if [ -z $DB_DIR ]; then
  26   echo "DB_DIR is not defined"
  27   exit 0
  28 fi
  29
  30 if [ -z $WAL_DIR ]; then
  31   echo "WAL_DIR is not defined"
  32   exit 0
  33 fi
  34
  35 output_dir=${OUTPUT_DIR:-/tmp/}
  36 if [ ! -d $output_dir ]; then
  37   mkdir -p $output_dir
  38 fi
  39
  40 # all multithreaded tests run with sync=1 unless
  41 # $DB_BENCH_NO_SYNC is defined
  42 syncval="1"
  43 if [ ! -z $DB_BENCH_NO_SYNC ]; then
  44   echo "Turning sync off for all multithreaded tests"
  45   syncval="0";
  46 fi
  47
  48 num_threads=${NUM_THREADS:-64}
  49 mb_written_per_sec=${MB_WRITE_PER_SEC:-0}
  50 # Only for tests that do range scans
  51 num_nexts_per_seek=${NUM_NEXTS_PER_SEEK:-10}
  52 cache_size=${CACHE_SIZE:-$((17179869184))}
  53 compression_max_dict_bytes=${COMPRESSION_MAX_DICT_BYTES:-0}
  54 compression_type=${COMPRESSION_TYPE:-zstd}
  55 duration=${DURATION:-0}
  56
  57 num_keys=${NUM_KEYS:-8000000000}
  58 key_size=${KEY_SIZE:-20}
  59 value_size=${VALUE_SIZE:-400}
  60 block_size=${BLOCK_SIZE:-8192}
  61
  62 const_params="
  63   --db=$DB_DIR \
  64   --wal_dir=$WAL_DIR \
  65   \
  66   --num=$num_keys \
  67   --num_levels=6 \
  68   --key_size=$key_size \
  69   --value_size=$value_size \
  70   --block_size=$block_size \
  71   --cache_size=$cache_size \
  72   --cache_numshardbits=6 \
  73   --compression_max_dict_bytes=$compression_max_dict_bytes \
  74   --compression_ratio=0.5 \
  75   --compression_type=$compression_type \
  76   --level_compaction_dynamic_level_bytes=true \
  77   --bytes_per_sync=$((8 * M)) \
  78   --cache_index_and_filter_blocks=0 \
  79   --pin_l0_filter_and_index_blocks_in_cache=1 \
  80   --benchmark_write_rate_limit=$(( 1024 * 1024 * $mb_written_per_sec )) \
  81   \
  82   --hard_rate_limit=3 \
  83   --rate_limit_delay_max_milliseconds=1000000 \
  84   --write_buffer_size=$((128 * M)) \
  85   --target_file_size_base=$((128 * M)) \
  86   --max_bytes_for_level_base=$((1 * G)) \
  87   \
  88   --verify_checksum=1 \
  89   --delete_obsolete_files_period_micros=$((60 * M)) \
  90   --max_bytes_for_level_multiplier=8 \
  91   \
  92   --statistics=0 \
  93   --stats_per_interval=1 \
  94   --stats_interval_seconds=60 \
  95   --histogram=1 \
  96   \
  97   --memtablerep=skip_list \
  98   --bloom_bits=10 \
  99   --open_files=-1"
 100
 101 l0_config="
 102   --level0_file_num_compaction_trigger=4 \
 103   --level0_stop_writes_trigger=20"
 104
 105 if [ $duration -gt 0 ]; then
 106   const_params="$const_params --duration=$duration"
 107 fi
 108
 109 params_w="$const_params \
 110           $l0_config \
 111           --max_background_compactions=16 \
 112           --max_write_buffer_number=8 \
 113           --max_background_flushes=7"
 114
 115 params_bulkload="$const_params \
 116                  --max_background_compactions=16 \
 117                  --max_write_buffer_number=8 \
 118                  --allow_concurrent_memtable_write=false \
 119                  --max_background_flushes=7 \
 120                  --level0_file_num_compaction_trigger=$((10 * M)) \
 121                  --level0_slowdown_writes_trigger=$((10 * M)) \
 122                  --level0_stop_writes_trigger=$((10 * M))"
 123
 124 params_fillseq="$params_w \
 125                 --allow_concurrent_memtable_write=false"
 126 #
 127 # Tune values for level and universal compaction.
 128 # For universal compaction, these level0_* options mean total sorted of runs in
 129 # LSM. In level-based compaction, it means number of L0 files.
 130 #
 131 params_level_compact="$const_params \
 132                 --max_background_flushes=4 \
 133                 --max_write_buffer_number=4 \
 134                 --level0_file_num_compaction_trigger=4 \
 135                 --level0_slowdown_writes_trigger=16 \
 136                 --level0_stop_writes_trigger=20"
 137
 138 params_univ_compact="$const_params \
 139                 --max_background_flushes=4 \
 140                 --max_write_buffer_number=4 \
 141                 --level0_file_num_compaction_trigger=8 \
 142                 --level0_slowdown_writes_trigger=16 \
 143                 --level0_stop_writes_trigger=20"
 144
 145 function summarize_result {
 146   test_out=$1
 147   test_name=$2
 148   bench_name=$3
 149
 150   # Note that this function assumes that the benchmark executes long enough so
 151   # that "Compaction Stats" is written to stdout at least once. If it won't
 152   # happen then empty output from grep when searching for "Sum" will cause
 153   # syntax errors.
 154   uptime=$( grep ^Uptime\(secs $test_out | tail -1 | awk '{ printf "%.0f", $2 }' )
 155   stall_time=$( grep "^Cumulative stall" $test_out | tail -1  | awk '{  print $3 }' )
 156   stall_pct=$( grep "^Cumulative stall" $test_out| tail -1  | awk '{  print $5 }' )
 157   ops_sec=$( grep ^${bench_name} $test_out | awk '{ print $5 }' )
 158   mb_sec=$( grep ^${bench_name} $test_out | awk '{ print $7 }' )
 159   lo_wgb=$( grep "^  L0" $test_out | tail -1 | awk '{ print $9 }' )
 160   sum_wgb=$( grep "^ Sum" $test_out | tail -1 | awk '{ print $9 }' )
 161   sum_size=$( grep "^ Sum" $test_out | tail -1 | awk '{ printf "%.1f", $3 / 1024.0 }' )
 162   wamp=$( echo "scale=1; $sum_wgb / $lo_wgb" | bc )
 163   wmb_ps=$( echo "scale=1; ( $sum_wgb * 1024.0 ) / $uptime" | bc )
 164   usecs_op=$( grep ^${bench_name} $test_out | awk '{ printf "%.1f", $3 }' )
 165   p50=$( grep "^Percentiles:" $test_out | tail -1 | awk '{ printf "%.1f", $3 }' )
 166   p75=$( grep "^Percentiles:" $test_out | tail -1 | awk '{ printf "%.1f", $5 }' )
 167   p99=$( grep "^Percentiles:" $test_out | tail -1 | awk '{ printf "%.0f", $7 }' )
 168   p999=$( grep "^Percentiles:" $test_out | tail -1 | awk '{ printf "%.0f", $9 }' )
 169   p9999=$( grep "^Percentiles:" $test_out | tail -1 | awk '{ printf "%.0f", $11 }' )
 170   echo -e "$ops_sec\t$mb_sec\t$sum_size\t$lo_wgb\t$sum_wgb\t$wamp\t$wmb_ps\t$usecs_op\t$p50\t$p75\t$p99\t$p999\t$p9999\t$uptime\t$stall_time\t$stall_pct\t$test_name" \
 171     >> $output_dir/report.txt
 172 }
 173
 174 function run_bulkload {
 175   # This runs with a vector memtable and the WAL disabled to load faster. It is still crash safe and the
 176   # client can discover where to restart a load after a crash. I think this is a good way to load.
 177   echo "Bulk loading $num_keys random keys"
 178   cmd="./db_bench --benchmarks=fillrandom \
 179        --use_existing_db=0 \
 180        --disable_auto_compactions=1 \
 181        --sync=0 \
 182        $params_bulkload \
 183        --threads=1 \
 184        --memtablerep=vector \
 185        --allow_concurrent_memtable_write=false \
 186        --disable_wal=1 \
 187        --seed=$( date +%s ) \
 188        2>&1 | tee -a $output_dir/benchmark_bulkload_fillrandom.log"
 189   echo $cmd | tee $output_dir/benchmark_bulkload_fillrandom.log
 190   eval $cmd
 191   summarize_result $output_dir/benchmark_bulkload_fillrandom.log bulkload fillrandom
 192   echo "Compacting..."
 193   cmd="./db_bench --benchmarks=compact \
 194        --use_existing_db=1 \
 195        --disable_auto_compactions=1 \
 196        --sync=0 \
 197        $params_w \
 198        --threads=1 \
 199        2>&1 | tee -a $output_dir/benchmark_bulkload_compact.log"
 200   echo $cmd | tee $output_dir/benchmark_bulkload_compact.log
 201   eval $cmd
 202 }
 203
 204 #
 205 # Parameter description:
 206 #
 207 # $1 - 1 if I/O statistics should be collected.
 208 # $2 - compaction type to use (level=0, universal=1).
 209 # $3 - number of subcompactions.
 210 # $4 - number of maximum background compactions.
 211 #
 212 function run_manual_compaction_worker {
 213   # This runs with a vector memtable and the WAL disabled to load faster.
 214   # It is still crash safe and the client can discover where to restart a
 215   # load after a crash. I think this is a good way to load.
 216   echo "Bulk loading $num_keys random keys for manual compaction."
 217
 218   fillrandom_output_file=$output_dir/benchmark_man_compact_fillrandom_$3.log
 219   man_compact_output_log=$output_dir/benchmark_man_compact_$3.log
 220
 221   if [ "$2" == "1" ]; then
 222     extra_params=$params_univ_compact
 223   else
 224     extra_params=$params_level_compact
 225   fi
 226
 227   # Make sure that fillrandom uses the same compaction options as compact.
 228   cmd="./db_bench --benchmarks=fillrandom \
 229        --use_existing_db=0 \
 230        --disable_auto_compactions=0 \
 231        --sync=0 \
 232        $extra_params \
 233        --threads=$num_threads \
 234        --compaction_measure_io_stats=$1 \
 235        --compaction_style=$2 \
 236        --subcompactions=$3 \
 237        --memtablerep=vector \
 238        --allow_concurrent_memtable_write=false \
 239        --disable_wal=1 \
 240        --max_background_compactions=$4 \
 241        --seed=$( date +%s ) \
 242        2>&1 | tee -a $fillrandom_output_file"
 243
 244   echo $cmd | tee $fillrandom_output_file
 245   eval $cmd
 246
 247   summarize_result $fillrandom_output_file man_compact_fillrandom_$3 fillrandom
 248
 249   echo "Compacting with $3 subcompactions specified ..."
 250
 251   # This is the part we're really interested in. Given that compact benchmark
 252   # doesn't output regular statistics then we'll just use the time command to
 253   # measure how long this step takes.
 254   cmd="{ \
 255        time ./db_bench --benchmarks=compact \
 256        --use_existing_db=1 \
 257        --disable_auto_compactions=0 \
 258        --sync=0 \
 259        $extra_params \
 260        --threads=$num_threads \
 261        --compaction_measure_io_stats=$1 \
 262        --compaction_style=$2 \
 263        --subcompactions=$3 \
 264        --max_background_compactions=$4 \
 265        ;}
 266        2>&1 | tee -a $man_compact_output_log"
 267
 268   echo $cmd | tee $man_compact_output_log
 269   eval $cmd
 270
 271   # Can't use summarize_result here. One way to analyze the results is to run
 272   # "grep real" on the resulting log files.
 273 }
 274
 275 function run_univ_compaction {
 276   # Always ask for I/O statistics to be measured.
 277   io_stats=1
 278
 279   # Values: kCompactionStyleLevel = 0x0, kCompactionStyleUniversal = 0x1.
 280   compaction_style=1
 281
 282   # Define a set of benchmarks.
 283   subcompactions=(1 2 4 8 16)
 284   max_background_compactions=(16 16 8 4 2)
 285
 286   i=0
 287   total=${#subcompactions[@]}
 288
 289   # Execute a set of benchmarks to cover variety of scenarios.
 290   while [ "$i" -lt "$total" ]
 291   do
 292     run_manual_compaction_worker $io_stats $compaction_style ${subcompactions[$i]} \
 293       ${max_background_compactions[$i]}
 294     ((i++))
 295   done
 296 }
 297
 298 function run_fillseq {
 299   # This runs with a vector memtable. WAL can be either disabled or enabled
 300   # depending on the input parameter (1 for disabled, 0 for enabled). The main
 301   # benefit behind disabling WAL is to make loading faster. It is still crash
 302   # safe and the client can discover where to restart a load after a crash. I
 303   # think this is a good way to load.
 304
 305   # Make sure that we'll have unique names for all the files so that data won't
 306   # be overwritten.
 307   if [ $1 == 1 ]; then
 308     log_file_name=$output_dir/benchmark_fillseq.wal_disabled.v${value_size}.log
 309     test_name=fillseq.wal_disabled.v${value_size}
 310   else
 311     log_file_name=$output_dir/benchmark_fillseq.wal_enabled.v${value_size}.log
 312     test_name=fillseq.wal_enabled.v${value_size}
 313   fi
 314
 315   echo "Loading $num_keys keys sequentially"
 316   cmd="./db_bench --benchmarks=fillseq \
 317        --use_existing_db=0 \
 318        --sync=0 \
 319        $params_fillseq \
 320        --min_level_to_compress=0 \
 321        --threads=1 \
 322        --memtablerep=vector \
 323        --allow_concurrent_memtable_write=false \
 324        --disable_wal=$1 \
 325        --seed=$( date +%s ) \
 326        2>&1 | tee -a $log_file_name"
 327   echo $cmd | tee $log_file_name
 328   eval $cmd
 329
 330   # The constant "fillseq" which we pass to db_bench is the benchmark name.
 331   summarize_result $log_file_name $test_name fillseq
 332 }
 333
 334 function run_change {
 335   operation=$1
 336   echo "Do $num_keys random $operation"
 337   out_name="benchmark_${operation}.t${num_threads}.s${syncval}.log"
 338   cmd="./db_bench --benchmarks=$operation \
 339        --use_existing_db=1 \
 340        --sync=$syncval \
 341        $params_w \
 342        --threads=$num_threads \
 343        --merge_operator=\"put\" \
 344        --seed=$( date +%s ) \
 345        2>&1 | tee -a $output_dir/${out_name}"
 346   echo $cmd | tee $output_dir/${out_name}
 347   eval $cmd
 348   summarize_result $output_dir/${out_name} ${operation}.t${num_threads}.s${syncval} $operation
 349 }
 350
 351 function run_filluniquerandom {
 352   echo "Loading $num_keys unique keys randomly"
 353   cmd="./db_bench --benchmarks=filluniquerandom \
 354        --use_existing_db=0 \
 355        --sync=0 \
 356        $params_w \
 357        --threads=1 \
 358        --seed=$( date +%s ) \
 359        2>&1 | tee -a $output_dir/benchmark_filluniquerandom.log"
 360   echo $cmd | tee $output_dir/benchmark_filluniquerandom.log
 361   eval $cmd
 362   summarize_result $output_dir/benchmark_filluniquerandom.log filluniquerandom filluniquerandom
 363 }
 364
 365 function run_readrandom {
 366   echo "Reading $num_keys random keys"
 367   out_name="benchmark_readrandom.t${num_threads}.log"
 368   cmd="./db_bench --benchmarks=readrandom \
 369        --use_existing_db=1 \
 370        $params_w \
 371        --threads=$num_threads \
 372        --seed=$( date +%s ) \
 373        2>&1 | tee -a $output_dir/${out_name}"
 374   echo $cmd | tee $output_dir/${out_name}
 375   eval $cmd
 376   summarize_result $output_dir/${out_name} readrandom.t${num_threads} readrandom
 377 }
 378
 379 function run_readwhile {
 380   operation=$1
 381   echo "Reading $num_keys random keys while $operation"
 382   out_name="benchmark_readwhile${operation}.t${num_threads}.log"
 383   cmd="./db_bench --benchmarks=readwhile${operation} \
 384        --use_existing_db=1 \
 385        --sync=$syncval \
 386        $params_w \
 387        --threads=$num_threads \
 388        --merge_operator=\"put\" \
 389        --seed=$( date +%s ) \
 390        2>&1 | tee -a $output_dir/${out_name}"
 391   echo $cmd | tee $output_dir/${out_name}
 392   eval $cmd
 393   summarize_result $output_dir/${out_name} readwhile${operation}.t${num_threads} readwhile${operation}
 394 }
 395
 396 function run_rangewhile {
 397   operation=$1
 398   full_name=$2
 399   reverse_arg=$3
 400   out_name="benchmark_${full_name}.t${num_threads}.log"
 401   echo "Range scan $num_keys random keys while ${operation} for reverse_iter=${reverse_arg}"
 402   cmd="./db_bench --benchmarks=seekrandomwhile${operation} \
 403        --use_existing_db=1 \
 404        --sync=$syncval \
 405        $params_w \
 406        --threads=$num_threads \
 407        --merge_operator=\"put\" \
 408        --seek_nexts=$num_nexts_per_seek \
 409        --reverse_iterator=$reverse_arg \
 410        --seed=$( date +%s ) \
 411        2>&1 | tee -a $output_dir/${out_name}"
 412   echo $cmd | tee $output_dir/${out_name}
 413   eval $cmd
 414   summarize_result $output_dir/${out_name} ${full_name}.t${num_threads} seekrandomwhile${operation}
 415 }
 416
 417 function run_range {
 418   full_name=$1
 419   reverse_arg=$2
 420   out_name="benchmark_${full_name}.t${num_threads}.log"
 421   echo "Range scan $num_keys random keys for reverse_iter=${reverse_arg}"
 422   cmd="./db_bench --benchmarks=seekrandom \
 423        --use_existing_db=1 \
 424        $params_w \
 425        --threads=$num_threads \
 426        --seek_nexts=$num_nexts_per_seek \
 427        --reverse_iterator=$reverse_arg \
 428        --seed=$( date +%s ) \
 429        2>&1 | tee -a $output_dir/${out_name}"
 430   echo $cmd | tee $output_dir/${out_name}
 431   eval $cmd
 432   summarize_result $output_dir/${out_name} ${full_name}.t${num_threads} seekrandom
 433 }
 434
 435 function run_randomtransaction {
 436   echo "..."
 437   cmd="./db_bench $params_r --benchmarks=randomtransaction \
 438        --num=$num_keys \
 439        --transaction_db \
 440        --threads=5 \
 441        --transaction_sets=5 \
 442        2>&1 | tee $output_dir/benchmark_randomtransaction.log"
 443   echo $cmd | tee $output_dir/benchmark_rangescanwhilewriting.log
 444   eval $cmd
 445 }
 446
 447 function now() {
 448   echo `date +"%s"`
 449 }
 450
 451 report="$output_dir/report.txt"
 452 schedule="$output_dir/schedule.txt"
 453
 454 echo "===== Benchmark ====="
 455
 456 # Run!!!
 457 IFS=',' read -a jobs <<< $1
 458 # shellcheck disable=SC2068
 459 for job in ${jobs[@]}; do
 460
 461   if [ $job != debug ]; then
 462     echo "Start $job at `date`" | tee -a $schedule
 463   fi
 464
 465   start=$(now)
 466   if [ $job = bulkload ]; then
 467     run_bulkload
 468   elif [ $job = fillseq_disable_wal ]; then
 469     run_fillseq 1
 470   elif [ $job = fillseq_enable_wal ]; then
 471     run_fillseq 0
 472   elif [ $job = overwrite ]; then
 473     syncval="0"
 474     params_w="$params_w \
 475         --writes=125000000 \
 476         --subcompactions=4 \
 477         --soft_pending_compaction_bytes_limit=$((1 * T)) \
 478         --hard_pending_compaction_bytes_limit=$((4 * T)) "
 479     run_change overwrite
 480   elif [ $job = updaterandom ]; then
 481     run_change updaterandom
 482   elif [ $job = mergerandom ]; then
 483     run_change mergerandom
 484   elif [ $job = filluniquerandom ]; then
 485     run_filluniquerandom
 486   elif [ $job = readrandom ]; then
 487     run_readrandom
 488   elif [ $job = fwdrange ]; then
 489     run_range $job false
 490   elif [ $job = revrange ]; then
 491     run_range $job true
 492   elif [ $job = readwhilewriting ]; then
 493     run_readwhile writing
 494   elif [ $job = readwhilemerging ]; then
 495     run_readwhile merging
 496   elif [ $job = fwdrangewhilewriting ]; then
 497     run_rangewhile writing $job false
 498   elif [ $job = revrangewhilewriting ]; then
 499     run_rangewhile writing $job true
 500   elif [ $job = fwdrangewhilemerging ]; then
 501     run_rangewhile merging $job false
 502   elif [ $job = revrangewhilemerging ]; then
 503     run_rangewhile merging $job true
 504   elif [ $job = randomtransaction ]; then
 505     run_randomtransaction
 506   elif [ $job = universal_compaction ]; then
 507     run_univ_compaction
 508   elif [ $job = debug ]; then
 509     num_keys=1000; # debug
 510     echo "Setting num_keys to $num_keys"
 511   else
 512     echo "unknown job $job"
 513     exit
 514   fi
 515   end=$(now)
 516
 517   if [ $job != debug ]; then
 518     echo "Complete $job in $((end-start)) seconds" | tee -a $schedule
 519   fi
 520
 521   echo -e "ops/sec\tmb/sec\tSize-GB\tL0_GB\tSum_GB\tW-Amp\tW-MB/s\tusec/op\tp50\tp75\tp99\tp99.9\tp99.99\tUptime\tStall-time\tStall%\tTest"
 522   tail -1 $output_dir/report.txt
 523
 524 done