#!/bin/bash
# REQUIRE: benchmark.sh exists in the current directory
# After execution of this script, log files are generated in $output_dir.
# report.txt provides high-level statistics.

# This should be run from the parent of the tools directory. The command line is:
#   [$env_vars] tools/run_flash_bench.sh [list-of-threads]
#
# This runs a sequence of tests in the following order:
# step 1) load - bulkload, compact, fillseq, overwrite
# step 2) read-only for each number of threads
# step 3) read-write for each number of threads
# step 4) merge for each number of threads
#
# The list of threads is optional and when not set is equivalent to "24".
# If list-of-threads is specified as "1 2 4" then the tests in steps 2, 3 and
# 4 above are repeated for 1, 2 and 4 threads. The tests in step 1 are
# only run for 1 thread.
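#
# For example, to repeat the tests in steps 2, 3 and 4 for 1, 8 and 32 threads
# (thread counts here are purely illustrative):
#   tools/run_flash_bench.sh 1 8 32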

# Test output is written to $OUTPUT_DIR, which defaults to ${TMPDIR:-/tmp}/output.
# The performance summary is in $OUTPUT_DIR/report.txt. There is one file in
# $OUTPUT_DIR per test and the tests are listed below.
#
# The environment variables are also optional. The variables are:
#
# NKEYS            - number of key/value pairs to load
# BG_MBWRITEPERSEC - write rate limit in MB/second for tests in which
#                    there is one thread doing writes and stats are
#                    reported for read threads. "BG" stands for background.
#                    If this is too large then the non-writer threads can get
#                    starved. This is used for the "readwhile" tests.
# FG_MBWRITEPERSEC - write rate limit in MB/second for tests like overwrite
#                    where stats are reported for the write threads.
# NSECONDS         - number of seconds for which to run each test in steps 2,
#                    3 and 4. There are currently 15 tests in those steps and
#                    they are repeated for each entry in list-of-threads, so
#                    this variable lets you control the total time needed to
#                    finish the benchmark (see the example below).
# RANGE_LIMIT      - the number of rows to read per range query for tests that
#                    do range queries.
# VAL_SIZE         - the length of the value in the key/value pairs loaded.
#                    You can estimate the size of the test database from this,
#                    NKEYS and the compression rate (--compression_ratio) set
#                    in tools/benchmark.sh (see the example below).
# BLOCK_LENGTH     - value for db_bench --block_size
# CACHE_BYTES      - the size of the RocksDB block cache in bytes
# DATA_DIR         - directory in which to create database files
# LOG_DIR          - directory in which to create WAL files, may be the same
#                    as DATA_DIR
# DO_SETUP         - when set to 0, a backup of the database is copied from
#                    $DATA_DIR.bak to $DATA_DIR and the load tests from step 1
#                    are skipped. The WAL directory is also copied from a backup
#                    if DATA_DIR != LOG_DIR. This allows tests from steps 2, 3
#                    and 4 to be repeated faster.
# SAVE_SETUP       - saves a copy of the database at the end of step 1 to
#                    $DATA_DIR.bak. When LOG_DIR != DATA_DIR then the WAL
#                    directory is also copied to $LOG_DIR.bak.
# SKIP_LOW_PRI_TESTS - skip some of the tests which aren't crucial for getting
#                    actionable benchmarking data (look for keywords "bulkload",
#                    "sync=1", and "while merging").
#

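# Example invocation (the values below are illustrative assumptions, not
# recommendations). Assuming the --compression_ratio in tools/benchmark.sh is
# 0.5, loading NKEYS=64M pairs with VAL_SIZE=400 produces on the order of
# 64M * 400 * 0.5 ~= 13 GB of data, and with NSECONDS=600 the 15 tests in
# steps 2-4 take roughly 15 * 600 = 9000 seconds (2.5 hours) per entry in
# list-of-threads. SAVE_SETUP=1 keeps the loaded database so later runs can
# restore it with DO_SETUP=0:
#   NKEYS=$((64 * 1024 * 1024)) NSECONDS=600 VAL_SIZE=400 \
#   DATA_DIR=/data/rocksdb LOG_DIR=/data/rocksdb SAVE_SETUP=1 \
#   tools/run_flash_bench.sh 16 32
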
# Size constants
K=1024
M=$((1024 * K))
G=$((1024 * M))

num_keys=${NKEYS:-$((1 * G))}
# write rate for readwhile... tests
bg_mbwps=${BG_MBWRITEPERSEC:-4}
# write rate for tests other than readwhile, 0 means no limit
fg_mbwps=${FG_MBWRITEPERSEC:-0}
duration=${NSECONDS:-$((60 * 60))}
nps=${RANGE_LIMIT:-10}
vs=${VAL_SIZE:-400}
cs=${CACHE_BYTES:-$(( 1 * G ))}
bs=${BLOCK_LENGTH:-8192}

# If no command line arguments then run for 24 threads.
if [[ $# -eq 0 ]]; then
  nthreads=( 24 )
else
  nthreads=( "$@" )
fi

for num_thr in "${nthreads[@]}" ; do
  echo Will run for $num_thr threads
done

# Update these parameters before execution !!!
db_dir=${DATA_DIR:-"/tmp/rocksdb/"}
wal_dir=${LOG_DIR:-"/tmp/rocksdb/"}

do_setup=${DO_SETUP:-1}
save_setup=${SAVE_SETUP:-0}

# By default we'll run all the tests. Set this to skip a set of tests which
# aren't critical for getting key metrics.
skip_low_pri_tests=${SKIP_LOW_PRI_TESTS:-0}
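# For example (illustrative): running "SKIP_LOW_PRI_TESTS=1 tools/run_flash_bench.sh"
# drops the bulkload, sync=1 and "while merging" tests described above.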

if [[ $skip_low_pri_tests == 1 ]]; then
  echo "Skipping some non-critical tests because SKIP_LOW_PRI_TESTS is set."
fi

output_dir="${TMPDIR:-/tmp}/output"

ARGS="\
OUTPUT_DIR=$output_dir \
NUM_KEYS=$num_keys \
DB_DIR=$db_dir \
WAL_DIR=$wal_dir \
VALUE_SIZE=$vs \
BLOCK_SIZE=$bs \
CACHE_SIZE=$cs"
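
# With the defaults above, each "env $ARGS ./tools/benchmark.sh <test>" call
# below is roughly equivalent to the following (illustrative expansion only,
# assuming TMPDIR is unset):
#   env OUTPUT_DIR=/tmp/output NUM_KEYS=$((1024 * 1024 * 1024)) \
#     DB_DIR=/tmp/rocksdb/ WAL_DIR=/tmp/rocksdb/ VALUE_SIZE=400 \
#     BLOCK_SIZE=8192 CACHE_SIZE=$((1024 * 1024 * 1024)) \
#     ./tools/benchmark.sh <test>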

mkdir -p $output_dir
echo -e "ops/sec\tmb/sec\tSize-GB\tL0_GB\tSum_GB\tW-Amp\tW-MB/s\tusec/op\tp50\tp75\tp99\tp99.9\tp99.99\tUptime\tStall-time\tStall%\tTest" \
  > $output_dir/report.txt

# Notes on test sequence:
# step 1) Setup database via sequential fill followed by overwrite to fragment it.
#         Done without setting DURATION to make sure that overwrite does $num_keys writes
# step 2) read-only tests for all levels of concurrency requested
# step 3) non read-only tests for all levels of concurrency requested
# step 4) merge tests for all levels of concurrency requested. These must come last.

###### Setup the database

if [[ $do_setup != 0 ]]; then
  echo Doing setup

  if [[ $skip_low_pri_tests != 1 ]]; then
    # Test 1: bulk load
    env $ARGS ./tools/benchmark.sh bulkload
  fi

  # Test 2a: sequential fill with large values to get peak ingest
  #          adjust NUM_KEYS given the use of larger values
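  # (With 32KB values and NUM_KEYS divided by 64, the bytes written are about
  # (num_keys / 64) * 32KB = num_keys * 512, the same order of magnitude as the
  # num_keys * 400 bytes written with the default VAL_SIZE, so total ingest
  # stays comparable.)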
  env $ARGS BLOCK_SIZE=$((1 * M)) VALUE_SIZE=$((32 * K)) NUM_KEYS=$(( num_keys / 64 )) \
    ./tools/benchmark.sh fillseq_disable_wal

  # Test 2b: sequential fill with the configured value size
  env $ARGS ./tools/benchmark.sh fillseq_disable_wal

  # Test 2c: same as 2a, but with WAL being enabled.
  env $ARGS BLOCK_SIZE=$((1 * M)) VALUE_SIZE=$((32 * K)) NUM_KEYS=$(( num_keys / 64 )) \
    ./tools/benchmark.sh fillseq_enable_wal

  # Test 2d: same as 2b, but with WAL being enabled.
  env $ARGS ./tools/benchmark.sh fillseq_enable_wal

  # Test 3: single-threaded overwrite
  env $ARGS NUM_THREADS=1 DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh overwrite

else
  echo Restoring from backup

  rm -rf $db_dir

  if [ ! -d ${db_dir}.bak ]; then
    echo Database backup does not exist at ${db_dir}.bak
    exit -1
  fi

  echo Restore database from ${db_dir}.bak
  cp -p -r ${db_dir}.bak $db_dir

  if [[ $db_dir != $wal_dir ]]; then
    rm -rf $wal_dir

    if [ ! -d ${wal_dir}.bak ]; then
      echo WAL backup does not exist at ${wal_dir}.bak
      exit -1
    fi

    echo Restore WAL from ${wal_dir}.bak
    cp -p -r ${wal_dir}.bak $wal_dir
  fi
fi

if [[ $save_setup != 0 ]]; then
  echo Save database to ${db_dir}.bak
  cp -p -r $db_dir ${db_dir}.bak

  if [[ $db_dir != $wal_dir ]]; then
    echo Save WAL to ${wal_dir}.bak
    cp -p -r $wal_dir ${wal_dir}.bak
  fi
fi

###### Read-only tests

for num_thr in "${nthreads[@]}" ; do
  # Test 4: random read
  env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh readrandom

  # Test 5: random range scans
  env $ARGS DURATION=$duration NUM_THREADS=$num_thr NUM_NEXTS_PER_SEEK=$nps \
    ./tools/benchmark.sh fwdrange

  # Test 6: random reverse range scans
  env $ARGS DURATION=$duration NUM_THREADS=$num_thr NUM_NEXTS_PER_SEEK=$nps \
    ./tools/benchmark.sh revrange
done

###### Non read-only tests

for num_thr in "${nthreads[@]}" ; do
  # Test 7: overwrite with sync=0
  env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \
    DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh overwrite

  if [[ $skip_low_pri_tests != 1 ]]; then
    # Test 8: overwrite with sync=1
    env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \
      ./tools/benchmark.sh overwrite
  fi

  # Test 9: random update with sync=0
  env $ARGS DURATION=$duration NUM_THREADS=$num_thr DB_BENCH_NO_SYNC=1 \
    ./tools/benchmark.sh updaterandom

  if [[ $skip_low_pri_tests != 1 ]]; then
    # Test 10: random update with sync=1
    env $ARGS DURATION=$duration NUM_THREADS=$num_thr ./tools/benchmark.sh updaterandom
  fi

  # Test 11: random read while writing
  env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \
    DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh readwhilewriting

  # Test 12: range scan while writing
  env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \
    DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh fwdrangewhilewriting

  # Test 13: reverse range scan while writing
  env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \
    DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh revrangewhilewriting
done

###### Merge tests

for num_thr in "${nthreads[@]}" ; do
  # Test 14: random merge with sync=0
  env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \
    DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh mergerandom

  if [[ $skip_low_pri_tests != 1 ]]; then
    # Test 15: random merge with sync=1
    env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$fg_mbwps \
      ./tools/benchmark.sh mergerandom

    # Test 16: random read while merging
    env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \
      DB_BENCH_NO_SYNC=1 ./tools/benchmark.sh readwhilemerging

    # Test 17: range scan while merging
    env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \
      DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh fwdrangewhilemerging

    # Test 18: reverse range scan while merging
    env $ARGS DURATION=$duration NUM_THREADS=$num_thr MB_WRITE_PER_SEC=$bg_mbwps \
      DB_BENCH_NO_SYNC=1 NUM_NEXTS_PER_SEEK=$nps ./tools/benchmark.sh revrangewhilemerging
  fi
done

###### Universal compaction tests.

# Use a single thread to reduce the variability in the benchmark.
env $ARGS COMPACTION_TEST=1 NUM_THREADS=1 ./tools/benchmark.sh universal_compaction

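# Generate a per-test summary in report2.txt by grepping report.txt. The
# patterns below assume the test names that tools/benchmark.sh writes into
# report.txt, e.g. names like "overwrite.t<threads>.s<sync>" where the .s0
# and .s1 suffixes distinguish sync=0 from sync=1 runs (this naming is an
# assumption about benchmark.sh, not something defined in this script).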
rm -f $output_dir/report2.txt

if [[ $skip_low_pri_tests != 1 ]]; then
  echo bulkload >> $output_dir/report2.txt
  head -1 $output_dir/report.txt >> $output_dir/report2.txt
  grep bulkload $output_dir/report.txt >> $output_dir/report2.txt
fi

echo fillseq_wal_disabled >> $output_dir/report2.txt
head -1 $output_dir/report.txt >> $output_dir/report2.txt
grep fillseq.wal_disabled $output_dir/report.txt >> $output_dir/report2.txt

echo fillseq_wal_enabled >> $output_dir/report2.txt
head -1 $output_dir/report.txt >> $output_dir/report2.txt
grep fillseq.wal_enabled $output_dir/report.txt >> $output_dir/report2.txt

echo overwrite sync=0 >> $output_dir/report2.txt
head -1 $output_dir/report.txt >> $output_dir/report2.txt
grep overwrite $output_dir/report.txt | grep \.s0 >> $output_dir/report2.txt

if [[ $skip_low_pri_tests != 1 ]]; then
  echo overwrite sync=1 >> $output_dir/report2.txt
  head -1 $output_dir/report.txt >> $output_dir/report2.txt
  grep overwrite $output_dir/report.txt | grep \.s1 >> $output_dir/report2.txt
fi

echo updaterandom sync=0 >> $output_dir/report2.txt
head -1 $output_dir/report.txt >> $output_dir/report2.txt
grep updaterandom $output_dir/report.txt | grep \.s0 >> $output_dir/report2.txt

if [[ $skip_low_pri_tests != 1 ]]; then
  echo updaterandom sync=1 >> $output_dir/report2.txt
  head -1 $output_dir/report.txt >> $output_dir/report2.txt
  grep updaterandom $output_dir/report.txt | grep \.s1 >> $output_dir/report2.txt
fi

echo mergerandom sync=0 >> $output_dir/report2.txt
head -1 $output_dir/report.txt >> $output_dir/report2.txt
grep mergerandom $output_dir/report.txt | grep \.s0 >> $output_dir/report2.txt

if [[ $skip_low_pri_tests != 1 ]]; then
  echo mergerandom sync=1 >> $output_dir/report2.txt
  head -1 $output_dir/report.txt >> $output_dir/report2.txt
  grep mergerandom $output_dir/report.txt | grep \.s1 >> $output_dir/report2.txt
fi

echo readrandom >> $output_dir/report2.txt
head -1 $output_dir/report.txt >> $output_dir/report2.txt
grep readrandom $output_dir/report.txt >> $output_dir/report2.txt

echo fwdrange >> $output_dir/report2.txt
head -1 $output_dir/report.txt >> $output_dir/report2.txt
grep fwdrange\.t $output_dir/report.txt >> $output_dir/report2.txt

echo revrange >> $output_dir/report2.txt
head -1 $output_dir/report.txt >> $output_dir/report2.txt
grep revrange\.t $output_dir/report.txt >> $output_dir/report2.txt

echo readwhilewriting >> $output_dir/report2.txt
head -1 $output_dir/report.txt >> $output_dir/report2.txt
grep readwhilewriting $output_dir/report.txt >> $output_dir/report2.txt

if [[ $skip_low_pri_tests != 1 ]]; then
  echo readwhilemerging >> $output_dir/report2.txt
  head -1 $output_dir/report.txt >> $output_dir/report2.txt
  grep readwhilemerging $output_dir/report.txt >> $output_dir/report2.txt
fi

echo fwdreadwhilewriting >> $output_dir/report2.txt
head -1 $output_dir/report.txt >> $output_dir/report2.txt
grep fwdrangewhilewriting $output_dir/report.txt >> $output_dir/report2.txt

if [[ $skip_low_pri_tests != 1 ]]; then
  echo fwdreadwhilemerging >> $output_dir/report2.txt
  head -1 $output_dir/report.txt >> $output_dir/report2.txt
  grep fwdrangewhilemerg $output_dir/report.txt >> $output_dir/report2.txt
fi

echo revreadwhilewriting >> $output_dir/report2.txt
head -1 $output_dir/report.txt >> $output_dir/report2.txt
grep revrangewhilewriting $output_dir/report.txt >> $output_dir/report2.txt

if [[ $skip_low_pri_tests != 1 ]]; then
  echo revreadwhilemerging >> $output_dir/report2.txt
  head -1 $output_dir/report.txt >> $output_dir/report2.txt
  grep revrangewhilemerg $output_dir/report.txt >> $output_dir/report2.txt
fi

cat $output_dir/report2.txt