ceph/src/rocksdb/tools/db_crashtest.py

   1 #!/usr/bin/env python3
   2 # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
   3 from __future__ import absolute_import, division, print_function, unicode_literals
   4
   5 import os
   6 import sys
   7 import time
   8 import random
   9 import re
  10 import tempfile
  11 import subprocess
  12 import shutil
  13 import argparse
  14
  15 # params overwrite priority:
  16 #   for default:
  17 #       default_params < {blackbox,whitebox}_default_params < args
  18 #   for simple:
  19 #       default_params < {blackbox,whitebox}_default_params <
  20 #       simple_default_params <
  21 #       {blackbox,whitebox}_simple_default_params < args
  22 #   for cf_consistency:
  23 #       default_params < {blackbox,whitebox}_default_params <
  24 #       cf_consistency_params < args
  25 #   for txn:
  26 #       default_params < {blackbox,whitebox}_default_params < txn_params < args
  27
  28
  29 default_params = {
  30     "acquire_snapshot_one_in": 10000,
  31     "backup_max_size": 100 * 1024 * 1024,
  32     # Consider larger number when backups considered more stable
  33     "backup_one_in": 100000,
  34     "block_size": 16384,
  35     "bloom_bits": lambda: random.choice([random.randint(0,19),
  36                                          random.lognormvariate(2.3, 1.3)]),
  37     "cache_index_and_filter_blocks": lambda: random.randint(0, 1),
  38     "cache_size": 1048576,
  39     "checkpoint_one_in": 1000000,
  40     "compression_type": lambda: random.choice(
  41         ["none", "snappy", "zlib", "bzip2", "lz4", "lz4hc", "xpress", "zstd"]),
  42     "bottommost_compression_type": lambda:
  43         "disable" if random.randint(0, 1) == 0 else
  44         random.choice(
  45             ["none", "snappy", "zlib", "bzip2", "lz4", "lz4hc", "xpress",
  46              "zstd"]),
  47     "checksum_type" : lambda: random.choice(["kCRC32c", "kxxHash", "kxxHash64"]),
  48     "compression_max_dict_bytes": lambda: 16384 * random.randint(0, 1),
  49     "compression_zstd_max_train_bytes": lambda: 65536 * random.randint(0, 1),
  50     # Disabled compression_parallel_threads as the feature is not stable
  51     # lambda: random.choice([1] * 9 + [4])
  52     "compression_parallel_threads": 1,
  53     "clear_column_family_one_in": 0,
  54     "compact_files_one_in": 1000000,
  55     "compact_range_one_in": 1000000,
  56     "delpercent": 4,
  57     "delrangepercent": 1,
  58     "destroy_db_initially": 0,
  59     "enable_pipelined_write": lambda: random.randint(0, 1),
  60     "enable_compaction_filter": lambda: random.choice([0, 0, 0, 1]),
  61     "expected_values_path": lambda: setup_expected_values_file(),
  62     "flush_one_in": 1000000,
  63     "file_checksum_impl": lambda: random.choice(["none", "crc32c", "xxh64", "big"]),
  64     "get_live_files_one_in": 1000000,
  65     # Note: the following two are intentionally disabled as the corresponding
  66     # APIs are not guaranteed to succeed.
  67     "get_sorted_wal_files_one_in": 0,
  68     "get_current_wal_file_one_in": 0,
  69     # Temporarily disable hash index
  70     "index_type": lambda: random.choice([0, 0, 0, 2, 2, 3]),
  71     "iterpercent": 10,
  72     "mark_for_compaction_one_file_in": lambda: 10 * random.randint(0, 1),
  73     "max_background_compactions": 20,
  74     "max_bytes_for_level_base": 10485760,
  75     "max_key": 100000000,
  76     "max_write_buffer_number": 3,
  77     "mmap_read": lambda: random.randint(0, 1),
  78     "nooverwritepercent": 1,
  79     "open_files": lambda : random.choice([-1, -1, 100, 500000]),
  80     "optimize_filters_for_memory": lambda: random.randint(0, 1),
  81     "partition_filters": lambda: random.randint(0, 1),
  82     "partition_pinning": lambda: random.randint(0, 3),
  83     "pause_background_one_in": 1000000,
  84     "prefixpercent": 5,
  85     "progress_reports": 0,
  86     "readpercent": 45,
  87     "recycle_log_file_num": lambda: random.randint(0, 1),
  88     "reopen": 20,
  89     "snapshot_hold_ops": 100000,
  90     "sst_file_manager_bytes_per_sec": lambda: random.choice([0, 104857600]),
  91     "sst_file_manager_bytes_per_truncate": lambda: random.choice([0, 1048576]),
  92     "long_running_snapshots": lambda: random.randint(0, 1),
  93     "subcompactions": lambda: random.randint(1, 4),
  94     "target_file_size_base": 2097152,
  95     "target_file_size_multiplier": 2,
  96     "top_level_index_pinning": lambda: random.randint(0, 3),
  97     "unpartitioned_pinning": lambda: random.randint(0, 3),
  98     "use_direct_reads": lambda: random.randint(0, 1),
  99     "use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1),
 100     "mock_direct_io": False,
 101     "use_full_merge_v1": lambda: random.randint(0, 1),
 102     "use_merge": lambda: random.randint(0, 1),
 103     "use_ribbon_filter": lambda: random.randint(0, 1),
 104     "verify_checksum": 1,
 105     "write_buffer_size": 4 * 1024 * 1024,
 106     "writepercent": 35,
 107     "format_version": lambda: random.choice([2, 3, 4, 5, 5]),
 108     "index_block_restart_interval": lambda: random.choice(range(1, 16)),
 109     "use_multiget" : lambda: random.randint(0, 1),
 110     "periodic_compaction_seconds" :
 111         lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
 112     "compaction_ttl" : lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
 113     # Test small max_manifest_file_size in a smaller chance, as most of the
 114     # time we wnat manifest history to be preserved to help debug
 115     "max_manifest_file_size" : lambda : random.choice(
 116         [t * 16384 if t < 3 else 1024 * 1024 * 1024 for t in range(1, 30)]),
 117     # Sync mode might make test runs slower so running it in a smaller chance
 118     "sync" : lambda : random.choice(
 119         [1 if t == 0 else 0 for t in range(0, 20)]),
 120     # Disable compation_readahead_size because the test is not passing.
 121     #"compaction_readahead_size" : lambda : random.choice(
 122     #    [0, 0, 1024 * 1024]),
 123     "db_write_buffer_size" : lambda: random.choice(
 124         [0, 0, 0, 1024 * 1024, 8 * 1024 * 1024, 128 * 1024 * 1024]),
 125     "avoid_unnecessary_blocking_io" : random.randint(0, 1),
 126     "write_dbid_to_manifest" : random.randint(0, 1),
 127     "avoid_flush_during_recovery" : random.choice(
 128         [1 if t == 0 else 0 for t in range(0, 8)]),
 129     "max_write_batch_group_size_bytes" : lambda: random.choice(
 130         [16, 64, 1024 * 1024, 16 * 1024 * 1024]),
 131     "level_compaction_dynamic_level_bytes" : True,
 132     "verify_checksum_one_in": 1000000,
 133     "verify_db_one_in": 100000,
 134     "continuous_verification_interval" : 0,
 135     "max_key_len": 3,
 136     "key_len_percent_dist": "1,30,69",
 137     "read_fault_one_in": lambda: random.choice([0, 1000]),
 138     "sync_fault_injection": False,
 139     "get_property_one_in": 1000000,
 140     "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
 141     "max_write_buffer_size_to_maintain": lambda: random.choice(
 142         [0, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024]),
 143 }
 144
 145 _TEST_DIR_ENV_VAR = 'TEST_TMPDIR'
 146 _DEBUG_LEVEL_ENV_VAR = 'DEBUG_LEVEL'
 147
 148
 149 def is_release_mode():
 150     return os.environ.get(_DEBUG_LEVEL_ENV_VAR) == "0"
 151
 152
 153 def get_dbname(test_name):
 154     test_dir_name = "rocksdb_crashtest_" + test_name
 155     test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
 156     if test_tmpdir is None or test_tmpdir == "":
 157         dbname = tempfile.mkdtemp(prefix=test_dir_name)
 158     else:
 159         dbname = test_tmpdir + "/" + test_dir_name
 160         shutil.rmtree(dbname, True)
 161         os.mkdir(dbname)
 162     return dbname
 163
 164 expected_values_file = None
 165 def setup_expected_values_file():
 166     global expected_values_file
 167     if expected_values_file is not None:
 168         return expected_values_file
 169     expected_file_name = "rocksdb_crashtest_" + "expected"
 170     test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
 171     if test_tmpdir is None or test_tmpdir == "":
 172         expected_values_file = tempfile.NamedTemporaryFile(
 173             prefix=expected_file_name, delete=False).name
 174     else:
 175         # if tmpdir is specified, store the expected_values_file in the same dir
 176         expected_values_file = test_tmpdir + "/" + expected_file_name
 177         if os.path.exists(expected_values_file):
 178             os.remove(expected_values_file)
 179         open(expected_values_file, 'a').close()
 180     return expected_values_file
 181
 182
 183 def is_direct_io_supported(dbname):
 184     with tempfile.NamedTemporaryFile(dir=dbname) as f:
 185         try:
 186             os.open(f.name, os.O_DIRECT)
 187         except BaseException:
 188             return False
 189         return True
 190
 191
 192 blackbox_default_params = {
 193     # total time for this script to test db_stress
 194     "duration": 6000,
 195     # time for one db_stress instance to run
 196     "interval": 120,
 197     # since we will be killing anyway, use large value for ops_per_thread
 198     "ops_per_thread": 100000000,
 199     "set_options_one_in": 10000,
 200     "test_batches_snapshots": 1,
 201 }
 202
 203 whitebox_default_params = {
 204     "duration": 10000,
 205     "log2_keys_per_lock": 10,
 206     "ops_per_thread": 200000,
 207     "random_kill_odd": 888887,
 208     "test_batches_snapshots": lambda: random.randint(0, 1),
 209 }
 210
 211 simple_default_params = {
 212     "allow_concurrent_memtable_write": lambda: random.randint(0, 1),
 213     "column_families": 1,
 214     "max_background_compactions": 1,
 215     "max_bytes_for_level_base": 67108864,
 216     "memtablerep": "skip_list",
 217     "prefixpercent": 0,
 218     "readpercent": 50,
 219     "prefix_size" : -1,
 220     "target_file_size_base": 16777216,
 221     "target_file_size_multiplier": 1,
 222     "test_batches_snapshots": 0,
 223     "write_buffer_size": 32 * 1024 * 1024,
 224     "level_compaction_dynamic_level_bytes": False,
 225     "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
 226 }
 227
 228 blackbox_simple_default_params = {
 229     "open_files": -1,
 230     "set_options_one_in": 0,
 231 }
 232
 233 whitebox_simple_default_params = {}
 234
 235 cf_consistency_params = {
 236     "disable_wal": lambda: random.randint(0, 1),
 237     "reopen": 0,
 238     "test_cf_consistency": 1,
 239     # use small value for write_buffer_size so that RocksDB triggers flush
 240     # more frequently
 241     "write_buffer_size": 1024 * 1024,
 242     "enable_pipelined_write": lambda: random.randint(0, 1),
 243     # Snapshots are used heavily in this test mode, while they are incompatible
 244     # with compaction filter.
 245     "enable_compaction_filter": 0,
 246 }
 247
 248 txn_params = {
 249     "use_txn" : 1,
 250     # Avoid lambda to set it once for the entire test
 251     "txn_write_policy": random.randint(0, 2),
 252     "unordered_write": random.randint(0, 1),
 253     "disable_wal": 0,
 254     # OpenReadOnly after checkpoint is not currnetly compatible with WritePrepared txns
 255     "checkpoint_one_in": 0,
 256     # pipeline write is not currnetly compatible with WritePrepared txns
 257     "enable_pipelined_write": 0,
 258 }
 259
 260 best_efforts_recovery_params = {
 261     "best_efforts_recovery": True,
 262     "skip_verifydb": True,
 263     "verify_db_one_in": 0,
 264     "continuous_verification_interval": 0,
 265 }
 266
 267 def finalize_and_sanitize(src_params):
 268     dest_params = dict([(k,  v() if callable(v) else v)
 269                         for (k, v) in src_params.items()])
 270     if dest_params.get("compression_type") != "zstd" or \
 271             dest_params.get("compression_max_dict_bytes") == 0:
 272         dest_params["compression_zstd_max_train_bytes"] = 0
 273     if dest_params.get("allow_concurrent_memtable_write", 1) == 1:
 274         dest_params["memtablerep"] = "skip_list"
 275     if dest_params["mmap_read"] == 1:
 276         dest_params["use_direct_io_for_flush_and_compaction"] = 0
 277         dest_params["use_direct_reads"] = 0
 278     if (dest_params["use_direct_io_for_flush_and_compaction"] == 1
 279             or dest_params["use_direct_reads"] == 1) and \
 280             not is_direct_io_supported(dest_params["db"]):
 281         if is_release_mode():
 282             print("{} does not support direct IO. Disabling use_direct_reads and "
 283                     "use_direct_io_for_flush_and_compaction.\n".format(
 284                         dest_params["db"]))
 285             dest_params["use_direct_reads"] = 0
 286             dest_params["use_direct_io_for_flush_and_compaction"] = 0
 287         else:
 288             dest_params["mock_direct_io"] = True
 289
 290     # DeleteRange is not currnetly compatible with Txns
 291     if dest_params.get("test_batches_snapshots") == 1 or \
 292             dest_params.get("use_txn") == 1:
 293         dest_params["delpercent"] += dest_params["delrangepercent"]
 294         dest_params["delrangepercent"] = 0
 295     # Only under WritePrepared txns, unordered_write would provide the same guarnatees as vanilla rocksdb
 296     if dest_params.get("unordered_write", 0) == 1:
 297         dest_params["txn_write_policy"] = 1
 298         dest_params["allow_concurrent_memtable_write"] = 1
 299     if dest_params.get("disable_wal", 0) == 1:
 300         dest_params["atomic_flush"] = 1
 301         dest_params["sync"] = 0
 302     if dest_params.get("open_files", 1) != -1:
 303         # Compaction TTL and periodic compactions are only compatible
 304         # with open_files = -1
 305         dest_params["compaction_ttl"] = 0
 306         dest_params["periodic_compaction_seconds"] = 0
 307     if dest_params.get("compaction_style", 0) == 2:
 308         # Disable compaction TTL in FIFO compaction, because right
 309         # now assertion failures are triggered.
 310         dest_params["compaction_ttl"] = 0
 311         dest_params["periodic_compaction_seconds"] = 0
 312     if dest_params["partition_filters"] == 1:
 313         if dest_params["index_type"] != 2:
 314             dest_params["partition_filters"] = 0
 315         else:
 316             dest_params["use_block_based_filter"] = 0
 317     if dest_params.get("atomic_flush", 0) == 1:
 318         # disable pipelined write when atomic flush is used.
 319         dest_params["enable_pipelined_write"] = 0
 320     if dest_params.get("sst_file_manager_bytes_per_sec", 0) == 0:
 321         dest_params["sst_file_manager_bytes_per_truncate"] = 0
 322     if dest_params.get("enable_compaction_filter", 0) == 1:
 323         # Compaction filter is incompatible with snapshots. Need to avoid taking
 324         # snapshots, as well as avoid operations that use snapshots for
 325         # verification.
 326         dest_params["acquire_snapshot_one_in"] = 0
 327         dest_params["compact_range_one_in"] = 0
 328         # Give the iterator ops away to reads.
 329         dest_params["readpercent"] += dest_params.get("iterpercent", 10)
 330         dest_params["iterpercent"] = 0
 331         dest_params["test_batches_snapshots"] = 0
 332     return dest_params
 333
 334 def gen_cmd_params(args):
 335     params = {}
 336
 337     params.update(default_params)
 338     if args.test_type == 'blackbox':
 339         params.update(blackbox_default_params)
 340     if args.test_type == 'whitebox':
 341         params.update(whitebox_default_params)
 342     if args.simple:
 343         params.update(simple_default_params)
 344         if args.test_type == 'blackbox':
 345             params.update(blackbox_simple_default_params)
 346         if args.test_type == 'whitebox':
 347             params.update(whitebox_simple_default_params)
 348     if args.cf_consistency:
 349         params.update(cf_consistency_params)
 350     if args.txn:
 351         params.update(txn_params)
 352     if args.test_best_efforts_recovery:
 353         params.update(best_efforts_recovery_params)
 354
 355     for k, v in vars(args).items():
 356         if v is not None:
 357             params[k] = v
 358     return params
 359
 360
 361 def gen_cmd(params, unknown_params):
 362     finalzied_params = finalize_and_sanitize(params)
 363     cmd = ['./db_stress'] + [
 364         '--{0}={1}'.format(k, v)
 365         for k, v in [(k, finalzied_params[k]) for k in sorted(finalzied_params)]
 366         if k not in set(['test_type', 'simple', 'duration', 'interval',
 367                          'random_kill_odd', 'cf_consistency', 'txn',
 368                          'test_best_efforts_recovery'])
 369         and v is not None] + unknown_params
 370     return cmd
 371
 372
 373 # Inject inconsistency to db directory.
 374 def inject_inconsistencies_to_db_dir(dir_path):
 375     files = os.listdir(dir_path)
 376     file_num_rgx = re.compile(r'(?P<number>[0-9]{6})')
 377     largest_fnum = 0
 378     for f in files:
 379         m = file_num_rgx.search(f)
 380         if m and not f.startswith('LOG'):
 381             largest_fnum = max(largest_fnum, int(m.group('number')))
 382
 383     candidates = [
 384         f for f in files if re.search(r'[0-9]+\.sst', f)
 385     ]
 386     deleted = 0
 387     corrupted = 0
 388     for f in candidates:
 389         rnd = random.randint(0, 99)
 390         f_path = os.path.join(dir_path, f)
 391         if rnd < 10:
 392             os.unlink(f_path)
 393             deleted = deleted + 1
 394         elif 10 <= rnd and rnd < 30:
 395             with open(f_path, "a") as fd:
 396                 fd.write('12345678')
 397             corrupted = corrupted + 1
 398     print('Removed %d table files' % deleted)
 399     print('Corrupted %d table files' % corrupted)
 400
 401     # Add corrupted MANIFEST and SST
 402     for num in range(largest_fnum + 1, largest_fnum + 10):
 403         rnd = random.randint(0, 1)
 404         fname = ("MANIFEST-%06d" % num) if rnd == 0 else ("%06d.sst" % num)
 405         print('Write %s' % fname)
 406         with open(os.path.join(dir_path, fname), "w") as fd:
 407             fd.write("garbage")
 408
 409
 410 # This script runs and kills db_stress multiple times. It checks consistency
 411 # in case of unsafe crashes in RocksDB.
 412 def blackbox_crash_main(args, unknown_args):
 413     cmd_params = gen_cmd_params(args)
 414     dbname = get_dbname('blackbox')
 415     exit_time = time.time() + cmd_params['duration']
 416
 417     print("Running blackbox-crash-test with \n"
 418           + "interval_between_crash=" + str(cmd_params['interval']) + "\n"
 419           + "total-duration=" + str(cmd_params['duration']) + "\n")
 420
 421     while time.time() < exit_time:
 422         run_had_errors = False
 423         killtime = time.time() + cmd_params['interval']
 424
 425         cmd = gen_cmd(dict(
 426             list(cmd_params.items())
 427             + list({'db': dbname}.items())), unknown_args)
 428
 429         child = subprocess.Popen(cmd, stderr=subprocess.PIPE)
 430         print("Running db_stress with pid=%d: %s\n\n"
 431               % (child.pid, ' '.join(cmd)))
 432
 433         stop_early = False
 434         while time.time() < killtime:
 435             if child.poll() is not None:
 436                 print("WARNING: db_stress ended before kill: exitcode=%d\n"
 437                       % child.returncode)
 438                 stop_early = True
 439                 break
 440             time.sleep(1)
 441
 442         if not stop_early:
 443             if child.poll() is not None:
 444                 print("WARNING: db_stress ended before kill: exitcode=%d\n"
 445                       % child.returncode)
 446             else:
 447                 child.kill()
 448                 print("KILLED %d\n" % child.pid)
 449                 time.sleep(1)  # time to stabilize after a kill
 450
 451         while True:
 452             line = child.stderr.readline().strip().decode('utf-8')
 453             if line == '':
 454                 break
 455             elif not line.startswith('WARNING'):
 456                 run_had_errors = True
 457                 print('stderr has error message:')
 458                 print('***' + line + '***')
 459
 460         if run_had_errors:
 461             sys.exit(2)
 462
 463         time.sleep(1)  # time to stabilize before the next run
 464
 465         if args.test_best_efforts_recovery:
 466             inject_inconsistencies_to_db_dir(dbname)
 467
 468         time.sleep(1)  # time to stabilize before the next run
 469
 470     # we need to clean up after ourselves -- only do this on test success
 471     shutil.rmtree(dbname, True)
 472
 473
 474 # This python script runs db_stress multiple times. Some runs with
 475 # kill_random_test that causes rocksdb to crash at various points in code.
 476 def whitebox_crash_main(args, unknown_args):
 477     cmd_params = gen_cmd_params(args)
 478     dbname = get_dbname('whitebox')
 479
 480     cur_time = time.time()
 481     exit_time = cur_time + cmd_params['duration']
 482     half_time = cur_time + cmd_params['duration'] // 2
 483
 484     print("Running whitebox-crash-test with \n"
 485           + "total-duration=" + str(cmd_params['duration']) + "\n")
 486
 487     total_check_mode = 4
 488     check_mode = 0
 489     kill_random_test = cmd_params['random_kill_odd']
 490     kill_mode = 0
 491
 492     while time.time() < exit_time:
 493         if check_mode == 0:
 494             additional_opts = {
 495                 # use large ops per thread since we will kill it anyway
 496                 "ops_per_thread": 100 * cmd_params['ops_per_thread'],
 497             }
 498             # run with kill_random_test, with three modes.
 499             # Mode 0 covers all kill points. Mode 1 covers less kill points but
 500             # increases change of triggering them. Mode 2 covers even less
 501             # frequent kill points and further increases triggering change.
 502             if kill_mode == 0:
 503                 additional_opts.update({
 504                     "kill_random_test": kill_random_test,
 505                 })
 506             elif kill_mode == 1:
 507                 if cmd_params.get('disable_wal', 0) == 1:
 508                     my_kill_odd = kill_random_test // 50 + 1
 509                 else:
 510                     my_kill_odd = kill_random_test // 10 + 1
 511                 additional_opts.update({
 512                     "kill_random_test": my_kill_odd,
 513                     "kill_exclude_prefixes": "WritableFileWriter::Append,"
 514                     + "WritableFileWriter::WriteBuffered",
 515                 })
 516             elif kill_mode == 2:
 517                 # TODO: May need to adjust random odds if kill_random_test
 518                 # is too small.
 519                 additional_opts.update({
 520                     "kill_random_test": (kill_random_test // 5000 + 1),
 521                     "kill_exclude_prefixes": "WritableFileWriter::Append,"
 522                     "WritableFileWriter::WriteBuffered,"
 523                     "PosixMmapFile::Allocate,WritableFileWriter::Flush",
 524                 })
 525             # Run kill mode 0, 1 and 2 by turn.
 526             kill_mode = (kill_mode + 1) % 3
 527         elif check_mode == 1:
 528             # normal run with universal compaction mode
 529             additional_opts = {
 530                 "kill_random_test": None,
 531                 "ops_per_thread": cmd_params['ops_per_thread'],
 532                 "compaction_style": 1,
 533             }
 534             # Single level universal has a lot of special logic. Ensure we cover
 535             # it sometimes.
 536             if random.randint(0, 1) == 1:
 537                 additional_opts.update({
 538                     "num_levels": 1,
 539                 })
 540         elif check_mode == 2:
 541             # normal run with FIFO compaction mode
 542             # ops_per_thread is divided by 5 because FIFO compaction
 543             # style is quite a bit slower on reads with lot of files
 544             additional_opts = {
 545                 "kill_random_test": None,
 546                 "ops_per_thread": cmd_params['ops_per_thread'] // 5,
 547                 "compaction_style": 2,
 548             }
 549         else:
 550             # normal run
 551             additional_opts = {
 552                 "kill_random_test": None,
 553                 "ops_per_thread": cmd_params['ops_per_thread'],
 554             }
 555
 556         cmd = gen_cmd(dict(list(cmd_params.items())
 557             + list(additional_opts.items())
 558             + list({'db': dbname}.items())), unknown_args)
 559
 560         print("Running:" + ' '.join(cmd) + "\n")  # noqa: E999 T25377293 Grandfathered in
 561
 562         popen = subprocess.Popen(cmd, stdout=subprocess.PIPE,
 563                                  stderr=subprocess.STDOUT)
 564         stdoutdata, stderrdata = popen.communicate()
 565         if stdoutdata:
 566             stdoutdata = stdoutdata.decode('utf-8')
 567         if stderrdata:
 568             stderrdata = stderrdata.decode('utf-8')
 569         retncode = popen.returncode
 570         msg = ("check_mode={0}, kill option={1}, exitcode={2}\n".format(
 571                check_mode, additional_opts['kill_random_test'], retncode))
 572         print(msg)
 573         print(stdoutdata)
 574
 575         expected = False
 576         if additional_opts['kill_random_test'] is None and (retncode == 0):
 577             # we expect zero retncode if no kill option
 578             expected = True
 579         elif additional_opts['kill_random_test'] is not None and retncode <= 0:
 580             # When kill option is given, the test MIGHT kill itself.
 581             # If it does, negative retncode is expected. Otherwise 0.
 582             expected = True
 583
 584         if not expected:
 585             print("TEST FAILED. See kill option and exit code above!!!\n")
 586             sys.exit(1)
 587
 588         stdoutdata = stdoutdata.lower()
 589         errorcount = (stdoutdata.count('error') -
 590                       stdoutdata.count('got errors 0 times'))
 591         print("#times error occurred in output is " + str(errorcount) + "\n")
 592
 593         if (errorcount > 0):
 594             print("TEST FAILED. Output has 'error'!!!\n")
 595             sys.exit(2)
 596         if (stdoutdata.find('fail') >= 0):
 597             print("TEST FAILED. Output has 'fail'!!!\n")
 598             sys.exit(2)
 599
 600         # First half of the duration, keep doing kill test. For the next half,
 601         # try different modes.
 602         if time.time() > half_time:
 603             # we need to clean up after ourselves -- only do this on test
 604             # success
 605             shutil.rmtree(dbname, True)
 606             os.mkdir(dbname)
 607             cmd_params.pop('expected_values_path', None)
 608             check_mode = (check_mode + 1) % total_check_mode
 609
 610         time.sleep(1)  # time to stabilize after a kill
 611
 612
 613 def main():
 614     parser = argparse.ArgumentParser(description="This script runs and kills \
 615         db_stress multiple times")
 616     parser.add_argument("test_type", choices=["blackbox", "whitebox"])
 617     parser.add_argument("--simple", action="store_true")
 618     parser.add_argument("--cf_consistency", action='store_true')
 619     parser.add_argument("--txn", action='store_true')
 620     parser.add_argument("--test_best_efforts_recovery", action='store_true')
 621
 622     all_params = dict(list(default_params.items())
 623                       + list(blackbox_default_params.items())
 624                       + list(whitebox_default_params.items())
 625                       + list(simple_default_params.items())
 626                       + list(blackbox_simple_default_params.items())
 627                       + list(whitebox_simple_default_params.items()))
 628
 629     for k, v in all_params.items():
 630         parser.add_argument("--" + k, type=type(v() if callable(v) else v))
 631     # unknown_args are passed directly to db_stress
 632     args, unknown_args = parser.parse_known_args()
 633
 634     test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
 635     if test_tmpdir is not None and not os.path.isdir(test_tmpdir):
 636         print('%s env var is set to a non-existent directory: %s' %
 637                 (_TEST_DIR_ENV_VAR, test_tmpdir))
 638         sys.exit(1)
 639
 640     if args.test_type == 'blackbox':
 641         blackbox_crash_main(args, unknown_args)
 642     if args.test_type == 'whitebox':
 643         whitebox_crash_main(args, unknown_args)
 644     # Only delete the `expected_values_file` if test passes
 645     if os.path.exists(expected_values_file):
 646         os.remove(expected_values_file)
 647
 648
 649 if __name__ == '__main__':
 650     main()