#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import, division, print_function, unicode_literals

import argparse

import os
import random
import shutil
import subprocess
import sys
import tempfile
import time

# params overwrite priority:
#   for default:
#       default_params < {blackbox,whitebox}_default_params < args
#   for simple:
#       default_params < {blackbox,whitebox}_default_params <
#       simple_default_params <
#       {blackbox,whitebox}_simple_default_params < args
#   for cf_consistency:
#       default_params < {blackbox,whitebox}_default_params <
#       cf_consistency_params < args
#   for txn:
#       default_params < {blackbox,whitebox}_default_params < txn_params < args
#   for ts:
#       default_params < {blackbox,whitebox}_default_params < ts_params < args
#   for multiops_txn:
#       default_params < {blackbox,whitebox}_default_params < multiops_txn_params < args
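#
# Illustrative sketch (comments only, not executed): gen_cmd_params() below
# realizes this precedence with successive dict.update() calls, so later
# dictionaries overwrite earlier ones. For a `whitebox --simple` run the
# effective merge order is roughly:
#   params = {}
#   params.update(default_params)
#   params.update(whitebox_default_params)
#   params.update(simple_default_params)
#   params.update(whitebox_simple_default_params)
#   params.update({k: v for k, v in vars(args).items() if v is not None})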


default_params = {
    "acquire_snapshot_one_in": 10000,
    "backup_max_size": 100 * 1024 * 1024,
    # Consider a larger number once backups are considered more stable
    "backup_one_in": 100000,
    "batch_protection_bytes_per_key": lambda: random.choice([0, 8]),
    "memtable_protection_bytes_per_key": lambda: random.choice([0, 1, 2, 4, 8]),
    "block_size": 16384,
    "bloom_bits": lambda: random.choice(
        [random.randint(0, 19), random.lognormvariate(2.3, 1.3)]
    ),
    "cache_index_and_filter_blocks": lambda: random.randint(0, 1),
    "cache_size": 8388608,
    "charge_compression_dictionary_building_buffer": lambda: random.choice([0, 1]),
    "charge_filter_construction": lambda: random.choice([0, 1]),
    "charge_table_reader": lambda: random.choice([0, 1]),
    "charge_file_metadata": lambda: random.choice([0, 1]),
    "checkpoint_one_in": 1000000,
    "compression_type": lambda: random.choice(
        ["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]
    ),
    "bottommost_compression_type": lambda: "disable"
    if random.randint(0, 1) == 0
    else random.choice(["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]),
    "checksum_type": lambda: random.choice(
        ["kCRC32c", "kxxHash", "kxxHash64", "kXXH3"]
    ),
    "compression_max_dict_bytes": lambda: 16384 * random.randint(0, 1),
    "compression_zstd_max_train_bytes": lambda: 65536 * random.randint(0, 1),
    # Disabled compression_parallel_threads as the feature is not stable
    # lambda: random.choice([1] * 9 + [4])
    "compression_parallel_threads": 1,
    "compression_max_dict_buffer_bytes": lambda: (1 << random.randint(0, 40)) - 1,
    "compression_use_zstd_dict_trainer": lambda: random.randint(0, 1),
    "clear_column_family_one_in": 0,
    "compact_files_one_in": 1000000,
    "compact_range_one_in": 1000000,
    "compaction_pri": random.randint(0, 4),
    "data_block_index_type": lambda: random.choice([0, 1]),
    "delpercent": 4,
    "delrangepercent": 1,
    "destroy_db_initially": 0,
    "enable_pipelined_write": lambda: random.randint(0, 1),
    "enable_compaction_filter": lambda: random.choice([0, 0, 0, 1]),
    "expected_values_dir": lambda: setup_expected_values_dir(),
    "fail_if_options_file_error": lambda: random.randint(0, 1),
    "flush_one_in": 1000000,
    "manual_wal_flush_one_in": lambda: random.choice([0, 0, 1000, 1000000]),
    "file_checksum_impl": lambda: random.choice(["none", "crc32c", "xxh64", "big"]),
    "get_live_files_one_in": 1000000,
    # Note: the following two are intentionally disabled as the corresponding
    # APIs are not guaranteed to succeed.
    "get_sorted_wal_files_one_in": 0,
    "get_current_wal_file_one_in": 0,
    # Temporarily disable hash index
    "index_type": lambda: random.choice([0, 0, 0, 2, 2, 3]),
    "ingest_external_file_one_in": 1000000,
    "iterpercent": 10,
    "mark_for_compaction_one_file_in": lambda: 10 * random.randint(0, 1),
    "max_background_compactions": 20,
    "max_bytes_for_level_base": 10485760,
    "max_key": 25000000,
    "max_write_buffer_number": 3,
    "mmap_read": lambda: random.randint(0, 1),
    # Setting `nooverwritepercent > 0` is only possible because we do not vary
    # the random seed, so the same keys are chosen by every run for disallowing
    # overwrites.
    "nooverwritepercent": 1,
    "open_files": lambda: random.choice([-1, -1, 100, 500000]),
    "optimize_filters_for_memory": lambda: random.randint(0, 1),
    "partition_filters": lambda: random.randint(0, 1),
    "partition_pinning": lambda: random.randint(0, 3),
    "pause_background_one_in": 1000000,
    "prefix_size": lambda: random.choice([-1, 1, 5, 7, 8]),
    "prefixpercent": 5,
    "progress_reports": 0,
    "readpercent": 45,
    "recycle_log_file_num": lambda: random.randint(0, 1),
    "snapshot_hold_ops": 100000,
    "sst_file_manager_bytes_per_sec": lambda: random.choice([0, 104857600]),
    "sst_file_manager_bytes_per_truncate": lambda: random.choice([0, 1048576]),
    "long_running_snapshots": lambda: random.randint(0, 1),
    "subcompactions": lambda: random.randint(1, 4),
    "target_file_size_base": 2097152,
    "target_file_size_multiplier": 2,
    "test_batches_snapshots": random.randint(0, 1),
    "top_level_index_pinning": lambda: random.randint(0, 3),
    "unpartitioned_pinning": lambda: random.randint(0, 3),
    "use_direct_reads": lambda: random.randint(0, 1),
    "use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1),
    "mock_direct_io": False,
    "cache_type": lambda: random.choice(["lru_cache", "hyper_clock_cache"]),
    "use_full_merge_v1": lambda: random.randint(0, 1),
    "use_merge": lambda: random.randint(0, 1),
    # use_put_entity_one_in has to be the same across invocations for verification to work, hence no lambda
    "use_put_entity_one_in": random.choice([0] * 7 + [1, 5, 10]),
    # 999 -> use Bloom API
    "ribbon_starting_level": lambda: random.choice([random.randint(-1, 10), 999]),
    "value_size_mult": 32,
    "verify_checksum": 1,
    "write_buffer_size": 4 * 1024 * 1024,
    "writepercent": 35,
    "format_version": lambda: random.choice([2, 3, 4, 5, 5]),
    "index_block_restart_interval": lambda: random.choice(range(1, 16)),
    "use_multiget": lambda: random.randint(0, 1),
    "periodic_compaction_seconds": lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
    # 0 = never (used by some), 10 = often (for threading bugs), 600 = default
    "stats_dump_period_sec": lambda: random.choice([0, 10, 600]),
    "compaction_ttl": lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
    # Test a small max_manifest_file_size with a small probability, as most of the
    # time we want manifest history to be preserved to help debug
    "max_manifest_file_size": lambda: random.choice(
        [t * 16384 if t < 3 else 1024 * 1024 * 1024 for t in range(1, 30)]
    ),
    # Sync mode might make test runs slower, so run it with a smaller probability
    "sync": lambda: random.choice([1 if t == 0 else 0 for t in range(0, 20)]),
    "bytes_per_sync": lambda: random.choice([0, 262144]),
    "wal_bytes_per_sync": lambda: random.choice([0, 524288]),
    # Disable compaction_readahead_size because the test is not passing.
    # "compaction_readahead_size" : lambda : random.choice(
    #     [0, 0, 1024 * 1024]),
    "db_write_buffer_size": lambda: random.choice(
        [0, 0, 0, 1024 * 1024, 8 * 1024 * 1024, 128 * 1024 * 1024]
    ),
    "avoid_unnecessary_blocking_io": random.randint(0, 1),
    "write_dbid_to_manifest": random.randint(0, 1),
    "avoid_flush_during_recovery": lambda: random.choice(
        [1 if t == 0 else 0 for t in range(0, 8)]
    ),
    "max_write_batch_group_size_bytes": lambda: random.choice(
        [16, 64, 1024 * 1024, 16 * 1024 * 1024]
    ),
    "level_compaction_dynamic_level_bytes": True,
    "verify_checksum_one_in": 1000000,
    "verify_db_one_in": 100000,
    "continuous_verification_interval": 0,
    "max_key_len": 3,
    "key_len_percent_dist": "1,30,69",
    "read_fault_one_in": lambda: random.choice([0, 32, 1000]),
    "open_metadata_write_fault_one_in": lambda: random.choice([0, 0, 8]),
    "open_write_fault_one_in": lambda: random.choice([0, 0, 16]),
    "open_read_fault_one_in": lambda: random.choice([0, 0, 32]),
    "sync_fault_injection": lambda: random.randint(0, 1),
    "get_property_one_in": 1000000,
    "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
    "max_write_buffer_size_to_maintain": lambda: random.choice(
        [0, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024]
    ),
    "user_timestamp_size": 0,
    "secondary_cache_fault_one_in": lambda: random.choice([0, 0, 32]),
    "prepopulate_block_cache": lambda: random.choice([0, 1]),
    "memtable_prefix_bloom_size_ratio": lambda: random.choice([0.001, 0.01, 0.1, 0.5]),
    "memtable_whole_key_filtering": lambda: random.randint(0, 1),
    "detect_filter_construct_corruption": lambda: random.choice([0, 1]),
    "adaptive_readahead": lambda: random.choice([0, 1]),
    "async_io": lambda: random.choice([0, 1]),
    "wal_compression": lambda: random.choice(["none", "zstd"]),
    "verify_sst_unique_id_in_manifest": 1,  # always do unique_id verification
    "secondary_cache_uri": lambda: random.choice(
        [
            "",
            "compressed_secondary_cache://capacity=8388608",
            "compressed_secondary_cache://capacity=8388608;enable_custom_split_merge=true",
        ]
    ),
    "allow_data_in_errors": True,
    "readahead_size": lambda: random.choice([0, 16384, 524288]),
    "initial_auto_readahead_size": lambda: random.choice([0, 16384, 524288]),
    "max_auto_readahead_size": lambda: random.choice([0, 16384, 524288]),
    "num_file_reads_for_auto_readahead": lambda: random.choice([0, 1, 2]),
    "min_write_buffer_number_to_merge": lambda: random.choice([1, 2]),
    "preserve_internal_time_seconds": lambda: random.choice([0, 60, 3600, 36000]),
}

_TEST_DIR_ENV_VAR = "TEST_TMPDIR"
_DEBUG_LEVEL_ENV_VAR = "DEBUG_LEVEL"

stress_cmd = "./db_stress"
cleanup_cmd = None


def is_release_mode():
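    """Return True when the DEBUG_LEVEL env var is set to 0, i.e. a release build."""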
    return os.environ.get(_DEBUG_LEVEL_ENV_VAR) == "0"


def get_dbname(test_name):
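    """Return the database directory for this test, creating it fresh.

    Uses TEST_TMPDIR when set (removing any previous contents and running the
    optional cleanup command first); otherwise falls back to tempfile.mkdtemp().
    """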
    test_dir_name = "rocksdb_crashtest_" + test_name
    test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
    if test_tmpdir is None or test_tmpdir == "":
        dbname = tempfile.mkdtemp(prefix=test_dir_name)
    else:
        dbname = test_tmpdir + "/" + test_dir_name
        shutil.rmtree(dbname, True)
        if cleanup_cmd is not None:
            print("Running DB cleanup command - %s\n" % cleanup_cmd)
            # Ignore failure
            os.system(cleanup_cmd)
        os.mkdir(dbname)
    return dbname


expected_values_dir = None


def setup_expected_values_dir():
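    """Create the directory db_stress uses to persist expected values for
    verification; memoized so all runs in this invocation share one directory.
    """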
    global expected_values_dir
    if expected_values_dir is not None:
        return expected_values_dir
    expected_dir_prefix = "rocksdb_crashtest_expected_"
    test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
    if test_tmpdir is None or test_tmpdir == "":
        expected_values_dir = tempfile.mkdtemp(prefix=expected_dir_prefix)
    else:
        # if tmpdir is specified, store the expected_values_dir under that dir
        expected_values_dir = test_tmpdir + "/rocksdb_crashtest_expected"
        if os.path.exists(expected_values_dir):
            shutil.rmtree(expected_values_dir)
        os.mkdir(expected_values_dir)
    return expected_values_dir


multiops_txn_key_spaces_file = None


def setup_multiops_txn_key_spaces_file():
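    """Create the key-spaces file passed to the multiops_txn test via
    --key_spaces_path; memoized so all runs in this invocation share one file.
    """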
    global multiops_txn_key_spaces_file
    if multiops_txn_key_spaces_file is not None:
        return multiops_txn_key_spaces_file
    key_spaces_file_prefix = "rocksdb_crashtest_multiops_txn_key_spaces"
    test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
    if test_tmpdir is None or test_tmpdir == "":
        multiops_txn_key_spaces_file = tempfile.mkstemp(prefix=key_spaces_file_prefix)[
            1
        ]
    else:
        if not os.path.exists(test_tmpdir):
            os.mkdir(test_tmpdir)
        multiops_txn_key_spaces_file = tempfile.mkstemp(
            prefix=key_spaces_file_prefix, dir=test_tmpdir
        )[1]
    return multiops_txn_key_spaces_file


def is_direct_io_supported(dbname):
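    """Return True if a file created under `dbname` can be opened with O_DIRECT."""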
    with tempfile.NamedTemporaryFile(dir=dbname) as f:
        try:
            os.open(f.name, os.O_DIRECT)
        except BaseException:
            return False
        return True


blackbox_default_params = {
    "disable_wal": lambda: random.choice([0, 0, 0, 1]),
    # total time for this script to test db_stress
    "duration": 6000,
    # time for one db_stress instance to run
    "interval": 120,
    # since we will be killing anyway, use a large value for ops_per_thread
    "ops_per_thread": 100000000,
    "reopen": 0,
    "set_options_one_in": 10000,
}

whitebox_default_params = {
    # TODO: enable this once we figure out how to adjust kill odds for WAL-
    # disabled runs, and either (1) separate full `db_stress` runs out of
    # whitebox crash or (2) support verification at end of `db_stress` runs
    # that ran with WAL disabled.
    "disable_wal": 0,
    "duration": 10000,
    "log2_keys_per_lock": 10,
    "ops_per_thread": 200000,
    "random_kill_odd": 888887,
    "reopen": 20,
}

simple_default_params = {
    "allow_concurrent_memtable_write": lambda: random.randint(0, 1),
    "column_families": 1,
    # TODO: re-enable once internal task T124324915 is fixed.
    # "experimental_mempurge_threshold": lambda: 10.0*random.random(),
    "max_background_compactions": 1,
    "max_bytes_for_level_base": 67108864,
    "memtablerep": "skip_list",
    "target_file_size_base": 16777216,
    "target_file_size_multiplier": 1,
    "test_batches_snapshots": 0,
    "write_buffer_size": 32 * 1024 * 1024,
    "level_compaction_dynamic_level_bytes": False,
    "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
    "verify_iterator_with_expected_state_one_in": 5,  # this locks a range of keys
}

blackbox_simple_default_params = {
    "open_files": -1,
    "set_options_one_in": 0,
}

whitebox_simple_default_params = {}

cf_consistency_params = {
    "disable_wal": lambda: random.randint(0, 1),
    "reopen": 0,
    "test_cf_consistency": 1,
    # use a small value for write_buffer_size so that RocksDB triggers flushes
    # more frequently
    "write_buffer_size": 1024 * 1024,
    "enable_pipelined_write": lambda: random.randint(0, 1),
    # Snapshots are used heavily in this test mode, but they are incompatible
    # with the compaction filter.
    "enable_compaction_filter": 0,
    # `CfConsistencyStressTest::TestIngestExternalFile()` is not implemented.
    "ingest_external_file_one_in": 0,
}

txn_params = {
    "use_txn": 1,
    # Avoid a lambda so this is set once for the entire test
    "txn_write_policy": random.randint(0, 2),
    "unordered_write": random.randint(0, 1),
    # TODO: there is such a thing as transactions with WAL disabled. We should
    # cover that case.
    "disable_wal": 0,
    # OpenReadOnly after checkpoint is not currently compatible with WritePrepared txns
    "checkpoint_one_in": 0,
    # pipelined write is not currently compatible with WritePrepared txns
    "enable_pipelined_write": 0,
    "create_timestamped_snapshot_one_in": random.choice([0, 20]),
    # PutEntity in transactions is not yet implemented
    "use_put_entity_one_in": 0,
}

best_efforts_recovery_params = {
    "best_efforts_recovery": 1,
    "atomic_flush": 0,
    "disable_wal": 1,
    "column_families": 1,
}

blob_params = {
    "allow_setting_blob_options_dynamically": 1,
    # Enable blob files and GC with a 75% chance initially; note that they might still be
    # enabled/disabled during the test via SetOptions
    "enable_blob_files": lambda: random.choice([0] + [1] * 3),
    "min_blob_size": lambda: random.choice([0, 8, 16]),
    "blob_file_size": lambda: random.choice([1048576, 16777216, 268435456, 1073741824]),
    "blob_compression_type": lambda: random.choice(["none", "snappy", "lz4", "zstd"]),
    "enable_blob_garbage_collection": lambda: random.choice([0] + [1] * 3),
    "blob_garbage_collection_age_cutoff": lambda: random.choice(
        [0.0, 0.25, 0.5, 0.75, 1.0]
    ),
    "blob_garbage_collection_force_threshold": lambda: random.choice([0.5, 0.75, 1.0]),
    "blob_compaction_readahead_size": lambda: random.choice([0, 1048576, 4194304]),
    "blob_file_starting_level": lambda: random.choice(
        [0] * 4 + [1] * 3 + [2] * 2 + [3]
    ),
    "use_blob_cache": lambda: random.randint(0, 1),
    "use_shared_block_and_blob_cache": lambda: random.randint(0, 1),
    "blob_cache_size": lambda: random.choice([1048576, 2097152, 4194304, 8388608]),
    "prepopulate_blob_cache": lambda: random.randint(0, 1),
}

ts_params = {
    "test_cf_consistency": 0,
    "test_batches_snapshots": 0,
    "user_timestamp_size": 8,
    "use_merge": 0,
    "use_full_merge_v1": 0,
    "use_txn": 0,
    "enable_blob_files": 0,
    "use_blob_db": 0,
    "ingest_external_file_one_in": 0,
    # PutEntity with timestamps is not yet implemented
    "use_put_entity_one_in": 0,
}

tiered_params = {
    "enable_tiered_storage": 1,
    # Set tiered compaction hot data time as: 1 minute, 1 hour, 10 hours
    "preclude_last_level_data_seconds": lambda: random.choice([60, 3600, 36000]),
    # only test universal compaction for now; level compaction has a known issue of
    # endless compaction
    "compaction_style": 1,
    # tiered storage doesn't support blob db yet
    "enable_blob_files": 0,
    "use_blob_db": 0,
}

multiops_txn_default_params = {
    "test_cf_consistency": 0,
    "test_batches_snapshots": 0,
    "test_multi_ops_txns": 1,
    "use_txn": 1,
    "two_write_queues": lambda: random.choice([0, 1]),
    # TODO: enable write-prepared
    "disable_wal": 0,
    "use_only_the_last_commit_time_batch_for_recovery": lambda: random.choice([0, 1]),
    "clear_column_family_one_in": 0,
    "column_families": 1,
    "enable_pipelined_write": lambda: random.choice([0, 1]),
    # This test already acquires snapshots in reads
    "acquire_snapshot_one_in": 0,
    "backup_one_in": 0,
    "writepercent": 0,
    "delpercent": 0,
    "delrangepercent": 0,
    "customopspercent": 80,
    "readpercent": 5,
    "iterpercent": 15,
    "prefixpercent": 0,
    "verify_db_one_in": 1000,
    "continuous_verification_interval": 1000,
    "delay_snapshot_read_one_in": 3,
    # 65536 is the smallest possible value for write_buffer_size. Smaller
    # values will be sanitized to 65536 during db open. SetOptions currently
    # does not sanitize options, but very small write_buffer_size may cause
    # assertion failure in
    # https://github.com/facebook/rocksdb/blob/7.0.fb/db/memtable.cc#L117.
    "write_buffer_size": 65536,
    # flush more frequently to generate more files, thus triggering more
    # compactions.
    "flush_one_in": 1000,
    "key_spaces_path": setup_multiops_txn_key_spaces_file(),
    "rollback_one_in": 4,
    # Re-enable once we have a compaction filter for MultiOpsTxnStressTest
    "enable_compaction_filter": 0,
    "create_timestamped_snapshot_one_in": 50,
    "sync_fault_injection": 0,
    # PutEntity in transactions is not yet implemented
    "use_put_entity_one_in": 0,
}

multiops_wc_txn_params = {
    "txn_write_policy": 0,
    # TODO: re-enable pipelined write. Not well tested at the moment.
    "enable_pipelined_write": 0,
}

multiops_wp_txn_params = {
    "txn_write_policy": 1,
    "wp_snapshot_cache_bits": 1,
    # try small wp_commit_cache_bits, e.g. 0, once we explore storing full
    # commit sequence numbers in the commit cache
    "wp_commit_cache_bits": 10,
    # pipelined write is not currently compatible with WritePrepared txns
    "enable_pipelined_write": 0,
    # OpenReadOnly after checkpoint is not currently compatible with WritePrepared txns
    "checkpoint_one_in": 0,
    # Required to be 1 in order to use commit-time-batch
    "use_only_the_last_commit_time_batch_for_recovery": 1,
    "clear_wp_commit_cache_one_in": 10,
    "create_timestamped_snapshot_one_in": 0,
}


def finalize_and_sanitize(src_params):
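    """Resolve callable (randomized) params to concrete values, then adjust
    mutually incompatible options so the resulting db_stress invocation is valid.
    """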
    dest_params = {k: v() if callable(v) else v for (k, v) in src_params.items()}
    if is_release_mode():
        dest_params["read_fault_one_in"] = 0
    if dest_params.get("compression_max_dict_bytes") == 0:
        dest_params["compression_zstd_max_train_bytes"] = 0
        dest_params["compression_max_dict_buffer_bytes"] = 0
    if dest_params.get("compression_type") != "zstd":
        dest_params["compression_zstd_max_train_bytes"] = 0
    if dest_params.get("allow_concurrent_memtable_write", 1) == 1:
        dest_params["memtablerep"] = "skip_list"
    if dest_params["mmap_read"] == 1:
        dest_params["use_direct_io_for_flush_and_compaction"] = 0
        dest_params["use_direct_reads"] = 0
        if dest_params["file_checksum_impl"] != "none":
            # TODO(T109283569): there is a bug in `GenerateOneFileChecksum()`,
            # used by `IngestExternalFile()`, causing it to fail with mmap
            # reads. Remove this once it is fixed.
            dest_params["ingest_external_file_one_in"] = 0
    if (
        dest_params["use_direct_io_for_flush_and_compaction"] == 1
        or dest_params["use_direct_reads"] == 1
    ) and not is_direct_io_supported(dest_params["db"]):
        if is_release_mode():
            print(
                "{} does not support direct IO. Disabling use_direct_reads and "
                "use_direct_io_for_flush_and_compaction.\n".format(dest_params["db"])
            )
            dest_params["use_direct_reads"] = 0
            dest_params["use_direct_io_for_flush_and_compaction"] = 0
        else:
            dest_params["mock_direct_io"] = True

    if dest_params["test_batches_snapshots"] == 1:
        dest_params["enable_compaction_filter"] = 0
    if dest_params["prefix_size"] < 0:
        dest_params["prefix_size"] = 1

    # Multi-key operations are not currently compatible with transactions or
    # timestamp.
    if (dest_params.get("test_batches_snapshots") == 1 or
            dest_params.get("use_txn") == 1 or
            dest_params.get("user_timestamp_size") > 0):
        dest_params["ingest_external_file_one_in"] = 0
    if (dest_params.get("test_batches_snapshots") == 1 or
            dest_params.get("use_txn") == 1):
        dest_params["delpercent"] += dest_params["delrangepercent"]
        dest_params["delrangepercent"] = 0
    if (
        dest_params.get("disable_wal") == 1
        or dest_params.get("sync_fault_injection") == 1
        or dest_params.get("manual_wal_flush_one_in") > 0
    ):
        # File ingestion does not guarantee prefix-recoverability when unsynced
        # data can be lost. Ingesting a file syncs data immediately that is
        # newer than unsynced memtable data that can be lost on restart.
        #
        # Even if the above issue is fixed or worked around, our
        # trace-and-replay does not trace file ingestion, so in its current form
        # it would not recover the expected state to the correct point in time.
        dest_params["ingest_external_file_one_in"] = 0
        # The `DbStressCompactionFilter` can apply memtable updates to SST
        # files, which would be problematic when unsynced data can be lost in
        # crash recoveries.
        dest_params["enable_compaction_filter"] = 0
    # Only under WritePrepared txns does unordered_write provide the same
    # guarantees as vanilla rocksdb
    if dest_params.get("unordered_write", 0) == 1:
        dest_params["txn_write_policy"] = 1
        dest_params["allow_concurrent_memtable_write"] = 1
    if dest_params.get("disable_wal", 0) == 1:
        dest_params["atomic_flush"] = 1
        dest_params["sync"] = 0
        dest_params["write_fault_one_in"] = 0
    if dest_params.get("open_files", 1) != -1:
        # Compaction TTL and periodic compactions are only compatible
        # with open_files = -1
        dest_params["compaction_ttl"] = 0
        dest_params["periodic_compaction_seconds"] = 0
    if dest_params.get("compaction_style", 0) == 2:
        # Disable compaction TTL in FIFO compaction, because right
        # now assertion failures are triggered.
        dest_params["compaction_ttl"] = 0
        dest_params["periodic_compaction_seconds"] = 0
    if dest_params["partition_filters"] == 1:
        if dest_params["index_type"] != 2:
            dest_params["partition_filters"] = 0
    if dest_params.get("atomic_flush", 0) == 1:
        # disable pipelined write when atomic flush is used.
        dest_params["enable_pipelined_write"] = 0
    if dest_params.get("sst_file_manager_bytes_per_sec", 0) == 0:
        dest_params["sst_file_manager_bytes_per_truncate"] = 0
    if dest_params.get("enable_compaction_filter", 0) == 1:
        # Compaction filter is incompatible with snapshots. Need to avoid taking
        # snapshots, as well as avoid operations that use snapshots for
        # verification.
        dest_params["acquire_snapshot_one_in"] = 0
        dest_params["compact_range_one_in"] = 0
        # Give the iterator ops away to reads.
        dest_params["readpercent"] += dest_params.get("iterpercent", 10)
        dest_params["iterpercent"] = 0
    if dest_params.get("prefix_size") == -1:
        dest_params["readpercent"] += dest_params.get("prefixpercent", 20)
        dest_params["prefixpercent"] = 0
    if (
        dest_params.get("prefix_size") == -1
        and dest_params.get("memtable_whole_key_filtering") == 0
    ):
        dest_params["memtable_prefix_bloom_size_ratio"] = 0
    if dest_params.get("two_write_queues") == 1:
        dest_params["enable_pipelined_write"] = 0
    if dest_params.get("best_efforts_recovery") == 1:
        dest_params["disable_wal"] = 1
        dest_params["atomic_flush"] = 0
        dest_params["enable_compaction_filter"] = 0
        dest_params["sync"] = 0
        dest_params["write_fault_one_in"] = 0
    if dest_params["secondary_cache_uri"] != "":
        # Currently the only cache type compatible with a secondary cache is LRUCache
        dest_params["cache_type"] = "lru_cache"
    # Remove the following once write-prepared/write-unprepared with/without
    # unordered write supports timestamped snapshots
    if dest_params.get("create_timestamped_snapshot_one_in", 0) > 0:
        dest_params["txn_write_policy"] = 0
        dest_params["unordered_write"] = 0
    # For TransactionDB, correctness testing with unsynced data loss is currently
    # compatible only with the write-committed policy
    if dest_params.get("use_txn") == 1 and dest_params.get("txn_write_policy") != 0:
        dest_params["sync_fault_injection"] = 0
        dest_params["manual_wal_flush_one_in"] = 0
    # PutEntity is currently not supported by SstFileWriter or in conjunction with Merge
    if dest_params["use_put_entity_one_in"] != 0:
        dest_params["ingest_external_file_one_in"] = 0
        dest_params["use_merge"] = 0
        dest_params["use_full_merge_v1"] = 0

    return dest_params


def gen_cmd_params(args):
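    """Build the parameter dict for this run by merging the relevant parameter
    dictionaries in the precedence order documented at the top of this file,
    with explicit command-line arguments applied last.
    """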
    params = {}

    params.update(default_params)
    if args.test_type == "blackbox":
        params.update(blackbox_default_params)
    if args.test_type == "whitebox":
        params.update(whitebox_default_params)
    if args.simple:
        params.update(simple_default_params)
        if args.test_type == "blackbox":
            params.update(blackbox_simple_default_params)
        if args.test_type == "whitebox":
            params.update(whitebox_simple_default_params)
    if args.cf_consistency:
        params.update(cf_consistency_params)
    if args.txn:
        params.update(txn_params)
    if args.test_best_efforts_recovery:
        params.update(best_efforts_recovery_params)
    if args.enable_ts:
        params.update(ts_params)
    if args.test_multiops_txn:
        params.update(multiops_txn_default_params)
        if args.write_policy == "write_committed":
            params.update(multiops_wc_txn_params)
        elif args.write_policy == "write_prepared":
            params.update(multiops_wp_txn_params)
    if args.test_tiered_storage:
        params.update(tiered_params)

    # Best-effort recovery, user-defined timestamps, and tiered storage are currently
    # incompatible with BlobDB. Test BE recovery if specified on the command
    # line; otherwise, apply BlobDB related overrides with a 10% chance.
    if (
        not args.test_best_efforts_recovery
        and not args.enable_ts
        and not args.test_tiered_storage
        and random.choice([0] * 9 + [1]) == 1
    ):
        params.update(blob_params)

    for k, v in vars(args).items():
        if v is not None:
            params[k] = v
    return params


def gen_cmd(params, unknown_params):
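    """Build the db_stress argv from the finalized params.

    Keys consumed only by this script (e.g. duration, interval, stress_cmd) are
    filtered out; everything else becomes a "--key=value" flag, and unrecognized
    command-line args are appended verbatim. Illustrative shape of the result
    (exact values vary per run):
        ['./db_stress', '--acquire_snapshot_one_in=10000', ..., '--writepercent=35']
    """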
    finalized_params = finalize_and_sanitize(params)
    cmd = (
        [stress_cmd]
        + [
            "--{0}={1}".format(k, v)
            for k, v in [(k, finalized_params[k]) for k in sorted(finalized_params)]
            if k
            not in {
                "test_type",
                "simple",
                "duration",
                "interval",
                "random_kill_odd",
                "cf_consistency",
                "txn",
                "test_best_efforts_recovery",
                "enable_ts",
                "test_multiops_txn",
                "write_policy",
                "stress_cmd",
                "test_tiered_storage",
                "cleanup_cmd",
            }
            and v is not None
        ]
        + unknown_params
    )
    return cmd


def execute_cmd(cmd, timeout):
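    """Run one db_stress instance, killing it if `timeout` (seconds) expires.

    Returns a tuple (hit_timeout, returncode, stdout, stderr).
    """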
    child = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    print("Running db_stress with pid=%d: %s\n\n" % (child.pid, " ".join(cmd)))

    try:
        outs, errs = child.communicate(timeout=timeout)
        hit_timeout = False
        print("WARNING: db_stress ended before kill: exitcode=%d\n" % child.returncode)
    except subprocess.TimeoutExpired:
        hit_timeout = True
        child.kill()
        print("KILLED %d\n" % child.pid)
        outs, errs = child.communicate()

    return hit_timeout, child.returncode, outs.decode("utf-8"), errs.decode("utf-8")


# This script runs and kills db_stress multiple times. It checks consistency
# in case of unsafe crashes in RocksDB.
def blackbox_crash_main(args, unknown_args):
    cmd_params = gen_cmd_params(args)
    dbname = get_dbname("blackbox")
    exit_time = time.time() + cmd_params["duration"]

    print(
        "Running blackbox-crash-test with \n"
        + "interval_between_crash="
        + str(cmd_params["interval"])
        + "\n"
        + "total-duration="
        + str(cmd_params["duration"])
        + "\n"
    )

    while time.time() < exit_time:
        cmd = gen_cmd(
            dict(list(cmd_params.items()) + list({"db": dbname}.items())), unknown_args
        )

        hit_timeout, retcode, outs, errs = execute_cmd(cmd, cmd_params["interval"])

        if not hit_timeout:
            print("Exit Before Killing")
            print("stdout:")
            print(outs)
            print("stderr:")
            print(errs)
            sys.exit(2)

        for line in errs.split("\n"):
            if line != "" and not line.startswith("WARNING"):
                print("stderr has error message:")
                print("***" + line + "***")

        time.sleep(1)  # time to stabilize before the next run

    time.sleep(1)  # time to stabilize before the next run

    # we need to clean up after ourselves -- only do this on test success
    shutil.rmtree(dbname, True)


# This script runs db_stress multiple times. Some runs use kill_random_test,
# which causes rocksdb to crash at various points in the code.
def whitebox_crash_main(args, unknown_args):
    cmd_params = gen_cmd_params(args)
    dbname = get_dbname("whitebox")

    cur_time = time.time()
    exit_time = cur_time + cmd_params["duration"]
    half_time = cur_time + cmd_params["duration"] // 2

    print(
        "Running whitebox-crash-test with \n"
        + "total-duration="
        + str(cmd_params["duration"])
        + "\n"
    )

    total_check_mode = 4
    check_mode = 0
    kill_random_test = cmd_params["random_kill_odd"]
    kill_mode = 0
    prev_compaction_style = -1
    while time.time() < exit_time:
        if check_mode == 0:
            additional_opts = {
                # use large ops per thread since we will kill it anyway
                "ops_per_thread": 100
                * cmd_params["ops_per_thread"],
            }
            # run with kill_random_test, with three modes.
            # Mode 0 covers all kill points. Mode 1 covers fewer kill points but
            # increases the chance of triggering them. Mode 2 covers even less
            # frequent kill points and further increases the chance of triggering
            # them.
            if kill_mode == 0:
                additional_opts.update(
                    {
                        "kill_random_test": kill_random_test,
                    }
                )
            elif kill_mode == 1:
                if cmd_params.get("disable_wal", 0) == 1:
                    my_kill_odd = kill_random_test // 50 + 1
                else:
                    my_kill_odd = kill_random_test // 10 + 1
                additional_opts.update(
                    {
                        "kill_random_test": my_kill_odd,
                        "kill_exclude_prefixes": "WritableFileWriter::Append,"
                        + "WritableFileWriter::WriteBuffered",
                    }
                )
            elif kill_mode == 2:
                # TODO: May need to adjust random odds if kill_random_test
                # is too small.
                additional_opts.update(
                    {
                        "kill_random_test": (kill_random_test // 5000 + 1),
                        "kill_exclude_prefixes": "WritableFileWriter::Append,"
                        "WritableFileWriter::WriteBuffered,"
                        "PosixMmapFile::Allocate,WritableFileWriter::Flush",
                    }
                )
            # Run kill mode 0, 1 and 2 by turn.
            kill_mode = (kill_mode + 1) % 3
        elif check_mode == 1:
            # normal run with universal compaction mode
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params["ops_per_thread"],
                "compaction_style": 1,
            }
            # Single level universal has a lot of special logic. Ensure we cover
            # it sometimes.
            if random.randint(0, 1) == 1:
                additional_opts.update(
                    {
                        "num_levels": 1,
                    }
                )
        elif check_mode == 2:
            # normal run with FIFO compaction mode
            # ops_per_thread is divided by 5 because FIFO compaction
            # style is quite a bit slower on reads with a lot of files
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params["ops_per_thread"] // 5,
                "compaction_style": 2,
            }
        else:
            # normal run
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params["ops_per_thread"],
            }

        cur_compaction_style = additional_opts.get(
            "compaction_style", cmd_params.get("compaction_style", 0)
        )
        if prev_compaction_style != -1 and prev_compaction_style != cur_compaction_style:
            print(
                "`compaction_style` changed in the current run, so `destroy_db_initially` "
                "is set to 1 as a short-term solution to avoid cycling through a previous "
                "db of a different compaction style.\n"
            )
            additional_opts["destroy_db_initially"] = 1
        prev_compaction_style = cur_compaction_style

        cmd = gen_cmd(
            dict(
                list(cmd_params.items())
                + list(additional_opts.items())
                + list({"db": dbname}.items())
            ),
            unknown_args,
        )

        print(
            "Running:" + " ".join(cmd) + "\n"
        )  # noqa: E999 T25377293 Grandfathered in

        # If the running time is 15 minutes over the expected run time, explicitly
        # kill the process and exit even if the white box kill didn't hit. This
        # guarantees the run time limit: if this runs as a job, running too long
        # will create problems for job scheduling or execution.
        # TODO: detect a hanging condition. The job might run too long as RocksDB
        # hits a hanging bug.
        hit_timeout, retncode, stdoutdata, stderrdata = execute_cmd(
            cmd, exit_time - time.time() + 900
        )
        msg = "check_mode={0}, kill option={1}, exitcode={2}\n".format(
            check_mode, additional_opts["kill_random_test"], retncode
        )

        print(msg)
        print(stdoutdata)
        print(stderrdata)

        if hit_timeout:
            print("Killing the run for running too long")
            break

        expected = False
        if additional_opts["kill_random_test"] is None and (retncode == 0):
            # we expect a zero retncode if no kill option was given
            expected = True
        elif additional_opts["kill_random_test"] is not None and retncode <= 0:
            # When a kill option is given, the test MIGHT kill itself.
            # If it does, a negative retncode is expected. Otherwise 0.
            expected = True

        if not expected:
            print("TEST FAILED. See kill option and exit code above!!!\n")
            sys.exit(1)

        stderrdata = stderrdata.lower()
        errorcount = stderrdata.count("error") - stderrdata.count("got errors 0 times")
        print("#times error occurred in output is " + str(errorcount) + "\n")

        if errorcount > 0:
            print("TEST FAILED. Output has 'error'!!!\n")
            sys.exit(2)
        if stderrdata.find("fail") >= 0:
            print("TEST FAILED. Output has 'fail'!!!\n")
            sys.exit(2)

        # For the first half of the duration, keep doing the kill test. For the
        # second half, try different modes.
        if time.time() > half_time:
            # we need to clean up after ourselves -- only do this on test
            # success
            shutil.rmtree(dbname, True)
            if cleanup_cmd is not None:
                print("Running DB cleanup command - %s\n" % cleanup_cmd)
                ret = os.system(cleanup_cmd)
                if ret != 0:
                    print("TEST FAILED. DB cleanup returned error %d\n" % ret)
                    sys.exit(1)
            os.mkdir(dbname)
            if expected_values_dir is not None:
                shutil.rmtree(expected_values_dir, True)
                os.mkdir(expected_values_dir)

            check_mode = (check_mode + 1) % total_check_mode

        time.sleep(1)  # time to stabilize after a kill


def main():
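    """Parse command-line options, run the chosen crash test flavor, and remove
    temporary state only if the test passes.
    """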
    global stress_cmd
    global cleanup_cmd

    parser = argparse.ArgumentParser(
        description="This script runs and kills \
        db_stress multiple times"
    )
    parser.add_argument("test_type", choices=["blackbox", "whitebox"])
    parser.add_argument("--simple", action="store_true")
    parser.add_argument("--cf_consistency", action="store_true")
    parser.add_argument("--txn", action="store_true")
    parser.add_argument("--test_best_efforts_recovery", action="store_true")
    parser.add_argument("--enable_ts", action="store_true")
    parser.add_argument("--test_multiops_txn", action="store_true")
    parser.add_argument("--write_policy", choices=["write_committed", "write_prepared"])
    parser.add_argument("--stress_cmd")
    parser.add_argument("--test_tiered_storage", action="store_true")
    parser.add_argument("--cleanup_cmd")

    all_params = dict(
        list(default_params.items())
        + list(blackbox_default_params.items())
        + list(whitebox_default_params.items())
        + list(simple_default_params.items())
        + list(blackbox_simple_default_params.items())
        + list(whitebox_simple_default_params.items())
        + list(blob_params.items())
        + list(ts_params.items())
        + list(multiops_txn_default_params.items())
        + list(multiops_wc_txn_params.items())
        + list(multiops_wp_txn_params.items())
        + list(best_efforts_recovery_params.items())
        + list(cf_consistency_params.items())
        + list(tiered_params.items())
        + list(txn_params.items())
    )

    for k, v in all_params.items():
        parser.add_argument("--" + k, type=type(v() if callable(v) else v))
    # unknown_args are passed directly to db_stress
    args, unknown_args = parser.parse_known_args()

    test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
    if test_tmpdir is not None and not os.path.isdir(test_tmpdir):
        print(
            "%s env var is set to a non-existent directory: %s"
            % (_TEST_DIR_ENV_VAR, test_tmpdir)
        )
        sys.exit(1)

    if args.stress_cmd:
        stress_cmd = args.stress_cmd
    if args.cleanup_cmd:
        cleanup_cmd = args.cleanup_cmd
    if args.test_type == "blackbox":
        blackbox_crash_main(args, unknown_args)
    if args.test_type == "whitebox":
        whitebox_crash_main(args, unknown_args)
    # Only delete the `expected_values_dir` if the test passes
    if expected_values_dir is not None:
        shutil.rmtree(expected_values_dir)
    if multiops_txn_key_spaces_file is not None:
        os.remove(multiops_txn_key_spaces_file)


if __name__ == "__main__":
    main()