#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import absolute_import, division, print_function, unicode_literals

import argparse

import os
import random
import shutil
import subprocess
import sys
import tempfile
import time

# params overwrite priority:
#   for default:
#       default_params < {blackbox,whitebox}_default_params < args
#   for simple:
#       default_params < {blackbox,whitebox}_default_params <
#       simple_default_params <
#       {blackbox,whitebox}_simple_default_params < args
#   for cf_consistency:
#       default_params < {blackbox,whitebox}_default_params <
#       cf_consistency_params < args
#   for txn:
#       default_params < {blackbox,whitebox}_default_params < txn_params < args
#   for ts:
#       default_params < {blackbox,whitebox}_default_params < ts_params < args
#   for multiops_txn:
#       default_params < {blackbox,whitebox}_default_params < multiops_txn_params < args
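#
# Illustrative sketch (comments only, not executed): gen_cmd_params() below
# realizes this precedence with successive dict.update() calls, so later
# dictionaries overwrite earlier ones. For a `whitebox --simple` run the
# effective merge order is roughly:
#   params = {}
#   params.update(default_params)
#   params.update(whitebox_default_params)
#   params.update(simple_default_params)
#   params.update(whitebox_simple_default_params)
#   params.update({k: v for k, v in vars(args).items() if v is not None})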


default_params = {
    "acquire_snapshot_one_in": 10000,
    "backup_max_size": 100 * 1024 * 1024,
    # Consider a larger number once backups are considered more stable
    "backup_one_in": 100000,
    "batch_protection_bytes_per_key": lambda: random.choice([0, 8]),
    "memtable_protection_bytes_per_key": lambda: random.choice([0, 1, 2, 4, 8]),
    "block_size": 16384,
    "bloom_bits": lambda: random.choice(
        [random.randint(0, 19), random.lognormvariate(2.3, 1.3)]
    ),
    "cache_index_and_filter_blocks": lambda: random.randint(0, 1),
    "cache_size": 8388608,
    "charge_compression_dictionary_building_buffer": lambda: random.choice([0, 1]),
    "charge_filter_construction": lambda: random.choice([0, 1]),
    "charge_table_reader": lambda: random.choice([0, 1]),
    "charge_file_metadata": lambda: random.choice([0, 1]),
    "checkpoint_one_in": 1000000,
    "compression_type": lambda: random.choice(
        ["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]
    ),
    "bottommost_compression_type": lambda: "disable"
    if random.randint(0, 1) == 0
    else random.choice(["none", "snappy", "zlib", "lz4", "lz4hc", "xpress", "zstd"]),
    "checksum_type": lambda: random.choice(
        ["kCRC32c", "kxxHash", "kxxHash64", "kXXH3"]
    ),
    "compression_max_dict_bytes": lambda: 16384 * random.randint(0, 1),
    "compression_zstd_max_train_bytes": lambda: 65536 * random.randint(0, 1),
    # Disabled compression_parallel_threads as the feature is not stable
    # lambda: random.choice([1] * 9 + [4])
    "compression_parallel_threads": 1,
    "compression_max_dict_buffer_bytes": lambda: (1 << random.randint(0, 40)) - 1,
    "compression_use_zstd_dict_trainer": lambda: random.randint(0, 1),
    "clear_column_family_one_in": 0,
    "compact_files_one_in": 1000000,
    "compact_range_one_in": 1000000,
    "compaction_pri": random.randint(0, 4),
    "data_block_index_type": lambda: random.choice([0, 1]),
    "delpercent": 4,
    "delrangepercent": 1,
    "destroy_db_initially": 0,
    "enable_pipelined_write": lambda: random.randint(0, 1),
    "enable_compaction_filter": lambda: random.choice([0, 0, 0, 1]),
    "expected_values_dir": lambda: setup_expected_values_dir(),
    "fail_if_options_file_error": lambda: random.randint(0, 1),
    "flush_one_in": 1000000,
    "manual_wal_flush_one_in": lambda: random.choice([0, 0, 1000, 1000000]),
    "file_checksum_impl": lambda: random.choice(["none", "crc32c", "xxh64", "big"]),
    "get_live_files_one_in": 1000000,
    # Note: the following two are intentionally disabled as the corresponding
    # APIs are not guaranteed to succeed.
    "get_sorted_wal_files_one_in": 0,
    "get_current_wal_file_one_in": 0,
    # Temporarily disable hash index
    "index_type": lambda: random.choice([0, 0, 0, 2, 2, 3]),
    "ingest_external_file_one_in": 1000000,
    "iterpercent": 10,
    "mark_for_compaction_one_file_in": lambda: 10 * random.randint(0, 1),
    "max_background_compactions": 20,
    "max_bytes_for_level_base": 10485760,
    "max_key": 25000000,
    "max_write_buffer_number": 3,
    "mmap_read": lambda: random.randint(0, 1),
    # Setting `nooverwritepercent > 0` is only possible because we do not vary
    # the random seed, so the same keys are chosen by every run for disallowing
    # overwrites.
    "nooverwritepercent": 1,
    "open_files": lambda: random.choice([-1, -1, 100, 500000]),
    "optimize_filters_for_memory": lambda: random.randint(0, 1),
    "partition_filters": lambda: random.randint(0, 1),
    "partition_pinning": lambda: random.randint(0, 3),
    "pause_background_one_in": 1000000,
    "prefix_size": lambda: random.choice([-1, 1, 5, 7, 8]),
    "prefixpercent": 5,
    "progress_reports": 0,
    "readpercent": 45,
    "recycle_log_file_num": lambda: random.randint(0, 1),
    "snapshot_hold_ops": 100000,
    "sst_file_manager_bytes_per_sec": lambda: random.choice([0, 104857600]),
    "sst_file_manager_bytes_per_truncate": lambda: random.choice([0, 1048576]),
    "long_running_snapshots": lambda: random.randint(0, 1),
    "subcompactions": lambda: random.randint(1, 4),
    "target_file_size_base": 2097152,
    "target_file_size_multiplier": 2,
    "test_batches_snapshots": random.randint(0, 1),
    "top_level_index_pinning": lambda: random.randint(0, 3),
    "unpartitioned_pinning": lambda: random.randint(0, 3),
    "use_direct_reads": lambda: random.randint(0, 1),
    "use_direct_io_for_flush_and_compaction": lambda: random.randint(0, 1),
    "mock_direct_io": False,
    "cache_type": lambda: random.choice(["lru_cache", "hyper_clock_cache"]),
    "use_full_merge_v1": lambda: random.randint(0, 1),
    "use_merge": lambda: random.randint(0, 1),
    # use_put_entity_one_in has to be the same across invocations for verification to work, hence no lambda
    "use_put_entity_one_in": random.choice([0] * 7 + [1, 5, 10]),
    # 999 -> use Bloom API
    "ribbon_starting_level": lambda: random.choice([random.randint(-1, 10), 999]),
    "value_size_mult": 32,
    "verify_checksum": 1,
    "write_buffer_size": 4 * 1024 * 1024,
    "writepercent": 35,
    "format_version": lambda: random.choice([2, 3, 4, 5, 5]),
    "index_block_restart_interval": lambda: random.choice(range(1, 16)),
    "use_multiget": lambda: random.randint(0, 1),
    "periodic_compaction_seconds": lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
    # 0 = never (used by some), 10 = often (for threading bugs), 600 = default
    "stats_dump_period_sec": lambda: random.choice([0, 10, 600]),
    "compaction_ttl": lambda: random.choice([0, 0, 1, 2, 10, 100, 1000]),
    # Test a small max_manifest_file_size with a small probability, as most of the
    # time we want manifest history to be preserved to help debug
    "max_manifest_file_size": lambda: random.choice(
        [t * 16384 if t < 3 else 1024 * 1024 * 1024 for t in range(1, 30)]
    ),
    # Sync mode might make test runs slower, so run it with a smaller probability
    "sync": lambda: random.choice([1 if t == 0 else 0 for t in range(0, 20)]),
    "bytes_per_sync": lambda: random.choice([0, 262144]),
    "wal_bytes_per_sync": lambda: random.choice([0, 524288]),
    # Disable compaction_readahead_size because the test is not passing.
    # "compaction_readahead_size" : lambda : random.choice(
    #     [0, 0, 1024 * 1024]),
    "db_write_buffer_size": lambda: random.choice(
        [0, 0, 0, 1024 * 1024, 8 * 1024 * 1024, 128 * 1024 * 1024]
    ),
    "avoid_unnecessary_blocking_io": random.randint(0, 1),
    "write_dbid_to_manifest": random.randint(0, 1),
    "avoid_flush_during_recovery": lambda: random.choice(
        [1 if t == 0 else 0 for t in range(0, 8)]
    ),
    "max_write_batch_group_size_bytes": lambda: random.choice(
        [16, 64, 1024 * 1024, 16 * 1024 * 1024]
    ),
    "level_compaction_dynamic_level_bytes": True,
    "verify_checksum_one_in": 1000000,
    "verify_db_one_in": 100000,
    "continuous_verification_interval": 0,
    "max_key_len": 3,
    "key_len_percent_dist": "1,30,69",
    "read_fault_one_in": lambda: random.choice([0, 32, 1000]),
    "open_metadata_write_fault_one_in": lambda: random.choice([0, 0, 8]),
    "open_write_fault_one_in": lambda: random.choice([0, 0, 16]),
    "open_read_fault_one_in": lambda: random.choice([0, 0, 32]),
    "sync_fault_injection": lambda: random.randint(0, 1),
    "get_property_one_in": 1000000,
    "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
    "max_write_buffer_size_to_maintain": lambda: random.choice(
        [0, 1024 * 1024, 2 * 1024 * 1024, 4 * 1024 * 1024, 8 * 1024 * 1024]
    ),
    "user_timestamp_size": 0,
    "secondary_cache_fault_one_in": lambda: random.choice([0, 0, 32]),
    "prepopulate_block_cache": lambda: random.choice([0, 1]),
    "memtable_prefix_bloom_size_ratio": lambda: random.choice([0.001, 0.01, 0.1, 0.5]),
    "memtable_whole_key_filtering": lambda: random.randint(0, 1),
    "detect_filter_construct_corruption": lambda: random.choice([0, 1]),
    "adaptive_readahead": lambda: random.choice([0, 1]),
    "async_io": lambda: random.choice([0, 1]),
    "wal_compression": lambda: random.choice(["none", "zstd"]),
    "verify_sst_unique_id_in_manifest": 1,  # always do unique_id verification
    "secondary_cache_uri": lambda: random.choice(
        [
            "",
            "compressed_secondary_cache://capacity=8388608",
            "compressed_secondary_cache://capacity=8388608;enable_custom_split_merge=true",
        ]
    ),
    "allow_data_in_errors": True,
    "readahead_size": lambda: random.choice([0, 16384, 524288]),
    "initial_auto_readahead_size": lambda: random.choice([0, 16384, 524288]),
    "max_auto_readahead_size": lambda: random.choice([0, 16384, 524288]),
    "num_file_reads_for_auto_readahead": lambda: random.choice([0, 1, 2]),
    "min_write_buffer_number_to_merge": lambda: random.choice([1, 2]),
    "preserve_internal_time_seconds": lambda: random.choice([0, 60, 3600, 36000]),
}

_TEST_DIR_ENV_VAR = "TEST_TMPDIR"
_DEBUG_LEVEL_ENV_VAR = "DEBUG_LEVEL"

stress_cmd = "./db_stress"
cleanup_cmd = None


def is_release_mode():
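    """Return True when the DEBUG_LEVEL env var is set to 0, i.e. a release build."""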
    return os.environ.get(_DEBUG_LEVEL_ENV_VAR) == "0"


def get_dbname(test_name):
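    """Return the database directory for this test, creating it fresh.

    Uses TEST_TMPDIR when set (removing any previous contents and running the
    optional cleanup command first); otherwise falls back to tempfile.mkdtemp().
    """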
    test_dir_name = "rocksdb_crashtest_" + test_name
    test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
    if test_tmpdir is None or test_tmpdir == "":
        dbname = tempfile.mkdtemp(prefix=test_dir_name)
    else:
        dbname = test_tmpdir + "/" + test_dir_name
        shutil.rmtree(dbname, True)
        if cleanup_cmd is not None:
            print("Running DB cleanup command - %s\n" % cleanup_cmd)
            # Ignore failure
            os.system(cleanup_cmd)
        os.mkdir(dbname)
    return dbname


expected_values_dir = None


def setup_expected_values_dir():
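    """Create the directory db_stress uses to persist expected values for
    verification; memoized so all runs in this invocation share one directory.
    """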
    global expected_values_dir
    if expected_values_dir is not None:
        return expected_values_dir
    expected_dir_prefix = "rocksdb_crashtest_expected_"
    test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
    if test_tmpdir is None or test_tmpdir == "":
        expected_values_dir = tempfile.mkdtemp(prefix=expected_dir_prefix)
    else:
        # if tmpdir is specified, store the expected_values_dir under that dir
        expected_values_dir = test_tmpdir + "/rocksdb_crashtest_expected"
        if os.path.exists(expected_values_dir):
            shutil.rmtree(expected_values_dir)
        os.mkdir(expected_values_dir)
    return expected_values_dir


multiops_txn_key_spaces_file = None


def setup_multiops_txn_key_spaces_file():
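    """Create the key-spaces file passed to the multiops_txn test via
    --key_spaces_path; memoized so all runs in this invocation share one file.
    """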
    global multiops_txn_key_spaces_file
    if multiops_txn_key_spaces_file is not None:
        return multiops_txn_key_spaces_file
    key_spaces_file_prefix = "rocksdb_crashtest_multiops_txn_key_spaces"
    test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
    if test_tmpdir is None or test_tmpdir == "":
        multiops_txn_key_spaces_file = tempfile.mkstemp(prefix=key_spaces_file_prefix)[
            1
        ]
    else:
        if not os.path.exists(test_tmpdir):
            os.mkdir(test_tmpdir)
        multiops_txn_key_spaces_file = tempfile.mkstemp(
            prefix=key_spaces_file_prefix, dir=test_tmpdir
        )[1]
    return multiops_txn_key_spaces_file


def is_direct_io_supported(dbname):
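    """Return True if a file created under `dbname` can be opened with O_DIRECT."""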
    with tempfile.NamedTemporaryFile(dir=dbname) as f:
        try:
            os.open(f.name, os.O_DIRECT)
        except BaseException:
            return False
        return True


blackbox_default_params = {
    "disable_wal": lambda: random.choice([0, 0, 0, 1]),
    # total time for this script to test db_stress
    "duration": 6000,
    # time for one db_stress instance to run
    "interval": 120,
    # since we will be killing anyway, use a large value for ops_per_thread
    "ops_per_thread": 100000000,
    "reopen": 0,
    "set_options_one_in": 10000,
}

whitebox_default_params = {
    # TODO: enable this once we figure out how to adjust kill odds for WAL-
    # disabled runs, and either (1) separate full `db_stress` runs out of
    # whitebox crash or (2) support verification at end of `db_stress` runs
    # that ran with WAL disabled.
    "disable_wal": 0,
    "duration": 10000,
    "log2_keys_per_lock": 10,
    "ops_per_thread": 200000,
    "random_kill_odd": 888887,
    "reopen": 20,
}

simple_default_params = {
    "allow_concurrent_memtable_write": lambda: random.randint(0, 1),
    "column_families": 1,
    # TODO: re-enable once internal task T124324915 is fixed.
    # "experimental_mempurge_threshold": lambda: 10.0*random.random(),
    "max_background_compactions": 1,
    "max_bytes_for_level_base": 67108864,
    "memtablerep": "skip_list",
    "target_file_size_base": 16777216,
    "target_file_size_multiplier": 1,
    "test_batches_snapshots": 0,
    "write_buffer_size": 32 * 1024 * 1024,
    "level_compaction_dynamic_level_bytes": False,
    "paranoid_file_checks": lambda: random.choice([0, 1, 1, 1]),
    "verify_iterator_with_expected_state_one_in": 5,  # this locks a range of keys
}

blackbox_simple_default_params = {
    "open_files": -1,
    "set_options_one_in": 0,
}

whitebox_simple_default_params = {}

cf_consistency_params = {
    "disable_wal": lambda: random.randint(0, 1),
    "reopen": 0,
    "test_cf_consistency": 1,
    # use a small value for write_buffer_size so that RocksDB triggers flushes
    # more frequently
    "write_buffer_size": 1024 * 1024,
    "enable_pipelined_write": lambda: random.randint(0, 1),
    # Snapshots are used heavily in this test mode, but they are incompatible
    # with the compaction filter.
    "enable_compaction_filter": 0,
    # `CfConsistencyStressTest::TestIngestExternalFile()` is not implemented.
    "ingest_external_file_one_in": 0,
}

txn_params = {
    "use_txn": 1,
    # Avoid a lambda so this is set once for the entire test
    "txn_write_policy": random.randint(0, 2),
    "unordered_write": random.randint(0, 1),
    # TODO: there is such a thing as transactions with WAL disabled. We should
    # cover that case.
    "disable_wal": 0,
    # OpenReadOnly after checkpoint is not currently compatible with WritePrepared txns
    "checkpoint_one_in": 0,
    # pipelined write is not currently compatible with WritePrepared txns
    "enable_pipelined_write": 0,
    "create_timestamped_snapshot_one_in": random.choice([0, 20]),
    # PutEntity in transactions is not yet implemented
    "use_put_entity_one_in": 0,
}

best_efforts_recovery_params = {
    "best_efforts_recovery": 1,
    "atomic_flush": 0,
    "disable_wal": 1,
    "column_families": 1,
}

blob_params = {
    "allow_setting_blob_options_dynamically": 1,
    # Enable blob files and GC with a 75% chance initially; note that they might still be
    # enabled/disabled during the test via SetOptions
    "enable_blob_files": lambda: random.choice([0] + [1] * 3),
    "min_blob_size": lambda: random.choice([0, 8, 16]),
    "blob_file_size": lambda: random.choice([1048576, 16777216, 268435456, 1073741824]),
    "blob_compression_type": lambda: random.choice(["none", "snappy", "lz4", "zstd"]),
    "enable_blob_garbage_collection": lambda: random.choice([0] + [1] * 3),
    "blob_garbage_collection_age_cutoff": lambda: random.choice(
        [0.0, 0.25, 0.5, 0.75, 1.0]
    ),
    "blob_garbage_collection_force_threshold": lambda: random.choice([0.5, 0.75, 1.0]),
    "blob_compaction_readahead_size": lambda: random.choice([0, 1048576, 4194304]),
    "blob_file_starting_level": lambda: random.choice(
        [0] * 4 + [1] * 3 + [2] * 2 + [3]
    ),
    "use_blob_cache": lambda: random.randint(0, 1),
    "use_shared_block_and_blob_cache": lambda: random.randint(0, 1),
    "blob_cache_size": lambda: random.choice([1048576, 2097152, 4194304, 8388608]),
    "prepopulate_blob_cache": lambda: random.randint(0, 1),
}

ts_params = {
    "test_cf_consistency": 0,
    "test_batches_snapshots": 0,
    "user_timestamp_size": 8,
    "use_merge": 0,
    "use_full_merge_v1": 0,
    "use_txn": 0,
    "enable_blob_files": 0,
    "use_blob_db": 0,
    "ingest_external_file_one_in": 0,
    # PutEntity with timestamps is not yet implemented
    "use_put_entity_one_in": 0,
}

tiered_params = {
    "enable_tiered_storage": 1,
    # Set tiered compaction hot data time as: 1 minute, 1 hour, 10 hours
    "preclude_last_level_data_seconds": lambda: random.choice([60, 3600, 36000]),
    # only test universal compaction for now; level compaction has a known issue of
    # endless compaction
    "compaction_style": 1,
    # tiered storage doesn't support blob db yet
    "enable_blob_files": 0,
    "use_blob_db": 0,
}

multiops_txn_default_params = {
    "test_cf_consistency": 0,
    "test_batches_snapshots": 0,
    "test_multi_ops_txns": 1,
    "use_txn": 1,
    "two_write_queues": lambda: random.choice([0, 1]),
    # TODO: enable write-prepared
    "disable_wal": 0,
    "use_only_the_last_commit_time_batch_for_recovery": lambda: random.choice([0, 1]),
    "clear_column_family_one_in": 0,
    "column_families": 1,
    "enable_pipelined_write": lambda: random.choice([0, 1]),
    # This test already acquires snapshots in reads
    "acquire_snapshot_one_in": 0,
    "backup_one_in": 0,
    "writepercent": 0,
    "delpercent": 0,
    "delrangepercent": 0,
    "customopspercent": 80,
    "readpercent": 5,
    "iterpercent": 15,
    "prefixpercent": 0,
    "verify_db_one_in": 1000,
    "continuous_verification_interval": 1000,
    "delay_snapshot_read_one_in": 3,
    # 65536 is the smallest possible value for write_buffer_size. Smaller
    # values will be sanitized to 65536 during db open. SetOptions currently
    # does not sanitize options, but very small write_buffer_size may cause
    # assertion failure in
    # https://github.com/facebook/rocksdb/blob/7.0.fb/db/memtable.cc#L117.
    "write_buffer_size": 65536,
    # flush more frequently to generate more files, thus triggering more
    # compactions.
    "flush_one_in": 1000,
    "key_spaces_path": setup_multiops_txn_key_spaces_file(),
    "rollback_one_in": 4,
    # Re-enable once we have a compaction filter for MultiOpsTxnStressTest
    "enable_compaction_filter": 0,
    "create_timestamped_snapshot_one_in": 50,
    "sync_fault_injection": 0,
    # PutEntity in transactions is not yet implemented
    "use_put_entity_one_in": 0,
}

multiops_wc_txn_params = {
    "txn_write_policy": 0,
    # TODO: re-enable pipelined write. Not well tested at the moment.
    "enable_pipelined_write": 0,
}

multiops_wp_txn_params = {
    "txn_write_policy": 1,
    "wp_snapshot_cache_bits": 1,
    # try small wp_commit_cache_bits, e.g. 0, once we explore storing full
    # commit sequence numbers in the commit cache
    "wp_commit_cache_bits": 10,
    # pipelined write is not currently compatible with WritePrepared txns
    "enable_pipelined_write": 0,
    # OpenReadOnly after checkpoint is not currently compatible with WritePrepared txns
    "checkpoint_one_in": 0,
    # Required to be 1 in order to use commit-time-batch
    "use_only_the_last_commit_time_batch_for_recovery": 1,
    "clear_wp_commit_cache_one_in": 10,
    "create_timestamped_snapshot_one_in": 0,
}


def finalize_and_sanitize(src_params):
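    """Resolve callable (randomized) params to concrete values, then adjust
    mutually incompatible options so the resulting db_stress invocation is valid.
    """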
    dest_params = {k: v() if callable(v) else v for (k, v) in src_params.items()}
    if is_release_mode():
        dest_params["read_fault_one_in"] = 0
    if dest_params.get("compression_max_dict_bytes") == 0:
        dest_params["compression_zstd_max_train_bytes"] = 0
        dest_params["compression_max_dict_buffer_bytes"] = 0
    if dest_params.get("compression_type") != "zstd":
        dest_params["compression_zstd_max_train_bytes"] = 0
    if dest_params.get("allow_concurrent_memtable_write", 1) == 1:
        dest_params["memtablerep"] = "skip_list"
    if dest_params["mmap_read"] == 1:
        dest_params["use_direct_io_for_flush_and_compaction"] = 0
        dest_params["use_direct_reads"] = 0
        if dest_params["file_checksum_impl"] != "none":
            # TODO(T109283569): there is a bug in `GenerateOneFileChecksum()`,
            # used by `IngestExternalFile()`, causing it to fail with mmap
            # reads. Remove this once it is fixed.
            dest_params["ingest_external_file_one_in"] = 0
    if (
        dest_params["use_direct_io_for_flush_and_compaction"] == 1
        or dest_params["use_direct_reads"] == 1
    ) and not is_direct_io_supported(dest_params["db"]):
        if is_release_mode():
            print(
                "{} does not support direct IO. Disabling use_direct_reads and "
                "use_direct_io_for_flush_and_compaction.\n".format(dest_params["db"])
            )
            dest_params["use_direct_reads"] = 0
            dest_params["use_direct_io_for_flush_and_compaction"] = 0
        else:
            dest_params["mock_direct_io"] = True

    if dest_params["test_batches_snapshots"] == 1:
        dest_params["enable_compaction_filter"] = 0
    if dest_params["prefix_size"] < 0:
        dest_params["prefix_size"] = 1

    # Multi-key operations are not currently compatible with transactions or
    # timestamp.
    if (dest_params.get("test_batches_snapshots") == 1 or
            dest_params.get("use_txn") == 1 or
            dest_params.get("user_timestamp_size") > 0):
        dest_params["ingest_external_file_one_in"] = 0
    if (dest_params.get("test_batches_snapshots") == 1 or
            dest_params.get("use_txn") == 1):
        dest_params["delpercent"] += dest_params["delrangepercent"]
        dest_params["delrangepercent"] = 0
    if (
        dest_params.get("disable_wal") == 1
        or dest_params.get("sync_fault_injection") == 1
        or dest_params.get("manual_wal_flush_one_in") > 0
    ):
        # File ingestion does not guarantee prefix-recoverability when unsynced
        # data can be lost. Ingesting a file syncs data immediately that is
        # newer than unsynced memtable data that can be lost on restart.
        #
        # Even if the above issue is fixed or worked around, our
        # trace-and-replay does not trace file ingestion, so in its current form
        # it would not recover the expected state to the correct point in time.
        dest_params["ingest_external_file_one_in"] = 0
        # The `DbStressCompactionFilter` can apply memtable updates to SST
        # files, which would be problematic when unsynced data can be lost in
        # crash recoveries.
        dest_params["enable_compaction_filter"] = 0
    # Only under WritePrepared txns does unordered_write provide the same
    # guarantees as vanilla rocksdb
    if dest_params.get("unordered_write", 0) == 1:
        dest_params["txn_write_policy"] = 1
        dest_params["allow_concurrent_memtable_write"] = 1
    if dest_params.get("disable_wal", 0) == 1:
        dest_params["atomic_flush"] = 1
        dest_params["sync"] = 0
        dest_params["write_fault_one_in"] = 0
    if dest_params.get("open_files", 1) != -1:
        # Compaction TTL and periodic compactions are only compatible
        # with open_files = -1
        dest_params["compaction_ttl"] = 0
        dest_params["periodic_compaction_seconds"] = 0
    if dest_params.get("compaction_style", 0) == 2:
        # Disable compaction TTL in FIFO compaction, because right
        # now assertion failures are triggered.
        dest_params["compaction_ttl"] = 0
        dest_params["periodic_compaction_seconds"] = 0
    if dest_params["partition_filters"] == 1:
        if dest_params["index_type"] != 2:
            dest_params["partition_filters"] = 0
    if dest_params.get("atomic_flush", 0) == 1:
        # disable pipelined write when atomic flush is used.
        dest_params["enable_pipelined_write"] = 0
    if dest_params.get("sst_file_manager_bytes_per_sec", 0) == 0:
        dest_params["sst_file_manager_bytes_per_truncate"] = 0
    if dest_params.get("enable_compaction_filter", 0) == 1:
        # Compaction filter is incompatible with snapshots. Need to avoid taking
        # snapshots, as well as avoid operations that use snapshots for
        # verification.
        dest_params["acquire_snapshot_one_in"] = 0
        dest_params["compact_range_one_in"] = 0
        # Give the iterator ops away to reads.
        dest_params["readpercent"] += dest_params.get("iterpercent", 10)
        dest_params["iterpercent"] = 0
    if dest_params.get("prefix_size") == -1:
        dest_params["readpercent"] += dest_params.get("prefixpercent", 20)
        dest_params["prefixpercent"] = 0
    if (
        dest_params.get("prefix_size") == -1
        and dest_params.get("memtable_whole_key_filtering") == 0
    ):
        dest_params["memtable_prefix_bloom_size_ratio"] = 0
    if dest_params.get("two_write_queues") == 1:
        dest_params["enable_pipelined_write"] = 0
    if dest_params.get("best_efforts_recovery") == 1:
        dest_params["disable_wal"] = 1
        dest_params["atomic_flush"] = 0
        dest_params["enable_compaction_filter"] = 0
        dest_params["sync"] = 0
        dest_params["write_fault_one_in"] = 0
    if dest_params["secondary_cache_uri"] != "":
        # Currently the only cache type compatible with a secondary cache is LRUCache
        dest_params["cache_type"] = "lru_cache"
    # Remove the following once write-prepared/write-unprepared with/without
    # unordered write supports timestamped snapshots
    if dest_params.get("create_timestamped_snapshot_one_in", 0) > 0:
        dest_params["txn_write_policy"] = 0
        dest_params["unordered_write"] = 0
    # For TransactionDB, correctness testing with unsynced data loss is currently
    # compatible only with the write-committed policy
    if dest_params.get("use_txn") == 1 and dest_params.get("txn_write_policy") != 0:
        dest_params["sync_fault_injection"] = 0
        dest_params["manual_wal_flush_one_in"] = 0
    # PutEntity is currently not supported by SstFileWriter or in conjunction with Merge
    if dest_params["use_put_entity_one_in"] != 0:
        dest_params["ingest_external_file_one_in"] = 0
        dest_params["use_merge"] = 0
        dest_params["use_full_merge_v1"] = 0

    return dest_params


def gen_cmd_params(args):
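    """Build the parameter dict for this run by merging the relevant parameter
    dictionaries in the precedence order documented at the top of this file,
    with explicit command-line arguments applied last.
    """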
    params = {}

    params.update(default_params)
    if args.test_type == "blackbox":
        params.update(blackbox_default_params)
    if args.test_type == "whitebox":
        params.update(whitebox_default_params)
    if args.simple:
        params.update(simple_default_params)
        if args.test_type == "blackbox":
            params.update(blackbox_simple_default_params)
        if args.test_type == "whitebox":
            params.update(whitebox_simple_default_params)
    if args.cf_consistency:
        params.update(cf_consistency_params)
    if args.txn:
        params.update(txn_params)
    if args.test_best_efforts_recovery:
        params.update(best_efforts_recovery_params)
    if args.enable_ts:
        params.update(ts_params)
    if args.test_multiops_txn:
        params.update(multiops_txn_default_params)
        if args.write_policy == "write_committed":
            params.update(multiops_wc_txn_params)
        elif args.write_policy == "write_prepared":
            params.update(multiops_wp_txn_params)
    if args.test_tiered_storage:
        params.update(tiered_params)

    # Best-effort recovery, user-defined timestamps, and tiered storage are currently
    # incompatible with BlobDB. Test BE recovery if specified on the command
    # line; otherwise, apply BlobDB related overrides with a 10% chance.
    if (
        not args.test_best_efforts_recovery
        and not args.enable_ts
        and not args.test_tiered_storage
        and random.choice([0] * 9 + [1]) == 1
    ):
        params.update(blob_params)

    for k, v in vars(args).items():
        if v is not None:
            params[k] = v
    return params


def gen_cmd(params, unknown_params):
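    """Build the db_stress argv from the finalized params.

    Keys consumed only by this script (e.g. duration, interval, stress_cmd) are
    filtered out; everything else becomes a "--key=value" flag, and unrecognized
    command-line args are appended verbatim. Illustrative shape of the result
    (exact values vary per run):
        ['./db_stress', '--acquire_snapshot_one_in=10000', ..., '--writepercent=35']
    """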
    finalized_params = finalize_and_sanitize(params)
    cmd = (
        [stress_cmd]
        + [
            "--{0}={1}".format(k, v)
            for k, v in [(k, finalized_params[k]) for k in sorted(finalized_params)]
            if k
            not in {
                "test_type",
                "simple",
                "duration",
                "interval",
                "random_kill_odd",
                "cf_consistency",
                "txn",
                "test_best_efforts_recovery",
                "enable_ts",
                "test_multiops_txn",
                "write_policy",
                "stress_cmd",
                "test_tiered_storage",
                "cleanup_cmd",
            }
            and v is not None
        ]
        + unknown_params
    )
    return cmd


def execute_cmd(cmd, timeout):
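    """Run one db_stress instance, killing it if `timeout` (seconds) expires.

    Returns a tuple (hit_timeout, returncode, stdout, stderr).
    """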
    child = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    print("Running db_stress with pid=%d: %s\n\n" % (child.pid, " ".join(cmd)))

    try:
        outs, errs = child.communicate(timeout=timeout)
        hit_timeout = False
        print("WARNING: db_stress ended before kill: exitcode=%d\n" % child.returncode)
    except subprocess.TimeoutExpired:
        hit_timeout = True
        child.kill()
        print("KILLED %d\n" % child.pid)
        outs, errs = child.communicate()

    return hit_timeout, child.returncode, outs.decode("utf-8"), errs.decode("utf-8")


# This script runs and kills db_stress multiple times. It checks consistency
# in case of unsafe crashes in RocksDB.
def blackbox_crash_main(args, unknown_args):
    cmd_params = gen_cmd_params(args)
    dbname = get_dbname("blackbox")
    exit_time = time.time() + cmd_params["duration"]

    print(
        "Running blackbox-crash-test with \n"
        + "interval_between_crash="
        + str(cmd_params["interval"])
        + "\n"
        + "total-duration="
        + str(cmd_params["duration"])
        + "\n"
    )

    while time.time() < exit_time:
        cmd = gen_cmd(
            dict(list(cmd_params.items()) + list({"db": dbname}.items())), unknown_args
        )

        hit_timeout, retcode, outs, errs = execute_cmd(cmd, cmd_params["interval"])

        if not hit_timeout:
            print("Exit Before Killing")
            print("stdout:")
            print(outs)
            print("stderr:")
            print(errs)
            sys.exit(2)

        for line in errs.split("\n"):
            if line != "" and not line.startswith("WARNING"):
                print("stderr has error message:")
                print("***" + line + "***")

        time.sleep(1)  # time to stabilize before the next run

    time.sleep(1)  # time to stabilize before the next run

    # we need to clean up after ourselves -- only do this on test success
    shutil.rmtree(dbname, True)


# This script runs db_stress multiple times. Some runs use kill_random_test,
# which causes rocksdb to crash at various points in the code.
def whitebox_crash_main(args, unknown_args):
    cmd_params = gen_cmd_params(args)
    dbname = get_dbname("whitebox")

    cur_time = time.time()
    exit_time = cur_time + cmd_params["duration"]
    half_time = cur_time + cmd_params["duration"] // 2

    print(
        "Running whitebox-crash-test with \n"
        + "total-duration="
        + str(cmd_params["duration"])
        + "\n"
    )

    total_check_mode = 4
    check_mode = 0
    kill_random_test = cmd_params["random_kill_odd"]
    kill_mode = 0
    prev_compaction_style = -1
    while time.time() < exit_time:
        if check_mode == 0:
            additional_opts = {
                # use large ops per thread since we will kill it anyway
                "ops_per_thread": 100
                * cmd_params["ops_per_thread"],
            }
            # run with kill_random_test, with three modes.
            # Mode 0 covers all kill points. Mode 1 covers fewer kill points but
            # increases the chance of triggering them. Mode 2 covers even less
            # frequent kill points and further increases the chance of triggering
            # them.
            if kill_mode == 0:
                additional_opts.update(
                    {
                        "kill_random_test": kill_random_test,
                    }
                )
            elif kill_mode == 1:
                if cmd_params.get("disable_wal", 0) == 1:
                    my_kill_odd = kill_random_test // 50 + 1
                else:
                    my_kill_odd = kill_random_test // 10 + 1
                additional_opts.update(
                    {
                        "kill_random_test": my_kill_odd,
                        "kill_exclude_prefixes": "WritableFileWriter::Append,"
                        + "WritableFileWriter::WriteBuffered",
                    }
                )
            elif kill_mode == 2:
                # TODO: May need to adjust random odds if kill_random_test
                # is too small.
                additional_opts.update(
                    {
                        "kill_random_test": (kill_random_test // 5000 + 1),
                        "kill_exclude_prefixes": "WritableFileWriter::Append,"
                        "WritableFileWriter::WriteBuffered,"
                        "PosixMmapFile::Allocate,WritableFileWriter::Flush",
                    }
                )
            # Run kill mode 0, 1 and 2 by turn.
            kill_mode = (kill_mode + 1) % 3
        elif check_mode == 1:
            # normal run with universal compaction mode
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params["ops_per_thread"],
                "compaction_style": 1,
            }
            # Single level universal has a lot of special logic. Ensure we cover
            # it sometimes.
            if random.randint(0, 1) == 1:
                additional_opts.update(
                    {
                        "num_levels": 1,
                    }
                )
        elif check_mode == 2:
            # normal run with FIFO compaction mode
            # ops_per_thread is divided by 5 because FIFO compaction
            # style is quite a bit slower on reads with a lot of files
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params["ops_per_thread"] // 5,
                "compaction_style": 2,
            }
        else:
            # normal run
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params["ops_per_thread"],
            }

        cur_compaction_style = additional_opts.get(
            "compaction_style", cmd_params.get("compaction_style", 0)
        )
        if prev_compaction_style != -1 and prev_compaction_style != cur_compaction_style:
            print(
                "`compaction_style` changed in the current run, so `destroy_db_initially` "
                "is set to 1 as a short-term solution to avoid cycling through a previous "
                "db of a different compaction style.\n"
            )
            additional_opts["destroy_db_initially"] = 1
        prev_compaction_style = cur_compaction_style

        cmd = gen_cmd(
            dict(
                list(cmd_params.items())
                + list(additional_opts.items())
                + list({"db": dbname}.items())
            ),
            unknown_args,
        )

        print(
            "Running:" + " ".join(cmd) + "\n"
        )  # noqa: E999 T25377293 Grandfathered in

        # If the running time is 15 minutes over the expected run time, explicitly
        # kill the process and exit even if the white box kill didn't hit. This
        # guarantees the run time limit: if this runs as a job, running too long
        # will create problems for job scheduling or execution.
        # TODO: detect a hanging condition. The job might run too long as RocksDB
        # hits a hanging bug.
        hit_timeout, retncode, stdoutdata, stderrdata = execute_cmd(
            cmd, exit_time - time.time() + 900
        )
        msg = "check_mode={0}, kill option={1}, exitcode={2}\n".format(
            check_mode, additional_opts["kill_random_test"], retncode
        )

        print(msg)
        print(stdoutdata)
        print(stderrdata)

        if hit_timeout:
            print("Killing the run for running too long")
            break

        expected = False
        if additional_opts["kill_random_test"] is None and (retncode == 0):
            # we expect a zero retncode if no kill option was given
            expected = True
        elif additional_opts["kill_random_test"] is not None and retncode <= 0:
            # When a kill option is given, the test MIGHT kill itself.
            # If it does, a negative retncode is expected. Otherwise 0.
            expected = True

        if not expected:
            print("TEST FAILED. See kill option and exit code above!!!\n")
            sys.exit(1)

        stderrdata = stderrdata.lower()
        errorcount = stderrdata.count("error") - stderrdata.count("got errors 0 times")
        print("#times error occurred in output is " + str(errorcount) + "\n")

        if errorcount > 0:
            print("TEST FAILED. Output has 'error'!!!\n")
            sys.exit(2)
        if stderrdata.find("fail") >= 0:
            print("TEST FAILED. Output has 'fail'!!!\n")
            sys.exit(2)

        # For the first half of the duration, keep doing the kill test. For the
        # second half, try different modes.
        if time.time() > half_time:
            # we need to clean up after ourselves -- only do this on test
            # success
            shutil.rmtree(dbname, True)
            if cleanup_cmd is not None:
                print("Running DB cleanup command - %s\n" % cleanup_cmd)
                ret = os.system(cleanup_cmd)
                if ret != 0:
                    print("TEST FAILED. DB cleanup returned error %d\n" % ret)
                    sys.exit(1)
            os.mkdir(dbname)
            if expected_values_dir is not None:
                shutil.rmtree(expected_values_dir, True)
                os.mkdir(expected_values_dir)

            check_mode = (check_mode + 1) % total_check_mode

        time.sleep(1)  # time to stabilize after a kill


def main():
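    """Parse command-line options, run the chosen crash test flavor, and remove
    temporary state only if the test passes.
    """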
    global stress_cmd
    global cleanup_cmd

    parser = argparse.ArgumentParser(
        description="This script runs and kills \
        db_stress multiple times"
    )
    parser.add_argument("test_type", choices=["blackbox", "whitebox"])
    parser.add_argument("--simple", action="store_true")
    parser.add_argument("--cf_consistency", action="store_true")
    parser.add_argument("--txn", action="store_true")
    parser.add_argument("--test_best_efforts_recovery", action="store_true")
    parser.add_argument("--enable_ts", action="store_true")
    parser.add_argument("--test_multiops_txn", action="store_true")
    parser.add_argument("--write_policy", choices=["write_committed", "write_prepared"])
    parser.add_argument("--stress_cmd")
    parser.add_argument("--test_tiered_storage", action="store_true")
    parser.add_argument("--cleanup_cmd")

    all_params = dict(
        list(default_params.items())
        + list(blackbox_default_params.items())
        + list(whitebox_default_params.items())
        + list(simple_default_params.items())
        + list(blackbox_simple_default_params.items())
        + list(whitebox_simple_default_params.items())
        + list(blob_params.items())
        + list(ts_params.items())
        + list(multiops_txn_default_params.items())
        + list(multiops_wc_txn_params.items())
        + list(multiops_wp_txn_params.items())
        + list(best_efforts_recovery_params.items())
        + list(cf_consistency_params.items())
        + list(tiered_params.items())
        + list(txn_params.items())
    )

    for k, v in all_params.items():
        parser.add_argument("--" + k, type=type(v() if callable(v) else v))
    # unknown_args are passed directly to db_stress
    args, unknown_args = parser.parse_known_args()

    test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
    if test_tmpdir is not None and not os.path.isdir(test_tmpdir):
        print(
            "%s env var is set to a non-existent directory: %s"
            % (_TEST_DIR_ENV_VAR, test_tmpdir)
        )
        sys.exit(1)

    if args.stress_cmd:
        stress_cmd = args.stress_cmd
    if args.cleanup_cmd:
        cleanup_cmd = args.cleanup_cmd
    if args.test_type == "blackbox":
        blackbox_crash_main(args, unknown_args)
    if args.test_type == "whitebox":
        whitebox_crash_main(args, unknown_args)
    # Only delete the `expected_values_dir` if the test passes
    if expected_values_dir is not None:
        shutil.rmtree(expected_values_dir)
    if multiops_txn_key_spaces_file is not None:
        os.remove(multiops_txn_key_spaces_file)


if __name__ == "__main__":
    main()