# params overwrite priority:
#   for default:
#       default_params < {blackbox,whitebox}_default_params < args
#   for simple:
#       default_params < {blackbox,whitebox}_default_params <
#       simple_default_params <
#       {blackbox,whitebox}_simple_default_params < args
#   for enable_atomic_flush:
#       default_params < {blackbox,whitebox}_default_params <
#       atomic_flush_params < args
# Scratch file that db_stress uses to persist the expected DB state
# across kills/restarts; lives for the duration of this script.
expected_values_file = tempfile.NamedTemporaryFile()
25 "acquire_snapshot_one_in": 10000,
27 "cache_size": 1048576,
28 "checkpoint_one_in": 1000000,
29 "compression_type": "snappy",
30 "compression_max_dict_bytes": lambda: 16384 * random
.randint(0, 1),
31 "compression_zstd_max_train_bytes": lambda: 65536 * random
.randint(0, 1),
32 "clear_column_family_one_in": 0,
33 "compact_files_one_in": 1000000,
34 "compact_range_one_in": 1000000,
37 "destroy_db_initially": 0,
38 "enable_pipelined_write": lambda: random
.randint(0, 1),
39 "expected_values_path": expected_values_file
.name
,
40 "flush_one_in": 1000000,
41 "max_background_compactions": 20,
42 "max_bytes_for_level_base": 10485760,
44 "max_write_buffer_number": 3,
45 "mmap_read": lambda: random
.randint(0, 1),
46 "nooverwritepercent": 1,
49 "progress_reports": 0,
51 "recycle_log_file_num": lambda: random
.randint(0, 1),
53 "snapshot_hold_ops": 100000,
54 "subcompactions": lambda: random
.randint(1, 4),
55 "target_file_size_base": 2097152,
56 "target_file_size_multiplier": 2,
57 "use_direct_reads": lambda: random
.randint(0, 1),
58 "use_direct_io_for_flush_and_compaction": lambda: random
.randint(0, 1),
59 "use_full_merge_v1": lambda: random
.randint(0, 1),
60 "use_merge": lambda: random
.randint(0, 1),
62 "write_buffer_size": 4 * 1024 * 1024,
64 "format_version": lambda: random
.randint(2, 4),
65 "index_block_restart_interval": lambda: random
.choice(range(1, 16)),
68 _TEST_DIR_ENV_VAR
= 'TEST_TMPDIR'
def get_dbname(test_name):
    """Return a fresh database directory path for this test run.

    When $TEST_TMPDIR is set, the path is deterministic under that root
    and any leftovers from a previous run are removed first; otherwise a
    brand-new mkdtemp() directory is used.

    NOTE(review): the paste lost the `else:` line and the return of this
    function; reconstructed from the surrounding structure (callers do
    `dbname = get_dbname(...)`) — verify against upstream.
    """
    test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
    if test_tmpdir is None or test_tmpdir == "":
        dbname = tempfile.mkdtemp(prefix='rocksdb_crashtest_' + test_name)
    else:
        dbname = test_tmpdir + "/rocksdb_crashtest_" + test_name
        # Best-effort removal of a stale directory (ignore_errors=True).
        shutil.rmtree(dbname, True)
    return dbname
def is_direct_io_supported(dbname):
    """Probe whether files under *dbname* can be opened with O_DIRECT.

    Returns False on platforms lacking ``os.O_DIRECT`` (the attribute
    lookup raises inside the try) or on filesystems that reject direct
    I/O; True otherwise.

    NOTE(review): the try/except/return lines were lost in the paste;
    reconstructed — verify against upstream. Also closes the probe fd,
    which would otherwise leak.
    """
    with tempfile.NamedTemporaryFile(dir=dbname) as f:
        try:
            fd = os.open(f.name, os.O_DIRECT)
        except BaseException:
            return False
        os.close(fd)  # don't leak the probe descriptor
        return True
# Overrides applied on top of default_params for blackbox runs.
# NOTE(review): the "duration" (total script time) and "interval" (time
# per db_stress instance) entries the comments refer to were lost in
# this paste — restore their values from upstream.
blackbox_default_params = {
    # total time for this script to test db_stress
    # time for one db_stress instance to run
    # since we will be killing anyway, use large value for ops_per_thread
    "ops_per_thread": 100000000,
    "set_options_one_in": 10000,
    "test_batches_snapshots": 1,
}
# Overrides applied on top of default_params for whitebox runs.
# NOTE(review): one entry (original line 103, likely "duration") was
# lost in this paste — restore it from upstream.
whitebox_default_params = {
    "log2_keys_per_lock": 10,
    "ops_per_thread": 200000,
    "random_kill_odd": 888887,
    "test_batches_snapshots": lambda: random.randint(0, 1),
}
# Overrides used when --simple is given: single column family, reduced
# background work, larger files/buffers.
# NOTE(review): entries from original lines 116-117 were lost in this
# paste — verify the full list against upstream.
simple_default_params = {
    "allow_concurrent_memtable_write": lambda: random.randint(0, 1),
    "column_families": 1,
    "max_background_compactions": 1,
    "max_bytes_for_level_base": 67108864,
    "memtablerep": "skip_list",
    "target_file_size_base": 16777216,
    "target_file_size_multiplier": 1,
    "test_batches_snapshots": 0,
    "write_buffer_size": 32 * 1024 * 1024,
}
# Extra overrides for --simple blackbox runs.
# NOTE(review): an entry (original line 125) was lost in this paste —
# verify against upstream.
blackbox_simple_default_params = {
    "set_options_one_in": 0,
}
# Simple whitebox runs need no overrides beyond simple_default_params.
whitebox_simple_default_params = {}
# Overrides used when --enable_atomic_flush is given.
# NOTE(review): entries from original lines 132-133 were lost in this
# paste — verify against upstream.
atomic_flush_params = {
    "test_atomic_flush": 1,
    # use small value for write_buffer_size so that RocksDB triggers flush
    # more frequently
    "write_buffer_size": 1024 * 1024,
}
def finalize_and_sanitize(src_params):
    """Materialize lambda-valued params and reconcile conflicting options.

    Returns a new dict; *src_params* is not modified. Callable values
    are invoked so each run gets a fresh random sample.
    """
    dest_params = {k: v() if callable(v) else v
                   for k, v in src_params.items()}
    # zstd dictionary training is meaningless without zstd + a dictionary.
    if dest_params.get("compression_type") != "zstd" or \
            dest_params.get("compression_max_dict_bytes") == 0:
        dest_params["compression_zstd_max_train_bytes"] = 0
    # Only skip_list supports concurrent memtable writes.
    if dest_params.get("allow_concurrent_memtable_write", 1) == 1:
        dest_params["memtablerep"] = "skip_list"
    # Direct I/O cannot be combined with mmap reads, and is disabled when
    # the target filesystem rejects O_DIRECT.
    # NOTE(review): the argument line of is_direct_io_supported() was lost
    # in the paste; upstream passes dest_params["db"] — verify.
    if dest_params["mmap_read"] == 1 or not is_direct_io_supported(
            dest_params["db"]):
        dest_params["use_direct_io_for_flush_and_compaction"] = 0
        dest_params["use_direct_reads"] = 0
    # Batched snapshot tests do not support range deletions; fold the
    # range-delete share into plain deletes.
    if dest_params.get("test_batches_snapshots") == 1:
        dest_params["delpercent"] += dest_params["delrangepercent"]
        dest_params["delrangepercent"] = 0
    return dest_params
def gen_cmd_params(args):
    """Build the merged parameter dict for one run.

    Later update() calls override earlier ones, implementing the priority
    chain documented at the top of the file; explicit command-line values
    (non-None attributes of *args*) win over everything.

    NOTE(review): the paste lost `params = {}`, the `if args.simple:`
    guard around the simple_* updates, the args-override loop body, and
    the return — reconstructed; verify against upstream.
    """
    params = {}

    params.update(default_params)
    if args.test_type == 'blackbox':
        params.update(blackbox_default_params)
    if args.test_type == 'whitebox':
        params.update(whitebox_default_params)
    if args.simple:
        params.update(simple_default_params)
        if args.test_type == 'blackbox':
            params.update(blackbox_simple_default_params)
        if args.test_type == 'whitebox':
            params.update(whitebox_simple_default_params)
    if args.enable_atomic_flush:
        params.update(atomic_flush_params)

    # Explicit command-line arguments take the highest priority.
    for k, v in vars(args).items():
        if v is not None:
            params[k] = v
    return params
def gen_cmd(params, unknown_params):
    """Build the db_stress argv list from finalized *params*.

    Options consumed only by this script are filtered out, None values
    are dropped, and *unknown_params* are passed through verbatim.

    NOTE(review): the paste lost the return statement — reconstructed.
    """
    # Hoisted out of the comprehension so the set is built once, not per key.
    script_only = set(['test_type', 'simple', 'duration', 'interval',
                       'random_kill_odd', 'enable_atomic_flush'])
    cmd = ['./db_stress'] + [
        '--{0}={1}'.format(k, v)
        for k, v in finalize_and_sanitize(params).items()
        if k not in script_only and v is not None] + unknown_params
    return cmd
# This script runs and kills db_stress multiple times. It checks consistency
# in case of unsafe crashes in RocksDB.
def blackbox_crash_main(args, unknown_args):
    """Repeatedly run db_stress, kill it mid-run, and scan stderr for errors.

    Each iteration launches db_stress against the same db directory,
    lets it run for cmd_params['interval'] seconds, kills it, then fails
    the test (exit code 2) if stderr carries any non-WARNING line.

    NOTE(review): many lines of this function were lost in the paste;
    the control flow below is reconstructed — verify against the
    upstream tools/db_crashtest.py before relying on it.
    """
    cmd_params = gen_cmd_params(args)
    dbname = get_dbname('blackbox')
    exit_time = time.time() + cmd_params['duration']

    print("Running blackbox-crash-test with \n"
          + "interval_between_crash=" + str(cmd_params['interval']) + "\n"
          + "total-duration=" + str(cmd_params['duration']) + "\n")

    while time.time() < exit_time:
        run_had_errors = False
        killtime = time.time() + cmd_params['interval']

        cmd = gen_cmd(dict(list(cmd_params.items())
                           + list({'db': dbname}.items())), unknown_args)

        child = subprocess.Popen(cmd, stderr=subprocess.PIPE)
        print("Running db_stress with pid=%d: %s\n\n"
              % (child.pid, ' '.join(cmd)))

        # Let it run until the kill deadline, unless it exits on its own.
        stop_early = False
        while time.time() < killtime:
            if child.poll() is not None:
                print("WARNING: db_stress ended before kill: exitcode=%d\n"
                      % child.returncode)
                stop_early = True
                break
            time.sleep(1)

        if not stop_early:
            if child.poll() is not None:
                print("WARNING: db_stress ended before kill: exitcode=%d\n"
                      % child.returncode)
            else:
                child.kill()
                print("KILLED %d\n" % child.pid)
                time.sleep(1)  # time to stabilize after a kill

        # Any stderr output other than WARNING lines is a failure.
        while True:
            line = child.stderr.readline().strip()
            if line == '':
                break
            elif not line.startswith('WARNING'):
                run_had_errors = True
                print('stderr has error message:')
                print('***' + line + '***')

        if run_had_errors:
            sys.exit(2)

        time.sleep(1)  # time to stabilize before the next run

    # we need to clean up after ourselves -- only do this on test success
    shutil.rmtree(dbname, True)
# This python script runs db_stress multiple times. Some runs with
# kill_random_test that causes rocksdb to crash at various points in code.
def whitebox_crash_main(args, unknown_args):
    """Cycle db_stress through kill-testing and several compaction modes.

    The first half of the total duration stays in check_mode 0 (kill
    tests, rotating kill_mode 0/1/2 with progressively fewer but more
    likely kill points); the second half cycles through normal runs with
    universal, FIFO, and default compaction.

    NOTE(review): portions of this function were lost in the paste; the
    control flow is reconstructed — verify against upstream. Python-2
    `print` statements were converted to the function form used by the
    rest of the file; note `popen.communicate()` output handling still
    assumes Python-2 str semantics.
    """
    cmd_params = gen_cmd_params(args)
    dbname = get_dbname('whitebox')

    cur_time = time.time()
    exit_time = cur_time + cmd_params['duration']
    half_time = cur_time + cmd_params['duration'] / 2

    print("Running whitebox-crash-test with \n"
          + "total-duration=" + str(cmd_params['duration']) + "\n")

    total_check_mode = 4
    check_mode = 0
    kill_random_test = cmd_params['random_kill_odd']
    kill_mode = 0

    while time.time() < exit_time:
        if check_mode == 0:
            additional_opts = {
                # use large ops per thread since we will kill it anyway
                "ops_per_thread": 100 * cmd_params['ops_per_thread'],
            }
            # run with kill_random_test, with three modes.
            # Mode 0 covers all kill points. Mode 1 covers less kill points but
            # increases change of triggering them. Mode 2 covers even less
            # frequent kill points and further increases triggering change.
            if kill_mode == 0:
                additional_opts.update({
                    "kill_random_test": kill_random_test,
                })
            elif kill_mode == 1:
                additional_opts.update({
                    # // keeps the odds an int under Python 3 as well
                    "kill_random_test": (kill_random_test // 10 + 1),
                    "kill_prefix_blacklist": "WritableFileWriter::Append,"
                    + "WritableFileWriter::WriteBuffered",
                })
            elif kill_mode == 2:
                # TODO: May need to adjust random odds if kill_random_test
                # is too small.
                additional_opts.update({
                    "kill_random_test": (kill_random_test // 5000 + 1),
                    "kill_prefix_blacklist": "WritableFileWriter::Append,"
                    "WritableFileWriter::WriteBuffered,"
                    "PosixMmapFile::Allocate,WritableFileWriter::Flush",
                })
            # Run kill mode 0, 1 and 2 by turn.
            kill_mode = (kill_mode + 1) % 3
        elif check_mode == 1:
            # normal run with universal compaction mode
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params['ops_per_thread'],
                "compaction_style": 1,
            }
        elif check_mode == 2:
            # normal run with FIFO compaction mode
            # ops_per_thread is divided by 5 because FIFO compaction
            # style is quite a bit slower on reads with lot of files
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params['ops_per_thread'] // 5,
                "compaction_style": 2,
            }
        else:
            # normal run
            additional_opts = {
                "kill_random_test": None,
                "ops_per_thread": cmd_params['ops_per_thread'],
            }

        cmd = gen_cmd(dict(list(cmd_params.items())
                           + list(additional_opts.items())
                           + list({'db': dbname}.items())), unknown_args)

        print("Running:" + ' '.join(cmd) + "\n")

        popen = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
        stdoutdata, stderrdata = popen.communicate()
        retncode = popen.returncode
        msg = ("check_mode={0}, kill option={1}, exitcode={2}\n".format(
            check_mode, additional_opts['kill_random_test'], retncode))
        print(msg)
        print(stdoutdata)

        expected = False
        if additional_opts['kill_random_test'] is None and (retncode == 0):
            # we expect zero retncode if no kill option
            expected = True
        elif additional_opts['kill_random_test'] is not None and retncode < 0:
            # we expect negative retncode if kill option was given
            expected = True

        if not expected:
            print("TEST FAILED. See kill option and exit code above!!!\n")
            sys.exit(1)

        stdoutdata = stdoutdata.lower()
        errorcount = (stdoutdata.count('error') -
                      stdoutdata.count('got errors 0 times'))
        print("#times error occurred in output is " + str(errorcount) + "\n")

        if errorcount > 0:
            print("TEST FAILED. Output has 'error'!!!\n")
            sys.exit(2)
        if stdoutdata.find('fail') >= 0:
            print("TEST FAILED. Output has 'fail'!!!\n")
            sys.exit(2)

        # First half of the duration, keep doing kill test. For the next half,
        # try different modes.
        if time.time() > half_time:
            # we need to clean up after ourselves -- only do this on test
            # success
            shutil.rmtree(dbname, True)
            cmd_params.pop('expected_values_path', None)
            check_mode = (check_mode + 1) % total_check_mode

        time.sleep(1)  # time to stabilize after a kill
def main():
    """Parse arguments and dispatch to the blackbox or whitebox driver.

    NOTE(review): the `def main():` header and a few lines (notably the
    sys.exit after the bad-TEST_TMPDIR message) were lost in this paste;
    reconstructed — verify against upstream.
    """
    parser = argparse.ArgumentParser(
        description="This script runs and kills db_stress multiple times")
    parser.add_argument("test_type", choices=["blackbox", "whitebox"])
    parser.add_argument("--simple", action="store_true")
    parser.add_argument("--enable_atomic_flush", action='store_true')

    # Expose every known stress-test parameter as a typed CLI flag; the
    # flag's type is inferred from the (sampled) default value.
    all_params = dict(list(default_params.items())
                      + list(blackbox_default_params.items())
                      + list(whitebox_default_params.items())
                      + list(simple_default_params.items())
                      + list(blackbox_simple_default_params.items())
                      + list(whitebox_simple_default_params.items()))

    for k, v in all_params.items():
        parser.add_argument("--" + k, type=type(v() if callable(v) else v))
    # unknown_args are passed directly to db_stress
    args, unknown_args = parser.parse_known_args()

    test_tmpdir = os.environ.get(_TEST_DIR_ENV_VAR)
    if test_tmpdir is not None and not os.path.isdir(test_tmpdir):
        print('%s env var is set to a non-existent directory: %s' %
              (_TEST_DIR_ENV_VAR, test_tmpdir))
        sys.exit(1)

    if args.test_type == 'blackbox':
        blackbox_crash_main(args, unknown_args)
    if args.test_type == 'whitebox':
        whitebox_crash_main(args, unknown_args)
# Script entry point. NOTE(review): the paste lost the main() call under
# this guard — reconstructed.
if __name__ == '__main__':
    main()