// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
/**
 * Copyright (C) 2011 the original author or authors.
 * See the notice.md file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.rocksdb.benchmark;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import org.rocksdb.*;
import org.rocksdb.util.SizeUnit;

class Stats {
  int id_;
  long start_;
  long finish_;
  double seconds_;
  long done_;
  long found_;
  long lastOpTime_;
  long nextReport_;
  long bytes_;
  StringBuilder message_;
  boolean excludeFromMerge_;

  // TODO(yhchiang): use the following arguments:
  //   (Long)Flag.stats_interval
  //   (Integer)Flag.stats_per_interval

  Stats(int id) {
    id_ = id;
    nextReport_ = 100;
    done_ = 0;
    bytes_ = 0;
    seconds_ = 0;
    start_ = System.nanoTime();
    lastOpTime_ = start_;
    finish_ = start_;
    found_ = 0;
    message_ = new StringBuilder("");
    excludeFromMerge_ = false;
  }

  void merge(final Stats other) {
    if (other.excludeFromMerge_) {
      return;
    }

    done_ += other.done_;
    found_ += other.found_;
    bytes_ += other.bytes_;
    seconds_ += other.seconds_;
    if (other.start_ < start_) start_ = other.start_;
    if (other.finish_ > finish_) finish_ = other.finish_;

    // Just keep the messages from one thread.
    if (message_.length() == 0) {
      message_ = other.message_;
    }
  }

  void stop() {
    finish_ = System.nanoTime();
    seconds_ = (double) (finish_ - start_) * 1e-9;
  }

  void addMessage(String msg) {
    if (message_.length() > 0) {
      message_.append(" ");
    }
    message_.append(msg);
  }

  void setId(int id) { id_ = id; }
  void setExcludeFromMerge() { excludeFromMerge_ = true; }

  void finishedSingleOp(int bytes) {
    done_++;
    lastOpTime_ = System.nanoTime();
    bytes_ += bytes;
    if (done_ >= nextReport_) {
      if (nextReport_ < 1000) {
        nextReport_ += 100;
      } else if (nextReport_ < 5000) {
        nextReport_ += 500;
      } else if (nextReport_ < 10000) {
        nextReport_ += 1000;
      } else if (nextReport_ < 50000) {
        nextReport_ += 5000;
      } else if (nextReport_ < 100000) {
        nextReport_ += 10000;
      } else if (nextReport_ < 500000) {
        nextReport_ += 50000;
      } else {
        nextReport_ += 100000;
      }
      System.err.printf("... Task %s finished %d ops%30s\r", id_, done_, "");
    }
  }

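  // Timing note: start_ and finish_ come from System.nanoTime(), so
  // (finish_ - start_) is in nanoseconds; multiplying by 1e-9 converts
  // to seconds (for ops/sec) and by 1e-3 to microseconds (for micros/op).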
  void report(String name) {
    // Pretend at least one op was done in case we are running a benchmark
    // that does not call finishedSingleOp().
    if (done_ < 1) done_ = 1;

    StringBuilder extra = new StringBuilder("");
    if (bytes_ > 0) {
      // Rate is computed on actual elapsed time, not the sum of per-thread
      // elapsed times.
      double elapsed = (finish_ - start_) * 1e-9;
      extra.append(String.format("%6.1f MB/s", (bytes_ / 1048576.0) / elapsed));
    }
    extra.append(message_.toString());
    double elapsed = (finish_ - start_);
    double throughput = (double) done_ / (elapsed * 1e-9);

    System.out.format("%-12s : %11.3f micros/op %d ops/sec;%s%s\n",
        name, (elapsed * 1e-3) / done_,
        (long) throughput, (extra.length() == 0 ? "" : " "), extra.toString());
  }
}

public class DbBenchmark {
  enum Order {
    SEQUENTIAL,
    RANDOM
  }

  enum DBState {
    FRESH,
    EXISTING
  }

  static {
    RocksDB.loadLibrary();
  }

  abstract class BenchmarkTask implements Callable<Stats> {
    // TODO(yhchiang): use (Integer)Flag.perf_level.
    public BenchmarkTask(
        int tid, long randSeed, long numEntries, long keyRange) {
      tid_ = tid;
      rand_ = new Random(randSeed + tid * 1000);
      numEntries_ = numEntries;
      keyRange_ = keyRange;
      stats_ = new Stats(tid);
    }

    @Override public Stats call() throws RocksDBException {
      stats_.start_ = System.nanoTime();
      runTask();
      stats_.finish_ = System.nanoTime();
      return stats_;
    }

    abstract protected void runTask() throws RocksDBException;

    protected int tid_;
    protected Random rand_;
    protected long numEntries_;
    protected long keyRange_;
    protected Stats stats_;

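    // Key helpers: both map a long id onto a fixed-width byte key via
    // generateKeyFromLong(); getRandomKey() draws the id from [0, range)
    // by taking a random long modulo range.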
    protected void getFixedKey(byte[] key, long sn) {
      generateKeyFromLong(key, sn);
    }

    protected void getRandomKey(byte[] key, long range) {
      generateKeyFromLong(key, Math.abs(rand_.nextLong() % range));
    }
  }

  abstract class WriteTask extends BenchmarkTask {
    public WriteTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch) {
      super(tid, randSeed, numEntries, keyRange);
      writeOpt_ = writeOpt;
      entriesPerBatch_ = entriesPerBatch;
      maxWritesPerSecond_ = -1;
    }

    public WriteTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch, long maxWritesPerSecond) {
      super(tid, randSeed, numEntries, keyRange);
      writeOpt_ = writeOpt;
      entriesPerBatch_ = entriesPerBatch;
      maxWritesPerSecond_ = maxWritesPerSecond;
    }

    @Override public void runTask() throws RocksDBException {
      if (numEntries_ != DbBenchmark.this.num_) {
        stats_.message_.append(String.format(" (%d ops)", numEntries_));
      }
      byte[] key = new byte[keySize_];
      byte[] value = new byte[valueSize_];

      try {
        if (entriesPerBatch_ == 1) {
          for (long i = 0; i < numEntries_; ++i) {
            getKey(key, i, keyRange_);
            DbBenchmark.this.gen_.generate(value);
            db_.put(writeOpt_, key, value);
            stats_.finishedSingleOp(keySize_ + valueSize_);
            writeRateControl(i);
            if (isFinished()) {
              return;
            }
          }
        } else {
          for (long i = 0; i < numEntries_; i += entriesPerBatch_) {
            WriteBatch batch = new WriteBatch();
            for (long j = 0; j < entriesPerBatch_; j++) {
              getKey(key, i + j, keyRange_);
              DbBenchmark.this.gen_.generate(value);
              batch.put(key, value);
              stats_.finishedSingleOp(keySize_ + valueSize_);
            }
            db_.write(writeOpt_, batch);
            batch.dispose();
            writeRateControl(i);
            if (isFinished()) {
              return;
            }
          }
        }
      } catch (InterruptedException e) {
        // Thread has been terminated.
      }
    }

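    // Rate limiting: after writeCount ops, the ideal elapsed time at
    // maxWritesPerSecond_ is writeCount / maxWritesPerSecond_ seconds.
    // If we are more than 1 ms ahead of that schedule, sleep off the
    // difference. E.g. at 10,000 writes/s, 5,000 writes should take at
    // least 0.5 s; finishing them in 0.2 s forces a 0.3 s sleep.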
    protected void writeRateControl(long writeCount)
        throws InterruptedException {
      if (maxWritesPerSecond_ <= 0) return;
      long minInterval =
          writeCount * TimeUnit.SECONDS.toNanos(1) / maxWritesPerSecond_;
      long interval = System.nanoTime() - stats_.start_;
      if (minInterval - interval > TimeUnit.MILLISECONDS.toNanos(1)) {
        TimeUnit.NANOSECONDS.sleep(minInterval - interval);
      }
    }

    abstract protected void getKey(byte[] key, long id, long range);
    protected WriteOptions writeOpt_;
    protected long entriesPerBatch_;
    protected long maxWritesPerSecond_;
  }

  class WriteSequentialTask extends WriteTask {
    public WriteSequentialTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch) {
      super(tid, randSeed, numEntries, keyRange,
            writeOpt, entriesPerBatch);
    }
    public WriteSequentialTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch,
        long maxWritesPerSecond) {
      super(tid, randSeed, numEntries, keyRange,
            writeOpt, entriesPerBatch,
            maxWritesPerSecond);
    }
    @Override protected void getKey(byte[] key, long id, long range) {
      getFixedKey(key, id);
    }
  }

  class WriteRandomTask extends WriteTask {
    public WriteRandomTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch) {
      super(tid, randSeed, numEntries, keyRange,
            writeOpt, entriesPerBatch);
    }
    public WriteRandomTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch,
        long maxWritesPerSecond) {
      super(tid, randSeed, numEntries, keyRange,
            writeOpt, entriesPerBatch,
            maxWritesPerSecond);
    }
    @Override protected void getKey(byte[] key, long id, long range) {
      getRandomKey(key, range);
    }
  }

  class WriteUniqueRandomTask extends WriteTask {
    static final int MAX_BUFFER_SIZE = 10000000;
    public WriteUniqueRandomTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch) {
      super(tid, randSeed, numEntries, keyRange,
            writeOpt, entriesPerBatch);
      initRandomKeySequence();
    }
    public WriteUniqueRandomTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch,
        long maxWritesPerSecond) {
      super(tid, randSeed, numEntries, keyRange,
            writeOpt, entriesPerBatch,
            maxWritesPerSecond);
      initRandomKeySequence();
    }
    @Override protected void getKey(byte[] key, long id, long range) {
      generateKeyFromLong(key, nextUniqueRandom());
    }

    protected void initRandomKeySequence() {
      bufferSize_ = MAX_BUFFER_SIZE;
      if (bufferSize_ > keyRange_) {
        bufferSize_ = (int) keyRange_;
      }
      currentKeyCount_ = bufferSize_;
      keyBuffer_ = new long[MAX_BUFFER_SIZE];
      for (int k = 0; k < bufferSize_; ++k) {
        keyBuffer_[k] = k;
      }
    }

    /**
     * Semi-randomly returns the next unique key from [0, keyRange_): a
     * random slot of the buffer is emitted and then overwritten, either
     * by the next not-yet-seen key (while unseen keys remain) or by the
     * buffer's last element (shrinking the buffer), so no key repeats.
     * The sequence is fully random when keyRange_ <= MAX_BUFFER_SIZE.
     */
    long nextUniqueRandom() {
      if (bufferSize_ == 0) {
        System.err.println("bufferSize_ == 0.");
        return 0;
      }
      int r = rand_.nextInt(bufferSize_);
      // Randomly pick one from the keyBuffer.
      long randKey = keyBuffer_[r];
      if (currentKeyCount_ < keyRange_) {
        // If we have not yet emitted all keys, place the next new key at [r].
        keyBuffer_[r] = currentKeyCount_++;
      } else {
        // Move the last element to [r] and decrease the size by one.
        keyBuffer_[r] = keyBuffer_[--bufferSize_];
      }
      return randKey;
    }

    int bufferSize_;
    long currentKeyCount_;
    long[] keyBuffer_;
  }

  class ReadRandomTask extends BenchmarkTask {
    public ReadRandomTask(
        int tid, long randSeed, long numEntries, long keyRange) {
      super(tid, randSeed, numEntries, keyRange);
    }
    @Override public void runTask() throws RocksDBException {
      byte[] key = new byte[keySize_];
      byte[] value = new byte[valueSize_];
      for (long i = 0; i < numEntries_; i++) {
        getRandomKey(key, keyRange_);
        int len = db_.get(key, value);
        if (len != RocksDB.NOT_FOUND) {
          stats_.found_++;
          stats_.finishedSingleOp(keySize_ + valueSize_);
        } else {
          stats_.finishedSingleOp(keySize_);
        }
        if (isFinished()) {
          return;
        }
      }
    }
  }

  class ReadSequentialTask extends BenchmarkTask {
    public ReadSequentialTask(
        int tid, long randSeed, long numEntries, long keyRange) {
      super(tid, randSeed, numEntries, keyRange);
    }
    @Override public void runTask() throws RocksDBException {
      RocksIterator iter = db_.newIterator();
      long i;
      for (iter.seekToFirst(), i = 0;
           iter.isValid() && i < numEntries_;
           iter.next(), ++i) {
        stats_.found_++;
        stats_.finishedSingleOp(iter.key().length + iter.value().length);
        if (isFinished()) {
          iter.dispose();
          return;
        }
      }
      iter.dispose();
    }
  }

  public DbBenchmark(Map<Flag, Object> flags) throws Exception {
    benchmarks_ = (List<String>) flags.get(Flag.benchmarks);
    num_ = (Integer) flags.get(Flag.num);
    threadNum_ = (Integer) flags.get(Flag.threads);
    reads_ = (Integer) (flags.get(Flag.reads) == null ?
        flags.get(Flag.num) : flags.get(Flag.reads));
    keySize_ = (Integer) flags.get(Flag.key_size);
    valueSize_ = (Integer) flags.get(Flag.value_size);
    compressionRatio_ = (Double) flags.get(Flag.compression_ratio);
    useExisting_ = (Boolean) flags.get(Flag.use_existing_db);
    randSeed_ = (Long) flags.get(Flag.seed);
    databaseDir_ = (String) flags.get(Flag.db);
    writesPerSeconds_ = (Integer) flags.get(Flag.writes_per_second);
    memtable_ = (String) flags.get(Flag.memtablerep);
    maxWriteBufferNumber_ = (Integer) flags.get(Flag.max_write_buffer_number);
    prefixSize_ = (Integer) flags.get(Flag.prefix_size);
    keysPerPrefix_ = (Integer) flags.get(Flag.keys_per_prefix);
    hashBucketCount_ = (Long) flags.get(Flag.hash_bucket_count);
    usePlainTable_ = (Boolean) flags.get(Flag.use_plain_table);
    useMemenv_ = (Boolean) flags.get(Flag.use_mem_env);
    flags_ = flags;
    finishLock_ = new Object();
    // options.setPrefixSize((Integer)flags_.get(Flag.prefix_size));
    // options.setKeysPerPrefix((Long)flags_.get(Flag.keys_per_prefix));
    compressionType_ = (String) flags.get(Flag.compression_type);
    compression_ = CompressionType.NO_COMPRESSION;
    try {
      if (compressionType_ != null) {
        final CompressionType compressionType =
            CompressionType.getCompressionType(compressionType_);
        if (compressionType != null &&
            compressionType != CompressionType.NO_COMPRESSION) {
          System.loadLibrary(compressionType.getLibraryName());
        }
      }
    } catch (UnsatisfiedLinkError e) {
      System.err.format("Unable to load %s library: %s%n" +
          "No compression is used.%n",
          compressionType_, e.toString());
      compressionType_ = "none";
    }
    gen_ = new RandomGenerator(randSeed_, compressionRatio_);
  }

  private void prepareReadOptions(ReadOptions options) {
    options.setVerifyChecksums((Boolean)flags_.get(Flag.verify_checksum));
    options.setTailing((Boolean)flags_.get(Flag.use_tailing_iterator));
  }

  private void prepareWriteOptions(WriteOptions options) {
    options.setSync((Boolean)flags_.get(Flag.sync));
    options.setDisableWAL((Boolean)flags_.get(Flag.disable_wal));
  }

  private void prepareOptions(Options options) throws RocksDBException {
    if (!useExisting_) {
      options.setCreateIfMissing(true);
    } else {
      options.setCreateIfMissing(false);
    }
    if (useMemenv_) {
      options.setEnv(new RocksMemEnv(Env.getDefault()));
    }
    switch (memtable_) {
      case "skip_list":
        options.setMemTableConfig(new SkipListMemTableConfig());
        break;
      case "vector":
        options.setMemTableConfig(new VectorMemTableConfig());
        break;
      case "hash_linkedlist":
        options.setMemTableConfig(
            new HashLinkedListMemTableConfig()
                .setBucketCount(hashBucketCount_));
        options.useFixedLengthPrefixExtractor(prefixSize_);
        break;
      case "hash_skiplist":
      case "prefix_hash":
        options.setMemTableConfig(
            new HashSkipListMemTableConfig()
                .setBucketCount(hashBucketCount_));
        options.useFixedLengthPrefixExtractor(prefixSize_);
        break;
      default:
        System.err.format(
            "unable to detect the specified memtable, " +
            "using the default memtable factory %s%n",
            options.memTableFactoryName());
        break;
    }
    if (usePlainTable_) {
      options.setTableFormatConfig(
          new PlainTableConfig().setKeySize(keySize_));
    } else {
      BlockBasedTableConfig table_options = new BlockBasedTableConfig();
      table_options.setBlockSize((Long)flags_.get(Flag.block_size))
          .setBlockCacheSize((Long)flags_.get(Flag.cache_size))
          .setCacheNumShardBits(
              (Integer)flags_.get(Flag.cache_numshardbits));
      options.setTableFormatConfig(table_options);
    }
    options.setWriteBufferSize(
        (Long)flags_.get(Flag.write_buffer_size));
    options.setMaxWriteBufferNumber(
        (Integer)flags_.get(Flag.max_write_buffer_number));
    options.setMaxBackgroundCompactions(
        (Integer)flags_.get(Flag.max_background_compactions));
    options.getEnv().setBackgroundThreads(
        (Integer)flags_.get(Flag.max_background_compactions));
    options.setMaxBackgroundFlushes(
        (Integer)flags_.get(Flag.max_background_flushes));
    options.setMaxBackgroundJobs((Integer) flags_.get(Flag.max_background_jobs));
    options.setMaxOpenFiles(
        (Integer)flags_.get(Flag.open_files));
    options.setUseFsync(
        (Boolean)flags_.get(Flag.use_fsync));
    options.setWalDir(
        (String)flags_.get(Flag.wal_dir));
    options.setDeleteObsoleteFilesPeriodMicros(
        (Integer)flags_.get(Flag.delete_obsolete_files_period_micros));
    options.setTableCacheNumshardbits(
        (Integer)flags_.get(Flag.table_cache_numshardbits));
    options.setAllowMmapReads(
        (Boolean)flags_.get(Flag.mmap_read));
    options.setAllowMmapWrites(
        (Boolean)flags_.get(Flag.mmap_write));
    options.setAdviseRandomOnOpen(
        (Boolean)flags_.get(Flag.advise_random_on_open));
    options.setUseAdaptiveMutex(
        (Boolean)flags_.get(Flag.use_adaptive_mutex));
    options.setBytesPerSync(
        (Long)flags_.get(Flag.bytes_per_sync));
    options.setBloomLocality(
        (Integer)flags_.get(Flag.bloom_locality));
    options.setMinWriteBufferNumberToMerge(
        (Integer)flags_.get(Flag.min_write_buffer_number_to_merge));
    options.setMemtablePrefixBloomSizeRatio(
        (Double) flags_.get(Flag.memtable_bloom_size_ratio));
    options.setMemtableWholeKeyFiltering(
        (Boolean) flags_.get(Flag.memtable_whole_key_filtering));
    options.setNumLevels(
        (Integer)flags_.get(Flag.num_levels));
    options.setTargetFileSizeBase(
        (Integer)flags_.get(Flag.target_file_size_base));
    options.setTargetFileSizeMultiplier(
        (Integer)flags_.get(Flag.target_file_size_multiplier));
    options.setMaxBytesForLevelBase(
        (Integer)flags_.get(Flag.max_bytes_for_level_base));
    options.setMaxBytesForLevelMultiplier(
        (Double) flags_.get(Flag.max_bytes_for_level_multiplier));
    options.setLevelZeroStopWritesTrigger(
        (Integer)flags_.get(Flag.level0_stop_writes_trigger));
    options.setLevelZeroSlowdownWritesTrigger(
        (Integer)flags_.get(Flag.level0_slowdown_writes_trigger));
    options.setLevelZeroFileNumCompactionTrigger(
        (Integer)flags_.get(Flag.level0_file_num_compaction_trigger));
    options.setMaxCompactionBytes(
        (Long) flags_.get(Flag.max_compaction_bytes));
    options.setDisableAutoCompactions(
        (Boolean)flags_.get(Flag.disable_auto_compactions));
    options.setMaxSuccessiveMerges(
        (Integer)flags_.get(Flag.max_successive_merges));
    options.setWalTtlSeconds((Long)flags_.get(Flag.wal_ttl_seconds));
    options.setWalSizeLimitMB((Long)flags_.get(Flag.wal_size_limit_MB));
    if (flags_.get(Flag.java_comparator) != null) {
      options.setComparator(
          (AbstractComparator)flags_.get(Flag.java_comparator));
    }

    /* TODO(yhchiang): enable the following parameters
    options.setCompressionType((String)flags_.get(Flag.compression_type));
    options.setCompressionLevel((Integer)flags_.get(Flag.compression_level));
    options.setMinLevelToCompress((Integer)flags_.get(Flag.min_level_to_compress));
    options.setStatistics((Boolean)flags_.get(Flag.statistics));
    options.setUniversalSizeRatio(
        (Integer)flags_.get(Flag.universal_size_ratio));
    options.setUniversalMinMergeWidth(
        (Integer)flags_.get(Flag.universal_min_merge_width));
    options.setUniversalMaxMergeWidth(
        (Integer)flags_.get(Flag.universal_max_merge_width));
    options.setUniversalMaxSizeAmplificationPercent(
        (Integer)flags_.get(Flag.universal_max_size_amplification_percent));
    options.setUniversalCompressionSizePercent(
        (Integer)flags_.get(Flag.universal_compression_size_percent));
    // TODO(yhchiang): add RocksDB.openForReadOnly() to enable Flag.readonly
    // TODO(yhchiang): enable Flag.merge_operator by switch
    options.setAccessHintOnCompactionStart(
        (String)flags_.get(Flag.compaction_fadvice));
    // available values of fadvice are "NONE", "NORMAL", "SEQUENTIAL", "WILLNEED"
    */
  }

  private void run() throws RocksDBException {
    if (!useExisting_) {
      destroyDb();
    }
    Options options = new Options();
    prepareOptions(options);
    open(options);

    printHeader(options);

    for (String benchmark : benchmarks_) {
      List<Callable<Stats>> tasks = new ArrayList<Callable<Stats>>();
      List<Callable<Stats>> bgTasks = new ArrayList<Callable<Stats>>();
      WriteOptions writeOpt = new WriteOptions();
      prepareWriteOptions(writeOpt);
      ReadOptions readOpt = new ReadOptions();
      prepareReadOptions(readOpt);
      int currentTaskId = 0;
      boolean known = true;

      switch (benchmark) {
        case "fillseq":
          tasks.add(new WriteSequentialTask(
              currentTaskId++, randSeed_, num_, num_, writeOpt, 1));
          break;
        case "fillbatch":
          tasks.add(
              new WriteSequentialTask(currentTaskId++, randSeed_, num_, num_, writeOpt, 1000));
          break;
        case "fillrandom":
          tasks.add(new WriteRandomTask(
              currentTaskId++, randSeed_, num_, num_, writeOpt, 1));
          break;
        case "filluniquerandom":
          tasks.add(new WriteUniqueRandomTask(
              currentTaskId++, randSeed_, num_, num_, writeOpt, 1));
          break;
        case "fillsync":
          writeOpt.setSync(true);
          tasks.add(new WriteRandomTask(
              currentTaskId++, randSeed_, num_ / 1000, num_ / 1000,
              writeOpt, 1));
          break;
        case "readseq":
          for (int t = 0; t < threadNum_; ++t) {
            tasks.add(new ReadSequentialTask(
                currentTaskId++, randSeed_, reads_ / threadNum_, num_));
          }
          break;
        case "readrandom":
          for (int t = 0; t < threadNum_; ++t) {
            tasks.add(new ReadRandomTask(
                currentTaskId++, randSeed_, reads_ / threadNum_, num_));
          }
          break;
        case "readwhilewriting":
          WriteTask writeTask = new WriteRandomTask(
              -1, randSeed_, Long.MAX_VALUE, num_, writeOpt, 1, writesPerSeconds_);
          writeTask.stats_.setExcludeFromMerge();
          bgTasks.add(writeTask);
          for (int t = 0; t < threadNum_; ++t) {
            tasks.add(new ReadRandomTask(
                currentTaskId++, randSeed_, reads_ / threadNum_, num_));
          }
          break;
        case "readhot":
          for (int t = 0; t < threadNum_; ++t) {
            tasks.add(new ReadRandomTask(
                currentTaskId++, randSeed_, reads_ / threadNum_, num_ / 100));
          }
          break;
        case "delete":
          destroyDb();
          open(options);
          break;
        default:
          known = false;
          System.err.println("Unknown benchmark: " + benchmark);
          break;
      }
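      // Background tasks (e.g. the writer in readwhilewriting) run on their
      // own pool and are excluded from the merged stats; only the foreground
      // tasks between start() and stop() are timed.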
      if (known) {
        ExecutorService executor = Executors.newCachedThreadPool();
        ExecutorService bgExecutor = Executors.newCachedThreadPool();
        try {
          // Measure only the main executor time.
          List<Future<Stats>> bgResults = new ArrayList<Future<Stats>>();
          for (Callable<Stats> bgTask : bgTasks) {
            bgResults.add(bgExecutor.submit(bgTask));
          }
          start();
          List<Future<Stats>> results = executor.invokeAll(tasks);
          executor.shutdown();
          boolean finished = executor.awaitTermination(10, TimeUnit.SECONDS);
          if (!finished) {
            System.out.format(
                "Benchmark %s was not finished before timeout.%n",
                benchmark);
            executor.shutdownNow();
          }
          setFinished(true);
          bgExecutor.shutdown();
          finished = bgExecutor.awaitTermination(10, TimeUnit.SECONDS);
          if (!finished) {
            System.out.format(
                "Benchmark %s was not finished before timeout.%n",
                benchmark);
            bgExecutor.shutdownNow();
          }

          stop(benchmark, results, currentTaskId);
        } catch (InterruptedException e) {
          System.err.println(e);
        }
      }
      writeOpt.dispose();
      readOpt.dispose();
    }
    options.dispose();
    db_.close();
  }

  private void printHeader(Options options) {
    System.out.printf("Keys: %d bytes each\n", keySize_);
    System.out.printf("Values: %d bytes each (%d bytes after compression)\n",
        valueSize_,
        (int) (valueSize_ * compressionRatio_ + 0.5));
    System.out.printf("Entries: %d\n", num_);
    System.out.printf("RawSize: %.1f MB (estimated)\n",
        ((double)(keySize_ + valueSize_) * num_) / SizeUnit.MB);
    System.out.printf("FileSize: %.1f MB (estimated)\n",
        (((keySize_ + valueSize_ * compressionRatio_) * num_) / SizeUnit.MB));
    System.out.format("Memtable Factory: %s%n", options.memTableFactoryName());
    System.out.format("Prefix: %d bytes%n", prefixSize_);
    System.out.format("Compression: %s%n", compressionType_);
    printWarnings();
    System.out.printf("------------------------------------------------\n");
  }

  void printWarnings() {
    boolean assertsEnabled = false;
    // Intentional side effect: the assignment inside the assert only runs
    // when assertions are enabled (-ea), which is what we want to detect.
    assert assertsEnabled = true;
    if (assertsEnabled) {
      System.out.printf(
          "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
    }
  }

  private void open(Options options) throws RocksDBException {
    System.out.println("Using database directory: " + databaseDir_);
    db_ = RocksDB.open(options, databaseDir_);
  }

  private void start() {
    setFinished(false);
    startTime_ = System.nanoTime();
  }

  private void stop(
      String benchmark, List<Future<Stats>> results, int concurrentThreads) {
    long endTime = System.nanoTime();
    double elapsedSeconds =
        1.0d * (endTime - startTime_) / TimeUnit.SECONDS.toNanos(1);

    Stats stats = new Stats(-1);
    int taskFinishedCount = 0;
    for (Future<Stats> result : results) {
      if (result.isDone()) {
        try {
          Stats taskStats = result.get(3, TimeUnit.SECONDS);
          if (!result.isCancelled()) {
            taskFinishedCount++;
          }
          stats.merge(taskStats);
        } catch (Exception e) {
          // Then it's not successful; the output will indicate this.
        }
      }
    }
    String extra = "";
    if (benchmark.indexOf("read") >= 0) {
      extra = String.format(" %d / %d found; ", stats.found_, stats.done_);
    } else {
      extra = String.format(" %d ops done; ", stats.done_);
    }

    System.out.printf(
        "%-16s : %11.5f micros/op; %6.1f MB/s;%s %d / %d task(s) finished.\n",
        benchmark, elapsedSeconds / stats.done_ * 1e6,
        (stats.bytes_ / 1048576.0) / elapsedSeconds, extra,
        taskFinishedCount, concurrentThreads);
  }

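  // Key layout produced below: when keys_per_prefix > 0, the first
  // min(prefix_size, 8) bytes hold the prefix id in little-endian byte
  // order, padded with '0's up to prefix_size when prefix_size > 8; the
  // remainder of the key is the decimal representation of n, right-aligned
  // and padded with leading '0' characters.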
  public void generateKeyFromLong(byte[] slice, long n) {
    assert(n >= 0);
    int startPos = 0;

    if (keysPerPrefix_ > 0) {
      long numPrefix = (num_ + keysPerPrefix_ - 1) / keysPerPrefix_;
      long prefix = n % numPrefix;
      int bytesToFill = Math.min(prefixSize_, 8);
      for (int i = 0; i < bytesToFill; ++i) {
        slice[i] = (byte) (prefix % 256);
        prefix /= 256;
      }
      // Pad the remainder of an over-long prefix with '0's.
      for (int i = bytesToFill; i < prefixSize_; ++i) {
        slice[i] = '0';
      }
      startPos = prefixSize_;
    }

    for (int i = slice.length - 1; i >= startPos; --i) {
      slice[i] = (byte) ('0' + (n % 10));
      n /= 10;
    }
  }

  private void destroyDb() {
    if (db_ != null) {
      db_.close();
    }
    // TODO(yhchiang): develop our own FileUtil
    // FileUtil.deleteDir(databaseDir_);
  }

  private void printStats() {
  }

  static void printHelp() {
    System.out.println("usage:");
    for (Flag flag : Flag.values()) {
      System.out.format("  --%s%n\t%s%n",
          flag.name(),
          flag.desc());
      if (flag.getDefaultValue() != null) {
        System.out.format("\tDEFAULT: %s%n",
            flag.getDefaultValue().toString());
      }
    }
  }

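  // Example invocation (the classpath entries are illustrative; point them
  // at wherever rocksdbjni and this benchmark are built):
  //   java -cp rocksdbjni.jar:benchmark.jar org.rocksdb.benchmark.DbBenchmark \
  //       --benchmarks=fillseq,readrandom --num=100000 --threads=4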
  public static void main(String[] args) throws Exception {
    Map<Flag, Object> flags = new EnumMap<Flag, Object>(Flag.class);
    for (Flag flag : Flag.values()) {
      if (flag.getDefaultValue() != null) {
        flags.put(flag, flag.getDefaultValue());
      }
    }
    for (String arg : args) {
      boolean valid = false;
      if (arg.equals("--help") || arg.equals("-h")) {
        printHelp();
        System.exit(0);
      }
      if (arg.startsWith("--")) {
        try {
          // Split on the first '=' only, so values may themselves contain '='.
          String[] parts = arg.substring(2).split("=", 2);
          if (parts.length >= 1) {
            Flag key = Flag.valueOf(parts[0]);
            if (key != null) {
              Object value = null;
              if (parts.length >= 2) {
                value = key.parseValue(parts[1]);
              }
              flags.put(key, value);
              valid = true;
            }
          }
        } catch (Exception e) {
        }
      }
      if (!valid) {
        System.err.println("Invalid argument " + arg);
        System.exit(1);
      }
    }
    new DbBenchmark(flags).run();
  }

  private enum Flag {
    benchmarks(Arrays.asList("fillseq", "readrandom", "fillrandom"),
        "Comma-separated list of operations to run in the specified order\n"
            + "\tActual benchmarks:\n"
            + "\t\tfillseq -- write N values in sequential key order in async mode.\n"
            + "\t\tfillrandom -- write N values in random key order in async mode.\n"
            + "\t\tfillbatch -- write N/1000 batches where each batch has 1000 values\n"
            + "\t\t           in sequential key order in sync mode.\n"
            + "\t\tfillsync -- write N/1000 values in random key order in sync mode.\n"
            + "\t\tfill100K -- write N/1000 100K values in random order in async mode.\n"
            + "\t\treadseq -- read N times sequentially.\n"
            + "\t\treadrandom -- read N times in random order.\n"
            + "\t\treadhot -- read N times in random order from 1% section of DB.\n"
            + "\t\treadwhilewriting -- measure the read performance of multiple readers\n"
            + "\t\t           with a single background writer whose write rate is\n"
            + "\t\t           capped by --writes_per_second.\n"
            + "\tMeta operations:\n"
            + "\t\tdelete -- delete DB") {
      @Override public Object parseValue(String value) {
        return new ArrayList<String>(Arrays.asList(value.split(",")));
      }
    },
    compression_ratio(0.5d,
        "Arrange to generate values that shrink to this fraction of\n" +
        "\ttheir original size after compression.") {
      @Override public Object parseValue(String value) {
        return Double.parseDouble(value);
      }
    },
    use_existing_db(false,
        "If true, do not destroy the existing database. If you set this\n" +
        "\tflag and also specify a benchmark that wants a fresh database,\n" +
        "\tthat benchmark will fail.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    num(1000000,
        "Number of key/values to place in the database.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    threads(1,
        "Number of concurrent threads to run.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    reads(null,
        "Number of read operations to do. If negative, do --num reads.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    key_size(16,
        "The size of each key in bytes.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    value_size(100,
        "The size of each value in bytes.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    write_buffer_size(4L * SizeUnit.MB,
        "Number of bytes to buffer in the memtable before compacting\n" +
        "\t(initialized to the default value by 'main'.)") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    max_write_buffer_number(2,
        "The number of in-memory memtables. Each memtable is of size\n" +
        "\twrite_buffer_size.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    prefix_size(0, "Controls the prefix size for HashSkipList, HashLinkedList,\n" +
        "\tand plain table.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    keys_per_prefix(0, "Controls the average number of keys generated\n" +
        "\tper prefix. 0 means no special handling of the prefix,\n" +
        "\ti.e. use the prefix that comes with the generated random number.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    memtablerep("skip_list",
        "The memtable format. Available options are\n" +
        "\tskip_list,\n" +
        "\tvector,\n" +
        "\thash_linkedlist,\n" +
        "\thash_skiplist (prefix_hash.)") {
      @Override public Object parseValue(String value) {
        return value;
      }
    },
    hash_bucket_count(SizeUnit.MB,
        "The number of hash buckets used in the hash-bucket-based\n" +
        "\tmemtables. Memtables that currently support this argument are\n" +
        "\thash_linkedlist and hash_skiplist.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    writes_per_second(10000,
        "The write rate of the background writer used in the\n" +
        "\t`readwhilewriting` benchmark. A non-positive number indicates\n" +
        "\tan unbounded write rate in the `readwhilewriting` benchmark.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    use_plain_table(false,
        "Use the plain-table sst format.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    cache_size(-1L,
        "Number of bytes to use as a cache of uncompressed data.\n" +
        "\tNegative means use default settings.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    seed(0L,
        "Seed base for random number generators.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    num_levels(7,
        "The total number of levels.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    numdistinct(1000L,
        "Number of distinct keys to use. Used in RandomWithVerify to\n" +
        "\tread/write on fewer keys so that gets are more likely to find the\n" +
        "\tkey and puts are more likely to update the same key.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    merge_keys(-1L,
        "Number of distinct keys to use for MergeRandom and\n" +
        "\tReadRandomMergeRandom.\n" +
        "\tIf negative, there will be FLAGS_num keys.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    bloom_locality(0, "Control bloom filter probes locality.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    duration(0, "Time in seconds for the random-ops tests to run.\n" +
        "\tWhen 0, num & reads determine the test duration.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    num_multi_db(0,
        "Number of DBs used in the benchmark. 0 means a single DB.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    histogram(false, "Print histogram of operation timings.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    min_write_buffer_number_to_merge(
        defaultOptions_.minWriteBufferNumberToMerge(),
        "The minimum number of write buffers that will be merged together\n" +
        "\tbefore writing to storage. This is cheap because it is an\n" +
        "\tin-memory merge. If this feature is not enabled, then all these\n" +
        "\twrite buffers are flushed to L0 as separate files and this\n" +
        "\tincreases read amplification because a get request has to check\n" +
        "\tall of these files. Also, an in-memory merge may result in\n" +
        "\twriting less data to storage if there are duplicate records\n" +
        "\tin each of these individual write buffers.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    max_background_compactions(
        defaultOptions_.maxBackgroundCompactions(),
        "The maximum number of concurrent background compactions\n" +
        "\tthat can occur in parallel.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    max_background_flushes(
        defaultOptions_.maxBackgroundFlushes(),
        "The maximum number of concurrent background flushes\n" +
        "\tthat can occur in parallel.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    max_background_jobs(defaultOptions_.maxBackgroundJobs(),
        "The maximum number of concurrent background jobs\n"
            + "\tthat can occur in parallel.") {
      @Override
      public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    /* TODO(yhchiang): enable the following
    compaction_style((int32_t) defaultOptions_.compactionStyle(),
        "style of compaction: level-based vs universal.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },*/
    universal_size_ratio(0,
        "Percentage flexibility while comparing file size\n" +
        "\t(for universal compaction only).") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    universal_min_merge_width(0, "The minimum number of files in a\n" +
        "\tsingle compaction run (for universal compaction only).") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    universal_max_merge_width(0, "The max number of files to compact\n" +
        "\tin universal style compaction.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    universal_max_size_amplification_percent(0,
        "The max size amplification for universal style compaction.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    universal_compression_size_percent(-1,
        "The percentage of the database to compress for universal\n" +
        "\tcompaction. -1 means compress everything.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    block_size(defaultBlockBasedTableOptions_.blockSize(),
        "Number of bytes in a block.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    compressed_cache_size(-1L,
        "Number of bytes to use as a cache of compressed data.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    open_files(defaultOptions_.maxOpenFiles(),
        "Maximum number of files to keep open at the same time\n" +
        "\t(use default if == 0).") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    bloom_bits(-1, "Bloom filter bits per key. Negative means\n" +
        "\tuse default settings.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    memtable_bloom_size_ratio(0.0d, "Ratio of memtable used by the bloom filter.\n"
        + "\t0 means no bloom filter.") {
      @Override public Object parseValue(String value) {
        return Double.parseDouble(value);
      }
    },
    memtable_whole_key_filtering(false, "Enable whole-key bloom filter in memtable.") {
      @Override
      public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    cache_numshardbits(-1, "Number of shards for the block cache\n" +
        "\tis 2 ** cache_numshardbits. Negative means use default settings.\n" +
        "\tThis is applied only if FLAGS_cache_size is non-negative.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    verify_checksum(false, "Verify checksum for every block read\n" +
        "\tfrom storage.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    statistics(false, "Database statistics.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    writes(-1L, "Number of write operations to do. If negative, do\n" +
        "\t--num writes.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    sync(false, "Sync all writes to disk.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    use_fsync(false, "If true, issue fsync instead of fdatasync.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    disable_wal(false, "If true, do not write the WAL for writes.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    wal_dir("", "If not empty, use the given dir for the WAL.") {
      @Override public Object parseValue(String value) {
        return value;
      }
    },
    target_file_size_base(2 * 1048576, "Target file size at level-1.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    target_file_size_multiplier(1,
        "A multiplier to compute target level-N file size (N >= 2).") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    max_bytes_for_level_base(10 * 1048576,
        "Max bytes for level-1.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    max_bytes_for_level_multiplier(10.0d,
        "A multiplier to compute max bytes for level-N (N >= 2).") {
      @Override public Object parseValue(String value) {
        return Double.parseDouble(value);
      }
    },
    level0_stop_writes_trigger(12, "Number of files in level-0\n" +
        "\tthat will trigger a put stop.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    level0_slowdown_writes_trigger(8, "Number of files in level-0\n" +
        "\tthat will slow down writes.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    level0_file_num_compaction_trigger(4, "Number of files in level-0\n" +
        "\tat which compactions start.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    readwritepercent(90, "Ratio of reads to reads/writes (expressed\n" +
        "\tas a percentage) for the ReadRandomWriteRandom workload. The\n" +
        "\tdefault value 90 means 90% of all read and write operations\n" +
        "\tare reads. In other words, 9 gets for every 1 put.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    mergereadpercent(70, "Ratio of merges to merges&reads (expressed\n" +
        "\tas a percentage) for the ReadRandomMergeRandom workload. The\n" +
        "\tdefault value 70 means 70% of all read and merge operations\n" +
        "\tare merges. In other words, 7 merges for every 3 gets.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    deletepercent(2, "Percentage of deletes out of reads/writes/\n" +
        "\tdeletes (used in RandomWithVerify only). RandomWithVerify\n" +
        "\tcalculates writepercent as (100 - FLAGS_readwritepercent -\n" +
        "\tdeletepercent), so deletepercent must be smaller than (100 -\n" +
        "\tFLAGS_readwritepercent).") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    delete_obsolete_files_period_micros(0, "Option to delete\n" +
        "\tobsolete files periodically. 0 means that obsolete files are\n" +
        "\tdeleted after every compaction run.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    compression_type("snappy",
        "Algorithm used to compress the database.") {
      @Override public Object parseValue(String value) {
        return value;
      }
    },
    compression_level(-1,
        "Compression level. For zlib this should be -1 for the\n" +
        "\tdefault level, or between 0 and 9.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    min_level_to_compress(-1, "If non-negative, compression starts\n" +
        "\tfrom this level. Levels with number < min_level_to_compress are\n" +
        "\tnot compressed. Otherwise, apply compression_type to\n" +
        "\tall levels.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    table_cache_numshardbits(4, "Number of shards for the table cache\n" +
        "\tis 2 ** table_cache_numshardbits.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    stats_interval(0L, "Stats are reported every N operations when\n" +
        "\tthis is greater than zero. When 0, the interval grows over time.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    stats_per_interval(0, "Reports additional stats per interval when\n" +
        "\tthis is greater than 0.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    perf_level(0, "Level of perf collection.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    max_compaction_bytes(0L, "Limit the number of bytes in one compaction to be\n" +
        "\tlower than this threshold. But it's not guaranteed.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    readonly(false, "Run read-only benchmarks.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    disable_auto_compactions(false, "Do not auto trigger compactions.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    wal_ttl_seconds(0L, "Set the TTL for the WAL files in seconds.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    wal_size_limit_MB(0L, "Set the size limit for the WAL files\n" +
        "\tin MB.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    /* TODO(yhchiang): enable the following
    direct_reads(rocksdb::EnvOptions().use_direct_reads,
        "Allow direct I/O reads.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    direct_writes(rocksdb::EnvOptions().use_direct_writes,
        "Allow direct I/O writes.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    */
    mmap_read(false,
        "Allow reads to occur via mmap-ing files.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    mmap_write(false,
        "Allow writes to occur via mmap-ing files.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    advise_random_on_open(defaultOptions_.adviseRandomOnOpen(),
        "Advise random access on table file open.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    compaction_fadvice("NORMAL",
        "Access pattern advice when a file is compacted.") {
      @Override public Object parseValue(String value) {
        return value;
      }
    },
    use_tailing_iterator(false,
        "Use a tailing iterator to access a series of keys instead of get.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    use_adaptive_mutex(defaultOptions_.useAdaptiveMutex(),
        "Use adaptive mutex.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    bytes_per_sync(defaultOptions_.bytesPerSync(),
        "Allows the OS to incrementally sync files to disk while they are\n" +
        "\tbeing written, in the background. Issue one request for every\n" +
        "\tbytes_per_sync written. 0 turns it off.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    filter_deletes(false, "If true, deletes use a bloom filter and drop\n" +
        "\tthe delete if the key is not present.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    max_successive_merges(0, "Maximum number of successive merge\n" +
        "\toperations on a key in the memtable.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    db(getTempDir("rocksdb-jni"),
        "Use the db with the following name.") {
      @Override public Object parseValue(String value) {
        return value;
      }
    },
    use_mem_env(false, "Use RocksMemEnv instead of the default filesystem-based\n" +
        "\tenvironment.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    java_comparator(null, "Class name of a Java Comparator to use instead\n" +
        "\tof the default C++ ByteWiseComparatorImpl. Must be available on\n" +
        "\tthe classpath.") {
      @Override
      protected Object parseValue(final String value) {
        try {
          final ComparatorOptions copt = new ComparatorOptions();
          final Class<AbstractComparator> clsComparator =
              (Class<AbstractComparator>) Class.forName(value);
          final Constructor cstr =
              clsComparator.getConstructor(ComparatorOptions.class);
          return cstr.newInstance(copt);
        } catch (final ClassNotFoundException cnfe) {
          throw new IllegalArgumentException("Java Comparator '" + value + "'" +
              " not found on the classpath", cnfe);
        } catch (final NoSuchMethodException nsme) {
          throw new IllegalArgumentException("Java Comparator '" + value + "'" +
              " does not have a public ComparatorOptions constructor", nsme);
        } catch (final IllegalAccessException | InstantiationException
            | InvocationTargetException ie) {
          throw new IllegalArgumentException("Unable to construct Java" +
              " Comparator '" + value + "'", ie);
        }
      }
    };

    private Flag(Object defaultValue, String desc) {
      defaultValue_ = defaultValue;
      desc_ = desc;
    }

    public Object getDefaultValue() {
      return defaultValue_;
    }

    public String desc() {
      return desc_;
    }

    public boolean parseBoolean(String value) {
      if (value.equals("1")) {
        return true;
      } else if (value.equals("0")) {
        return false;
      }
      return Boolean.parseBoolean(value);
    }

    protected abstract Object parseValue(String value);

    private final Object defaultValue_;
    private final String desc_;
  }

  private final static String DEFAULT_TEMP_DIR = "/tmp";

  private static String getTempDir(final String dirName) {
    try {
      return Files.createTempDirectory(dirName).toAbsolutePath().toString();
    } catch (final IOException ioe) {
      System.err.println("Unable to create temp directory, defaulting to: " +
          DEFAULT_TEMP_DIR);
      return DEFAULT_TEMP_DIR + File.separator + dirName;
    }
  }

  private static class RandomGenerator {
    private final byte[] data_;
    private int dataLength_;
    private int position_;
    private double compressionRatio_;
    Random rand_;

    private RandomGenerator(long seed, double compressionRatio) {
      // We use a limited amount of data over and over again and ensure
      // that it is larger than the compression window (32KB), and also
      // large enough to serve all typical value sizes we want to write.
      byte[] value = new byte[100];
      rand_ = new Random(seed);
      dataLength_ = value.length * 10000;
      data_ = new byte[dataLength_];
      compressionRatio_ = compressionRatio;
      int pos = 0;
      while (pos < dataLength_) {
        compressibleBytes(value);
        System.arraycopy(value, 0, data_, pos,
            Math.min(value.length, dataLength_ - pos));
        pos += value.length;
      }
    }

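    // To hit the target compression ratio, only the first
    // baseLength = compressionRatio * length bytes are random; the rest
    // repeats that random prefix, which a compressor can squeeze roughly
    // back down to baseLength. E.g. ratio 0.5 over 100 bytes yields 50
    // random bytes followed by one repetition of them.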
    private void compressibleBytes(byte[] value) {
      int baseLength = value.length;
      if (compressionRatio_ < 1.0d) {
        baseLength = (int) (compressionRatio_ * value.length + 0.5);
      }
      if (baseLength <= 0) {
        baseLength = 1;
      }
      int pos;
      for (pos = 0; pos < baseLength; ++pos) {
        value[pos] = (byte) (' ' + rand_.nextInt(95));  // ' ' .. '~'
      }
      while (pos < value.length) {
        System.arraycopy(value, 0, value, pos,
            Math.min(baseLength, value.length - pos));
        pos += baseLength;
      }
    }

    private void generate(byte[] value) {
      if (position_ + value.length > data_.length) {
        position_ = 0;
        assert (value.length <= data_.length);
      }
      position_ += value.length;
      System.arraycopy(data_, position_ - value.length,
          value, 0, value.length);
    }
  }

  boolean isFinished() {
    synchronized (finishLock_) {
      return isFinished_;
    }
  }

  void setFinished(boolean flag) {
    synchronized (finishLock_) {
      isFinished_ = flag;
    }
  }

  RocksDB db_;
  final List<String> benchmarks_;
  final int num_;
  final int reads_;
  final int keySize_;
  final int valueSize_;
  final int threadNum_;
  final int writesPerSeconds_;
  final long randSeed_;
  final boolean useExisting_;
  final String databaseDir_;
  double compressionRatio_;
  RandomGenerator gen_;
  long startTime_;

  // env
  boolean useMemenv_;

  // memtable related
  final int maxWriteBufferNumber_;
  final int prefixSize_;
  final int keysPerPrefix_;
  final String memtable_;
  final long hashBucketCount_;

  // sst format related
  boolean usePlainTable_;

  Object finishLock_;
  boolean isFinished_;
  Map<Flag, Object> flags_;
  // As these static members live for the whole program, we let their
  // C++ pointers be disposed of in their finalizers.
  static Options defaultOptions_ = new Options();
  static BlockBasedTableConfig defaultBlockBasedTableOptions_ =
      new BlockBasedTableConfig();
  String compressionType_;
  CompressionType compression_;
}