// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
/**
 * Copyright (C) 2011 the original author or authors.
 * See the notice.md file distributed with this work for additional
 * information regarding copyright ownership.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.rocksdb.benchmark;

import java.io.IOException;
import java.io.File;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.nio.ByteBuffer;
import java.nio.file.Files;
import java.util.Collection;
import java.util.Date;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.concurrent.Callable;
import java.util.concurrent.Executors;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import org.rocksdb.*;
import org.rocksdb.RocksMemEnv;
import org.rocksdb.util.SizeUnit;

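/**
 * Collects per-task benchmark counters (operations completed, bytes
 * processed, hit count, and wall-clock start/finish times). One Stats
 * instance is produced per worker thread and merged into a single
 * summary when the benchmark stops.
 */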
class Stats {
  int id_;
  long start_;
  long finish_;
  double seconds_;
  long done_;
  long found_;
  long lastOpTime_;
  long nextReport_;
  long bytes_;
  StringBuilder message_;
  boolean excludeFromMerge_;

  // TODO(yhchiang): use the following arguments:
  //   (Long)Flag.stats_interval
  //   (Integer)Flag.stats_per_interval

  Stats(int id) {
    id_ = id;
    nextReport_ = 100;
    done_ = 0;
    bytes_ = 0;
    seconds_ = 0;
    start_ = System.nanoTime();
    lastOpTime_ = start_;
    finish_ = start_;
    found_ = 0;
    message_ = new StringBuilder("");
    excludeFromMerge_ = false;
  }

  void merge(final Stats other) {
    if (other.excludeFromMerge_) {
      return;
    }

    done_ += other.done_;
    found_ += other.found_;
    bytes_ += other.bytes_;
    seconds_ += other.seconds_;
    if (other.start_ < start_) start_ = other.start_;
    if (other.finish_ > finish_) finish_ = other.finish_;

    // Just keep the messages from one thread.
    if (message_.length() == 0) {
      message_ = other.message_;
    }
  }

  void stop() {
    finish_ = System.nanoTime();
    seconds_ = (double) (finish_ - start_) * 1e-9;
  }

  void addMessage(String msg) {
    if (message_.length() > 0) {
      message_.append(" ");
    }
    message_.append(msg);
  }

  void setId(int id) { id_ = id; }
  void setExcludeFromMerge() { excludeFromMerge_ = true; }

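  // Record one completed operation and print a progress line to stderr
  // at a geometrically growing interval (every 100 ops at first, every
  // 100,000 ops once past half a million).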
  void finishedSingleOp(int bytes) {
    done_++;
    lastOpTime_ = System.nanoTime();
    bytes_ += bytes;
    if (done_ >= nextReport_) {
      if (nextReport_ < 1000) {
        nextReport_ += 100;
      } else if (nextReport_ < 5000) {
        nextReport_ += 500;
      } else if (nextReport_ < 10000) {
        nextReport_ += 1000;
      } else if (nextReport_ < 50000) {
        nextReport_ += 5000;
      } else if (nextReport_ < 100000) {
        nextReport_ += 10000;
      } else if (nextReport_ < 500000) {
        nextReport_ += 50000;
      } else {
        nextReport_ += 100000;
      }
      System.err.printf("... Task %s finished %d ops%30s\r", id_, done_, "");
    }
  }

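  // Print a one-line summary for this benchmark: average latency, overall
  // throughput, and any extra message accumulated by the tasks.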
  void report(String name) {
    // Pretend at least one op was done in case we are running a benchmark
    // that does not call FinishedSingleOp().
    if (done_ < 1) done_ = 1;

    StringBuilder extra = new StringBuilder("");
    if (bytes_ > 0) {
      // Rate is computed on actual elapsed time, not the sum of per-thread
      // elapsed times.
      double elapsed = (finish_ - start_) * 1e-9;
      extra.append(String.format("%6.1f MB/s", (bytes_ / 1048576.0) / elapsed));
    }
    extra.append(message_.toString());
    double elapsed = (finish_ - start_);
    double throughput = (double) done_ / (elapsed * 1e-9);

    // elapsed is in nanoseconds; multiply by 1e-3 to report microseconds/op.
    System.out.format("%-12s : %11.3f micros/op %d ops/sec;%s%s\n",
        name, (elapsed * 1e-3) / done_,
        (long) throughput, (extra.length() == 0 ? "" : " "), extra.toString());
  }
}

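/**
 * A micro-benchmark driver for the RocksDB Java API, modeled on the C++
 * db_bench tool. Flags are passed as {@code --name=value} pairs and parsed
 * by the {@link Flag} enum below; for example (a hypothetical invocation,
 * assuming the benchmark classes and the RocksDB JNI jar are on the
 * classpath shown):
 *
 *   java -cp rocksdbjni.jar:benchmarks.jar org.rocksdb.benchmark.DbBenchmark \
 *       --benchmarks=fillseq,readrandom --num=1000000 --threads=4
 */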
public class DbBenchmark {
  enum Order {
    SEQUENTIAL,
    RANDOM
  }

  enum DBState {
    FRESH,
    EXISTING
  }

  static {
    RocksDB.loadLibrary();
  }

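  // Base class for all benchmark workloads. Each task runs on its own
  // thread with a deterministic per-task RNG (seeded from the global seed
  // plus the task id) and records its results in a private Stats object.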
  abstract class BenchmarkTask implements Callable<Stats> {
    // TODO(yhchiang): use (Integer)Flag.perf_level.
    public BenchmarkTask(
        int tid, long randSeed, long numEntries, long keyRange) {
      tid_ = tid;
      rand_ = new Random(randSeed + tid * 1000);
      numEntries_ = numEntries;
      keyRange_ = keyRange;
      stats_ = new Stats(tid);
    }

    @Override public Stats call() throws RocksDBException {
      stats_.start_ = System.nanoTime();
      runTask();
      stats_.finish_ = System.nanoTime();
      return stats_;
    }

    abstract protected void runTask() throws RocksDBException;

    protected int tid_;
    protected Random rand_;
    protected long numEntries_;
    protected long keyRange_;
    protected Stats stats_;

    protected void getFixedKey(byte[] key, long sn) {
      generateKeyFromLong(key, sn);
    }

    protected void getRandomKey(byte[] key, long range) {
      generateKeyFromLong(key, Math.abs(rand_.nextLong() % range));
    }
  }

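  // Common write workload: issues puts either one at a time or batched
  // into WriteBatches of entriesPerBatch_ entries, optionally throttled
  // to maxWritesPerSecond_ (non-positive means unthrottled).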
  abstract class WriteTask extends BenchmarkTask {
    public WriteTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch) {
      super(tid, randSeed, numEntries, keyRange);
      writeOpt_ = writeOpt;
      entriesPerBatch_ = entriesPerBatch;
      maxWritesPerSecond_ = -1;
    }

    public WriteTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch, long maxWritesPerSecond) {
      super(tid, randSeed, numEntries, keyRange);
      writeOpt_ = writeOpt;
      entriesPerBatch_ = entriesPerBatch;
      maxWritesPerSecond_ = maxWritesPerSecond;
    }

    @Override public void runTask() throws RocksDBException {
      if (numEntries_ != DbBenchmark.this.num_) {
        stats_.message_.append(String.format(" (%d ops)", numEntries_));
      }
      byte[] key = new byte[keySize_];
      byte[] value = new byte[valueSize_];

      try {
        if (entriesPerBatch_ == 1) {
          for (long i = 0; i < numEntries_; ++i) {
            getKey(key, i, keyRange_);
            DbBenchmark.this.gen_.generate(value);
            db_.put(writeOpt_, key, value);
            stats_.finishedSingleOp(keySize_ + valueSize_);
            writeRateControl(i);
            if (isFinished()) {
              return;
            }
          }
        } else {
          for (long i = 0; i < numEntries_; i += entriesPerBatch_) {
            WriteBatch batch = new WriteBatch();
            for (long j = 0; j < entriesPerBatch_; j++) {
              getKey(key, i + j, keyRange_);
              DbBenchmark.this.gen_.generate(value);
              batch.put(key, value);
              stats_.finishedSingleOp(keySize_ + valueSize_);
            }
            db_.write(writeOpt_, batch);
            batch.dispose();
            writeRateControl(i);
            if (isFinished()) {
              return;
            }
          }
        }
      } catch (InterruptedException e) {
        // Thread has been terminated.
      }
    }

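    // Rate limiter: after writeCount writes the task should have been
    // running for at least writeCount / maxWritesPerSecond_ seconds; if it
    // is ahead of that schedule by more than a millisecond, sleep off the
    // difference.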
    protected void writeRateControl(long writeCount)
        throws InterruptedException {
      if (maxWritesPerSecond_ <= 0) return;
      long minInterval =
          writeCount * TimeUnit.SECONDS.toNanos(1) / maxWritesPerSecond_;
      long interval = System.nanoTime() - stats_.start_;
      if (minInterval - interval > TimeUnit.MILLISECONDS.toNanos(1)) {
        TimeUnit.NANOSECONDS.sleep(minInterval - interval);
      }
    }

    abstract protected void getKey(byte[] key, long id, long range);
    protected WriteOptions writeOpt_;
    protected long entriesPerBatch_;
    protected long maxWritesPerSecond_;
  }

  class WriteSequentialTask extends WriteTask {
    public WriteSequentialTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch) {
      super(tid, randSeed, numEntries, keyRange,
            writeOpt, entriesPerBatch);
    }
    public WriteSequentialTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch,
        long maxWritesPerSecond) {
      super(tid, randSeed, numEntries, keyRange,
            writeOpt, entriesPerBatch,
            maxWritesPerSecond);
    }
    @Override protected void getKey(byte[] key, long id, long range) {
      getFixedKey(key, id);
    }
  }

  class WriteRandomTask extends WriteTask {
    public WriteRandomTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch) {
      super(tid, randSeed, numEntries, keyRange,
            writeOpt, entriesPerBatch);
    }
    public WriteRandomTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch,
        long maxWritesPerSecond) {
      super(tid, randSeed, numEntries, keyRange,
            writeOpt, entriesPerBatch,
            maxWritesPerSecond);
    }
    @Override protected void getKey(byte[] key, long id, long range) {
      getRandomKey(key, range);
    }
  }

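  // Writes every key in [0, keyRange_) exactly once, in a randomized
  // order, using an in-memory shuffle buffer: a random slot is drawn from
  // a pool of not-yet-issued keys and refilled with the next fresh key,
  // Fisher-Yates style.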
  class WriteUniqueRandomTask extends WriteTask {
    static final int MAX_BUFFER_SIZE = 10000000;
    public WriteUniqueRandomTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch) {
      super(tid, randSeed, numEntries, keyRange,
            writeOpt, entriesPerBatch);
      initRandomKeySequence();
    }
    public WriteUniqueRandomTask(
        int tid, long randSeed, long numEntries, long keyRange,
        WriteOptions writeOpt, long entriesPerBatch,
        long maxWritesPerSecond) {
      super(tid, randSeed, numEntries, keyRange,
            writeOpt, entriesPerBatch,
            maxWritesPerSecond);
      initRandomKeySequence();
    }
    @Override protected void getKey(byte[] key, long id, long range) {
      generateKeyFromLong(key, nextUniqueRandom());
    }

    protected void initRandomKeySequence() {
      bufferSize_ = MAX_BUFFER_SIZE;
      if (bufferSize_ > keyRange_) {
        bufferSize_ = (int) keyRange_;
      }
      currentKeyCount_ = bufferSize_;
      keyBuffer_ = new long[MAX_BUFFER_SIZE];
      for (int k = 0; k < bufferSize_; ++k) {
        keyBuffer_[k] = k;
      }
    }

    /**
     * Semi-randomly return the next unique key. It is guaranteed to be
     * fully random if keyRange_ <= MAX_BUFFER_SIZE.
     */
    long nextUniqueRandom() {
      if (bufferSize_ == 0) {
        System.err.println("bufferSize_ == 0.");
        return 0;
      }
      int r = rand_.nextInt(bufferSize_);
      // Randomly pick one from the keyBuffer.
      long randKey = keyBuffer_[r];
      if (currentKeyCount_ < keyRange_) {
        // If we have not yet inserted all keys, insert the next new key at [r].
        keyBuffer_[r] = currentKeyCount_++;
      } else {
        // Move the last element to [r] and decrease the size by 1.
        keyBuffer_[r] = keyBuffer_[--bufferSize_];
      }
      return randKey;
    }

    int bufferSize_;
    long currentKeyCount_;
    long[] keyBuffer_;
  }

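  // Point-lookup workload: issues numEntries_ random gets and counts how
  // many keys were actually found.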
  class ReadRandomTask extends BenchmarkTask {
    public ReadRandomTask(
        int tid, long randSeed, long numEntries, long keyRange) {
      super(tid, randSeed, numEntries, keyRange);
    }
    @Override public void runTask() throws RocksDBException {
      byte[] key = new byte[keySize_];
      byte[] value = new byte[valueSize_];
      for (long i = 0; i < numEntries_; i++) {
        getRandomKey(key, keyRange_);
        int len = db_.get(key, value);
        if (len != RocksDB.NOT_FOUND) {
          stats_.found_++;
          stats_.finishedSingleOp(keySize_ + valueSize_);
        } else {
          stats_.finishedSingleOp(keySize_);
        }
        if (isFinished()) {
          return;
        }
      }
    }
  }

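  // Sequential-scan workload: iterates the database from the first key and
  // counts every entry visited, up to numEntries_ entries.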
  class ReadSequentialTask extends BenchmarkTask {
    public ReadSequentialTask(
        int tid, long randSeed, long numEntries, long keyRange) {
      super(tid, randSeed, numEntries, keyRange);
    }
    @Override public void runTask() throws RocksDBException {
      RocksIterator iter = db_.newIterator();
      long i;
      for (iter.seekToFirst(), i = 0;
           iter.isValid() && i < numEntries_;
           iter.next(), ++i) {
        stats_.found_++;
        stats_.finishedSingleOp(iter.key().length + iter.value().length);
        if (isFinished()) {
          iter.dispose();
          return;
        }
      }
      iter.dispose();
    }
  }

  public DbBenchmark(Map<Flag, Object> flags) throws Exception {
    benchmarks_ = (List<String>) flags.get(Flag.benchmarks);
    num_ = (Integer) flags.get(Flag.num);
    threadNum_ = (Integer) flags.get(Flag.threads);
    reads_ = (Integer) (flags.get(Flag.reads) == null ?
        flags.get(Flag.num) : flags.get(Flag.reads));
    keySize_ = (Integer) flags.get(Flag.key_size);
    valueSize_ = (Integer) flags.get(Flag.value_size);
    compressionRatio_ = (Double) flags.get(Flag.compression_ratio);
    useExisting_ = (Boolean) flags.get(Flag.use_existing_db);
    randSeed_ = (Long) flags.get(Flag.seed);
    databaseDir_ = (String) flags.get(Flag.db);
    writesPerSeconds_ = (Integer) flags.get(Flag.writes_per_second);
    memtable_ = (String) flags.get(Flag.memtablerep);
    maxWriteBufferNumber_ = (Integer) flags.get(Flag.max_write_buffer_number);
    prefixSize_ = (Integer) flags.get(Flag.prefix_size);
    keysPerPrefix_ = (Integer) flags.get(Flag.keys_per_prefix);
    hashBucketCount_ = (Long) flags.get(Flag.hash_bucket_count);
    usePlainTable_ = (Boolean) flags.get(Flag.use_plain_table);
    useMemenv_ = (Boolean) flags.get(Flag.use_mem_env);
    flags_ = flags;
    finishLock_ = new Object();
    // options.setPrefixSize((Integer)flags_.get(Flag.prefix_size));
    // options.setKeysPerPrefix((Long)flags_.get(Flag.keys_per_prefix));
    compressionType_ = (String) flags.get(Flag.compression_type);
    compression_ = CompressionType.NO_COMPRESSION;
    try {
      if (compressionType_ != null) {
        final CompressionType compressionType =
            CompressionType.getCompressionType(compressionType_);
        if (compressionType != null &&
            compressionType != CompressionType.NO_COMPRESSION) {
          System.loadLibrary(compressionType.getLibraryName());
        }
      }
    } catch (UnsatisfiedLinkError e) {
      System.err.format("Unable to load %s library: %s%n" +
          "No compression is used.%n",
          compressionType_, e.toString());
      compressionType_ = "none";
    }
    gen_ = new RandomGenerator(randSeed_, compressionRatio_);
  }

  private void prepareReadOptions(ReadOptions options) {
    options.setVerifyChecksums((Boolean)flags_.get(Flag.verify_checksum));
    options.setTailing((Boolean)flags_.get(Flag.use_tailing_iterator));
  }

  private void prepareWriteOptions(WriteOptions options) {
    options.setSync((Boolean)flags_.get(Flag.sync));
    options.setDisableWAL((Boolean)flags_.get(Flag.disable_wal));
  }

  private void prepareOptions(Options options) throws RocksDBException {
    if (!useExisting_) {
      options.setCreateIfMissing(true);
    } else {
      options.setCreateIfMissing(false);
    }
    if (useMemenv_) {
      options.setEnv(new RocksMemEnv(Env.getDefault()));
    }
    switch (memtable_) {
      case "skip_list":
        options.setMemTableConfig(new SkipListMemTableConfig());
        break;
      case "vector":
        options.setMemTableConfig(new VectorMemTableConfig());
        break;
      case "hash_linkedlist":
        options.setMemTableConfig(
            new HashLinkedListMemTableConfig()
                .setBucketCount(hashBucketCount_));
        options.useFixedLengthPrefixExtractor(prefixSize_);
        break;
      case "hash_skiplist":
      case "prefix_hash":
        options.setMemTableConfig(
            new HashSkipListMemTableConfig()
                .setBucketCount(hashBucketCount_));
        options.useFixedLengthPrefixExtractor(prefixSize_);
        break;
      default:
        System.err.format(
            "unknown memtable type; using the default memtable factory %s%n",
            options.memTableFactoryName());
        break;
    }
    if (usePlainTable_) {
      options.setTableFormatConfig(
          new PlainTableConfig().setKeySize(keySize_));
    } else {
      BlockBasedTableConfig table_options = new BlockBasedTableConfig();
      table_options.setBlockSize((Long)flags_.get(Flag.block_size))
                   .setBlockCacheSize((Long)flags_.get(Flag.cache_size))
                   .setCacheNumShardBits(
                       (Integer)flags_.get(Flag.cache_numshardbits));
      options.setTableFormatConfig(table_options);
    }
    options.setWriteBufferSize(
        (Long)flags_.get(Flag.write_buffer_size));
    options.setMaxWriteBufferNumber(
        (Integer)flags_.get(Flag.max_write_buffer_number));
    options.setMaxBackgroundCompactions(
        (Integer)flags_.get(Flag.max_background_compactions));
    options.getEnv().setBackgroundThreads(
        (Integer)flags_.get(Flag.max_background_compactions));
    options.setMaxBackgroundFlushes(
        (Integer)flags_.get(Flag.max_background_flushes));
    options.setMaxBackgroundJobs((Integer) flags_.get(Flag.max_background_jobs));
    options.setMaxOpenFiles(
        (Integer)flags_.get(Flag.open_files));
    options.setUseFsync(
        (Boolean)flags_.get(Flag.use_fsync));
    options.setWalDir(
        (String)flags_.get(Flag.wal_dir));
    options.setDeleteObsoleteFilesPeriodMicros(
        (Integer)flags_.get(Flag.delete_obsolete_files_period_micros));
    options.setTableCacheNumshardbits(
        (Integer)flags_.get(Flag.table_cache_numshardbits));
    options.setAllowMmapReads(
        (Boolean)flags_.get(Flag.mmap_read));
    options.setAllowMmapWrites(
        (Boolean)flags_.get(Flag.mmap_write));
    options.setAdviseRandomOnOpen(
        (Boolean)flags_.get(Flag.advise_random_on_open));
    options.setUseAdaptiveMutex(
        (Boolean)flags_.get(Flag.use_adaptive_mutex));
    options.setBytesPerSync(
        (Long)flags_.get(Flag.bytes_per_sync));
    options.setBloomLocality(
        (Integer)flags_.get(Flag.bloom_locality));
    options.setMinWriteBufferNumberToMerge(
        (Integer)flags_.get(Flag.min_write_buffer_number_to_merge));
    options.setMemtablePrefixBloomSizeRatio(
        (Double) flags_.get(Flag.memtable_bloom_size_ratio));
    options.setNumLevels(
        (Integer)flags_.get(Flag.num_levels));
    options.setTargetFileSizeBase(
        (Integer)flags_.get(Flag.target_file_size_base));
    options.setTargetFileSizeMultiplier(
        (Integer)flags_.get(Flag.target_file_size_multiplier));
    options.setMaxBytesForLevelBase(
        (Integer)flags_.get(Flag.max_bytes_for_level_base));
    options.setMaxBytesForLevelMultiplier(
        (Double) flags_.get(Flag.max_bytes_for_level_multiplier));
    options.setLevelZeroStopWritesTrigger(
        (Integer)flags_.get(Flag.level0_stop_writes_trigger));
    options.setLevelZeroSlowdownWritesTrigger(
        (Integer)flags_.get(Flag.level0_slowdown_writes_trigger));
    options.setLevelZeroFileNumCompactionTrigger(
        (Integer)flags_.get(Flag.level0_file_num_compaction_trigger));
    options.setMaxCompactionBytes(
        (Long) flags_.get(Flag.max_compaction_bytes));
    options.setDisableAutoCompactions(
        (Boolean)flags_.get(Flag.disable_auto_compactions));
    options.setMaxSuccessiveMerges(
        (Integer)flags_.get(Flag.max_successive_merges));
    options.setWalTtlSeconds((Long)flags_.get(Flag.wal_ttl_seconds));
    options.setWalSizeLimitMB((Long)flags_.get(Flag.wal_size_limit_MB));
    if (flags_.get(Flag.java_comparator) != null) {
      options.setComparator(
          (AbstractComparator)flags_.get(Flag.java_comparator));
    }

    /* TODO(yhchiang): enable the following parameters
    options.setCompressionType((String)flags_.get(Flag.compression_type));
    options.setCompressionLevel((Integer)flags_.get(Flag.compression_level));
    options.setMinLevelToCompress((Integer)flags_.get(Flag.min_level_to_compress));
    options.setHdfs((String)flags_.get(Flag.hdfs)); // env
    options.setStatistics((Boolean)flags_.get(Flag.statistics));
    options.setUniversalSizeRatio(
        (Integer)flags_.get(Flag.universal_size_ratio));
    options.setUniversalMinMergeWidth(
        (Integer)flags_.get(Flag.universal_min_merge_width));
    options.setUniversalMaxMergeWidth(
        (Integer)flags_.get(Flag.universal_max_merge_width));
    options.setUniversalMaxSizeAmplificationPercent(
        (Integer)flags_.get(Flag.universal_max_size_amplification_percent));
    options.setUniversalCompressionSizePercent(
        (Integer)flags_.get(Flag.universal_compression_size_percent));
    // TODO(yhchiang): add RocksDB.openForReadOnly() to enable Flag.readonly
    // TODO(yhchiang): enable Flag.merge_operator by switch
    options.setAccessHintOnCompactionStart(
        (String)flags_.get(Flag.compaction_fadvice));
    // Available values for compaction_fadvice are
    // "NONE", "NORMAL", "SEQUENTIAL", and "WILLNEED".
    */
  }

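  // Main driver: (re)creates the database, then runs each requested
  // benchmark in order. Foreground tasks are timed; background tasks
  // (e.g. the writer in readwhilewriting) are excluded from the merged
  // statistics.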
  private void run() throws RocksDBException {
    if (!useExisting_) {
      destroyDb();
    }
    Options options = new Options();
    prepareOptions(options);
    open(options);

    printHeader(options);

    for (String benchmark : benchmarks_) {
      List<Callable<Stats>> tasks = new ArrayList<Callable<Stats>>();
      List<Callable<Stats>> bgTasks = new ArrayList<Callable<Stats>>();
      WriteOptions writeOpt = new WriteOptions();
      prepareWriteOptions(writeOpt);
      ReadOptions readOpt = new ReadOptions();
      prepareReadOptions(readOpt);
      int currentTaskId = 0;
      boolean known = true;

      switch (benchmark) {
        case "fillseq":
          tasks.add(new WriteSequentialTask(
              currentTaskId++, randSeed_, num_, num_, writeOpt, 1));
          break;
        case "fillbatch":
          tasks.add(new WriteRandomTask(
              currentTaskId++, randSeed_, num_ / 1000, num_, writeOpt, 1000));
          break;
        case "fillrandom":
          tasks.add(new WriteRandomTask(
              currentTaskId++, randSeed_, num_, num_, writeOpt, 1));
          break;
        case "filluniquerandom":
          tasks.add(new WriteUniqueRandomTask(
              currentTaskId++, randSeed_, num_, num_, writeOpt, 1));
          break;
        case "fillsync":
          writeOpt.setSync(true);
          tasks.add(new WriteRandomTask(
              currentTaskId++, randSeed_, num_ / 1000, num_ / 1000,
              writeOpt, 1));
          break;
        case "readseq":
          for (int t = 0; t < threadNum_; ++t) {
            tasks.add(new ReadSequentialTask(
                currentTaskId++, randSeed_, reads_ / threadNum_, num_));
          }
          break;
        case "readrandom":
          for (int t = 0; t < threadNum_; ++t) {
            tasks.add(new ReadRandomTask(
                currentTaskId++, randSeed_, reads_ / threadNum_, num_));
          }
          break;
        case "readwhilewriting":
          WriteTask writeTask = new WriteRandomTask(
              -1, randSeed_, Long.MAX_VALUE, num_, writeOpt, 1, writesPerSeconds_);
          writeTask.stats_.setExcludeFromMerge();
          bgTasks.add(writeTask);
          for (int t = 0; t < threadNum_; ++t) {
            tasks.add(new ReadRandomTask(
                currentTaskId++, randSeed_, reads_ / threadNum_, num_));
          }
          break;
        case "readhot":
          for (int t = 0; t < threadNum_; ++t) {
            tasks.add(new ReadRandomTask(
                currentTaskId++, randSeed_, reads_ / threadNum_, num_ / 100));
          }
          break;
        case "delete":
          destroyDb();
          open(options);
          break;
        default:
          known = false;
          System.err.println("Unknown benchmark: " + benchmark);
          break;
      }
      if (known) {
        ExecutorService executor = Executors.newCachedThreadPool();
        ExecutorService bgExecutor = Executors.newCachedThreadPool();
        try {
          // Measure only the main executor time.
          List<Future<Stats>> bgResults = new ArrayList<Future<Stats>>();
          for (Callable<Stats> bgTask : bgTasks) {
            bgResults.add(bgExecutor.submit(bgTask));
          }
          start();
          List<Future<Stats>> results = executor.invokeAll(tasks);
          executor.shutdown();
          boolean finished = executor.awaitTermination(10, TimeUnit.SECONDS);
          if (!finished) {
            System.out.format(
                "Benchmark %s was not finished before timeout.%n",
                benchmark);
            executor.shutdownNow();
          }
          setFinished(true);
          bgExecutor.shutdown();
          finished = bgExecutor.awaitTermination(10, TimeUnit.SECONDS);
          if (!finished) {
            System.out.format(
                "Benchmark %s was not finished before timeout.%n",
                benchmark);
            bgExecutor.shutdownNow();
          }

          stop(benchmark, results, currentTaskId);
        } catch (InterruptedException e) {
          System.err.println(e);
        }
      }
      writeOpt.dispose();
      readOpt.dispose();
    }
    options.dispose();
    db_.close();
  }

  private void printHeader(Options options) {
    // Report the configured key size rather than a hard-coded constant.
    System.out.printf("Keys: %d bytes each\n", keySize_);
    System.out.printf("Values: %d bytes each (%d bytes after compression)\n",
        valueSize_,
        (int) (valueSize_ * compressionRatio_ + 0.5));
    System.out.printf("Entries: %d\n", num_);
    System.out.printf("RawSize: %.1f MB (estimated)\n",
        ((double)(keySize_ + valueSize_) * num_) / SizeUnit.MB);
    System.out.printf("FileSize: %.1f MB (estimated)\n",
        (((keySize_ + valueSize_ * compressionRatio_) * num_) / SizeUnit.MB));
    System.out.format("Memtable Factory: %s%n", options.memTableFactoryName());
    System.out.format("Prefix: %d bytes%n", prefixSize_);
    System.out.format("Compression: %s%n", compressionType_);
    printWarnings();
    System.out.printf("------------------------------------------------\n");
  }

  void printWarnings() {
    boolean assertsEnabled = false;
    assert assertsEnabled = true; // Intentional side effect!!!
    if (assertsEnabled) {
      System.out.printf(
          "WARNING: Assertions are enabled; benchmarks unnecessarily slow\n");
    }
  }

  private void open(Options options) throws RocksDBException {
    System.out.println("Using database directory: " + databaseDir_);
    db_ = RocksDB.open(options, databaseDir_);
  }

  private void start() {
    setFinished(false);
    startTime_ = System.nanoTime();
  }

  private void stop(
      String benchmark, List<Future<Stats>> results, int concurrentThreads) {
    long endTime = System.nanoTime();
    double elapsedSeconds =
        1.0d * (endTime - startTime_) / TimeUnit.SECONDS.toNanos(1);

    Stats stats = new Stats(-1);
    int taskFinishedCount = 0;
    for (Future<Stats> result : results) {
      if (result.isDone()) {
        try {
          Stats taskStats = result.get(3, TimeUnit.SECONDS);
          if (!result.isCancelled()) {
            taskFinishedCount++;
          }
          stats.merge(taskStats);
        } catch (Exception e) {
          // The task was not successful; the output will indicate this.
        }
      }
    }
    String extra = "";
    if (benchmark.indexOf("read") >= 0) {
      extra = String.format(" %d / %d found; ", stats.found_, stats.done_);
    } else {
      extra = String.format(" %d ops done; ", stats.done_);
    }

    System.out.printf(
        "%-16s : %11.5f micros/op; %6.1f MB/s;%s %d / %d task(s) finished.\n",
        benchmark, elapsedSeconds / stats.done_ * 1e6,
        (stats.bytes_ / 1048576.0) / elapsedSeconds, extra,
        taskFinishedCount, concurrentThreads);
  }

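  // Encodes key number n into the key buffer: an optional fixed-size prefix
  // (little-endian bytes of n % numPrefix) followed by the decimal digits of
  // n, right-aligned and padded with leading '0' bytes to the full key size.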
  public void generateKeyFromLong(byte[] slice, long n) {
    assert(n >= 0);
    int startPos = 0;

    if (keysPerPrefix_ > 0) {
      long numPrefix = (num_ + keysPerPrefix_ - 1) / keysPerPrefix_;
      long prefix = n % numPrefix;
      int bytesToFill = Math.min(prefixSize_, 8);
      for (int i = 0; i < bytesToFill; ++i) {
        slice[i] = (byte) (prefix % 256);
        prefix /= 256;
      }
      // Pad the rest of the prefix with '0' when prefixSize_ > 8.
      for (int i = 8; i < prefixSize_; ++i) {
        slice[i] = '0';
      }
      startPos = prefixSize_;
    }

    for (int i = slice.length - 1; i >= startPos; --i) {
      slice[i] = (byte) ('0' + (n % 10));
      n /= 10;
    }
  }

  private void destroyDb() {
    if (db_ != null) {
      db_.close();
    }
    // TODO(yhchiang): develop our own FileUtil
    // FileUtil.deleteDir(databaseDir_);
  }

  private void printStats() {
  }

  static void printHelp() {
    System.out.println("usage:");
    for (Flag flag : Flag.values()) {
      System.out.format("  --%s%n\t%s%n",
          flag.name(),
          flag.desc());
      if (flag.getDefaultValue() != null) {
        System.out.format("\tDEFAULT: %s%n",
            flag.getDefaultValue().toString());
      }
    }
  }

  public static void main(String[] args) throws Exception {
    Map<Flag, Object> flags = new EnumMap<Flag, Object>(Flag.class);
    for (Flag flag : Flag.values()) {
      if (flag.getDefaultValue() != null) {
        flags.put(flag, flag.getDefaultValue());
      }
    }
    for (String arg : args) {
      boolean valid = false;
      if (arg.equals("--help") || arg.equals("-h")) {
        printHelp();
        System.exit(0);
      }
      if (arg.startsWith("--")) {
        try {
          // Split on the first '=' only, so values may themselves contain '='.
          String[] parts = arg.substring(2).split("=", 2);
          if (parts.length >= 1) {
            Flag key = Flag.valueOf(parts[0]);
            if (key != null) {
              Object value = null;
              if (parts.length >= 2) {
                value = key.parseValue(parts[1]);
              }
              flags.put(key, value);
              valid = true;
            }
          }
        } catch (Exception e) {
          // Fall through; the argument is reported as invalid below.
        }
      }
      if (!valid) {
        System.err.println("Invalid argument " + arg);
        System.exit(1);
      }
    }
    new DbBenchmark(flags).run();
  }

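  // Command-line flags. Each constant carries its default value, a help
  // string, and a parseValue() override that converts the raw string from
  // the command line into the right type.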
  private enum Flag {
    benchmarks(
        Arrays.asList(
            "fillseq",
            "readrandom",
            "fillrandom"),
        "Comma-separated list of operations to run in the specified order\n" +
        "\tActual benchmarks:\n" +
        "\t\tfillseq -- write N values in sequential key order in async mode.\n" +
        "\t\tfillrandom -- write N values in random key order in async mode.\n" +
        "\t\tfillbatch -- write N/1000 batches where each batch has 1000 values\n" +
        "\t\t in random key order in sync mode.\n" +
        "\t\tfillsync -- write N/1000 values in random key order in sync mode.\n" +
        "\t\tfill100K -- write N/1000 100K values in random order in async mode.\n" +
        "\t\treadseq -- read N times sequentially.\n" +
        "\t\treadrandom -- read N times in random order.\n" +
        "\t\treadhot -- read N times in random order from 1% section of DB.\n" +
        "\t\treadwhilewriting -- measure the read performance of multiple readers\n" +
        "\t\t with a single background writer. The write rate of the writer\n" +
        "\t\t is capped by --writes_per_second.\n" +
        "\tMeta Operations:\n" +
        "\t\tdelete -- delete DB") {
      @Override public Object parseValue(String value) {
        return new ArrayList<String>(Arrays.asList(value.split(",")));
      }
    },
    compression_ratio(0.5d,
        "Arrange to generate values that shrink to this fraction of\n" +
        "\ttheir original size after compression.") {
      @Override public Object parseValue(String value) {
        return Double.parseDouble(value);
      }
    },
    use_existing_db(false,
        "If true, do not destroy the existing database. If you set this\n" +
        "\tflag and also specify a benchmark that wants a fresh database,\n" +
        "\tthat benchmark will fail.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    num(1000000,
        "Number of key/values to place in database.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    threads(1,
        "Number of concurrent threads to run.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    reads(null,
        "Number of read operations to do. If negative, do --num reads.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    key_size(16,
        "The size of each key in bytes.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    value_size(100,
        "The size of each value in bytes.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    write_buffer_size(4L * SizeUnit.MB,
        "Number of bytes to buffer in memtable before compacting\n" +
        "\t(initialized to default value by 'main'.)") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    max_write_buffer_number(2,
        "The number of in-memory memtables. Each memtable is of size\n" +
        "\twrite_buffer_size.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    prefix_size(0,
        "Controls the prefix size for HashSkipList, HashLinkedList,\n" +
        "\tand plain table.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    keys_per_prefix(0,
        "Controls the average number of keys generated per prefix; 0 means\n" +
        "\tno special handling of the prefix, i.e. use the prefix that comes\n" +
        "\twith the generated random number.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    memtablerep("skip_list",
        "The memtable format. Available options are\n" +
        "\tskip_list,\n" +
        "\tvector,\n" +
        "\thash_linkedlist,\n" +
        "\thash_skiplist (prefix_hash.)") {
      @Override public Object parseValue(String value) {
        return value;
      }
    },
    hash_bucket_count(SizeUnit.MB,
        "The number of hash buckets used in the hash-bucket-based\n" +
        "\tmemtables. Memtables that currently support this argument are\n" +
        "\thash_linkedlist and hash_skiplist.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    writes_per_second(10000,
        "The write rate of the background writer used in the\n" +
        "\t`readwhilewriting` benchmark. A non-positive number indicates\n" +
        "\tan unbounded write rate in the `readwhilewriting` benchmark.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    use_plain_table(false,
        "Use plain-table sst format.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    cache_size(-1L,
        "Number of bytes to use as a cache of uncompressed data.\n" +
        "\tNegative means use default settings.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    seed(0L,
        "Seed base for random number generators.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    num_levels(7,
        "The total number of levels.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    numdistinct(1000L,
        "Number of distinct keys to use. Used in RandomWithVerify to\n" +
        "\tread/write on fewer keys so that gets are more likely to find the\n" +
        "\tkey and puts are more likely to update the same key.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    merge_keys(-1L,
        "Number of distinct keys to use for MergeRandom and\n" +
        "\tReadRandomMergeRandom.\n" +
        "\tIf negative, there will be FLAGS_num keys.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    bloom_locality(0, "Control bloom filter probes locality.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    duration(0, "Time in seconds for the random-ops tests to run.\n" +
        "\tWhen 0 then num & reads determine the test duration.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    num_multi_db(0,
        "Number of DBs used in the benchmark. 0 means single DB.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    histogram(false, "Print histogram of operation timings.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    min_write_buffer_number_to_merge(
        defaultOptions_.minWriteBufferNumberToMerge(),
        "The minimum number of write buffers that will be merged together\n" +
        "\tbefore writing to storage. This is cheap because it is an\n" +
        "\tin-memory merge. If this feature is not enabled, then all these\n" +
        "\twrite buffers are flushed to L0 as separate files, which\n" +
        "\tincreases read amplification because a get request has to check\n" +
        "\tall of these files. Also, an in-memory merge may result in\n" +
        "\twriting less data to storage if there are duplicate records\n" +
        "\tin each of these individual write buffers.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    max_background_compactions(
        defaultOptions_.maxBackgroundCompactions(),
        "The maximum number of concurrent background compactions\n" +
        "\tthat can occur in parallel.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    max_background_flushes(
        defaultOptions_.maxBackgroundFlushes(),
        "The maximum number of concurrent background flushes\n" +
        "\tthat can occur in parallel.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    max_background_jobs(defaultOptions_.maxBackgroundJobs(),
        "The maximum number of concurrent background jobs\n" +
        "\tthat can occur in parallel.") {
      @Override
      public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    /* TODO(yhchiang): enable the following
    compaction_style((int32_t) defaultOptions_.compactionStyle(),
        "style of compaction: level-based vs universal.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },*/
    universal_size_ratio(0,
        "Percentage flexibility while comparing file size\n" +
        "\t(for universal compaction only).") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    universal_min_merge_width(0, "The minimum number of files in a\n" +
        "\tsingle compaction run (for universal compaction only).") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    universal_max_merge_width(0, "The max number of files to compact\n" +
        "\tin universal style compaction.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    universal_max_size_amplification_percent(0,
        "The max size amplification for universal style compaction.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    universal_compression_size_percent(-1,
        "The percentage of the database to compress for universal\n" +
        "\tcompaction. -1 means compress everything.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    block_size(defaultBlockBasedTableOptions_.blockSize(),
        "Number of bytes in a block.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    compressed_cache_size(-1L,
        "Number of bytes to use as a cache of compressed data.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    open_files(defaultOptions_.maxOpenFiles(),
        "Maximum number of files to keep open at the same time\n" +
        "\t(use default if == 0)") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    bloom_bits(-1, "Bloom filter bits per key. Negative means\n" +
        "\tuse default settings.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    memtable_bloom_size_ratio(0.0d,
        "Ratio of memtable used by the bloom filter.\n" +
        "\t0 means no bloom filter.") {
      @Override public Object parseValue(String value) {
        return Double.parseDouble(value);
      }
    },
    cache_numshardbits(-1, "Number of shards for the block cache\n" +
        "\tis 2 ** cache_numshardbits. Negative means use default settings.\n" +
        "\tThis is applied only if FLAGS_cache_size is non-negative.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    verify_checksum(false, "Verify checksum for every block read\n" +
        "\tfrom storage.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    statistics(false, "Database statistics.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    writes(-1L, "Number of write operations to do. If negative, do\n" +
        "\t--num writes.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    sync(false, "Sync all writes to disk.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    use_fsync(false, "If true, issue fsync instead of fdatasync.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    disable_wal(false, "If true, do not write WAL for write.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    wal_dir("", "If not empty, use the given dir for WAL.") {
      @Override public Object parseValue(String value) {
        return value;
      }
    },
    target_file_size_base(2 * 1048576, "Target file size at level-1") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    target_file_size_multiplier(1,
        "A multiplier to compute target level-N file size (N >= 2)") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    max_bytes_for_level_base(10 * 1048576,
        "Max bytes for level-1") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    max_bytes_for_level_multiplier(10.0d,
        "A multiplier to compute max bytes for level-N (N >= 2)") {
      @Override public Object parseValue(String value) {
        return Double.parseDouble(value);
      }
    },
    level0_stop_writes_trigger(12, "Number of files in level-0\n" +
        "\tthat will trigger put stop.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    level0_slowdown_writes_trigger(8, "Number of files in level-0\n" +
        "\tthat will slow down writes.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    level0_file_num_compaction_trigger(4, "Number of files in level-0\n" +
        "\twhen compactions start.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    readwritepercent(90, "Ratio of reads to reads/writes (expressed\n" +
        "\tas a percentage) for the ReadRandomWriteRandom workload. The\n" +
        "\tdefault value 90 means 90% of all read and write operations\n" +
        "\tare reads. In other words, 9 gets for every 1 put.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    mergereadpercent(70, "Ratio of merges to merges&reads (expressed\n" +
        "\tas a percentage) for the ReadRandomMergeRandom workload. The\n" +
        "\tdefault value 70 means 70% of all read and merge operations\n" +
        "\tare merges. In other words, 7 merges for every 3 gets.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    deletepercent(2, "Percentage of deletes out of reads/writes/\n" +
        "\tdeletes (used in RandomWithVerify only). RandomWithVerify\n" +
        "\tcalculates writepercent as (100 - FLAGS_readwritepercent -\n" +
        "\tdeletepercent), so deletepercent must be smaller than (100 -\n" +
        "\tFLAGS_readwritepercent).") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    delete_obsolete_files_period_micros(0, "Option to delete\n" +
        "\tobsolete files periodically. 0 means that obsolete files are\n" +
        "\tdeleted after every compaction run.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    compression_type("snappy",
        "Algorithm used to compress the database.") {
      @Override public Object parseValue(String value) {
        return value;
      }
    },
    compression_level(-1,
        "Compression level. For zlib this should be -1 for the\n" +
        "\tdefault level, or between 0 and 9.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    min_level_to_compress(-1, "If non-negative, compression starts\n" +
        "\tfrom this level. Levels with number < min_level_to_compress are\n" +
        "\tnot compressed. Otherwise, apply compression_type to\n" +
        "\tall levels.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    table_cache_numshardbits(4, "") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    stats_interval(0L, "Stats are reported every N operations when\n" +
        "\tthis is greater than zero. When 0 the interval grows over time.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    stats_per_interval(0, "Reports additional stats per interval when\n" +
        "\tthis is greater than 0.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    perf_level(0, "Level of perf collection.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    soft_rate_limit(0.0d, "") {
      @Override public Object parseValue(String value) {
        return Double.parseDouble(value);
      }
    },
    hard_rate_limit(0.0d, "When not equal to 0 this makes threads\n" +
        "\tsleep at each stats reporting interval until the compaction\n" +
        "\tscore for all levels is less than or equal to this value.") {
      @Override public Object parseValue(String value) {
        return Double.parseDouble(value);
      }
    },
    rate_limit_delay_max_milliseconds(1000,
        "When hard_rate_limit is set then this is the max time a put will\n" +
        "\tbe stalled.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    max_compaction_bytes(0L,
        "Limit the number of bytes in one compaction to be lower than this\n" +
        "\tthreshold. But it's not guaranteed.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    readonly(false, "Run read-only benchmarks.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    disable_auto_compactions(false, "Do not auto trigger compactions.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    wal_ttl_seconds(0L, "Set the TTL for the WAL files in seconds.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    wal_size_limit_MB(0L, "Set the size limit for the WAL files\n" +
        "\tin MB.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    /* TODO(yhchiang): enable the following
    direct_reads(rocksdb::EnvOptions().use_direct_reads,
        "Allow direct I/O reads.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    direct_writes(rocksdb::EnvOptions().use_direct_reads,
        "Allow direct I/O writes.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    */
    mmap_read(false,
        "Allow reads to occur via mmap-ing files.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    mmap_write(false,
        "Allow writes to occur via mmap-ing files.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    advise_random_on_open(defaultOptions_.adviseRandomOnOpen(),
        "Advise random access on table file open.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    compaction_fadvice("NORMAL",
        "Access pattern advice when a file is compacted.") {
      @Override public Object parseValue(String value) {
        return value;
      }
    },
    use_tailing_iterator(false,
        "Use tailing iterator to access a series of keys instead of get.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    use_adaptive_mutex(defaultOptions_.useAdaptiveMutex(),
        "Use adaptive mutex.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    bytes_per_sync(defaultOptions_.bytesPerSync(),
        "Allows OS to incrementally sync files to disk while they are\n" +
        "\tbeing written, in the background. Issue one request for every\n" +
        "\tbytes_per_sync written. 0 turns it off.") {
      @Override public Object parseValue(String value) {
        return Long.parseLong(value);
      }
    },
    filter_deletes(false, "If true, deletes use a bloom filter and drop\n" +
        "\tthe delete if the key is not present.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    max_successive_merges(0, "Maximum number of successive merge\n" +
        "\toperations on a key in the memtable.") {
      @Override public Object parseValue(String value) {
        return Integer.parseInt(value);
      }
    },
    db(getTempDir("rocksdb-jni"),
        "Use the db with the following name.") {
      @Override public Object parseValue(String value) {
        return value;
      }
    },
    use_mem_env(false, "Use RocksMemEnv instead of the default\n" +
        "\tfilesystem-based environment.") {
      @Override public Object parseValue(String value) {
        return parseBoolean(value);
      }
    },
    java_comparator(null, "Class name of a Java Comparator to use instead\n" +
        "\tof the default C++ ByteWiseComparatorImpl. Must be available on\n" +
        "\tthe classpath.") {
      @Override
      protected Object parseValue(final String value) {
        try {
          final ComparatorOptions copt = new ComparatorOptions();
          final Class<AbstractComparator> clsComparator =
              (Class<AbstractComparator>) Class.forName(value);
          final Constructor cstr =
              clsComparator.getConstructor(ComparatorOptions.class);
          return cstr.newInstance(copt);
        } catch (final ClassNotFoundException cnfe) {
          throw new IllegalArgumentException("Java Comparator '" + value + "'" +
              " not found on the classpath", cnfe);
        } catch (final NoSuchMethodException nsme) {
          throw new IllegalArgumentException("Java Comparator '" + value + "'" +
              " does not have a public ComparatorOptions constructor", nsme);
        } catch (final IllegalAccessException | InstantiationException
            | InvocationTargetException ie) {
          throw new IllegalArgumentException("Unable to construct Java" +
              " Comparator '" + value + "'", ie);
        }
      }
    };

    private Flag(Object defaultValue, String desc) {
      defaultValue_ = defaultValue;
      desc_ = desc;
    }

    public Object getDefaultValue() {
      return defaultValue_;
    }

    public String desc() {
      return desc_;
    }

    public boolean parseBoolean(String value) {
      if (value.equals("1")) {
        return true;
      } else if (value.equals("0")) {
        return false;
      }
      return Boolean.parseBoolean(value);
    }

    protected abstract Object parseValue(String value);

    private final Object defaultValue_;
    private final String desc_;
  }

  private final static String DEFAULT_TEMP_DIR = "/tmp";

  private static String getTempDir(final String dirName) {
    try {
      return Files.createTempDirectory(dirName).toAbsolutePath().toString();
    } catch (final IOException ioe) {
      System.err.println("Unable to create temp directory, defaulting to: " +
          DEFAULT_TEMP_DIR);
      // Use File.separator (not File.pathSeparator) when building a path.
      return DEFAULT_TEMP_DIR + File.separator + dirName;
    }
  }

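  // Produces pseudo-random values with a tunable compression ratio: roughly
  // a compressionRatio fraction of each value is fresh random printable
  // bytes and the remainder is a repeat of that block, so it compresses
  // back down to about the requested fraction.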
  private static class RandomGenerator {
    private final byte[] data_;
    private int dataLength_;
    private int position_;
    private double compressionRatio_;
    Random rand_;

    private RandomGenerator(long seed, double compressionRatio) {
      // We use a limited amount of data over and over again and ensure
      // that it is larger than the compression window (32KB), and also
      // large enough to serve all typical value sizes we want to write.
      byte[] value = new byte[100];
      rand_ = new Random(seed);
      dataLength_ = value.length * 10000;
      data_ = new byte[dataLength_];
      compressionRatio_ = compressionRatio;
      int pos = 0;
      while (pos < dataLength_) {
        compressibleBytes(value);
        System.arraycopy(value, 0, data_, pos,
            Math.min(value.length, dataLength_ - pos));
        pos += value.length;
      }
    }

    private void compressibleBytes(byte[] value) {
      int baseLength = value.length;
      if (compressionRatio_ < 1.0d) {
        baseLength = (int) (compressionRatio_ * value.length + 0.5);
      }
      if (baseLength <= 0) {
        baseLength = 1;
      }
      int pos;
      for (pos = 0; pos < baseLength; ++pos) {
        value[pos] = (byte) (' ' + rand_.nextInt(95)); // ' ' .. '~'
      }
      while (pos < value.length) {
        System.arraycopy(value, 0, value, pos,
            Math.min(baseLength, value.length - pos));
        pos += baseLength;
      }
    }

    private void generate(byte[] value) {
      if (position_ + value.length > data_.length) {
        position_ = 0;
        assert (value.length <= data_.length);
      }
      position_ += value.length;
      System.arraycopy(data_, position_ - value.length,
          value, 0, value.length);
    }
  }

  boolean isFinished() {
    synchronized (finishLock_) {
      return isFinished_;
    }
  }

  void setFinished(boolean flag) {
    synchronized (finishLock_) {
      isFinished_ = flag;
    }
  }

  RocksDB db_;
  final List<String> benchmarks_;
  final int num_;
  final int reads_;
  final int keySize_;
  final int valueSize_;
  final int threadNum_;
  final int writesPerSeconds_;
  final long randSeed_;
  final boolean useExisting_;
  final String databaseDir_;
  double compressionRatio_;
  RandomGenerator gen_;
  long startTime_;

  // env
  boolean useMemenv_;

  // memtable related
  final int maxWriteBufferNumber_;
  final int prefixSize_;
  final int keysPerPrefix_;
  final String memtable_;
  final long hashBucketCount_;

  // sst format related
  boolean usePlainTable_;

  Object finishLock_;
  boolean isFinished_;
  Map<Flag, Object> flags_;
  // Since a static member lives for the whole program, we let its C++
  // pointer be disposed of in its finalizer.
  static Options defaultOptions_ = new Options();
  static BlockBasedTableConfig defaultBlockBasedTableOptions_ =
      new BlockBasedTableConfig();
  String compressionType_;
  CompressionType compression_;
}