1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
12 #include "db/arena_wrapped_db_iter.h"
13 #include "db/db_iter.h"
14 #include "db/db_test_util.h"
15 #include "port/port.h"
16 #include "port/stack_trace.h"
17 #include "rocksdb/iostats_context.h"
18 #include "rocksdb/perf_context.h"
19 #include "table/block_based/flush_block_policy.h"
20 #include "util/random.h"
22 namespace ROCKSDB_NAMESPACE
{
24 // A dumb ReadCallback which saying every key is committed.
25 class DummyReadCallback
: public ReadCallback
{
27 DummyReadCallback() : ReadCallback(kMaxSequenceNumber
) {}
28 bool IsVisibleFullCheck(SequenceNumber
/*seq*/) override
{ return true; }
29 void SetSnapshot(SequenceNumber seq
) { max_visible_seq_
= seq
; }
33 // bool: whether to pass read_callback to NewIterator().
34 class DBIteratorTest
: public DBTestBase
,
35 public testing::WithParamInterface
<bool> {
37 DBIteratorTest() : DBTestBase("/db_iterator_test", /*env_do_fsync=*/true) {}
39 Iterator
* NewIterator(const ReadOptions
& read_options
,
40 ColumnFamilyHandle
* column_family
= nullptr) {
41 if (column_family
== nullptr) {
42 column_family
= db_
->DefaultColumnFamily();
45 static_cast_with_check
<ColumnFamilyHandleImpl
>(column_family
)->cfd();
46 SequenceNumber seq
= read_options
.snapshot
!= nullptr
47 ? read_options
.snapshot
->GetSequenceNumber()
48 : db_
->GetLatestSequenceNumber();
49 bool use_read_callback
= GetParam();
50 DummyReadCallback
* read_callback
= nullptr;
51 if (use_read_callback
) {
52 read_callback
= new DummyReadCallback();
53 read_callback
->SetSnapshot(seq
);
54 InstrumentedMutexLock
lock(&mutex_
);
55 read_callbacks_
.push_back(
56 std::unique_ptr
<DummyReadCallback
>(read_callback
));
58 return dbfull()->NewIteratorImpl(read_options
, cfd
, seq
, read_callback
);
62 InstrumentedMutex mutex_
;
63 std::vector
<std::unique_ptr
<DummyReadCallback
>> read_callbacks_
;
66 TEST_P(DBIteratorTest
, IteratorProperty
) {
67 // The test needs to be changed if kPersistedTier is supported in iterator.
68 Options options
= CurrentOptions();
69 CreateAndReopenWithCF({"pikachu"}, options
);
73 ropt
.pin_data
= false;
75 std::unique_ptr
<Iterator
> iter(NewIterator(ropt
, handles_
[1]));
77 std::string prop_value
;
78 ASSERT_NOK(iter
->GetProperty("non_existing.value", &prop_value
));
79 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
80 ASSERT_EQ("0", prop_value
);
81 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.internal-key", &prop_value
));
82 ASSERT_EQ("1", prop_value
);
84 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
85 ASSERT_EQ("Iterator is not valid.", prop_value
);
87 // Get internal key at which the iteration stopped (tombstone in this case).
88 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.internal-key", &prop_value
));
89 ASSERT_EQ("2", prop_value
);
94 TEST_P(DBIteratorTest
, PersistedTierOnIterator
) {
95 // The test needs to be changed if kPersistedTier is supported in iterator.
96 Options options
= CurrentOptions();
97 CreateAndReopenWithCF({"pikachu"}, options
);
99 ropt
.read_tier
= kPersistedTier
;
101 auto* iter
= db_
->NewIterator(ropt
, handles_
[1]);
102 ASSERT_TRUE(iter
->status().IsNotSupported());
105 std::vector
<Iterator
*> iters
;
106 ASSERT_TRUE(db_
->NewIterators(ropt
, {handles_
[1]}, &iters
).IsNotSupported());
110 TEST_P(DBIteratorTest
, NonBlockingIteration
) {
112 ReadOptions non_blocking_opts
, regular_opts
;
113 Options options
= CurrentOptions();
114 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
115 non_blocking_opts
.read_tier
= kBlockCacheTier
;
116 CreateAndReopenWithCF({"pikachu"}, options
);
117 // write one kv to the database.
118 ASSERT_OK(Put(1, "a", "b"));
120 // scan using non-blocking iterator. We should find it because
121 // it is in memtable.
122 Iterator
* iter
= NewIterator(non_blocking_opts
, handles_
[1]);
124 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
125 ASSERT_OK(iter
->status());
131 // flush memtable to storage. Now, the key should not be in the
132 // memtable neither in the block cache.
135 // verify that a non-blocking iterator does not find any
136 // kvs. Neither does it do any IOs to storage.
137 uint64_t numopen
= TestGetTickerCount(options
, NO_FILE_OPENS
);
138 uint64_t cache_added
= TestGetTickerCount(options
, BLOCK_CACHE_ADD
);
139 iter
= NewIterator(non_blocking_opts
, handles_
[1]);
141 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
145 ASSERT_TRUE(iter
->status().IsIncomplete());
146 ASSERT_EQ(numopen
, TestGetTickerCount(options
, NO_FILE_OPENS
));
147 ASSERT_EQ(cache_added
, TestGetTickerCount(options
, BLOCK_CACHE_ADD
));
150 // read in the specified block via a regular get
151 ASSERT_EQ(Get(1, "a"), "b");
153 // verify that we can find it via a non-blocking scan
154 numopen
= TestGetTickerCount(options
, NO_FILE_OPENS
);
155 cache_added
= TestGetTickerCount(options
, BLOCK_CACHE_ADD
);
156 iter
= NewIterator(non_blocking_opts
, handles_
[1]);
158 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
159 ASSERT_OK(iter
->status());
163 ASSERT_EQ(numopen
, TestGetTickerCount(options
, NO_FILE_OPENS
));
164 ASSERT_EQ(cache_added
, TestGetTickerCount(options
, BLOCK_CACHE_ADD
));
167 // This test verifies block cache behaviors, which is not used by plain
169 } while (ChangeOptions(kSkipPlainTable
| kSkipNoSeekToLast
| kSkipMmapReads
));
172 TEST_P(DBIteratorTest
, IterSeekBeforePrev
) {
173 ASSERT_OK(Put("a", "b"));
174 ASSERT_OK(Put("c", "d"));
175 dbfull()->Flush(FlushOptions());
176 ASSERT_OK(Put("0", "f"));
177 ASSERT_OK(Put("1", "h"));
178 dbfull()->Flush(FlushOptions());
179 ASSERT_OK(Put("2", "j"));
180 auto iter
= NewIterator(ReadOptions());
181 iter
->Seek(Slice("c"));
183 iter
->Seek(Slice("a"));
188 TEST_P(DBIteratorTest
, IterReseekNewUpperBound
) {
190 Options options
= CurrentOptions();
191 BlockBasedTableOptions table_options
;
192 table_options
.block_size
= 1024;
193 table_options
.block_size_deviation
= 50;
194 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
195 options
.compression
= kNoCompression
;
198 ASSERT_OK(Put("a", rnd
.RandomString(400)));
199 ASSERT_OK(Put("aabb", rnd
.RandomString(400)));
200 ASSERT_OK(Put("aaef", rnd
.RandomString(400)));
201 ASSERT_OK(Put("b", rnd
.RandomString(400)));
202 dbfull()->Flush(FlushOptions());
204 Slice ub
= Slice("aa");
205 opts
.iterate_upper_bound
= &ub
;
206 auto iter
= NewIterator(opts
);
207 iter
->Seek(Slice("a"));
209 iter
->Seek(Slice("aabc"));
210 ASSERT_TRUE(iter
->Valid());
211 ASSERT_EQ(iter
->key().ToString(), "aaef");
215 TEST_P(DBIteratorTest
, IterSeekForPrevBeforeNext
) {
216 ASSERT_OK(Put("a", "b"));
217 ASSERT_OK(Put("c", "d"));
218 dbfull()->Flush(FlushOptions());
219 ASSERT_OK(Put("0", "f"));
220 ASSERT_OK(Put("1", "h"));
221 dbfull()->Flush(FlushOptions());
222 ASSERT_OK(Put("2", "j"));
223 auto iter
= NewIterator(ReadOptions());
224 iter
->SeekForPrev(Slice("0"));
226 iter
->SeekForPrev(Slice("1"));
232 std::string
MakeLongKey(size_t length
, char c
) {
233 return std::string(length
, c
);
237 TEST_P(DBIteratorTest
, IterLongKeys
) {
238 ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
239 ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
240 ASSERT_OK(Put("a", "b"));
241 dbfull()->Flush(FlushOptions());
242 ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
243 ASSERT_OK(Put(MakeLongKey(127, 3), "3"));
244 ASSERT_OK(Put(MakeLongKey(64, 4), "4"));
245 auto iter
= NewIterator(ReadOptions());
247 // Create a key that needs to be skipped for Seq too new
248 iter
->Seek(MakeLongKey(20, 0));
249 ASSERT_EQ(IterStatus(iter
), MakeLongKey(20, 0) + "->0");
251 ASSERT_EQ(IterStatus(iter
), MakeLongKey(50, 1) + "->1");
253 ASSERT_EQ(IterStatus(iter
), MakeLongKey(32, 2) + "->2");
255 ASSERT_EQ(IterStatus(iter
), MakeLongKey(127, 3) + "->3");
257 ASSERT_EQ(IterStatus(iter
), MakeLongKey(64, 4) + "->4");
259 iter
->SeekForPrev(MakeLongKey(127, 3));
260 ASSERT_EQ(IterStatus(iter
), MakeLongKey(127, 3) + "->3");
262 ASSERT_EQ(IterStatus(iter
), MakeLongKey(32, 2) + "->2");
264 ASSERT_EQ(IterStatus(iter
), MakeLongKey(50, 1) + "->1");
267 iter
= NewIterator(ReadOptions());
268 iter
->Seek(MakeLongKey(50, 1));
269 ASSERT_EQ(IterStatus(iter
), MakeLongKey(50, 1) + "->1");
271 ASSERT_EQ(IterStatus(iter
), MakeLongKey(32, 2) + "->2");
273 ASSERT_EQ(IterStatus(iter
), MakeLongKey(127, 3) + "->3");
277 TEST_P(DBIteratorTest
, IterNextWithNewerSeq
) {
278 ASSERT_OK(Put("0", "0"));
279 dbfull()->Flush(FlushOptions());
280 ASSERT_OK(Put("a", "b"));
281 ASSERT_OK(Put("c", "d"));
282 ASSERT_OK(Put("d", "e"));
283 auto iter
= NewIterator(ReadOptions());
285 // Create a key that needs to be skipped for Seq too new
286 for (uint64_t i
= 0; i
< last_options_
.max_sequential_skip_in_iterations
+ 1;
288 ASSERT_OK(Put("b", "f"));
291 iter
->Seek(Slice("a"));
292 ASSERT_EQ(IterStatus(iter
), "a->b");
294 ASSERT_EQ(IterStatus(iter
), "c->d");
295 iter
->SeekForPrev(Slice("b"));
296 ASSERT_EQ(IterStatus(iter
), "a->b");
298 ASSERT_EQ(IterStatus(iter
), "c->d");
303 TEST_P(DBIteratorTest
, IterPrevWithNewerSeq
) {
304 ASSERT_OK(Put("0", "0"));
305 dbfull()->Flush(FlushOptions());
306 ASSERT_OK(Put("a", "b"));
307 ASSERT_OK(Put("c", "d"));
308 ASSERT_OK(Put("d", "e"));
309 auto iter
= NewIterator(ReadOptions());
311 // Create a key that needs to be skipped for Seq too new
312 for (uint64_t i
= 0; i
< last_options_
.max_sequential_skip_in_iterations
+ 1;
314 ASSERT_OK(Put("b", "f"));
317 iter
->Seek(Slice("d"));
318 ASSERT_EQ(IterStatus(iter
), "d->e");
320 ASSERT_EQ(IterStatus(iter
), "c->d");
322 ASSERT_EQ(IterStatus(iter
), "a->b");
324 iter
->SeekForPrev(Slice("d"));
325 ASSERT_EQ(IterStatus(iter
), "d->e");
327 ASSERT_EQ(IterStatus(iter
), "c->d");
329 ASSERT_EQ(IterStatus(iter
), "a->b");
334 TEST_P(DBIteratorTest
, IterPrevWithNewerSeq2
) {
335 ASSERT_OK(Put("0", "0"));
336 dbfull()->Flush(FlushOptions());
337 ASSERT_OK(Put("a", "b"));
338 ASSERT_OK(Put("c", "d"));
339 ASSERT_OK(Put("e", "f"));
340 auto iter
= NewIterator(ReadOptions());
341 auto iter2
= NewIterator(ReadOptions());
342 iter
->Seek(Slice("c"));
343 iter2
->SeekForPrev(Slice("d"));
344 ASSERT_EQ(IterStatus(iter
), "c->d");
345 ASSERT_EQ(IterStatus(iter2
), "c->d");
347 // Create a key that needs to be skipped for Seq too new
348 for (uint64_t i
= 0; i
< last_options_
.max_sequential_skip_in_iterations
+ 1;
350 ASSERT_OK(Put("b", "f"));
354 ASSERT_EQ(IterStatus(iter
), "a->b");
357 ASSERT_EQ(IterStatus(iter2
), "a->b");
363 TEST_P(DBIteratorTest
, IterEmpty
) {
365 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
366 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
369 ASSERT_EQ(IterStatus(iter
), "(invalid)");
372 ASSERT_EQ(IterStatus(iter
), "(invalid)");
375 ASSERT_EQ(IterStatus(iter
), "(invalid)");
377 iter
->SeekForPrev("foo");
378 ASSERT_EQ(IterStatus(iter
), "(invalid)");
381 } while (ChangeCompactOptions());
384 TEST_P(DBIteratorTest
, IterSingle
) {
386 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
387 ASSERT_OK(Put(1, "a", "va"));
388 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
391 ASSERT_EQ(IterStatus(iter
), "a->va");
393 ASSERT_EQ(IterStatus(iter
), "(invalid)");
395 ASSERT_EQ(IterStatus(iter
), "a->va");
397 ASSERT_EQ(IterStatus(iter
), "(invalid)");
400 ASSERT_EQ(IterStatus(iter
), "a->va");
402 ASSERT_EQ(IterStatus(iter
), "(invalid)");
404 ASSERT_EQ(IterStatus(iter
), "a->va");
406 ASSERT_EQ(IterStatus(iter
), "(invalid)");
409 ASSERT_EQ(IterStatus(iter
), "a->va");
411 ASSERT_EQ(IterStatus(iter
), "(invalid)");
412 iter
->SeekForPrev("");
413 ASSERT_EQ(IterStatus(iter
), "(invalid)");
416 ASSERT_EQ(IterStatus(iter
), "a->va");
418 ASSERT_EQ(IterStatus(iter
), "(invalid)");
419 iter
->SeekForPrev("a");
420 ASSERT_EQ(IterStatus(iter
), "a->va");
422 ASSERT_EQ(IterStatus(iter
), "(invalid)");
425 ASSERT_EQ(IterStatus(iter
), "(invalid)");
426 iter
->SeekForPrev("b");
427 ASSERT_EQ(IterStatus(iter
), "a->va");
429 ASSERT_EQ(IterStatus(iter
), "(invalid)");
432 } while (ChangeCompactOptions());
435 TEST_P(DBIteratorTest
, IterMulti
) {
437 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
438 ASSERT_OK(Put(1, "a", "va"));
439 ASSERT_OK(Put(1, "b", "vb"));
440 ASSERT_OK(Put(1, "c", "vc"));
441 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
444 ASSERT_EQ(IterStatus(iter
), "a->va");
446 ASSERT_EQ(IterStatus(iter
), "b->vb");
448 ASSERT_EQ(IterStatus(iter
), "c->vc");
450 ASSERT_EQ(IterStatus(iter
), "(invalid)");
452 ASSERT_EQ(IterStatus(iter
), "a->va");
454 ASSERT_EQ(IterStatus(iter
), "(invalid)");
457 ASSERT_EQ(IterStatus(iter
), "c->vc");
459 ASSERT_EQ(IterStatus(iter
), "b->vb");
461 ASSERT_EQ(IterStatus(iter
), "a->va");
463 ASSERT_EQ(IterStatus(iter
), "(invalid)");
465 ASSERT_EQ(IterStatus(iter
), "c->vc");
467 ASSERT_EQ(IterStatus(iter
), "(invalid)");
470 ASSERT_EQ(IterStatus(iter
), "a->va");
472 ASSERT_EQ(IterStatus(iter
), "a->va");
474 ASSERT_EQ(IterStatus(iter
), "b->vb");
475 iter
->SeekForPrev("d");
476 ASSERT_EQ(IterStatus(iter
), "c->vc");
477 iter
->SeekForPrev("c");
478 ASSERT_EQ(IterStatus(iter
), "c->vc");
479 iter
->SeekForPrev("bx");
480 ASSERT_EQ(IterStatus(iter
), "b->vb");
483 ASSERT_EQ(IterStatus(iter
), "b->vb");
485 ASSERT_EQ(IterStatus(iter
), "(invalid)");
486 iter
->SeekForPrev("b");
487 ASSERT_EQ(IterStatus(iter
), "b->vb");
488 iter
->SeekForPrev("");
489 ASSERT_EQ(IterStatus(iter
), "(invalid)");
491 // Switch from reverse to forward
496 ASSERT_EQ(IterStatus(iter
), "b->vb");
498 // Switch from forward to reverse
503 ASSERT_EQ(IterStatus(iter
), "b->vb");
505 // Make sure iter stays at snapshot
506 ASSERT_OK(Put(1, "a", "va2"));
507 ASSERT_OK(Put(1, "a2", "va3"));
508 ASSERT_OK(Put(1, "b", "vb2"));
509 ASSERT_OK(Put(1, "c", "vc2"));
510 ASSERT_OK(Delete(1, "b"));
512 ASSERT_EQ(IterStatus(iter
), "a->va");
514 ASSERT_EQ(IterStatus(iter
), "b->vb");
516 ASSERT_EQ(IterStatus(iter
), "c->vc");
518 ASSERT_EQ(IterStatus(iter
), "(invalid)");
520 ASSERT_EQ(IterStatus(iter
), "c->vc");
522 ASSERT_EQ(IterStatus(iter
), "b->vb");
524 ASSERT_EQ(IterStatus(iter
), "a->va");
526 ASSERT_EQ(IterStatus(iter
), "(invalid)");
529 } while (ChangeCompactOptions());
532 // Check that we can skip over a run of user keys
533 // by using reseek rather than sequential scan
534 TEST_P(DBIteratorTest
, IterReseek
) {
535 anon::OptionsOverride options_override
;
536 options_override
.skip_policy
= kSkipNoSnapshot
;
537 Options options
= CurrentOptions(options_override
);
538 options
.max_sequential_skip_in_iterations
= 3;
539 options
.create_if_missing
= true;
540 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
541 DestroyAndReopen(options
);
542 CreateAndReopenWithCF({"pikachu"}, options
);
544 // insert three keys with same userkey and verify that
545 // reseek is not invoked. For each of these test cases,
546 // verify that we can find the next key "b".
547 ASSERT_OK(Put(1, "a", "zero"));
548 ASSERT_OK(Put(1, "a", "one"));
549 ASSERT_OK(Put(1, "a", "two"));
550 ASSERT_OK(Put(1, "b", "bone"));
551 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
553 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 0);
554 ASSERT_EQ(IterStatus(iter
), "a->two");
556 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 0);
557 ASSERT_EQ(IterStatus(iter
), "b->bone");
560 // insert a total of three keys with same userkey and verify
561 // that reseek is still not invoked.
562 ASSERT_OK(Put(1, "a", "three"));
563 iter
= NewIterator(ReadOptions(), handles_
[1]);
565 ASSERT_EQ(IterStatus(iter
), "a->three");
567 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 0);
568 ASSERT_EQ(IterStatus(iter
), "b->bone");
571 // insert a total of four keys with same userkey and verify
572 // that reseek is invoked.
573 ASSERT_OK(Put(1, "a", "four"));
574 iter
= NewIterator(ReadOptions(), handles_
[1]);
576 ASSERT_EQ(IterStatus(iter
), "a->four");
577 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 0);
579 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 1);
580 ASSERT_EQ(IterStatus(iter
), "b->bone");
583 // Testing reverse iterator
584 // At this point, we have three versions of "a" and one version of "b".
585 // The reseek statistics is already at 1.
586 int num_reseeks
= static_cast<int>(
587 TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
));
589 // Insert another version of b and assert that reseek is not invoked
590 ASSERT_OK(Put(1, "b", "btwo"));
591 iter
= NewIterator(ReadOptions(), handles_
[1]);
593 ASSERT_EQ(IterStatus(iter
), "b->btwo");
594 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
),
597 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
),
599 ASSERT_EQ(IterStatus(iter
), "a->four");
602 // insert two more versions of b. This makes a total of 4 versions
603 // of b and 4 versions of a.
604 ASSERT_OK(Put(1, "b", "bthree"));
605 ASSERT_OK(Put(1, "b", "bfour"));
606 iter
= NewIterator(ReadOptions(), handles_
[1]);
608 ASSERT_EQ(IterStatus(iter
), "b->bfour");
609 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
),
613 // the previous Prev call should have invoked reseek
614 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
),
616 ASSERT_EQ(IterStatus(iter
), "a->four");
620 TEST_P(DBIteratorTest
, IterSmallAndLargeMix
) {
622 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
623 ASSERT_OK(Put(1, "a", "va"));
624 ASSERT_OK(Put(1, "b", std::string(100000, 'b')));
625 ASSERT_OK(Put(1, "c", "vc"));
626 ASSERT_OK(Put(1, "d", std::string(100000, 'd')));
627 ASSERT_OK(Put(1, "e", std::string(100000, 'e')));
629 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
632 ASSERT_EQ(IterStatus(iter
), "a->va");
634 ASSERT_EQ(IterStatus(iter
), "b->" + std::string(100000, 'b'));
636 ASSERT_EQ(IterStatus(iter
), "c->vc");
638 ASSERT_EQ(IterStatus(iter
), "d->" + std::string(100000, 'd'));
640 ASSERT_EQ(IterStatus(iter
), "e->" + std::string(100000, 'e'));
642 ASSERT_EQ(IterStatus(iter
), "(invalid)");
645 ASSERT_EQ(IterStatus(iter
), "e->" + std::string(100000, 'e'));
647 ASSERT_EQ(IterStatus(iter
), "d->" + std::string(100000, 'd'));
649 ASSERT_EQ(IterStatus(iter
), "c->vc");
651 ASSERT_EQ(IterStatus(iter
), "b->" + std::string(100000, 'b'));
653 ASSERT_EQ(IterStatus(iter
), "a->va");
655 ASSERT_EQ(IterStatus(iter
), "(invalid)");
658 } while (ChangeCompactOptions());
661 TEST_P(DBIteratorTest
, IterMultiWithDelete
) {
663 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
664 ASSERT_OK(Put(1, "ka", "va"));
665 ASSERT_OK(Put(1, "kb", "vb"));
666 ASSERT_OK(Put(1, "kc", "vc"));
667 ASSERT_OK(Delete(1, "kb"));
668 ASSERT_EQ("NOT_FOUND", Get(1, "kb"));
670 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
672 ASSERT_EQ(IterStatus(iter
), "kc->vc");
673 if (!CurrentOptions().merge_operator
) {
674 // TODO: merge operator does not support backward iteration yet
675 if (kPlainTableAllBytesPrefix
!= option_config_
&&
676 kBlockBasedTableWithWholeKeyHashIndex
!= option_config_
&&
677 kHashLinkList
!= option_config_
&&
678 kHashSkipList
!= option_config_
) { // doesn't support SeekToLast
680 ASSERT_EQ(IterStatus(iter
), "ka->va");
684 } while (ChangeOptions());
687 TEST_P(DBIteratorTest
, IterPrevMaxSkip
) {
689 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
690 for (int i
= 0; i
< 2; i
++) {
691 ASSERT_OK(Put(1, "key1", "v1"));
692 ASSERT_OK(Put(1, "key2", "v2"));
693 ASSERT_OK(Put(1, "key3", "v3"));
694 ASSERT_OK(Put(1, "key4", "v4"));
695 ASSERT_OK(Put(1, "key5", "v5"));
698 VerifyIterLast("key5->v5", 1);
700 ASSERT_OK(Delete(1, "key5"));
701 VerifyIterLast("key4->v4", 1);
703 ASSERT_OK(Delete(1, "key4"));
704 VerifyIterLast("key3->v3", 1);
706 ASSERT_OK(Delete(1, "key3"));
707 VerifyIterLast("key2->v2", 1);
709 ASSERT_OK(Delete(1, "key2"));
710 VerifyIterLast("key1->v1", 1);
712 ASSERT_OK(Delete(1, "key1"));
713 VerifyIterLast("(invalid)", 1);
714 } while (ChangeOptions(kSkipMergePut
| kSkipNoSeekToLast
));
717 TEST_P(DBIteratorTest
, IterWithSnapshot
) {
718 anon::OptionsOverride options_override
;
719 options_override
.skip_policy
= kSkipNoSnapshot
;
721 CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override
));
722 ASSERT_OK(Put(1, "key1", "val1"));
723 ASSERT_OK(Put(1, "key2", "val2"));
724 ASSERT_OK(Put(1, "key3", "val3"));
725 ASSERT_OK(Put(1, "key4", "val4"));
726 ASSERT_OK(Put(1, "key5", "val5"));
728 const Snapshot
* snapshot
= db_
->GetSnapshot();
730 options
.snapshot
= snapshot
;
731 Iterator
* iter
= NewIterator(options
, handles_
[1]);
733 ASSERT_OK(Put(1, "key0", "val0"));
734 // Put more values after the snapshot
735 ASSERT_OK(Put(1, "key100", "val100"));
736 ASSERT_OK(Put(1, "key101", "val101"));
739 ASSERT_EQ(IterStatus(iter
), "key5->val5");
740 if (!CurrentOptions().merge_operator
) {
741 // TODO: merge operator does not support backward iteration yet
742 if (kPlainTableAllBytesPrefix
!= option_config_
&&
743 kBlockBasedTableWithWholeKeyHashIndex
!= option_config_
&&
744 kHashLinkList
!= option_config_
&& kHashSkipList
!= option_config_
) {
746 ASSERT_EQ(IterStatus(iter
), "key4->val4");
748 ASSERT_EQ(IterStatus(iter
), "key3->val3");
751 ASSERT_EQ(IterStatus(iter
), "key4->val4");
753 ASSERT_EQ(IterStatus(iter
), "key5->val5");
756 ASSERT_TRUE(!iter
->Valid());
759 if (!CurrentOptions().merge_operator
) {
760 // TODO(gzh): merge operator does not support backward iteration yet
761 if (kPlainTableAllBytesPrefix
!= option_config_
&&
762 kBlockBasedTableWithWholeKeyHashIndex
!= option_config_
&&
763 kHashLinkList
!= option_config_
&& kHashSkipList
!= option_config_
) {
764 iter
->SeekForPrev("key1");
765 ASSERT_EQ(IterStatus(iter
), "key1->val1");
767 ASSERT_EQ(IterStatus(iter
), "key2->val2");
769 ASSERT_EQ(IterStatus(iter
), "key3->val3");
771 ASSERT_EQ(IterStatus(iter
), "key2->val2");
773 ASSERT_EQ(IterStatus(iter
), "key1->val1");
775 ASSERT_TRUE(!iter
->Valid());
778 db_
->ReleaseSnapshot(snapshot
);
780 } while (ChangeOptions());
783 TEST_P(DBIteratorTest
, IteratorPinsRef
) {
785 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
786 Put(1, "foo", "hello");
788 // Get iterator that will yield the current contents of the DB.
789 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
791 // Write to force compactions
792 Put(1, "foo", "newvalue1");
793 for (int i
= 0; i
< 100; i
++) {
795 ASSERT_OK(Put(1, Key(i
), Key(i
) + std::string(100000, 'v')));
797 Put(1, "foo", "newvalue2");
800 ASSERT_TRUE(iter
->Valid());
801 ASSERT_EQ("foo", iter
->key().ToString());
802 ASSERT_EQ("hello", iter
->value().ToString());
804 ASSERT_TRUE(!iter
->Valid());
806 } while (ChangeCompactOptions());
809 TEST_P(DBIteratorTest
, IteratorDeleteAfterCfDelete
) {
810 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
812 Put(1, "foo", "delete-cf-then-delete-iter");
813 Put(1, "hello", "value2");
815 ColumnFamilyHandle
* cf
= handles_
[1];
818 auto* iter
= db_
->NewIterator(ro
, cf
);
820 ASSERT_EQ(IterStatus(iter
), "foo->delete-cf-then-delete-iter");
823 db_
->DestroyColumnFamilyHandle(cf
);
824 handles_
.erase(std::begin(handles_
) + 1);
826 // delete Iterator after CF handle is deleted
828 ASSERT_EQ(IterStatus(iter
), "hello->value2");
832 TEST_P(DBIteratorTest
, IteratorDeleteAfterCfDrop
) {
833 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
835 Put(1, "foo", "drop-cf-then-delete-iter");
838 ColumnFamilyHandle
* cf
= handles_
[1];
840 auto* iter
= db_
->NewIterator(ro
, cf
);
842 ASSERT_EQ(IterStatus(iter
), "foo->drop-cf-then-delete-iter");
844 // drop and delete CF
845 db_
->DropColumnFamily(cf
);
846 db_
->DestroyColumnFamilyHandle(cf
);
847 handles_
.erase(std::begin(handles_
) + 1);
849 // delete Iterator after CF handle is dropped
853 // SetOptions not defined in ROCKSDB LITE
855 TEST_P(DBIteratorTest
, DBIteratorBoundTest
) {
856 Options options
= CurrentOptions();
858 options
.create_if_missing
= true;
860 options
.prefix_extractor
= nullptr;
861 DestroyAndReopen(options
);
862 ASSERT_OK(Put("a", "0"));
863 ASSERT_OK(Put("foo", "bar"));
864 ASSERT_OK(Put("foo1", "bar1"));
865 ASSERT_OK(Put("g1", "0"));
867 // testing basic case with no iterate_upper_bound and no prefix_extractor
870 ro
.iterate_upper_bound
= nullptr;
872 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
876 ASSERT_TRUE(iter
->Valid());
877 ASSERT_EQ(iter
->key().compare(Slice("foo")), 0);
880 ASSERT_TRUE(iter
->Valid());
881 ASSERT_EQ(iter
->key().compare(Slice("foo1")), 0);
884 ASSERT_TRUE(iter
->Valid());
885 ASSERT_EQ(iter
->key().compare(Slice("g1")), 0);
887 iter
->SeekForPrev("g1");
889 ASSERT_TRUE(iter
->Valid());
890 ASSERT_EQ(iter
->key().compare(Slice("g1")), 0);
893 ASSERT_TRUE(iter
->Valid());
894 ASSERT_EQ(iter
->key().compare(Slice("foo1")), 0);
897 ASSERT_TRUE(iter
->Valid());
898 ASSERT_EQ(iter
->key().compare(Slice("foo")), 0);
901 // testing iterate_upper_bound and forward iterator
902 // to make sure it stops at bound
905 // iterate_upper_bound points beyond the last expected entry
906 Slice
prefix("foo2");
907 ro
.iterate_upper_bound
= &prefix
;
909 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
913 ASSERT_TRUE(iter
->Valid());
914 ASSERT_EQ(iter
->key().compare(Slice("foo")), 0);
917 ASSERT_TRUE(iter
->Valid());
918 ASSERT_EQ(iter
->key().compare(("foo1")), 0);
921 // should stop here...
922 ASSERT_TRUE(!iter
->Valid());
924 // Testing SeekToLast with iterate_upper_bound set
929 ro
.iterate_upper_bound
= &prefix
;
931 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
934 ASSERT_TRUE(iter
->Valid());
935 ASSERT_EQ(iter
->key().compare(Slice("a")), 0);
938 // prefix is the first letter of the key
939 ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}}));
940 ASSERT_OK(Put("a", "0"));
941 ASSERT_OK(Put("foo", "bar"));
942 ASSERT_OK(Put("foo1", "bar1"));
943 ASSERT_OK(Put("g1", "0"));
945 // testing with iterate_upper_bound and prefix_extractor
946 // Seek target and iterate_upper_bound are not is same prefix
947 // This should be an error
950 Slice
upper_bound("g");
951 ro
.iterate_upper_bound
= &upper_bound
;
953 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
957 ASSERT_TRUE(iter
->Valid());
958 ASSERT_EQ("foo", iter
->key().ToString());
961 ASSERT_TRUE(iter
->Valid());
962 ASSERT_EQ("foo1", iter
->key().ToString());
965 ASSERT_TRUE(!iter
->Valid());
968 // testing that iterate_upper_bound prevents iterating over deleted items
969 // if the bound has already reached
971 options
.prefix_extractor
= nullptr;
972 DestroyAndReopen(options
);
973 ASSERT_OK(Put("a", "0"));
974 ASSERT_OK(Put("b", "0"));
975 ASSERT_OK(Put("b1", "0"));
976 ASSERT_OK(Put("c", "0"));
977 ASSERT_OK(Put("d", "0"));
978 ASSERT_OK(Put("e", "0"));
979 ASSERT_OK(Delete("c"));
980 ASSERT_OK(Delete("d"));
982 // base case with no bound
984 ro
.iterate_upper_bound
= nullptr;
986 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
989 ASSERT_TRUE(iter
->Valid());
990 ASSERT_EQ(iter
->key().compare(Slice("b")), 0);
993 ASSERT_TRUE(iter
->Valid());
994 ASSERT_EQ(iter
->key().compare(("b1")), 0);
996 get_perf_context()->Reset();
999 ASSERT_TRUE(iter
->Valid());
1000 ASSERT_EQ(static_cast<int>(get_perf_context()->internal_delete_skipped_count
), 2);
1002 // now testing with iterate_bound
1004 ro
.iterate_upper_bound
= &prefix
;
1006 iter
.reset(NewIterator(ro
));
1008 get_perf_context()->Reset();
1011 ASSERT_TRUE(iter
->Valid());
1012 ASSERT_EQ(iter
->key().compare(Slice("b")), 0);
1015 ASSERT_TRUE(iter
->Valid());
1016 ASSERT_EQ(iter
->key().compare(("b1")), 0);
1019 // the iteration should stop as soon as the bound key is reached
1020 // even though the key is deleted
1021 // hence internal_delete_skipped_count should be 0
1022 ASSERT_TRUE(!iter
->Valid());
1023 ASSERT_EQ(static_cast<int>(get_perf_context()->internal_delete_skipped_count
), 0);
1027 TEST_P(DBIteratorTest
, DBIteratorBoundMultiSeek
) {
1028 Options options
= CurrentOptions();
1030 options
.create_if_missing
= true;
1031 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
1032 options
.prefix_extractor
= nullptr;
1033 DestroyAndReopen(options
);
1034 ASSERT_OK(Put("a", "0"));
1035 ASSERT_OK(Put("z", "0"));
1037 ASSERT_OK(Put("foo1", "bar1"));
1038 ASSERT_OK(Put("foo2", "bar2"));
1039 ASSERT_OK(Put("foo3", "bar3"));
1040 ASSERT_OK(Put("foo4", "bar4"));
1043 std::string up_str
= "foo5";
1046 ro
.iterate_upper_bound
= &up
;
1047 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
1050 ASSERT_TRUE(iter
->Valid());
1051 ASSERT_EQ(iter
->key().compare(Slice("foo1")), 0);
1053 uint64_t prev_block_cache_hit
=
1054 TestGetTickerCount(options
, BLOCK_CACHE_HIT
);
1055 uint64_t prev_block_cache_miss
=
1056 TestGetTickerCount(options
, BLOCK_CACHE_MISS
);
1058 ASSERT_GT(prev_block_cache_hit
+ prev_block_cache_miss
, 0);
1061 ASSERT_TRUE(iter
->Valid());
1062 ASSERT_EQ(iter
->key().compare(Slice("foo4")), 0);
1063 ASSERT_EQ(prev_block_cache_hit
,
1064 TestGetTickerCount(options
, BLOCK_CACHE_HIT
));
1065 ASSERT_EQ(prev_block_cache_miss
,
1066 TestGetTickerCount(options
, BLOCK_CACHE_MISS
));
1069 ASSERT_TRUE(iter
->Valid());
1070 ASSERT_EQ(iter
->key().compare(Slice("foo2")), 0);
1072 ASSERT_TRUE(iter
->Valid());
1073 ASSERT_EQ(iter
->key().compare(Slice("foo3")), 0);
1074 ASSERT_EQ(prev_block_cache_hit
,
1075 TestGetTickerCount(options
, BLOCK_CACHE_HIT
));
1076 ASSERT_EQ(prev_block_cache_miss
,
1077 TestGetTickerCount(options
, BLOCK_CACHE_MISS
));
1082 TEST_P(DBIteratorTest
, DBIteratorBoundOptimizationTest
) {
1083 for (auto format_version
: {2, 3, 4}) {
1084 int upper_bound_hits
= 0;
1085 Options options
= CurrentOptions();
1086 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1087 "BlockBasedTableIterator:out_of_bound",
1088 [&upper_bound_hits
](void*) { upper_bound_hits
++; });
1089 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1091 options
.create_if_missing
= true;
1092 options
.prefix_extractor
= nullptr;
1093 BlockBasedTableOptions table_options
;
1094 table_options
.format_version
= format_version
;
1095 table_options
.flush_block_policy_factory
=
1096 std::make_shared
<FlushBlockEveryKeyPolicyFactory
>();
1097 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1099 DestroyAndReopen(options
);
1100 ASSERT_OK(Put("foo1", "bar1"));
1101 ASSERT_OK(Put("foo2", "bar2"));
1102 ASSERT_OK(Put("foo4", "bar4"));
1107 ro
.iterate_upper_bound
= &ub
;
1109 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
1112 ASSERT_TRUE(iter
->Valid());
1113 ASSERT_EQ(iter
->key().compare(Slice("foo1")), 0);
1114 ASSERT_EQ(upper_bound_hits
, 0);
1117 ASSERT_TRUE(iter
->Valid());
1118 ASSERT_EQ(iter
->key().compare(Slice("foo2")), 0);
1119 ASSERT_EQ(upper_bound_hits
, 0);
1122 ASSERT_FALSE(iter
->Valid());
1123 ASSERT_EQ(upper_bound_hits
, 1);
1127 // Enable kBinarySearchWithFirstKey, do some iterator operations and check that
1128 // they don't do unnecessary block reads.
1129 TEST_P(DBIteratorTest
, IndexWithFirstKey
) {
1130 for (int tailing
= 0; tailing
< 2; ++tailing
) {
1131 SCOPED_TRACE("tailing = " + std::to_string(tailing
));
1132 Options options
= CurrentOptions();
1134 options
.create_if_missing
= true;
1135 options
.prefix_extractor
= nullptr;
1136 options
.merge_operator
= MergeOperators::CreateStringAppendOperator();
1137 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
1138 Statistics
* stats
= options
.statistics
.get();
1139 BlockBasedTableOptions table_options
;
1140 table_options
.index_type
=
1141 BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey
;
1142 table_options
.index_shortening
=
1143 BlockBasedTableOptions::IndexShorteningMode::kNoShortening
;
1144 table_options
.flush_block_policy_factory
=
1145 std::make_shared
<FlushBlockEveryKeyPolicyFactory
>();
1146 table_options
.block_cache
=
1147 NewLRUCache(8000); // fits all blocks and their cache metadata overhead
1148 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1150 DestroyAndReopen(options
);
1151 ASSERT_OK(Merge("a1", "x1"));
1152 ASSERT_OK(Merge("b1", "y1"));
1153 ASSERT_OK(Merge("c0", "z1"));
1155 ASSERT_OK(Merge("a2", "x2"));
1156 ASSERT_OK(Merge("b2", "y2"));
1157 ASSERT_OK(Merge("c0", "z2"));
1159 ASSERT_OK(Merge("a3", "x3"));
1160 ASSERT_OK(Merge("b3", "y3"));
1161 ASSERT_OK(Merge("c3", "z3"));
1164 // Block cache is not important for this test.
1165 // We use BLOCK_CACHE_DATA_* counters just because they're the most readily
1166 // available way of counting block accesses.
1169 ropt
.tailing
= tailing
;
1170 std::unique_ptr
<Iterator
> iter(NewIterator(ropt
));
1172 ropt
.read_tier
= ReadTier::kBlockCacheTier
;
1173 std::unique_ptr
<Iterator
> nonblocking_iter(NewIterator(ropt
));
1176 ASSERT_TRUE(iter
->Valid());
1177 EXPECT_EQ("b2", iter
->key().ToString());
1178 EXPECT_EQ("y2", iter
->value().ToString());
1179 EXPECT_EQ(1, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1181 // The cache-only iterator should succeed too, using the blocks pulled into
1182 // the cache by the previous iterator.
1183 nonblocking_iter
->Seek("b10");
1184 ASSERT_TRUE(nonblocking_iter
->Valid());
1185 EXPECT_EQ("b2", nonblocking_iter
->key().ToString());
1186 EXPECT_EQ("y2", nonblocking_iter
->value().ToString());
1187 EXPECT_EQ(1, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1189 // ... but it shouldn't be able to step forward since the next block is
1190 // not in cache yet.
1191 nonblocking_iter
->Next();
1192 ASSERT_FALSE(nonblocking_iter
->Valid());
1193 ASSERT_TRUE(nonblocking_iter
->status().IsIncomplete());
1195 // ... nor should a seek to the next key succeed.
1196 nonblocking_iter
->Seek("b20");
1197 ASSERT_FALSE(nonblocking_iter
->Valid());
1198 ASSERT_TRUE(nonblocking_iter
->status().IsIncomplete());
1201 ASSERT_TRUE(iter
->Valid());
1202 EXPECT_EQ("b3", iter
->key().ToString());
1203 EXPECT_EQ("y3", iter
->value().ToString());
1204 EXPECT_EQ(4, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1205 EXPECT_EQ(1, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1207 // After the blocking iterator loaded the next block, the nonblocking
1208 // iterator's seek should succeed.
1209 nonblocking_iter
->Seek("b20");
1210 ASSERT_TRUE(nonblocking_iter
->Valid());
1211 EXPECT_EQ("b3", nonblocking_iter
->key().ToString());
1212 EXPECT_EQ("y3", nonblocking_iter
->value().ToString());
1213 EXPECT_EQ(2, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1216 ASSERT_TRUE(iter
->Valid());
1217 EXPECT_EQ("c0", iter
->key().ToString());
1218 EXPECT_EQ("z1,z2", iter
->value().ToString());
1219 EXPECT_EQ(2, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1220 EXPECT_EQ(6, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1223 ASSERT_TRUE(iter
->Valid());
1224 EXPECT_EQ("c3", iter
->key().ToString());
1225 EXPECT_EQ("z3", iter
->value().ToString());
1226 EXPECT_EQ(2, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1227 EXPECT_EQ(7, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1231 // Enable iterate_upper_bound and check that iterator is not trying to read
1232 // blocks that are fully above upper bound.
1233 std::string ub
= "b3";
1235 ropt
.iterate_upper_bound
= &ub_slice
;
1236 iter
.reset(NewIterator(ropt
));
1239 ASSERT_TRUE(iter
->Valid());
1240 EXPECT_EQ("b2", iter
->key().ToString());
1241 EXPECT_EQ("y2", iter
->value().ToString());
1242 EXPECT_EQ(3, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1243 EXPECT_EQ(7, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1246 ASSERT_FALSE(iter
->Valid());
1247 EXPECT_EQ(3, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1248 EXPECT_EQ(7, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1252 TEST_P(DBIteratorTest
, IndexWithFirstKeyGet
) {
1253 Options options
= CurrentOptions();
1255 options
.create_if_missing
= true;
1256 options
.prefix_extractor
= nullptr;
1257 options
.merge_operator
= MergeOperators::CreateStringAppendOperator();
1258 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
1259 Statistics
* stats
= options
.statistics
.get();
1260 BlockBasedTableOptions table_options
;
1261 table_options
.index_type
=
1262 BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey
;
1263 table_options
.index_shortening
=
1264 BlockBasedTableOptions::IndexShorteningMode::kNoShortening
;
1265 table_options
.flush_block_policy_factory
=
1266 std::make_shared
<FlushBlockEveryKeyPolicyFactory
>();
1267 table_options
.block_cache
= NewLRUCache(1000); // fits all blocks
1268 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1270 DestroyAndReopen(options
);
1271 ASSERT_OK(Merge("a", "x1"));
1272 ASSERT_OK(Merge("c", "y1"));
1273 ASSERT_OK(Merge("e", "z1"));
1275 ASSERT_OK(Merge("c", "y2"));
1276 ASSERT_OK(Merge("e", "z2"));
1279 // Get() between blocks shouldn't read any blocks.
1280 ASSERT_EQ("NOT_FOUND", Get("b"));
1281 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1282 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1284 // Get() of an existing key shouldn't read any unnecessary blocks when there's
1285 // only one key per block.
1287 ASSERT_EQ("y1,y2", Get("c"));
1288 EXPECT_EQ(2, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1289 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1291 ASSERT_EQ("x1", Get("a"));
1292 EXPECT_EQ(3, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1293 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1295 EXPECT_EQ(std::vector
<std::string
>({"NOT_FOUND", "z1,z2"}),
1296 MultiGet({"b", "e"}));
1299 // TODO(3.13): fix the issue of Seek() + Prev() which might not necessary
1300 // return the biggest key which is smaller than the seek key.
1301 TEST_P(DBIteratorTest
, PrevAfterAndNextAfterMerge
) {
1303 options
.create_if_missing
= true;
1304 options
.merge_operator
= MergeOperators::CreatePutOperator();
1306 DestroyAndReopen(options
);
1308 // write three entries with different keys using Merge()
1310 db_
->Merge(wopts
, "1", "data1");
1311 db_
->Merge(wopts
, "2", "data2");
1312 db_
->Merge(wopts
, "3", "data3");
1314 std::unique_ptr
<Iterator
> it(NewIterator(ReadOptions()));
1317 ASSERT_TRUE(it
->Valid());
1318 ASSERT_EQ("2", it
->key().ToString());
1321 ASSERT_TRUE(it
->Valid());
1322 ASSERT_EQ("1", it
->key().ToString());
1324 it
->SeekForPrev("1");
1325 ASSERT_TRUE(it
->Valid());
1326 ASSERT_EQ("1", it
->key().ToString());
1329 ASSERT_TRUE(it
->Valid());
1330 ASSERT_EQ("2", it
->key().ToString());
1333 class DBIteratorTestForPinnedData
: public DBIteratorTest
{
1338 COMPACT_BEFORE_READ
,
1342 DBIteratorTestForPinnedData() : DBIteratorTest() {}
1343 void PinnedDataIteratorRandomized(TestConfig run_config
) {
1344 // Generate Random data
1348 int key_pool
= static_cast<int>(puts
* 0.7);
1350 int val_size
= 1000;
1351 int seeks_percentage
= 20; // 20% of keys will be used to test seek()
1352 int delete_percentage
= 20; // 20% of keys will be deleted
1353 int merge_percentage
= 20; // 20% of keys will be added using Merge()
1355 Options options
= CurrentOptions();
1356 BlockBasedTableOptions table_options
;
1357 table_options
.use_delta_encoding
= false;
1358 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1359 options
.merge_operator
= MergeOperators::CreatePutOperator();
1360 DestroyAndReopen(options
);
1362 std::vector
<std::string
> generated_keys(key_pool
);
1363 for (int i
= 0; i
< key_pool
; i
++) {
1364 generated_keys
[i
] = rnd
.RandomString(key_size
);
1367 std::map
<std::string
, std::string
> true_data
;
1368 std::vector
<std::string
> random_keys
;
1369 std::vector
<std::string
> deleted_keys
;
1370 for (int i
= 0; i
< puts
; i
++) {
1371 auto& k
= generated_keys
[rnd
.Next() % key_pool
];
1372 auto v
= rnd
.RandomString(val_size
);
1374 // Insert data to true_data map and to DB
1376 if (rnd
.PercentTrue(merge_percentage
)) {
1377 ASSERT_OK(db_
->Merge(WriteOptions(), k
, v
));
1379 ASSERT_OK(Put(k
, v
));
1382 // Pick random keys to be used to test Seek()
1383 if (rnd
.PercentTrue(seeks_percentage
)) {
1384 random_keys
.push_back(k
);
1387 // Delete some random keys
1388 if (rnd
.PercentTrue(delete_percentage
)) {
1389 deleted_keys
.push_back(k
);
1391 ASSERT_OK(Delete(k
));
1394 if (run_config
== TestConfig::FLUSH_EVERY_1000
) {
1395 if (i
&& i
% 1000 == 0) {
1401 if (run_config
== TestConfig::CLOSE_AND_OPEN
) {
1404 } else if (run_config
== TestConfig::COMPACT_BEFORE_READ
) {
1405 db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr);
1410 auto iter
= NewIterator(ro
);
1413 // Test Seek to random keys
1414 std::vector
<Slice
> keys_slices
;
1415 std::vector
<std::string
> true_keys
;
1416 for (auto& k
: random_keys
) {
1418 if (!iter
->Valid()) {
1419 ASSERT_EQ(true_data
.lower_bound(k
), true_data
.end());
1422 std::string prop_value
;
1424 iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1425 ASSERT_EQ("1", prop_value
);
1426 keys_slices
.push_back(iter
->key());
1427 true_keys
.push_back(true_data
.lower_bound(k
)->first
);
1430 for (size_t i
= 0; i
< keys_slices
.size(); i
++) {
1431 ASSERT_EQ(keys_slices
[i
].ToString(), true_keys
[i
]);
1436 // Test SeekForPrev to random keys
1437 std::vector
<Slice
> keys_slices
;
1438 std::vector
<std::string
> true_keys
;
1439 for (auto& k
: random_keys
) {
1440 iter
->SeekForPrev(k
);
1441 if (!iter
->Valid()) {
1442 ASSERT_EQ(true_data
.upper_bound(k
), true_data
.begin());
1445 std::string prop_value
;
1447 iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1448 ASSERT_EQ("1", prop_value
);
1449 keys_slices
.push_back(iter
->key());
1450 true_keys
.push_back((--true_data
.upper_bound(k
))->first
);
1453 for (size_t i
= 0; i
< keys_slices
.size(); i
++) {
1454 ASSERT_EQ(keys_slices
[i
].ToString(), true_keys
[i
]);
1459 // Test iterating all data forward
1460 std::vector
<Slice
> all_keys
;
1461 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
1462 std::string prop_value
;
1464 iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1465 ASSERT_EQ("1", prop_value
);
1466 all_keys
.push_back(iter
->key());
1468 ASSERT_EQ(all_keys
.size(), true_data
.size());
1470 // Verify that all keys slices are valid
1471 auto data_iter
= true_data
.begin();
1472 for (size_t i
= 0; i
< all_keys
.size(); i
++) {
1473 ASSERT_EQ(all_keys
[i
].ToString(), data_iter
->first
);
1479 // Test iterating all data backward
1480 std::vector
<Slice
> all_keys
;
1481 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
1482 std::string prop_value
;
1484 iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1485 ASSERT_EQ("1", prop_value
);
1486 all_keys
.push_back(iter
->key());
1488 ASSERT_EQ(all_keys
.size(), true_data
.size());
1490 // Verify that all keys slices are valid (backward)
1491 auto data_iter
= true_data
.rbegin();
1492 for (size_t i
= 0; i
< all_keys
.size(); i
++) {
1493 ASSERT_EQ(all_keys
[i
].ToString(), data_iter
->first
);
1502 TEST_P(DBIteratorTestForPinnedData
, PinnedDataIteratorRandomizedNormal
) {
1503 PinnedDataIteratorRandomized(TestConfig::NORMAL
);
1506 TEST_P(DBIteratorTestForPinnedData
, PinnedDataIteratorRandomizedCLoseAndOpen
) {
1507 PinnedDataIteratorRandomized(TestConfig::CLOSE_AND_OPEN
);
1510 TEST_P(DBIteratorTestForPinnedData
,
1511 PinnedDataIteratorRandomizedCompactBeforeRead
) {
1512 PinnedDataIteratorRandomized(TestConfig::COMPACT_BEFORE_READ
);
1515 TEST_P(DBIteratorTestForPinnedData
, PinnedDataIteratorRandomizedFlush
) {
1516 PinnedDataIteratorRandomized(TestConfig::FLUSH_EVERY_1000
);
1519 #ifndef ROCKSDB_LITE
1520 TEST_P(DBIteratorTest
, PinnedDataIteratorMultipleFiles
) {
1521 Options options
= CurrentOptions();
1522 BlockBasedTableOptions table_options
;
1523 table_options
.use_delta_encoding
= false;
1524 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1525 options
.disable_auto_compactions
= true;
1526 options
.write_buffer_size
= 1024 * 1024 * 10; // 10 Mb
1527 DestroyAndReopen(options
);
1529 std::map
<std::string
, std::string
> true_data
;
1531 // Generate 4 sst files in L2
1533 for (int i
= 1; i
<= 1000; i
++) {
1534 std::string k
= Key(i
* 3);
1535 std::string v
= rnd
.RandomString(100);
1536 ASSERT_OK(Put(k
, v
));
1542 ASSERT_EQ(FilesPerLevel(0), "4");
1543 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1544 ASSERT_EQ(FilesPerLevel(0), "0,4");
1546 // Generate 4 sst files in L0
1547 for (int i
= 1; i
<= 1000; i
++) {
1548 std::string k
= Key(i
* 2);
1549 std::string v
= rnd
.RandomString(100);
1550 ASSERT_OK(Put(k
, v
));
1556 ASSERT_EQ(FilesPerLevel(0), "4,4");
1558 // Add some keys/values in memtables
1559 for (int i
= 1; i
<= 1000; i
++) {
1560 std::string k
= Key(i
);
1561 std::string v
= rnd
.RandomString(100);
1562 ASSERT_OK(Put(k
, v
));
1565 ASSERT_EQ(FilesPerLevel(0), "4,4");
1569 auto iter
= NewIterator(ro
);
1571 std::vector
<std::pair
<Slice
, std::string
>> results
;
1572 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
1573 std::string prop_value
;
1574 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1575 ASSERT_EQ("1", prop_value
);
1576 results
.emplace_back(iter
->key(), iter
->value().ToString());
1579 ASSERT_EQ(results
.size(), true_data
.size());
1580 auto data_iter
= true_data
.begin();
1581 for (size_t i
= 0; i
< results
.size(); i
++, data_iter
++) {
1582 auto& kv
= results
[i
];
1583 ASSERT_EQ(kv
.first
, data_iter
->first
);
1584 ASSERT_EQ(kv
.second
, data_iter
->second
);
1591 TEST_P(DBIteratorTest
, PinnedDataIteratorMergeOperator
) {
1592 Options options
= CurrentOptions();
1593 BlockBasedTableOptions table_options
;
1594 table_options
.use_delta_encoding
= false;
1595 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1596 options
.merge_operator
= MergeOperators::CreateUInt64AddOperator();
1597 DestroyAndReopen(options
);
1599 std::string numbers
[7];
1600 for (int val
= 0; val
<= 6; val
++) {
1601 PutFixed64(numbers
+ val
, val
);
1604 // +1 all keys in range [ 0 => 999]
1605 for (int i
= 0; i
< 1000; i
++) {
1607 ASSERT_OK(db_
->Merge(wo
, Key(i
), numbers
[1]));
1610 // +2 all keys divisible by 2 in range [ 0 => 999]
1611 for (int i
= 0; i
< 1000; i
+= 2) {
1613 ASSERT_OK(db_
->Merge(wo
, Key(i
), numbers
[2]));
1616 // +3 all keys divisible by 5 in range [ 0 => 999]
1617 for (int i
= 0; i
< 1000; i
+= 5) {
1619 ASSERT_OK(db_
->Merge(wo
, Key(i
), numbers
[3]));
1624 auto iter
= NewIterator(ro
);
1626 std::vector
<std::pair
<Slice
, std::string
>> results
;
1627 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
1628 std::string prop_value
;
1629 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1630 ASSERT_EQ("1", prop_value
);
1631 results
.emplace_back(iter
->key(), iter
->value().ToString());
1634 ASSERT_EQ(results
.size(), 1000);
1635 for (size_t i
= 0; i
< results
.size(); i
++) {
1636 auto& kv
= results
[i
];
1637 ASSERT_EQ(kv
.first
, Key(static_cast<int>(i
)));
1638 int expected_val
= 1;
1645 ASSERT_EQ(kv
.second
, numbers
[expected_val
]);
1651 TEST_P(DBIteratorTest
, PinnedDataIteratorReadAfterUpdate
) {
1652 Options options
= CurrentOptions();
1653 BlockBasedTableOptions table_options
;
1654 table_options
.use_delta_encoding
= false;
1655 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1656 options
.write_buffer_size
= 100000;
1657 DestroyAndReopen(options
);
1661 std::map
<std::string
, std::string
> true_data
;
1662 for (int i
= 0; i
< 1000; i
++) {
1663 std::string k
= rnd
.RandomString(10);
1664 std::string v
= rnd
.RandomString(1000);
1665 ASSERT_OK(Put(k
, v
));
1671 auto iter
= NewIterator(ro
);
1673 // Delete 50% of the keys and update the other 50%
1674 for (auto& kv
: true_data
) {
1676 ASSERT_OK(Delete(kv
.first
));
1678 std::string new_val
= rnd
.RandomString(1000);
1679 ASSERT_OK(Put(kv
.first
, new_val
));
1683 std::vector
<std::pair
<Slice
, std::string
>> results
;
1684 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
1685 std::string prop_value
;
1686 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1687 ASSERT_EQ("1", prop_value
);
1688 results
.emplace_back(iter
->key(), iter
->value().ToString());
1691 auto data_iter
= true_data
.begin();
1692 for (size_t i
= 0; i
< results
.size(); i
++, data_iter
++) {
1693 auto& kv
= results
[i
];
1694 ASSERT_EQ(kv
.first
, data_iter
->first
);
1695 ASSERT_EQ(kv
.second
, data_iter
->second
);
1701 class SliceTransformLimitedDomainGeneric
: public SliceTransform
{
1702 const char* Name() const override
{
1703 return "SliceTransformLimitedDomainGeneric";
1706 Slice
Transform(const Slice
& src
) const override
{
1707 return Slice(src
.data(), 1);
1710 bool InDomain(const Slice
& src
) const override
{
1711 // prefix will be x????
1712 return src
.size() >= 1;
1715 bool InRange(const Slice
& dst
) const override
{
1716 // prefix will be x????
1717 return dst
.size() == 1;
1721 TEST_P(DBIteratorTest
, IterSeekForPrevCrossingFiles
) {
1722 Options options
= CurrentOptions();
1723 options
.prefix_extractor
.reset(NewFixedPrefixTransform(1));
1724 options
.disable_auto_compactions
= true;
1725 // Enable prefix bloom for SST files
1726 BlockBasedTableOptions table_options
;
1727 table_options
.filter_policy
.reset(NewBloomFilterPolicy(10, true));
1728 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1729 DestroyAndReopen(options
);
1731 ASSERT_OK(Put("a1", "va1"));
1732 ASSERT_OK(Put("a2", "va2"));
1733 ASSERT_OK(Put("a3", "va3"));
1736 ASSERT_OK(Put("b1", "vb1"));
1737 ASSERT_OK(Put("b2", "vb2"));
1738 ASSERT_OK(Put("b3", "vb3"));
1741 ASSERT_OK(Put("b4", "vb4"));
1742 ASSERT_OK(Put("d1", "vd1"));
1743 ASSERT_OK(Put("d2", "vd2"));
1744 ASSERT_OK(Put("d4", "vd4"));
1747 MoveFilesToLevel(1);
1750 Iterator
* iter
= NewIterator(ro
);
1752 iter
->SeekForPrev("a4");
1753 ASSERT_EQ(iter
->key().ToString(), "a3");
1754 ASSERT_EQ(iter
->value().ToString(), "va3");
1756 iter
->SeekForPrev("c2");
1757 ASSERT_EQ(iter
->key().ToString(), "b3");
1758 iter
->SeekForPrev("d3");
1759 ASSERT_EQ(iter
->key().ToString(), "d2");
1760 iter
->SeekForPrev("b5");
1761 ASSERT_EQ(iter
->key().ToString(), "b4");
1767 ro
.prefix_same_as_start
= true;
1768 Iterator
* iter
= NewIterator(ro
);
1769 iter
->SeekForPrev("c2");
1770 ASSERT_TRUE(!iter
->Valid());
1775 TEST_P(DBIteratorTest
, IterSeekForPrevCrossingFilesCustomPrefixExtractor
) {
1776 Options options
= CurrentOptions();
1777 options
.prefix_extractor
=
1778 std::make_shared
<SliceTransformLimitedDomainGeneric
>();
1779 options
.disable_auto_compactions
= true;
1780 // Enable prefix bloom for SST files
1781 BlockBasedTableOptions table_options
;
1782 table_options
.filter_policy
.reset(NewBloomFilterPolicy(10, true));
1783 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1784 DestroyAndReopen(options
);
1786 ASSERT_OK(Put("a1", "va1"));
1787 ASSERT_OK(Put("a2", "va2"));
1788 ASSERT_OK(Put("a3", "va3"));
1791 ASSERT_OK(Put("b1", "vb1"));
1792 ASSERT_OK(Put("b2", "vb2"));
1793 ASSERT_OK(Put("b3", "vb3"));
1796 ASSERT_OK(Put("b4", "vb4"));
1797 ASSERT_OK(Put("d1", "vd1"));
1798 ASSERT_OK(Put("d2", "vd2"));
1799 ASSERT_OK(Put("d4", "vd4"));
1802 MoveFilesToLevel(1);
1805 Iterator
* iter
= NewIterator(ro
);
1807 iter
->SeekForPrev("a4");
1808 ASSERT_EQ(iter
->key().ToString(), "a3");
1809 ASSERT_EQ(iter
->value().ToString(), "va3");
1811 iter
->SeekForPrev("c2");
1812 ASSERT_EQ(iter
->key().ToString(), "b3");
1813 iter
->SeekForPrev("d3");
1814 ASSERT_EQ(iter
->key().ToString(), "d2");
1815 iter
->SeekForPrev("b5");
1816 ASSERT_EQ(iter
->key().ToString(), "b4");
1822 ro
.prefix_same_as_start
= true;
1823 Iterator
* iter
= NewIterator(ro
);
1824 iter
->SeekForPrev("c2");
1825 ASSERT_TRUE(!iter
->Valid());
1830 TEST_P(DBIteratorTest
, IterPrevKeyCrossingBlocks
) {
1831 Options options
= CurrentOptions();
1832 BlockBasedTableOptions table_options
;
1833 table_options
.block_size
= 1; // every block will contain one entry
1834 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1835 options
.merge_operator
= MergeOperators::CreateStringAppendTESTOperator();
1836 options
.disable_auto_compactions
= true;
1837 options
.max_sequential_skip_in_iterations
= 8;
1839 DestroyAndReopen(options
);
1841 // Putting such deletes will force DBIter::Prev() to fallback to a Seek
1842 for (int file_num
= 0; file_num
< 10; file_num
++) {
1843 ASSERT_OK(Delete("key4"));
1847 // First File containing 5 blocks of puts
1848 ASSERT_OK(Put("key1", "val1.0"));
1849 ASSERT_OK(Put("key2", "val2.0"));
1850 ASSERT_OK(Put("key3", "val3.0"));
1851 ASSERT_OK(Put("key4", "val4.0"));
1852 ASSERT_OK(Put("key5", "val5.0"));
1855 // Second file containing 9 blocks of merge operands
1856 ASSERT_OK(db_
->Merge(WriteOptions(), "key1", "val1.1"));
1857 ASSERT_OK(db_
->Merge(WriteOptions(), "key1", "val1.2"));
1859 ASSERT_OK(db_
->Merge(WriteOptions(), "key2", "val2.1"));
1860 ASSERT_OK(db_
->Merge(WriteOptions(), "key2", "val2.2"));
1861 ASSERT_OK(db_
->Merge(WriteOptions(), "key2", "val2.3"));
1863 ASSERT_OK(db_
->Merge(WriteOptions(), "key3", "val3.1"));
1864 ASSERT_OK(db_
->Merge(WriteOptions(), "key3", "val3.2"));
1865 ASSERT_OK(db_
->Merge(WriteOptions(), "key3", "val3.3"));
1866 ASSERT_OK(db_
->Merge(WriteOptions(), "key3", "val3.4"));
1871 ro
.fill_cache
= false;
1872 Iterator
* iter
= NewIterator(ro
);
1875 ASSERT_EQ(iter
->key().ToString(), "key5");
1876 ASSERT_EQ(iter
->value().ToString(), "val5.0");
1879 ASSERT_EQ(iter
->key().ToString(), "key4");
1880 ASSERT_EQ(iter
->value().ToString(), "val4.0");
1883 ASSERT_EQ(iter
->key().ToString(), "key3");
1884 ASSERT_EQ(iter
->value().ToString(), "val3.0,val3.1,val3.2,val3.3,val3.4");
1887 ASSERT_EQ(iter
->key().ToString(), "key2");
1888 ASSERT_EQ(iter
->value().ToString(), "val2.0,val2.1,val2.2,val2.3");
1891 ASSERT_EQ(iter
->key().ToString(), "key1");
1892 ASSERT_EQ(iter
->value().ToString(), "val1.0,val1.1,val1.2");
1898 TEST_P(DBIteratorTest
, IterPrevKeyCrossingBlocksRandomized
) {
1899 Options options
= CurrentOptions();
1900 options
.merge_operator
= MergeOperators::CreateStringAppendTESTOperator();
1901 options
.disable_auto_compactions
= true;
1902 options
.level0_slowdown_writes_trigger
= (1 << 30);
1903 options
.level0_stop_writes_trigger
= (1 << 30);
1904 options
.max_sequential_skip_in_iterations
= 8;
1905 DestroyAndReopen(options
);
1907 const int kNumKeys
= 500;
1908 // Small number of merge operands to make sure that DBIter::Prev() don't
1909 // fall back to Seek()
1910 const int kNumMergeOperands
= 3;
1911 // Use value size that will make sure that every block contain 1 key
1912 const int kValSize
=
1913 static_cast<int>(BlockBasedTableOptions().block_size
) * 4;
1914 // Percentage of keys that wont get merge operations
1915 const int kNoMergeOpPercentage
= 20;
1916 // Percentage of keys that will be deleted
1917 const int kDeletePercentage
= 10;
1919 // For half of the key range we will write multiple deletes first to
1920 // force DBIter::Prev() to fall back to Seek()
1921 for (int file_num
= 0; file_num
< 10; file_num
++) {
1922 for (int i
= 0; i
< kNumKeys
; i
+= 2) {
1923 ASSERT_OK(Delete(Key(i
)));
1929 std::map
<std::string
, std::string
> true_data
;
1930 std::string gen_key
;
1931 std::string gen_val
;
1933 for (int i
= 0; i
< kNumKeys
; i
++) {
1935 gen_val
= rnd
.RandomString(kValSize
);
1937 ASSERT_OK(Put(gen_key
, gen_val
));
1938 true_data
[gen_key
] = gen_val
;
1942 // Separate values and merge operands in different file so that we
1943 // make sure that we don't merge them while flushing but actually
1944 // merge them in the read path
1945 for (int i
= 0; i
< kNumKeys
; i
++) {
1946 if (rnd
.PercentTrue(kNoMergeOpPercentage
)) {
1947 // Dont give merge operations for some keys
1951 for (int j
= 0; j
< kNumMergeOperands
; j
++) {
1953 gen_val
= rnd
.RandomString(kValSize
);
1955 ASSERT_OK(db_
->Merge(WriteOptions(), gen_key
, gen_val
));
1956 true_data
[gen_key
] += "," + gen_val
;
1961 for (int i
= 0; i
< kNumKeys
; i
++) {
1962 if (rnd
.PercentTrue(kDeletePercentage
)) {
1965 ASSERT_OK(Delete(gen_key
));
1966 true_data
.erase(gen_key
);
1973 ro
.fill_cache
= false;
1974 Iterator
* iter
= NewIterator(ro
);
1975 auto data_iter
= true_data
.rbegin();
1977 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
1978 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
1979 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
1982 ASSERT_EQ(data_iter
, true_data
.rend());
1989 ro
.fill_cache
= false;
1990 Iterator
* iter
= NewIterator(ro
);
1991 auto data_iter
= true_data
.rbegin();
1993 int entries_right
= 0;
1994 std::string seek_key
;
1995 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
1996 // Verify key/value of current position
1997 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
1998 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
2000 bool restore_position_with_seek
= rnd
.Uniform(2);
2001 if (restore_position_with_seek
) {
2002 seek_key
= iter
->key().ToString();
2005 // Do some Next() operations the restore the iterator to orignal position
2007 entries_right
> 0 ? rnd
.Uniform(std::min(entries_right
, 10)) : 0;
2008 for (int i
= 0; i
< next_count
; i
++) {
2012 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
2013 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
2016 if (restore_position_with_seek
) {
2017 // Restore orignal position using Seek()
2018 iter
->Seek(seek_key
);
2019 for (int i
= 0; i
< next_count
; i
++) {
2023 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
2024 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
2026 // Restore original position using Prev()
2027 for (int i
= 0; i
< next_count
; i
++) {
2031 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
2032 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
2039 ASSERT_EQ(data_iter
, true_data
.rend());
2045 TEST_P(DBIteratorTest
, IteratorWithLocalStatistics
) {
2046 Options options
= CurrentOptions();
2047 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
2048 DestroyAndReopen(options
);
2051 for (int i
= 0; i
< 1000; i
++) {
2052 // Key 10 bytes / Value 10 bytes
2053 ASSERT_OK(Put(rnd
.RandomString(10), rnd
.RandomString(10)));
2056 std::atomic
<uint64_t> total_next(0);
2057 std::atomic
<uint64_t> total_next_found(0);
2058 std::atomic
<uint64_t> total_prev(0);
2059 std::atomic
<uint64_t> total_prev_found(0);
2060 std::atomic
<uint64_t> total_bytes(0);
2062 std::vector
<port::Thread
> threads
;
2063 std::function
<void()> reader_func_next
= [&]() {
2064 SetPerfLevel(kEnableCount
);
2065 get_perf_context()->Reset();
2066 Iterator
* iter
= NewIterator(ReadOptions());
2068 iter
->SeekToFirst();
2069 // Seek will bump ITER_BYTES_READ
2071 bytes
+= iter
->key().size();
2072 bytes
+= iter
->value().size();
2077 if (!iter
->Valid()) {
2081 bytes
+= iter
->key().size();
2082 bytes
+= iter
->value().size();
2086 ASSERT_EQ(bytes
, get_perf_context()->iter_read_bytes
);
2087 SetPerfLevel(kDisable
);
2088 total_bytes
+= bytes
;
2091 std::function
<void()> reader_func_prev
= [&]() {
2092 SetPerfLevel(kEnableCount
);
2093 Iterator
* iter
= NewIterator(ReadOptions());
2096 // Seek will bump ITER_BYTES_READ
2098 bytes
+= iter
->key().size();
2099 bytes
+= iter
->value().size();
2104 if (!iter
->Valid()) {
2108 bytes
+= iter
->key().size();
2109 bytes
+= iter
->value().size();
2113 ASSERT_EQ(bytes
, get_perf_context()->iter_read_bytes
);
2114 SetPerfLevel(kDisable
);
2115 total_bytes
+= bytes
;
2118 for (int i
= 0; i
< 10; i
++) {
2119 threads
.emplace_back(reader_func_next
);
2121 for (int i
= 0; i
< 15; i
++) {
2122 threads
.emplace_back(reader_func_prev
);
2125 for (auto& t
: threads
) {
2129 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_DB_NEXT
), (uint64_t)total_next
);
2130 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_DB_NEXT_FOUND
),
2131 (uint64_t)total_next_found
);
2132 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_DB_PREV
), (uint64_t)total_prev
);
2133 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_DB_PREV_FOUND
),
2134 (uint64_t)total_prev_found
);
2135 ASSERT_EQ(TestGetTickerCount(options
, ITER_BYTES_READ
), (uint64_t)total_bytes
);
2139 TEST_P(DBIteratorTest
, ReadAhead
) {
2141 env_
->count_random_reads_
= true;
2143 options
.disable_auto_compactions
= true;
2144 options
.write_buffer_size
= 4 << 20;
2145 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
2146 BlockBasedTableOptions table_options
;
2147 table_options
.block_size
= 1024;
2148 table_options
.no_block_cache
= true;
2149 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
2152 std::string
value(1024, 'a');
2153 for (int i
= 0; i
< 100; i
++) {
2157 MoveFilesToLevel(2);
2159 for (int i
= 0; i
< 100; i
++) {
2163 MoveFilesToLevel(1);
2165 for (int i
= 0; i
< 100; i
++) {
2169 #ifndef ROCKSDB_LITE
2170 ASSERT_EQ("1,1,1", FilesPerLevel());
2171 #endif // !ROCKSDB_LITE
2173 env_
->random_read_bytes_counter_
= 0;
2174 options
.statistics
->setTickerCount(NO_FILE_OPENS
, 0);
2175 ReadOptions read_options
;
2176 auto* iter
= NewIterator(read_options
);
2177 iter
->SeekToFirst();
2178 int64_t num_file_opens
= TestGetTickerCount(options
, NO_FILE_OPENS
);
2179 size_t bytes_read
= env_
->random_read_bytes_counter_
;
2182 int64_t num_file_closes
= TestGetTickerCount(options
, NO_FILE_CLOSES
);
2183 env_
->random_read_bytes_counter_
= 0;
2184 options
.statistics
->setTickerCount(NO_FILE_OPENS
, 0);
2185 read_options
.readahead_size
= 1024 * 10;
2186 iter
= NewIterator(read_options
);
2187 iter
->SeekToFirst();
2188 int64_t num_file_opens_readahead
= TestGetTickerCount(options
, NO_FILE_OPENS
);
2189 size_t bytes_read_readahead
= env_
->random_read_bytes_counter_
;
2191 int64_t num_file_closes_readahead
=
2192 TestGetTickerCount(options
, NO_FILE_CLOSES
);
2193 ASSERT_EQ(num_file_opens
, num_file_opens_readahead
);
2194 ASSERT_EQ(num_file_closes
, num_file_closes_readahead
);
2195 ASSERT_GT(bytes_read_readahead
, bytes_read
);
2196 ASSERT_GT(bytes_read_readahead
, read_options
.readahead_size
* 3);
2198 // Verify correctness.
2199 iter
= NewIterator(read_options
);
2201 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
2202 ASSERT_EQ(value
, iter
->value());
2205 ASSERT_EQ(100, count
);
2206 for (int i
= 0; i
< 100; i
++) {
2208 ASSERT_EQ(value
, iter
->value());
2213 // Insert a key, create a snapshot iterator, overwrite key lots of times,
2214 // seek to a smaller key. Expect DBIter to fall back to a seek instead of
2215 // going through all the overwrites linearly.
2216 TEST_P(DBIteratorTest
, DBIteratorSkipRecentDuplicatesTest
) {
2217 Options options
= CurrentOptions();
2219 options
.create_if_missing
= true;
2220 options
.max_sequential_skip_in_iterations
= 3;
2221 options
.prefix_extractor
= nullptr;
2222 options
.write_buffer_size
= 1 << 27; // big enough to avoid flush
2223 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
2224 DestroyAndReopen(options
);
2227 ASSERT_OK(Put("b", "0"));
2231 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
2234 for (int i
= 0; i
< 100; ++i
) {
2235 ASSERT_OK(Put("b", std::to_string(i
+ 1).c_str()));
2238 #ifndef ROCKSDB_LITE
2239 // Check that memtable wasn't flushed.
2241 ASSERT_TRUE(db_
->GetProperty("rocksdb.num-files-at-level0", &val
));
2242 EXPECT_EQ("0", val
);
2245 // Seek iterator to a smaller key.
2246 get_perf_context()->Reset();
2248 ASSERT_TRUE(iter
->Valid());
2249 EXPECT_EQ("b", iter
->key().ToString());
2250 EXPECT_EQ("0", iter
->value().ToString());
2252 // Check that the seek didn't do too much work.
2253 // Checks are not tight, just make sure that everything is well below 100.
2254 EXPECT_LT(get_perf_context()->internal_key_skipped_count
, 4);
2255 EXPECT_LT(get_perf_context()->internal_recent_skipped_count
, 8);
2256 EXPECT_LT(get_perf_context()->seek_on_memtable_count
, 10);
2257 EXPECT_LT(get_perf_context()->next_on_memtable_count
, 10);
2258 EXPECT_LT(get_perf_context()->prev_on_memtable_count
, 10);
2260 // Check that iterator did something like what we expect.
2261 EXPECT_EQ(get_perf_context()->internal_delete_skipped_count
, 0);
2262 EXPECT_EQ(get_perf_context()->internal_merge_count
, 0);
2263 EXPECT_GE(get_perf_context()->internal_recent_skipped_count
, 2);
2264 EXPECT_GE(get_perf_context()->seek_on_memtable_count
, 2);
2265 EXPECT_EQ(1, options
.statistics
->getTickerCount(
2266 NUMBER_OF_RESEEKS_IN_ITERATION
));
2269 TEST_P(DBIteratorTest
, Refresh
) {
2270 ASSERT_OK(Put("x", "y"));
2272 std::unique_ptr
<Iterator
> iter(NewIterator(ReadOptions()));
2273 iter
->Seek(Slice("a"));
2274 ASSERT_TRUE(iter
->Valid());
2275 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2277 ASSERT_FALSE(iter
->Valid());
2279 ASSERT_OK(Put("c", "d"));
2281 iter
->Seek(Slice("a"));
2282 ASSERT_TRUE(iter
->Valid());
2283 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2285 ASSERT_FALSE(iter
->Valid());
2289 iter
->Seek(Slice("a"));
2290 ASSERT_TRUE(iter
->Valid());
2291 ASSERT_EQ(iter
->key().compare(Slice("c")), 0);
2293 ASSERT_TRUE(iter
->Valid());
2294 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2296 ASSERT_FALSE(iter
->Valid());
2298 dbfull()->Flush(FlushOptions());
2300 ASSERT_OK(Put("m", "n"));
2302 iter
->Seek(Slice("a"));
2303 ASSERT_TRUE(iter
->Valid());
2304 ASSERT_EQ(iter
->key().compare(Slice("c")), 0);
2306 ASSERT_TRUE(iter
->Valid());
2307 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2309 ASSERT_FALSE(iter
->Valid());
2313 iter
->Seek(Slice("a"));
2314 ASSERT_TRUE(iter
->Valid());
2315 ASSERT_EQ(iter
->key().compare(Slice("c")), 0);
2317 ASSERT_TRUE(iter
->Valid());
2318 ASSERT_EQ(iter
->key().compare(Slice("m")), 0);
2320 ASSERT_TRUE(iter
->Valid());
2321 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2323 ASSERT_FALSE(iter
->Valid());
2328 TEST_P(DBIteratorTest
, RefreshWithSnapshot
) {
2329 ASSERT_OK(Put("x", "y"));
2330 const Snapshot
* snapshot
= db_
->GetSnapshot();
2331 ReadOptions options
;
2332 options
.snapshot
= snapshot
;
2333 Iterator
* iter
= NewIterator(options
);
2335 iter
->Seek(Slice("a"));
2336 ASSERT_TRUE(iter
->Valid());
2337 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2339 ASSERT_FALSE(iter
->Valid());
2341 ASSERT_OK(Put("c", "d"));
2343 iter
->Seek(Slice("a"));
2344 ASSERT_TRUE(iter
->Valid());
2345 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2347 ASSERT_FALSE(iter
->Valid());
2350 s
= iter
->Refresh();
2351 ASSERT_TRUE(s
.IsNotSupported());
2352 db_
->ReleaseSnapshot(snapshot
);
2356 TEST_P(DBIteratorTest
, CreationFailure
) {
2357 SyncPoint::GetInstance()->SetCallBack(
2358 "DBImpl::NewInternalIterator:StatusCallback", [](void* arg
) {
2359 *(reinterpret_cast<Status
*>(arg
)) = Status::Corruption("test status");
2361 SyncPoint::GetInstance()->EnableProcessing();
2363 Iterator
* iter
= NewIterator(ReadOptions());
2364 ASSERT_FALSE(iter
->Valid());
2365 ASSERT_TRUE(iter
->status().IsCorruption());
2369 TEST_P(DBIteratorTest
, UpperBoundWithChangeDirection
) {
2370 Options options
= CurrentOptions();
2371 options
.max_sequential_skip_in_iterations
= 3;
2372 DestroyAndReopen(options
);
2374 // write a bunch of kvs to the database.
2375 ASSERT_OK(Put("a", "1"));
2376 ASSERT_OK(Put("y", "1"));
2377 ASSERT_OK(Put("y1", "1"));
2378 ASSERT_OK(Put("y2", "1"));
2379 ASSERT_OK(Put("y3", "1"));
2380 ASSERT_OK(Put("z", "1"));
2382 ASSERT_OK(Put("a", "1"));
2383 ASSERT_OK(Put("z", "1"));
2384 ASSERT_OK(Put("bar", "1"));
2385 ASSERT_OK(Put("foo", "1"));
2387 std::string upper_bound
= "x";
2388 Slice
ub_slice(upper_bound
);
2390 ro
.iterate_upper_bound
= &ub_slice
;
2391 ro
.max_skippable_internal_keys
= 1000;
2393 Iterator
* iter
= NewIterator(ro
);
2395 ASSERT_TRUE(iter
->Valid());
2396 ASSERT_EQ("foo", iter
->key().ToString());
2399 ASSERT_TRUE(iter
->Valid());
2400 ASSERT_OK(iter
->status());
2401 ASSERT_EQ("bar", iter
->key().ToString());
2406 TEST_P(DBIteratorTest
, TableFilter
) {
2407 ASSERT_OK(Put("a", "1"));
2408 dbfull()->Flush(FlushOptions());
2409 ASSERT_OK(Put("b", "2"));
2410 ASSERT_OK(Put("c", "3"));
2411 dbfull()->Flush(FlushOptions());
2412 ASSERT_OK(Put("d", "4"));
2413 ASSERT_OK(Put("e", "5"));
2414 ASSERT_OK(Put("f", "6"));
2415 dbfull()->Flush(FlushOptions());
2417 // Ensure the table_filter callback is called once for each table.
2419 std::set
<uint64_t> unseen
{1, 2, 3};
2421 opts
.table_filter
= [&](const TableProperties
& props
) {
2422 auto it
= unseen
.find(props
.num_entries
);
2423 if (it
== unseen
.end()) {
2424 ADD_FAILURE() << "saw table properties with an unexpected "
2425 << props
.num_entries
<< " entries";
2431 auto iter
= NewIterator(opts
);
2432 iter
->SeekToFirst();
2433 ASSERT_EQ(IterStatus(iter
), "a->1");
2435 ASSERT_EQ(IterStatus(iter
), "b->2");
2437 ASSERT_EQ(IterStatus(iter
), "c->3");
2439 ASSERT_EQ(IterStatus(iter
), "d->4");
2441 ASSERT_EQ(IterStatus(iter
), "e->5");
2443 ASSERT_EQ(IterStatus(iter
), "f->6");
2445 ASSERT_FALSE(iter
->Valid());
2446 ASSERT_TRUE(unseen
.empty());
2450 // Ensure returning false in the table_filter hides the keys from that table
2451 // during iteration.
2454 opts
.table_filter
= [](const TableProperties
& props
) {
2455 return props
.num_entries
!= 2;
2457 auto iter
= NewIterator(opts
);
2458 iter
->SeekToFirst();
2459 ASSERT_EQ(IterStatus(iter
), "a->1");
2461 ASSERT_EQ(IterStatus(iter
), "d->4");
2463 ASSERT_EQ(IterStatus(iter
), "e->5");
2465 ASSERT_EQ(IterStatus(iter
), "f->6");
2467 ASSERT_FALSE(iter
->Valid());
2472 TEST_P(DBIteratorTest
, UpperBoundWithPrevReseek
) {
2473 Options options
= CurrentOptions();
2474 options
.max_sequential_skip_in_iterations
= 3;
2475 DestroyAndReopen(options
);
2477 // write a bunch of kvs to the database.
2478 ASSERT_OK(Put("a", "1"));
2479 ASSERT_OK(Put("y", "1"));
2480 ASSERT_OK(Put("z", "1"));
2482 ASSERT_OK(Put("a", "1"));
2483 ASSERT_OK(Put("z", "1"));
2484 ASSERT_OK(Put("bar", "1"));
2485 ASSERT_OK(Put("foo", "1"));
2486 ASSERT_OK(Put("foo", "2"));
2488 ASSERT_OK(Put("foo", "3"));
2489 ASSERT_OK(Put("foo", "4"));
2490 ASSERT_OK(Put("foo", "5"));
2491 const Snapshot
* snapshot
= db_
->GetSnapshot();
2492 ASSERT_OK(Put("foo", "6"));
2494 std::string upper_bound
= "x";
2495 Slice
ub_slice(upper_bound
);
2497 ro
.snapshot
= snapshot
;
2498 ro
.iterate_upper_bound
= &ub_slice
;
2500 Iterator
* iter
= NewIterator(ro
);
2501 iter
->SeekForPrev("goo");
2502 ASSERT_TRUE(iter
->Valid());
2503 ASSERT_EQ("foo", iter
->key().ToString());
2506 ASSERT_TRUE(iter
->Valid());
2507 ASSERT_EQ("bar", iter
->key().ToString());
2510 db_
->ReleaseSnapshot(snapshot
);
2513 TEST_P(DBIteratorTest
, SkipStatistics
) {
2514 Options options
= CurrentOptions();
2515 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
2516 DestroyAndReopen(options
);
2520 // write a bunch of kvs to the database.
2521 ASSERT_OK(Put("a", "1"));
2522 ASSERT_OK(Put("b", "1"));
2523 ASSERT_OK(Put("c", "1"));
2525 ASSERT_OK(Put("d", "1"));
2526 ASSERT_OK(Put("e", "1"));
2527 ASSERT_OK(Put("f", "1"));
2528 ASSERT_OK(Put("a", "2"));
2529 ASSERT_OK(Put("b", "2"));
2531 ASSERT_OK(Delete("d"));
2532 ASSERT_OK(Delete("e"));
2533 ASSERT_OK(Delete("f"));
2535 Iterator
* iter
= NewIterator(ReadOptions());
2537 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
2538 ASSERT_OK(iter
->status());
2541 ASSERT_EQ(count
, 3);
2543 skip_count
+= 8; // 3 deletes + 3 original keys + 2 lower in sequence
2544 ASSERT_EQ(skip_count
, TestGetTickerCount(options
, NUMBER_ITER_SKIP
));
2546 iter
= NewIterator(ReadOptions());
2548 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
2549 ASSERT_OK(iter
->status());
2552 ASSERT_EQ(count
, 3);
2554 skip_count
+= 8; // Same as above, but in reverse order
2555 ASSERT_EQ(skip_count
, TestGetTickerCount(options
, NUMBER_ITER_SKIP
));
2557 ASSERT_OK(Put("aa", "1"));
2558 ASSERT_OK(Put("ab", "1"));
2559 ASSERT_OK(Put("ac", "1"));
2560 ASSERT_OK(Put("ad", "1"));
2562 ASSERT_OK(Delete("ab"));
2563 ASSERT_OK(Delete("ac"));
2564 ASSERT_OK(Delete("ad"));
2568 ro
.iterate_upper_bound
= &prefix
;
2570 iter
= NewIterator(ro
);
2572 for(iter
->Seek("aa"); iter
->Valid(); iter
->Next()) {
2573 ASSERT_OK(iter
->status());
2576 ASSERT_EQ(count
, 1);
2578 skip_count
+= 6; // 3 deletes + 3 original keys
2579 ASSERT_EQ(skip_count
, TestGetTickerCount(options
, NUMBER_ITER_SKIP
));
2581 iter
= NewIterator(ro
);
2583 for(iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
2584 ASSERT_OK(iter
->status());
2587 ASSERT_EQ(count
, 2);
2589 // 3 deletes + 3 original keys + lower sequence of "a"
2591 ASSERT_EQ(skip_count
, TestGetTickerCount(options
, NUMBER_ITER_SKIP
));
2594 TEST_P(DBIteratorTest
, SeekAfterHittingManyInternalKeys
) {
2595 Options options
= CurrentOptions();
2596 DestroyAndReopen(options
);
2598 ropts
.max_skippable_internal_keys
= 2;
2601 // Add more tombstones than max_skippable_internal_keys so that Next() fails.
2608 std::unique_ptr
<Iterator
> iter(NewIterator(ropts
));
2609 iter
->SeekToFirst();
2611 ASSERT_TRUE(iter
->Valid());
2612 ASSERT_EQ(iter
->key().ToString(), "1");
2613 ASSERT_EQ(iter
->value().ToString(), "val_1");
2615 // This should fail as incomplete due to too many non-visible internal keys on
2616 // the way to the next valid user key.
2618 ASSERT_TRUE(!iter
->Valid());
2619 ASSERT_TRUE(iter
->status().IsIncomplete());
2621 // Get the internal key at which Next() failed.
2622 std::string prop_value
;
2623 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.internal-key", &prop_value
));
2624 ASSERT_EQ("4", prop_value
);
2626 // Create a new iterator to seek to the internal key.
2627 std::unique_ptr
<Iterator
> iter2(NewIterator(ropts
));
2628 iter2
->Seek(prop_value
);
2629 ASSERT_TRUE(iter2
->Valid());
2630 ASSERT_OK(iter2
->status());
2632 ASSERT_EQ(iter2
->key().ToString(), "6");
2633 ASSERT_EQ(iter2
->value().ToString(), "val_6");
2636 // Reproduces a former bug where iterator would skip some records when DBIter
2637 // re-seeks subiterator with Incomplete status.
2638 TEST_P(DBIteratorTest
, NonBlockingIterationBugRepro
) {
2639 Options options
= CurrentOptions();
2640 BlockBasedTableOptions table_options
;
2641 // Make sure the sst file has more than one block.
2642 table_options
.flush_block_policy_factory
=
2643 std::make_shared
<FlushBlockEveryKeyPolicyFactory
>();
2644 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
2645 DestroyAndReopen(options
);
2647 // Two records in sst file, each in its own block.
2652 // Create a nonblocking iterator before writing to memtable.
2654 ropt
.read_tier
= kBlockCacheTier
;
2655 std::unique_ptr
<Iterator
> iter(NewIterator(ropt
));
2657 // Overwrite a key in memtable many times to hit
2658 // max_sequential_skip_in_iterations (which is 8 by default).
2659 for (int i
= 0; i
< 20; ++i
) {
2663 // Load the second block in sst file into the block cache.
2665 std::unique_ptr
<Iterator
> iter2(NewIterator(ReadOptions()));
2669 // Finally seek the nonblocking iterator.
2671 // With the bug, the status used to be OK, and the iterator used to point to
2673 EXPECT_TRUE(iter
->status().IsIncomplete());
2676 TEST_P(DBIteratorTest
, SeekBackwardAfterOutOfUpperBound
) {
2683 ropt
.iterate_upper_bound
= &ub
;
2685 std::unique_ptr
<Iterator
> it(dbfull()->NewIterator(ropt
));
2686 it
->SeekForPrev("a");
2687 ASSERT_TRUE(it
->Valid());
2688 ASSERT_OK(it
->status());
2689 ASSERT_EQ("a", it
->key().ToString());
2691 ASSERT_FALSE(it
->Valid());
2692 ASSERT_OK(it
->status());
2693 it
->SeekForPrev("a");
2694 ASSERT_OK(it
->status());
2696 ASSERT_TRUE(it
->Valid());
2697 ASSERT_EQ("a", it
->key().ToString());
2700 TEST_P(DBIteratorTest
, AvoidReseekLevelIterator
) {
2701 Options options
= CurrentOptions();
2702 options
.compression
= CompressionType::kNoCompression
;
2703 BlockBasedTableOptions table_options
;
2704 table_options
.block_size
= 800;
2705 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
2709 std::string random_str
= rnd
.RandomString(180);
2711 ASSERT_OK(Put("1", random_str
));
2712 ASSERT_OK(Put("2", random_str
));
2713 ASSERT_OK(Put("3", random_str
));
2714 ASSERT_OK(Put("4", random_str
));
2716 ASSERT_OK(Put("5", random_str
));
2717 ASSERT_OK(Put("6", random_str
));
2718 ASSERT_OK(Put("7", random_str
));
2720 ASSERT_OK(Put("8", random_str
));
2721 ASSERT_OK(Put("9", random_str
));
2723 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2725 int num_find_file_in_level
= 0;
2726 int num_idx_blk_seek
= 0;
2727 SyncPoint::GetInstance()->SetCallBack(
2728 "LevelIterator::Seek:BeforeFindFile",
2729 [&](void* /*arg*/) { num_find_file_in_level
++; });
2730 SyncPoint::GetInstance()->SetCallBack(
2731 "IndexBlockIter::Seek:0", [&](void* /*arg*/) { num_idx_blk_seek
++; });
2732 SyncPoint::GetInstance()->EnableProcessing();
2735 std::unique_ptr
<Iterator
> iter(NewIterator(ReadOptions()));
2737 ASSERT_TRUE(iter
->Valid());
2738 ASSERT_EQ(1, num_find_file_in_level
);
2739 ASSERT_EQ(1, num_idx_blk_seek
);
2742 ASSERT_TRUE(iter
->Valid());
2743 ASSERT_EQ(1, num_find_file_in_level
);
2744 ASSERT_EQ(1, num_idx_blk_seek
);
2747 ASSERT_TRUE(iter
->Valid());
2748 ASSERT_EQ(1, num_find_file_in_level
);
2749 ASSERT_EQ(1, num_idx_blk_seek
);
2752 ASSERT_TRUE(iter
->Valid());
2753 ASSERT_EQ(1, num_find_file_in_level
);
2754 ASSERT_EQ(1, num_idx_blk_seek
);
2757 ASSERT_TRUE(iter
->Valid());
2758 ASSERT_EQ(1, num_find_file_in_level
);
2759 ASSERT_EQ(2, num_idx_blk_seek
);
2762 ASSERT_TRUE(iter
->Valid());
2763 ASSERT_EQ(1, num_find_file_in_level
);
2764 ASSERT_EQ(2, num_idx_blk_seek
);
2767 ASSERT_TRUE(iter
->Valid());
2768 ASSERT_EQ(1, num_find_file_in_level
);
2769 ASSERT_EQ(3, num_idx_blk_seek
);
2772 ASSERT_TRUE(iter
->Valid());
2773 ASSERT_EQ(2, num_find_file_in_level
);
2774 // Still re-seek because "8" is the boundary key, which has
2775 // the same user key as the seek key.
2776 ASSERT_EQ(4, num_idx_blk_seek
);
2779 ASSERT_TRUE(iter
->Valid());
2780 ASSERT_EQ(3, num_find_file_in_level
);
2781 ASSERT_EQ(5, num_idx_blk_seek
);
2784 ASSERT_TRUE(iter
->Valid());
2785 ASSERT_EQ(3, num_find_file_in_level
);
2786 ASSERT_EQ(5, num_idx_blk_seek
);
2788 // Seek backward never triggers the index block seek to be skipped
2790 ASSERT_TRUE(iter
->Valid());
2791 ASSERT_EQ(3, num_find_file_in_level
);
2792 ASSERT_EQ(6, num_idx_blk_seek
);
2795 SyncPoint::GetInstance()->DisableProcessing();
2798 // MyRocks may change iterate bounds before seek. Simply test to make sure such
2799 // usage doesn't break iterator.
2800 TEST_P(DBIteratorTest
, IterateBoundChangedBeforeSeek
) {
2801 Options options
= CurrentOptions();
2802 options
.compression
= CompressionType::kNoCompression
;
2803 BlockBasedTableOptions table_options
;
2804 table_options
.block_size
= 100;
2805 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
2806 std::string
value(50, 'v');
2808 ASSERT_OK(Put("aaa", value
));
2810 ASSERT_OK(Put("bbb", "v"));
2811 ASSERT_OK(Put("ccc", "v"));
2812 ASSERT_OK(Put("ddd", "v"));
2814 ASSERT_OK(Put("eee", "v"));
2816 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2818 std::string ub1
= "e";
2819 std::string ub2
= "c";
2821 ReadOptions read_opts1
;
2822 read_opts1
.iterate_upper_bound
= &ub
;
2823 Iterator
* iter
= NewIterator(read_opts1
);
2824 // Seek and iterate accross block boundary.
2826 ASSERT_TRUE(iter
->Valid());
2827 ASSERT_OK(iter
->status());
2828 ASSERT_EQ("bbb", iter
->key());
2831 ASSERT_TRUE(iter
->Valid());
2832 ASSERT_OK(iter
->status());
2833 ASSERT_EQ("bbb", iter
->key());
2835 ASSERT_FALSE(iter
->Valid());
2836 ASSERT_OK(iter
->status());
2839 std::string lb1
= "a";
2840 std::string lb2
= "c";
2842 ReadOptions read_opts2
;
2843 read_opts2
.iterate_lower_bound
= &lb
;
2844 iter
= NewIterator(read_opts2
);
2845 iter
->SeekForPrev("d");
2846 ASSERT_TRUE(iter
->Valid());
2847 ASSERT_OK(iter
->status());
2848 ASSERT_EQ("ccc", iter
->key());
2850 iter
->SeekForPrev("d");
2851 ASSERT_TRUE(iter
->Valid());
2852 ASSERT_OK(iter
->status());
2853 ASSERT_EQ("ccc", iter
->key());
2855 ASSERT_FALSE(iter
->Valid());
2856 ASSERT_OK(iter
->status());
2860 TEST_P(DBIteratorTest
, IterateWithLowerBoundAcrossFileBoundary
) {
2861 ASSERT_OK(Put("aaa", "v"));
2862 ASSERT_OK(Put("bbb", "v"));
2864 ASSERT_OK(Put("ccc", "v"));
2865 ASSERT_OK(Put("ddd", "v"));
2867 // Move both files to bottom level.
2868 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2869 Slice
lower_bound("b");
2870 ReadOptions read_opts
;
2871 read_opts
.iterate_lower_bound
= &lower_bound
;
2872 std::unique_ptr
<Iterator
> iter(NewIterator(read_opts
));
2873 iter
->SeekForPrev("d");
2874 ASSERT_TRUE(iter
->Valid());
2875 ASSERT_OK(iter
->status());
2876 ASSERT_EQ("ccc", iter
->key());
2878 ASSERT_TRUE(iter
->Valid());
2879 ASSERT_OK(iter
->status());
2880 ASSERT_EQ("bbb", iter
->key());
2882 ASSERT_FALSE(iter
->Valid());
2883 ASSERT_OK(iter
->status());
2886 INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance
, DBIteratorTest
,
2887 testing::Values(true, false));
2889 // Tests how DBIter work with ReadCallback
2890 class DBIteratorWithReadCallbackTest
: public DBIteratorTest
{};
2892 TEST_F(DBIteratorWithReadCallbackTest
, ReadCallback
) {
2893 class TestReadCallback
: public ReadCallback
{
2895 explicit TestReadCallback(SequenceNumber _max_visible_seq
)
2896 : ReadCallback(_max_visible_seq
) {}
2898 bool IsVisibleFullCheck(SequenceNumber seq
) override
{
2899 return seq
<= max_visible_seq_
;
2903 ASSERT_OK(Put("foo", "v1"));
2904 ASSERT_OK(Put("foo", "v2"));
2905 ASSERT_OK(Put("foo", "v3"));
2906 ASSERT_OK(Put("a", "va"));
2907 ASSERT_OK(Put("z", "vz"));
2908 SequenceNumber seq1
= db_
->GetLatestSequenceNumber();
2909 TestReadCallback
callback1(seq1
);
2910 ASSERT_OK(Put("foo", "v4"));
2911 ASSERT_OK(Put("foo", "v5"));
2912 ASSERT_OK(Put("bar", "v7"));
2914 SequenceNumber seq2
= db_
->GetLatestSequenceNumber();
2916 static_cast_with_check
<ColumnFamilyHandleImpl
>(db_
->DefaultColumnFamily())
2918 // The iterator are suppose to see data before seq1.
2920 dbfull()->NewIteratorImpl(ReadOptions(), cfd
, seq2
, &callback1
);
2923 // The latest value of "foo" before seq1 is "v3"
2925 ASSERT_TRUE(iter
->Valid());
2926 ASSERT_OK(iter
->status());
2927 ASSERT_EQ("foo", iter
->key());
2928 ASSERT_EQ("v3", iter
->value());
2929 // "bar" is not visible to the iterator. It will move on to the next key
2932 ASSERT_TRUE(iter
->Valid());
2933 ASSERT_OK(iter
->status());
2934 ASSERT_EQ("foo", iter
->key());
2935 ASSERT_EQ("v3", iter
->value());
2940 ASSERT_TRUE(iter
->Valid());
2941 ASSERT_OK(iter
->status());
2942 ASSERT_EQ("va", iter
->value());
2943 // "bar" is not visible to the iterator. It will move on to the next key
2946 ASSERT_TRUE(iter
->Valid());
2947 ASSERT_OK(iter
->status());
2948 ASSERT_EQ("foo", iter
->key());
2949 ASSERT_EQ("v3", iter
->value());
2954 ASSERT_TRUE(iter
->Valid());
2955 ASSERT_OK(iter
->status());
2956 ASSERT_EQ("vz", iter
->value());
2957 // The previous key is "foo", which is visible to the iterator.
2959 ASSERT_TRUE(iter
->Valid());
2960 ASSERT_OK(iter
->status());
2961 ASSERT_EQ("foo", iter
->key());
2962 ASSERT_EQ("v3", iter
->value());
2963 // "bar" is not visible to the iterator. It will move on to the next key "a".
2964 iter
->Prev(); // skipping "bar"
2965 ASSERT_TRUE(iter
->Valid());
2966 ASSERT_OK(iter
->status());
2967 ASSERT_EQ("a", iter
->key());
2968 ASSERT_EQ("va", iter
->value());
2971 // The previous key is "foo", which is visible to the iterator.
2972 iter
->SeekForPrev("y");
2973 ASSERT_TRUE(iter
->Valid());
2974 ASSERT_OK(iter
->status());
2975 ASSERT_EQ("foo", iter
->key());
2976 ASSERT_EQ("v3", iter
->value());
2977 // "bar" is not visible to the iterator. It will move on to the next key "a".
2978 iter
->SeekForPrev("bar");
2979 ASSERT_TRUE(iter
->Valid());
2980 ASSERT_OK(iter
->status());
2981 ASSERT_EQ("a", iter
->key());
2982 ASSERT_EQ("va", iter
->value());
2986 // Prev beyond max_sequential_skip_in_iterations
2987 uint64_t num_versions
=
2988 CurrentOptions().max_sequential_skip_in_iterations
+ 10;
2989 for (uint64_t i
= 0; i
< num_versions
; i
++) {
2990 ASSERT_OK(Put("bar", ToString(i
)));
2992 SequenceNumber seq3
= db_
->GetLatestSequenceNumber();
2993 TestReadCallback
callback2(seq3
);
2994 ASSERT_OK(Put("bar", "v8"));
2995 SequenceNumber seq4
= db_
->GetLatestSequenceNumber();
2997 // The iterator is suppose to see data before seq3.
2998 iter
= dbfull()->NewIteratorImpl(ReadOptions(), cfd
, seq4
, &callback2
);
2999 // Seek to "z", which is visible.
3001 ASSERT_TRUE(iter
->Valid());
3002 ASSERT_OK(iter
->status());
3003 ASSERT_EQ("vz", iter
->value());
3004 // Previous key is "foo" and the last value "v5" is visible.
3006 ASSERT_TRUE(iter
->Valid());
3007 ASSERT_OK(iter
->status());
3008 ASSERT_EQ("foo", iter
->key());
3009 ASSERT_EQ("v5", iter
->value());
3010 // Since the number of values of "bar" is more than
3011 // max_sequential_skip_in_iterations, Prev() will ultimately fallback to
3012 // seek in forward direction. Here we test the fallback seek is correct.
3013 // The last visible value should be (num_versions - 1), as "v8" is not
3016 ASSERT_TRUE(iter
->Valid());
3017 ASSERT_OK(iter
->status());
3018 ASSERT_EQ("bar", iter
->key());
3019 ASSERT_EQ(ToString(num_versions
- 1), iter
->value());
3024 TEST_F(DBIteratorTest
, BackwardIterationOnInplaceUpdateMemtable
) {
3025 Options options
= CurrentOptions();
3026 options
.create_if_missing
= true;
3027 options
.inplace_update_support
= false;
3029 DestroyAndReopen(options
);
3030 constexpr int kNumKeys
= 10;
3032 // Write kNumKeys to WAL.
3033 for (int i
= 0; i
< kNumKeys
; ++i
) {
3034 ASSERT_OK(Put(Key(i
), "val"));
3036 ReadOptions read_opts
;
3037 read_opts
.total_order_seek
= true;
3039 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(read_opts
));
3041 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
3044 ASSERT_EQ(kNumKeys
, count
);
3047 // Reopen and rebuild the memtable from WAL.
3048 options
.create_if_missing
= false;
3049 options
.avoid_flush_during_recovery
= true;
3050 options
.inplace_update_support
= true;
3051 options
.allow_concurrent_memtable_write
= false;
3054 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(read_opts
));
3056 // Backward iteration not supported due to inplace_update_support = true.
3057 ASSERT_TRUE(iter
->status().IsNotSupported());
3058 ASSERT_FALSE(iter
->Valid());
3062 } // namespace ROCKSDB_NAMESPACE
3064 int main(int argc
, char** argv
) {
3065 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
3066 ::testing::InitGoogleTest(&argc
, argv
);
3067 return RUN_ALL_TESTS();