1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
12 #include "db/arena_wrapped_db_iter.h"
13 #include "db/db_iter.h"
14 #include "db/db_test_util.h"
15 #include "port/port.h"
16 #include "port/stack_trace.h"
17 #include "rocksdb/iostats_context.h"
18 #include "rocksdb/perf_context.h"
19 #include "table/block_based/flush_block_policy.h"
20 #include "util/random.h"
21 #include "utilities/merge_operators/string_append/stringappend2.h"
23 namespace ROCKSDB_NAMESPACE
{
25 // A dumb ReadCallback which saying every key is committed.
26 class DummyReadCallback
: public ReadCallback
{
28 DummyReadCallback() : ReadCallback(kMaxSequenceNumber
) {}
29 bool IsVisibleFullCheck(SequenceNumber
/*seq*/) override
{ return true; }
30 void SetSnapshot(SequenceNumber seq
) { max_visible_seq_
= seq
; }
34 // bool: whether to pass read_callback to NewIterator().
35 class DBIteratorTest
: public DBTestBase
,
36 public testing::WithParamInterface
<bool> {
38 DBIteratorTest() : DBTestBase("db_iterator_test", /*env_do_fsync=*/true) {}
40 Iterator
* NewIterator(const ReadOptions
& read_options
,
41 ColumnFamilyHandle
* column_family
= nullptr) {
42 if (column_family
== nullptr) {
43 column_family
= db_
->DefaultColumnFamily();
46 static_cast_with_check
<ColumnFamilyHandleImpl
>(column_family
)->cfd();
47 SequenceNumber seq
= read_options
.snapshot
!= nullptr
48 ? read_options
.snapshot
->GetSequenceNumber()
49 : db_
->GetLatestSequenceNumber();
50 bool use_read_callback
= GetParam();
51 DummyReadCallback
* read_callback
= nullptr;
52 if (use_read_callback
) {
53 read_callback
= new DummyReadCallback();
54 read_callback
->SetSnapshot(seq
);
55 InstrumentedMutexLock
lock(&mutex_
);
56 read_callbacks_
.push_back(
57 std::unique_ptr
<DummyReadCallback
>(read_callback
));
59 return dbfull()->NewIteratorImpl(read_options
, cfd
, seq
, read_callback
);
63 InstrumentedMutex mutex_
;
64 std::vector
<std::unique_ptr
<DummyReadCallback
>> read_callbacks_
;
67 TEST_P(DBIteratorTest
, IteratorProperty
) {
68 // The test needs to be changed if kPersistedTier is supported in iterator.
69 Options options
= CurrentOptions();
70 CreateAndReopenWithCF({"pikachu"}, options
);
71 ASSERT_OK(Put(1, "1", "2"));
72 ASSERT_OK(Delete(1, "2"));
74 ropt
.pin_data
= false;
76 std::unique_ptr
<Iterator
> iter(NewIterator(ropt
, handles_
[1]));
78 std::string prop_value
;
79 ASSERT_NOK(iter
->GetProperty("non_existing.value", &prop_value
));
80 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
81 ASSERT_EQ("0", prop_value
);
82 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.internal-key", &prop_value
));
83 ASSERT_EQ("1", prop_value
);
85 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
86 ASSERT_EQ("Iterator is not valid.", prop_value
);
88 // Get internal key at which the iteration stopped (tombstone in this case).
89 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.internal-key", &prop_value
));
90 ASSERT_EQ("2", prop_value
);
95 TEST_P(DBIteratorTest
, PersistedTierOnIterator
) {
96 // The test needs to be changed if kPersistedTier is supported in iterator.
97 Options options
= CurrentOptions();
98 CreateAndReopenWithCF({"pikachu"}, options
);
100 ropt
.read_tier
= kPersistedTier
;
102 auto* iter
= db_
->NewIterator(ropt
, handles_
[1]);
103 ASSERT_TRUE(iter
->status().IsNotSupported());
106 std::vector
<Iterator
*> iters
;
107 ASSERT_TRUE(db_
->NewIterators(ropt
, {handles_
[1]}, &iters
).IsNotSupported());
111 TEST_P(DBIteratorTest
, NonBlockingIteration
) {
113 ReadOptions non_blocking_opts
, regular_opts
;
114 anon::OptionsOverride options_override
;
115 options_override
.full_block_cache
= true;
116 Options options
= CurrentOptions(options_override
);
117 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
118 non_blocking_opts
.read_tier
= kBlockCacheTier
;
120 CreateAndReopenWithCF({"pikachu"}, options
);
121 // write one kv to the database.
122 ASSERT_OK(Put(1, "a", "b"));
124 // scan using non-blocking iterator. We should find it because
125 // it is in memtable.
126 Iterator
* iter
= NewIterator(non_blocking_opts
, handles_
[1]);
128 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
129 ASSERT_OK(iter
->status());
135 // flush memtable to storage. Now, the key should not be in the
136 // memtable neither in the block cache.
139 // verify that a non-blocking iterator does not find any
140 // kvs. Neither does it do any IOs to storage.
141 uint64_t numopen
= TestGetTickerCount(options
, NO_FILE_OPENS
);
142 uint64_t cache_added
= TestGetTickerCount(options
, BLOCK_CACHE_ADD
);
143 iter
= NewIterator(non_blocking_opts
, handles_
[1]);
145 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
149 ASSERT_TRUE(iter
->status().IsIncomplete());
150 ASSERT_EQ(numopen
, TestGetTickerCount(options
, NO_FILE_OPENS
));
151 ASSERT_EQ(cache_added
, TestGetTickerCount(options
, BLOCK_CACHE_ADD
));
154 // read in the specified block via a regular get
155 ASSERT_EQ(Get(1, "a"), "b");
157 // verify that we can find it via a non-blocking scan
158 numopen
= TestGetTickerCount(options
, NO_FILE_OPENS
);
159 cache_added
= TestGetTickerCount(options
, BLOCK_CACHE_ADD
);
160 iter
= NewIterator(non_blocking_opts
, handles_
[1]);
162 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
163 ASSERT_OK(iter
->status());
167 ASSERT_EQ(numopen
, TestGetTickerCount(options
, NO_FILE_OPENS
));
168 ASSERT_EQ(cache_added
, TestGetTickerCount(options
, BLOCK_CACHE_ADD
));
171 // This test verifies block cache behaviors, which is not used by plain
173 } while (ChangeOptions(kSkipPlainTable
| kSkipNoSeekToLast
| kSkipMmapReads
));
176 TEST_P(DBIteratorTest
, IterSeekBeforePrev
) {
177 ASSERT_OK(Put("a", "b"));
178 ASSERT_OK(Put("c", "d"));
179 EXPECT_OK(dbfull()->Flush(FlushOptions()));
180 ASSERT_OK(Put("0", "f"));
181 ASSERT_OK(Put("1", "h"));
182 EXPECT_OK(dbfull()->Flush(FlushOptions()));
183 ASSERT_OK(Put("2", "j"));
184 auto iter
= NewIterator(ReadOptions());
185 iter
->Seek(Slice("c"));
187 iter
->Seek(Slice("a"));
192 TEST_P(DBIteratorTest
, IterReseekNewUpperBound
) {
194 Options options
= CurrentOptions();
195 BlockBasedTableOptions table_options
;
196 table_options
.block_size
= 1024;
197 table_options
.block_size_deviation
= 50;
198 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
199 options
.compression
= kNoCompression
;
202 ASSERT_OK(Put("a", rnd
.RandomString(400)));
203 ASSERT_OK(Put("aabb", rnd
.RandomString(400)));
204 ASSERT_OK(Put("aaef", rnd
.RandomString(400)));
205 ASSERT_OK(Put("b", rnd
.RandomString(400)));
206 EXPECT_OK(dbfull()->Flush(FlushOptions()));
208 Slice ub
= Slice("aa");
209 opts
.iterate_upper_bound
= &ub
;
210 auto iter
= NewIterator(opts
);
211 iter
->Seek(Slice("a"));
213 iter
->Seek(Slice("aabc"));
214 ASSERT_TRUE(iter
->Valid());
215 ASSERT_EQ(iter
->key().ToString(), "aaef");
219 TEST_P(DBIteratorTest
, IterSeekForPrevBeforeNext
) {
220 ASSERT_OK(Put("a", "b"));
221 ASSERT_OK(Put("c", "d"));
222 EXPECT_OK(dbfull()->Flush(FlushOptions()));
223 ASSERT_OK(Put("0", "f"));
224 ASSERT_OK(Put("1", "h"));
225 EXPECT_OK(dbfull()->Flush(FlushOptions()));
226 ASSERT_OK(Put("2", "j"));
227 auto iter
= NewIterator(ReadOptions());
228 iter
->SeekForPrev(Slice("0"));
230 iter
->SeekForPrev(Slice("1"));
236 std::string
MakeLongKey(size_t length
, char c
) {
237 return std::string(length
, c
);
239 } // anonymous namespace
241 TEST_P(DBIteratorTest
, IterLongKeys
) {
242 ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
243 ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
244 ASSERT_OK(Put("a", "b"));
245 EXPECT_OK(dbfull()->Flush(FlushOptions()));
246 ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
247 ASSERT_OK(Put(MakeLongKey(127, 3), "3"));
248 ASSERT_OK(Put(MakeLongKey(64, 4), "4"));
249 auto iter
= NewIterator(ReadOptions());
251 // Create a key that needs to be skipped for Seq too new
252 iter
->Seek(MakeLongKey(20, 0));
253 ASSERT_EQ(IterStatus(iter
), MakeLongKey(20, 0) + "->0");
255 ASSERT_EQ(IterStatus(iter
), MakeLongKey(50, 1) + "->1");
257 ASSERT_EQ(IterStatus(iter
), MakeLongKey(32, 2) + "->2");
259 ASSERT_EQ(IterStatus(iter
), MakeLongKey(127, 3) + "->3");
261 ASSERT_EQ(IterStatus(iter
), MakeLongKey(64, 4) + "->4");
263 iter
->SeekForPrev(MakeLongKey(127, 3));
264 ASSERT_EQ(IterStatus(iter
), MakeLongKey(127, 3) + "->3");
266 ASSERT_EQ(IterStatus(iter
), MakeLongKey(32, 2) + "->2");
268 ASSERT_EQ(IterStatus(iter
), MakeLongKey(50, 1) + "->1");
271 iter
= NewIterator(ReadOptions());
272 iter
->Seek(MakeLongKey(50, 1));
273 ASSERT_EQ(IterStatus(iter
), MakeLongKey(50, 1) + "->1");
275 ASSERT_EQ(IterStatus(iter
), MakeLongKey(32, 2) + "->2");
277 ASSERT_EQ(IterStatus(iter
), MakeLongKey(127, 3) + "->3");
281 TEST_P(DBIteratorTest
, IterNextWithNewerSeq
) {
282 ASSERT_OK(Put("0", "0"));
283 EXPECT_OK(dbfull()->Flush(FlushOptions()));
284 ASSERT_OK(Put("a", "b"));
285 ASSERT_OK(Put("c", "d"));
286 ASSERT_OK(Put("d", "e"));
287 auto iter
= NewIterator(ReadOptions());
289 // Create a key that needs to be skipped for Seq too new
290 for (uint64_t i
= 0; i
< last_options_
.max_sequential_skip_in_iterations
+ 1;
292 ASSERT_OK(Put("b", "f"));
295 iter
->Seek(Slice("a"));
296 ASSERT_EQ(IterStatus(iter
), "a->b");
298 ASSERT_EQ(IterStatus(iter
), "c->d");
299 iter
->SeekForPrev(Slice("b"));
300 ASSERT_EQ(IterStatus(iter
), "a->b");
302 ASSERT_EQ(IterStatus(iter
), "c->d");
307 TEST_P(DBIteratorTest
, IterPrevWithNewerSeq
) {
308 ASSERT_OK(Put("0", "0"));
309 EXPECT_OK(dbfull()->Flush(FlushOptions()));
310 ASSERT_OK(Put("a", "b"));
311 ASSERT_OK(Put("c", "d"));
312 ASSERT_OK(Put("d", "e"));
313 auto iter
= NewIterator(ReadOptions());
315 // Create a key that needs to be skipped for Seq too new
316 for (uint64_t i
= 0; i
< last_options_
.max_sequential_skip_in_iterations
+ 1;
318 ASSERT_OK(Put("b", "f"));
321 iter
->Seek(Slice("d"));
322 ASSERT_EQ(IterStatus(iter
), "d->e");
324 ASSERT_EQ(IterStatus(iter
), "c->d");
326 ASSERT_EQ(IterStatus(iter
), "a->b");
328 iter
->SeekForPrev(Slice("d"));
329 ASSERT_EQ(IterStatus(iter
), "d->e");
331 ASSERT_EQ(IterStatus(iter
), "c->d");
333 ASSERT_EQ(IterStatus(iter
), "a->b");
338 TEST_P(DBIteratorTest
, IterPrevWithNewerSeq2
) {
339 ASSERT_OK(Put("0", "0"));
340 EXPECT_OK(dbfull()->Flush(FlushOptions()));
341 ASSERT_OK(Put("a", "b"));
342 ASSERT_OK(Put("c", "d"));
343 ASSERT_OK(Put("e", "f"));
344 auto iter
= NewIterator(ReadOptions());
345 auto iter2
= NewIterator(ReadOptions());
346 iter
->Seek(Slice("c"));
347 iter2
->SeekForPrev(Slice("d"));
348 ASSERT_EQ(IterStatus(iter
), "c->d");
349 ASSERT_EQ(IterStatus(iter2
), "c->d");
351 // Create a key that needs to be skipped for Seq too new
352 for (uint64_t i
= 0; i
< last_options_
.max_sequential_skip_in_iterations
+ 1;
354 ASSERT_OK(Put("b", "f"));
358 ASSERT_EQ(IterStatus(iter
), "a->b");
361 ASSERT_EQ(IterStatus(iter2
), "a->b");
367 TEST_P(DBIteratorTest
, IterEmpty
) {
369 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
370 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
373 ASSERT_EQ(IterStatus(iter
), "(invalid)");
376 ASSERT_EQ(IterStatus(iter
), "(invalid)");
379 ASSERT_EQ(IterStatus(iter
), "(invalid)");
381 iter
->SeekForPrev("foo");
382 ASSERT_EQ(IterStatus(iter
), "(invalid)");
384 ASSERT_OK(iter
->status());
387 } while (ChangeCompactOptions());
390 TEST_P(DBIteratorTest
, IterSingle
) {
392 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
393 ASSERT_OK(Put(1, "a", "va"));
394 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
397 ASSERT_EQ(IterStatus(iter
), "a->va");
399 ASSERT_EQ(IterStatus(iter
), "(invalid)");
401 ASSERT_EQ(IterStatus(iter
), "a->va");
403 ASSERT_EQ(IterStatus(iter
), "(invalid)");
406 ASSERT_EQ(IterStatus(iter
), "a->va");
408 ASSERT_EQ(IterStatus(iter
), "(invalid)");
410 ASSERT_EQ(IterStatus(iter
), "a->va");
412 ASSERT_EQ(IterStatus(iter
), "(invalid)");
415 ASSERT_EQ(IterStatus(iter
), "a->va");
417 ASSERT_EQ(IterStatus(iter
), "(invalid)");
418 iter
->SeekForPrev("");
419 ASSERT_EQ(IterStatus(iter
), "(invalid)");
422 ASSERT_EQ(IterStatus(iter
), "a->va");
424 ASSERT_EQ(IterStatus(iter
), "(invalid)");
425 iter
->SeekForPrev("a");
426 ASSERT_EQ(IterStatus(iter
), "a->va");
428 ASSERT_EQ(IterStatus(iter
), "(invalid)");
431 ASSERT_EQ(IterStatus(iter
), "(invalid)");
432 iter
->SeekForPrev("b");
433 ASSERT_EQ(IterStatus(iter
), "a->va");
435 ASSERT_EQ(IterStatus(iter
), "(invalid)");
438 } while (ChangeCompactOptions());
441 TEST_P(DBIteratorTest
, IterMulti
) {
443 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
444 ASSERT_OK(Put(1, "a", "va"));
445 ASSERT_OK(Put(1, "b", "vb"));
446 ASSERT_OK(Put(1, "c", "vc"));
447 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
450 ASSERT_EQ(IterStatus(iter
), "a->va");
452 ASSERT_EQ(IterStatus(iter
), "b->vb");
454 ASSERT_EQ(IterStatus(iter
), "c->vc");
456 ASSERT_EQ(IterStatus(iter
), "(invalid)");
458 ASSERT_EQ(IterStatus(iter
), "a->va");
460 ASSERT_EQ(IterStatus(iter
), "(invalid)");
463 ASSERT_EQ(IterStatus(iter
), "c->vc");
465 ASSERT_EQ(IterStatus(iter
), "b->vb");
467 ASSERT_EQ(IterStatus(iter
), "a->va");
469 ASSERT_EQ(IterStatus(iter
), "(invalid)");
471 ASSERT_EQ(IterStatus(iter
), "c->vc");
473 ASSERT_EQ(IterStatus(iter
), "(invalid)");
476 ASSERT_EQ(IterStatus(iter
), "a->va");
478 ASSERT_EQ(IterStatus(iter
), "a->va");
480 ASSERT_EQ(IterStatus(iter
), "b->vb");
481 iter
->SeekForPrev("d");
482 ASSERT_EQ(IterStatus(iter
), "c->vc");
483 iter
->SeekForPrev("c");
484 ASSERT_EQ(IterStatus(iter
), "c->vc");
485 iter
->SeekForPrev("bx");
486 ASSERT_EQ(IterStatus(iter
), "b->vb");
489 ASSERT_EQ(IterStatus(iter
), "b->vb");
491 ASSERT_EQ(IterStatus(iter
), "(invalid)");
492 iter
->SeekForPrev("b");
493 ASSERT_EQ(IterStatus(iter
), "b->vb");
494 iter
->SeekForPrev("");
495 ASSERT_EQ(IterStatus(iter
), "(invalid)");
497 // Switch from reverse to forward
502 ASSERT_EQ(IterStatus(iter
), "b->vb");
504 // Switch from forward to reverse
509 ASSERT_EQ(IterStatus(iter
), "b->vb");
511 // Make sure iter stays at snapshot
512 ASSERT_OK(Put(1, "a", "va2"));
513 ASSERT_OK(Put(1, "a2", "va3"));
514 ASSERT_OK(Put(1, "b", "vb2"));
515 ASSERT_OK(Put(1, "c", "vc2"));
516 ASSERT_OK(Delete(1, "b"));
518 ASSERT_EQ(IterStatus(iter
), "a->va");
520 ASSERT_EQ(IterStatus(iter
), "b->vb");
522 ASSERT_EQ(IterStatus(iter
), "c->vc");
524 ASSERT_EQ(IterStatus(iter
), "(invalid)");
526 ASSERT_EQ(IterStatus(iter
), "c->vc");
528 ASSERT_EQ(IterStatus(iter
), "b->vb");
530 ASSERT_EQ(IterStatus(iter
), "a->va");
532 ASSERT_EQ(IterStatus(iter
), "(invalid)");
535 } while (ChangeCompactOptions());
538 // Check that we can skip over a run of user keys
539 // by using reseek rather than sequential scan
540 TEST_P(DBIteratorTest
, IterReseek
) {
541 anon::OptionsOverride options_override
;
542 options_override
.skip_policy
= kSkipNoSnapshot
;
543 Options options
= CurrentOptions(options_override
);
544 options
.max_sequential_skip_in_iterations
= 3;
545 options
.create_if_missing
= true;
546 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
547 DestroyAndReopen(options
);
548 CreateAndReopenWithCF({"pikachu"}, options
);
550 // insert three keys with same userkey and verify that
551 // reseek is not invoked. For each of these test cases,
552 // verify that we can find the next key "b".
553 ASSERT_OK(Put(1, "a", "zero"));
554 ASSERT_OK(Put(1, "a", "one"));
555 ASSERT_OK(Put(1, "a", "two"));
556 ASSERT_OK(Put(1, "b", "bone"));
557 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
559 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 0);
560 ASSERT_EQ(IterStatus(iter
), "a->two");
562 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 0);
563 ASSERT_EQ(IterStatus(iter
), "b->bone");
566 // insert a total of three keys with same userkey and verify
567 // that reseek is still not invoked.
568 ASSERT_OK(Put(1, "a", "three"));
569 iter
= NewIterator(ReadOptions(), handles_
[1]);
571 ASSERT_EQ(IterStatus(iter
), "a->three");
573 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 0);
574 ASSERT_EQ(IterStatus(iter
), "b->bone");
577 // insert a total of four keys with same userkey and verify
578 // that reseek is invoked.
579 ASSERT_OK(Put(1, "a", "four"));
580 iter
= NewIterator(ReadOptions(), handles_
[1]);
582 ASSERT_EQ(IterStatus(iter
), "a->four");
583 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 0);
585 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 1);
586 ASSERT_EQ(IterStatus(iter
), "b->bone");
589 // Testing reverse iterator
590 // At this point, we have three versions of "a" and one version of "b".
591 // The reseek statistics is already at 1.
592 int num_reseeks
= static_cast<int>(
593 TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
));
595 // Insert another version of b and assert that reseek is not invoked
596 ASSERT_OK(Put(1, "b", "btwo"));
597 iter
= NewIterator(ReadOptions(), handles_
[1]);
599 ASSERT_EQ(IterStatus(iter
), "b->btwo");
600 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
),
603 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
),
605 ASSERT_EQ(IterStatus(iter
), "a->four");
608 // insert two more versions of b. This makes a total of 4 versions
609 // of b and 4 versions of a.
610 ASSERT_OK(Put(1, "b", "bthree"));
611 ASSERT_OK(Put(1, "b", "bfour"));
612 iter
= NewIterator(ReadOptions(), handles_
[1]);
614 ASSERT_EQ(IterStatus(iter
), "b->bfour");
615 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
),
619 // the previous Prev call should have invoked reseek
620 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
),
622 ASSERT_EQ(IterStatus(iter
), "a->four");
626 TEST_F(DBIteratorTest
, ReseekUponDirectionChange
) {
627 Options options
= GetDefaultOptions();
628 options
.create_if_missing
= true;
629 options
.prefix_extractor
.reset(NewFixedPrefixTransform(1));
630 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
631 options
.merge_operator
.reset(
632 new StringAppendTESTOperator(/*delim_char=*/' '));
633 DestroyAndReopen(options
);
634 ASSERT_OK(Put("foo", "value"));
635 ASSERT_OK(Put("bar", "value"));
637 std::unique_ptr
<Iterator
> it(db_
->NewIterator(ReadOptions()));
643 options
.statistics
->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION
));
645 const std::string
merge_key("good");
646 ASSERT_OK(Put(merge_key
, "orig"));
647 ASSERT_OK(Merge(merge_key
, "suffix"));
649 std::unique_ptr
<Iterator
> it(db_
->NewIterator(ReadOptions()));
651 ASSERT_TRUE(it
->Valid());
652 const uint64_t prev_reseek_count
=
653 options
.statistics
->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION
);
655 ASSERT_EQ(prev_reseek_count
+ 1, options
.statistics
->getTickerCount(
656 NUMBER_OF_RESEEKS_IN_ITERATION
));
660 TEST_P(DBIteratorTest
, IterSmallAndLargeMix
) {
662 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
663 ASSERT_OK(Put(1, "a", "va"));
664 ASSERT_OK(Put(1, "b", std::string(100000, 'b')));
665 ASSERT_OK(Put(1, "c", "vc"));
666 ASSERT_OK(Put(1, "d", std::string(100000, 'd')));
667 ASSERT_OK(Put(1, "e", std::string(100000, 'e')));
669 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
672 ASSERT_EQ(IterStatus(iter
), "a->va");
674 ASSERT_EQ(IterStatus(iter
), "b->" + std::string(100000, 'b'));
676 ASSERT_EQ(IterStatus(iter
), "c->vc");
678 ASSERT_EQ(IterStatus(iter
), "d->" + std::string(100000, 'd'));
680 ASSERT_EQ(IterStatus(iter
), "e->" + std::string(100000, 'e'));
682 ASSERT_EQ(IterStatus(iter
), "(invalid)");
685 ASSERT_EQ(IterStatus(iter
), "e->" + std::string(100000, 'e'));
687 ASSERT_EQ(IterStatus(iter
), "d->" + std::string(100000, 'd'));
689 ASSERT_EQ(IterStatus(iter
), "c->vc");
691 ASSERT_EQ(IterStatus(iter
), "b->" + std::string(100000, 'b'));
693 ASSERT_EQ(IterStatus(iter
), "a->va");
695 ASSERT_EQ(IterStatus(iter
), "(invalid)");
698 } while (ChangeCompactOptions());
701 TEST_P(DBIteratorTest
, IterMultiWithDelete
) {
703 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
704 ASSERT_OK(Put(1, "ka", "va"));
705 ASSERT_OK(Put(1, "kb", "vb"));
706 ASSERT_OK(Put(1, "kc", "vc"));
707 ASSERT_OK(Delete(1, "kb"));
708 ASSERT_EQ("NOT_FOUND", Get(1, "kb"));
710 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
712 ASSERT_EQ(IterStatus(iter
), "kc->vc");
713 if (!CurrentOptions().merge_operator
) {
714 // TODO: merge operator does not support backward iteration yet
715 if (kPlainTableAllBytesPrefix
!= option_config_
&&
716 kBlockBasedTableWithWholeKeyHashIndex
!= option_config_
&&
717 kHashLinkList
!= option_config_
&&
718 kHashSkipList
!= option_config_
) { // doesn't support SeekToLast
720 ASSERT_EQ(IterStatus(iter
), "ka->va");
724 } while (ChangeOptions());
727 TEST_P(DBIteratorTest
, IterPrevMaxSkip
) {
729 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
730 for (int i
= 0; i
< 2; i
++) {
731 ASSERT_OK(Put(1, "key1", "v1"));
732 ASSERT_OK(Put(1, "key2", "v2"));
733 ASSERT_OK(Put(1, "key3", "v3"));
734 ASSERT_OK(Put(1, "key4", "v4"));
735 ASSERT_OK(Put(1, "key5", "v5"));
738 VerifyIterLast("key5->v5", 1);
740 ASSERT_OK(Delete(1, "key5"));
741 VerifyIterLast("key4->v4", 1);
743 ASSERT_OK(Delete(1, "key4"));
744 VerifyIterLast("key3->v3", 1);
746 ASSERT_OK(Delete(1, "key3"));
747 VerifyIterLast("key2->v2", 1);
749 ASSERT_OK(Delete(1, "key2"));
750 VerifyIterLast("key1->v1", 1);
752 ASSERT_OK(Delete(1, "key1"));
753 VerifyIterLast("(invalid)", 1);
754 } while (ChangeOptions(kSkipMergePut
| kSkipNoSeekToLast
));
757 TEST_P(DBIteratorTest
, IterWithSnapshot
) {
758 anon::OptionsOverride options_override
;
759 options_override
.skip_policy
= kSkipNoSnapshot
;
761 CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override
));
762 ASSERT_OK(Put(1, "key1", "val1"));
763 ASSERT_OK(Put(1, "key2", "val2"));
764 ASSERT_OK(Put(1, "key3", "val3"));
765 ASSERT_OK(Put(1, "key4", "val4"));
766 ASSERT_OK(Put(1, "key5", "val5"));
768 const Snapshot
* snapshot
= db_
->GetSnapshot();
770 options
.snapshot
= snapshot
;
771 Iterator
* iter
= NewIterator(options
, handles_
[1]);
773 ASSERT_OK(Put(1, "key0", "val0"));
774 // Put more values after the snapshot
775 ASSERT_OK(Put(1, "key100", "val100"));
776 ASSERT_OK(Put(1, "key101", "val101"));
779 ASSERT_EQ(IterStatus(iter
), "key5->val5");
780 if (!CurrentOptions().merge_operator
) {
781 // TODO: merge operator does not support backward iteration yet
782 if (kPlainTableAllBytesPrefix
!= option_config_
&&
783 kBlockBasedTableWithWholeKeyHashIndex
!= option_config_
&&
784 kHashLinkList
!= option_config_
&& kHashSkipList
!= option_config_
) {
786 ASSERT_EQ(IterStatus(iter
), "key4->val4");
788 ASSERT_EQ(IterStatus(iter
), "key3->val3");
791 ASSERT_EQ(IterStatus(iter
), "key4->val4");
793 ASSERT_EQ(IterStatus(iter
), "key5->val5");
796 ASSERT_TRUE(!iter
->Valid());
799 if (!CurrentOptions().merge_operator
) {
800 // TODO(gzh): merge operator does not support backward iteration yet
801 if (kPlainTableAllBytesPrefix
!= option_config_
&&
802 kBlockBasedTableWithWholeKeyHashIndex
!= option_config_
&&
803 kHashLinkList
!= option_config_
&& kHashSkipList
!= option_config_
) {
804 iter
->SeekForPrev("key1");
805 ASSERT_EQ(IterStatus(iter
), "key1->val1");
807 ASSERT_EQ(IterStatus(iter
), "key2->val2");
809 ASSERT_EQ(IterStatus(iter
), "key3->val3");
811 ASSERT_EQ(IterStatus(iter
), "key2->val2");
813 ASSERT_EQ(IterStatus(iter
), "key1->val1");
815 ASSERT_TRUE(!iter
->Valid());
818 db_
->ReleaseSnapshot(snapshot
);
820 } while (ChangeOptions());
823 TEST_P(DBIteratorTest
, IteratorPinsRef
) {
825 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
826 ASSERT_OK(Put(1, "foo", "hello"));
828 // Get iterator that will yield the current contents of the DB.
829 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
831 // Write to force compactions
832 ASSERT_OK(Put(1, "foo", "newvalue1"));
833 for (int i
= 0; i
< 100; i
++) {
835 ASSERT_OK(Put(1, Key(i
), Key(i
) + std::string(100000, 'v')));
837 ASSERT_OK(Put(1, "foo", "newvalue2"));
840 ASSERT_TRUE(iter
->Valid());
841 ASSERT_EQ("foo", iter
->key().ToString());
842 ASSERT_EQ("hello", iter
->value().ToString());
844 ASSERT_TRUE(!iter
->Valid());
846 } while (ChangeCompactOptions());
849 TEST_P(DBIteratorTest
, IteratorDeleteAfterCfDelete
) {
850 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
852 ASSERT_OK(Put(1, "foo", "delete-cf-then-delete-iter"));
853 ASSERT_OK(Put(1, "hello", "value2"));
855 ColumnFamilyHandle
* cf
= handles_
[1];
858 auto* iter
= db_
->NewIterator(ro
, cf
);
860 ASSERT_EQ(IterStatus(iter
), "foo->delete-cf-then-delete-iter");
863 EXPECT_OK(db_
->DestroyColumnFamilyHandle(cf
));
864 handles_
.erase(std::begin(handles_
) + 1);
866 // delete Iterator after CF handle is deleted
868 ASSERT_EQ(IterStatus(iter
), "hello->value2");
872 TEST_P(DBIteratorTest
, IteratorDeleteAfterCfDrop
) {
873 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
875 ASSERT_OK(Put(1, "foo", "drop-cf-then-delete-iter"));
878 ColumnFamilyHandle
* cf
= handles_
[1];
880 auto* iter
= db_
->NewIterator(ro
, cf
);
882 ASSERT_EQ(IterStatus(iter
), "foo->drop-cf-then-delete-iter");
884 // drop and delete CF
885 EXPECT_OK(db_
->DropColumnFamily(cf
));
886 EXPECT_OK(db_
->DestroyColumnFamilyHandle(cf
));
887 handles_
.erase(std::begin(handles_
) + 1);
889 // delete Iterator after CF handle is dropped
893 // SetOptions not defined in ROCKSDB LITE
895 TEST_P(DBIteratorTest
, DBIteratorBoundTest
) {
896 Options options
= CurrentOptions();
898 options
.create_if_missing
= true;
900 options
.prefix_extractor
= nullptr;
901 DestroyAndReopen(options
);
902 ASSERT_OK(Put("a", "0"));
903 ASSERT_OK(Put("foo", "bar"));
904 ASSERT_OK(Put("foo1", "bar1"));
905 ASSERT_OK(Put("g1", "0"));
907 // testing basic case with no iterate_upper_bound and no prefix_extractor
910 ro
.iterate_upper_bound
= nullptr;
912 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
916 ASSERT_TRUE(iter
->Valid());
917 ASSERT_EQ(iter
->key().compare(Slice("foo")), 0);
920 ASSERT_TRUE(iter
->Valid());
921 ASSERT_EQ(iter
->key().compare(Slice("foo1")), 0);
924 ASSERT_TRUE(iter
->Valid());
925 ASSERT_EQ(iter
->key().compare(Slice("g1")), 0);
927 iter
->SeekForPrev("g1");
929 ASSERT_TRUE(iter
->Valid());
930 ASSERT_EQ(iter
->key().compare(Slice("g1")), 0);
933 ASSERT_TRUE(iter
->Valid());
934 ASSERT_EQ(iter
->key().compare(Slice("foo1")), 0);
937 ASSERT_TRUE(iter
->Valid());
938 ASSERT_EQ(iter
->key().compare(Slice("foo")), 0);
941 // testing iterate_upper_bound and forward iterator
942 // to make sure it stops at bound
945 // iterate_upper_bound points beyond the last expected entry
946 Slice
prefix("foo2");
947 ro
.iterate_upper_bound
= &prefix
;
949 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
953 ASSERT_TRUE(iter
->Valid());
954 ASSERT_EQ(iter
->key().compare(Slice("foo")), 0);
957 ASSERT_TRUE(iter
->Valid());
958 ASSERT_EQ(iter
->key().compare(("foo1")), 0);
961 // should stop here...
962 ASSERT_TRUE(!iter
->Valid());
964 // Testing SeekToLast with iterate_upper_bound set
969 ro
.iterate_upper_bound
= &prefix
;
971 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
974 ASSERT_TRUE(iter
->Valid());
975 ASSERT_EQ(iter
->key().compare(Slice("a")), 0);
978 // prefix is the first letter of the key
979 ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}}));
980 ASSERT_OK(Put("a", "0"));
981 ASSERT_OK(Put("foo", "bar"));
982 ASSERT_OK(Put("foo1", "bar1"));
983 ASSERT_OK(Put("g1", "0"));
985 // testing with iterate_upper_bound and prefix_extractor
986 // Seek target and iterate_upper_bound are not is same prefix
987 // This should be an error
990 Slice
upper_bound("g");
991 ro
.iterate_upper_bound
= &upper_bound
;
993 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
997 ASSERT_TRUE(iter
->Valid());
998 ASSERT_EQ("foo", iter
->key().ToString());
1001 ASSERT_TRUE(iter
->Valid());
1002 ASSERT_EQ("foo1", iter
->key().ToString());
1005 ASSERT_TRUE(!iter
->Valid());
1008 // testing that iterate_upper_bound prevents iterating over deleted items
1009 // if the bound has already reached
1011 options
.prefix_extractor
= nullptr;
1012 DestroyAndReopen(options
);
1013 ASSERT_OK(Put("a", "0"));
1014 ASSERT_OK(Put("b", "0"));
1015 ASSERT_OK(Put("b1", "0"));
1016 ASSERT_OK(Put("c", "0"));
1017 ASSERT_OK(Put("d", "0"));
1018 ASSERT_OK(Put("e", "0"));
1019 ASSERT_OK(Delete("c"));
1020 ASSERT_OK(Delete("d"));
1022 // base case with no bound
1024 ro
.iterate_upper_bound
= nullptr;
1026 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
1029 ASSERT_TRUE(iter
->Valid());
1030 ASSERT_EQ(iter
->key().compare(Slice("b")), 0);
1033 ASSERT_TRUE(iter
->Valid());
1034 ASSERT_EQ(iter
->key().compare(("b1")), 0);
1036 get_perf_context()->Reset();
1039 ASSERT_TRUE(iter
->Valid());
1041 static_cast<int>(get_perf_context()->internal_delete_skipped_count
), 2);
1043 // now testing with iterate_bound
1045 ro
.iterate_upper_bound
= &prefix
;
1047 iter
.reset(NewIterator(ro
));
1049 get_perf_context()->Reset();
1052 ASSERT_TRUE(iter
->Valid());
1053 ASSERT_EQ(iter
->key().compare(Slice("b")), 0);
1056 ASSERT_TRUE(iter
->Valid());
1057 ASSERT_EQ(iter
->key().compare(("b1")), 0);
1060 // the iteration should stop as soon as the bound key is reached
1061 // even though the key is deleted
1062 // hence internal_delete_skipped_count should be 0
1063 ASSERT_TRUE(!iter
->Valid());
1065 static_cast<int>(get_perf_context()->internal_delete_skipped_count
), 0);
1069 TEST_P(DBIteratorTest
, DBIteratorBoundMultiSeek
) {
1070 Options options
= CurrentOptions();
1072 options
.create_if_missing
= true;
1073 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
1074 options
.prefix_extractor
= nullptr;
1075 DestroyAndReopen(options
);
1076 ASSERT_OK(Put("a", "0"));
1077 ASSERT_OK(Put("z", "0"));
1079 ASSERT_OK(Put("foo1", "bar1"));
1080 ASSERT_OK(Put("foo2", "bar2"));
1081 ASSERT_OK(Put("foo3", "bar3"));
1082 ASSERT_OK(Put("foo4", "bar4"));
1085 std::string up_str
= "foo5";
1088 ro
.iterate_upper_bound
= &up
;
1089 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
1092 ASSERT_TRUE(iter
->Valid());
1093 ASSERT_EQ(iter
->key().compare(Slice("foo1")), 0);
1095 uint64_t prev_block_cache_hit
=
1096 TestGetTickerCount(options
, BLOCK_CACHE_HIT
);
1097 uint64_t prev_block_cache_miss
=
1098 TestGetTickerCount(options
, BLOCK_CACHE_MISS
);
1100 ASSERT_GT(prev_block_cache_hit
+ prev_block_cache_miss
, 0);
1103 ASSERT_TRUE(iter
->Valid());
1104 ASSERT_EQ(iter
->key().compare(Slice("foo4")), 0);
1105 ASSERT_EQ(prev_block_cache_hit
,
1106 TestGetTickerCount(options
, BLOCK_CACHE_HIT
));
1107 ASSERT_EQ(prev_block_cache_miss
,
1108 TestGetTickerCount(options
, BLOCK_CACHE_MISS
));
1111 ASSERT_TRUE(iter
->Valid());
1112 ASSERT_EQ(iter
->key().compare(Slice("foo2")), 0);
1114 ASSERT_TRUE(iter
->Valid());
1115 ASSERT_EQ(iter
->key().compare(Slice("foo3")), 0);
1116 ASSERT_EQ(prev_block_cache_hit
,
1117 TestGetTickerCount(options
, BLOCK_CACHE_HIT
));
1118 ASSERT_EQ(prev_block_cache_miss
,
1119 TestGetTickerCount(options
, BLOCK_CACHE_MISS
));
1124 TEST_P(DBIteratorTest
, DBIteratorBoundOptimizationTest
) {
1125 for (auto format_version
: {2, 3, 4}) {
1126 int upper_bound_hits
= 0;
1127 Options options
= CurrentOptions();
1128 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1129 "BlockBasedTableIterator:out_of_bound",
1130 [&upper_bound_hits
](void*) { upper_bound_hits
++; });
1131 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1133 options
.create_if_missing
= true;
1134 options
.prefix_extractor
= nullptr;
1135 BlockBasedTableOptions table_options
;
1136 table_options
.format_version
= format_version
;
1137 table_options
.flush_block_policy_factory
=
1138 std::make_shared
<FlushBlockEveryKeyPolicyFactory
>();
1139 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1141 DestroyAndReopen(options
);
1142 ASSERT_OK(Put("foo1", "bar1"));
1143 ASSERT_OK(Put("foo2", "bar2"));
1144 ASSERT_OK(Put("foo4", "bar4"));
1149 ro
.iterate_upper_bound
= &ub
;
1151 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
1154 ASSERT_TRUE(iter
->Valid());
1155 ASSERT_EQ(iter
->key().compare(Slice("foo1")), 0);
1156 ASSERT_EQ(upper_bound_hits
, 0);
1159 ASSERT_TRUE(iter
->Valid());
1160 ASSERT_EQ(iter
->key().compare(Slice("foo2")), 0);
1161 ASSERT_EQ(upper_bound_hits
, 0);
1164 ASSERT_FALSE(iter
->Valid());
1165 ASSERT_EQ(upper_bound_hits
, 1);
1169 // Enable kBinarySearchWithFirstKey, do some iterator operations and check that
1170 // they don't do unnecessary block reads.
1171 TEST_P(DBIteratorTest
, IndexWithFirstKey
) {
1172 for (int tailing
= 0; tailing
< 2; ++tailing
) {
1173 SCOPED_TRACE("tailing = " + std::to_string(tailing
));
1174 Options options
= CurrentOptions();
1176 options
.create_if_missing
= true;
1177 options
.prefix_extractor
= nullptr;
1178 options
.merge_operator
= MergeOperators::CreateStringAppendOperator();
1179 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
1180 Statistics
* stats
= options
.statistics
.get();
1181 BlockBasedTableOptions table_options
;
1182 table_options
.index_type
=
1183 BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey
;
1184 table_options
.index_shortening
=
1185 BlockBasedTableOptions::IndexShorteningMode::kNoShortening
;
1186 table_options
.flush_block_policy_factory
=
1187 std::make_shared
<FlushBlockEveryKeyPolicyFactory
>();
1188 table_options
.block_cache
=
1189 NewLRUCache(8000); // fits all blocks and their cache metadata overhead
1190 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1192 DestroyAndReopen(options
);
1193 ASSERT_OK(Merge("a1", "x1"));
1194 ASSERT_OK(Merge("b1", "y1"));
1195 ASSERT_OK(Merge("c0", "z1"));
1197 ASSERT_OK(Merge("a2", "x2"));
1198 ASSERT_OK(Merge("b2", "y2"));
1199 ASSERT_OK(Merge("c0", "z2"));
1201 ASSERT_OK(Merge("a3", "x3"));
1202 ASSERT_OK(Merge("b3", "y3"));
1203 ASSERT_OK(Merge("c3", "z3"));
1206 // Block cache is not important for this test.
1207 // We use BLOCK_CACHE_DATA_* counters just because they're the most readily
1208 // available way of counting block accesses.
1211 ropt
.tailing
= tailing
;
1212 std::unique_ptr
<Iterator
> iter(NewIterator(ropt
));
1214 ropt
.read_tier
= ReadTier::kBlockCacheTier
;
1215 std::unique_ptr
<Iterator
> nonblocking_iter(NewIterator(ropt
));
1218 ASSERT_TRUE(iter
->Valid());
1219 EXPECT_EQ("b2", iter
->key().ToString());
1220 EXPECT_EQ("y2", iter
->value().ToString());
1221 EXPECT_EQ(1, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1223 // The cache-only iterator should succeed too, using the blocks pulled into
1224 // the cache by the previous iterator.
1225 nonblocking_iter
->Seek("b10");
1226 ASSERT_TRUE(nonblocking_iter
->Valid());
1227 EXPECT_EQ("b2", nonblocking_iter
->key().ToString());
1228 EXPECT_EQ("y2", nonblocking_iter
->value().ToString());
1229 EXPECT_EQ(1, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1231 // ... but it shouldn't be able to step forward since the next block is
1232 // not in cache yet.
1233 nonblocking_iter
->Next();
1234 ASSERT_FALSE(nonblocking_iter
->Valid());
1235 ASSERT_TRUE(nonblocking_iter
->status().IsIncomplete());
1237 // ... nor should a seek to the next key succeed.
1238 nonblocking_iter
->Seek("b20");
1239 ASSERT_FALSE(nonblocking_iter
->Valid());
1240 ASSERT_TRUE(nonblocking_iter
->status().IsIncomplete());
1243 ASSERT_TRUE(iter
->Valid());
1244 EXPECT_EQ("b3", iter
->key().ToString());
1245 EXPECT_EQ("y3", iter
->value().ToString());
1246 EXPECT_EQ(4, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1247 EXPECT_EQ(1, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1249 // After the blocking iterator loaded the next block, the nonblocking
1250 // iterator's seek should succeed.
1251 nonblocking_iter
->Seek("b20");
1252 ASSERT_TRUE(nonblocking_iter
->Valid());
1253 EXPECT_EQ("b3", nonblocking_iter
->key().ToString());
1254 EXPECT_EQ("y3", nonblocking_iter
->value().ToString());
1255 EXPECT_EQ(2, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1258 ASSERT_TRUE(iter
->Valid());
1259 EXPECT_EQ("c0", iter
->key().ToString());
1260 EXPECT_EQ("z1,z2", iter
->value().ToString());
1261 EXPECT_EQ(2, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1262 EXPECT_EQ(6, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1265 ASSERT_TRUE(iter
->Valid());
1266 EXPECT_EQ("c3", iter
->key().ToString());
1267 EXPECT_EQ("z3", iter
->value().ToString());
1268 EXPECT_EQ(2, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1269 EXPECT_EQ(7, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1273 // Enable iterate_upper_bound and check that iterator is not trying to read
1274 // blocks that are fully above upper bound.
1275 std::string ub
= "b3";
1277 ropt
.iterate_upper_bound
= &ub_slice
;
1278 iter
.reset(NewIterator(ropt
));
1281 ASSERT_TRUE(iter
->Valid());
1282 EXPECT_EQ("b2", iter
->key().ToString());
1283 EXPECT_EQ("y2", iter
->value().ToString());
1284 EXPECT_EQ(3, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1285 EXPECT_EQ(7, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1288 ASSERT_FALSE(iter
->Valid());
1289 EXPECT_EQ(3, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1290 EXPECT_EQ(7, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1294 TEST_P(DBIteratorTest
, IndexWithFirstKeyGet
) {
1295 Options options
= CurrentOptions();
1297 options
.create_if_missing
= true;
1298 options
.prefix_extractor
= nullptr;
1299 options
.merge_operator
= MergeOperators::CreateStringAppendOperator();
1300 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
1301 Statistics
* stats
= options
.statistics
.get();
1302 BlockBasedTableOptions table_options
;
1303 table_options
.index_type
=
1304 BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey
;
1305 table_options
.index_shortening
=
1306 BlockBasedTableOptions::IndexShorteningMode::kNoShortening
;
1307 table_options
.flush_block_policy_factory
=
1308 std::make_shared
<FlushBlockEveryKeyPolicyFactory
>();
1309 table_options
.block_cache
= NewLRUCache(1000); // fits all blocks
1310 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1312 DestroyAndReopen(options
);
1313 ASSERT_OK(Merge("a", "x1"));
1314 ASSERT_OK(Merge("c", "y1"));
1315 ASSERT_OK(Merge("e", "z1"));
1317 ASSERT_OK(Merge("c", "y2"));
1318 ASSERT_OK(Merge("e", "z2"));
1321 // Get() between blocks shouldn't read any blocks.
1322 ASSERT_EQ("NOT_FOUND", Get("b"));
1323 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1324 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1326 // Get() of an existing key shouldn't read any unnecessary blocks when there's
1327 // only one key per block.
1329 ASSERT_EQ("y1,y2", Get("c"));
1330 EXPECT_EQ(2, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1331 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1333 ASSERT_EQ("x1", Get("a"));
1334 EXPECT_EQ(3, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1335 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1337 EXPECT_EQ(std::vector
<std::string
>({"NOT_FOUND", "z1,z2"}),
1338 MultiGet({"b", "e"}));
1341 // TODO(3.13): fix the issue of Seek() + Prev() which might not necessary
1342 // return the biggest key which is smaller than the seek key.
1343 TEST_P(DBIteratorTest
, PrevAfterAndNextAfterMerge
) {
1345 options
.create_if_missing
= true;
1346 options
.merge_operator
= MergeOperators::CreatePutOperator();
1348 DestroyAndReopen(options
);
1350 // write three entries with different keys using Merge()
1352 ASSERT_OK(db_
->Merge(wopts
, "1", "data1"));
1353 ASSERT_OK(db_
->Merge(wopts
, "2", "data2"));
1354 ASSERT_OK(db_
->Merge(wopts
, "3", "data3"));
1356 std::unique_ptr
<Iterator
> it(NewIterator(ReadOptions()));
1359 ASSERT_TRUE(it
->Valid());
1360 ASSERT_EQ("2", it
->key().ToString());
1363 ASSERT_TRUE(it
->Valid());
1364 ASSERT_EQ("1", it
->key().ToString());
1366 it
->SeekForPrev("1");
1367 ASSERT_TRUE(it
->Valid());
1368 ASSERT_EQ("1", it
->key().ToString());
1371 ASSERT_TRUE(it
->Valid());
1372 ASSERT_EQ("2", it
->key().ToString());
1375 class DBIteratorTestForPinnedData
: public DBIteratorTest
{
1380 COMPACT_BEFORE_READ
,
1384 DBIteratorTestForPinnedData() : DBIteratorTest() {}
1385 void PinnedDataIteratorRandomized(TestConfig run_config
) {
1386 // Generate Random data
1390 int key_pool
= static_cast<int>(puts
* 0.7);
1392 int val_size
= 1000;
1393 int seeks_percentage
= 20; // 20% of keys will be used to test seek()
1394 int delete_percentage
= 20; // 20% of keys will be deleted
1395 int merge_percentage
= 20; // 20% of keys will be added using Merge()
1397 Options options
= CurrentOptions();
1398 BlockBasedTableOptions table_options
;
1399 table_options
.use_delta_encoding
= false;
1400 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1401 options
.merge_operator
= MergeOperators::CreatePutOperator();
1402 DestroyAndReopen(options
);
1404 std::vector
<std::string
> generated_keys(key_pool
);
1405 for (int i
= 0; i
< key_pool
; i
++) {
1406 generated_keys
[i
] = rnd
.RandomString(key_size
);
1409 std::map
<std::string
, std::string
> true_data
;
1410 std::vector
<std::string
> random_keys
;
1411 std::vector
<std::string
> deleted_keys
;
1412 for (int i
= 0; i
< puts
; i
++) {
1413 auto& k
= generated_keys
[rnd
.Next() % key_pool
];
1414 auto v
= rnd
.RandomString(val_size
);
1416 // Insert data to true_data map and to DB
1418 if (rnd
.PercentTrue(merge_percentage
)) {
1419 ASSERT_OK(db_
->Merge(WriteOptions(), k
, v
));
1421 ASSERT_OK(Put(k
, v
));
1424 // Pick random keys to be used to test Seek()
1425 if (rnd
.PercentTrue(seeks_percentage
)) {
1426 random_keys
.push_back(k
);
1429 // Delete some random keys
1430 if (rnd
.PercentTrue(delete_percentage
)) {
1431 deleted_keys
.push_back(k
);
1433 ASSERT_OK(Delete(k
));
1436 if (run_config
== TestConfig::FLUSH_EVERY_1000
) {
1437 if (i
&& i
% 1000 == 0) {
1443 if (run_config
== TestConfig::CLOSE_AND_OPEN
) {
1446 } else if (run_config
== TestConfig::COMPACT_BEFORE_READ
) {
1447 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1452 auto iter
= NewIterator(ro
);
1455 // Test Seek to random keys
1456 std::vector
<Slice
> keys_slices
;
1457 std::vector
<std::string
> true_keys
;
1458 for (auto& k
: random_keys
) {
1460 if (!iter
->Valid()) {
1461 ASSERT_EQ(true_data
.lower_bound(k
), true_data
.end());
1464 std::string prop_value
;
1466 iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1467 ASSERT_EQ("1", prop_value
);
1468 keys_slices
.push_back(iter
->key());
1469 true_keys
.push_back(true_data
.lower_bound(k
)->first
);
1472 for (size_t i
= 0; i
< keys_slices
.size(); i
++) {
1473 ASSERT_EQ(keys_slices
[i
].ToString(), true_keys
[i
]);
1478 // Test SeekForPrev to random keys
1479 std::vector
<Slice
> keys_slices
;
1480 std::vector
<std::string
> true_keys
;
1481 for (auto& k
: random_keys
) {
1482 iter
->SeekForPrev(k
);
1483 if (!iter
->Valid()) {
1484 ASSERT_EQ(true_data
.upper_bound(k
), true_data
.begin());
1487 std::string prop_value
;
1489 iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1490 ASSERT_EQ("1", prop_value
);
1491 keys_slices
.push_back(iter
->key());
1492 true_keys
.push_back((--true_data
.upper_bound(k
))->first
);
1495 for (size_t i
= 0; i
< keys_slices
.size(); i
++) {
1496 ASSERT_EQ(keys_slices
[i
].ToString(), true_keys
[i
]);
1501 // Test iterating all data forward
1502 std::vector
<Slice
> all_keys
;
1503 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
1504 std::string prop_value
;
1506 iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1507 ASSERT_EQ("1", prop_value
);
1508 all_keys
.push_back(iter
->key());
1510 ASSERT_EQ(all_keys
.size(), true_data
.size());
1512 // Verify that all keys slices are valid
1513 auto data_iter
= true_data
.begin();
1514 for (size_t i
= 0; i
< all_keys
.size(); i
++) {
1515 ASSERT_EQ(all_keys
[i
].ToString(), data_iter
->first
);
1521 // Test iterating all data backward
1522 std::vector
<Slice
> all_keys
;
1523 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
1524 std::string prop_value
;
1526 iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1527 ASSERT_EQ("1", prop_value
);
1528 all_keys
.push_back(iter
->key());
1530 ASSERT_EQ(all_keys
.size(), true_data
.size());
1532 // Verify that all keys slices are valid (backward)
1533 auto data_iter
= true_data
.rbegin();
1534 for (size_t i
= 0; i
< all_keys
.size(); i
++) {
1535 ASSERT_EQ(all_keys
[i
].ToString(), data_iter
->first
);
1544 #if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
1545 TEST_P(DBIteratorTestForPinnedData
, PinnedDataIteratorRandomizedNormal
) {
1546 PinnedDataIteratorRandomized(TestConfig::NORMAL
);
1548 #endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
1550 TEST_P(DBIteratorTestForPinnedData
, PinnedDataIteratorRandomizedCLoseAndOpen
) {
1551 PinnedDataIteratorRandomized(TestConfig::CLOSE_AND_OPEN
);
1554 TEST_P(DBIteratorTestForPinnedData
,
1555 PinnedDataIteratorRandomizedCompactBeforeRead
) {
1556 PinnedDataIteratorRandomized(TestConfig::COMPACT_BEFORE_READ
);
1559 TEST_P(DBIteratorTestForPinnedData
, PinnedDataIteratorRandomizedFlush
) {
1560 PinnedDataIteratorRandomized(TestConfig::FLUSH_EVERY_1000
);
1563 INSTANTIATE_TEST_CASE_P(DBIteratorTestForPinnedDataInstance
,
1564 DBIteratorTestForPinnedData
,
1565 testing::Values(true, false));
1567 #ifndef ROCKSDB_LITE
1568 TEST_P(DBIteratorTest
, PinnedDataIteratorMultipleFiles
) {
1569 Options options
= CurrentOptions();
1570 BlockBasedTableOptions table_options
;
1571 table_options
.use_delta_encoding
= false;
1572 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1573 options
.disable_auto_compactions
= true;
1574 options
.write_buffer_size
= 1024 * 1024 * 10; // 10 Mb
1575 DestroyAndReopen(options
);
1577 std::map
<std::string
, std::string
> true_data
;
1579 // Generate 4 sst files in L2
1581 for (int i
= 1; i
<= 1000; i
++) {
1582 std::string k
= Key(i
* 3);
1583 std::string v
= rnd
.RandomString(100);
1584 ASSERT_OK(Put(k
, v
));
1590 ASSERT_EQ(FilesPerLevel(0), "4");
1591 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1592 ASSERT_EQ(FilesPerLevel(0), "0,4");
1594 // Generate 4 sst files in L0
1595 for (int i
= 1; i
<= 1000; i
++) {
1596 std::string k
= Key(i
* 2);
1597 std::string v
= rnd
.RandomString(100);
1598 ASSERT_OK(Put(k
, v
));
1604 ASSERT_EQ(FilesPerLevel(0), "4,4");
1606 // Add some keys/values in memtables
1607 for (int i
= 1; i
<= 1000; i
++) {
1608 std::string k
= Key(i
);
1609 std::string v
= rnd
.RandomString(100);
1610 ASSERT_OK(Put(k
, v
));
1613 ASSERT_EQ(FilesPerLevel(0), "4,4");
1617 auto iter
= NewIterator(ro
);
1619 std::vector
<std::pair
<Slice
, std::string
>> results
;
1620 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
1621 std::string prop_value
;
1622 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1623 ASSERT_EQ("1", prop_value
);
1624 results
.emplace_back(iter
->key(), iter
->value().ToString());
1627 ASSERT_EQ(results
.size(), true_data
.size());
1628 auto data_iter
= true_data
.begin();
1629 for (size_t i
= 0; i
< results
.size(); i
++, data_iter
++) {
1630 auto& kv
= results
[i
];
1631 ASSERT_EQ(kv
.first
, data_iter
->first
);
1632 ASSERT_EQ(kv
.second
, data_iter
->second
);
1639 TEST_P(DBIteratorTest
, PinnedDataIteratorMergeOperator
) {
1640 Options options
= CurrentOptions();
1641 BlockBasedTableOptions table_options
;
1642 table_options
.use_delta_encoding
= false;
1643 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1644 options
.merge_operator
= MergeOperators::CreateUInt64AddOperator();
1645 DestroyAndReopen(options
);
1647 std::string numbers
[7];
1648 for (int val
= 0; val
<= 6; val
++) {
1649 PutFixed64(numbers
+ val
, val
);
1652 // +1 all keys in range [ 0 => 999]
1653 for (int i
= 0; i
< 1000; i
++) {
1655 ASSERT_OK(db_
->Merge(wo
, Key(i
), numbers
[1]));
1658 // +2 all keys divisible by 2 in range [ 0 => 999]
1659 for (int i
= 0; i
< 1000; i
+= 2) {
1661 ASSERT_OK(db_
->Merge(wo
, Key(i
), numbers
[2]));
1664 // +3 all keys divisible by 5 in range [ 0 => 999]
1665 for (int i
= 0; i
< 1000; i
+= 5) {
1667 ASSERT_OK(db_
->Merge(wo
, Key(i
), numbers
[3]));
1672 auto iter
= NewIterator(ro
);
1674 std::vector
<std::pair
<Slice
, std::string
>> results
;
1675 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
1676 std::string prop_value
;
1677 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1678 ASSERT_EQ("1", prop_value
);
1679 results
.emplace_back(iter
->key(), iter
->value().ToString());
1682 ASSERT_EQ(results
.size(), 1000);
1683 for (size_t i
= 0; i
< results
.size(); i
++) {
1684 auto& kv
= results
[i
];
1685 ASSERT_EQ(kv
.first
, Key(static_cast<int>(i
)));
1686 int expected_val
= 1;
1693 ASSERT_EQ(kv
.second
, numbers
[expected_val
]);
1699 TEST_P(DBIteratorTest
, PinnedDataIteratorReadAfterUpdate
) {
1700 Options options
= CurrentOptions();
1701 BlockBasedTableOptions table_options
;
1702 table_options
.use_delta_encoding
= false;
1703 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1704 options
.write_buffer_size
= 100000;
1705 DestroyAndReopen(options
);
1709 std::map
<std::string
, std::string
> true_data
;
1710 for (int i
= 0; i
< 1000; i
++) {
1711 std::string k
= rnd
.RandomString(10);
1712 std::string v
= rnd
.RandomString(1000);
1713 ASSERT_OK(Put(k
, v
));
1719 auto iter
= NewIterator(ro
);
1721 // Delete 50% of the keys and update the other 50%
1722 for (auto& kv
: true_data
) {
1724 ASSERT_OK(Delete(kv
.first
));
1726 std::string new_val
= rnd
.RandomString(1000);
1727 ASSERT_OK(Put(kv
.first
, new_val
));
1731 std::vector
<std::pair
<Slice
, std::string
>> results
;
1732 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
1733 std::string prop_value
;
1734 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1735 ASSERT_EQ("1", prop_value
);
1736 results
.emplace_back(iter
->key(), iter
->value().ToString());
1739 auto data_iter
= true_data
.begin();
1740 for (size_t i
= 0; i
< results
.size(); i
++, data_iter
++) {
1741 auto& kv
= results
[i
];
1742 ASSERT_EQ(kv
.first
, data_iter
->first
);
1743 ASSERT_EQ(kv
.second
, data_iter
->second
);
1749 class SliceTransformLimitedDomainGeneric
: public SliceTransform
{
1750 const char* Name() const override
{
1751 return "SliceTransformLimitedDomainGeneric";
1754 Slice
Transform(const Slice
& src
) const override
{
1755 return Slice(src
.data(), 1);
1758 bool InDomain(const Slice
& src
) const override
{
1759 // prefix will be x????
1760 return src
.size() >= 1;
1763 bool InRange(const Slice
& dst
) const override
{
1764 // prefix will be x????
1765 return dst
.size() == 1;
1769 TEST_P(DBIteratorTest
, IterSeekForPrevCrossingFiles
) {
1770 Options options
= CurrentOptions();
1771 options
.prefix_extractor
.reset(NewFixedPrefixTransform(1));
1772 options
.disable_auto_compactions
= true;
1773 // Enable prefix bloom for SST files
1774 BlockBasedTableOptions table_options
;
1775 table_options
.filter_policy
.reset(NewBloomFilterPolicy(10, true));
1776 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1777 DestroyAndReopen(options
);
1779 ASSERT_OK(Put("a1", "va1"));
1780 ASSERT_OK(Put("a2", "va2"));
1781 ASSERT_OK(Put("a3", "va3"));
1784 ASSERT_OK(Put("b1", "vb1"));
1785 ASSERT_OK(Put("b2", "vb2"));
1786 ASSERT_OK(Put("b3", "vb3"));
1789 ASSERT_OK(Put("b4", "vb4"));
1790 ASSERT_OK(Put("d1", "vd1"));
1791 ASSERT_OK(Put("d2", "vd2"));
1792 ASSERT_OK(Put("d4", "vd4"));
1795 MoveFilesToLevel(1);
1798 Iterator
* iter
= NewIterator(ro
);
1800 iter
->SeekForPrev("a4");
1801 ASSERT_EQ(iter
->key().ToString(), "a3");
1802 ASSERT_EQ(iter
->value().ToString(), "va3");
1804 iter
->SeekForPrev("c2");
1805 ASSERT_EQ(iter
->key().ToString(), "b3");
1806 iter
->SeekForPrev("d3");
1807 ASSERT_EQ(iter
->key().ToString(), "d2");
1808 iter
->SeekForPrev("b5");
1809 ASSERT_EQ(iter
->key().ToString(), "b4");
1815 ro
.prefix_same_as_start
= true;
1816 Iterator
* iter
= NewIterator(ro
);
1817 iter
->SeekForPrev("c2");
1818 ASSERT_TRUE(!iter
->Valid());
1819 ASSERT_OK(iter
->status());
1824 TEST_P(DBIteratorTest
, IterSeekForPrevCrossingFilesCustomPrefixExtractor
) {
1825 Options options
= CurrentOptions();
1826 options
.prefix_extractor
=
1827 std::make_shared
<SliceTransformLimitedDomainGeneric
>();
1828 options
.disable_auto_compactions
= true;
1829 // Enable prefix bloom for SST files
1830 BlockBasedTableOptions table_options
;
1831 table_options
.filter_policy
.reset(NewBloomFilterPolicy(10, true));
1832 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1833 DestroyAndReopen(options
);
1835 ASSERT_OK(Put("a1", "va1"));
1836 ASSERT_OK(Put("a2", "va2"));
1837 ASSERT_OK(Put("a3", "va3"));
1840 ASSERT_OK(Put("b1", "vb1"));
1841 ASSERT_OK(Put("b2", "vb2"));
1842 ASSERT_OK(Put("b3", "vb3"));
1845 ASSERT_OK(Put("b4", "vb4"));
1846 ASSERT_OK(Put("d1", "vd1"));
1847 ASSERT_OK(Put("d2", "vd2"));
1848 ASSERT_OK(Put("d4", "vd4"));
1851 MoveFilesToLevel(1);
1854 Iterator
* iter
= NewIterator(ro
);
1856 iter
->SeekForPrev("a4");
1857 ASSERT_EQ(iter
->key().ToString(), "a3");
1858 ASSERT_EQ(iter
->value().ToString(), "va3");
1860 iter
->SeekForPrev("c2");
1861 ASSERT_EQ(iter
->key().ToString(), "b3");
1862 iter
->SeekForPrev("d3");
1863 ASSERT_EQ(iter
->key().ToString(), "d2");
1864 iter
->SeekForPrev("b5");
1865 ASSERT_EQ(iter
->key().ToString(), "b4");
1871 ro
.prefix_same_as_start
= true;
1872 Iterator
* iter
= NewIterator(ro
);
1873 iter
->SeekForPrev("c2");
1874 ASSERT_TRUE(!iter
->Valid());
1875 ASSERT_OK(iter
->status());
1880 TEST_P(DBIteratorTest
, IterPrevKeyCrossingBlocks
) {
1881 Options options
= CurrentOptions();
1882 BlockBasedTableOptions table_options
;
1883 table_options
.block_size
= 1; // every block will contain one entry
1884 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1885 options
.merge_operator
= MergeOperators::CreateStringAppendTESTOperator();
1886 options
.disable_auto_compactions
= true;
1887 options
.max_sequential_skip_in_iterations
= 8;
1889 DestroyAndReopen(options
);
1891 // Putting such deletes will force DBIter::Prev() to fallback to a Seek
1892 for (int file_num
= 0; file_num
< 10; file_num
++) {
1893 ASSERT_OK(Delete("key4"));
1897 // First File containing 5 blocks of puts
1898 ASSERT_OK(Put("key1", "val1.0"));
1899 ASSERT_OK(Put("key2", "val2.0"));
1900 ASSERT_OK(Put("key3", "val3.0"));
1901 ASSERT_OK(Put("key4", "val4.0"));
1902 ASSERT_OK(Put("key5", "val5.0"));
1905 // Second file containing 9 blocks of merge operands
1906 ASSERT_OK(db_
->Merge(WriteOptions(), "key1", "val1.1"));
1907 ASSERT_OK(db_
->Merge(WriteOptions(), "key1", "val1.2"));
1909 ASSERT_OK(db_
->Merge(WriteOptions(), "key2", "val2.1"));
1910 ASSERT_OK(db_
->Merge(WriteOptions(), "key2", "val2.2"));
1911 ASSERT_OK(db_
->Merge(WriteOptions(), "key2", "val2.3"));
1913 ASSERT_OK(db_
->Merge(WriteOptions(), "key3", "val3.1"));
1914 ASSERT_OK(db_
->Merge(WriteOptions(), "key3", "val3.2"));
1915 ASSERT_OK(db_
->Merge(WriteOptions(), "key3", "val3.3"));
1916 ASSERT_OK(db_
->Merge(WriteOptions(), "key3", "val3.4"));
1921 ro
.fill_cache
= false;
1922 Iterator
* iter
= NewIterator(ro
);
1925 ASSERT_EQ(iter
->key().ToString(), "key5");
1926 ASSERT_EQ(iter
->value().ToString(), "val5.0");
1929 ASSERT_EQ(iter
->key().ToString(), "key4");
1930 ASSERT_EQ(iter
->value().ToString(), "val4.0");
1933 ASSERT_EQ(iter
->key().ToString(), "key3");
1934 ASSERT_EQ(iter
->value().ToString(), "val3.0,val3.1,val3.2,val3.3,val3.4");
1937 ASSERT_EQ(iter
->key().ToString(), "key2");
1938 ASSERT_EQ(iter
->value().ToString(), "val2.0,val2.1,val2.2,val2.3");
1941 ASSERT_EQ(iter
->key().ToString(), "key1");
1942 ASSERT_EQ(iter
->value().ToString(), "val1.0,val1.1,val1.2");
1948 TEST_P(DBIteratorTest
, IterPrevKeyCrossingBlocksRandomized
) {
1949 Options options
= CurrentOptions();
1950 options
.merge_operator
= MergeOperators::CreateStringAppendTESTOperator();
1951 options
.disable_auto_compactions
= true;
1952 options
.level0_slowdown_writes_trigger
= (1 << 30);
1953 options
.level0_stop_writes_trigger
= (1 << 30);
1954 options
.max_sequential_skip_in_iterations
= 8;
1955 DestroyAndReopen(options
);
1957 const int kNumKeys
= 500;
1958 // Small number of merge operands to make sure that DBIter::Prev() don't
1959 // fall back to Seek()
1960 const int kNumMergeOperands
= 3;
1961 // Use value size that will make sure that every block contain 1 key
1962 const int kValSize
=
1963 static_cast<int>(BlockBasedTableOptions().block_size
) * 4;
1964 // Percentage of keys that wont get merge operations
1965 const int kNoMergeOpPercentage
= 20;
1966 // Percentage of keys that will be deleted
1967 const int kDeletePercentage
= 10;
1969 // For half of the key range we will write multiple deletes first to
1970 // force DBIter::Prev() to fall back to Seek()
1971 for (int file_num
= 0; file_num
< 10; file_num
++) {
1972 for (int i
= 0; i
< kNumKeys
; i
+= 2) {
1973 ASSERT_OK(Delete(Key(i
)));
1979 std::map
<std::string
, std::string
> true_data
;
1980 std::string gen_key
;
1981 std::string gen_val
;
1983 for (int i
= 0; i
< kNumKeys
; i
++) {
1985 gen_val
= rnd
.RandomString(kValSize
);
1987 ASSERT_OK(Put(gen_key
, gen_val
));
1988 true_data
[gen_key
] = gen_val
;
1992 // Separate values and merge operands in different file so that we
1993 // make sure that we don't merge them while flushing but actually
1994 // merge them in the read path
1995 for (int i
= 0; i
< kNumKeys
; i
++) {
1996 if (rnd
.PercentTrue(kNoMergeOpPercentage
)) {
1997 // Dont give merge operations for some keys
2001 for (int j
= 0; j
< kNumMergeOperands
; j
++) {
2003 gen_val
= rnd
.RandomString(kValSize
);
2005 ASSERT_OK(db_
->Merge(WriteOptions(), gen_key
, gen_val
));
2006 true_data
[gen_key
] += "," + gen_val
;
2011 for (int i
= 0; i
< kNumKeys
; i
++) {
2012 if (rnd
.PercentTrue(kDeletePercentage
)) {
2015 ASSERT_OK(Delete(gen_key
));
2016 true_data
.erase(gen_key
);
2023 ro
.fill_cache
= false;
2024 Iterator
* iter
= NewIterator(ro
);
2025 auto data_iter
= true_data
.rbegin();
2027 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
2028 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
2029 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
2032 ASSERT_EQ(data_iter
, true_data
.rend());
2039 ro
.fill_cache
= false;
2040 Iterator
* iter
= NewIterator(ro
);
2041 auto data_iter
= true_data
.rbegin();
2043 int entries_right
= 0;
2044 std::string seek_key
;
2045 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
2046 // Verify key/value of current position
2047 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
2048 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
2050 bool restore_position_with_seek
= rnd
.Uniform(2);
2051 if (restore_position_with_seek
) {
2052 seek_key
= iter
->key().ToString();
2055 // Do some Next() operations the restore the iterator to orignal position
2057 entries_right
> 0 ? rnd
.Uniform(std::min(entries_right
, 10)) : 0;
2058 for (int i
= 0; i
< next_count
; i
++) {
2062 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
2063 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
2066 if (restore_position_with_seek
) {
2067 // Restore orignal position using Seek()
2068 iter
->Seek(seek_key
);
2069 for (int i
= 0; i
< next_count
; i
++) {
2073 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
2074 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
2076 // Restore original position using Prev()
2077 for (int i
= 0; i
< next_count
; i
++) {
2081 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
2082 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
2089 ASSERT_EQ(data_iter
, true_data
.rend());
2095 TEST_P(DBIteratorTest
, IteratorWithLocalStatistics
) {
2096 Options options
= CurrentOptions();
2097 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
2098 DestroyAndReopen(options
);
2101 for (int i
= 0; i
< 1000; i
++) {
2102 // Key 10 bytes / Value 10 bytes
2103 ASSERT_OK(Put(rnd
.RandomString(10), rnd
.RandomString(10)));
2106 std::atomic
<uint64_t> total_next(0);
2107 std::atomic
<uint64_t> total_next_found(0);
2108 std::atomic
<uint64_t> total_prev(0);
2109 std::atomic
<uint64_t> total_prev_found(0);
2110 std::atomic
<uint64_t> total_bytes(0);
2112 std::vector
<port::Thread
> threads
;
2113 std::function
<void()> reader_func_next
= [&]() {
2114 SetPerfLevel(kEnableCount
);
2115 get_perf_context()->Reset();
2116 Iterator
* iter
= NewIterator(ReadOptions());
2118 iter
->SeekToFirst();
2119 // Seek will bump ITER_BYTES_READ
2121 bytes
+= iter
->key().size();
2122 bytes
+= iter
->value().size();
2127 if (!iter
->Valid()) {
2131 bytes
+= iter
->key().size();
2132 bytes
+= iter
->value().size();
2136 ASSERT_EQ(bytes
, get_perf_context()->iter_read_bytes
);
2137 SetPerfLevel(kDisable
);
2138 total_bytes
+= bytes
;
2141 std::function
<void()> reader_func_prev
= [&]() {
2142 SetPerfLevel(kEnableCount
);
2143 Iterator
* iter
= NewIterator(ReadOptions());
2146 // Seek will bump ITER_BYTES_READ
2148 bytes
+= iter
->key().size();
2149 bytes
+= iter
->value().size();
2154 if (!iter
->Valid()) {
2158 bytes
+= iter
->key().size();
2159 bytes
+= iter
->value().size();
2163 ASSERT_EQ(bytes
, get_perf_context()->iter_read_bytes
);
2164 SetPerfLevel(kDisable
);
2165 total_bytes
+= bytes
;
2168 for (int i
= 0; i
< 10; i
++) {
2169 threads
.emplace_back(reader_func_next
);
2171 for (int i
= 0; i
< 15; i
++) {
2172 threads
.emplace_back(reader_func_prev
);
2175 for (auto& t
: threads
) {
2179 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_DB_NEXT
), (uint64_t)total_next
);
2180 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_DB_NEXT_FOUND
),
2181 (uint64_t)total_next_found
);
2182 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_DB_PREV
), (uint64_t)total_prev
);
2183 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_DB_PREV_FOUND
),
2184 (uint64_t)total_prev_found
);
2185 ASSERT_EQ(TestGetTickerCount(options
, ITER_BYTES_READ
),
2186 (uint64_t)total_bytes
);
2189 TEST_P(DBIteratorTest
, ReadAhead
) {
2191 env_
->count_random_reads_
= true;
2193 options
.disable_auto_compactions
= true;
2194 options
.write_buffer_size
= 4 << 20;
2195 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
2196 BlockBasedTableOptions table_options
;
2197 table_options
.block_size
= 1024;
2198 table_options
.no_block_cache
= true;
2199 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
2202 std::string
value(1024, 'a');
2203 for (int i
= 0; i
< 100; i
++) {
2204 ASSERT_OK(Put(Key(i
), value
));
2207 MoveFilesToLevel(2);
2209 for (int i
= 0; i
< 100; i
++) {
2210 ASSERT_OK(Put(Key(i
), value
));
2213 MoveFilesToLevel(1);
2215 for (int i
= 0; i
< 100; i
++) {
2216 ASSERT_OK(Put(Key(i
), value
));
2219 #ifndef ROCKSDB_LITE
2220 ASSERT_EQ("1,1,1", FilesPerLevel());
2221 #endif // !ROCKSDB_LITE
2223 env_
->random_read_bytes_counter_
= 0;
2224 options
.statistics
->setTickerCount(NO_FILE_OPENS
, 0);
2225 ReadOptions read_options
;
2226 auto* iter
= NewIterator(read_options
);
2227 iter
->SeekToFirst();
2228 int64_t num_file_opens
= TestGetTickerCount(options
, NO_FILE_OPENS
);
2229 size_t bytes_read
= env_
->random_read_bytes_counter_
;
2232 int64_t num_file_closes
= TestGetTickerCount(options
, NO_FILE_CLOSES
);
2233 env_
->random_read_bytes_counter_
= 0;
2234 options
.statistics
->setTickerCount(NO_FILE_OPENS
, 0);
2235 read_options
.readahead_size
= 1024 * 10;
2236 iter
= NewIterator(read_options
);
2237 iter
->SeekToFirst();
2238 int64_t num_file_opens_readahead
= TestGetTickerCount(options
, NO_FILE_OPENS
);
2239 size_t bytes_read_readahead
= env_
->random_read_bytes_counter_
;
2241 int64_t num_file_closes_readahead
=
2242 TestGetTickerCount(options
, NO_FILE_CLOSES
);
2243 ASSERT_EQ(num_file_opens
, num_file_opens_readahead
);
2244 ASSERT_EQ(num_file_closes
, num_file_closes_readahead
);
2245 ASSERT_GT(bytes_read_readahead
, bytes_read
);
2246 ASSERT_GT(bytes_read_readahead
, read_options
.readahead_size
* 3);
2248 // Verify correctness.
2249 iter
= NewIterator(read_options
);
2251 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
2252 ASSERT_EQ(value
, iter
->value());
2255 ASSERT_EQ(100, count
);
2256 for (int i
= 0; i
< 100; i
++) {
2258 ASSERT_EQ(value
, iter
->value());
2263 // Insert a key, create a snapshot iterator, overwrite key lots of times,
2264 // seek to a smaller key. Expect DBIter to fall back to a seek instead of
2265 // going through all the overwrites linearly.
2266 TEST_P(DBIteratorTest
, DBIteratorSkipRecentDuplicatesTest
) {
2267 Options options
= CurrentOptions();
2269 options
.create_if_missing
= true;
2270 options
.max_sequential_skip_in_iterations
= 3;
2271 options
.prefix_extractor
= nullptr;
2272 options
.write_buffer_size
= 1 << 27; // big enough to avoid flush
2273 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
2274 DestroyAndReopen(options
);
2277 ASSERT_OK(Put("b", "0"));
2281 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
2284 for (int i
= 0; i
< 100; ++i
) {
2285 ASSERT_OK(Put("b", std::to_string(i
+ 1).c_str()));
2288 #ifndef ROCKSDB_LITE
2289 // Check that memtable wasn't flushed.
2291 ASSERT_TRUE(db_
->GetProperty("rocksdb.num-files-at-level0", &val
));
2292 EXPECT_EQ("0", val
);
2295 // Seek iterator to a smaller key.
2296 get_perf_context()->Reset();
2298 ASSERT_TRUE(iter
->Valid());
2299 EXPECT_EQ("b", iter
->key().ToString());
2300 EXPECT_EQ("0", iter
->value().ToString());
2302 // Check that the seek didn't do too much work.
2303 // Checks are not tight, just make sure that everything is well below 100.
2304 EXPECT_LT(get_perf_context()->internal_key_skipped_count
, 4);
2305 EXPECT_LT(get_perf_context()->internal_recent_skipped_count
, 8);
2306 EXPECT_LT(get_perf_context()->seek_on_memtable_count
, 10);
2307 EXPECT_LT(get_perf_context()->next_on_memtable_count
, 10);
2308 EXPECT_LT(get_perf_context()->prev_on_memtable_count
, 10);
2310 // Check that iterator did something like what we expect.
2311 EXPECT_EQ(get_perf_context()->internal_delete_skipped_count
, 0);
2312 EXPECT_EQ(get_perf_context()->internal_merge_count
, 0);
2313 EXPECT_GE(get_perf_context()->internal_recent_skipped_count
, 2);
2314 EXPECT_GE(get_perf_context()->seek_on_memtable_count
, 2);
2316 options
.statistics
->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION
));
2319 TEST_P(DBIteratorTest
, Refresh
) {
2320 ASSERT_OK(Put("x", "y"));
2322 std::unique_ptr
<Iterator
> iter(NewIterator(ReadOptions()));
2323 ASSERT_OK(iter
->status());
2324 iter
->Seek(Slice("a"));
2325 ASSERT_TRUE(iter
->Valid());
2326 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2328 ASSERT_FALSE(iter
->Valid());
2330 ASSERT_OK(Put("c", "d"));
2332 iter
->Seek(Slice("a"));
2333 ASSERT_TRUE(iter
->Valid());
2334 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2336 ASSERT_FALSE(iter
->Valid());
2338 ASSERT_OK(iter
->status());
2339 ASSERT_OK(iter
->Refresh());
2341 iter
->Seek(Slice("a"));
2342 ASSERT_TRUE(iter
->Valid());
2343 ASSERT_EQ(iter
->key().compare(Slice("c")), 0);
2345 ASSERT_TRUE(iter
->Valid());
2346 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2348 ASSERT_FALSE(iter
->Valid());
2350 EXPECT_OK(dbfull()->Flush(FlushOptions()));
2352 ASSERT_OK(Put("m", "n"));
2354 iter
->Seek(Slice("a"));
2355 ASSERT_TRUE(iter
->Valid());
2356 ASSERT_EQ(iter
->key().compare(Slice("c")), 0);
2358 ASSERT_TRUE(iter
->Valid());
2359 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2361 ASSERT_FALSE(iter
->Valid());
2363 ASSERT_OK(iter
->status());
2364 ASSERT_OK(iter
->Refresh());
2366 iter
->Seek(Slice("a"));
2367 ASSERT_TRUE(iter
->Valid());
2368 ASSERT_EQ(iter
->key().compare(Slice("c")), 0);
2370 ASSERT_TRUE(iter
->Valid());
2371 ASSERT_EQ(iter
->key().compare(Slice("m")), 0);
2373 ASSERT_TRUE(iter
->Valid());
2374 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2376 ASSERT_FALSE(iter
->Valid());
2381 TEST_P(DBIteratorTest
, RefreshWithSnapshot
) {
2382 ASSERT_OK(Put("x", "y"));
2383 const Snapshot
* snapshot
= db_
->GetSnapshot();
2384 ReadOptions options
;
2385 options
.snapshot
= snapshot
;
2386 Iterator
* iter
= NewIterator(options
);
2387 ASSERT_OK(iter
->status());
2389 iter
->Seek(Slice("a"));
2390 ASSERT_TRUE(iter
->Valid());
2391 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2393 ASSERT_FALSE(iter
->Valid());
2395 ASSERT_OK(Put("c", "d"));
2397 iter
->Seek(Slice("a"));
2398 ASSERT_TRUE(iter
->Valid());
2399 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2401 ASSERT_FALSE(iter
->Valid());
2403 ASSERT_OK(iter
->status());
2404 Status s
= iter
->Refresh();
2405 ASSERT_TRUE(s
.IsNotSupported());
2406 db_
->ReleaseSnapshot(snapshot
);
2410 TEST_P(DBIteratorTest
, CreationFailure
) {
2411 SyncPoint::GetInstance()->SetCallBack(
2412 "DBImpl::NewInternalIterator:StatusCallback", [](void* arg
) {
2413 *(reinterpret_cast<Status
*>(arg
)) = Status::Corruption("test status");
2415 SyncPoint::GetInstance()->EnableProcessing();
2417 Iterator
* iter
= NewIterator(ReadOptions());
2418 ASSERT_FALSE(iter
->Valid());
2419 ASSERT_TRUE(iter
->status().IsCorruption());
2423 TEST_P(DBIteratorTest
, UpperBoundWithChangeDirection
) {
2424 Options options
= CurrentOptions();
2425 options
.max_sequential_skip_in_iterations
= 3;
2426 DestroyAndReopen(options
);
2428 // write a bunch of kvs to the database.
2429 ASSERT_OK(Put("a", "1"));
2430 ASSERT_OK(Put("y", "1"));
2431 ASSERT_OK(Put("y1", "1"));
2432 ASSERT_OK(Put("y2", "1"));
2433 ASSERT_OK(Put("y3", "1"));
2434 ASSERT_OK(Put("z", "1"));
2436 ASSERT_OK(Put("a", "1"));
2437 ASSERT_OK(Put("z", "1"));
2438 ASSERT_OK(Put("bar", "1"));
2439 ASSERT_OK(Put("foo", "1"));
2441 std::string upper_bound
= "x";
2442 Slice
ub_slice(upper_bound
);
2444 ro
.iterate_upper_bound
= &ub_slice
;
2445 ro
.max_skippable_internal_keys
= 1000;
2447 Iterator
* iter
= NewIterator(ro
);
2449 ASSERT_TRUE(iter
->Valid());
2450 ASSERT_EQ("foo", iter
->key().ToString());
2453 ASSERT_TRUE(iter
->Valid());
2454 ASSERT_OK(iter
->status());
2455 ASSERT_EQ("bar", iter
->key().ToString());
2460 TEST_P(DBIteratorTest
, TableFilter
) {
2461 ASSERT_OK(Put("a", "1"));
2462 EXPECT_OK(dbfull()->Flush(FlushOptions()));
2463 ASSERT_OK(Put("b", "2"));
2464 ASSERT_OK(Put("c", "3"));
2465 EXPECT_OK(dbfull()->Flush(FlushOptions()));
2466 ASSERT_OK(Put("d", "4"));
2467 ASSERT_OK(Put("e", "5"));
2468 ASSERT_OK(Put("f", "6"));
2469 EXPECT_OK(dbfull()->Flush(FlushOptions()));
2471 // Ensure the table_filter callback is called once for each table.
2473 std::set
<uint64_t> unseen
{1, 2, 3};
2475 opts
.table_filter
= [&](const TableProperties
& props
) {
2476 auto it
= unseen
.find(props
.num_entries
);
2477 if (it
== unseen
.end()) {
2478 ADD_FAILURE() << "saw table properties with an unexpected "
2479 << props
.num_entries
<< " entries";
2485 auto iter
= NewIterator(opts
);
2486 iter
->SeekToFirst();
2487 ASSERT_EQ(IterStatus(iter
), "a->1");
2489 ASSERT_EQ(IterStatus(iter
), "b->2");
2491 ASSERT_EQ(IterStatus(iter
), "c->3");
2493 ASSERT_EQ(IterStatus(iter
), "d->4");
2495 ASSERT_EQ(IterStatus(iter
), "e->5");
2497 ASSERT_EQ(IterStatus(iter
), "f->6");
2499 ASSERT_FALSE(iter
->Valid());
2500 ASSERT_TRUE(unseen
.empty());
2504 // Ensure returning false in the table_filter hides the keys from that table
2505 // during iteration.
2508 opts
.table_filter
= [](const TableProperties
& props
) {
2509 return props
.num_entries
!= 2;
2511 auto iter
= NewIterator(opts
);
2512 iter
->SeekToFirst();
2513 ASSERT_EQ(IterStatus(iter
), "a->1");
2515 ASSERT_EQ(IterStatus(iter
), "d->4");
2517 ASSERT_EQ(IterStatus(iter
), "e->5");
2519 ASSERT_EQ(IterStatus(iter
), "f->6");
2521 ASSERT_FALSE(iter
->Valid());
2526 TEST_P(DBIteratorTest
, UpperBoundWithPrevReseek
) {
2527 Options options
= CurrentOptions();
2528 options
.max_sequential_skip_in_iterations
= 3;
2529 DestroyAndReopen(options
);
2531 // write a bunch of kvs to the database.
2532 ASSERT_OK(Put("a", "1"));
2533 ASSERT_OK(Put("y", "1"));
2534 ASSERT_OK(Put("z", "1"));
2536 ASSERT_OK(Put("a", "1"));
2537 ASSERT_OK(Put("z", "1"));
2538 ASSERT_OK(Put("bar", "1"));
2539 ASSERT_OK(Put("foo", "1"));
2540 ASSERT_OK(Put("foo", "2"));
2542 ASSERT_OK(Put("foo", "3"));
2543 ASSERT_OK(Put("foo", "4"));
2544 ASSERT_OK(Put("foo", "5"));
2545 const Snapshot
* snapshot
= db_
->GetSnapshot();
2546 ASSERT_OK(Put("foo", "6"));
2548 std::string upper_bound
= "x";
2549 Slice
ub_slice(upper_bound
);
2551 ro
.snapshot
= snapshot
;
2552 ro
.iterate_upper_bound
= &ub_slice
;
2554 Iterator
* iter
= NewIterator(ro
);
2555 iter
->SeekForPrev("goo");
2556 ASSERT_TRUE(iter
->Valid());
2557 ASSERT_EQ("foo", iter
->key().ToString());
2560 ASSERT_TRUE(iter
->Valid());
2561 ASSERT_EQ("bar", iter
->key().ToString());
2564 db_
->ReleaseSnapshot(snapshot
);
2567 TEST_P(DBIteratorTest
, SkipStatistics
) {
2568 Options options
= CurrentOptions();
2569 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
2570 DestroyAndReopen(options
);
2574 // write a bunch of kvs to the database.
2575 ASSERT_OK(Put("a", "1"));
2576 ASSERT_OK(Put("b", "1"));
2577 ASSERT_OK(Put("c", "1"));
2579 ASSERT_OK(Put("d", "1"));
2580 ASSERT_OK(Put("e", "1"));
2581 ASSERT_OK(Put("f", "1"));
2582 ASSERT_OK(Put("a", "2"));
2583 ASSERT_OK(Put("b", "2"));
2585 ASSERT_OK(Delete("d"));
2586 ASSERT_OK(Delete("e"));
2587 ASSERT_OK(Delete("f"));
2589 Iterator
* iter
= NewIterator(ReadOptions());
2591 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
2592 ASSERT_OK(iter
->status());
2595 ASSERT_EQ(count
, 3);
2597 skip_count
+= 8; // 3 deletes + 3 original keys + 2 lower in sequence
2598 ASSERT_EQ(skip_count
, TestGetTickerCount(options
, NUMBER_ITER_SKIP
));
2600 iter
= NewIterator(ReadOptions());
2602 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
2603 ASSERT_OK(iter
->status());
2606 ASSERT_EQ(count
, 3);
2608 skip_count
+= 8; // Same as above, but in reverse order
2609 ASSERT_EQ(skip_count
, TestGetTickerCount(options
, NUMBER_ITER_SKIP
));
2611 ASSERT_OK(Put("aa", "1"));
2612 ASSERT_OK(Put("ab", "1"));
2613 ASSERT_OK(Put("ac", "1"));
2614 ASSERT_OK(Put("ad", "1"));
2616 ASSERT_OK(Delete("ab"));
2617 ASSERT_OK(Delete("ac"));
2618 ASSERT_OK(Delete("ad"));
2622 ro
.iterate_upper_bound
= &prefix
;
2624 iter
= NewIterator(ro
);
2626 for (iter
->Seek("aa"); iter
->Valid(); iter
->Next()) {
2627 ASSERT_OK(iter
->status());
2630 ASSERT_EQ(count
, 1);
2632 skip_count
+= 6; // 3 deletes + 3 original keys
2633 ASSERT_EQ(skip_count
, TestGetTickerCount(options
, NUMBER_ITER_SKIP
));
2635 iter
= NewIterator(ro
);
2637 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
2638 ASSERT_OK(iter
->status());
2641 ASSERT_EQ(count
, 2);
2643 // 3 deletes + 3 original keys + lower sequence of "a"
2645 ASSERT_EQ(skip_count
, TestGetTickerCount(options
, NUMBER_ITER_SKIP
));
2648 TEST_P(DBIteratorTest
, SeekAfterHittingManyInternalKeys
) {
2649 Options options
= CurrentOptions();
2650 DestroyAndReopen(options
);
2652 ropts
.max_skippable_internal_keys
= 2;
2654 ASSERT_OK(Put("1", "val_1"));
2655 // Add more tombstones than max_skippable_internal_keys so that Next() fails.
2656 ASSERT_OK(Delete("2"));
2657 ASSERT_OK(Delete("3"));
2658 ASSERT_OK(Delete("4"));
2659 ASSERT_OK(Delete("5"));
2660 ASSERT_OK(Put("6", "val_6"));
2662 std::unique_ptr
<Iterator
> iter(NewIterator(ropts
));
2663 iter
->SeekToFirst();
2665 ASSERT_TRUE(iter
->Valid());
2666 ASSERT_EQ(iter
->key().ToString(), "1");
2667 ASSERT_EQ(iter
->value().ToString(), "val_1");
2669 // This should fail as incomplete due to too many non-visible internal keys on
2670 // the way to the next valid user key.
2672 ASSERT_TRUE(!iter
->Valid());
2673 ASSERT_TRUE(iter
->status().IsIncomplete());
2675 // Get the internal key at which Next() failed.
2676 std::string prop_value
;
2677 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.internal-key", &prop_value
));
2678 ASSERT_EQ("4", prop_value
);
2680 // Create a new iterator to seek to the internal key.
2681 std::unique_ptr
<Iterator
> iter2(NewIterator(ropts
));
2682 iter2
->Seek(prop_value
);
2683 ASSERT_TRUE(iter2
->Valid());
2684 ASSERT_OK(iter2
->status());
2686 ASSERT_EQ(iter2
->key().ToString(), "6");
2687 ASSERT_EQ(iter2
->value().ToString(), "val_6");
2690 // Reproduces a former bug where iterator would skip some records when DBIter
2691 // re-seeks subiterator with Incomplete status.
2692 TEST_P(DBIteratorTest
, NonBlockingIterationBugRepro
) {
2693 Options options
= CurrentOptions();
2694 BlockBasedTableOptions table_options
;
2695 // Make sure the sst file has more than one block.
2696 table_options
.flush_block_policy_factory
=
2697 std::make_shared
<FlushBlockEveryKeyPolicyFactory
>();
2698 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
2699 DestroyAndReopen(options
);
2701 // Two records in sst file, each in its own block.
2702 ASSERT_OK(Put("b", ""));
2703 ASSERT_OK(Put("d", ""));
2706 // Create a nonblocking iterator before writing to memtable.
2708 ropt
.read_tier
= kBlockCacheTier
;
2709 std::unique_ptr
<Iterator
> iter(NewIterator(ropt
));
2711 // Overwrite a key in memtable many times to hit
2712 // max_sequential_skip_in_iterations (which is 8 by default).
2713 for (int i
= 0; i
< 20; ++i
) {
2714 ASSERT_OK(Put("c", ""));
2717 // Load the second block in sst file into the block cache.
2719 std::unique_ptr
<Iterator
> iter2(NewIterator(ReadOptions()));
2723 // Finally seek the nonblocking iterator.
2725 // With the bug, the status used to be OK, and the iterator used to point to
2727 EXPECT_TRUE(iter
->status().IsIncomplete());
2730 TEST_P(DBIteratorTest
, SeekBackwardAfterOutOfUpperBound
) {
2731 ASSERT_OK(Put("a", ""));
2732 ASSERT_OK(Put("b", ""));
2737 ropt
.iterate_upper_bound
= &ub
;
2739 std::unique_ptr
<Iterator
> it(dbfull()->NewIterator(ropt
));
2740 it
->SeekForPrev("a");
2741 ASSERT_TRUE(it
->Valid());
2742 ASSERT_OK(it
->status());
2743 ASSERT_EQ("a", it
->key().ToString());
2745 ASSERT_FALSE(it
->Valid());
2746 ASSERT_OK(it
->status());
2747 it
->SeekForPrev("a");
2748 ASSERT_OK(it
->status());
2750 ASSERT_TRUE(it
->Valid());
2751 ASSERT_EQ("a", it
->key().ToString());
2754 TEST_P(DBIteratorTest
, AvoidReseekLevelIterator
) {
2755 Options options
= CurrentOptions();
2756 options
.compression
= CompressionType::kNoCompression
;
2757 BlockBasedTableOptions table_options
;
2758 table_options
.block_size
= 800;
2759 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
2763 std::string random_str
= rnd
.RandomString(180);
2765 ASSERT_OK(Put("1", random_str
));
2766 ASSERT_OK(Put("2", random_str
));
2767 ASSERT_OK(Put("3", random_str
));
2768 ASSERT_OK(Put("4", random_str
));
2770 ASSERT_OK(Put("5", random_str
));
2771 ASSERT_OK(Put("6", random_str
));
2772 ASSERT_OK(Put("7", random_str
));
2774 ASSERT_OK(Put("8", random_str
));
2775 ASSERT_OK(Put("9", random_str
));
2777 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2779 int num_find_file_in_level
= 0;
2780 int num_idx_blk_seek
= 0;
2781 SyncPoint::GetInstance()->SetCallBack(
2782 "LevelIterator::Seek:BeforeFindFile",
2783 [&](void* /*arg*/) { num_find_file_in_level
++; });
2784 SyncPoint::GetInstance()->SetCallBack(
2785 "IndexBlockIter::Seek:0", [&](void* /*arg*/) { num_idx_blk_seek
++; });
2786 SyncPoint::GetInstance()->EnableProcessing();
2789 std::unique_ptr
<Iterator
> iter(NewIterator(ReadOptions()));
2791 ASSERT_TRUE(iter
->Valid());
2792 ASSERT_EQ(1, num_find_file_in_level
);
2793 ASSERT_EQ(1, num_idx_blk_seek
);
2796 ASSERT_TRUE(iter
->Valid());
2797 ASSERT_EQ(1, num_find_file_in_level
);
2798 ASSERT_EQ(1, num_idx_blk_seek
);
2801 ASSERT_TRUE(iter
->Valid());
2802 ASSERT_EQ(1, num_find_file_in_level
);
2803 ASSERT_EQ(1, num_idx_blk_seek
);
2806 ASSERT_TRUE(iter
->Valid());
2807 ASSERT_EQ(1, num_find_file_in_level
);
2808 ASSERT_EQ(1, num_idx_blk_seek
);
2811 ASSERT_TRUE(iter
->Valid());
2812 ASSERT_EQ(1, num_find_file_in_level
);
2813 ASSERT_EQ(2, num_idx_blk_seek
);
2816 ASSERT_TRUE(iter
->Valid());
2817 ASSERT_EQ(1, num_find_file_in_level
);
2818 ASSERT_EQ(2, num_idx_blk_seek
);
2821 ASSERT_TRUE(iter
->Valid());
2822 ASSERT_EQ(1, num_find_file_in_level
);
2823 ASSERT_EQ(3, num_idx_blk_seek
);
2826 ASSERT_TRUE(iter
->Valid());
2827 ASSERT_EQ(2, num_find_file_in_level
);
2828 // Still re-seek because "8" is the boundary key, which has
2829 // the same user key as the seek key.
2830 ASSERT_EQ(4, num_idx_blk_seek
);
2833 ASSERT_TRUE(iter
->Valid());
2834 ASSERT_EQ(3, num_find_file_in_level
);
2835 ASSERT_EQ(5, num_idx_blk_seek
);
2838 ASSERT_TRUE(iter
->Valid());
2839 ASSERT_EQ(3, num_find_file_in_level
);
2840 ASSERT_EQ(5, num_idx_blk_seek
);
2842 // Seek backward never triggers the index block seek to be skipped
2844 ASSERT_TRUE(iter
->Valid());
2845 ASSERT_EQ(3, num_find_file_in_level
);
2846 ASSERT_EQ(6, num_idx_blk_seek
);
2849 SyncPoint::GetInstance()->DisableProcessing();
2852 // MyRocks may change iterate bounds before seek. Simply test to make sure such
2853 // usage doesn't break iterator.
2854 TEST_P(DBIteratorTest
, IterateBoundChangedBeforeSeek
) {
2855 Options options
= CurrentOptions();
2856 options
.compression
= CompressionType::kNoCompression
;
2857 BlockBasedTableOptions table_options
;
2858 table_options
.block_size
= 100;
2859 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
2860 std::string
value(50, 'v');
2862 ASSERT_OK(Put("aaa", value
));
2864 ASSERT_OK(Put("bbb", "v"));
2865 ASSERT_OK(Put("ccc", "v"));
2866 ASSERT_OK(Put("ddd", "v"));
2868 ASSERT_OK(Put("eee", "v"));
2870 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2872 std::string ub1
= "e";
2873 std::string ub2
= "c";
2875 ReadOptions read_opts1
;
2876 read_opts1
.iterate_upper_bound
= &ub
;
2877 Iterator
* iter
= NewIterator(read_opts1
);
2878 // Seek and iterate accross block boundary.
2880 ASSERT_TRUE(iter
->Valid());
2881 ASSERT_OK(iter
->status());
2882 ASSERT_EQ("bbb", iter
->key());
2885 ASSERT_TRUE(iter
->Valid());
2886 ASSERT_OK(iter
->status());
2887 ASSERT_EQ("bbb", iter
->key());
2889 ASSERT_FALSE(iter
->Valid());
2890 ASSERT_OK(iter
->status());
2893 std::string lb1
= "a";
2894 std::string lb2
= "c";
2896 ReadOptions read_opts2
;
2897 read_opts2
.iterate_lower_bound
= &lb
;
2898 iter
= NewIterator(read_opts2
);
2899 iter
->SeekForPrev("d");
2900 ASSERT_TRUE(iter
->Valid());
2901 ASSERT_OK(iter
->status());
2902 ASSERT_EQ("ccc", iter
->key());
2904 iter
->SeekForPrev("d");
2905 ASSERT_TRUE(iter
->Valid());
2906 ASSERT_OK(iter
->status());
2907 ASSERT_EQ("ccc", iter
->key());
2909 ASSERT_FALSE(iter
->Valid());
2910 ASSERT_OK(iter
->status());
2914 TEST_P(DBIteratorTest
, IterateWithLowerBoundAcrossFileBoundary
) {
2915 ASSERT_OK(Put("aaa", "v"));
2916 ASSERT_OK(Put("bbb", "v"));
2918 ASSERT_OK(Put("ccc", "v"));
2919 ASSERT_OK(Put("ddd", "v"));
2921 // Move both files to bottom level.
2922 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2923 Slice
lower_bound("b");
2924 ReadOptions read_opts
;
2925 read_opts
.iterate_lower_bound
= &lower_bound
;
2926 std::unique_ptr
<Iterator
> iter(NewIterator(read_opts
));
2927 iter
->SeekForPrev("d");
2928 ASSERT_TRUE(iter
->Valid());
2929 ASSERT_OK(iter
->status());
2930 ASSERT_EQ("ccc", iter
->key());
2932 ASSERT_TRUE(iter
->Valid());
2933 ASSERT_OK(iter
->status());
2934 ASSERT_EQ("bbb", iter
->key());
2936 ASSERT_FALSE(iter
->Valid());
2937 ASSERT_OK(iter
->status());
2940 TEST_P(DBIteratorTest
, Blob
) {
2941 Options options
= CurrentOptions();
2942 options
.enable_blob_files
= true;
2943 options
.max_sequential_skip_in_iterations
= 2;
2944 options
.statistics
= CreateDBStatistics();
2948 // Note: we have 4 KVs (3 of which are hidden) for key "b" and
2949 // max_sequential_skip_in_iterations is set to 2. Thus, we need to do a reseek
2950 // anytime we move from "b" to "c" or vice versa.
2951 ASSERT_OK(Put("a", "va"));
2953 ASSERT_OK(Put("b", "vb0"));
2955 ASSERT_OK(Put("b", "vb1"));
2957 ASSERT_OK(Put("b", "vb2"));
2959 ASSERT_OK(Put("b", "vb3"));
2961 ASSERT_OK(Put("c", "vc"));
2964 std::unique_ptr
<Iterator
> iter_guard(NewIterator(ReadOptions()));
2965 Iterator
* const iter
= iter_guard
.get();
2967 iter
->SeekToFirst();
2968 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 0);
2969 ASSERT_EQ(IterStatus(iter
), "a->va");
2971 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 0);
2972 ASSERT_EQ(IterStatus(iter
), "b->vb3");
2974 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 1);
2975 ASSERT_EQ(IterStatus(iter
), "c->vc");
2977 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 1);
2978 ASSERT_EQ(IterStatus(iter
), "(invalid)");
2979 iter
->SeekToFirst();
2980 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 1);
2981 ASSERT_EQ(IterStatus(iter
), "a->va");
2983 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 1);
2984 ASSERT_EQ(IterStatus(iter
), "(invalid)");
2987 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 1);
2988 ASSERT_EQ(IterStatus(iter
), "c->vc");
2990 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 2);
2991 ASSERT_EQ(IterStatus(iter
), "b->vb3");
2993 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 2);
2994 ASSERT_EQ(IterStatus(iter
), "a->va");
2996 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 2);
2997 ASSERT_EQ(IterStatus(iter
), "(invalid)");
2999 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 2);
3000 ASSERT_EQ(IterStatus(iter
), "c->vc");
3002 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 2);
3003 ASSERT_EQ(IterStatus(iter
), "(invalid)");
3006 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 2);
3007 ASSERT_EQ(IterStatus(iter
), "a->va");
3009 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 2);
3010 ASSERT_EQ(IterStatus(iter
), "a->va");
3012 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 2);
3013 ASSERT_EQ(IterStatus(iter
), "b->vb3");
3015 iter
->SeekForPrev("d");
3016 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 2);
3017 ASSERT_EQ(IterStatus(iter
), "c->vc");
3018 iter
->SeekForPrev("c");
3019 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 2);
3020 ASSERT_EQ(IterStatus(iter
), "c->vc");
3021 iter
->SeekForPrev("bx");
3022 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 3);
3023 ASSERT_EQ(IterStatus(iter
), "b->vb3");
3026 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 3);
3027 ASSERT_EQ(IterStatus(iter
), "b->vb3");
3029 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 3);
3030 ASSERT_EQ(IterStatus(iter
), "(invalid)");
3031 iter
->SeekForPrev("b");
3032 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 4);
3033 ASSERT_EQ(IterStatus(iter
), "b->vb3");
3034 iter
->SeekForPrev("");
3035 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 4);
3036 ASSERT_EQ(IterStatus(iter
), "(invalid)");
3038 // Switch from reverse to forward
3040 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 4);
3042 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 5);
3044 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 5);
3046 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 6);
3047 ASSERT_EQ(IterStatus(iter
), "b->vb3");
3049 // Switch from forward to reverse
3050 iter
->SeekToFirst();
3051 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 6);
3053 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 6);
3055 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 7);
3057 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 8);
3058 ASSERT_EQ(IterStatus(iter
), "b->vb3");
3061 INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance
, DBIteratorTest
,
3062 testing::Values(true, false));
3064 // Tests how DBIter work with ReadCallback
3065 class DBIteratorWithReadCallbackTest
: public DBIteratorTest
{};
3067 TEST_F(DBIteratorWithReadCallbackTest
, ReadCallback
) {
3068 class TestReadCallback
: public ReadCallback
{
3070 explicit TestReadCallback(SequenceNumber _max_visible_seq
)
3071 : ReadCallback(_max_visible_seq
) {}
3073 bool IsVisibleFullCheck(SequenceNumber seq
) override
{
3074 return seq
<= max_visible_seq_
;
3078 ASSERT_OK(Put("foo", "v1"));
3079 ASSERT_OK(Put("foo", "v2"));
3080 ASSERT_OK(Put("foo", "v3"));
3081 ASSERT_OK(Put("a", "va"));
3082 ASSERT_OK(Put("z", "vz"));
3083 SequenceNumber seq1
= db_
->GetLatestSequenceNumber();
3084 TestReadCallback
callback1(seq1
);
3085 ASSERT_OK(Put("foo", "v4"));
3086 ASSERT_OK(Put("foo", "v5"));
3087 ASSERT_OK(Put("bar", "v7"));
3089 SequenceNumber seq2
= db_
->GetLatestSequenceNumber();
3091 static_cast_with_check
<ColumnFamilyHandleImpl
>(db_
->DefaultColumnFamily())
3093 // The iterator are suppose to see data before seq1.
3095 dbfull()->NewIteratorImpl(ReadOptions(), cfd
, seq2
, &callback1
);
3098 // The latest value of "foo" before seq1 is "v3"
3100 ASSERT_TRUE(iter
->Valid());
3101 ASSERT_OK(iter
->status());
3102 ASSERT_EQ("foo", iter
->key());
3103 ASSERT_EQ("v3", iter
->value());
3104 // "bar" is not visible to the iterator. It will move on to the next key
3107 ASSERT_TRUE(iter
->Valid());
3108 ASSERT_OK(iter
->status());
3109 ASSERT_EQ("foo", iter
->key());
3110 ASSERT_EQ("v3", iter
->value());
3115 ASSERT_TRUE(iter
->Valid());
3116 ASSERT_OK(iter
->status());
3117 ASSERT_EQ("va", iter
->value());
3118 // "bar" is not visible to the iterator. It will move on to the next key
3121 ASSERT_TRUE(iter
->Valid());
3122 ASSERT_OK(iter
->status());
3123 ASSERT_EQ("foo", iter
->key());
3124 ASSERT_EQ("v3", iter
->value());
3129 ASSERT_TRUE(iter
->Valid());
3130 ASSERT_OK(iter
->status());
3131 ASSERT_EQ("vz", iter
->value());
3132 // The previous key is "foo", which is visible to the iterator.
3134 ASSERT_TRUE(iter
->Valid());
3135 ASSERT_OK(iter
->status());
3136 ASSERT_EQ("foo", iter
->key());
3137 ASSERT_EQ("v3", iter
->value());
3138 // "bar" is not visible to the iterator. It will move on to the next key "a".
3139 iter
->Prev(); // skipping "bar"
3140 ASSERT_TRUE(iter
->Valid());
3141 ASSERT_OK(iter
->status());
3142 ASSERT_EQ("a", iter
->key());
3143 ASSERT_EQ("va", iter
->value());
3146 // The previous key is "foo", which is visible to the iterator.
3147 iter
->SeekForPrev("y");
3148 ASSERT_TRUE(iter
->Valid());
3149 ASSERT_OK(iter
->status());
3150 ASSERT_EQ("foo", iter
->key());
3151 ASSERT_EQ("v3", iter
->value());
3152 // "bar" is not visible to the iterator. It will move on to the next key "a".
3153 iter
->SeekForPrev("bar");
3154 ASSERT_TRUE(iter
->Valid());
3155 ASSERT_OK(iter
->status());
3156 ASSERT_EQ("a", iter
->key());
3157 ASSERT_EQ("va", iter
->value());
3161 // Prev beyond max_sequential_skip_in_iterations
3162 uint64_t num_versions
=
3163 CurrentOptions().max_sequential_skip_in_iterations
+ 10;
3164 for (uint64_t i
= 0; i
< num_versions
; i
++) {
3165 ASSERT_OK(Put("bar", std::to_string(i
)));
3167 SequenceNumber seq3
= db_
->GetLatestSequenceNumber();
3168 TestReadCallback
callback2(seq3
);
3169 ASSERT_OK(Put("bar", "v8"));
3170 SequenceNumber seq4
= db_
->GetLatestSequenceNumber();
3172 // The iterator is suppose to see data before seq3.
3173 iter
= dbfull()->NewIteratorImpl(ReadOptions(), cfd
, seq4
, &callback2
);
3174 // Seek to "z", which is visible.
3176 ASSERT_TRUE(iter
->Valid());
3177 ASSERT_OK(iter
->status());
3178 ASSERT_EQ("vz", iter
->value());
3179 // Previous key is "foo" and the last value "v5" is visible.
3181 ASSERT_TRUE(iter
->Valid());
3182 ASSERT_OK(iter
->status());
3183 ASSERT_EQ("foo", iter
->key());
3184 ASSERT_EQ("v5", iter
->value());
3185 // Since the number of values of "bar" is more than
3186 // max_sequential_skip_in_iterations, Prev() will ultimately fallback to
3187 // seek in forward direction. Here we test the fallback seek is correct.
3188 // The last visible value should be (num_versions - 1), as "v8" is not
3191 ASSERT_TRUE(iter
->Valid());
3192 ASSERT_OK(iter
->status());
3193 ASSERT_EQ("bar", iter
->key());
3194 ASSERT_EQ(std::to_string(num_versions
- 1), iter
->value());
3199 TEST_F(DBIteratorTest
, BackwardIterationOnInplaceUpdateMemtable
) {
3200 Options options
= CurrentOptions();
3201 options
.create_if_missing
= true;
3202 options
.inplace_update_support
= false;
3204 DestroyAndReopen(options
);
3205 constexpr int kNumKeys
= 10;
3207 // Write kNumKeys to WAL.
3208 for (int i
= 0; i
< kNumKeys
; ++i
) {
3209 ASSERT_OK(Put(Key(i
), "val"));
3211 ReadOptions read_opts
;
3212 read_opts
.total_order_seek
= true;
3214 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(read_opts
));
3216 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
3219 ASSERT_EQ(kNumKeys
, count
);
3222 // Reopen and rebuild the memtable from WAL.
3223 options
.create_if_missing
= false;
3224 options
.avoid_flush_during_recovery
= true;
3225 options
.inplace_update_support
= true;
3226 options
.allow_concurrent_memtable_write
= false;
3229 std::unique_ptr
<Iterator
> iter(db_
->NewIterator(read_opts
));
3231 // Backward iteration not supported due to inplace_update_support = true.
3232 ASSERT_TRUE(iter
->status().IsNotSupported());
3233 ASSERT_FALSE(iter
->Valid());
3237 TEST_F(DBIteratorTest
, IteratorRefreshReturnSV
) {
3238 Options options
= CurrentOptions();
3239 options
.disable_auto_compactions
= true;
3240 DestroyAndReopen(options
);
3242 db_
->DeleteRange(WriteOptions(), db_
->DefaultColumnFamily(), "a", "z"));
3243 std::unique_ptr
<Iterator
> iter
{db_
->NewIterator(ReadOptions())};
3244 SyncPoint::GetInstance()->SetCallBack(
3245 "ArenaWrappedDBIter::Refresh:SV", [&](void*) {
3246 ASSERT_OK(db_
->Put(WriteOptions(), "dummy", "new SV"));
3247 // This makes the local SV obselete.
3249 SyncPoint::GetInstance()->DisableProcessing();
3251 SyncPoint::GetInstance()->EnableProcessing();
3252 ASSERT_OK(iter
->Refresh());
3254 // iter used to not cleanup SV, so the Close() below would hit an assertion
3259 } // namespace ROCKSDB_NAMESPACE
3261 int main(int argc
, char** argv
) {
3262 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
3263 ::testing::InitGoogleTest(&argc
, argv
);
3264 return RUN_ALL_TESTS();