1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
12 #include "db/arena_wrapped_db_iter.h"
13 #include "db/db_iter.h"
14 #include "db/db_test_util.h"
15 #include "port/port.h"
16 #include "port/stack_trace.h"
17 #include "rocksdb/iostats_context.h"
18 #include "rocksdb/perf_context.h"
19 #include "table/block_based/flush_block_policy.h"
21 namespace ROCKSDB_NAMESPACE
{
23 // A dumb ReadCallback which saying every key is committed.
24 class DummyReadCallback
: public ReadCallback
{
26 DummyReadCallback() : ReadCallback(kMaxSequenceNumber
) {}
27 bool IsVisibleFullCheck(SequenceNumber
/*seq*/) override
{ return true; }
28 void SetSnapshot(SequenceNumber seq
) { max_visible_seq_
= seq
; }
32 // bool: whether to pass read_callback to NewIterator().
33 class DBIteratorTest
: public DBTestBase
,
34 public testing::WithParamInterface
<bool> {
36 DBIteratorTest() : DBTestBase("/db_iterator_test") {}
38 Iterator
* NewIterator(const ReadOptions
& read_options
,
39 ColumnFamilyHandle
* column_family
= nullptr) {
40 if (column_family
== nullptr) {
41 column_family
= db_
->DefaultColumnFamily();
43 auto* cfd
= reinterpret_cast<ColumnFamilyHandleImpl
*>(column_family
)->cfd();
44 SequenceNumber seq
= read_options
.snapshot
!= nullptr
45 ? read_options
.snapshot
->GetSequenceNumber()
46 : db_
->GetLatestSequenceNumber();
47 bool use_read_callback
= GetParam();
48 DummyReadCallback
* read_callback
= nullptr;
49 if (use_read_callback
) {
50 read_callback
= new DummyReadCallback();
51 read_callback
->SetSnapshot(seq
);
52 InstrumentedMutexLock
lock(&mutex_
);
53 read_callbacks_
.push_back(
54 std::unique_ptr
<DummyReadCallback
>(read_callback
));
56 return dbfull()->NewIteratorImpl(read_options
, cfd
, seq
, read_callback
);
60 InstrumentedMutex mutex_
;
61 std::vector
<std::unique_ptr
<DummyReadCallback
>> read_callbacks_
;
64 TEST_P(DBIteratorTest
, IteratorProperty
) {
65 // The test needs to be changed if kPersistedTier is supported in iterator.
66 Options options
= CurrentOptions();
67 CreateAndReopenWithCF({"pikachu"}, options
);
71 ropt
.pin_data
= false;
73 std::unique_ptr
<Iterator
> iter(NewIterator(ropt
, handles_
[1]));
75 std::string prop_value
;
76 ASSERT_NOK(iter
->GetProperty("non_existing.value", &prop_value
));
77 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
78 ASSERT_EQ("0", prop_value
);
79 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.internal-key", &prop_value
));
80 ASSERT_EQ("1", prop_value
);
82 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
83 ASSERT_EQ("Iterator is not valid.", prop_value
);
85 // Get internal key at which the iteration stopped (tombstone in this case).
86 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.internal-key", &prop_value
));
87 ASSERT_EQ("2", prop_value
);
92 TEST_P(DBIteratorTest
, PersistedTierOnIterator
) {
93 // The test needs to be changed if kPersistedTier is supported in iterator.
94 Options options
= CurrentOptions();
95 CreateAndReopenWithCF({"pikachu"}, options
);
97 ropt
.read_tier
= kPersistedTier
;
99 auto* iter
= db_
->NewIterator(ropt
, handles_
[1]);
100 ASSERT_TRUE(iter
->status().IsNotSupported());
103 std::vector
<Iterator
*> iters
;
104 ASSERT_TRUE(db_
->NewIterators(ropt
, {handles_
[1]}, &iters
).IsNotSupported());
108 TEST_P(DBIteratorTest
, NonBlockingIteration
) {
110 ReadOptions non_blocking_opts
, regular_opts
;
111 Options options
= CurrentOptions();
112 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
113 non_blocking_opts
.read_tier
= kBlockCacheTier
;
114 CreateAndReopenWithCF({"pikachu"}, options
);
115 // write one kv to the database.
116 ASSERT_OK(Put(1, "a", "b"));
118 // scan using non-blocking iterator. We should find it because
119 // it is in memtable.
120 Iterator
* iter
= NewIterator(non_blocking_opts
, handles_
[1]);
122 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
123 ASSERT_OK(iter
->status());
129 // flush memtable to storage. Now, the key should not be in the
130 // memtable neither in the block cache.
133 // verify that a non-blocking iterator does not find any
134 // kvs. Neither does it do any IOs to storage.
135 uint64_t numopen
= TestGetTickerCount(options
, NO_FILE_OPENS
);
136 uint64_t cache_added
= TestGetTickerCount(options
, BLOCK_CACHE_ADD
);
137 iter
= NewIterator(non_blocking_opts
, handles_
[1]);
139 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
143 ASSERT_TRUE(iter
->status().IsIncomplete());
144 ASSERT_EQ(numopen
, TestGetTickerCount(options
, NO_FILE_OPENS
));
145 ASSERT_EQ(cache_added
, TestGetTickerCount(options
, BLOCK_CACHE_ADD
));
148 // read in the specified block via a regular get
149 ASSERT_EQ(Get(1, "a"), "b");
151 // verify that we can find it via a non-blocking scan
152 numopen
= TestGetTickerCount(options
, NO_FILE_OPENS
);
153 cache_added
= TestGetTickerCount(options
, BLOCK_CACHE_ADD
);
154 iter
= NewIterator(non_blocking_opts
, handles_
[1]);
156 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
157 ASSERT_OK(iter
->status());
161 ASSERT_EQ(numopen
, TestGetTickerCount(options
, NO_FILE_OPENS
));
162 ASSERT_EQ(cache_added
, TestGetTickerCount(options
, BLOCK_CACHE_ADD
));
165 // This test verifies block cache behaviors, which is not used by plain
167 } while (ChangeOptions(kSkipPlainTable
| kSkipNoSeekToLast
| kSkipMmapReads
));
170 TEST_P(DBIteratorTest
, IterSeekBeforePrev
) {
171 ASSERT_OK(Put("a", "b"));
172 ASSERT_OK(Put("c", "d"));
173 dbfull()->Flush(FlushOptions());
174 ASSERT_OK(Put("0", "f"));
175 ASSERT_OK(Put("1", "h"));
176 dbfull()->Flush(FlushOptions());
177 ASSERT_OK(Put("2", "j"));
178 auto iter
= NewIterator(ReadOptions());
179 iter
->Seek(Slice("c"));
181 iter
->Seek(Slice("a"));
186 TEST_P(DBIteratorTest
, IterReseekNewUpperBound
) {
188 Options options
= CurrentOptions();
189 BlockBasedTableOptions table_options
;
190 table_options
.block_size
= 1024;
191 table_options
.block_size_deviation
= 50;
192 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
193 options
.compression
= kNoCompression
;
196 ASSERT_OK(Put("a", RandomString(&rnd
, 400)));
197 ASSERT_OK(Put("aabb", RandomString(&rnd
, 400)));
198 ASSERT_OK(Put("aaef", RandomString(&rnd
, 400)));
199 ASSERT_OK(Put("b", RandomString(&rnd
, 400)));
200 dbfull()->Flush(FlushOptions());
202 Slice ub
= Slice("aa");
203 opts
.iterate_upper_bound
= &ub
;
204 auto iter
= NewIterator(opts
);
205 iter
->Seek(Slice("a"));
207 iter
->Seek(Slice("aabc"));
208 ASSERT_TRUE(iter
->Valid());
209 ASSERT_EQ(iter
->key().ToString(), "aaef");
213 TEST_P(DBIteratorTest
, IterSeekForPrevBeforeNext
) {
214 ASSERT_OK(Put("a", "b"));
215 ASSERT_OK(Put("c", "d"));
216 dbfull()->Flush(FlushOptions());
217 ASSERT_OK(Put("0", "f"));
218 ASSERT_OK(Put("1", "h"));
219 dbfull()->Flush(FlushOptions());
220 ASSERT_OK(Put("2", "j"));
221 auto iter
= NewIterator(ReadOptions());
222 iter
->SeekForPrev(Slice("0"));
224 iter
->SeekForPrev(Slice("1"));
230 std::string
MakeLongKey(size_t length
, char c
) {
231 return std::string(length
, c
);
235 TEST_P(DBIteratorTest
, IterLongKeys
) {
236 ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
237 ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
238 ASSERT_OK(Put("a", "b"));
239 dbfull()->Flush(FlushOptions());
240 ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
241 ASSERT_OK(Put(MakeLongKey(127, 3), "3"));
242 ASSERT_OK(Put(MakeLongKey(64, 4), "4"));
243 auto iter
= NewIterator(ReadOptions());
245 // Create a key that needs to be skipped for Seq too new
246 iter
->Seek(MakeLongKey(20, 0));
247 ASSERT_EQ(IterStatus(iter
), MakeLongKey(20, 0) + "->0");
249 ASSERT_EQ(IterStatus(iter
), MakeLongKey(50, 1) + "->1");
251 ASSERT_EQ(IterStatus(iter
), MakeLongKey(32, 2) + "->2");
253 ASSERT_EQ(IterStatus(iter
), MakeLongKey(127, 3) + "->3");
255 ASSERT_EQ(IterStatus(iter
), MakeLongKey(64, 4) + "->4");
257 iter
->SeekForPrev(MakeLongKey(127, 3));
258 ASSERT_EQ(IterStatus(iter
), MakeLongKey(127, 3) + "->3");
260 ASSERT_EQ(IterStatus(iter
), MakeLongKey(32, 2) + "->2");
262 ASSERT_EQ(IterStatus(iter
), MakeLongKey(50, 1) + "->1");
265 iter
= NewIterator(ReadOptions());
266 iter
->Seek(MakeLongKey(50, 1));
267 ASSERT_EQ(IterStatus(iter
), MakeLongKey(50, 1) + "->1");
269 ASSERT_EQ(IterStatus(iter
), MakeLongKey(32, 2) + "->2");
271 ASSERT_EQ(IterStatus(iter
), MakeLongKey(127, 3) + "->3");
275 TEST_P(DBIteratorTest
, IterNextWithNewerSeq
) {
276 ASSERT_OK(Put("0", "0"));
277 dbfull()->Flush(FlushOptions());
278 ASSERT_OK(Put("a", "b"));
279 ASSERT_OK(Put("c", "d"));
280 ASSERT_OK(Put("d", "e"));
281 auto iter
= NewIterator(ReadOptions());
283 // Create a key that needs to be skipped for Seq too new
284 for (uint64_t i
= 0; i
< last_options_
.max_sequential_skip_in_iterations
+ 1;
286 ASSERT_OK(Put("b", "f"));
289 iter
->Seek(Slice("a"));
290 ASSERT_EQ(IterStatus(iter
), "a->b");
292 ASSERT_EQ(IterStatus(iter
), "c->d");
293 iter
->SeekForPrev(Slice("b"));
294 ASSERT_EQ(IterStatus(iter
), "a->b");
296 ASSERT_EQ(IterStatus(iter
), "c->d");
301 TEST_P(DBIteratorTest
, IterPrevWithNewerSeq
) {
302 ASSERT_OK(Put("0", "0"));
303 dbfull()->Flush(FlushOptions());
304 ASSERT_OK(Put("a", "b"));
305 ASSERT_OK(Put("c", "d"));
306 ASSERT_OK(Put("d", "e"));
307 auto iter
= NewIterator(ReadOptions());
309 // Create a key that needs to be skipped for Seq too new
310 for (uint64_t i
= 0; i
< last_options_
.max_sequential_skip_in_iterations
+ 1;
312 ASSERT_OK(Put("b", "f"));
315 iter
->Seek(Slice("d"));
316 ASSERT_EQ(IterStatus(iter
), "d->e");
318 ASSERT_EQ(IterStatus(iter
), "c->d");
320 ASSERT_EQ(IterStatus(iter
), "a->b");
322 iter
->SeekForPrev(Slice("d"));
323 ASSERT_EQ(IterStatus(iter
), "d->e");
325 ASSERT_EQ(IterStatus(iter
), "c->d");
327 ASSERT_EQ(IterStatus(iter
), "a->b");
332 TEST_P(DBIteratorTest
, IterPrevWithNewerSeq2
) {
333 ASSERT_OK(Put("0", "0"));
334 dbfull()->Flush(FlushOptions());
335 ASSERT_OK(Put("a", "b"));
336 ASSERT_OK(Put("c", "d"));
337 ASSERT_OK(Put("e", "f"));
338 auto iter
= NewIterator(ReadOptions());
339 auto iter2
= NewIterator(ReadOptions());
340 iter
->Seek(Slice("c"));
341 iter2
->SeekForPrev(Slice("d"));
342 ASSERT_EQ(IterStatus(iter
), "c->d");
343 ASSERT_EQ(IterStatus(iter2
), "c->d");
345 // Create a key that needs to be skipped for Seq too new
346 for (uint64_t i
= 0; i
< last_options_
.max_sequential_skip_in_iterations
+ 1;
348 ASSERT_OK(Put("b", "f"));
352 ASSERT_EQ(IterStatus(iter
), "a->b");
355 ASSERT_EQ(IterStatus(iter2
), "a->b");
361 TEST_P(DBIteratorTest
, IterEmpty
) {
363 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
364 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
367 ASSERT_EQ(IterStatus(iter
), "(invalid)");
370 ASSERT_EQ(IterStatus(iter
), "(invalid)");
373 ASSERT_EQ(IterStatus(iter
), "(invalid)");
375 iter
->SeekForPrev("foo");
376 ASSERT_EQ(IterStatus(iter
), "(invalid)");
379 } while (ChangeCompactOptions());
382 TEST_P(DBIteratorTest
, IterSingle
) {
384 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
385 ASSERT_OK(Put(1, "a", "va"));
386 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
389 ASSERT_EQ(IterStatus(iter
), "a->va");
391 ASSERT_EQ(IterStatus(iter
), "(invalid)");
393 ASSERT_EQ(IterStatus(iter
), "a->va");
395 ASSERT_EQ(IterStatus(iter
), "(invalid)");
398 ASSERT_EQ(IterStatus(iter
), "a->va");
400 ASSERT_EQ(IterStatus(iter
), "(invalid)");
402 ASSERT_EQ(IterStatus(iter
), "a->va");
404 ASSERT_EQ(IterStatus(iter
), "(invalid)");
407 ASSERT_EQ(IterStatus(iter
), "a->va");
409 ASSERT_EQ(IterStatus(iter
), "(invalid)");
410 iter
->SeekForPrev("");
411 ASSERT_EQ(IterStatus(iter
), "(invalid)");
414 ASSERT_EQ(IterStatus(iter
), "a->va");
416 ASSERT_EQ(IterStatus(iter
), "(invalid)");
417 iter
->SeekForPrev("a");
418 ASSERT_EQ(IterStatus(iter
), "a->va");
420 ASSERT_EQ(IterStatus(iter
), "(invalid)");
423 ASSERT_EQ(IterStatus(iter
), "(invalid)");
424 iter
->SeekForPrev("b");
425 ASSERT_EQ(IterStatus(iter
), "a->va");
427 ASSERT_EQ(IterStatus(iter
), "(invalid)");
430 } while (ChangeCompactOptions());
433 TEST_P(DBIteratorTest
, IterMulti
) {
435 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
436 ASSERT_OK(Put(1, "a", "va"));
437 ASSERT_OK(Put(1, "b", "vb"));
438 ASSERT_OK(Put(1, "c", "vc"));
439 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
442 ASSERT_EQ(IterStatus(iter
), "a->va");
444 ASSERT_EQ(IterStatus(iter
), "b->vb");
446 ASSERT_EQ(IterStatus(iter
), "c->vc");
448 ASSERT_EQ(IterStatus(iter
), "(invalid)");
450 ASSERT_EQ(IterStatus(iter
), "a->va");
452 ASSERT_EQ(IterStatus(iter
), "(invalid)");
455 ASSERT_EQ(IterStatus(iter
), "c->vc");
457 ASSERT_EQ(IterStatus(iter
), "b->vb");
459 ASSERT_EQ(IterStatus(iter
), "a->va");
461 ASSERT_EQ(IterStatus(iter
), "(invalid)");
463 ASSERT_EQ(IterStatus(iter
), "c->vc");
465 ASSERT_EQ(IterStatus(iter
), "(invalid)");
468 ASSERT_EQ(IterStatus(iter
), "a->va");
470 ASSERT_EQ(IterStatus(iter
), "a->va");
472 ASSERT_EQ(IterStatus(iter
), "b->vb");
473 iter
->SeekForPrev("d");
474 ASSERT_EQ(IterStatus(iter
), "c->vc");
475 iter
->SeekForPrev("c");
476 ASSERT_EQ(IterStatus(iter
), "c->vc");
477 iter
->SeekForPrev("bx");
478 ASSERT_EQ(IterStatus(iter
), "b->vb");
481 ASSERT_EQ(IterStatus(iter
), "b->vb");
483 ASSERT_EQ(IterStatus(iter
), "(invalid)");
484 iter
->SeekForPrev("b");
485 ASSERT_EQ(IterStatus(iter
), "b->vb");
486 iter
->SeekForPrev("");
487 ASSERT_EQ(IterStatus(iter
), "(invalid)");
489 // Switch from reverse to forward
494 ASSERT_EQ(IterStatus(iter
), "b->vb");
496 // Switch from forward to reverse
501 ASSERT_EQ(IterStatus(iter
), "b->vb");
503 // Make sure iter stays at snapshot
504 ASSERT_OK(Put(1, "a", "va2"));
505 ASSERT_OK(Put(1, "a2", "va3"));
506 ASSERT_OK(Put(1, "b", "vb2"));
507 ASSERT_OK(Put(1, "c", "vc2"));
508 ASSERT_OK(Delete(1, "b"));
510 ASSERT_EQ(IterStatus(iter
), "a->va");
512 ASSERT_EQ(IterStatus(iter
), "b->vb");
514 ASSERT_EQ(IterStatus(iter
), "c->vc");
516 ASSERT_EQ(IterStatus(iter
), "(invalid)");
518 ASSERT_EQ(IterStatus(iter
), "c->vc");
520 ASSERT_EQ(IterStatus(iter
), "b->vb");
522 ASSERT_EQ(IterStatus(iter
), "a->va");
524 ASSERT_EQ(IterStatus(iter
), "(invalid)");
527 } while (ChangeCompactOptions());
530 // Check that we can skip over a run of user keys
531 // by using reseek rather than sequential scan
532 TEST_P(DBIteratorTest
, IterReseek
) {
533 anon::OptionsOverride options_override
;
534 options_override
.skip_policy
= kSkipNoSnapshot
;
535 Options options
= CurrentOptions(options_override
);
536 options
.max_sequential_skip_in_iterations
= 3;
537 options
.create_if_missing
= true;
538 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
539 DestroyAndReopen(options
);
540 CreateAndReopenWithCF({"pikachu"}, options
);
542 // insert three keys with same userkey and verify that
543 // reseek is not invoked. For each of these test cases,
544 // verify that we can find the next key "b".
545 ASSERT_OK(Put(1, "a", "zero"));
546 ASSERT_OK(Put(1, "a", "one"));
547 ASSERT_OK(Put(1, "a", "two"));
548 ASSERT_OK(Put(1, "b", "bone"));
549 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
551 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 0);
552 ASSERT_EQ(IterStatus(iter
), "a->two");
554 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 0);
555 ASSERT_EQ(IterStatus(iter
), "b->bone");
558 // insert a total of three keys with same userkey and verify
559 // that reseek is still not invoked.
560 ASSERT_OK(Put(1, "a", "three"));
561 iter
= NewIterator(ReadOptions(), handles_
[1]);
563 ASSERT_EQ(IterStatus(iter
), "a->three");
565 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 0);
566 ASSERT_EQ(IterStatus(iter
), "b->bone");
569 // insert a total of four keys with same userkey and verify
570 // that reseek is invoked.
571 ASSERT_OK(Put(1, "a", "four"));
572 iter
= NewIterator(ReadOptions(), handles_
[1]);
574 ASSERT_EQ(IterStatus(iter
), "a->four");
575 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 0);
577 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
), 1);
578 ASSERT_EQ(IterStatus(iter
), "b->bone");
581 // Testing reverse iterator
582 // At this point, we have three versions of "a" and one version of "b".
583 // The reseek statistics is already at 1.
584 int num_reseeks
= static_cast<int>(
585 TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
));
587 // Insert another version of b and assert that reseek is not invoked
588 ASSERT_OK(Put(1, "b", "btwo"));
589 iter
= NewIterator(ReadOptions(), handles_
[1]);
591 ASSERT_EQ(IterStatus(iter
), "b->btwo");
592 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
),
595 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
),
597 ASSERT_EQ(IterStatus(iter
), "a->four");
600 // insert two more versions of b. This makes a total of 4 versions
601 // of b and 4 versions of a.
602 ASSERT_OK(Put(1, "b", "bthree"));
603 ASSERT_OK(Put(1, "b", "bfour"));
604 iter
= NewIterator(ReadOptions(), handles_
[1]);
606 ASSERT_EQ(IterStatus(iter
), "b->bfour");
607 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
),
611 // the previous Prev call should have invoked reseek
612 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_OF_RESEEKS_IN_ITERATION
),
614 ASSERT_EQ(IterStatus(iter
), "a->four");
618 TEST_P(DBIteratorTest
, IterSmallAndLargeMix
) {
620 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
621 ASSERT_OK(Put(1, "a", "va"));
622 ASSERT_OK(Put(1, "b", std::string(100000, 'b')));
623 ASSERT_OK(Put(1, "c", "vc"));
624 ASSERT_OK(Put(1, "d", std::string(100000, 'd')));
625 ASSERT_OK(Put(1, "e", std::string(100000, 'e')));
627 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
630 ASSERT_EQ(IterStatus(iter
), "a->va");
632 ASSERT_EQ(IterStatus(iter
), "b->" + std::string(100000, 'b'));
634 ASSERT_EQ(IterStatus(iter
), "c->vc");
636 ASSERT_EQ(IterStatus(iter
), "d->" + std::string(100000, 'd'));
638 ASSERT_EQ(IterStatus(iter
), "e->" + std::string(100000, 'e'));
640 ASSERT_EQ(IterStatus(iter
), "(invalid)");
643 ASSERT_EQ(IterStatus(iter
), "e->" + std::string(100000, 'e'));
645 ASSERT_EQ(IterStatus(iter
), "d->" + std::string(100000, 'd'));
647 ASSERT_EQ(IterStatus(iter
), "c->vc");
649 ASSERT_EQ(IterStatus(iter
), "b->" + std::string(100000, 'b'));
651 ASSERT_EQ(IterStatus(iter
), "a->va");
653 ASSERT_EQ(IterStatus(iter
), "(invalid)");
656 } while (ChangeCompactOptions());
659 TEST_P(DBIteratorTest
, IterMultiWithDelete
) {
661 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
662 ASSERT_OK(Put(1, "ka", "va"));
663 ASSERT_OK(Put(1, "kb", "vb"));
664 ASSERT_OK(Put(1, "kc", "vc"));
665 ASSERT_OK(Delete(1, "kb"));
666 ASSERT_EQ("NOT_FOUND", Get(1, "kb"));
668 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
670 ASSERT_EQ(IterStatus(iter
), "kc->vc");
671 if (!CurrentOptions().merge_operator
) {
672 // TODO: merge operator does not support backward iteration yet
673 if (kPlainTableAllBytesPrefix
!= option_config_
&&
674 kBlockBasedTableWithWholeKeyHashIndex
!= option_config_
&&
675 kHashLinkList
!= option_config_
&&
676 kHashSkipList
!= option_config_
) { // doesn't support SeekToLast
678 ASSERT_EQ(IterStatus(iter
), "ka->va");
682 } while (ChangeOptions());
685 TEST_P(DBIteratorTest
, IterPrevMaxSkip
) {
687 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
688 for (int i
= 0; i
< 2; i
++) {
689 ASSERT_OK(Put(1, "key1", "v1"));
690 ASSERT_OK(Put(1, "key2", "v2"));
691 ASSERT_OK(Put(1, "key3", "v3"));
692 ASSERT_OK(Put(1, "key4", "v4"));
693 ASSERT_OK(Put(1, "key5", "v5"));
696 VerifyIterLast("key5->v5", 1);
698 ASSERT_OK(Delete(1, "key5"));
699 VerifyIterLast("key4->v4", 1);
701 ASSERT_OK(Delete(1, "key4"));
702 VerifyIterLast("key3->v3", 1);
704 ASSERT_OK(Delete(1, "key3"));
705 VerifyIterLast("key2->v2", 1);
707 ASSERT_OK(Delete(1, "key2"));
708 VerifyIterLast("key1->v1", 1);
710 ASSERT_OK(Delete(1, "key1"));
711 VerifyIterLast("(invalid)", 1);
712 } while (ChangeOptions(kSkipMergePut
| kSkipNoSeekToLast
));
715 TEST_P(DBIteratorTest
, IterWithSnapshot
) {
716 anon::OptionsOverride options_override
;
717 options_override
.skip_policy
= kSkipNoSnapshot
;
719 CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override
));
720 ASSERT_OK(Put(1, "key1", "val1"));
721 ASSERT_OK(Put(1, "key2", "val2"));
722 ASSERT_OK(Put(1, "key3", "val3"));
723 ASSERT_OK(Put(1, "key4", "val4"));
724 ASSERT_OK(Put(1, "key5", "val5"));
726 const Snapshot
* snapshot
= db_
->GetSnapshot();
728 options
.snapshot
= snapshot
;
729 Iterator
* iter
= NewIterator(options
, handles_
[1]);
731 ASSERT_OK(Put(1, "key0", "val0"));
732 // Put more values after the snapshot
733 ASSERT_OK(Put(1, "key100", "val100"));
734 ASSERT_OK(Put(1, "key101", "val101"));
737 ASSERT_EQ(IterStatus(iter
), "key5->val5");
738 if (!CurrentOptions().merge_operator
) {
739 // TODO: merge operator does not support backward iteration yet
740 if (kPlainTableAllBytesPrefix
!= option_config_
&&
741 kBlockBasedTableWithWholeKeyHashIndex
!= option_config_
&&
742 kHashLinkList
!= option_config_
&& kHashSkipList
!= option_config_
) {
744 ASSERT_EQ(IterStatus(iter
), "key4->val4");
746 ASSERT_EQ(IterStatus(iter
), "key3->val3");
749 ASSERT_EQ(IterStatus(iter
), "key4->val4");
751 ASSERT_EQ(IterStatus(iter
), "key5->val5");
754 ASSERT_TRUE(!iter
->Valid());
757 if (!CurrentOptions().merge_operator
) {
758 // TODO(gzh): merge operator does not support backward iteration yet
759 if (kPlainTableAllBytesPrefix
!= option_config_
&&
760 kBlockBasedTableWithWholeKeyHashIndex
!= option_config_
&&
761 kHashLinkList
!= option_config_
&& kHashSkipList
!= option_config_
) {
762 iter
->SeekForPrev("key1");
763 ASSERT_EQ(IterStatus(iter
), "key1->val1");
765 ASSERT_EQ(IterStatus(iter
), "key2->val2");
767 ASSERT_EQ(IterStatus(iter
), "key3->val3");
769 ASSERT_EQ(IterStatus(iter
), "key2->val2");
771 ASSERT_EQ(IterStatus(iter
), "key1->val1");
773 ASSERT_TRUE(!iter
->Valid());
776 db_
->ReleaseSnapshot(snapshot
);
778 } while (ChangeOptions());
781 TEST_P(DBIteratorTest
, IteratorPinsRef
) {
783 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
784 Put(1, "foo", "hello");
786 // Get iterator that will yield the current contents of the DB.
787 Iterator
* iter
= NewIterator(ReadOptions(), handles_
[1]);
789 // Write to force compactions
790 Put(1, "foo", "newvalue1");
791 for (int i
= 0; i
< 100; i
++) {
793 ASSERT_OK(Put(1, Key(i
), Key(i
) + std::string(100000, 'v')));
795 Put(1, "foo", "newvalue2");
798 ASSERT_TRUE(iter
->Valid());
799 ASSERT_EQ("foo", iter
->key().ToString());
800 ASSERT_EQ("hello", iter
->value().ToString());
802 ASSERT_TRUE(!iter
->Valid());
804 } while (ChangeCompactOptions());
807 TEST_P(DBIteratorTest
, IteratorDeleteAfterCfDelete
) {
808 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
810 Put(1, "foo", "delete-cf-then-delete-iter");
811 Put(1, "hello", "value2");
813 ColumnFamilyHandle
* cf
= handles_
[1];
816 auto* iter
= db_
->NewIterator(ro
, cf
);
818 ASSERT_EQ(IterStatus(iter
), "foo->delete-cf-then-delete-iter");
821 db_
->DestroyColumnFamilyHandle(cf
);
822 handles_
.erase(std::begin(handles_
) + 1);
824 // delete Iterator after CF handle is deleted
826 ASSERT_EQ(IterStatus(iter
), "hello->value2");
830 TEST_P(DBIteratorTest
, IteratorDeleteAfterCfDrop
) {
831 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
833 Put(1, "foo", "drop-cf-then-delete-iter");
836 ColumnFamilyHandle
* cf
= handles_
[1];
838 auto* iter
= db_
->NewIterator(ro
, cf
);
840 ASSERT_EQ(IterStatus(iter
), "foo->drop-cf-then-delete-iter");
842 // drop and delete CF
843 db_
->DropColumnFamily(cf
);
844 db_
->DestroyColumnFamilyHandle(cf
);
845 handles_
.erase(std::begin(handles_
) + 1);
847 // delete Iterator after CF handle is dropped
851 // SetOptions not defined in ROCKSDB LITE
853 TEST_P(DBIteratorTest
, DBIteratorBoundTest
) {
854 Options options
= CurrentOptions();
856 options
.create_if_missing
= true;
858 options
.prefix_extractor
= nullptr;
859 DestroyAndReopen(options
);
860 ASSERT_OK(Put("a", "0"));
861 ASSERT_OK(Put("foo", "bar"));
862 ASSERT_OK(Put("foo1", "bar1"));
863 ASSERT_OK(Put("g1", "0"));
865 // testing basic case with no iterate_upper_bound and no prefix_extractor
868 ro
.iterate_upper_bound
= nullptr;
870 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
874 ASSERT_TRUE(iter
->Valid());
875 ASSERT_EQ(iter
->key().compare(Slice("foo")), 0);
878 ASSERT_TRUE(iter
->Valid());
879 ASSERT_EQ(iter
->key().compare(Slice("foo1")), 0);
882 ASSERT_TRUE(iter
->Valid());
883 ASSERT_EQ(iter
->key().compare(Slice("g1")), 0);
885 iter
->SeekForPrev("g1");
887 ASSERT_TRUE(iter
->Valid());
888 ASSERT_EQ(iter
->key().compare(Slice("g1")), 0);
891 ASSERT_TRUE(iter
->Valid());
892 ASSERT_EQ(iter
->key().compare(Slice("foo1")), 0);
895 ASSERT_TRUE(iter
->Valid());
896 ASSERT_EQ(iter
->key().compare(Slice("foo")), 0);
899 // testing iterate_upper_bound and forward iterator
900 // to make sure it stops at bound
903 // iterate_upper_bound points beyond the last expected entry
904 Slice
prefix("foo2");
905 ro
.iterate_upper_bound
= &prefix
;
907 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
911 ASSERT_TRUE(iter
->Valid());
912 ASSERT_EQ(iter
->key().compare(Slice("foo")), 0);
915 ASSERT_TRUE(iter
->Valid());
916 ASSERT_EQ(iter
->key().compare(("foo1")), 0);
919 // should stop here...
920 ASSERT_TRUE(!iter
->Valid());
922 // Testing SeekToLast with iterate_upper_bound set
927 ro
.iterate_upper_bound
= &prefix
;
929 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
932 ASSERT_TRUE(iter
->Valid());
933 ASSERT_EQ(iter
->key().compare(Slice("a")), 0);
936 // prefix is the first letter of the key
937 ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}}));
938 ASSERT_OK(Put("a", "0"));
939 ASSERT_OK(Put("foo", "bar"));
940 ASSERT_OK(Put("foo1", "bar1"));
941 ASSERT_OK(Put("g1", "0"));
943 // testing with iterate_upper_bound and prefix_extractor
944 // Seek target and iterate_upper_bound are not is same prefix
945 // This should be an error
948 Slice
upper_bound("g");
949 ro
.iterate_upper_bound
= &upper_bound
;
951 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
955 ASSERT_TRUE(iter
->Valid());
956 ASSERT_EQ("foo", iter
->key().ToString());
959 ASSERT_TRUE(iter
->Valid());
960 ASSERT_EQ("foo1", iter
->key().ToString());
963 ASSERT_TRUE(!iter
->Valid());
966 // testing that iterate_upper_bound prevents iterating over deleted items
967 // if the bound has already reached
969 options
.prefix_extractor
= nullptr;
970 DestroyAndReopen(options
);
971 ASSERT_OK(Put("a", "0"));
972 ASSERT_OK(Put("b", "0"));
973 ASSERT_OK(Put("b1", "0"));
974 ASSERT_OK(Put("c", "0"));
975 ASSERT_OK(Put("d", "0"));
976 ASSERT_OK(Put("e", "0"));
977 ASSERT_OK(Delete("c"));
978 ASSERT_OK(Delete("d"));
980 // base case with no bound
982 ro
.iterate_upper_bound
= nullptr;
984 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
987 ASSERT_TRUE(iter
->Valid());
988 ASSERT_EQ(iter
->key().compare(Slice("b")), 0);
991 ASSERT_TRUE(iter
->Valid());
992 ASSERT_EQ(iter
->key().compare(("b1")), 0);
994 get_perf_context()->Reset();
997 ASSERT_TRUE(iter
->Valid());
998 ASSERT_EQ(static_cast<int>(get_perf_context()->internal_delete_skipped_count
), 2);
1000 // now testing with iterate_bound
1002 ro
.iterate_upper_bound
= &prefix
;
1004 iter
.reset(NewIterator(ro
));
1006 get_perf_context()->Reset();
1009 ASSERT_TRUE(iter
->Valid());
1010 ASSERT_EQ(iter
->key().compare(Slice("b")), 0);
1013 ASSERT_TRUE(iter
->Valid());
1014 ASSERT_EQ(iter
->key().compare(("b1")), 0);
1017 // the iteration should stop as soon as the bound key is reached
1018 // even though the key is deleted
1019 // hence internal_delete_skipped_count should be 0
1020 ASSERT_TRUE(!iter
->Valid());
1021 ASSERT_EQ(static_cast<int>(get_perf_context()->internal_delete_skipped_count
), 0);
1025 TEST_P(DBIteratorTest
, DBIteratorBoundMultiSeek
) {
1026 Options options
= CurrentOptions();
1028 options
.create_if_missing
= true;
1029 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
1030 options
.prefix_extractor
= nullptr;
1031 DestroyAndReopen(options
);
1032 ASSERT_OK(Put("a", "0"));
1033 ASSERT_OK(Put("z", "0"));
1035 ASSERT_OK(Put("foo1", "bar1"));
1036 ASSERT_OK(Put("foo2", "bar2"));
1037 ASSERT_OK(Put("foo3", "bar3"));
1038 ASSERT_OK(Put("foo4", "bar4"));
1041 std::string up_str
= "foo5";
1044 ro
.iterate_upper_bound
= &up
;
1045 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
1048 ASSERT_TRUE(iter
->Valid());
1049 ASSERT_EQ(iter
->key().compare(Slice("foo1")), 0);
1051 uint64_t prev_block_cache_hit
=
1052 TestGetTickerCount(options
, BLOCK_CACHE_HIT
);
1053 uint64_t prev_block_cache_miss
=
1054 TestGetTickerCount(options
, BLOCK_CACHE_MISS
);
1056 ASSERT_GT(prev_block_cache_hit
+ prev_block_cache_miss
, 0);
1059 ASSERT_TRUE(iter
->Valid());
1060 ASSERT_EQ(iter
->key().compare(Slice("foo4")), 0);
1061 ASSERT_EQ(prev_block_cache_hit
,
1062 TestGetTickerCount(options
, BLOCK_CACHE_HIT
));
1063 ASSERT_EQ(prev_block_cache_miss
,
1064 TestGetTickerCount(options
, BLOCK_CACHE_MISS
));
1067 ASSERT_TRUE(iter
->Valid());
1068 ASSERT_EQ(iter
->key().compare(Slice("foo2")), 0);
1070 ASSERT_TRUE(iter
->Valid());
1071 ASSERT_EQ(iter
->key().compare(Slice("foo3")), 0);
1072 ASSERT_EQ(prev_block_cache_hit
,
1073 TestGetTickerCount(options
, BLOCK_CACHE_HIT
));
1074 ASSERT_EQ(prev_block_cache_miss
,
1075 TestGetTickerCount(options
, BLOCK_CACHE_MISS
));
1080 TEST_P(DBIteratorTest
, DBIteratorBoundOptimizationTest
) {
1081 for (auto format_version
: {2, 3, 4}) {
1082 int upper_bound_hits
= 0;
1083 Options options
= CurrentOptions();
1084 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1085 "BlockBasedTableIterator:out_of_bound",
1086 [&upper_bound_hits
](void*) { upper_bound_hits
++; });
1087 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1089 options
.create_if_missing
= true;
1090 options
.prefix_extractor
= nullptr;
1091 BlockBasedTableOptions table_options
;
1092 table_options
.format_version
= format_version
;
1093 table_options
.flush_block_policy_factory
=
1094 std::make_shared
<FlushBlockEveryKeyPolicyFactory
>();
1095 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1097 DestroyAndReopen(options
);
1098 ASSERT_OK(Put("foo1", "bar1"));
1099 ASSERT_OK(Put("foo2", "bar2"));
1100 ASSERT_OK(Put("foo4", "bar4"));
1105 ro
.iterate_upper_bound
= &ub
;
1107 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
1110 ASSERT_TRUE(iter
->Valid());
1111 ASSERT_EQ(iter
->key().compare(Slice("foo1")), 0);
1112 ASSERT_EQ(upper_bound_hits
, 0);
1115 ASSERT_TRUE(iter
->Valid());
1116 ASSERT_EQ(iter
->key().compare(Slice("foo2")), 0);
1117 ASSERT_EQ(upper_bound_hits
, 0);
1120 ASSERT_FALSE(iter
->Valid());
1121 ASSERT_EQ(upper_bound_hits
, 1);
1125 // Enable kBinarySearchWithFirstKey, do some iterator operations and check that
1126 // they don't do unnecessary block reads.
1127 TEST_P(DBIteratorTest
, IndexWithFirstKey
) {
1128 for (int tailing
= 0; tailing
< 2; ++tailing
) {
1129 SCOPED_TRACE("tailing = " + std::to_string(tailing
));
1130 Options options
= CurrentOptions();
1132 options
.create_if_missing
= true;
1133 options
.prefix_extractor
= nullptr;
1134 options
.merge_operator
= MergeOperators::CreateStringAppendOperator();
1135 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
1136 Statistics
* stats
= options
.statistics
.get();
1137 BlockBasedTableOptions table_options
;
1138 table_options
.index_type
=
1139 BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey
;
1140 table_options
.index_shortening
=
1141 BlockBasedTableOptions::IndexShorteningMode::kNoShortening
;
1142 table_options
.flush_block_policy_factory
=
1143 std::make_shared
<FlushBlockEveryKeyPolicyFactory
>();
1144 table_options
.block_cache
=
1145 NewLRUCache(8000); // fits all blocks and their cache metadata overhead
1146 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1148 DestroyAndReopen(options
);
1149 ASSERT_OK(Merge("a1", "x1"));
1150 ASSERT_OK(Merge("b1", "y1"));
1151 ASSERT_OK(Merge("c0", "z1"));
1153 ASSERT_OK(Merge("a2", "x2"));
1154 ASSERT_OK(Merge("b2", "y2"));
1155 ASSERT_OK(Merge("c0", "z2"));
1157 ASSERT_OK(Merge("a3", "x3"));
1158 ASSERT_OK(Merge("b3", "y3"));
1159 ASSERT_OK(Merge("c3", "z3"));
1162 // Block cache is not important for this test.
1163 // We use BLOCK_CACHE_DATA_* counters just because they're the most readily
1164 // available way of counting block accesses.
1167 ropt
.tailing
= tailing
;
1168 std::unique_ptr
<Iterator
> iter(NewIterator(ropt
));
1171 ASSERT_TRUE(iter
->Valid());
1172 EXPECT_EQ("b2", iter
->key().ToString());
1173 EXPECT_EQ("y2", iter
->value().ToString());
1174 EXPECT_EQ(1, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1177 ASSERT_TRUE(iter
->Valid());
1178 EXPECT_EQ("b3", iter
->key().ToString());
1179 EXPECT_EQ("y3", iter
->value().ToString());
1180 EXPECT_EQ(2, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1181 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1184 ASSERT_TRUE(iter
->Valid());
1185 EXPECT_EQ("c0", iter
->key().ToString());
1186 EXPECT_EQ("z1,z2", iter
->value().ToString());
1187 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1188 EXPECT_EQ(4, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1191 ASSERT_TRUE(iter
->Valid());
1192 EXPECT_EQ("c3", iter
->key().ToString());
1193 EXPECT_EQ("z3", iter
->value().ToString());
1194 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1195 EXPECT_EQ(5, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1199 // Enable iterate_upper_bound and check that iterator is not trying to read
1200 // blocks that are fully above upper bound.
1201 std::string ub
= "b3";
1203 ropt
.iterate_upper_bound
= &ub_slice
;
1204 iter
.reset(NewIterator(ropt
));
1207 ASSERT_TRUE(iter
->Valid());
1208 EXPECT_EQ("b2", iter
->key().ToString());
1209 EXPECT_EQ("y2", iter
->value().ToString());
1210 EXPECT_EQ(1, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1211 EXPECT_EQ(5, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1214 ASSERT_FALSE(iter
->Valid());
1215 EXPECT_EQ(1, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1216 EXPECT_EQ(5, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1220 TEST_P(DBIteratorTest
, IndexWithFirstKeyGet
) {
1221 Options options
= CurrentOptions();
1223 options
.create_if_missing
= true;
1224 options
.prefix_extractor
= nullptr;
1225 options
.merge_operator
= MergeOperators::CreateStringAppendOperator();
1226 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
1227 Statistics
* stats
= options
.statistics
.get();
1228 BlockBasedTableOptions table_options
;
1229 table_options
.index_type
=
1230 BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey
;
1231 table_options
.index_shortening
=
1232 BlockBasedTableOptions::IndexShorteningMode::kNoShortening
;
1233 table_options
.flush_block_policy_factory
=
1234 std::make_shared
<FlushBlockEveryKeyPolicyFactory
>();
1235 table_options
.block_cache
= NewLRUCache(1000); // fits all blocks
1236 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1238 DestroyAndReopen(options
);
1239 ASSERT_OK(Merge("a", "x1"));
1240 ASSERT_OK(Merge("c", "y1"));
1241 ASSERT_OK(Merge("e", "z1"));
1243 ASSERT_OK(Merge("c", "y2"));
1244 ASSERT_OK(Merge("e", "z2"));
1247 // Get() between blocks shouldn't read any blocks.
1248 ASSERT_EQ("NOT_FOUND", Get("b"));
1249 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1250 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1252 // Get() of an existing key shouldn't read any unnecessary blocks when there's
1253 // only one key per block.
1255 ASSERT_EQ("y1,y2", Get("c"));
1256 EXPECT_EQ(2, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1257 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1259 ASSERT_EQ("x1", Get("a"));
1260 EXPECT_EQ(3, stats
->getTickerCount(BLOCK_CACHE_DATA_MISS
));
1261 EXPECT_EQ(0, stats
->getTickerCount(BLOCK_CACHE_DATA_HIT
));
1263 EXPECT_EQ(std::vector
<std::string
>({"NOT_FOUND", "z1,z2"}),
1264 MultiGet({"b", "e"}));
1267 // TODO(3.13): fix the issue of Seek() + Prev() which might not necessary
1268 // return the biggest key which is smaller than the seek key.
1269 TEST_P(DBIteratorTest
, PrevAfterAndNextAfterMerge
) {
1271 options
.create_if_missing
= true;
1272 options
.merge_operator
= MergeOperators::CreatePutOperator();
1274 DestroyAndReopen(options
);
1276 // write three entries with different keys using Merge()
1278 db_
->Merge(wopts
, "1", "data1");
1279 db_
->Merge(wopts
, "2", "data2");
1280 db_
->Merge(wopts
, "3", "data3");
1282 std::unique_ptr
<Iterator
> it(NewIterator(ReadOptions()));
1285 ASSERT_TRUE(it
->Valid());
1286 ASSERT_EQ("2", it
->key().ToString());
1289 ASSERT_TRUE(it
->Valid());
1290 ASSERT_EQ("1", it
->key().ToString());
1292 it
->SeekForPrev("1");
1293 ASSERT_TRUE(it
->Valid());
1294 ASSERT_EQ("1", it
->key().ToString());
1297 ASSERT_TRUE(it
->Valid());
1298 ASSERT_EQ("2", it
->key().ToString());
1301 class DBIteratorTestForPinnedData
: public DBIteratorTest
{
1306 COMPACT_BEFORE_READ
,
1310 DBIteratorTestForPinnedData() : DBIteratorTest() {}
1311 void PinnedDataIteratorRandomized(TestConfig run_config
) {
1312 // Generate Random data
1316 int key_pool
= static_cast<int>(puts
* 0.7);
1318 int val_size
= 1000;
1319 int seeks_percentage
= 20; // 20% of keys will be used to test seek()
1320 int delete_percentage
= 20; // 20% of keys will be deleted
1321 int merge_percentage
= 20; // 20% of keys will be added using Merge()
1323 Options options
= CurrentOptions();
1324 BlockBasedTableOptions table_options
;
1325 table_options
.use_delta_encoding
= false;
1326 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1327 options
.merge_operator
= MergeOperators::CreatePutOperator();
1328 DestroyAndReopen(options
);
1330 std::vector
<std::string
> generated_keys(key_pool
);
1331 for (int i
= 0; i
< key_pool
; i
++) {
1332 generated_keys
[i
] = RandomString(&rnd
, key_size
);
1335 std::map
<std::string
, std::string
> true_data
;
1336 std::vector
<std::string
> random_keys
;
1337 std::vector
<std::string
> deleted_keys
;
1338 for (int i
= 0; i
< puts
; i
++) {
1339 auto& k
= generated_keys
[rnd
.Next() % key_pool
];
1340 auto v
= RandomString(&rnd
, val_size
);
1342 // Insert data to true_data map and to DB
1344 if (rnd
.PercentTrue(merge_percentage
)) {
1345 ASSERT_OK(db_
->Merge(WriteOptions(), k
, v
));
1347 ASSERT_OK(Put(k
, v
));
1350 // Pick random keys to be used to test Seek()
1351 if (rnd
.PercentTrue(seeks_percentage
)) {
1352 random_keys
.push_back(k
);
1355 // Delete some random keys
1356 if (rnd
.PercentTrue(delete_percentage
)) {
1357 deleted_keys
.push_back(k
);
1359 ASSERT_OK(Delete(k
));
1362 if (run_config
== TestConfig::FLUSH_EVERY_1000
) {
1363 if (i
&& i
% 1000 == 0) {
1369 if (run_config
== TestConfig::CLOSE_AND_OPEN
) {
1372 } else if (run_config
== TestConfig::COMPACT_BEFORE_READ
) {
1373 db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr);
1378 auto iter
= NewIterator(ro
);
1381 // Test Seek to random keys
1382 std::vector
<Slice
> keys_slices
;
1383 std::vector
<std::string
> true_keys
;
1384 for (auto& k
: random_keys
) {
1386 if (!iter
->Valid()) {
1387 ASSERT_EQ(true_data
.lower_bound(k
), true_data
.end());
1390 std::string prop_value
;
1392 iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1393 ASSERT_EQ("1", prop_value
);
1394 keys_slices
.push_back(iter
->key());
1395 true_keys
.push_back(true_data
.lower_bound(k
)->first
);
1398 for (size_t i
= 0; i
< keys_slices
.size(); i
++) {
1399 ASSERT_EQ(keys_slices
[i
].ToString(), true_keys
[i
]);
1404 // Test SeekForPrev to random keys
1405 std::vector
<Slice
> keys_slices
;
1406 std::vector
<std::string
> true_keys
;
1407 for (auto& k
: random_keys
) {
1408 iter
->SeekForPrev(k
);
1409 if (!iter
->Valid()) {
1410 ASSERT_EQ(true_data
.upper_bound(k
), true_data
.begin());
1413 std::string prop_value
;
1415 iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1416 ASSERT_EQ("1", prop_value
);
1417 keys_slices
.push_back(iter
->key());
1418 true_keys
.push_back((--true_data
.upper_bound(k
))->first
);
1421 for (size_t i
= 0; i
< keys_slices
.size(); i
++) {
1422 ASSERT_EQ(keys_slices
[i
].ToString(), true_keys
[i
]);
1427 // Test iterating all data forward
1428 std::vector
<Slice
> all_keys
;
1429 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
1430 std::string prop_value
;
1432 iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1433 ASSERT_EQ("1", prop_value
);
1434 all_keys
.push_back(iter
->key());
1436 ASSERT_EQ(all_keys
.size(), true_data
.size());
1438 // Verify that all keys slices are valid
1439 auto data_iter
= true_data
.begin();
1440 for (size_t i
= 0; i
< all_keys
.size(); i
++) {
1441 ASSERT_EQ(all_keys
[i
].ToString(), data_iter
->first
);
1447 // Test iterating all data backward
1448 std::vector
<Slice
> all_keys
;
1449 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
1450 std::string prop_value
;
1452 iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1453 ASSERT_EQ("1", prop_value
);
1454 all_keys
.push_back(iter
->key());
1456 ASSERT_EQ(all_keys
.size(), true_data
.size());
1458 // Verify that all keys slices are valid (backward)
1459 auto data_iter
= true_data
.rbegin();
1460 for (size_t i
= 0; i
< all_keys
.size(); i
++) {
1461 ASSERT_EQ(all_keys
[i
].ToString(), data_iter
->first
);
1470 TEST_P(DBIteratorTestForPinnedData
, PinnedDataIteratorRandomizedNormal
) {
1471 PinnedDataIteratorRandomized(TestConfig::NORMAL
);
1474 TEST_P(DBIteratorTestForPinnedData
, PinnedDataIteratorRandomizedCLoseAndOpen
) {
1475 PinnedDataIteratorRandomized(TestConfig::CLOSE_AND_OPEN
);
1478 TEST_P(DBIteratorTestForPinnedData
,
1479 PinnedDataIteratorRandomizedCompactBeforeRead
) {
1480 PinnedDataIteratorRandomized(TestConfig::COMPACT_BEFORE_READ
);
1483 TEST_P(DBIteratorTestForPinnedData
, PinnedDataIteratorRandomizedFlush
) {
1484 PinnedDataIteratorRandomized(TestConfig::FLUSH_EVERY_1000
);
1487 #ifndef ROCKSDB_LITE
1488 TEST_P(DBIteratorTest
, PinnedDataIteratorMultipleFiles
) {
1489 Options options
= CurrentOptions();
1490 BlockBasedTableOptions table_options
;
1491 table_options
.use_delta_encoding
= false;
1492 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1493 options
.disable_auto_compactions
= true;
1494 options
.write_buffer_size
= 1024 * 1024 * 10; // 10 Mb
1495 DestroyAndReopen(options
);
1497 std::map
<std::string
, std::string
> true_data
;
1499 // Generate 4 sst files in L2
1501 for (int i
= 1; i
<= 1000; i
++) {
1502 std::string k
= Key(i
* 3);
1503 std::string v
= RandomString(&rnd
, 100);
1504 ASSERT_OK(Put(k
, v
));
1510 ASSERT_EQ(FilesPerLevel(0), "4");
1511 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1512 ASSERT_EQ(FilesPerLevel(0), "0,4");
1514 // Generate 4 sst files in L0
1515 for (int i
= 1; i
<= 1000; i
++) {
1516 std::string k
= Key(i
* 2);
1517 std::string v
= RandomString(&rnd
, 100);
1518 ASSERT_OK(Put(k
, v
));
1524 ASSERT_EQ(FilesPerLevel(0), "4,4");
1526 // Add some keys/values in memtables
1527 for (int i
= 1; i
<= 1000; i
++) {
1528 std::string k
= Key(i
);
1529 std::string v
= RandomString(&rnd
, 100);
1530 ASSERT_OK(Put(k
, v
));
1533 ASSERT_EQ(FilesPerLevel(0), "4,4");
1537 auto iter
= NewIterator(ro
);
1539 std::vector
<std::pair
<Slice
, std::string
>> results
;
1540 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
1541 std::string prop_value
;
1542 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1543 ASSERT_EQ("1", prop_value
);
1544 results
.emplace_back(iter
->key(), iter
->value().ToString());
1547 ASSERT_EQ(results
.size(), true_data
.size());
1548 auto data_iter
= true_data
.begin();
1549 for (size_t i
= 0; i
< results
.size(); i
++, data_iter
++) {
1550 auto& kv
= results
[i
];
1551 ASSERT_EQ(kv
.first
, data_iter
->first
);
1552 ASSERT_EQ(kv
.second
, data_iter
->second
);
1559 TEST_P(DBIteratorTest
, PinnedDataIteratorMergeOperator
) {
1560 Options options
= CurrentOptions();
1561 BlockBasedTableOptions table_options
;
1562 table_options
.use_delta_encoding
= false;
1563 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1564 options
.merge_operator
= MergeOperators::CreateUInt64AddOperator();
1565 DestroyAndReopen(options
);
1567 std::string numbers
[7];
1568 for (int val
= 0; val
<= 6; val
++) {
1569 PutFixed64(numbers
+ val
, val
);
1572 // +1 all keys in range [ 0 => 999]
1573 for (int i
= 0; i
< 1000; i
++) {
1575 ASSERT_OK(db_
->Merge(wo
, Key(i
), numbers
[1]));
1578 // +2 all keys divisible by 2 in range [ 0 => 999]
1579 for (int i
= 0; i
< 1000; i
+= 2) {
1581 ASSERT_OK(db_
->Merge(wo
, Key(i
), numbers
[2]));
1584 // +3 all keys divisible by 5 in range [ 0 => 999]
1585 for (int i
= 0; i
< 1000; i
+= 5) {
1587 ASSERT_OK(db_
->Merge(wo
, Key(i
), numbers
[3]));
1592 auto iter
= NewIterator(ro
);
1594 std::vector
<std::pair
<Slice
, std::string
>> results
;
1595 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
1596 std::string prop_value
;
1597 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1598 ASSERT_EQ("1", prop_value
);
1599 results
.emplace_back(iter
->key(), iter
->value().ToString());
1602 ASSERT_EQ(results
.size(), 1000);
1603 for (size_t i
= 0; i
< results
.size(); i
++) {
1604 auto& kv
= results
[i
];
1605 ASSERT_EQ(kv
.first
, Key(static_cast<int>(i
)));
1606 int expected_val
= 1;
1613 ASSERT_EQ(kv
.second
, numbers
[expected_val
]);
1619 TEST_P(DBIteratorTest
, PinnedDataIteratorReadAfterUpdate
) {
1620 Options options
= CurrentOptions();
1621 BlockBasedTableOptions table_options
;
1622 table_options
.use_delta_encoding
= false;
1623 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1624 options
.write_buffer_size
= 100000;
1625 DestroyAndReopen(options
);
1629 std::map
<std::string
, std::string
> true_data
;
1630 for (int i
= 0; i
< 1000; i
++) {
1631 std::string k
= RandomString(&rnd
, 10);
1632 std::string v
= RandomString(&rnd
, 1000);
1633 ASSERT_OK(Put(k
, v
));
1639 auto iter
= NewIterator(ro
);
1641 // Delete 50% of the keys and update the other 50%
1642 for (auto& kv
: true_data
) {
1644 ASSERT_OK(Delete(kv
.first
));
1646 std::string new_val
= RandomString(&rnd
, 1000);
1647 ASSERT_OK(Put(kv
.first
, new_val
));
1651 std::vector
<std::pair
<Slice
, std::string
>> results
;
1652 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
1653 std::string prop_value
;
1654 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value
));
1655 ASSERT_EQ("1", prop_value
);
1656 results
.emplace_back(iter
->key(), iter
->value().ToString());
1659 auto data_iter
= true_data
.begin();
1660 for (size_t i
= 0; i
< results
.size(); i
++, data_iter
++) {
1661 auto& kv
= results
[i
];
1662 ASSERT_EQ(kv
.first
, data_iter
->first
);
1663 ASSERT_EQ(kv
.second
, data_iter
->second
);
1669 class SliceTransformLimitedDomainGeneric
: public SliceTransform
{
1670 const char* Name() const override
{
1671 return "SliceTransformLimitedDomainGeneric";
1674 Slice
Transform(const Slice
& src
) const override
{
1675 return Slice(src
.data(), 1);
1678 bool InDomain(const Slice
& src
) const override
{
1679 // prefix will be x????
1680 return src
.size() >= 1;
1683 bool InRange(const Slice
& dst
) const override
{
1684 // prefix will be x????
1685 return dst
.size() == 1;
1689 TEST_P(DBIteratorTest
, IterSeekForPrevCrossingFiles
) {
1690 Options options
= CurrentOptions();
1691 options
.prefix_extractor
.reset(NewFixedPrefixTransform(1));
1692 options
.disable_auto_compactions
= true;
1693 // Enable prefix bloom for SST files
1694 BlockBasedTableOptions table_options
;
1695 table_options
.filter_policy
.reset(NewBloomFilterPolicy(10, true));
1696 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1697 DestroyAndReopen(options
);
1699 ASSERT_OK(Put("a1", "va1"));
1700 ASSERT_OK(Put("a2", "va2"));
1701 ASSERT_OK(Put("a3", "va3"));
1704 ASSERT_OK(Put("b1", "vb1"));
1705 ASSERT_OK(Put("b2", "vb2"));
1706 ASSERT_OK(Put("b3", "vb3"));
1709 ASSERT_OK(Put("b4", "vb4"));
1710 ASSERT_OK(Put("d1", "vd1"));
1711 ASSERT_OK(Put("d2", "vd2"));
1712 ASSERT_OK(Put("d4", "vd4"));
1715 MoveFilesToLevel(1);
1718 Iterator
* iter
= NewIterator(ro
);
1720 iter
->SeekForPrev("a4");
1721 ASSERT_EQ(iter
->key().ToString(), "a3");
1722 ASSERT_EQ(iter
->value().ToString(), "va3");
1724 iter
->SeekForPrev("c2");
1725 ASSERT_EQ(iter
->key().ToString(), "b3");
1726 iter
->SeekForPrev("d3");
1727 ASSERT_EQ(iter
->key().ToString(), "d2");
1728 iter
->SeekForPrev("b5");
1729 ASSERT_EQ(iter
->key().ToString(), "b4");
1735 ro
.prefix_same_as_start
= true;
1736 Iterator
* iter
= NewIterator(ro
);
1737 iter
->SeekForPrev("c2");
1738 ASSERT_TRUE(!iter
->Valid());
1743 TEST_P(DBIteratorTest
, IterSeekForPrevCrossingFilesCustomPrefixExtractor
) {
1744 Options options
= CurrentOptions();
1745 options
.prefix_extractor
=
1746 std::make_shared
<SliceTransformLimitedDomainGeneric
>();
1747 options
.disable_auto_compactions
= true;
1748 // Enable prefix bloom for SST files
1749 BlockBasedTableOptions table_options
;
1750 table_options
.filter_policy
.reset(NewBloomFilterPolicy(10, true));
1751 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1752 DestroyAndReopen(options
);
1754 ASSERT_OK(Put("a1", "va1"));
1755 ASSERT_OK(Put("a2", "va2"));
1756 ASSERT_OK(Put("a3", "va3"));
1759 ASSERT_OK(Put("b1", "vb1"));
1760 ASSERT_OK(Put("b2", "vb2"));
1761 ASSERT_OK(Put("b3", "vb3"));
1764 ASSERT_OK(Put("b4", "vb4"));
1765 ASSERT_OK(Put("d1", "vd1"));
1766 ASSERT_OK(Put("d2", "vd2"));
1767 ASSERT_OK(Put("d4", "vd4"));
1770 MoveFilesToLevel(1);
1773 Iterator
* iter
= NewIterator(ro
);
1775 iter
->SeekForPrev("a4");
1776 ASSERT_EQ(iter
->key().ToString(), "a3");
1777 ASSERT_EQ(iter
->value().ToString(), "va3");
1779 iter
->SeekForPrev("c2");
1780 ASSERT_EQ(iter
->key().ToString(), "b3");
1781 iter
->SeekForPrev("d3");
1782 ASSERT_EQ(iter
->key().ToString(), "d2");
1783 iter
->SeekForPrev("b5");
1784 ASSERT_EQ(iter
->key().ToString(), "b4");
1790 ro
.prefix_same_as_start
= true;
1791 Iterator
* iter
= NewIterator(ro
);
1792 iter
->SeekForPrev("c2");
1793 ASSERT_TRUE(!iter
->Valid());
1798 TEST_P(DBIteratorTest
, IterPrevKeyCrossingBlocks
) {
1799 Options options
= CurrentOptions();
1800 BlockBasedTableOptions table_options
;
1801 table_options
.block_size
= 1; // every block will contain one entry
1802 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
1803 options
.merge_operator
= MergeOperators::CreateStringAppendTESTOperator();
1804 options
.disable_auto_compactions
= true;
1805 options
.max_sequential_skip_in_iterations
= 8;
1807 DestroyAndReopen(options
);
1809 // Putting such deletes will force DBIter::Prev() to fallback to a Seek
1810 for (int file_num
= 0; file_num
< 10; file_num
++) {
1811 ASSERT_OK(Delete("key4"));
1815 // First File containing 5 blocks of puts
1816 ASSERT_OK(Put("key1", "val1.0"));
1817 ASSERT_OK(Put("key2", "val2.0"));
1818 ASSERT_OK(Put("key3", "val3.0"));
1819 ASSERT_OK(Put("key4", "val4.0"));
1820 ASSERT_OK(Put("key5", "val5.0"));
1823 // Second file containing 9 blocks of merge operands
1824 ASSERT_OK(db_
->Merge(WriteOptions(), "key1", "val1.1"));
1825 ASSERT_OK(db_
->Merge(WriteOptions(), "key1", "val1.2"));
1827 ASSERT_OK(db_
->Merge(WriteOptions(), "key2", "val2.1"));
1828 ASSERT_OK(db_
->Merge(WriteOptions(), "key2", "val2.2"));
1829 ASSERT_OK(db_
->Merge(WriteOptions(), "key2", "val2.3"));
1831 ASSERT_OK(db_
->Merge(WriteOptions(), "key3", "val3.1"));
1832 ASSERT_OK(db_
->Merge(WriteOptions(), "key3", "val3.2"));
1833 ASSERT_OK(db_
->Merge(WriteOptions(), "key3", "val3.3"));
1834 ASSERT_OK(db_
->Merge(WriteOptions(), "key3", "val3.4"));
1839 ro
.fill_cache
= false;
1840 Iterator
* iter
= NewIterator(ro
);
1843 ASSERT_EQ(iter
->key().ToString(), "key5");
1844 ASSERT_EQ(iter
->value().ToString(), "val5.0");
1847 ASSERT_EQ(iter
->key().ToString(), "key4");
1848 ASSERT_EQ(iter
->value().ToString(), "val4.0");
1851 ASSERT_EQ(iter
->key().ToString(), "key3");
1852 ASSERT_EQ(iter
->value().ToString(), "val3.0,val3.1,val3.2,val3.3,val3.4");
1855 ASSERT_EQ(iter
->key().ToString(), "key2");
1856 ASSERT_EQ(iter
->value().ToString(), "val2.0,val2.1,val2.2,val2.3");
1859 ASSERT_EQ(iter
->key().ToString(), "key1");
1860 ASSERT_EQ(iter
->value().ToString(), "val1.0,val1.1,val1.2");
1866 TEST_P(DBIteratorTest
, IterPrevKeyCrossingBlocksRandomized
) {
1867 Options options
= CurrentOptions();
1868 options
.merge_operator
= MergeOperators::CreateStringAppendTESTOperator();
1869 options
.disable_auto_compactions
= true;
1870 options
.level0_slowdown_writes_trigger
= (1 << 30);
1871 options
.level0_stop_writes_trigger
= (1 << 30);
1872 options
.max_sequential_skip_in_iterations
= 8;
1873 DestroyAndReopen(options
);
1875 const int kNumKeys
= 500;
1876 // Small number of merge operands to make sure that DBIter::Prev() dont
1877 // fall back to Seek()
1878 const int kNumMergeOperands
= 3;
1879 // Use value size that will make sure that every block contain 1 key
1880 const int kValSize
=
1881 static_cast<int>(BlockBasedTableOptions().block_size
) * 4;
1882 // Percentage of keys that wont get merge operations
1883 const int kNoMergeOpPercentage
= 20;
1884 // Percentage of keys that will be deleted
1885 const int kDeletePercentage
= 10;
1887 // For half of the key range we will write multiple deletes first to
1888 // force DBIter::Prev() to fall back to Seek()
1889 for (int file_num
= 0; file_num
< 10; file_num
++) {
1890 for (int i
= 0; i
< kNumKeys
; i
+= 2) {
1891 ASSERT_OK(Delete(Key(i
)));
1897 std::map
<std::string
, std::string
> true_data
;
1898 std::string gen_key
;
1899 std::string gen_val
;
1901 for (int i
= 0; i
< kNumKeys
; i
++) {
1903 gen_val
= RandomString(&rnd
, kValSize
);
1905 ASSERT_OK(Put(gen_key
, gen_val
));
1906 true_data
[gen_key
] = gen_val
;
1910 // Separate values and merge operands in different file so that we
1911 // make sure that we dont merge them while flushing but actually
1912 // merge them in the read path
1913 for (int i
= 0; i
< kNumKeys
; i
++) {
1914 if (rnd
.PercentTrue(kNoMergeOpPercentage
)) {
1915 // Dont give merge operations for some keys
1919 for (int j
= 0; j
< kNumMergeOperands
; j
++) {
1921 gen_val
= RandomString(&rnd
, kValSize
);
1923 ASSERT_OK(db_
->Merge(WriteOptions(), gen_key
, gen_val
));
1924 true_data
[gen_key
] += "," + gen_val
;
1929 for (int i
= 0; i
< kNumKeys
; i
++) {
1930 if (rnd
.PercentTrue(kDeletePercentage
)) {
1933 ASSERT_OK(Delete(gen_key
));
1934 true_data
.erase(gen_key
);
1941 ro
.fill_cache
= false;
1942 Iterator
* iter
= NewIterator(ro
);
1943 auto data_iter
= true_data
.rbegin();
1945 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
1946 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
1947 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
1950 ASSERT_EQ(data_iter
, true_data
.rend());
1957 ro
.fill_cache
= false;
1958 Iterator
* iter
= NewIterator(ro
);
1959 auto data_iter
= true_data
.rbegin();
1961 int entries_right
= 0;
1962 std::string seek_key
;
1963 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
1964 // Verify key/value of current position
1965 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
1966 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
1968 bool restore_position_with_seek
= rnd
.Uniform(2);
1969 if (restore_position_with_seek
) {
1970 seek_key
= iter
->key().ToString();
1973 // Do some Next() operations the restore the iterator to orignal position
1975 entries_right
> 0 ? rnd
.Uniform(std::min(entries_right
, 10)) : 0;
1976 for (int i
= 0; i
< next_count
; i
++) {
1980 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
1981 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
1984 if (restore_position_with_seek
) {
1985 // Restore orignal position using Seek()
1986 iter
->Seek(seek_key
);
1987 for (int i
= 0; i
< next_count
; i
++) {
1991 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
1992 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
1994 // Restore original position using Prev()
1995 for (int i
= 0; i
< next_count
; i
++) {
1999 ASSERT_EQ(iter
->key().ToString(), data_iter
->first
);
2000 ASSERT_EQ(iter
->value().ToString(), data_iter
->second
);
2007 ASSERT_EQ(data_iter
, true_data
.rend());
2013 TEST_P(DBIteratorTest
, IteratorWithLocalStatistics
) {
2014 Options options
= CurrentOptions();
2015 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
2016 DestroyAndReopen(options
);
2019 for (int i
= 0; i
< 1000; i
++) {
2020 // Key 10 bytes / Value 10 bytes
2021 ASSERT_OK(Put(RandomString(&rnd
, 10), RandomString(&rnd
, 10)));
2024 std::atomic
<uint64_t> total_next(0);
2025 std::atomic
<uint64_t> total_next_found(0);
2026 std::atomic
<uint64_t> total_prev(0);
2027 std::atomic
<uint64_t> total_prev_found(0);
2028 std::atomic
<uint64_t> total_bytes(0);
2030 std::vector
<port::Thread
> threads
;
2031 std::function
<void()> reader_func_next
= [&]() {
2032 SetPerfLevel(kEnableCount
);
2033 get_perf_context()->Reset();
2034 Iterator
* iter
= NewIterator(ReadOptions());
2036 iter
->SeekToFirst();
2037 // Seek will bump ITER_BYTES_READ
2039 bytes
+= iter
->key().size();
2040 bytes
+= iter
->value().size();
2045 if (!iter
->Valid()) {
2049 bytes
+= iter
->key().size();
2050 bytes
+= iter
->value().size();
2054 ASSERT_EQ(bytes
, get_perf_context()->iter_read_bytes
);
2055 SetPerfLevel(kDisable
);
2056 total_bytes
+= bytes
;
2059 std::function
<void()> reader_func_prev
= [&]() {
2060 SetPerfLevel(kEnableCount
);
2061 Iterator
* iter
= NewIterator(ReadOptions());
2064 // Seek will bump ITER_BYTES_READ
2066 bytes
+= iter
->key().size();
2067 bytes
+= iter
->value().size();
2072 if (!iter
->Valid()) {
2076 bytes
+= iter
->key().size();
2077 bytes
+= iter
->value().size();
2081 ASSERT_EQ(bytes
, get_perf_context()->iter_read_bytes
);
2082 SetPerfLevel(kDisable
);
2083 total_bytes
+= bytes
;
2086 for (int i
= 0; i
< 10; i
++) {
2087 threads
.emplace_back(reader_func_next
);
2089 for (int i
= 0; i
< 15; i
++) {
2090 threads
.emplace_back(reader_func_prev
);
2093 for (auto& t
: threads
) {
2097 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_DB_NEXT
), (uint64_t)total_next
);
2098 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_DB_NEXT_FOUND
),
2099 (uint64_t)total_next_found
);
2100 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_DB_PREV
), (uint64_t)total_prev
);
2101 ASSERT_EQ(TestGetTickerCount(options
, NUMBER_DB_PREV_FOUND
),
2102 (uint64_t)total_prev_found
);
2103 ASSERT_EQ(TestGetTickerCount(options
, ITER_BYTES_READ
), (uint64_t)total_bytes
);
2107 TEST_P(DBIteratorTest
, ReadAhead
) {
2109 env_
->count_random_reads_
= true;
2111 options
.disable_auto_compactions
= true;
2112 options
.write_buffer_size
= 4 << 20;
2113 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
2114 BlockBasedTableOptions table_options
;
2115 table_options
.block_size
= 1024;
2116 table_options
.no_block_cache
= true;
2117 options
.table_factory
.reset(new BlockBasedTableFactory(table_options
));
2120 std::string
value(1024, 'a');
2121 for (int i
= 0; i
< 100; i
++) {
2125 MoveFilesToLevel(2);
2127 for (int i
= 0; i
< 100; i
++) {
2131 MoveFilesToLevel(1);
2133 for (int i
= 0; i
< 100; i
++) {
2137 #ifndef ROCKSDB_LITE
2138 ASSERT_EQ("1,1,1", FilesPerLevel());
2139 #endif // !ROCKSDB_LITE
2141 env_
->random_read_bytes_counter_
= 0;
2142 options
.statistics
->setTickerCount(NO_FILE_OPENS
, 0);
2143 ReadOptions read_options
;
2144 auto* iter
= NewIterator(read_options
);
2145 iter
->SeekToFirst();
2146 int64_t num_file_opens
= TestGetTickerCount(options
, NO_FILE_OPENS
);
2147 size_t bytes_read
= env_
->random_read_bytes_counter_
;
2150 int64_t num_file_closes
= TestGetTickerCount(options
, NO_FILE_CLOSES
);
2151 env_
->random_read_bytes_counter_
= 0;
2152 options
.statistics
->setTickerCount(NO_FILE_OPENS
, 0);
2153 read_options
.readahead_size
= 1024 * 10;
2154 iter
= NewIterator(read_options
);
2155 iter
->SeekToFirst();
2156 int64_t num_file_opens_readahead
= TestGetTickerCount(options
, NO_FILE_OPENS
);
2157 size_t bytes_read_readahead
= env_
->random_read_bytes_counter_
;
2159 int64_t num_file_closes_readahead
=
2160 TestGetTickerCount(options
, NO_FILE_CLOSES
);
2161 ASSERT_EQ(num_file_opens
, num_file_opens_readahead
);
2162 ASSERT_EQ(num_file_closes
, num_file_closes_readahead
);
2163 ASSERT_GT(bytes_read_readahead
, bytes_read
);
2164 ASSERT_GT(bytes_read_readahead
, read_options
.readahead_size
* 3);
2166 // Verify correctness.
2167 iter
= NewIterator(read_options
);
2169 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
2170 ASSERT_EQ(value
, iter
->value());
2173 ASSERT_EQ(100, count
);
2174 for (int i
= 0; i
< 100; i
++) {
2176 ASSERT_EQ(value
, iter
->value());
2181 // Insert a key, create a snapshot iterator, overwrite key lots of times,
2182 // seek to a smaller key. Expect DBIter to fall back to a seek instead of
2183 // going through all the overwrites linearly.
2184 TEST_P(DBIteratorTest
, DBIteratorSkipRecentDuplicatesTest
) {
2185 Options options
= CurrentOptions();
2187 options
.create_if_missing
= true;
2188 options
.max_sequential_skip_in_iterations
= 3;
2189 options
.prefix_extractor
= nullptr;
2190 options
.write_buffer_size
= 1 << 27; // big enough to avoid flush
2191 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
2192 DestroyAndReopen(options
);
2195 ASSERT_OK(Put("b", "0"));
2199 std::unique_ptr
<Iterator
> iter(NewIterator(ro
));
2202 for (int i
= 0; i
< 100; ++i
) {
2203 ASSERT_OK(Put("b", std::to_string(i
+ 1).c_str()));
2206 #ifndef ROCKSDB_LITE
2207 // Check that memtable wasn't flushed.
2209 ASSERT_TRUE(db_
->GetProperty("rocksdb.num-files-at-level0", &val
));
2210 EXPECT_EQ("0", val
);
2213 // Seek iterator to a smaller key.
2214 get_perf_context()->Reset();
2216 ASSERT_TRUE(iter
->Valid());
2217 EXPECT_EQ("b", iter
->key().ToString());
2218 EXPECT_EQ("0", iter
->value().ToString());
2220 // Check that the seek didn't do too much work.
2221 // Checks are not tight, just make sure that everything is well below 100.
2222 EXPECT_LT(get_perf_context()->internal_key_skipped_count
, 4);
2223 EXPECT_LT(get_perf_context()->internal_recent_skipped_count
, 8);
2224 EXPECT_LT(get_perf_context()->seek_on_memtable_count
, 10);
2225 EXPECT_LT(get_perf_context()->next_on_memtable_count
, 10);
2226 EXPECT_LT(get_perf_context()->prev_on_memtable_count
, 10);
2228 // Check that iterator did something like what we expect.
2229 EXPECT_EQ(get_perf_context()->internal_delete_skipped_count
, 0);
2230 EXPECT_EQ(get_perf_context()->internal_merge_count
, 0);
2231 EXPECT_GE(get_perf_context()->internal_recent_skipped_count
, 2);
2232 EXPECT_GE(get_perf_context()->seek_on_memtable_count
, 2);
2233 EXPECT_EQ(1, options
.statistics
->getTickerCount(
2234 NUMBER_OF_RESEEKS_IN_ITERATION
));
2237 TEST_P(DBIteratorTest
, Refresh
) {
2238 ASSERT_OK(Put("x", "y"));
2240 std::unique_ptr
<Iterator
> iter(NewIterator(ReadOptions()));
2241 iter
->Seek(Slice("a"));
2242 ASSERT_TRUE(iter
->Valid());
2243 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2245 ASSERT_FALSE(iter
->Valid());
2247 ASSERT_OK(Put("c", "d"));
2249 iter
->Seek(Slice("a"));
2250 ASSERT_TRUE(iter
->Valid());
2251 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2253 ASSERT_FALSE(iter
->Valid());
2257 iter
->Seek(Slice("a"));
2258 ASSERT_TRUE(iter
->Valid());
2259 ASSERT_EQ(iter
->key().compare(Slice("c")), 0);
2261 ASSERT_TRUE(iter
->Valid());
2262 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2264 ASSERT_FALSE(iter
->Valid());
2266 dbfull()->Flush(FlushOptions());
2268 ASSERT_OK(Put("m", "n"));
2270 iter
->Seek(Slice("a"));
2271 ASSERT_TRUE(iter
->Valid());
2272 ASSERT_EQ(iter
->key().compare(Slice("c")), 0);
2274 ASSERT_TRUE(iter
->Valid());
2275 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2277 ASSERT_FALSE(iter
->Valid());
2281 iter
->Seek(Slice("a"));
2282 ASSERT_TRUE(iter
->Valid());
2283 ASSERT_EQ(iter
->key().compare(Slice("c")), 0);
2285 ASSERT_TRUE(iter
->Valid());
2286 ASSERT_EQ(iter
->key().compare(Slice("m")), 0);
2288 ASSERT_TRUE(iter
->Valid());
2289 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2291 ASSERT_FALSE(iter
->Valid());
2296 TEST_P(DBIteratorTest
, RefreshWithSnapshot
) {
2297 ASSERT_OK(Put("x", "y"));
2298 const Snapshot
* snapshot
= db_
->GetSnapshot();
2299 ReadOptions options
;
2300 options
.snapshot
= snapshot
;
2301 Iterator
* iter
= NewIterator(options
);
2303 iter
->Seek(Slice("a"));
2304 ASSERT_TRUE(iter
->Valid());
2305 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2307 ASSERT_FALSE(iter
->Valid());
2309 ASSERT_OK(Put("c", "d"));
2311 iter
->Seek(Slice("a"));
2312 ASSERT_TRUE(iter
->Valid());
2313 ASSERT_EQ(iter
->key().compare(Slice("x")), 0);
2315 ASSERT_FALSE(iter
->Valid());
2318 s
= iter
->Refresh();
2319 ASSERT_TRUE(s
.IsNotSupported());
2320 db_
->ReleaseSnapshot(snapshot
);
2324 TEST_P(DBIteratorTest
, CreationFailure
) {
2325 SyncPoint::GetInstance()->SetCallBack(
2326 "DBImpl::NewInternalIterator:StatusCallback", [](void* arg
) {
2327 *(reinterpret_cast<Status
*>(arg
)) = Status::Corruption("test status");
2329 SyncPoint::GetInstance()->EnableProcessing();
2331 Iterator
* iter
= NewIterator(ReadOptions());
2332 ASSERT_FALSE(iter
->Valid());
2333 ASSERT_TRUE(iter
->status().IsCorruption());
2337 TEST_P(DBIteratorTest
, UpperBoundWithChangeDirection
) {
2338 Options options
= CurrentOptions();
2339 options
.max_sequential_skip_in_iterations
= 3;
2340 DestroyAndReopen(options
);
2342 // write a bunch of kvs to the database.
2343 ASSERT_OK(Put("a", "1"));
2344 ASSERT_OK(Put("y", "1"));
2345 ASSERT_OK(Put("y1", "1"));
2346 ASSERT_OK(Put("y2", "1"));
2347 ASSERT_OK(Put("y3", "1"));
2348 ASSERT_OK(Put("z", "1"));
2350 ASSERT_OK(Put("a", "1"));
2351 ASSERT_OK(Put("z", "1"));
2352 ASSERT_OK(Put("bar", "1"));
2353 ASSERT_OK(Put("foo", "1"));
2355 std::string upper_bound
= "x";
2356 Slice
ub_slice(upper_bound
);
2358 ro
.iterate_upper_bound
= &ub_slice
;
2359 ro
.max_skippable_internal_keys
= 1000;
2361 Iterator
* iter
= NewIterator(ro
);
2363 ASSERT_TRUE(iter
->Valid());
2364 ASSERT_EQ("foo", iter
->key().ToString());
2367 ASSERT_TRUE(iter
->Valid());
2368 ASSERT_OK(iter
->status());
2369 ASSERT_EQ("bar", iter
->key().ToString());
2374 TEST_P(DBIteratorTest
, TableFilter
) {
2375 ASSERT_OK(Put("a", "1"));
2376 dbfull()->Flush(FlushOptions());
2377 ASSERT_OK(Put("b", "2"));
2378 ASSERT_OK(Put("c", "3"));
2379 dbfull()->Flush(FlushOptions());
2380 ASSERT_OK(Put("d", "4"));
2381 ASSERT_OK(Put("e", "5"));
2382 ASSERT_OK(Put("f", "6"));
2383 dbfull()->Flush(FlushOptions());
2385 // Ensure the table_filter callback is called once for each table.
2387 std::set
<uint64_t> unseen
{1, 2, 3};
2389 opts
.table_filter
= [&](const TableProperties
& props
) {
2390 auto it
= unseen
.find(props
.num_entries
);
2391 if (it
== unseen
.end()) {
2392 ADD_FAILURE() << "saw table properties with an unexpected "
2393 << props
.num_entries
<< " entries";
2399 auto iter
= NewIterator(opts
);
2400 iter
->SeekToFirst();
2401 ASSERT_EQ(IterStatus(iter
), "a->1");
2403 ASSERT_EQ(IterStatus(iter
), "b->2");
2405 ASSERT_EQ(IterStatus(iter
), "c->3");
2407 ASSERT_EQ(IterStatus(iter
), "d->4");
2409 ASSERT_EQ(IterStatus(iter
), "e->5");
2411 ASSERT_EQ(IterStatus(iter
), "f->6");
2413 ASSERT_FALSE(iter
->Valid());
2414 ASSERT_TRUE(unseen
.empty());
2418 // Ensure returning false in the table_filter hides the keys from that table
2419 // during iteration.
2422 opts
.table_filter
= [](const TableProperties
& props
) {
2423 return props
.num_entries
!= 2;
2425 auto iter
= NewIterator(opts
);
2426 iter
->SeekToFirst();
2427 ASSERT_EQ(IterStatus(iter
), "a->1");
2429 ASSERT_EQ(IterStatus(iter
), "d->4");
2431 ASSERT_EQ(IterStatus(iter
), "e->5");
2433 ASSERT_EQ(IterStatus(iter
), "f->6");
2435 ASSERT_FALSE(iter
->Valid());
2440 TEST_P(DBIteratorTest
, UpperBoundWithPrevReseek
) {
2441 Options options
= CurrentOptions();
2442 options
.max_sequential_skip_in_iterations
= 3;
2443 DestroyAndReopen(options
);
2445 // write a bunch of kvs to the database.
2446 ASSERT_OK(Put("a", "1"));
2447 ASSERT_OK(Put("y", "1"));
2448 ASSERT_OK(Put("z", "1"));
2450 ASSERT_OK(Put("a", "1"));
2451 ASSERT_OK(Put("z", "1"));
2452 ASSERT_OK(Put("bar", "1"));
2453 ASSERT_OK(Put("foo", "1"));
2454 ASSERT_OK(Put("foo", "2"));
2456 ASSERT_OK(Put("foo", "3"));
2457 ASSERT_OK(Put("foo", "4"));
2458 ASSERT_OK(Put("foo", "5"));
2459 const Snapshot
* snapshot
= db_
->GetSnapshot();
2460 ASSERT_OK(Put("foo", "6"));
2462 std::string upper_bound
= "x";
2463 Slice
ub_slice(upper_bound
);
2465 ro
.snapshot
= snapshot
;
2466 ro
.iterate_upper_bound
= &ub_slice
;
2468 Iterator
* iter
= NewIterator(ro
);
2469 iter
->SeekForPrev("goo");
2470 ASSERT_TRUE(iter
->Valid());
2471 ASSERT_EQ("foo", iter
->key().ToString());
2474 ASSERT_TRUE(iter
->Valid());
2475 ASSERT_EQ("bar", iter
->key().ToString());
2478 db_
->ReleaseSnapshot(snapshot
);
2481 TEST_P(DBIteratorTest
, SkipStatistics
) {
2482 Options options
= CurrentOptions();
2483 options
.statistics
= ROCKSDB_NAMESPACE::CreateDBStatistics();
2484 DestroyAndReopen(options
);
2488 // write a bunch of kvs to the database.
2489 ASSERT_OK(Put("a", "1"));
2490 ASSERT_OK(Put("b", "1"));
2491 ASSERT_OK(Put("c", "1"));
2493 ASSERT_OK(Put("d", "1"));
2494 ASSERT_OK(Put("e", "1"));
2495 ASSERT_OK(Put("f", "1"));
2496 ASSERT_OK(Put("a", "2"));
2497 ASSERT_OK(Put("b", "2"));
2499 ASSERT_OK(Delete("d"));
2500 ASSERT_OK(Delete("e"));
2501 ASSERT_OK(Delete("f"));
2503 Iterator
* iter
= NewIterator(ReadOptions());
2505 for (iter
->SeekToFirst(); iter
->Valid(); iter
->Next()) {
2506 ASSERT_OK(iter
->status());
2509 ASSERT_EQ(count
, 3);
2511 skip_count
+= 8; // 3 deletes + 3 original keys + 2 lower in sequence
2512 ASSERT_EQ(skip_count
, TestGetTickerCount(options
, NUMBER_ITER_SKIP
));
2514 iter
= NewIterator(ReadOptions());
2516 for (iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
2517 ASSERT_OK(iter
->status());
2520 ASSERT_EQ(count
, 3);
2522 skip_count
+= 8; // Same as above, but in reverse order
2523 ASSERT_EQ(skip_count
, TestGetTickerCount(options
, NUMBER_ITER_SKIP
));
2525 ASSERT_OK(Put("aa", "1"));
2526 ASSERT_OK(Put("ab", "1"));
2527 ASSERT_OK(Put("ac", "1"));
2528 ASSERT_OK(Put("ad", "1"));
2530 ASSERT_OK(Delete("ab"));
2531 ASSERT_OK(Delete("ac"));
2532 ASSERT_OK(Delete("ad"));
2536 ro
.iterate_upper_bound
= &prefix
;
2538 iter
= NewIterator(ro
);
2540 for(iter
->Seek("aa"); iter
->Valid(); iter
->Next()) {
2541 ASSERT_OK(iter
->status());
2544 ASSERT_EQ(count
, 1);
2546 skip_count
+= 6; // 3 deletes + 3 original keys
2547 ASSERT_EQ(skip_count
, TestGetTickerCount(options
, NUMBER_ITER_SKIP
));
2549 iter
= NewIterator(ro
);
2551 for(iter
->SeekToLast(); iter
->Valid(); iter
->Prev()) {
2552 ASSERT_OK(iter
->status());
2555 ASSERT_EQ(count
, 2);
2557 // 3 deletes + 3 original keys + lower sequence of "a"
2559 ASSERT_EQ(skip_count
, TestGetTickerCount(options
, NUMBER_ITER_SKIP
));
2562 TEST_P(DBIteratorTest
, SeekAfterHittingManyInternalKeys
) {
2563 Options options
= CurrentOptions();
2564 DestroyAndReopen(options
);
2566 ropts
.max_skippable_internal_keys
= 2;
2569 // Add more tombstones than max_skippable_internal_keys so that Next() fails.
2576 std::unique_ptr
<Iterator
> iter(NewIterator(ropts
));
2577 iter
->SeekToFirst();
2579 ASSERT_TRUE(iter
->Valid());
2580 ASSERT_EQ(iter
->key().ToString(), "1");
2581 ASSERT_EQ(iter
->value().ToString(), "val_1");
2583 // This should fail as incomplete due to too many non-visible internal keys on
2584 // the way to the next valid user key.
2586 ASSERT_TRUE(!iter
->Valid());
2587 ASSERT_TRUE(iter
->status().IsIncomplete());
2589 // Get the internal key at which Next() failed.
2590 std::string prop_value
;
2591 ASSERT_OK(iter
->GetProperty("rocksdb.iterator.internal-key", &prop_value
));
2592 ASSERT_EQ("4", prop_value
);
2594 // Create a new iterator to seek to the internal key.
2595 std::unique_ptr
<Iterator
> iter2(NewIterator(ropts
));
2596 iter2
->Seek(prop_value
);
2597 ASSERT_TRUE(iter2
->Valid());
2598 ASSERT_OK(iter2
->status());
2600 ASSERT_EQ(iter2
->key().ToString(), "6");
2601 ASSERT_EQ(iter2
->value().ToString(), "val_6");
2604 // Reproduces a former bug where iterator would skip some records when DBIter
2605 // re-seeks subiterator with Incomplete status.
2606 TEST_P(DBIteratorTest
, NonBlockingIterationBugRepro
) {
2607 Options options
= CurrentOptions();
2608 BlockBasedTableOptions table_options
;
2609 // Make sure the sst file has more than one block.
2610 table_options
.flush_block_policy_factory
=
2611 std::make_shared
<FlushBlockEveryKeyPolicyFactory
>();
2612 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
2613 DestroyAndReopen(options
);
2615 // Two records in sst file, each in its own block.
2620 // Create a nonblocking iterator before writing to memtable.
2622 ropt
.read_tier
= kBlockCacheTier
;
2623 std::unique_ptr
<Iterator
> iter(NewIterator(ropt
));
2625 // Overwrite a key in memtable many times to hit
2626 // max_sequential_skip_in_iterations (which is 8 by default).
2627 for (int i
= 0; i
< 20; ++i
) {
2631 // Load the second block in sst file into the block cache.
2633 std::unique_ptr
<Iterator
> iter2(NewIterator(ReadOptions()));
2637 // Finally seek the nonblocking iterator.
2639 // With the bug, the status used to be OK, and the iterator used to point to
2641 EXPECT_TRUE(iter
->status().IsIncomplete());
2644 TEST_P(DBIteratorTest
, SeekBackwardAfterOutOfUpperBound
) {
2651 ropt
.iterate_upper_bound
= &ub
;
2653 std::unique_ptr
<Iterator
> it(dbfull()->NewIterator(ropt
));
2654 it
->SeekForPrev("a");
2655 ASSERT_TRUE(it
->Valid());
2656 ASSERT_OK(it
->status());
2657 ASSERT_EQ("a", it
->key().ToString());
2659 ASSERT_FALSE(it
->Valid());
2660 ASSERT_OK(it
->status());
2661 it
->SeekForPrev("a");
2662 ASSERT_OK(it
->status());
2664 ASSERT_TRUE(it
->Valid());
2665 ASSERT_EQ("a", it
->key().ToString());
2668 TEST_P(DBIteratorTest
, AvoidReseekLevelIterator
) {
2669 Options options
= CurrentOptions();
2670 options
.compression
= CompressionType::kNoCompression
;
2671 BlockBasedTableOptions table_options
;
2672 table_options
.block_size
= 800;
2673 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
2677 std::string random_str
= RandomString(&rnd
, 180);
2679 ASSERT_OK(Put("1", random_str
));
2680 ASSERT_OK(Put("2", random_str
));
2681 ASSERT_OK(Put("3", random_str
));
2682 ASSERT_OK(Put("4", random_str
));
2684 ASSERT_OK(Put("5", random_str
));
2685 ASSERT_OK(Put("6", random_str
));
2686 ASSERT_OK(Put("7", random_str
));
2688 ASSERT_OK(Put("8", random_str
));
2689 ASSERT_OK(Put("9", random_str
));
2691 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2693 int num_find_file_in_level
= 0;
2694 int num_idx_blk_seek
= 0;
2695 SyncPoint::GetInstance()->SetCallBack(
2696 "LevelIterator::Seek:BeforeFindFile",
2697 [&](void* /*arg*/) { num_find_file_in_level
++; });
2698 SyncPoint::GetInstance()->SetCallBack(
2699 "IndexBlockIter::Seek:0", [&](void* /*arg*/) { num_idx_blk_seek
++; });
2700 SyncPoint::GetInstance()->EnableProcessing();
2703 std::unique_ptr
<Iterator
> iter(NewIterator(ReadOptions()));
2705 ASSERT_TRUE(iter
->Valid());
2706 ASSERT_EQ(1, num_find_file_in_level
);
2707 ASSERT_EQ(1, num_idx_blk_seek
);
2710 ASSERT_TRUE(iter
->Valid());
2711 ASSERT_EQ(1, num_find_file_in_level
);
2712 ASSERT_EQ(1, num_idx_blk_seek
);
2715 ASSERT_TRUE(iter
->Valid());
2716 ASSERT_EQ(1, num_find_file_in_level
);
2717 ASSERT_EQ(1, num_idx_blk_seek
);
2720 ASSERT_TRUE(iter
->Valid());
2721 ASSERT_EQ(1, num_find_file_in_level
);
2722 ASSERT_EQ(1, num_idx_blk_seek
);
2725 ASSERT_TRUE(iter
->Valid());
2726 ASSERT_EQ(1, num_find_file_in_level
);
2727 ASSERT_EQ(2, num_idx_blk_seek
);
2730 ASSERT_TRUE(iter
->Valid());
2731 ASSERT_EQ(1, num_find_file_in_level
);
2732 ASSERT_EQ(2, num_idx_blk_seek
);
2735 ASSERT_TRUE(iter
->Valid());
2736 ASSERT_EQ(1, num_find_file_in_level
);
2737 ASSERT_EQ(3, num_idx_blk_seek
);
2740 ASSERT_TRUE(iter
->Valid());
2741 ASSERT_EQ(2, num_find_file_in_level
);
2742 // Still re-seek because "8" is the boundary key, which has
2743 // the same user key as the seek key.
2744 ASSERT_EQ(4, num_idx_blk_seek
);
2747 ASSERT_TRUE(iter
->Valid());
2748 ASSERT_EQ(3, num_find_file_in_level
);
2749 ASSERT_EQ(5, num_idx_blk_seek
);
2752 ASSERT_TRUE(iter
->Valid());
2753 ASSERT_EQ(3, num_find_file_in_level
);
2754 ASSERT_EQ(5, num_idx_blk_seek
);
2756 // Seek backward never triggers the index block seek to be skipped
2758 ASSERT_TRUE(iter
->Valid());
2759 ASSERT_EQ(3, num_find_file_in_level
);
2760 ASSERT_EQ(6, num_idx_blk_seek
);
2763 SyncPoint::GetInstance()->DisableProcessing();
2766 // MyRocks may change iterate bounds before seek. Simply test to make sure such
2767 // usage doesn't break iterator.
2768 TEST_P(DBIteratorTest
, IterateBoundChangedBeforeSeek
) {
2769 Options options
= CurrentOptions();
2770 options
.compression
= CompressionType::kNoCompression
;
2771 BlockBasedTableOptions table_options
;
2772 table_options
.block_size
= 100;
2773 options
.table_factory
.reset(NewBlockBasedTableFactory(table_options
));
2774 std::string
value(50, 'v');
2776 ASSERT_OK(Put("aaa", value
));
2778 ASSERT_OK(Put("bbb", "v"));
2779 ASSERT_OK(Put("ccc", "v"));
2780 ASSERT_OK(Put("ddd", "v"));
2782 ASSERT_OK(Put("eee", "v"));
2784 ASSERT_OK(db_
->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2786 std::string ub1
= "e";
2787 std::string ub2
= "c";
2789 ReadOptions read_opts1
;
2790 read_opts1
.iterate_upper_bound
= &ub
;
2791 Iterator
* iter
= NewIterator(read_opts1
);
2792 // Seek and iterate accross block boundary.
2794 ASSERT_TRUE(iter
->Valid());
2795 ASSERT_OK(iter
->status());
2796 ASSERT_EQ("bbb", iter
->key());
2799 ASSERT_TRUE(iter
->Valid());
2800 ASSERT_OK(iter
->status());
2801 ASSERT_EQ("bbb", iter
->key());
2803 ASSERT_FALSE(iter
->Valid());
2804 ASSERT_OK(iter
->status());
2807 std::string lb1
= "a";
2808 std::string lb2
= "c";
2810 ReadOptions read_opts2
;
2811 read_opts2
.iterate_lower_bound
= &lb
;
2812 iter
= NewIterator(read_opts2
);
2813 iter
->SeekForPrev("d");
2814 ASSERT_TRUE(iter
->Valid());
2815 ASSERT_OK(iter
->status());
2816 ASSERT_EQ("ccc", iter
->key());
2818 iter
->SeekForPrev("d");
2819 ASSERT_TRUE(iter
->Valid());
2820 ASSERT_OK(iter
->status());
2821 ASSERT_EQ("ccc", iter
->key());
2823 ASSERT_FALSE(iter
->Valid());
2824 ASSERT_OK(iter
->status());
2828 TEST_P(DBIteratorTest
, IterateWithLowerBoundAcrossFileBoundary
) {
2829 ASSERT_OK(Put("aaa", "v"));
2830 ASSERT_OK(Put("bbb", "v"));
2832 ASSERT_OK(Put("ccc", "v"));
2833 ASSERT_OK(Put("ddd", "v"));
2835 // Move both files to bottom level.
2836 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2837 Slice
lower_bound("b");
2838 ReadOptions read_opts
;
2839 read_opts
.iterate_lower_bound
= &lower_bound
;
2840 std::unique_ptr
<Iterator
> iter(NewIterator(read_opts
));
2841 iter
->SeekForPrev("d");
2842 ASSERT_TRUE(iter
->Valid());
2843 ASSERT_OK(iter
->status());
2844 ASSERT_EQ("ccc", iter
->key());
2846 ASSERT_TRUE(iter
->Valid());
2847 ASSERT_OK(iter
->status());
2848 ASSERT_EQ("bbb", iter
->key());
2850 ASSERT_FALSE(iter
->Valid());
2851 ASSERT_OK(iter
->status());
2854 INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance
, DBIteratorTest
,
2855 testing::Values(true, false));
2857 // Tests how DBIter work with ReadCallback
2858 class DBIteratorWithReadCallbackTest
: public DBIteratorTest
{};
2860 TEST_F(DBIteratorWithReadCallbackTest
, ReadCallback
) {
2861 class TestReadCallback
: public ReadCallback
{
2863 explicit TestReadCallback(SequenceNumber _max_visible_seq
)
2864 : ReadCallback(_max_visible_seq
) {}
2866 bool IsVisibleFullCheck(SequenceNumber seq
) override
{
2867 return seq
<= max_visible_seq_
;
2871 ASSERT_OK(Put("foo", "v1"));
2872 ASSERT_OK(Put("foo", "v2"));
2873 ASSERT_OK(Put("foo", "v3"));
2874 ASSERT_OK(Put("a", "va"));
2875 ASSERT_OK(Put("z", "vz"));
2876 SequenceNumber seq1
= db_
->GetLatestSequenceNumber();
2877 TestReadCallback
callback1(seq1
);
2878 ASSERT_OK(Put("foo", "v4"));
2879 ASSERT_OK(Put("foo", "v5"));
2880 ASSERT_OK(Put("bar", "v7"));
2882 SequenceNumber seq2
= db_
->GetLatestSequenceNumber();
2884 reinterpret_cast<ColumnFamilyHandleImpl
*>(db_
->DefaultColumnFamily())
2886 // The iterator are suppose to see data before seq1.
2888 dbfull()->NewIteratorImpl(ReadOptions(), cfd
, seq2
, &callback1
);
2891 // The latest value of "foo" before seq1 is "v3"
2893 ASSERT_TRUE(iter
->Valid());
2894 ASSERT_OK(iter
->status());
2895 ASSERT_EQ("foo", iter
->key());
2896 ASSERT_EQ("v3", iter
->value());
2897 // "bar" is not visible to the iterator. It will move on to the next key
2900 ASSERT_TRUE(iter
->Valid());
2901 ASSERT_OK(iter
->status());
2902 ASSERT_EQ("foo", iter
->key());
2903 ASSERT_EQ("v3", iter
->value());
2908 ASSERT_TRUE(iter
->Valid());
2909 ASSERT_OK(iter
->status());
2910 ASSERT_EQ("va", iter
->value());
2911 // "bar" is not visible to the iterator. It will move on to the next key
2914 ASSERT_TRUE(iter
->Valid());
2915 ASSERT_OK(iter
->status());
2916 ASSERT_EQ("foo", iter
->key());
2917 ASSERT_EQ("v3", iter
->value());
2922 ASSERT_TRUE(iter
->Valid());
2923 ASSERT_OK(iter
->status());
2924 ASSERT_EQ("vz", iter
->value());
2925 // The previous key is "foo", which is visible to the iterator.
2927 ASSERT_TRUE(iter
->Valid());
2928 ASSERT_OK(iter
->status());
2929 ASSERT_EQ("foo", iter
->key());
2930 ASSERT_EQ("v3", iter
->value());
2931 // "bar" is not visible to the iterator. It will move on to the next key "a".
2932 iter
->Prev(); // skipping "bar"
2933 ASSERT_TRUE(iter
->Valid());
2934 ASSERT_OK(iter
->status());
2935 ASSERT_EQ("a", iter
->key());
2936 ASSERT_EQ("va", iter
->value());
2939 // The previous key is "foo", which is visible to the iterator.
2940 iter
->SeekForPrev("y");
2941 ASSERT_TRUE(iter
->Valid());
2942 ASSERT_OK(iter
->status());
2943 ASSERT_EQ("foo", iter
->key());
2944 ASSERT_EQ("v3", iter
->value());
2945 // "bar" is not visible to the iterator. It will move on to the next key "a".
2946 iter
->SeekForPrev("bar");
2947 ASSERT_TRUE(iter
->Valid());
2948 ASSERT_OK(iter
->status());
2949 ASSERT_EQ("a", iter
->key());
2950 ASSERT_EQ("va", iter
->value());
2954 // Prev beyond max_sequential_skip_in_iterations
2955 uint64_t num_versions
=
2956 CurrentOptions().max_sequential_skip_in_iterations
+ 10;
2957 for (uint64_t i
= 0; i
< num_versions
; i
++) {
2958 ASSERT_OK(Put("bar", ToString(i
)));
2960 SequenceNumber seq3
= db_
->GetLatestSequenceNumber();
2961 TestReadCallback
callback2(seq3
);
2962 ASSERT_OK(Put("bar", "v8"));
2963 SequenceNumber seq4
= db_
->GetLatestSequenceNumber();
2965 // The iterator is suppose to see data before seq3.
2966 iter
= dbfull()->NewIteratorImpl(ReadOptions(), cfd
, seq4
, &callback2
);
2967 // Seek to "z", which is visible.
2969 ASSERT_TRUE(iter
->Valid());
2970 ASSERT_OK(iter
->status());
2971 ASSERT_EQ("vz", iter
->value());
2972 // Previous key is "foo" and the last value "v5" is visible.
2974 ASSERT_TRUE(iter
->Valid());
2975 ASSERT_OK(iter
->status());
2976 ASSERT_EQ("foo", iter
->key());
2977 ASSERT_EQ("v5", iter
->value());
2978 // Since the number of values of "bar" is more than
2979 // max_sequential_skip_in_iterations, Prev() will ultimately fallback to
2980 // seek in forward direction. Here we test the fallback seek is correct.
2981 // The last visible value should be (num_versions - 1), as "v8" is not
2984 ASSERT_TRUE(iter
->Valid());
2985 ASSERT_OK(iter
->status());
2986 ASSERT_EQ("bar", iter
->key());
2987 ASSERT_EQ(ToString(num_versions
- 1), iter
->value());
2992 } // namespace ROCKSDB_NAMESPACE
2994 int main(int argc
, char** argv
) {
2995 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
2996 ::testing::InitGoogleTest(&argc
, argv
);
2997 return RUN_ALL_TESTS();