]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/db/db_iterator_test.cc
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / rocksdb / db / db_iterator_test.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #include <functional>
11
12 #include "db/arena_wrapped_db_iter.h"
13 #include "db/db_iter.h"
14 #include "db/db_test_util.h"
15 #include "port/port.h"
16 #include "port/stack_trace.h"
17 #include "rocksdb/iostats_context.h"
18 #include "rocksdb/perf_context.h"
19 #include "table/block_based/flush_block_policy.h"
20
21 namespace ROCKSDB_NAMESPACE {
22
23 // A dumb ReadCallback which saying every key is committed.
24 class DummyReadCallback : public ReadCallback {
25 public:
26 DummyReadCallback() : ReadCallback(kMaxSequenceNumber) {}
27 bool IsVisibleFullCheck(SequenceNumber /*seq*/) override { return true; }
28 void SetSnapshot(SequenceNumber seq) { max_visible_seq_ = seq; }
29 };
30
31 // Test param:
32 // bool: whether to pass read_callback to NewIterator().
33 class DBIteratorTest : public DBTestBase,
34 public testing::WithParamInterface<bool> {
35 public:
36 DBIteratorTest() : DBTestBase("/db_iterator_test") {}
37
38 Iterator* NewIterator(const ReadOptions& read_options,
39 ColumnFamilyHandle* column_family = nullptr) {
40 if (column_family == nullptr) {
41 column_family = db_->DefaultColumnFamily();
42 }
43 auto* cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family)->cfd();
44 SequenceNumber seq = read_options.snapshot != nullptr
45 ? read_options.snapshot->GetSequenceNumber()
46 : db_->GetLatestSequenceNumber();
47 bool use_read_callback = GetParam();
48 DummyReadCallback* read_callback = nullptr;
49 if (use_read_callback) {
50 read_callback = new DummyReadCallback();
51 read_callback->SetSnapshot(seq);
52 InstrumentedMutexLock lock(&mutex_);
53 read_callbacks_.push_back(
54 std::unique_ptr<DummyReadCallback>(read_callback));
55 }
56 return dbfull()->NewIteratorImpl(read_options, cfd, seq, read_callback);
57 }
58
59 private:
60 InstrumentedMutex mutex_;
61 std::vector<std::unique_ptr<DummyReadCallback>> read_callbacks_;
62 };
63
64 TEST_P(DBIteratorTest, IteratorProperty) {
65 // The test needs to be changed if kPersistedTier is supported in iterator.
66 Options options = CurrentOptions();
67 CreateAndReopenWithCF({"pikachu"}, options);
68 Put(1, "1", "2");
69 Delete(1, "2");
70 ReadOptions ropt;
71 ropt.pin_data = false;
72 {
73 std::unique_ptr<Iterator> iter(NewIterator(ropt, handles_[1]));
74 iter->SeekToFirst();
75 std::string prop_value;
76 ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value));
77 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
78 ASSERT_EQ("0", prop_value);
79 ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
80 ASSERT_EQ("1", prop_value);
81 iter->Next();
82 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
83 ASSERT_EQ("Iterator is not valid.", prop_value);
84
85 // Get internal key at which the iteration stopped (tombstone in this case).
86 ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
87 ASSERT_EQ("2", prop_value);
88 }
89 Close();
90 }
91
92 TEST_P(DBIteratorTest, PersistedTierOnIterator) {
93 // The test needs to be changed if kPersistedTier is supported in iterator.
94 Options options = CurrentOptions();
95 CreateAndReopenWithCF({"pikachu"}, options);
96 ReadOptions ropt;
97 ropt.read_tier = kPersistedTier;
98
99 auto* iter = db_->NewIterator(ropt, handles_[1]);
100 ASSERT_TRUE(iter->status().IsNotSupported());
101 delete iter;
102
103 std::vector<Iterator*> iters;
104 ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported());
105 Close();
106 }
107
108 TEST_P(DBIteratorTest, NonBlockingIteration) {
109 do {
110 ReadOptions non_blocking_opts, regular_opts;
111 Options options = CurrentOptions();
112 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
113 non_blocking_opts.read_tier = kBlockCacheTier;
114 CreateAndReopenWithCF({"pikachu"}, options);
115 // write one kv to the database.
116 ASSERT_OK(Put(1, "a", "b"));
117
118 // scan using non-blocking iterator. We should find it because
119 // it is in memtable.
120 Iterator* iter = NewIterator(non_blocking_opts, handles_[1]);
121 int count = 0;
122 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
123 ASSERT_OK(iter->status());
124 count++;
125 }
126 ASSERT_EQ(count, 1);
127 delete iter;
128
129 // flush memtable to storage. Now, the key should not be in the
130 // memtable neither in the block cache.
131 ASSERT_OK(Flush(1));
132
133 // verify that a non-blocking iterator does not find any
134 // kvs. Neither does it do any IOs to storage.
135 uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
136 uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
137 iter = NewIterator(non_blocking_opts, handles_[1]);
138 count = 0;
139 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
140 count++;
141 }
142 ASSERT_EQ(count, 0);
143 ASSERT_TRUE(iter->status().IsIncomplete());
144 ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
145 ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
146 delete iter;
147
148 // read in the specified block via a regular get
149 ASSERT_EQ(Get(1, "a"), "b");
150
151 // verify that we can find it via a non-blocking scan
152 numopen = TestGetTickerCount(options, NO_FILE_OPENS);
153 cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
154 iter = NewIterator(non_blocking_opts, handles_[1]);
155 count = 0;
156 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
157 ASSERT_OK(iter->status());
158 count++;
159 }
160 ASSERT_EQ(count, 1);
161 ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
162 ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
163 delete iter;
164
165 // This test verifies block cache behaviors, which is not used by plain
166 // table format.
167 } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipMmapReads));
168 }
169
170 TEST_P(DBIteratorTest, IterSeekBeforePrev) {
171 ASSERT_OK(Put("a", "b"));
172 ASSERT_OK(Put("c", "d"));
173 dbfull()->Flush(FlushOptions());
174 ASSERT_OK(Put("0", "f"));
175 ASSERT_OK(Put("1", "h"));
176 dbfull()->Flush(FlushOptions());
177 ASSERT_OK(Put("2", "j"));
178 auto iter = NewIterator(ReadOptions());
179 iter->Seek(Slice("c"));
180 iter->Prev();
181 iter->Seek(Slice("a"));
182 iter->Prev();
183 delete iter;
184 }
185
186 TEST_P(DBIteratorTest, IterReseekNewUpperBound) {
187 Random rnd(301);
188 Options options = CurrentOptions();
189 BlockBasedTableOptions table_options;
190 table_options.block_size = 1024;
191 table_options.block_size_deviation = 50;
192 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
193 options.compression = kNoCompression;
194 Reopen(options);
195
196 ASSERT_OK(Put("a", RandomString(&rnd, 400)));
197 ASSERT_OK(Put("aabb", RandomString(&rnd, 400)));
198 ASSERT_OK(Put("aaef", RandomString(&rnd, 400)));
199 ASSERT_OK(Put("b", RandomString(&rnd, 400)));
200 dbfull()->Flush(FlushOptions());
201 ReadOptions opts;
202 Slice ub = Slice("aa");
203 opts.iterate_upper_bound = &ub;
204 auto iter = NewIterator(opts);
205 iter->Seek(Slice("a"));
206 ub = Slice("b");
207 iter->Seek(Slice("aabc"));
208 ASSERT_TRUE(iter->Valid());
209 ASSERT_EQ(iter->key().ToString(), "aaef");
210 delete iter;
211 }
212
213 TEST_P(DBIteratorTest, IterSeekForPrevBeforeNext) {
214 ASSERT_OK(Put("a", "b"));
215 ASSERT_OK(Put("c", "d"));
216 dbfull()->Flush(FlushOptions());
217 ASSERT_OK(Put("0", "f"));
218 ASSERT_OK(Put("1", "h"));
219 dbfull()->Flush(FlushOptions());
220 ASSERT_OK(Put("2", "j"));
221 auto iter = NewIterator(ReadOptions());
222 iter->SeekForPrev(Slice("0"));
223 iter->Next();
224 iter->SeekForPrev(Slice("1"));
225 iter->Next();
226 delete iter;
227 }
228
229 namespace {
230 std::string MakeLongKey(size_t length, char c) {
231 return std::string(length, c);
232 }
233 } // namespace
234
235 TEST_P(DBIteratorTest, IterLongKeys) {
236 ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
237 ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
238 ASSERT_OK(Put("a", "b"));
239 dbfull()->Flush(FlushOptions());
240 ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
241 ASSERT_OK(Put(MakeLongKey(127, 3), "3"));
242 ASSERT_OK(Put(MakeLongKey(64, 4), "4"));
243 auto iter = NewIterator(ReadOptions());
244
245 // Create a key that needs to be skipped for Seq too new
246 iter->Seek(MakeLongKey(20, 0));
247 ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0");
248 iter->Next();
249 ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
250 iter->Next();
251 ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
252 iter->Next();
253 ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
254 iter->Next();
255 ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4");
256
257 iter->SeekForPrev(MakeLongKey(127, 3));
258 ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
259 iter->Prev();
260 ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
261 iter->Prev();
262 ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
263 delete iter;
264
265 iter = NewIterator(ReadOptions());
266 iter->Seek(MakeLongKey(50, 1));
267 ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
268 iter->Next();
269 ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
270 iter->Next();
271 ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
272 delete iter;
273 }
274
275 TEST_P(DBIteratorTest, IterNextWithNewerSeq) {
276 ASSERT_OK(Put("0", "0"));
277 dbfull()->Flush(FlushOptions());
278 ASSERT_OK(Put("a", "b"));
279 ASSERT_OK(Put("c", "d"));
280 ASSERT_OK(Put("d", "e"));
281 auto iter = NewIterator(ReadOptions());
282
283 // Create a key that needs to be skipped for Seq too new
284 for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
285 i++) {
286 ASSERT_OK(Put("b", "f"));
287 }
288
289 iter->Seek(Slice("a"));
290 ASSERT_EQ(IterStatus(iter), "a->b");
291 iter->Next();
292 ASSERT_EQ(IterStatus(iter), "c->d");
293 iter->SeekForPrev(Slice("b"));
294 ASSERT_EQ(IterStatus(iter), "a->b");
295 iter->Next();
296 ASSERT_EQ(IterStatus(iter), "c->d");
297
298 delete iter;
299 }
300
301 TEST_P(DBIteratorTest, IterPrevWithNewerSeq) {
302 ASSERT_OK(Put("0", "0"));
303 dbfull()->Flush(FlushOptions());
304 ASSERT_OK(Put("a", "b"));
305 ASSERT_OK(Put("c", "d"));
306 ASSERT_OK(Put("d", "e"));
307 auto iter = NewIterator(ReadOptions());
308
309 // Create a key that needs to be skipped for Seq too new
310 for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
311 i++) {
312 ASSERT_OK(Put("b", "f"));
313 }
314
315 iter->Seek(Slice("d"));
316 ASSERT_EQ(IterStatus(iter), "d->e");
317 iter->Prev();
318 ASSERT_EQ(IterStatus(iter), "c->d");
319 iter->Prev();
320 ASSERT_EQ(IterStatus(iter), "a->b");
321 iter->Prev();
322 iter->SeekForPrev(Slice("d"));
323 ASSERT_EQ(IterStatus(iter), "d->e");
324 iter->Prev();
325 ASSERT_EQ(IterStatus(iter), "c->d");
326 iter->Prev();
327 ASSERT_EQ(IterStatus(iter), "a->b");
328 iter->Prev();
329 delete iter;
330 }
331
332 TEST_P(DBIteratorTest, IterPrevWithNewerSeq2) {
333 ASSERT_OK(Put("0", "0"));
334 dbfull()->Flush(FlushOptions());
335 ASSERT_OK(Put("a", "b"));
336 ASSERT_OK(Put("c", "d"));
337 ASSERT_OK(Put("e", "f"));
338 auto iter = NewIterator(ReadOptions());
339 auto iter2 = NewIterator(ReadOptions());
340 iter->Seek(Slice("c"));
341 iter2->SeekForPrev(Slice("d"));
342 ASSERT_EQ(IterStatus(iter), "c->d");
343 ASSERT_EQ(IterStatus(iter2), "c->d");
344
345 // Create a key that needs to be skipped for Seq too new
346 for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
347 i++) {
348 ASSERT_OK(Put("b", "f"));
349 }
350
351 iter->Prev();
352 ASSERT_EQ(IterStatus(iter), "a->b");
353 iter->Prev();
354 iter2->Prev();
355 ASSERT_EQ(IterStatus(iter2), "a->b");
356 iter2->Prev();
357 delete iter;
358 delete iter2;
359 }
360
361 TEST_P(DBIteratorTest, IterEmpty) {
362 do {
363 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
364 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
365
366 iter->SeekToFirst();
367 ASSERT_EQ(IterStatus(iter), "(invalid)");
368
369 iter->SeekToLast();
370 ASSERT_EQ(IterStatus(iter), "(invalid)");
371
372 iter->Seek("foo");
373 ASSERT_EQ(IterStatus(iter), "(invalid)");
374
375 iter->SeekForPrev("foo");
376 ASSERT_EQ(IterStatus(iter), "(invalid)");
377
378 delete iter;
379 } while (ChangeCompactOptions());
380 }
381
382 TEST_P(DBIteratorTest, IterSingle) {
383 do {
384 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
385 ASSERT_OK(Put(1, "a", "va"));
386 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
387
388 iter->SeekToFirst();
389 ASSERT_EQ(IterStatus(iter), "a->va");
390 iter->Next();
391 ASSERT_EQ(IterStatus(iter), "(invalid)");
392 iter->SeekToFirst();
393 ASSERT_EQ(IterStatus(iter), "a->va");
394 iter->Prev();
395 ASSERT_EQ(IterStatus(iter), "(invalid)");
396
397 iter->SeekToLast();
398 ASSERT_EQ(IterStatus(iter), "a->va");
399 iter->Next();
400 ASSERT_EQ(IterStatus(iter), "(invalid)");
401 iter->SeekToLast();
402 ASSERT_EQ(IterStatus(iter), "a->va");
403 iter->Prev();
404 ASSERT_EQ(IterStatus(iter), "(invalid)");
405
406 iter->Seek("");
407 ASSERT_EQ(IterStatus(iter), "a->va");
408 iter->Next();
409 ASSERT_EQ(IterStatus(iter), "(invalid)");
410 iter->SeekForPrev("");
411 ASSERT_EQ(IterStatus(iter), "(invalid)");
412
413 iter->Seek("a");
414 ASSERT_EQ(IterStatus(iter), "a->va");
415 iter->Next();
416 ASSERT_EQ(IterStatus(iter), "(invalid)");
417 iter->SeekForPrev("a");
418 ASSERT_EQ(IterStatus(iter), "a->va");
419 iter->Prev();
420 ASSERT_EQ(IterStatus(iter), "(invalid)");
421
422 iter->Seek("b");
423 ASSERT_EQ(IterStatus(iter), "(invalid)");
424 iter->SeekForPrev("b");
425 ASSERT_EQ(IterStatus(iter), "a->va");
426 iter->Prev();
427 ASSERT_EQ(IterStatus(iter), "(invalid)");
428
429 delete iter;
430 } while (ChangeCompactOptions());
431 }
432
433 TEST_P(DBIteratorTest, IterMulti) {
434 do {
435 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
436 ASSERT_OK(Put(1, "a", "va"));
437 ASSERT_OK(Put(1, "b", "vb"));
438 ASSERT_OK(Put(1, "c", "vc"));
439 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
440
441 iter->SeekToFirst();
442 ASSERT_EQ(IterStatus(iter), "a->va");
443 iter->Next();
444 ASSERT_EQ(IterStatus(iter), "b->vb");
445 iter->Next();
446 ASSERT_EQ(IterStatus(iter), "c->vc");
447 iter->Next();
448 ASSERT_EQ(IterStatus(iter), "(invalid)");
449 iter->SeekToFirst();
450 ASSERT_EQ(IterStatus(iter), "a->va");
451 iter->Prev();
452 ASSERT_EQ(IterStatus(iter), "(invalid)");
453
454 iter->SeekToLast();
455 ASSERT_EQ(IterStatus(iter), "c->vc");
456 iter->Prev();
457 ASSERT_EQ(IterStatus(iter), "b->vb");
458 iter->Prev();
459 ASSERT_EQ(IterStatus(iter), "a->va");
460 iter->Prev();
461 ASSERT_EQ(IterStatus(iter), "(invalid)");
462 iter->SeekToLast();
463 ASSERT_EQ(IterStatus(iter), "c->vc");
464 iter->Next();
465 ASSERT_EQ(IterStatus(iter), "(invalid)");
466
467 iter->Seek("");
468 ASSERT_EQ(IterStatus(iter), "a->va");
469 iter->Seek("a");
470 ASSERT_EQ(IterStatus(iter), "a->va");
471 iter->Seek("ax");
472 ASSERT_EQ(IterStatus(iter), "b->vb");
473 iter->SeekForPrev("d");
474 ASSERT_EQ(IterStatus(iter), "c->vc");
475 iter->SeekForPrev("c");
476 ASSERT_EQ(IterStatus(iter), "c->vc");
477 iter->SeekForPrev("bx");
478 ASSERT_EQ(IterStatus(iter), "b->vb");
479
480 iter->Seek("b");
481 ASSERT_EQ(IterStatus(iter), "b->vb");
482 iter->Seek("z");
483 ASSERT_EQ(IterStatus(iter), "(invalid)");
484 iter->SeekForPrev("b");
485 ASSERT_EQ(IterStatus(iter), "b->vb");
486 iter->SeekForPrev("");
487 ASSERT_EQ(IterStatus(iter), "(invalid)");
488
489 // Switch from reverse to forward
490 iter->SeekToLast();
491 iter->Prev();
492 iter->Prev();
493 iter->Next();
494 ASSERT_EQ(IterStatus(iter), "b->vb");
495
496 // Switch from forward to reverse
497 iter->SeekToFirst();
498 iter->Next();
499 iter->Next();
500 iter->Prev();
501 ASSERT_EQ(IterStatus(iter), "b->vb");
502
503 // Make sure iter stays at snapshot
504 ASSERT_OK(Put(1, "a", "va2"));
505 ASSERT_OK(Put(1, "a2", "va3"));
506 ASSERT_OK(Put(1, "b", "vb2"));
507 ASSERT_OK(Put(1, "c", "vc2"));
508 ASSERT_OK(Delete(1, "b"));
509 iter->SeekToFirst();
510 ASSERT_EQ(IterStatus(iter), "a->va");
511 iter->Next();
512 ASSERT_EQ(IterStatus(iter), "b->vb");
513 iter->Next();
514 ASSERT_EQ(IterStatus(iter), "c->vc");
515 iter->Next();
516 ASSERT_EQ(IterStatus(iter), "(invalid)");
517 iter->SeekToLast();
518 ASSERT_EQ(IterStatus(iter), "c->vc");
519 iter->Prev();
520 ASSERT_EQ(IterStatus(iter), "b->vb");
521 iter->Prev();
522 ASSERT_EQ(IterStatus(iter), "a->va");
523 iter->Prev();
524 ASSERT_EQ(IterStatus(iter), "(invalid)");
525
526 delete iter;
527 } while (ChangeCompactOptions());
528 }
529
530 // Check that we can skip over a run of user keys
531 // by using reseek rather than sequential scan
532 TEST_P(DBIteratorTest, IterReseek) {
533 anon::OptionsOverride options_override;
534 options_override.skip_policy = kSkipNoSnapshot;
535 Options options = CurrentOptions(options_override);
536 options.max_sequential_skip_in_iterations = 3;
537 options.create_if_missing = true;
538 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
539 DestroyAndReopen(options);
540 CreateAndReopenWithCF({"pikachu"}, options);
541
542 // insert three keys with same userkey and verify that
543 // reseek is not invoked. For each of these test cases,
544 // verify that we can find the next key "b".
545 ASSERT_OK(Put(1, "a", "zero"));
546 ASSERT_OK(Put(1, "a", "one"));
547 ASSERT_OK(Put(1, "a", "two"));
548 ASSERT_OK(Put(1, "b", "bone"));
549 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
550 iter->SeekToFirst();
551 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
552 ASSERT_EQ(IterStatus(iter), "a->two");
553 iter->Next();
554 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
555 ASSERT_EQ(IterStatus(iter), "b->bone");
556 delete iter;
557
558 // insert a total of three keys with same userkey and verify
559 // that reseek is still not invoked.
560 ASSERT_OK(Put(1, "a", "three"));
561 iter = NewIterator(ReadOptions(), handles_[1]);
562 iter->SeekToFirst();
563 ASSERT_EQ(IterStatus(iter), "a->three");
564 iter->Next();
565 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
566 ASSERT_EQ(IterStatus(iter), "b->bone");
567 delete iter;
568
569 // insert a total of four keys with same userkey and verify
570 // that reseek is invoked.
571 ASSERT_OK(Put(1, "a", "four"));
572 iter = NewIterator(ReadOptions(), handles_[1]);
573 iter->SeekToFirst();
574 ASSERT_EQ(IterStatus(iter), "a->four");
575 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
576 iter->Next();
577 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
578 ASSERT_EQ(IterStatus(iter), "b->bone");
579 delete iter;
580
581 // Testing reverse iterator
582 // At this point, we have three versions of "a" and one version of "b".
583 // The reseek statistics is already at 1.
584 int num_reseeks = static_cast<int>(
585 TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
586
587 // Insert another version of b and assert that reseek is not invoked
588 ASSERT_OK(Put(1, "b", "btwo"));
589 iter = NewIterator(ReadOptions(), handles_[1]);
590 iter->SeekToLast();
591 ASSERT_EQ(IterStatus(iter), "b->btwo");
592 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
593 num_reseeks);
594 iter->Prev();
595 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
596 num_reseeks + 1);
597 ASSERT_EQ(IterStatus(iter), "a->four");
598 delete iter;
599
600 // insert two more versions of b. This makes a total of 4 versions
601 // of b and 4 versions of a.
602 ASSERT_OK(Put(1, "b", "bthree"));
603 ASSERT_OK(Put(1, "b", "bfour"));
604 iter = NewIterator(ReadOptions(), handles_[1]);
605 iter->SeekToLast();
606 ASSERT_EQ(IterStatus(iter), "b->bfour");
607 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
608 num_reseeks + 2);
609 iter->Prev();
610
611 // the previous Prev call should have invoked reseek
612 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
613 num_reseeks + 3);
614 ASSERT_EQ(IterStatus(iter), "a->four");
615 delete iter;
616 }
617
618 TEST_P(DBIteratorTest, IterSmallAndLargeMix) {
619 do {
620 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
621 ASSERT_OK(Put(1, "a", "va"));
622 ASSERT_OK(Put(1, "b", std::string(100000, 'b')));
623 ASSERT_OK(Put(1, "c", "vc"));
624 ASSERT_OK(Put(1, "d", std::string(100000, 'd')));
625 ASSERT_OK(Put(1, "e", std::string(100000, 'e')));
626
627 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
628
629 iter->SeekToFirst();
630 ASSERT_EQ(IterStatus(iter), "a->va");
631 iter->Next();
632 ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
633 iter->Next();
634 ASSERT_EQ(IterStatus(iter), "c->vc");
635 iter->Next();
636 ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
637 iter->Next();
638 ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
639 iter->Next();
640 ASSERT_EQ(IterStatus(iter), "(invalid)");
641
642 iter->SeekToLast();
643 ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
644 iter->Prev();
645 ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
646 iter->Prev();
647 ASSERT_EQ(IterStatus(iter), "c->vc");
648 iter->Prev();
649 ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
650 iter->Prev();
651 ASSERT_EQ(IterStatus(iter), "a->va");
652 iter->Prev();
653 ASSERT_EQ(IterStatus(iter), "(invalid)");
654
655 delete iter;
656 } while (ChangeCompactOptions());
657 }
658
659 TEST_P(DBIteratorTest, IterMultiWithDelete) {
660 do {
661 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
662 ASSERT_OK(Put(1, "ka", "va"));
663 ASSERT_OK(Put(1, "kb", "vb"));
664 ASSERT_OK(Put(1, "kc", "vc"));
665 ASSERT_OK(Delete(1, "kb"));
666 ASSERT_EQ("NOT_FOUND", Get(1, "kb"));
667
668 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
669 iter->Seek("kc");
670 ASSERT_EQ(IterStatus(iter), "kc->vc");
671 if (!CurrentOptions().merge_operator) {
672 // TODO: merge operator does not support backward iteration yet
673 if (kPlainTableAllBytesPrefix != option_config_ &&
674 kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
675 kHashLinkList != option_config_ &&
676 kHashSkipList != option_config_) { // doesn't support SeekToLast
677 iter->Prev();
678 ASSERT_EQ(IterStatus(iter), "ka->va");
679 }
680 }
681 delete iter;
682 } while (ChangeOptions());
683 }
684
685 TEST_P(DBIteratorTest, IterPrevMaxSkip) {
686 do {
687 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
688 for (int i = 0; i < 2; i++) {
689 ASSERT_OK(Put(1, "key1", "v1"));
690 ASSERT_OK(Put(1, "key2", "v2"));
691 ASSERT_OK(Put(1, "key3", "v3"));
692 ASSERT_OK(Put(1, "key4", "v4"));
693 ASSERT_OK(Put(1, "key5", "v5"));
694 }
695
696 VerifyIterLast("key5->v5", 1);
697
698 ASSERT_OK(Delete(1, "key5"));
699 VerifyIterLast("key4->v4", 1);
700
701 ASSERT_OK(Delete(1, "key4"));
702 VerifyIterLast("key3->v3", 1);
703
704 ASSERT_OK(Delete(1, "key3"));
705 VerifyIterLast("key2->v2", 1);
706
707 ASSERT_OK(Delete(1, "key2"));
708 VerifyIterLast("key1->v1", 1);
709
710 ASSERT_OK(Delete(1, "key1"));
711 VerifyIterLast("(invalid)", 1);
712 } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast));
713 }
714
715 TEST_P(DBIteratorTest, IterWithSnapshot) {
716 anon::OptionsOverride options_override;
717 options_override.skip_policy = kSkipNoSnapshot;
718 do {
719 CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
720 ASSERT_OK(Put(1, "key1", "val1"));
721 ASSERT_OK(Put(1, "key2", "val2"));
722 ASSERT_OK(Put(1, "key3", "val3"));
723 ASSERT_OK(Put(1, "key4", "val4"));
724 ASSERT_OK(Put(1, "key5", "val5"));
725
726 const Snapshot* snapshot = db_->GetSnapshot();
727 ReadOptions options;
728 options.snapshot = snapshot;
729 Iterator* iter = NewIterator(options, handles_[1]);
730
731 ASSERT_OK(Put(1, "key0", "val0"));
732 // Put more values after the snapshot
733 ASSERT_OK(Put(1, "key100", "val100"));
734 ASSERT_OK(Put(1, "key101", "val101"));
735
736 iter->Seek("key5");
737 ASSERT_EQ(IterStatus(iter), "key5->val5");
738 if (!CurrentOptions().merge_operator) {
739 // TODO: merge operator does not support backward iteration yet
740 if (kPlainTableAllBytesPrefix != option_config_ &&
741 kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
742 kHashLinkList != option_config_ && kHashSkipList != option_config_) {
743 iter->Prev();
744 ASSERT_EQ(IterStatus(iter), "key4->val4");
745 iter->Prev();
746 ASSERT_EQ(IterStatus(iter), "key3->val3");
747
748 iter->Next();
749 ASSERT_EQ(IterStatus(iter), "key4->val4");
750 iter->Next();
751 ASSERT_EQ(IterStatus(iter), "key5->val5");
752 }
753 iter->Next();
754 ASSERT_TRUE(!iter->Valid());
755 }
756
757 if (!CurrentOptions().merge_operator) {
758 // TODO(gzh): merge operator does not support backward iteration yet
759 if (kPlainTableAllBytesPrefix != option_config_ &&
760 kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
761 kHashLinkList != option_config_ && kHashSkipList != option_config_) {
762 iter->SeekForPrev("key1");
763 ASSERT_EQ(IterStatus(iter), "key1->val1");
764 iter->Next();
765 ASSERT_EQ(IterStatus(iter), "key2->val2");
766 iter->Next();
767 ASSERT_EQ(IterStatus(iter), "key3->val3");
768 iter->Prev();
769 ASSERT_EQ(IterStatus(iter), "key2->val2");
770 iter->Prev();
771 ASSERT_EQ(IterStatus(iter), "key1->val1");
772 iter->Prev();
773 ASSERT_TRUE(!iter->Valid());
774 }
775 }
776 db_->ReleaseSnapshot(snapshot);
777 delete iter;
778 } while (ChangeOptions());
779 }
780
781 TEST_P(DBIteratorTest, IteratorPinsRef) {
782 do {
783 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
784 Put(1, "foo", "hello");
785
786 // Get iterator that will yield the current contents of the DB.
787 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
788
789 // Write to force compactions
790 Put(1, "foo", "newvalue1");
791 for (int i = 0; i < 100; i++) {
792 // 100K values
793 ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v')));
794 }
795 Put(1, "foo", "newvalue2");
796
797 iter->SeekToFirst();
798 ASSERT_TRUE(iter->Valid());
799 ASSERT_EQ("foo", iter->key().ToString());
800 ASSERT_EQ("hello", iter->value().ToString());
801 iter->Next();
802 ASSERT_TRUE(!iter->Valid());
803 delete iter;
804 } while (ChangeCompactOptions());
805 }
806
807 TEST_P(DBIteratorTest, IteratorDeleteAfterCfDelete) {
808 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
809
810 Put(1, "foo", "delete-cf-then-delete-iter");
811 Put(1, "hello", "value2");
812
813 ColumnFamilyHandle* cf = handles_[1];
814 ReadOptions ro;
815
816 auto* iter = db_->NewIterator(ro, cf);
817 iter->SeekToFirst();
818 ASSERT_EQ(IterStatus(iter), "foo->delete-cf-then-delete-iter");
819
820 // delete CF handle
821 db_->DestroyColumnFamilyHandle(cf);
822 handles_.erase(std::begin(handles_) + 1);
823
824 // delete Iterator after CF handle is deleted
825 iter->Next();
826 ASSERT_EQ(IterStatus(iter), "hello->value2");
827 delete iter;
828 }
829
830 TEST_P(DBIteratorTest, IteratorDeleteAfterCfDrop) {
831 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
832
833 Put(1, "foo", "drop-cf-then-delete-iter");
834
835 ReadOptions ro;
836 ColumnFamilyHandle* cf = handles_[1];
837
838 auto* iter = db_->NewIterator(ro, cf);
839 iter->SeekToFirst();
840 ASSERT_EQ(IterStatus(iter), "foo->drop-cf-then-delete-iter");
841
842 // drop and delete CF
843 db_->DropColumnFamily(cf);
844 db_->DestroyColumnFamilyHandle(cf);
845 handles_.erase(std::begin(handles_) + 1);
846
847 // delete Iterator after CF handle is dropped
848 delete iter;
849 }
850
851 // SetOptions not defined in ROCKSDB LITE
852 #ifndef ROCKSDB_LITE
853 TEST_P(DBIteratorTest, DBIteratorBoundTest) {
854 Options options = CurrentOptions();
855 options.env = env_;
856 options.create_if_missing = true;
857
858 options.prefix_extractor = nullptr;
859 DestroyAndReopen(options);
860 ASSERT_OK(Put("a", "0"));
861 ASSERT_OK(Put("foo", "bar"));
862 ASSERT_OK(Put("foo1", "bar1"));
863 ASSERT_OK(Put("g1", "0"));
864
865 // testing basic case with no iterate_upper_bound and no prefix_extractor
866 {
867 ReadOptions ro;
868 ro.iterate_upper_bound = nullptr;
869
870 std::unique_ptr<Iterator> iter(NewIterator(ro));
871
872 iter->Seek("foo");
873
874 ASSERT_TRUE(iter->Valid());
875 ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
876
877 iter->Next();
878 ASSERT_TRUE(iter->Valid());
879 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
880
881 iter->Next();
882 ASSERT_TRUE(iter->Valid());
883 ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
884
885 iter->SeekForPrev("g1");
886
887 ASSERT_TRUE(iter->Valid());
888 ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
889
890 iter->Prev();
891 ASSERT_TRUE(iter->Valid());
892 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
893
894 iter->Prev();
895 ASSERT_TRUE(iter->Valid());
896 ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
897 }
898
899 // testing iterate_upper_bound and forward iterator
900 // to make sure it stops at bound
901 {
902 ReadOptions ro;
903 // iterate_upper_bound points beyond the last expected entry
904 Slice prefix("foo2");
905 ro.iterate_upper_bound = &prefix;
906
907 std::unique_ptr<Iterator> iter(NewIterator(ro));
908
909 iter->Seek("foo");
910
911 ASSERT_TRUE(iter->Valid());
912 ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
913
914 iter->Next();
915 ASSERT_TRUE(iter->Valid());
916 ASSERT_EQ(iter->key().compare(("foo1")), 0);
917
918 iter->Next();
919 // should stop here...
920 ASSERT_TRUE(!iter->Valid());
921 }
922 // Testing SeekToLast with iterate_upper_bound set
923 {
924 ReadOptions ro;
925
926 Slice prefix("foo");
927 ro.iterate_upper_bound = &prefix;
928
929 std::unique_ptr<Iterator> iter(NewIterator(ro));
930
931 iter->SeekToLast();
932 ASSERT_TRUE(iter->Valid());
933 ASSERT_EQ(iter->key().compare(Slice("a")), 0);
934 }
935
936 // prefix is the first letter of the key
937 ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}}));
938 ASSERT_OK(Put("a", "0"));
939 ASSERT_OK(Put("foo", "bar"));
940 ASSERT_OK(Put("foo1", "bar1"));
941 ASSERT_OK(Put("g1", "0"));
942
943 // testing with iterate_upper_bound and prefix_extractor
944 // Seek target and iterate_upper_bound are not is same prefix
945 // This should be an error
946 {
947 ReadOptions ro;
948 Slice upper_bound("g");
949 ro.iterate_upper_bound = &upper_bound;
950
951 std::unique_ptr<Iterator> iter(NewIterator(ro));
952
953 iter->Seek("foo");
954
955 ASSERT_TRUE(iter->Valid());
956 ASSERT_EQ("foo", iter->key().ToString());
957
958 iter->Next();
959 ASSERT_TRUE(iter->Valid());
960 ASSERT_EQ("foo1", iter->key().ToString());
961
962 iter->Next();
963 ASSERT_TRUE(!iter->Valid());
964 }
965
966 // testing that iterate_upper_bound prevents iterating over deleted items
967 // if the bound has already reached
968 {
969 options.prefix_extractor = nullptr;
970 DestroyAndReopen(options);
971 ASSERT_OK(Put("a", "0"));
972 ASSERT_OK(Put("b", "0"));
973 ASSERT_OK(Put("b1", "0"));
974 ASSERT_OK(Put("c", "0"));
975 ASSERT_OK(Put("d", "0"));
976 ASSERT_OK(Put("e", "0"));
977 ASSERT_OK(Delete("c"));
978 ASSERT_OK(Delete("d"));
979
980 // base case with no bound
981 ReadOptions ro;
982 ro.iterate_upper_bound = nullptr;
983
984 std::unique_ptr<Iterator> iter(NewIterator(ro));
985
986 iter->Seek("b");
987 ASSERT_TRUE(iter->Valid());
988 ASSERT_EQ(iter->key().compare(Slice("b")), 0);
989
990 iter->Next();
991 ASSERT_TRUE(iter->Valid());
992 ASSERT_EQ(iter->key().compare(("b1")), 0);
993
994 get_perf_context()->Reset();
995 iter->Next();
996
997 ASSERT_TRUE(iter->Valid());
998 ASSERT_EQ(static_cast<int>(get_perf_context()->internal_delete_skipped_count), 2);
999
1000 // now testing with iterate_bound
1001 Slice prefix("c");
1002 ro.iterate_upper_bound = &prefix;
1003
1004 iter.reset(NewIterator(ro));
1005
1006 get_perf_context()->Reset();
1007
1008 iter->Seek("b");
1009 ASSERT_TRUE(iter->Valid());
1010 ASSERT_EQ(iter->key().compare(Slice("b")), 0);
1011
1012 iter->Next();
1013 ASSERT_TRUE(iter->Valid());
1014 ASSERT_EQ(iter->key().compare(("b1")), 0);
1015
1016 iter->Next();
1017 // the iteration should stop as soon as the bound key is reached
1018 // even though the key is deleted
1019 // hence internal_delete_skipped_count should be 0
1020 ASSERT_TRUE(!iter->Valid());
1021 ASSERT_EQ(static_cast<int>(get_perf_context()->internal_delete_skipped_count), 0);
1022 }
1023 }
1024
1025 TEST_P(DBIteratorTest, DBIteratorBoundMultiSeek) {
1026 Options options = CurrentOptions();
1027 options.env = env_;
1028 options.create_if_missing = true;
1029 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1030 options.prefix_extractor = nullptr;
1031 DestroyAndReopen(options);
1032 ASSERT_OK(Put("a", "0"));
1033 ASSERT_OK(Put("z", "0"));
1034 ASSERT_OK(Flush());
1035 ASSERT_OK(Put("foo1", "bar1"));
1036 ASSERT_OK(Put("foo2", "bar2"));
1037 ASSERT_OK(Put("foo3", "bar3"));
1038 ASSERT_OK(Put("foo4", "bar4"));
1039
1040 {
1041 std::string up_str = "foo5";
1042 Slice up(up_str);
1043 ReadOptions ro;
1044 ro.iterate_upper_bound = &up;
1045 std::unique_ptr<Iterator> iter(NewIterator(ro));
1046
1047 iter->Seek("foo1");
1048 ASSERT_TRUE(iter->Valid());
1049 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
1050
1051 uint64_t prev_block_cache_hit =
1052 TestGetTickerCount(options, BLOCK_CACHE_HIT);
1053 uint64_t prev_block_cache_miss =
1054 TestGetTickerCount(options, BLOCK_CACHE_MISS);
1055
1056 ASSERT_GT(prev_block_cache_hit + prev_block_cache_miss, 0);
1057
1058 iter->Seek("foo4");
1059 ASSERT_TRUE(iter->Valid());
1060 ASSERT_EQ(iter->key().compare(Slice("foo4")), 0);
1061 ASSERT_EQ(prev_block_cache_hit,
1062 TestGetTickerCount(options, BLOCK_CACHE_HIT));
1063 ASSERT_EQ(prev_block_cache_miss,
1064 TestGetTickerCount(options, BLOCK_CACHE_MISS));
1065
1066 iter->Seek("foo2");
1067 ASSERT_TRUE(iter->Valid());
1068 ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
1069 iter->Next();
1070 ASSERT_TRUE(iter->Valid());
1071 ASSERT_EQ(iter->key().compare(Slice("foo3")), 0);
1072 ASSERT_EQ(prev_block_cache_hit,
1073 TestGetTickerCount(options, BLOCK_CACHE_HIT));
1074 ASSERT_EQ(prev_block_cache_miss,
1075 TestGetTickerCount(options, BLOCK_CACHE_MISS));
1076 }
1077 }
1078 #endif
1079
1080 TEST_P(DBIteratorTest, DBIteratorBoundOptimizationTest) {
1081 for (auto format_version : {2, 3, 4}) {
1082 int upper_bound_hits = 0;
1083 Options options = CurrentOptions();
1084 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1085 "BlockBasedTableIterator:out_of_bound",
1086 [&upper_bound_hits](void*) { upper_bound_hits++; });
1087 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1088 options.env = env_;
1089 options.create_if_missing = true;
1090 options.prefix_extractor = nullptr;
1091 BlockBasedTableOptions table_options;
1092 table_options.format_version = format_version;
1093 table_options.flush_block_policy_factory =
1094 std::make_shared<FlushBlockEveryKeyPolicyFactory>();
1095 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1096
1097 DestroyAndReopen(options);
1098 ASSERT_OK(Put("foo1", "bar1"));
1099 ASSERT_OK(Put("foo2", "bar2"));
1100 ASSERT_OK(Put("foo4", "bar4"));
1101 ASSERT_OK(Flush());
1102
1103 Slice ub("foo3");
1104 ReadOptions ro;
1105 ro.iterate_upper_bound = &ub;
1106
1107 std::unique_ptr<Iterator> iter(NewIterator(ro));
1108
1109 iter->Seek("foo");
1110 ASSERT_TRUE(iter->Valid());
1111 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
1112 ASSERT_EQ(upper_bound_hits, 0);
1113
1114 iter->Next();
1115 ASSERT_TRUE(iter->Valid());
1116 ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
1117 ASSERT_EQ(upper_bound_hits, 0);
1118
1119 iter->Next();
1120 ASSERT_FALSE(iter->Valid());
1121 ASSERT_EQ(upper_bound_hits, 1);
1122 }
1123 }
1124
1125 // Enable kBinarySearchWithFirstKey, do some iterator operations and check that
1126 // they don't do unnecessary block reads.
1127 TEST_P(DBIteratorTest, IndexWithFirstKey) {
1128 for (int tailing = 0; tailing < 2; ++tailing) {
1129 SCOPED_TRACE("tailing = " + std::to_string(tailing));
1130 Options options = CurrentOptions();
1131 options.env = env_;
1132 options.create_if_missing = true;
1133 options.prefix_extractor = nullptr;
1134 options.merge_operator = MergeOperators::CreateStringAppendOperator();
1135 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1136 Statistics* stats = options.statistics.get();
1137 BlockBasedTableOptions table_options;
1138 table_options.index_type =
1139 BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey;
1140 table_options.index_shortening =
1141 BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
1142 table_options.flush_block_policy_factory =
1143 std::make_shared<FlushBlockEveryKeyPolicyFactory>();
1144 table_options.block_cache =
1145 NewLRUCache(8000); // fits all blocks and their cache metadata overhead
1146 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1147
1148 DestroyAndReopen(options);
1149 ASSERT_OK(Merge("a1", "x1"));
1150 ASSERT_OK(Merge("b1", "y1"));
1151 ASSERT_OK(Merge("c0", "z1"));
1152 ASSERT_OK(Flush());
1153 ASSERT_OK(Merge("a2", "x2"));
1154 ASSERT_OK(Merge("b2", "y2"));
1155 ASSERT_OK(Merge("c0", "z2"));
1156 ASSERT_OK(Flush());
1157 ASSERT_OK(Merge("a3", "x3"));
1158 ASSERT_OK(Merge("b3", "y3"));
1159 ASSERT_OK(Merge("c3", "z3"));
1160 ASSERT_OK(Flush());
1161
1162 // Block cache is not important for this test.
1163 // We use BLOCK_CACHE_DATA_* counters just because they're the most readily
1164 // available way of counting block accesses.
1165
1166 ReadOptions ropt;
1167 ropt.tailing = tailing;
1168 std::unique_ptr<Iterator> iter(NewIterator(ropt));
1169
1170 iter->Seek("b10");
1171 ASSERT_TRUE(iter->Valid());
1172 EXPECT_EQ("b2", iter->key().ToString());
1173 EXPECT_EQ("y2", iter->value().ToString());
1174 EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1175
1176 iter->Next();
1177 ASSERT_TRUE(iter->Valid());
1178 EXPECT_EQ("b3", iter->key().ToString());
1179 EXPECT_EQ("y3", iter->value().ToString());
1180 EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1181 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1182
1183 iter->Seek("c0");
1184 ASSERT_TRUE(iter->Valid());
1185 EXPECT_EQ("c0", iter->key().ToString());
1186 EXPECT_EQ("z1,z2", iter->value().ToString());
1187 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1188 EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1189
1190 iter->Next();
1191 ASSERT_TRUE(iter->Valid());
1192 EXPECT_EQ("c3", iter->key().ToString());
1193 EXPECT_EQ("z3", iter->value().ToString());
1194 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1195 EXPECT_EQ(5, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1196
1197 iter.reset();
1198
1199 // Enable iterate_upper_bound and check that iterator is not trying to read
1200 // blocks that are fully above upper bound.
1201 std::string ub = "b3";
1202 Slice ub_slice(ub);
1203 ropt.iterate_upper_bound = &ub_slice;
1204 iter.reset(NewIterator(ropt));
1205
1206 iter->Seek("b2");
1207 ASSERT_TRUE(iter->Valid());
1208 EXPECT_EQ("b2", iter->key().ToString());
1209 EXPECT_EQ("y2", iter->value().ToString());
1210 EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1211 EXPECT_EQ(5, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1212
1213 iter->Next();
1214 ASSERT_FALSE(iter->Valid());
1215 EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1216 EXPECT_EQ(5, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1217 }
1218 }
1219
1220 TEST_P(DBIteratorTest, IndexWithFirstKeyGet) {
1221 Options options = CurrentOptions();
1222 options.env = env_;
1223 options.create_if_missing = true;
1224 options.prefix_extractor = nullptr;
1225 options.merge_operator = MergeOperators::CreateStringAppendOperator();
1226 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1227 Statistics* stats = options.statistics.get();
1228 BlockBasedTableOptions table_options;
1229 table_options.index_type =
1230 BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey;
1231 table_options.index_shortening =
1232 BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
1233 table_options.flush_block_policy_factory =
1234 std::make_shared<FlushBlockEveryKeyPolicyFactory>();
1235 table_options.block_cache = NewLRUCache(1000); // fits all blocks
1236 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1237
1238 DestroyAndReopen(options);
1239 ASSERT_OK(Merge("a", "x1"));
1240 ASSERT_OK(Merge("c", "y1"));
1241 ASSERT_OK(Merge("e", "z1"));
1242 ASSERT_OK(Flush());
1243 ASSERT_OK(Merge("c", "y2"));
1244 ASSERT_OK(Merge("e", "z2"));
1245 ASSERT_OK(Flush());
1246
1247 // Get() between blocks shouldn't read any blocks.
1248 ASSERT_EQ("NOT_FOUND", Get("b"));
1249 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1250 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1251
1252 // Get() of an existing key shouldn't read any unnecessary blocks when there's
1253 // only one key per block.
1254
1255 ASSERT_EQ("y1,y2", Get("c"));
1256 EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1257 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1258
1259 ASSERT_EQ("x1", Get("a"));
1260 EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1261 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1262
1263 EXPECT_EQ(std::vector<std::string>({"NOT_FOUND", "z1,z2"}),
1264 MultiGet({"b", "e"}));
1265 }
1266
1267 // TODO(3.13): fix the issue of Seek() + Prev() which might not necessary
1268 // return the biggest key which is smaller than the seek key.
1269 TEST_P(DBIteratorTest, PrevAfterAndNextAfterMerge) {
1270 Options options;
1271 options.create_if_missing = true;
1272 options.merge_operator = MergeOperators::CreatePutOperator();
1273 options.env = env_;
1274 DestroyAndReopen(options);
1275
1276 // write three entries with different keys using Merge()
1277 WriteOptions wopts;
1278 db_->Merge(wopts, "1", "data1");
1279 db_->Merge(wopts, "2", "data2");
1280 db_->Merge(wopts, "3", "data3");
1281
1282 std::unique_ptr<Iterator> it(NewIterator(ReadOptions()));
1283
1284 it->Seek("2");
1285 ASSERT_TRUE(it->Valid());
1286 ASSERT_EQ("2", it->key().ToString());
1287
1288 it->Prev();
1289 ASSERT_TRUE(it->Valid());
1290 ASSERT_EQ("1", it->key().ToString());
1291
1292 it->SeekForPrev("1");
1293 ASSERT_TRUE(it->Valid());
1294 ASSERT_EQ("1", it->key().ToString());
1295
1296 it->Next();
1297 ASSERT_TRUE(it->Valid());
1298 ASSERT_EQ("2", it->key().ToString());
1299 }
1300
1301 class DBIteratorTestForPinnedData : public DBIteratorTest {
1302 public:
1303 enum TestConfig {
1304 NORMAL,
1305 CLOSE_AND_OPEN,
1306 COMPACT_BEFORE_READ,
1307 FLUSH_EVERY_1000,
1308 MAX
1309 };
1310 DBIteratorTestForPinnedData() : DBIteratorTest() {}
1311 void PinnedDataIteratorRandomized(TestConfig run_config) {
1312 // Generate Random data
1313 Random rnd(301);
1314
1315 int puts = 100000;
1316 int key_pool = static_cast<int>(puts * 0.7);
1317 int key_size = 100;
1318 int val_size = 1000;
1319 int seeks_percentage = 20; // 20% of keys will be used to test seek()
1320 int delete_percentage = 20; // 20% of keys will be deleted
1321 int merge_percentage = 20; // 20% of keys will be added using Merge()
1322
1323 Options options = CurrentOptions();
1324 BlockBasedTableOptions table_options;
1325 table_options.use_delta_encoding = false;
1326 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1327 options.merge_operator = MergeOperators::CreatePutOperator();
1328 DestroyAndReopen(options);
1329
1330 std::vector<std::string> generated_keys(key_pool);
1331 for (int i = 0; i < key_pool; i++) {
1332 generated_keys[i] = RandomString(&rnd, key_size);
1333 }
1334
1335 std::map<std::string, std::string> true_data;
1336 std::vector<std::string> random_keys;
1337 std::vector<std::string> deleted_keys;
1338 for (int i = 0; i < puts; i++) {
1339 auto& k = generated_keys[rnd.Next() % key_pool];
1340 auto v = RandomString(&rnd, val_size);
1341
1342 // Insert data to true_data map and to DB
1343 true_data[k] = v;
1344 if (rnd.PercentTrue(merge_percentage)) {
1345 ASSERT_OK(db_->Merge(WriteOptions(), k, v));
1346 } else {
1347 ASSERT_OK(Put(k, v));
1348 }
1349
1350 // Pick random keys to be used to test Seek()
1351 if (rnd.PercentTrue(seeks_percentage)) {
1352 random_keys.push_back(k);
1353 }
1354
1355 // Delete some random keys
1356 if (rnd.PercentTrue(delete_percentage)) {
1357 deleted_keys.push_back(k);
1358 true_data.erase(k);
1359 ASSERT_OK(Delete(k));
1360 }
1361
1362 if (run_config == TestConfig::FLUSH_EVERY_1000) {
1363 if (i && i % 1000 == 0) {
1364 Flush();
1365 }
1366 }
1367 }
1368
1369 if (run_config == TestConfig::CLOSE_AND_OPEN) {
1370 Close();
1371 Reopen(options);
1372 } else if (run_config == TestConfig::COMPACT_BEFORE_READ) {
1373 db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
1374 }
1375
1376 ReadOptions ro;
1377 ro.pin_data = true;
1378 auto iter = NewIterator(ro);
1379
1380 {
1381 // Test Seek to random keys
1382 std::vector<Slice> keys_slices;
1383 std::vector<std::string> true_keys;
1384 for (auto& k : random_keys) {
1385 iter->Seek(k);
1386 if (!iter->Valid()) {
1387 ASSERT_EQ(true_data.lower_bound(k), true_data.end());
1388 continue;
1389 }
1390 std::string prop_value;
1391 ASSERT_OK(
1392 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1393 ASSERT_EQ("1", prop_value);
1394 keys_slices.push_back(iter->key());
1395 true_keys.push_back(true_data.lower_bound(k)->first);
1396 }
1397
1398 for (size_t i = 0; i < keys_slices.size(); i++) {
1399 ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
1400 }
1401 }
1402
1403 {
1404 // Test SeekForPrev to random keys
1405 std::vector<Slice> keys_slices;
1406 std::vector<std::string> true_keys;
1407 for (auto& k : random_keys) {
1408 iter->SeekForPrev(k);
1409 if (!iter->Valid()) {
1410 ASSERT_EQ(true_data.upper_bound(k), true_data.begin());
1411 continue;
1412 }
1413 std::string prop_value;
1414 ASSERT_OK(
1415 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1416 ASSERT_EQ("1", prop_value);
1417 keys_slices.push_back(iter->key());
1418 true_keys.push_back((--true_data.upper_bound(k))->first);
1419 }
1420
1421 for (size_t i = 0; i < keys_slices.size(); i++) {
1422 ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
1423 }
1424 }
1425
1426 {
1427 // Test iterating all data forward
1428 std::vector<Slice> all_keys;
1429 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1430 std::string prop_value;
1431 ASSERT_OK(
1432 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1433 ASSERT_EQ("1", prop_value);
1434 all_keys.push_back(iter->key());
1435 }
1436 ASSERT_EQ(all_keys.size(), true_data.size());
1437
1438 // Verify that all keys slices are valid
1439 auto data_iter = true_data.begin();
1440 for (size_t i = 0; i < all_keys.size(); i++) {
1441 ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
1442 data_iter++;
1443 }
1444 }
1445
1446 {
1447 // Test iterating all data backward
1448 std::vector<Slice> all_keys;
1449 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
1450 std::string prop_value;
1451 ASSERT_OK(
1452 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1453 ASSERT_EQ("1", prop_value);
1454 all_keys.push_back(iter->key());
1455 }
1456 ASSERT_EQ(all_keys.size(), true_data.size());
1457
1458 // Verify that all keys slices are valid (backward)
1459 auto data_iter = true_data.rbegin();
1460 for (size_t i = 0; i < all_keys.size(); i++) {
1461 ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
1462 data_iter++;
1463 }
1464 }
1465
1466 delete iter;
1467 }
1468 };
1469
1470 TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedNormal) {
1471 PinnedDataIteratorRandomized(TestConfig::NORMAL);
1472 }
1473
1474 TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedCLoseAndOpen) {
1475 PinnedDataIteratorRandomized(TestConfig::CLOSE_AND_OPEN);
1476 }
1477
1478 TEST_P(DBIteratorTestForPinnedData,
1479 PinnedDataIteratorRandomizedCompactBeforeRead) {
1480 PinnedDataIteratorRandomized(TestConfig::COMPACT_BEFORE_READ);
1481 }
1482
1483 TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedFlush) {
1484 PinnedDataIteratorRandomized(TestConfig::FLUSH_EVERY_1000);
1485 }
1486
1487 #ifndef ROCKSDB_LITE
1488 TEST_P(DBIteratorTest, PinnedDataIteratorMultipleFiles) {
1489 Options options = CurrentOptions();
1490 BlockBasedTableOptions table_options;
1491 table_options.use_delta_encoding = false;
1492 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1493 options.disable_auto_compactions = true;
1494 options.write_buffer_size = 1024 * 1024 * 10; // 10 Mb
1495 DestroyAndReopen(options);
1496
1497 std::map<std::string, std::string> true_data;
1498
1499 // Generate 4 sst files in L2
1500 Random rnd(301);
1501 for (int i = 1; i <= 1000; i++) {
1502 std::string k = Key(i * 3);
1503 std::string v = RandomString(&rnd, 100);
1504 ASSERT_OK(Put(k, v));
1505 true_data[k] = v;
1506 if (i % 250 == 0) {
1507 ASSERT_OK(Flush());
1508 }
1509 }
1510 ASSERT_EQ(FilesPerLevel(0), "4");
1511 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1512 ASSERT_EQ(FilesPerLevel(0), "0,4");
1513
1514 // Generate 4 sst files in L0
1515 for (int i = 1; i <= 1000; i++) {
1516 std::string k = Key(i * 2);
1517 std::string v = RandomString(&rnd, 100);
1518 ASSERT_OK(Put(k, v));
1519 true_data[k] = v;
1520 if (i % 250 == 0) {
1521 ASSERT_OK(Flush());
1522 }
1523 }
1524 ASSERT_EQ(FilesPerLevel(0), "4,4");
1525
1526 // Add some keys/values in memtables
1527 for (int i = 1; i <= 1000; i++) {
1528 std::string k = Key(i);
1529 std::string v = RandomString(&rnd, 100);
1530 ASSERT_OK(Put(k, v));
1531 true_data[k] = v;
1532 }
1533 ASSERT_EQ(FilesPerLevel(0), "4,4");
1534
1535 ReadOptions ro;
1536 ro.pin_data = true;
1537 auto iter = NewIterator(ro);
1538
1539 std::vector<std::pair<Slice, std::string>> results;
1540 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1541 std::string prop_value;
1542 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1543 ASSERT_EQ("1", prop_value);
1544 results.emplace_back(iter->key(), iter->value().ToString());
1545 }
1546
1547 ASSERT_EQ(results.size(), true_data.size());
1548 auto data_iter = true_data.begin();
1549 for (size_t i = 0; i < results.size(); i++, data_iter++) {
1550 auto& kv = results[i];
1551 ASSERT_EQ(kv.first, data_iter->first);
1552 ASSERT_EQ(kv.second, data_iter->second);
1553 }
1554
1555 delete iter;
1556 }
1557 #endif
1558
1559 TEST_P(DBIteratorTest, PinnedDataIteratorMergeOperator) {
1560 Options options = CurrentOptions();
1561 BlockBasedTableOptions table_options;
1562 table_options.use_delta_encoding = false;
1563 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1564 options.merge_operator = MergeOperators::CreateUInt64AddOperator();
1565 DestroyAndReopen(options);
1566
1567 std::string numbers[7];
1568 for (int val = 0; val <= 6; val++) {
1569 PutFixed64(numbers + val, val);
1570 }
1571
1572 // +1 all keys in range [ 0 => 999]
1573 for (int i = 0; i < 1000; i++) {
1574 WriteOptions wo;
1575 ASSERT_OK(db_->Merge(wo, Key(i), numbers[1]));
1576 }
1577
1578 // +2 all keys divisible by 2 in range [ 0 => 999]
1579 for (int i = 0; i < 1000; i += 2) {
1580 WriteOptions wo;
1581 ASSERT_OK(db_->Merge(wo, Key(i), numbers[2]));
1582 }
1583
1584 // +3 all keys divisible by 5 in range [ 0 => 999]
1585 for (int i = 0; i < 1000; i += 5) {
1586 WriteOptions wo;
1587 ASSERT_OK(db_->Merge(wo, Key(i), numbers[3]));
1588 }
1589
1590 ReadOptions ro;
1591 ro.pin_data = true;
1592 auto iter = NewIterator(ro);
1593
1594 std::vector<std::pair<Slice, std::string>> results;
1595 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1596 std::string prop_value;
1597 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1598 ASSERT_EQ("1", prop_value);
1599 results.emplace_back(iter->key(), iter->value().ToString());
1600 }
1601
1602 ASSERT_EQ(results.size(), 1000);
1603 for (size_t i = 0; i < results.size(); i++) {
1604 auto& kv = results[i];
1605 ASSERT_EQ(kv.first, Key(static_cast<int>(i)));
1606 int expected_val = 1;
1607 if (i % 2 == 0) {
1608 expected_val += 2;
1609 }
1610 if (i % 5 == 0) {
1611 expected_val += 3;
1612 }
1613 ASSERT_EQ(kv.second, numbers[expected_val]);
1614 }
1615
1616 delete iter;
1617 }
1618
1619 TEST_P(DBIteratorTest, PinnedDataIteratorReadAfterUpdate) {
1620 Options options = CurrentOptions();
1621 BlockBasedTableOptions table_options;
1622 table_options.use_delta_encoding = false;
1623 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1624 options.write_buffer_size = 100000;
1625 DestroyAndReopen(options);
1626
1627 Random rnd(301);
1628
1629 std::map<std::string, std::string> true_data;
1630 for (int i = 0; i < 1000; i++) {
1631 std::string k = RandomString(&rnd, 10);
1632 std::string v = RandomString(&rnd, 1000);
1633 ASSERT_OK(Put(k, v));
1634 true_data[k] = v;
1635 }
1636
1637 ReadOptions ro;
1638 ro.pin_data = true;
1639 auto iter = NewIterator(ro);
1640
1641 // Delete 50% of the keys and update the other 50%
1642 for (auto& kv : true_data) {
1643 if (rnd.OneIn(2)) {
1644 ASSERT_OK(Delete(kv.first));
1645 } else {
1646 std::string new_val = RandomString(&rnd, 1000);
1647 ASSERT_OK(Put(kv.first, new_val));
1648 }
1649 }
1650
1651 std::vector<std::pair<Slice, std::string>> results;
1652 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1653 std::string prop_value;
1654 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1655 ASSERT_EQ("1", prop_value);
1656 results.emplace_back(iter->key(), iter->value().ToString());
1657 }
1658
1659 auto data_iter = true_data.begin();
1660 for (size_t i = 0; i < results.size(); i++, data_iter++) {
1661 auto& kv = results[i];
1662 ASSERT_EQ(kv.first, data_iter->first);
1663 ASSERT_EQ(kv.second, data_iter->second);
1664 }
1665
1666 delete iter;
1667 }
1668
1669 class SliceTransformLimitedDomainGeneric : public SliceTransform {
1670 const char* Name() const override {
1671 return "SliceTransformLimitedDomainGeneric";
1672 }
1673
1674 Slice Transform(const Slice& src) const override {
1675 return Slice(src.data(), 1);
1676 }
1677
1678 bool InDomain(const Slice& src) const override {
1679 // prefix will be x????
1680 return src.size() >= 1;
1681 }
1682
1683 bool InRange(const Slice& dst) const override {
1684 // prefix will be x????
1685 return dst.size() == 1;
1686 }
1687 };
1688
1689 TEST_P(DBIteratorTest, IterSeekForPrevCrossingFiles) {
1690 Options options = CurrentOptions();
1691 options.prefix_extractor.reset(NewFixedPrefixTransform(1));
1692 options.disable_auto_compactions = true;
1693 // Enable prefix bloom for SST files
1694 BlockBasedTableOptions table_options;
1695 table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
1696 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1697 DestroyAndReopen(options);
1698
1699 ASSERT_OK(Put("a1", "va1"));
1700 ASSERT_OK(Put("a2", "va2"));
1701 ASSERT_OK(Put("a3", "va3"));
1702 ASSERT_OK(Flush());
1703
1704 ASSERT_OK(Put("b1", "vb1"));
1705 ASSERT_OK(Put("b2", "vb2"));
1706 ASSERT_OK(Put("b3", "vb3"));
1707 ASSERT_OK(Flush());
1708
1709 ASSERT_OK(Put("b4", "vb4"));
1710 ASSERT_OK(Put("d1", "vd1"));
1711 ASSERT_OK(Put("d2", "vd2"));
1712 ASSERT_OK(Put("d4", "vd4"));
1713 ASSERT_OK(Flush());
1714
1715 MoveFilesToLevel(1);
1716 {
1717 ReadOptions ro;
1718 Iterator* iter = NewIterator(ro);
1719
1720 iter->SeekForPrev("a4");
1721 ASSERT_EQ(iter->key().ToString(), "a3");
1722 ASSERT_EQ(iter->value().ToString(), "va3");
1723
1724 iter->SeekForPrev("c2");
1725 ASSERT_EQ(iter->key().ToString(), "b3");
1726 iter->SeekForPrev("d3");
1727 ASSERT_EQ(iter->key().ToString(), "d2");
1728 iter->SeekForPrev("b5");
1729 ASSERT_EQ(iter->key().ToString(), "b4");
1730 delete iter;
1731 }
1732
1733 {
1734 ReadOptions ro;
1735 ro.prefix_same_as_start = true;
1736 Iterator* iter = NewIterator(ro);
1737 iter->SeekForPrev("c2");
1738 ASSERT_TRUE(!iter->Valid());
1739 delete iter;
1740 }
1741 }
1742
1743 TEST_P(DBIteratorTest, IterSeekForPrevCrossingFilesCustomPrefixExtractor) {
1744 Options options = CurrentOptions();
1745 options.prefix_extractor =
1746 std::make_shared<SliceTransformLimitedDomainGeneric>();
1747 options.disable_auto_compactions = true;
1748 // Enable prefix bloom for SST files
1749 BlockBasedTableOptions table_options;
1750 table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
1751 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1752 DestroyAndReopen(options);
1753
1754 ASSERT_OK(Put("a1", "va1"));
1755 ASSERT_OK(Put("a2", "va2"));
1756 ASSERT_OK(Put("a3", "va3"));
1757 ASSERT_OK(Flush());
1758
1759 ASSERT_OK(Put("b1", "vb1"));
1760 ASSERT_OK(Put("b2", "vb2"));
1761 ASSERT_OK(Put("b3", "vb3"));
1762 ASSERT_OK(Flush());
1763
1764 ASSERT_OK(Put("b4", "vb4"));
1765 ASSERT_OK(Put("d1", "vd1"));
1766 ASSERT_OK(Put("d2", "vd2"));
1767 ASSERT_OK(Put("d4", "vd4"));
1768 ASSERT_OK(Flush());
1769
1770 MoveFilesToLevel(1);
1771 {
1772 ReadOptions ro;
1773 Iterator* iter = NewIterator(ro);
1774
1775 iter->SeekForPrev("a4");
1776 ASSERT_EQ(iter->key().ToString(), "a3");
1777 ASSERT_EQ(iter->value().ToString(), "va3");
1778
1779 iter->SeekForPrev("c2");
1780 ASSERT_EQ(iter->key().ToString(), "b3");
1781 iter->SeekForPrev("d3");
1782 ASSERT_EQ(iter->key().ToString(), "d2");
1783 iter->SeekForPrev("b5");
1784 ASSERT_EQ(iter->key().ToString(), "b4");
1785 delete iter;
1786 }
1787
1788 {
1789 ReadOptions ro;
1790 ro.prefix_same_as_start = true;
1791 Iterator* iter = NewIterator(ro);
1792 iter->SeekForPrev("c2");
1793 ASSERT_TRUE(!iter->Valid());
1794 delete iter;
1795 }
1796 }
1797
1798 TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocks) {
1799 Options options = CurrentOptions();
1800 BlockBasedTableOptions table_options;
1801 table_options.block_size = 1; // every block will contain one entry
1802 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1803 options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
1804 options.disable_auto_compactions = true;
1805 options.max_sequential_skip_in_iterations = 8;
1806
1807 DestroyAndReopen(options);
1808
1809 // Putting such deletes will force DBIter::Prev() to fallback to a Seek
1810 for (int file_num = 0; file_num < 10; file_num++) {
1811 ASSERT_OK(Delete("key4"));
1812 ASSERT_OK(Flush());
1813 }
1814
1815 // First File containing 5 blocks of puts
1816 ASSERT_OK(Put("key1", "val1.0"));
1817 ASSERT_OK(Put("key2", "val2.0"));
1818 ASSERT_OK(Put("key3", "val3.0"));
1819 ASSERT_OK(Put("key4", "val4.0"));
1820 ASSERT_OK(Put("key5", "val5.0"));
1821 ASSERT_OK(Flush());
1822
1823 // Second file containing 9 blocks of merge operands
1824 ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.1"));
1825 ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.2"));
1826
1827 ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.1"));
1828 ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.2"));
1829 ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.3"));
1830
1831 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.1"));
1832 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.2"));
1833 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.3"));
1834 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.4"));
1835 ASSERT_OK(Flush());
1836
1837 {
1838 ReadOptions ro;
1839 ro.fill_cache = false;
1840 Iterator* iter = NewIterator(ro);
1841
1842 iter->SeekToLast();
1843 ASSERT_EQ(iter->key().ToString(), "key5");
1844 ASSERT_EQ(iter->value().ToString(), "val5.0");
1845
1846 iter->Prev();
1847 ASSERT_EQ(iter->key().ToString(), "key4");
1848 ASSERT_EQ(iter->value().ToString(), "val4.0");
1849
1850 iter->Prev();
1851 ASSERT_EQ(iter->key().ToString(), "key3");
1852 ASSERT_EQ(iter->value().ToString(), "val3.0,val3.1,val3.2,val3.3,val3.4");
1853
1854 iter->Prev();
1855 ASSERT_EQ(iter->key().ToString(), "key2");
1856 ASSERT_EQ(iter->value().ToString(), "val2.0,val2.1,val2.2,val2.3");
1857
1858 iter->Prev();
1859 ASSERT_EQ(iter->key().ToString(), "key1");
1860 ASSERT_EQ(iter->value().ToString(), "val1.0,val1.1,val1.2");
1861
1862 delete iter;
1863 }
1864 }
1865
1866 TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocksRandomized) {
1867 Options options = CurrentOptions();
1868 options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
1869 options.disable_auto_compactions = true;
1870 options.level0_slowdown_writes_trigger = (1 << 30);
1871 options.level0_stop_writes_trigger = (1 << 30);
1872 options.max_sequential_skip_in_iterations = 8;
1873 DestroyAndReopen(options);
1874
1875 const int kNumKeys = 500;
1876 // Small number of merge operands to make sure that DBIter::Prev() dont
1877 // fall back to Seek()
1878 const int kNumMergeOperands = 3;
1879 // Use value size that will make sure that every block contain 1 key
1880 const int kValSize =
1881 static_cast<int>(BlockBasedTableOptions().block_size) * 4;
1882 // Percentage of keys that wont get merge operations
1883 const int kNoMergeOpPercentage = 20;
1884 // Percentage of keys that will be deleted
1885 const int kDeletePercentage = 10;
1886
1887 // For half of the key range we will write multiple deletes first to
1888 // force DBIter::Prev() to fall back to Seek()
1889 for (int file_num = 0; file_num < 10; file_num++) {
1890 for (int i = 0; i < kNumKeys; i += 2) {
1891 ASSERT_OK(Delete(Key(i)));
1892 }
1893 ASSERT_OK(Flush());
1894 }
1895
1896 Random rnd(301);
1897 std::map<std::string, std::string> true_data;
1898 std::string gen_key;
1899 std::string gen_val;
1900
1901 for (int i = 0; i < kNumKeys; i++) {
1902 gen_key = Key(i);
1903 gen_val = RandomString(&rnd, kValSize);
1904
1905 ASSERT_OK(Put(gen_key, gen_val));
1906 true_data[gen_key] = gen_val;
1907 }
1908 ASSERT_OK(Flush());
1909
1910 // Separate values and merge operands in different file so that we
1911 // make sure that we dont merge them while flushing but actually
1912 // merge them in the read path
1913 for (int i = 0; i < kNumKeys; i++) {
1914 if (rnd.PercentTrue(kNoMergeOpPercentage)) {
1915 // Dont give merge operations for some keys
1916 continue;
1917 }
1918
1919 for (int j = 0; j < kNumMergeOperands; j++) {
1920 gen_key = Key(i);
1921 gen_val = RandomString(&rnd, kValSize);
1922
1923 ASSERT_OK(db_->Merge(WriteOptions(), gen_key, gen_val));
1924 true_data[gen_key] += "," + gen_val;
1925 }
1926 }
1927 ASSERT_OK(Flush());
1928
1929 for (int i = 0; i < kNumKeys; i++) {
1930 if (rnd.PercentTrue(kDeletePercentage)) {
1931 gen_key = Key(i);
1932
1933 ASSERT_OK(Delete(gen_key));
1934 true_data.erase(gen_key);
1935 }
1936 }
1937 ASSERT_OK(Flush());
1938
1939 {
1940 ReadOptions ro;
1941 ro.fill_cache = false;
1942 Iterator* iter = NewIterator(ro);
1943 auto data_iter = true_data.rbegin();
1944
1945 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
1946 ASSERT_EQ(iter->key().ToString(), data_iter->first);
1947 ASSERT_EQ(iter->value().ToString(), data_iter->second);
1948 data_iter++;
1949 }
1950 ASSERT_EQ(data_iter, true_data.rend());
1951
1952 delete iter;
1953 }
1954
1955 {
1956 ReadOptions ro;
1957 ro.fill_cache = false;
1958 Iterator* iter = NewIterator(ro);
1959 auto data_iter = true_data.rbegin();
1960
1961 int entries_right = 0;
1962 std::string seek_key;
1963 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
1964 // Verify key/value of current position
1965 ASSERT_EQ(iter->key().ToString(), data_iter->first);
1966 ASSERT_EQ(iter->value().ToString(), data_iter->second);
1967
1968 bool restore_position_with_seek = rnd.Uniform(2);
1969 if (restore_position_with_seek) {
1970 seek_key = iter->key().ToString();
1971 }
1972
1973 // Do some Next() operations the restore the iterator to orignal position
1974 int next_count =
1975 entries_right > 0 ? rnd.Uniform(std::min(entries_right, 10)) : 0;
1976 for (int i = 0; i < next_count; i++) {
1977 iter->Next();
1978 data_iter--;
1979
1980 ASSERT_EQ(iter->key().ToString(), data_iter->first);
1981 ASSERT_EQ(iter->value().ToString(), data_iter->second);
1982 }
1983
1984 if (restore_position_with_seek) {
1985 // Restore orignal position using Seek()
1986 iter->Seek(seek_key);
1987 for (int i = 0; i < next_count; i++) {
1988 data_iter++;
1989 }
1990
1991 ASSERT_EQ(iter->key().ToString(), data_iter->first);
1992 ASSERT_EQ(iter->value().ToString(), data_iter->second);
1993 } else {
1994 // Restore original position using Prev()
1995 for (int i = 0; i < next_count; i++) {
1996 iter->Prev();
1997 data_iter++;
1998
1999 ASSERT_EQ(iter->key().ToString(), data_iter->first);
2000 ASSERT_EQ(iter->value().ToString(), data_iter->second);
2001 }
2002 }
2003
2004 entries_right++;
2005 data_iter++;
2006 }
2007 ASSERT_EQ(data_iter, true_data.rend());
2008
2009 delete iter;
2010 }
2011 }
2012
2013 TEST_P(DBIteratorTest, IteratorWithLocalStatistics) {
2014 Options options = CurrentOptions();
2015 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2016 DestroyAndReopen(options);
2017
2018 Random rnd(301);
2019 for (int i = 0; i < 1000; i++) {
2020 // Key 10 bytes / Value 10 bytes
2021 ASSERT_OK(Put(RandomString(&rnd, 10), RandomString(&rnd, 10)));
2022 }
2023
2024 std::atomic<uint64_t> total_next(0);
2025 std::atomic<uint64_t> total_next_found(0);
2026 std::atomic<uint64_t> total_prev(0);
2027 std::atomic<uint64_t> total_prev_found(0);
2028 std::atomic<uint64_t> total_bytes(0);
2029
2030 std::vector<port::Thread> threads;
2031 std::function<void()> reader_func_next = [&]() {
2032 SetPerfLevel(kEnableCount);
2033 get_perf_context()->Reset();
2034 Iterator* iter = NewIterator(ReadOptions());
2035
2036 iter->SeekToFirst();
2037 // Seek will bump ITER_BYTES_READ
2038 uint64_t bytes = 0;
2039 bytes += iter->key().size();
2040 bytes += iter->value().size();
2041 while (true) {
2042 iter->Next();
2043 total_next++;
2044
2045 if (!iter->Valid()) {
2046 break;
2047 }
2048 total_next_found++;
2049 bytes += iter->key().size();
2050 bytes += iter->value().size();
2051 }
2052
2053 delete iter;
2054 ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes);
2055 SetPerfLevel(kDisable);
2056 total_bytes += bytes;
2057 };
2058
2059 std::function<void()> reader_func_prev = [&]() {
2060 SetPerfLevel(kEnableCount);
2061 Iterator* iter = NewIterator(ReadOptions());
2062
2063 iter->SeekToLast();
2064 // Seek will bump ITER_BYTES_READ
2065 uint64_t bytes = 0;
2066 bytes += iter->key().size();
2067 bytes += iter->value().size();
2068 while (true) {
2069 iter->Prev();
2070 total_prev++;
2071
2072 if (!iter->Valid()) {
2073 break;
2074 }
2075 total_prev_found++;
2076 bytes += iter->key().size();
2077 bytes += iter->value().size();
2078 }
2079
2080 delete iter;
2081 ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes);
2082 SetPerfLevel(kDisable);
2083 total_bytes += bytes;
2084 };
2085
2086 for (int i = 0; i < 10; i++) {
2087 threads.emplace_back(reader_func_next);
2088 }
2089 for (int i = 0; i < 15; i++) {
2090 threads.emplace_back(reader_func_prev);
2091 }
2092
2093 for (auto& t : threads) {
2094 t.join();
2095 }
2096
2097 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), (uint64_t)total_next);
2098 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND),
2099 (uint64_t)total_next_found);
2100 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), (uint64_t)total_prev);
2101 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND),
2102 (uint64_t)total_prev_found);
2103 ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), (uint64_t)total_bytes);
2104
2105 }
2106
2107 TEST_P(DBIteratorTest, ReadAhead) {
2108 Options options;
2109 env_->count_random_reads_ = true;
2110 options.env = env_;
2111 options.disable_auto_compactions = true;
2112 options.write_buffer_size = 4 << 20;
2113 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2114 BlockBasedTableOptions table_options;
2115 table_options.block_size = 1024;
2116 table_options.no_block_cache = true;
2117 options.table_factory.reset(new BlockBasedTableFactory(table_options));
2118 Reopen(options);
2119
2120 std::string value(1024, 'a');
2121 for (int i = 0; i < 100; i++) {
2122 Put(Key(i), value);
2123 }
2124 ASSERT_OK(Flush());
2125 MoveFilesToLevel(2);
2126
2127 for (int i = 0; i < 100; i++) {
2128 Put(Key(i), value);
2129 }
2130 ASSERT_OK(Flush());
2131 MoveFilesToLevel(1);
2132
2133 for (int i = 0; i < 100; i++) {
2134 Put(Key(i), value);
2135 }
2136 ASSERT_OK(Flush());
2137 #ifndef ROCKSDB_LITE
2138 ASSERT_EQ("1,1,1", FilesPerLevel());
2139 #endif // !ROCKSDB_LITE
2140
2141 env_->random_read_bytes_counter_ = 0;
2142 options.statistics->setTickerCount(NO_FILE_OPENS, 0);
2143 ReadOptions read_options;
2144 auto* iter = NewIterator(read_options);
2145 iter->SeekToFirst();
2146 int64_t num_file_opens = TestGetTickerCount(options, NO_FILE_OPENS);
2147 size_t bytes_read = env_->random_read_bytes_counter_;
2148 delete iter;
2149
2150 int64_t num_file_closes = TestGetTickerCount(options, NO_FILE_CLOSES);
2151 env_->random_read_bytes_counter_ = 0;
2152 options.statistics->setTickerCount(NO_FILE_OPENS, 0);
2153 read_options.readahead_size = 1024 * 10;
2154 iter = NewIterator(read_options);
2155 iter->SeekToFirst();
2156 int64_t num_file_opens_readahead = TestGetTickerCount(options, NO_FILE_OPENS);
2157 size_t bytes_read_readahead = env_->random_read_bytes_counter_;
2158 delete iter;
2159 int64_t num_file_closes_readahead =
2160 TestGetTickerCount(options, NO_FILE_CLOSES);
2161 ASSERT_EQ(num_file_opens, num_file_opens_readahead);
2162 ASSERT_EQ(num_file_closes, num_file_closes_readahead);
2163 ASSERT_GT(bytes_read_readahead, bytes_read);
2164 ASSERT_GT(bytes_read_readahead, read_options.readahead_size * 3);
2165
2166 // Verify correctness.
2167 iter = NewIterator(read_options);
2168 int count = 0;
2169 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
2170 ASSERT_EQ(value, iter->value());
2171 count++;
2172 }
2173 ASSERT_EQ(100, count);
2174 for (int i = 0; i < 100; i++) {
2175 iter->Seek(Key(i));
2176 ASSERT_EQ(value, iter->value());
2177 }
2178 delete iter;
2179 }
2180
2181 // Insert a key, create a snapshot iterator, overwrite key lots of times,
2182 // seek to a smaller key. Expect DBIter to fall back to a seek instead of
2183 // going through all the overwrites linearly.
2184 TEST_P(DBIteratorTest, DBIteratorSkipRecentDuplicatesTest) {
2185 Options options = CurrentOptions();
2186 options.env = env_;
2187 options.create_if_missing = true;
2188 options.max_sequential_skip_in_iterations = 3;
2189 options.prefix_extractor = nullptr;
2190 options.write_buffer_size = 1 << 27; // big enough to avoid flush
2191 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2192 DestroyAndReopen(options);
2193
2194 // Insert.
2195 ASSERT_OK(Put("b", "0"));
2196
2197 // Create iterator.
2198 ReadOptions ro;
2199 std::unique_ptr<Iterator> iter(NewIterator(ro));
2200
2201 // Insert a lot.
2202 for (int i = 0; i < 100; ++i) {
2203 ASSERT_OK(Put("b", std::to_string(i + 1).c_str()));
2204 }
2205
2206 #ifndef ROCKSDB_LITE
2207 // Check that memtable wasn't flushed.
2208 std::string val;
2209 ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level0", &val));
2210 EXPECT_EQ("0", val);
2211 #endif
2212
2213 // Seek iterator to a smaller key.
2214 get_perf_context()->Reset();
2215 iter->Seek("a");
2216 ASSERT_TRUE(iter->Valid());
2217 EXPECT_EQ("b", iter->key().ToString());
2218 EXPECT_EQ("0", iter->value().ToString());
2219
2220 // Check that the seek didn't do too much work.
2221 // Checks are not tight, just make sure that everything is well below 100.
2222 EXPECT_LT(get_perf_context()->internal_key_skipped_count, 4);
2223 EXPECT_LT(get_perf_context()->internal_recent_skipped_count, 8);
2224 EXPECT_LT(get_perf_context()->seek_on_memtable_count, 10);
2225 EXPECT_LT(get_perf_context()->next_on_memtable_count, 10);
2226 EXPECT_LT(get_perf_context()->prev_on_memtable_count, 10);
2227
2228 // Check that iterator did something like what we expect.
2229 EXPECT_EQ(get_perf_context()->internal_delete_skipped_count, 0);
2230 EXPECT_EQ(get_perf_context()->internal_merge_count, 0);
2231 EXPECT_GE(get_perf_context()->internal_recent_skipped_count, 2);
2232 EXPECT_GE(get_perf_context()->seek_on_memtable_count, 2);
2233 EXPECT_EQ(1, options.statistics->getTickerCount(
2234 NUMBER_OF_RESEEKS_IN_ITERATION));
2235 }
2236
2237 TEST_P(DBIteratorTest, Refresh) {
2238 ASSERT_OK(Put("x", "y"));
2239
2240 std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
2241 iter->Seek(Slice("a"));
2242 ASSERT_TRUE(iter->Valid());
2243 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2244 iter->Next();
2245 ASSERT_FALSE(iter->Valid());
2246
2247 ASSERT_OK(Put("c", "d"));
2248
2249 iter->Seek(Slice("a"));
2250 ASSERT_TRUE(iter->Valid());
2251 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2252 iter->Next();
2253 ASSERT_FALSE(iter->Valid());
2254
2255 iter->Refresh();
2256
2257 iter->Seek(Slice("a"));
2258 ASSERT_TRUE(iter->Valid());
2259 ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2260 iter->Next();
2261 ASSERT_TRUE(iter->Valid());
2262 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2263 iter->Next();
2264 ASSERT_FALSE(iter->Valid());
2265
2266 dbfull()->Flush(FlushOptions());
2267
2268 ASSERT_OK(Put("m", "n"));
2269
2270 iter->Seek(Slice("a"));
2271 ASSERT_TRUE(iter->Valid());
2272 ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2273 iter->Next();
2274 ASSERT_TRUE(iter->Valid());
2275 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2276 iter->Next();
2277 ASSERT_FALSE(iter->Valid());
2278
2279 iter->Refresh();
2280
2281 iter->Seek(Slice("a"));
2282 ASSERT_TRUE(iter->Valid());
2283 ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2284 iter->Next();
2285 ASSERT_TRUE(iter->Valid());
2286 ASSERT_EQ(iter->key().compare(Slice("m")), 0);
2287 iter->Next();
2288 ASSERT_TRUE(iter->Valid());
2289 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2290 iter->Next();
2291 ASSERT_FALSE(iter->Valid());
2292
2293 iter.reset();
2294 }
2295
2296 TEST_P(DBIteratorTest, RefreshWithSnapshot) {
2297 ASSERT_OK(Put("x", "y"));
2298 const Snapshot* snapshot = db_->GetSnapshot();
2299 ReadOptions options;
2300 options.snapshot = snapshot;
2301 Iterator* iter = NewIterator(options);
2302
2303 iter->Seek(Slice("a"));
2304 ASSERT_TRUE(iter->Valid());
2305 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2306 iter->Next();
2307 ASSERT_FALSE(iter->Valid());
2308
2309 ASSERT_OK(Put("c", "d"));
2310
2311 iter->Seek(Slice("a"));
2312 ASSERT_TRUE(iter->Valid());
2313 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2314 iter->Next();
2315 ASSERT_FALSE(iter->Valid());
2316
2317 Status s;
2318 s = iter->Refresh();
2319 ASSERT_TRUE(s.IsNotSupported());
2320 db_->ReleaseSnapshot(snapshot);
2321 delete iter;
2322 }
2323
2324 TEST_P(DBIteratorTest, CreationFailure) {
2325 SyncPoint::GetInstance()->SetCallBack(
2326 "DBImpl::NewInternalIterator:StatusCallback", [](void* arg) {
2327 *(reinterpret_cast<Status*>(arg)) = Status::Corruption("test status");
2328 });
2329 SyncPoint::GetInstance()->EnableProcessing();
2330
2331 Iterator* iter = NewIterator(ReadOptions());
2332 ASSERT_FALSE(iter->Valid());
2333 ASSERT_TRUE(iter->status().IsCorruption());
2334 delete iter;
2335 }
2336
2337 TEST_P(DBIteratorTest, UpperBoundWithChangeDirection) {
2338 Options options = CurrentOptions();
2339 options.max_sequential_skip_in_iterations = 3;
2340 DestroyAndReopen(options);
2341
2342 // write a bunch of kvs to the database.
2343 ASSERT_OK(Put("a", "1"));
2344 ASSERT_OK(Put("y", "1"));
2345 ASSERT_OK(Put("y1", "1"));
2346 ASSERT_OK(Put("y2", "1"));
2347 ASSERT_OK(Put("y3", "1"));
2348 ASSERT_OK(Put("z", "1"));
2349 ASSERT_OK(Flush());
2350 ASSERT_OK(Put("a", "1"));
2351 ASSERT_OK(Put("z", "1"));
2352 ASSERT_OK(Put("bar", "1"));
2353 ASSERT_OK(Put("foo", "1"));
2354
2355 std::string upper_bound = "x";
2356 Slice ub_slice(upper_bound);
2357 ReadOptions ro;
2358 ro.iterate_upper_bound = &ub_slice;
2359 ro.max_skippable_internal_keys = 1000;
2360
2361 Iterator* iter = NewIterator(ro);
2362 iter->Seek("foo");
2363 ASSERT_TRUE(iter->Valid());
2364 ASSERT_EQ("foo", iter->key().ToString());
2365
2366 iter->Prev();
2367 ASSERT_TRUE(iter->Valid());
2368 ASSERT_OK(iter->status());
2369 ASSERT_EQ("bar", iter->key().ToString());
2370
2371 delete iter;
2372 }
2373
2374 TEST_P(DBIteratorTest, TableFilter) {
2375 ASSERT_OK(Put("a", "1"));
2376 dbfull()->Flush(FlushOptions());
2377 ASSERT_OK(Put("b", "2"));
2378 ASSERT_OK(Put("c", "3"));
2379 dbfull()->Flush(FlushOptions());
2380 ASSERT_OK(Put("d", "4"));
2381 ASSERT_OK(Put("e", "5"));
2382 ASSERT_OK(Put("f", "6"));
2383 dbfull()->Flush(FlushOptions());
2384
2385 // Ensure the table_filter callback is called once for each table.
2386 {
2387 std::set<uint64_t> unseen{1, 2, 3};
2388 ReadOptions opts;
2389 opts.table_filter = [&](const TableProperties& props) {
2390 auto it = unseen.find(props.num_entries);
2391 if (it == unseen.end()) {
2392 ADD_FAILURE() << "saw table properties with an unexpected "
2393 << props.num_entries << " entries";
2394 } else {
2395 unseen.erase(it);
2396 }
2397 return true;
2398 };
2399 auto iter = NewIterator(opts);
2400 iter->SeekToFirst();
2401 ASSERT_EQ(IterStatus(iter), "a->1");
2402 iter->Next();
2403 ASSERT_EQ(IterStatus(iter), "b->2");
2404 iter->Next();
2405 ASSERT_EQ(IterStatus(iter), "c->3");
2406 iter->Next();
2407 ASSERT_EQ(IterStatus(iter), "d->4");
2408 iter->Next();
2409 ASSERT_EQ(IterStatus(iter), "e->5");
2410 iter->Next();
2411 ASSERT_EQ(IterStatus(iter), "f->6");
2412 iter->Next();
2413 ASSERT_FALSE(iter->Valid());
2414 ASSERT_TRUE(unseen.empty());
2415 delete iter;
2416 }
2417
2418 // Ensure returning false in the table_filter hides the keys from that table
2419 // during iteration.
2420 {
2421 ReadOptions opts;
2422 opts.table_filter = [](const TableProperties& props) {
2423 return props.num_entries != 2;
2424 };
2425 auto iter = NewIterator(opts);
2426 iter->SeekToFirst();
2427 ASSERT_EQ(IterStatus(iter), "a->1");
2428 iter->Next();
2429 ASSERT_EQ(IterStatus(iter), "d->4");
2430 iter->Next();
2431 ASSERT_EQ(IterStatus(iter), "e->5");
2432 iter->Next();
2433 ASSERT_EQ(IterStatus(iter), "f->6");
2434 iter->Next();
2435 ASSERT_FALSE(iter->Valid());
2436 delete iter;
2437 }
2438 }
2439
2440 TEST_P(DBIteratorTest, UpperBoundWithPrevReseek) {
2441 Options options = CurrentOptions();
2442 options.max_sequential_skip_in_iterations = 3;
2443 DestroyAndReopen(options);
2444
2445 // write a bunch of kvs to the database.
2446 ASSERT_OK(Put("a", "1"));
2447 ASSERT_OK(Put("y", "1"));
2448 ASSERT_OK(Put("z", "1"));
2449 ASSERT_OK(Flush());
2450 ASSERT_OK(Put("a", "1"));
2451 ASSERT_OK(Put("z", "1"));
2452 ASSERT_OK(Put("bar", "1"));
2453 ASSERT_OK(Put("foo", "1"));
2454 ASSERT_OK(Put("foo", "2"));
2455
2456 ASSERT_OK(Put("foo", "3"));
2457 ASSERT_OK(Put("foo", "4"));
2458 ASSERT_OK(Put("foo", "5"));
2459 const Snapshot* snapshot = db_->GetSnapshot();
2460 ASSERT_OK(Put("foo", "6"));
2461
2462 std::string upper_bound = "x";
2463 Slice ub_slice(upper_bound);
2464 ReadOptions ro;
2465 ro.snapshot = snapshot;
2466 ro.iterate_upper_bound = &ub_slice;
2467
2468 Iterator* iter = NewIterator(ro);
2469 iter->SeekForPrev("goo");
2470 ASSERT_TRUE(iter->Valid());
2471 ASSERT_EQ("foo", iter->key().ToString());
2472 iter->Prev();
2473
2474 ASSERT_TRUE(iter->Valid());
2475 ASSERT_EQ("bar", iter->key().ToString());
2476
2477 delete iter;
2478 db_->ReleaseSnapshot(snapshot);
2479 }
2480
2481 TEST_P(DBIteratorTest, SkipStatistics) {
2482 Options options = CurrentOptions();
2483 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2484 DestroyAndReopen(options);
2485
2486 int skip_count = 0;
2487
2488 // write a bunch of kvs to the database.
2489 ASSERT_OK(Put("a", "1"));
2490 ASSERT_OK(Put("b", "1"));
2491 ASSERT_OK(Put("c", "1"));
2492 ASSERT_OK(Flush());
2493 ASSERT_OK(Put("d", "1"));
2494 ASSERT_OK(Put("e", "1"));
2495 ASSERT_OK(Put("f", "1"));
2496 ASSERT_OK(Put("a", "2"));
2497 ASSERT_OK(Put("b", "2"));
2498 ASSERT_OK(Flush());
2499 ASSERT_OK(Delete("d"));
2500 ASSERT_OK(Delete("e"));
2501 ASSERT_OK(Delete("f"));
2502
2503 Iterator* iter = NewIterator(ReadOptions());
2504 int count = 0;
2505 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
2506 ASSERT_OK(iter->status());
2507 count++;
2508 }
2509 ASSERT_EQ(count, 3);
2510 delete iter;
2511 skip_count += 8; // 3 deletes + 3 original keys + 2 lower in sequence
2512 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2513
2514 iter = NewIterator(ReadOptions());
2515 count = 0;
2516 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
2517 ASSERT_OK(iter->status());
2518 count++;
2519 }
2520 ASSERT_EQ(count, 3);
2521 delete iter;
2522 skip_count += 8; // Same as above, but in reverse order
2523 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2524
2525 ASSERT_OK(Put("aa", "1"));
2526 ASSERT_OK(Put("ab", "1"));
2527 ASSERT_OK(Put("ac", "1"));
2528 ASSERT_OK(Put("ad", "1"));
2529 ASSERT_OK(Flush());
2530 ASSERT_OK(Delete("ab"));
2531 ASSERT_OK(Delete("ac"));
2532 ASSERT_OK(Delete("ad"));
2533
2534 ReadOptions ro;
2535 Slice prefix("b");
2536 ro.iterate_upper_bound = &prefix;
2537
2538 iter = NewIterator(ro);
2539 count = 0;
2540 for(iter->Seek("aa"); iter->Valid(); iter->Next()) {
2541 ASSERT_OK(iter->status());
2542 count++;
2543 }
2544 ASSERT_EQ(count, 1);
2545 delete iter;
2546 skip_count += 6; // 3 deletes + 3 original keys
2547 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2548
2549 iter = NewIterator(ro);
2550 count = 0;
2551 for(iter->SeekToLast(); iter->Valid(); iter->Prev()) {
2552 ASSERT_OK(iter->status());
2553 count++;
2554 }
2555 ASSERT_EQ(count, 2);
2556 delete iter;
2557 // 3 deletes + 3 original keys + lower sequence of "a"
2558 skip_count += 7;
2559 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2560 }
2561
2562 TEST_P(DBIteratorTest, SeekAfterHittingManyInternalKeys) {
2563 Options options = CurrentOptions();
2564 DestroyAndReopen(options);
2565 ReadOptions ropts;
2566 ropts.max_skippable_internal_keys = 2;
2567
2568 Put("1", "val_1");
2569 // Add more tombstones than max_skippable_internal_keys so that Next() fails.
2570 Delete("2");
2571 Delete("3");
2572 Delete("4");
2573 Delete("5");
2574 Put("6", "val_6");
2575
2576 std::unique_ptr<Iterator> iter(NewIterator(ropts));
2577 iter->SeekToFirst();
2578
2579 ASSERT_TRUE(iter->Valid());
2580 ASSERT_EQ(iter->key().ToString(), "1");
2581 ASSERT_EQ(iter->value().ToString(), "val_1");
2582
2583 // This should fail as incomplete due to too many non-visible internal keys on
2584 // the way to the next valid user key.
2585 iter->Next();
2586 ASSERT_TRUE(!iter->Valid());
2587 ASSERT_TRUE(iter->status().IsIncomplete());
2588
2589 // Get the internal key at which Next() failed.
2590 std::string prop_value;
2591 ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
2592 ASSERT_EQ("4", prop_value);
2593
2594 // Create a new iterator to seek to the internal key.
2595 std::unique_ptr<Iterator> iter2(NewIterator(ropts));
2596 iter2->Seek(prop_value);
2597 ASSERT_TRUE(iter2->Valid());
2598 ASSERT_OK(iter2->status());
2599
2600 ASSERT_EQ(iter2->key().ToString(), "6");
2601 ASSERT_EQ(iter2->value().ToString(), "val_6");
2602 }
2603
2604 // Reproduces a former bug where iterator would skip some records when DBIter
2605 // re-seeks subiterator with Incomplete status.
2606 TEST_P(DBIteratorTest, NonBlockingIterationBugRepro) {
2607 Options options = CurrentOptions();
2608 BlockBasedTableOptions table_options;
2609 // Make sure the sst file has more than one block.
2610 table_options.flush_block_policy_factory =
2611 std::make_shared<FlushBlockEveryKeyPolicyFactory>();
2612 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2613 DestroyAndReopen(options);
2614
2615 // Two records in sst file, each in its own block.
2616 Put("b", "");
2617 Put("d", "");
2618 Flush();
2619
2620 // Create a nonblocking iterator before writing to memtable.
2621 ReadOptions ropt;
2622 ropt.read_tier = kBlockCacheTier;
2623 std::unique_ptr<Iterator> iter(NewIterator(ropt));
2624
2625 // Overwrite a key in memtable many times to hit
2626 // max_sequential_skip_in_iterations (which is 8 by default).
2627 for (int i = 0; i < 20; ++i) {
2628 Put("c", "");
2629 }
2630
2631 // Load the second block in sst file into the block cache.
2632 {
2633 std::unique_ptr<Iterator> iter2(NewIterator(ReadOptions()));
2634 iter2->Seek("d");
2635 }
2636
2637 // Finally seek the nonblocking iterator.
2638 iter->Seek("a");
2639 // With the bug, the status used to be OK, and the iterator used to point to
2640 // "d".
2641 EXPECT_TRUE(iter->status().IsIncomplete());
2642 }
2643
2644 TEST_P(DBIteratorTest, SeekBackwardAfterOutOfUpperBound) {
2645 Put("a", "");
2646 Put("b", "");
2647 Flush();
2648
2649 ReadOptions ropt;
2650 Slice ub = "b";
2651 ropt.iterate_upper_bound = &ub;
2652
2653 std::unique_ptr<Iterator> it(dbfull()->NewIterator(ropt));
2654 it->SeekForPrev("a");
2655 ASSERT_TRUE(it->Valid());
2656 ASSERT_OK(it->status());
2657 ASSERT_EQ("a", it->key().ToString());
2658 it->Next();
2659 ASSERT_FALSE(it->Valid());
2660 ASSERT_OK(it->status());
2661 it->SeekForPrev("a");
2662 ASSERT_OK(it->status());
2663
2664 ASSERT_TRUE(it->Valid());
2665 ASSERT_EQ("a", it->key().ToString());
2666 }
2667
2668 TEST_P(DBIteratorTest, AvoidReseekLevelIterator) {
2669 Options options = CurrentOptions();
2670 options.compression = CompressionType::kNoCompression;
2671 BlockBasedTableOptions table_options;
2672 table_options.block_size = 800;
2673 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2674 Reopen(options);
2675
2676 Random rnd(301);
2677 std::string random_str = RandomString(&rnd, 180);
2678
2679 ASSERT_OK(Put("1", random_str));
2680 ASSERT_OK(Put("2", random_str));
2681 ASSERT_OK(Put("3", random_str));
2682 ASSERT_OK(Put("4", random_str));
2683 // A new block
2684 ASSERT_OK(Put("5", random_str));
2685 ASSERT_OK(Put("6", random_str));
2686 ASSERT_OK(Put("7", random_str));
2687 ASSERT_OK(Flush());
2688 ASSERT_OK(Put("8", random_str));
2689 ASSERT_OK(Put("9", random_str));
2690 ASSERT_OK(Flush());
2691 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2692
2693 int num_find_file_in_level = 0;
2694 int num_idx_blk_seek = 0;
2695 SyncPoint::GetInstance()->SetCallBack(
2696 "LevelIterator::Seek:BeforeFindFile",
2697 [&](void* /*arg*/) { num_find_file_in_level++; });
2698 SyncPoint::GetInstance()->SetCallBack(
2699 "IndexBlockIter::Seek:0", [&](void* /*arg*/) { num_idx_blk_seek++; });
2700 SyncPoint::GetInstance()->EnableProcessing();
2701
2702 {
2703 std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
2704 iter->Seek("1");
2705 ASSERT_TRUE(iter->Valid());
2706 ASSERT_EQ(1, num_find_file_in_level);
2707 ASSERT_EQ(1, num_idx_blk_seek);
2708
2709 iter->Seek("2");
2710 ASSERT_TRUE(iter->Valid());
2711 ASSERT_EQ(1, num_find_file_in_level);
2712 ASSERT_EQ(1, num_idx_blk_seek);
2713
2714 iter->Seek("3");
2715 ASSERT_TRUE(iter->Valid());
2716 ASSERT_EQ(1, num_find_file_in_level);
2717 ASSERT_EQ(1, num_idx_blk_seek);
2718
2719 iter->Next();
2720 ASSERT_TRUE(iter->Valid());
2721 ASSERT_EQ(1, num_find_file_in_level);
2722 ASSERT_EQ(1, num_idx_blk_seek);
2723
2724 iter->Seek("5");
2725 ASSERT_TRUE(iter->Valid());
2726 ASSERT_EQ(1, num_find_file_in_level);
2727 ASSERT_EQ(2, num_idx_blk_seek);
2728
2729 iter->Seek("6");
2730 ASSERT_TRUE(iter->Valid());
2731 ASSERT_EQ(1, num_find_file_in_level);
2732 ASSERT_EQ(2, num_idx_blk_seek);
2733
2734 iter->Seek("7");
2735 ASSERT_TRUE(iter->Valid());
2736 ASSERT_EQ(1, num_find_file_in_level);
2737 ASSERT_EQ(3, num_idx_blk_seek);
2738
2739 iter->Seek("8");
2740 ASSERT_TRUE(iter->Valid());
2741 ASSERT_EQ(2, num_find_file_in_level);
2742 // Still re-seek because "8" is the boundary key, which has
2743 // the same user key as the seek key.
2744 ASSERT_EQ(4, num_idx_blk_seek);
2745
2746 iter->Seek("5");
2747 ASSERT_TRUE(iter->Valid());
2748 ASSERT_EQ(3, num_find_file_in_level);
2749 ASSERT_EQ(5, num_idx_blk_seek);
2750
2751 iter->Next();
2752 ASSERT_TRUE(iter->Valid());
2753 ASSERT_EQ(3, num_find_file_in_level);
2754 ASSERT_EQ(5, num_idx_blk_seek);
2755
2756 // Seek backward never triggers the index block seek to be skipped
2757 iter->Seek("5");
2758 ASSERT_TRUE(iter->Valid());
2759 ASSERT_EQ(3, num_find_file_in_level);
2760 ASSERT_EQ(6, num_idx_blk_seek);
2761 }
2762
2763 SyncPoint::GetInstance()->DisableProcessing();
2764 }
2765
2766 // MyRocks may change iterate bounds before seek. Simply test to make sure such
2767 // usage doesn't break iterator.
2768 TEST_P(DBIteratorTest, IterateBoundChangedBeforeSeek) {
2769 Options options = CurrentOptions();
2770 options.compression = CompressionType::kNoCompression;
2771 BlockBasedTableOptions table_options;
2772 table_options.block_size = 100;
2773 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2774 std::string value(50, 'v');
2775 Reopen(options);
2776 ASSERT_OK(Put("aaa", value));
2777 ASSERT_OK(Flush());
2778 ASSERT_OK(Put("bbb", "v"));
2779 ASSERT_OK(Put("ccc", "v"));
2780 ASSERT_OK(Put("ddd", "v"));
2781 ASSERT_OK(Flush());
2782 ASSERT_OK(Put("eee", "v"));
2783 ASSERT_OK(Flush());
2784 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2785
2786 std::string ub1 = "e";
2787 std::string ub2 = "c";
2788 Slice ub(ub1);
2789 ReadOptions read_opts1;
2790 read_opts1.iterate_upper_bound = &ub;
2791 Iterator* iter = NewIterator(read_opts1);
2792 // Seek and iterate accross block boundary.
2793 iter->Seek("b");
2794 ASSERT_TRUE(iter->Valid());
2795 ASSERT_OK(iter->status());
2796 ASSERT_EQ("bbb", iter->key());
2797 ub = Slice(ub2);
2798 iter->Seek("b");
2799 ASSERT_TRUE(iter->Valid());
2800 ASSERT_OK(iter->status());
2801 ASSERT_EQ("bbb", iter->key());
2802 iter->Next();
2803 ASSERT_FALSE(iter->Valid());
2804 ASSERT_OK(iter->status());
2805 delete iter;
2806
2807 std::string lb1 = "a";
2808 std::string lb2 = "c";
2809 Slice lb(lb1);
2810 ReadOptions read_opts2;
2811 read_opts2.iterate_lower_bound = &lb;
2812 iter = NewIterator(read_opts2);
2813 iter->SeekForPrev("d");
2814 ASSERT_TRUE(iter->Valid());
2815 ASSERT_OK(iter->status());
2816 ASSERT_EQ("ccc", iter->key());
2817 lb = Slice(lb2);
2818 iter->SeekForPrev("d");
2819 ASSERT_TRUE(iter->Valid());
2820 ASSERT_OK(iter->status());
2821 ASSERT_EQ("ccc", iter->key());
2822 iter->Prev();
2823 ASSERT_FALSE(iter->Valid());
2824 ASSERT_OK(iter->status());
2825 delete iter;
2826 }
2827
2828 TEST_P(DBIteratorTest, IterateWithLowerBoundAcrossFileBoundary) {
2829 ASSERT_OK(Put("aaa", "v"));
2830 ASSERT_OK(Put("bbb", "v"));
2831 ASSERT_OK(Flush());
2832 ASSERT_OK(Put("ccc", "v"));
2833 ASSERT_OK(Put("ddd", "v"));
2834 ASSERT_OK(Flush());
2835 // Move both files to bottom level.
2836 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2837 Slice lower_bound("b");
2838 ReadOptions read_opts;
2839 read_opts.iterate_lower_bound = &lower_bound;
2840 std::unique_ptr<Iterator> iter(NewIterator(read_opts));
2841 iter->SeekForPrev("d");
2842 ASSERT_TRUE(iter->Valid());
2843 ASSERT_OK(iter->status());
2844 ASSERT_EQ("ccc", iter->key());
2845 iter->Prev();
2846 ASSERT_TRUE(iter->Valid());
2847 ASSERT_OK(iter->status());
2848 ASSERT_EQ("bbb", iter->key());
2849 iter->Prev();
2850 ASSERT_FALSE(iter->Valid());
2851 ASSERT_OK(iter->status());
2852 }
2853
2854 INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest,
2855 testing::Values(true, false));
2856
2857 // Tests how DBIter work with ReadCallback
2858 class DBIteratorWithReadCallbackTest : public DBIteratorTest {};
2859
2860 TEST_F(DBIteratorWithReadCallbackTest, ReadCallback) {
2861 class TestReadCallback : public ReadCallback {
2862 public:
2863 explicit TestReadCallback(SequenceNumber _max_visible_seq)
2864 : ReadCallback(_max_visible_seq) {}
2865
2866 bool IsVisibleFullCheck(SequenceNumber seq) override {
2867 return seq <= max_visible_seq_;
2868 }
2869 };
2870
2871 ASSERT_OK(Put("foo", "v1"));
2872 ASSERT_OK(Put("foo", "v2"));
2873 ASSERT_OK(Put("foo", "v3"));
2874 ASSERT_OK(Put("a", "va"));
2875 ASSERT_OK(Put("z", "vz"));
2876 SequenceNumber seq1 = db_->GetLatestSequenceNumber();
2877 TestReadCallback callback1(seq1);
2878 ASSERT_OK(Put("foo", "v4"));
2879 ASSERT_OK(Put("foo", "v5"));
2880 ASSERT_OK(Put("bar", "v7"));
2881
2882 SequenceNumber seq2 = db_->GetLatestSequenceNumber();
2883 auto* cfd =
2884 reinterpret_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())
2885 ->cfd();
2886 // The iterator are suppose to see data before seq1.
2887 Iterator* iter =
2888 dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq2, &callback1);
2889
2890 // Seek
2891 // The latest value of "foo" before seq1 is "v3"
2892 iter->Seek("foo");
2893 ASSERT_TRUE(iter->Valid());
2894 ASSERT_OK(iter->status());
2895 ASSERT_EQ("foo", iter->key());
2896 ASSERT_EQ("v3", iter->value());
2897 // "bar" is not visible to the iterator. It will move on to the next key
2898 // "foo".
2899 iter->Seek("bar");
2900 ASSERT_TRUE(iter->Valid());
2901 ASSERT_OK(iter->status());
2902 ASSERT_EQ("foo", iter->key());
2903 ASSERT_EQ("v3", iter->value());
2904
2905 // Next
2906 // Seek to "a"
2907 iter->Seek("a");
2908 ASSERT_TRUE(iter->Valid());
2909 ASSERT_OK(iter->status());
2910 ASSERT_EQ("va", iter->value());
2911 // "bar" is not visible to the iterator. It will move on to the next key
2912 // "foo".
2913 iter->Next();
2914 ASSERT_TRUE(iter->Valid());
2915 ASSERT_OK(iter->status());
2916 ASSERT_EQ("foo", iter->key());
2917 ASSERT_EQ("v3", iter->value());
2918
2919 // Prev
2920 // Seek to "z"
2921 iter->Seek("z");
2922 ASSERT_TRUE(iter->Valid());
2923 ASSERT_OK(iter->status());
2924 ASSERT_EQ("vz", iter->value());
2925 // The previous key is "foo", which is visible to the iterator.
2926 iter->Prev();
2927 ASSERT_TRUE(iter->Valid());
2928 ASSERT_OK(iter->status());
2929 ASSERT_EQ("foo", iter->key());
2930 ASSERT_EQ("v3", iter->value());
2931 // "bar" is not visible to the iterator. It will move on to the next key "a".
2932 iter->Prev(); // skipping "bar"
2933 ASSERT_TRUE(iter->Valid());
2934 ASSERT_OK(iter->status());
2935 ASSERT_EQ("a", iter->key());
2936 ASSERT_EQ("va", iter->value());
2937
2938 // SeekForPrev
2939 // The previous key is "foo", which is visible to the iterator.
2940 iter->SeekForPrev("y");
2941 ASSERT_TRUE(iter->Valid());
2942 ASSERT_OK(iter->status());
2943 ASSERT_EQ("foo", iter->key());
2944 ASSERT_EQ("v3", iter->value());
2945 // "bar" is not visible to the iterator. It will move on to the next key "a".
2946 iter->SeekForPrev("bar");
2947 ASSERT_TRUE(iter->Valid());
2948 ASSERT_OK(iter->status());
2949 ASSERT_EQ("a", iter->key());
2950 ASSERT_EQ("va", iter->value());
2951
2952 delete iter;
2953
2954 // Prev beyond max_sequential_skip_in_iterations
2955 uint64_t num_versions =
2956 CurrentOptions().max_sequential_skip_in_iterations + 10;
2957 for (uint64_t i = 0; i < num_versions; i++) {
2958 ASSERT_OK(Put("bar", ToString(i)));
2959 }
2960 SequenceNumber seq3 = db_->GetLatestSequenceNumber();
2961 TestReadCallback callback2(seq3);
2962 ASSERT_OK(Put("bar", "v8"));
2963 SequenceNumber seq4 = db_->GetLatestSequenceNumber();
2964
2965 // The iterator is suppose to see data before seq3.
2966 iter = dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq4, &callback2);
2967 // Seek to "z", which is visible.
2968 iter->Seek("z");
2969 ASSERT_TRUE(iter->Valid());
2970 ASSERT_OK(iter->status());
2971 ASSERT_EQ("vz", iter->value());
2972 // Previous key is "foo" and the last value "v5" is visible.
2973 iter->Prev();
2974 ASSERT_TRUE(iter->Valid());
2975 ASSERT_OK(iter->status());
2976 ASSERT_EQ("foo", iter->key());
2977 ASSERT_EQ("v5", iter->value());
2978 // Since the number of values of "bar" is more than
2979 // max_sequential_skip_in_iterations, Prev() will ultimately fallback to
2980 // seek in forward direction. Here we test the fallback seek is correct.
2981 // The last visible value should be (num_versions - 1), as "v8" is not
2982 // visible.
2983 iter->Prev();
2984 ASSERT_TRUE(iter->Valid());
2985 ASSERT_OK(iter->status());
2986 ASSERT_EQ("bar", iter->key());
2987 ASSERT_EQ(ToString(num_versions - 1), iter->value());
2988
2989 delete iter;
2990 }
2991
2992 } // namespace ROCKSDB_NAMESPACE
2993
2994 int main(int argc, char** argv) {
2995 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
2996 ::testing::InitGoogleTest(&argc, argv);
2997 return RUN_ALL_TESTS();
2998 }