]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/db/db_iterator_test.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / db / db_iterator_test.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #include <functional>
11
12 #include "db/arena_wrapped_db_iter.h"
13 #include "db/db_iter.h"
14 #include "db/db_test_util.h"
15 #include "port/port.h"
16 #include "port/stack_trace.h"
17 #include "rocksdb/iostats_context.h"
18 #include "rocksdb/perf_context.h"
19 #include "table/block_based/flush_block_policy.h"
20 #include "util/random.h"
21
22 namespace ROCKSDB_NAMESPACE {
23
24 // A dumb ReadCallback which saying every key is committed.
25 class DummyReadCallback : public ReadCallback {
26 public:
27 DummyReadCallback() : ReadCallback(kMaxSequenceNumber) {}
28 bool IsVisibleFullCheck(SequenceNumber /*seq*/) override { return true; }
29 void SetSnapshot(SequenceNumber seq) { max_visible_seq_ = seq; }
30 };
31
32 // Test param:
33 // bool: whether to pass read_callback to NewIterator().
34 class DBIteratorTest : public DBTestBase,
35 public testing::WithParamInterface<bool> {
36 public:
37 DBIteratorTest() : DBTestBase("/db_iterator_test", /*env_do_fsync=*/true) {}
38
39 Iterator* NewIterator(const ReadOptions& read_options,
40 ColumnFamilyHandle* column_family = nullptr) {
41 if (column_family == nullptr) {
42 column_family = db_->DefaultColumnFamily();
43 }
44 auto* cfd =
45 static_cast_with_check<ColumnFamilyHandleImpl>(column_family)->cfd();
46 SequenceNumber seq = read_options.snapshot != nullptr
47 ? read_options.snapshot->GetSequenceNumber()
48 : db_->GetLatestSequenceNumber();
49 bool use_read_callback = GetParam();
50 DummyReadCallback* read_callback = nullptr;
51 if (use_read_callback) {
52 read_callback = new DummyReadCallback();
53 read_callback->SetSnapshot(seq);
54 InstrumentedMutexLock lock(&mutex_);
55 read_callbacks_.push_back(
56 std::unique_ptr<DummyReadCallback>(read_callback));
57 }
58 return dbfull()->NewIteratorImpl(read_options, cfd, seq, read_callback);
59 }
60
61 private:
62 InstrumentedMutex mutex_;
63 std::vector<std::unique_ptr<DummyReadCallback>> read_callbacks_;
64 };
65
66 TEST_P(DBIteratorTest, IteratorProperty) {
67 // The test needs to be changed if kPersistedTier is supported in iterator.
68 Options options = CurrentOptions();
69 CreateAndReopenWithCF({"pikachu"}, options);
70 Put(1, "1", "2");
71 Delete(1, "2");
72 ReadOptions ropt;
73 ropt.pin_data = false;
74 {
75 std::unique_ptr<Iterator> iter(NewIterator(ropt, handles_[1]));
76 iter->SeekToFirst();
77 std::string prop_value;
78 ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value));
79 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
80 ASSERT_EQ("0", prop_value);
81 ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
82 ASSERT_EQ("1", prop_value);
83 iter->Next();
84 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
85 ASSERT_EQ("Iterator is not valid.", prop_value);
86
87 // Get internal key at which the iteration stopped (tombstone in this case).
88 ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
89 ASSERT_EQ("2", prop_value);
90 }
91 Close();
92 }
93
94 TEST_P(DBIteratorTest, PersistedTierOnIterator) {
95 // The test needs to be changed if kPersistedTier is supported in iterator.
96 Options options = CurrentOptions();
97 CreateAndReopenWithCF({"pikachu"}, options);
98 ReadOptions ropt;
99 ropt.read_tier = kPersistedTier;
100
101 auto* iter = db_->NewIterator(ropt, handles_[1]);
102 ASSERT_TRUE(iter->status().IsNotSupported());
103 delete iter;
104
105 std::vector<Iterator*> iters;
106 ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported());
107 Close();
108 }
109
110 TEST_P(DBIteratorTest, NonBlockingIteration) {
111 do {
112 ReadOptions non_blocking_opts, regular_opts;
113 Options options = CurrentOptions();
114 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
115 non_blocking_opts.read_tier = kBlockCacheTier;
116 CreateAndReopenWithCF({"pikachu"}, options);
117 // write one kv to the database.
118 ASSERT_OK(Put(1, "a", "b"));
119
120 // scan using non-blocking iterator. We should find it because
121 // it is in memtable.
122 Iterator* iter = NewIterator(non_blocking_opts, handles_[1]);
123 int count = 0;
124 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
125 ASSERT_OK(iter->status());
126 count++;
127 }
128 ASSERT_EQ(count, 1);
129 delete iter;
130
131 // flush memtable to storage. Now, the key should not be in the
132 // memtable neither in the block cache.
133 ASSERT_OK(Flush(1));
134
135 // verify that a non-blocking iterator does not find any
136 // kvs. Neither does it do any IOs to storage.
137 uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
138 uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
139 iter = NewIterator(non_blocking_opts, handles_[1]);
140 count = 0;
141 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
142 count++;
143 }
144 ASSERT_EQ(count, 0);
145 ASSERT_TRUE(iter->status().IsIncomplete());
146 ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
147 ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
148 delete iter;
149
150 // read in the specified block via a regular get
151 ASSERT_EQ(Get(1, "a"), "b");
152
153 // verify that we can find it via a non-blocking scan
154 numopen = TestGetTickerCount(options, NO_FILE_OPENS);
155 cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
156 iter = NewIterator(non_blocking_opts, handles_[1]);
157 count = 0;
158 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
159 ASSERT_OK(iter->status());
160 count++;
161 }
162 ASSERT_EQ(count, 1);
163 ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
164 ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
165 delete iter;
166
167 // This test verifies block cache behaviors, which is not used by plain
168 // table format.
169 } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipMmapReads));
170 }
171
172 TEST_P(DBIteratorTest, IterSeekBeforePrev) {
173 ASSERT_OK(Put("a", "b"));
174 ASSERT_OK(Put("c", "d"));
175 dbfull()->Flush(FlushOptions());
176 ASSERT_OK(Put("0", "f"));
177 ASSERT_OK(Put("1", "h"));
178 dbfull()->Flush(FlushOptions());
179 ASSERT_OK(Put("2", "j"));
180 auto iter = NewIterator(ReadOptions());
181 iter->Seek(Slice("c"));
182 iter->Prev();
183 iter->Seek(Slice("a"));
184 iter->Prev();
185 delete iter;
186 }
187
188 TEST_P(DBIteratorTest, IterReseekNewUpperBound) {
189 Random rnd(301);
190 Options options = CurrentOptions();
191 BlockBasedTableOptions table_options;
192 table_options.block_size = 1024;
193 table_options.block_size_deviation = 50;
194 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
195 options.compression = kNoCompression;
196 Reopen(options);
197
198 ASSERT_OK(Put("a", rnd.RandomString(400)));
199 ASSERT_OK(Put("aabb", rnd.RandomString(400)));
200 ASSERT_OK(Put("aaef", rnd.RandomString(400)));
201 ASSERT_OK(Put("b", rnd.RandomString(400)));
202 dbfull()->Flush(FlushOptions());
203 ReadOptions opts;
204 Slice ub = Slice("aa");
205 opts.iterate_upper_bound = &ub;
206 auto iter = NewIterator(opts);
207 iter->Seek(Slice("a"));
208 ub = Slice("b");
209 iter->Seek(Slice("aabc"));
210 ASSERT_TRUE(iter->Valid());
211 ASSERT_EQ(iter->key().ToString(), "aaef");
212 delete iter;
213 }
214
215 TEST_P(DBIteratorTest, IterSeekForPrevBeforeNext) {
216 ASSERT_OK(Put("a", "b"));
217 ASSERT_OK(Put("c", "d"));
218 dbfull()->Flush(FlushOptions());
219 ASSERT_OK(Put("0", "f"));
220 ASSERT_OK(Put("1", "h"));
221 dbfull()->Flush(FlushOptions());
222 ASSERT_OK(Put("2", "j"));
223 auto iter = NewIterator(ReadOptions());
224 iter->SeekForPrev(Slice("0"));
225 iter->Next();
226 iter->SeekForPrev(Slice("1"));
227 iter->Next();
228 delete iter;
229 }
230
231 namespace {
232 std::string MakeLongKey(size_t length, char c) {
233 return std::string(length, c);
234 }
235 } // namespace
236
237 TEST_P(DBIteratorTest, IterLongKeys) {
238 ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
239 ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
240 ASSERT_OK(Put("a", "b"));
241 dbfull()->Flush(FlushOptions());
242 ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
243 ASSERT_OK(Put(MakeLongKey(127, 3), "3"));
244 ASSERT_OK(Put(MakeLongKey(64, 4), "4"));
245 auto iter = NewIterator(ReadOptions());
246
247 // Create a key that needs to be skipped for Seq too new
248 iter->Seek(MakeLongKey(20, 0));
249 ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0");
250 iter->Next();
251 ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
252 iter->Next();
253 ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
254 iter->Next();
255 ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
256 iter->Next();
257 ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4");
258
259 iter->SeekForPrev(MakeLongKey(127, 3));
260 ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
261 iter->Prev();
262 ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
263 iter->Prev();
264 ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
265 delete iter;
266
267 iter = NewIterator(ReadOptions());
268 iter->Seek(MakeLongKey(50, 1));
269 ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
270 iter->Next();
271 ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
272 iter->Next();
273 ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
274 delete iter;
275 }
276
277 TEST_P(DBIteratorTest, IterNextWithNewerSeq) {
278 ASSERT_OK(Put("0", "0"));
279 dbfull()->Flush(FlushOptions());
280 ASSERT_OK(Put("a", "b"));
281 ASSERT_OK(Put("c", "d"));
282 ASSERT_OK(Put("d", "e"));
283 auto iter = NewIterator(ReadOptions());
284
285 // Create a key that needs to be skipped for Seq too new
286 for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
287 i++) {
288 ASSERT_OK(Put("b", "f"));
289 }
290
291 iter->Seek(Slice("a"));
292 ASSERT_EQ(IterStatus(iter), "a->b");
293 iter->Next();
294 ASSERT_EQ(IterStatus(iter), "c->d");
295 iter->SeekForPrev(Slice("b"));
296 ASSERT_EQ(IterStatus(iter), "a->b");
297 iter->Next();
298 ASSERT_EQ(IterStatus(iter), "c->d");
299
300 delete iter;
301 }
302
303 TEST_P(DBIteratorTest, IterPrevWithNewerSeq) {
304 ASSERT_OK(Put("0", "0"));
305 dbfull()->Flush(FlushOptions());
306 ASSERT_OK(Put("a", "b"));
307 ASSERT_OK(Put("c", "d"));
308 ASSERT_OK(Put("d", "e"));
309 auto iter = NewIterator(ReadOptions());
310
311 // Create a key that needs to be skipped for Seq too new
312 for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
313 i++) {
314 ASSERT_OK(Put("b", "f"));
315 }
316
317 iter->Seek(Slice("d"));
318 ASSERT_EQ(IterStatus(iter), "d->e");
319 iter->Prev();
320 ASSERT_EQ(IterStatus(iter), "c->d");
321 iter->Prev();
322 ASSERT_EQ(IterStatus(iter), "a->b");
323 iter->Prev();
324 iter->SeekForPrev(Slice("d"));
325 ASSERT_EQ(IterStatus(iter), "d->e");
326 iter->Prev();
327 ASSERT_EQ(IterStatus(iter), "c->d");
328 iter->Prev();
329 ASSERT_EQ(IterStatus(iter), "a->b");
330 iter->Prev();
331 delete iter;
332 }
333
334 TEST_P(DBIteratorTest, IterPrevWithNewerSeq2) {
335 ASSERT_OK(Put("0", "0"));
336 dbfull()->Flush(FlushOptions());
337 ASSERT_OK(Put("a", "b"));
338 ASSERT_OK(Put("c", "d"));
339 ASSERT_OK(Put("e", "f"));
340 auto iter = NewIterator(ReadOptions());
341 auto iter2 = NewIterator(ReadOptions());
342 iter->Seek(Slice("c"));
343 iter2->SeekForPrev(Slice("d"));
344 ASSERT_EQ(IterStatus(iter), "c->d");
345 ASSERT_EQ(IterStatus(iter2), "c->d");
346
347 // Create a key that needs to be skipped for Seq too new
348 for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
349 i++) {
350 ASSERT_OK(Put("b", "f"));
351 }
352
353 iter->Prev();
354 ASSERT_EQ(IterStatus(iter), "a->b");
355 iter->Prev();
356 iter2->Prev();
357 ASSERT_EQ(IterStatus(iter2), "a->b");
358 iter2->Prev();
359 delete iter;
360 delete iter2;
361 }
362
363 TEST_P(DBIteratorTest, IterEmpty) {
364 do {
365 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
366 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
367
368 iter->SeekToFirst();
369 ASSERT_EQ(IterStatus(iter), "(invalid)");
370
371 iter->SeekToLast();
372 ASSERT_EQ(IterStatus(iter), "(invalid)");
373
374 iter->Seek("foo");
375 ASSERT_EQ(IterStatus(iter), "(invalid)");
376
377 iter->SeekForPrev("foo");
378 ASSERT_EQ(IterStatus(iter), "(invalid)");
379
380 delete iter;
381 } while (ChangeCompactOptions());
382 }
383
384 TEST_P(DBIteratorTest, IterSingle) {
385 do {
386 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
387 ASSERT_OK(Put(1, "a", "va"));
388 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
389
390 iter->SeekToFirst();
391 ASSERT_EQ(IterStatus(iter), "a->va");
392 iter->Next();
393 ASSERT_EQ(IterStatus(iter), "(invalid)");
394 iter->SeekToFirst();
395 ASSERT_EQ(IterStatus(iter), "a->va");
396 iter->Prev();
397 ASSERT_EQ(IterStatus(iter), "(invalid)");
398
399 iter->SeekToLast();
400 ASSERT_EQ(IterStatus(iter), "a->va");
401 iter->Next();
402 ASSERT_EQ(IterStatus(iter), "(invalid)");
403 iter->SeekToLast();
404 ASSERT_EQ(IterStatus(iter), "a->va");
405 iter->Prev();
406 ASSERT_EQ(IterStatus(iter), "(invalid)");
407
408 iter->Seek("");
409 ASSERT_EQ(IterStatus(iter), "a->va");
410 iter->Next();
411 ASSERT_EQ(IterStatus(iter), "(invalid)");
412 iter->SeekForPrev("");
413 ASSERT_EQ(IterStatus(iter), "(invalid)");
414
415 iter->Seek("a");
416 ASSERT_EQ(IterStatus(iter), "a->va");
417 iter->Next();
418 ASSERT_EQ(IterStatus(iter), "(invalid)");
419 iter->SeekForPrev("a");
420 ASSERT_EQ(IterStatus(iter), "a->va");
421 iter->Prev();
422 ASSERT_EQ(IterStatus(iter), "(invalid)");
423
424 iter->Seek("b");
425 ASSERT_EQ(IterStatus(iter), "(invalid)");
426 iter->SeekForPrev("b");
427 ASSERT_EQ(IterStatus(iter), "a->va");
428 iter->Prev();
429 ASSERT_EQ(IterStatus(iter), "(invalid)");
430
431 delete iter;
432 } while (ChangeCompactOptions());
433 }
434
435 TEST_P(DBIteratorTest, IterMulti) {
436 do {
437 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
438 ASSERT_OK(Put(1, "a", "va"));
439 ASSERT_OK(Put(1, "b", "vb"));
440 ASSERT_OK(Put(1, "c", "vc"));
441 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
442
443 iter->SeekToFirst();
444 ASSERT_EQ(IterStatus(iter), "a->va");
445 iter->Next();
446 ASSERT_EQ(IterStatus(iter), "b->vb");
447 iter->Next();
448 ASSERT_EQ(IterStatus(iter), "c->vc");
449 iter->Next();
450 ASSERT_EQ(IterStatus(iter), "(invalid)");
451 iter->SeekToFirst();
452 ASSERT_EQ(IterStatus(iter), "a->va");
453 iter->Prev();
454 ASSERT_EQ(IterStatus(iter), "(invalid)");
455
456 iter->SeekToLast();
457 ASSERT_EQ(IterStatus(iter), "c->vc");
458 iter->Prev();
459 ASSERT_EQ(IterStatus(iter), "b->vb");
460 iter->Prev();
461 ASSERT_EQ(IterStatus(iter), "a->va");
462 iter->Prev();
463 ASSERT_EQ(IterStatus(iter), "(invalid)");
464 iter->SeekToLast();
465 ASSERT_EQ(IterStatus(iter), "c->vc");
466 iter->Next();
467 ASSERT_EQ(IterStatus(iter), "(invalid)");
468
469 iter->Seek("");
470 ASSERT_EQ(IterStatus(iter), "a->va");
471 iter->Seek("a");
472 ASSERT_EQ(IterStatus(iter), "a->va");
473 iter->Seek("ax");
474 ASSERT_EQ(IterStatus(iter), "b->vb");
475 iter->SeekForPrev("d");
476 ASSERT_EQ(IterStatus(iter), "c->vc");
477 iter->SeekForPrev("c");
478 ASSERT_EQ(IterStatus(iter), "c->vc");
479 iter->SeekForPrev("bx");
480 ASSERT_EQ(IterStatus(iter), "b->vb");
481
482 iter->Seek("b");
483 ASSERT_EQ(IterStatus(iter), "b->vb");
484 iter->Seek("z");
485 ASSERT_EQ(IterStatus(iter), "(invalid)");
486 iter->SeekForPrev("b");
487 ASSERT_EQ(IterStatus(iter), "b->vb");
488 iter->SeekForPrev("");
489 ASSERT_EQ(IterStatus(iter), "(invalid)");
490
491 // Switch from reverse to forward
492 iter->SeekToLast();
493 iter->Prev();
494 iter->Prev();
495 iter->Next();
496 ASSERT_EQ(IterStatus(iter), "b->vb");
497
498 // Switch from forward to reverse
499 iter->SeekToFirst();
500 iter->Next();
501 iter->Next();
502 iter->Prev();
503 ASSERT_EQ(IterStatus(iter), "b->vb");
504
505 // Make sure iter stays at snapshot
506 ASSERT_OK(Put(1, "a", "va2"));
507 ASSERT_OK(Put(1, "a2", "va3"));
508 ASSERT_OK(Put(1, "b", "vb2"));
509 ASSERT_OK(Put(1, "c", "vc2"));
510 ASSERT_OK(Delete(1, "b"));
511 iter->SeekToFirst();
512 ASSERT_EQ(IterStatus(iter), "a->va");
513 iter->Next();
514 ASSERT_EQ(IterStatus(iter), "b->vb");
515 iter->Next();
516 ASSERT_EQ(IterStatus(iter), "c->vc");
517 iter->Next();
518 ASSERT_EQ(IterStatus(iter), "(invalid)");
519 iter->SeekToLast();
520 ASSERT_EQ(IterStatus(iter), "c->vc");
521 iter->Prev();
522 ASSERT_EQ(IterStatus(iter), "b->vb");
523 iter->Prev();
524 ASSERT_EQ(IterStatus(iter), "a->va");
525 iter->Prev();
526 ASSERT_EQ(IterStatus(iter), "(invalid)");
527
528 delete iter;
529 } while (ChangeCompactOptions());
530 }
531
532 // Check that we can skip over a run of user keys
533 // by using reseek rather than sequential scan
534 TEST_P(DBIteratorTest, IterReseek) {
535 anon::OptionsOverride options_override;
536 options_override.skip_policy = kSkipNoSnapshot;
537 Options options = CurrentOptions(options_override);
538 options.max_sequential_skip_in_iterations = 3;
539 options.create_if_missing = true;
540 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
541 DestroyAndReopen(options);
542 CreateAndReopenWithCF({"pikachu"}, options);
543
544 // insert three keys with same userkey and verify that
545 // reseek is not invoked. For each of these test cases,
546 // verify that we can find the next key "b".
547 ASSERT_OK(Put(1, "a", "zero"));
548 ASSERT_OK(Put(1, "a", "one"));
549 ASSERT_OK(Put(1, "a", "two"));
550 ASSERT_OK(Put(1, "b", "bone"));
551 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
552 iter->SeekToFirst();
553 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
554 ASSERT_EQ(IterStatus(iter), "a->two");
555 iter->Next();
556 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
557 ASSERT_EQ(IterStatus(iter), "b->bone");
558 delete iter;
559
560 // insert a total of three keys with same userkey and verify
561 // that reseek is still not invoked.
562 ASSERT_OK(Put(1, "a", "three"));
563 iter = NewIterator(ReadOptions(), handles_[1]);
564 iter->SeekToFirst();
565 ASSERT_EQ(IterStatus(iter), "a->three");
566 iter->Next();
567 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
568 ASSERT_EQ(IterStatus(iter), "b->bone");
569 delete iter;
570
571 // insert a total of four keys with same userkey and verify
572 // that reseek is invoked.
573 ASSERT_OK(Put(1, "a", "four"));
574 iter = NewIterator(ReadOptions(), handles_[1]);
575 iter->SeekToFirst();
576 ASSERT_EQ(IterStatus(iter), "a->four");
577 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
578 iter->Next();
579 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
580 ASSERT_EQ(IterStatus(iter), "b->bone");
581 delete iter;
582
583 // Testing reverse iterator
584 // At this point, we have three versions of "a" and one version of "b".
585 // The reseek statistics is already at 1.
586 int num_reseeks = static_cast<int>(
587 TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
588
589 // Insert another version of b and assert that reseek is not invoked
590 ASSERT_OK(Put(1, "b", "btwo"));
591 iter = NewIterator(ReadOptions(), handles_[1]);
592 iter->SeekToLast();
593 ASSERT_EQ(IterStatus(iter), "b->btwo");
594 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
595 num_reseeks);
596 iter->Prev();
597 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
598 num_reseeks + 1);
599 ASSERT_EQ(IterStatus(iter), "a->four");
600 delete iter;
601
602 // insert two more versions of b. This makes a total of 4 versions
603 // of b and 4 versions of a.
604 ASSERT_OK(Put(1, "b", "bthree"));
605 ASSERT_OK(Put(1, "b", "bfour"));
606 iter = NewIterator(ReadOptions(), handles_[1]);
607 iter->SeekToLast();
608 ASSERT_EQ(IterStatus(iter), "b->bfour");
609 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
610 num_reseeks + 2);
611 iter->Prev();
612
613 // the previous Prev call should have invoked reseek
614 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
615 num_reseeks + 3);
616 ASSERT_EQ(IterStatus(iter), "a->four");
617 delete iter;
618 }
619
620 TEST_P(DBIteratorTest, IterSmallAndLargeMix) {
621 do {
622 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
623 ASSERT_OK(Put(1, "a", "va"));
624 ASSERT_OK(Put(1, "b", std::string(100000, 'b')));
625 ASSERT_OK(Put(1, "c", "vc"));
626 ASSERT_OK(Put(1, "d", std::string(100000, 'd')));
627 ASSERT_OK(Put(1, "e", std::string(100000, 'e')));
628
629 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
630
631 iter->SeekToFirst();
632 ASSERT_EQ(IterStatus(iter), "a->va");
633 iter->Next();
634 ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
635 iter->Next();
636 ASSERT_EQ(IterStatus(iter), "c->vc");
637 iter->Next();
638 ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
639 iter->Next();
640 ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
641 iter->Next();
642 ASSERT_EQ(IterStatus(iter), "(invalid)");
643
644 iter->SeekToLast();
645 ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
646 iter->Prev();
647 ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
648 iter->Prev();
649 ASSERT_EQ(IterStatus(iter), "c->vc");
650 iter->Prev();
651 ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
652 iter->Prev();
653 ASSERT_EQ(IterStatus(iter), "a->va");
654 iter->Prev();
655 ASSERT_EQ(IterStatus(iter), "(invalid)");
656
657 delete iter;
658 } while (ChangeCompactOptions());
659 }
660
661 TEST_P(DBIteratorTest, IterMultiWithDelete) {
662 do {
663 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
664 ASSERT_OK(Put(1, "ka", "va"));
665 ASSERT_OK(Put(1, "kb", "vb"));
666 ASSERT_OK(Put(1, "kc", "vc"));
667 ASSERT_OK(Delete(1, "kb"));
668 ASSERT_EQ("NOT_FOUND", Get(1, "kb"));
669
670 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
671 iter->Seek("kc");
672 ASSERT_EQ(IterStatus(iter), "kc->vc");
673 if (!CurrentOptions().merge_operator) {
674 // TODO: merge operator does not support backward iteration yet
675 if (kPlainTableAllBytesPrefix != option_config_ &&
676 kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
677 kHashLinkList != option_config_ &&
678 kHashSkipList != option_config_) { // doesn't support SeekToLast
679 iter->Prev();
680 ASSERT_EQ(IterStatus(iter), "ka->va");
681 }
682 }
683 delete iter;
684 } while (ChangeOptions());
685 }
686
687 TEST_P(DBIteratorTest, IterPrevMaxSkip) {
688 do {
689 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
690 for (int i = 0; i < 2; i++) {
691 ASSERT_OK(Put(1, "key1", "v1"));
692 ASSERT_OK(Put(1, "key2", "v2"));
693 ASSERT_OK(Put(1, "key3", "v3"));
694 ASSERT_OK(Put(1, "key4", "v4"));
695 ASSERT_OK(Put(1, "key5", "v5"));
696 }
697
698 VerifyIterLast("key5->v5", 1);
699
700 ASSERT_OK(Delete(1, "key5"));
701 VerifyIterLast("key4->v4", 1);
702
703 ASSERT_OK(Delete(1, "key4"));
704 VerifyIterLast("key3->v3", 1);
705
706 ASSERT_OK(Delete(1, "key3"));
707 VerifyIterLast("key2->v2", 1);
708
709 ASSERT_OK(Delete(1, "key2"));
710 VerifyIterLast("key1->v1", 1);
711
712 ASSERT_OK(Delete(1, "key1"));
713 VerifyIterLast("(invalid)", 1);
714 } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast));
715 }
716
717 TEST_P(DBIteratorTest, IterWithSnapshot) {
718 anon::OptionsOverride options_override;
719 options_override.skip_policy = kSkipNoSnapshot;
720 do {
721 CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
722 ASSERT_OK(Put(1, "key1", "val1"));
723 ASSERT_OK(Put(1, "key2", "val2"));
724 ASSERT_OK(Put(1, "key3", "val3"));
725 ASSERT_OK(Put(1, "key4", "val4"));
726 ASSERT_OK(Put(1, "key5", "val5"));
727
728 const Snapshot* snapshot = db_->GetSnapshot();
729 ReadOptions options;
730 options.snapshot = snapshot;
731 Iterator* iter = NewIterator(options, handles_[1]);
732
733 ASSERT_OK(Put(1, "key0", "val0"));
734 // Put more values after the snapshot
735 ASSERT_OK(Put(1, "key100", "val100"));
736 ASSERT_OK(Put(1, "key101", "val101"));
737
738 iter->Seek("key5");
739 ASSERT_EQ(IterStatus(iter), "key5->val5");
740 if (!CurrentOptions().merge_operator) {
741 // TODO: merge operator does not support backward iteration yet
742 if (kPlainTableAllBytesPrefix != option_config_ &&
743 kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
744 kHashLinkList != option_config_ && kHashSkipList != option_config_) {
745 iter->Prev();
746 ASSERT_EQ(IterStatus(iter), "key4->val4");
747 iter->Prev();
748 ASSERT_EQ(IterStatus(iter), "key3->val3");
749
750 iter->Next();
751 ASSERT_EQ(IterStatus(iter), "key4->val4");
752 iter->Next();
753 ASSERT_EQ(IterStatus(iter), "key5->val5");
754 }
755 iter->Next();
756 ASSERT_TRUE(!iter->Valid());
757 }
758
759 if (!CurrentOptions().merge_operator) {
760 // TODO(gzh): merge operator does not support backward iteration yet
761 if (kPlainTableAllBytesPrefix != option_config_ &&
762 kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
763 kHashLinkList != option_config_ && kHashSkipList != option_config_) {
764 iter->SeekForPrev("key1");
765 ASSERT_EQ(IterStatus(iter), "key1->val1");
766 iter->Next();
767 ASSERT_EQ(IterStatus(iter), "key2->val2");
768 iter->Next();
769 ASSERT_EQ(IterStatus(iter), "key3->val3");
770 iter->Prev();
771 ASSERT_EQ(IterStatus(iter), "key2->val2");
772 iter->Prev();
773 ASSERT_EQ(IterStatus(iter), "key1->val1");
774 iter->Prev();
775 ASSERT_TRUE(!iter->Valid());
776 }
777 }
778 db_->ReleaseSnapshot(snapshot);
779 delete iter;
780 } while (ChangeOptions());
781 }
782
783 TEST_P(DBIteratorTest, IteratorPinsRef) {
784 do {
785 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
786 Put(1, "foo", "hello");
787
788 // Get iterator that will yield the current contents of the DB.
789 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
790
791 // Write to force compactions
792 Put(1, "foo", "newvalue1");
793 for (int i = 0; i < 100; i++) {
794 // 100K values
795 ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v')));
796 }
797 Put(1, "foo", "newvalue2");
798
799 iter->SeekToFirst();
800 ASSERT_TRUE(iter->Valid());
801 ASSERT_EQ("foo", iter->key().ToString());
802 ASSERT_EQ("hello", iter->value().ToString());
803 iter->Next();
804 ASSERT_TRUE(!iter->Valid());
805 delete iter;
806 } while (ChangeCompactOptions());
807 }
808
809 TEST_P(DBIteratorTest, IteratorDeleteAfterCfDelete) {
810 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
811
812 Put(1, "foo", "delete-cf-then-delete-iter");
813 Put(1, "hello", "value2");
814
815 ColumnFamilyHandle* cf = handles_[1];
816 ReadOptions ro;
817
818 auto* iter = db_->NewIterator(ro, cf);
819 iter->SeekToFirst();
820 ASSERT_EQ(IterStatus(iter), "foo->delete-cf-then-delete-iter");
821
822 // delete CF handle
823 db_->DestroyColumnFamilyHandle(cf);
824 handles_.erase(std::begin(handles_) + 1);
825
826 // delete Iterator after CF handle is deleted
827 iter->Next();
828 ASSERT_EQ(IterStatus(iter), "hello->value2");
829 delete iter;
830 }
831
832 TEST_P(DBIteratorTest, IteratorDeleteAfterCfDrop) {
833 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
834
835 Put(1, "foo", "drop-cf-then-delete-iter");
836
837 ReadOptions ro;
838 ColumnFamilyHandle* cf = handles_[1];
839
840 auto* iter = db_->NewIterator(ro, cf);
841 iter->SeekToFirst();
842 ASSERT_EQ(IterStatus(iter), "foo->drop-cf-then-delete-iter");
843
844 // drop and delete CF
845 db_->DropColumnFamily(cf);
846 db_->DestroyColumnFamilyHandle(cf);
847 handles_.erase(std::begin(handles_) + 1);
848
849 // delete Iterator after CF handle is dropped
850 delete iter;
851 }
852
853 // SetOptions not defined in ROCKSDB LITE
854 #ifndef ROCKSDB_LITE
855 TEST_P(DBIteratorTest, DBIteratorBoundTest) {
856 Options options = CurrentOptions();
857 options.env = env_;
858 options.create_if_missing = true;
859
860 options.prefix_extractor = nullptr;
861 DestroyAndReopen(options);
862 ASSERT_OK(Put("a", "0"));
863 ASSERT_OK(Put("foo", "bar"));
864 ASSERT_OK(Put("foo1", "bar1"));
865 ASSERT_OK(Put("g1", "0"));
866
867 // testing basic case with no iterate_upper_bound and no prefix_extractor
868 {
869 ReadOptions ro;
870 ro.iterate_upper_bound = nullptr;
871
872 std::unique_ptr<Iterator> iter(NewIterator(ro));
873
874 iter->Seek("foo");
875
876 ASSERT_TRUE(iter->Valid());
877 ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
878
879 iter->Next();
880 ASSERT_TRUE(iter->Valid());
881 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
882
883 iter->Next();
884 ASSERT_TRUE(iter->Valid());
885 ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
886
887 iter->SeekForPrev("g1");
888
889 ASSERT_TRUE(iter->Valid());
890 ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
891
892 iter->Prev();
893 ASSERT_TRUE(iter->Valid());
894 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
895
896 iter->Prev();
897 ASSERT_TRUE(iter->Valid());
898 ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
899 }
900
901 // testing iterate_upper_bound and forward iterator
902 // to make sure it stops at bound
903 {
904 ReadOptions ro;
905 // iterate_upper_bound points beyond the last expected entry
906 Slice prefix("foo2");
907 ro.iterate_upper_bound = &prefix;
908
909 std::unique_ptr<Iterator> iter(NewIterator(ro));
910
911 iter->Seek("foo");
912
913 ASSERT_TRUE(iter->Valid());
914 ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
915
916 iter->Next();
917 ASSERT_TRUE(iter->Valid());
918 ASSERT_EQ(iter->key().compare(("foo1")), 0);
919
920 iter->Next();
921 // should stop here...
922 ASSERT_TRUE(!iter->Valid());
923 }
924 // Testing SeekToLast with iterate_upper_bound set
925 {
926 ReadOptions ro;
927
928 Slice prefix("foo");
929 ro.iterate_upper_bound = &prefix;
930
931 std::unique_ptr<Iterator> iter(NewIterator(ro));
932
933 iter->SeekToLast();
934 ASSERT_TRUE(iter->Valid());
935 ASSERT_EQ(iter->key().compare(Slice("a")), 0);
936 }
937
938 // prefix is the first letter of the key
939 ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}}));
940 ASSERT_OK(Put("a", "0"));
941 ASSERT_OK(Put("foo", "bar"));
942 ASSERT_OK(Put("foo1", "bar1"));
943 ASSERT_OK(Put("g1", "0"));
944
945 // testing with iterate_upper_bound and prefix_extractor
946 // Seek target and iterate_upper_bound are not is same prefix
947 // This should be an error
948 {
949 ReadOptions ro;
950 Slice upper_bound("g");
951 ro.iterate_upper_bound = &upper_bound;
952
953 std::unique_ptr<Iterator> iter(NewIterator(ro));
954
955 iter->Seek("foo");
956
957 ASSERT_TRUE(iter->Valid());
958 ASSERT_EQ("foo", iter->key().ToString());
959
960 iter->Next();
961 ASSERT_TRUE(iter->Valid());
962 ASSERT_EQ("foo1", iter->key().ToString());
963
964 iter->Next();
965 ASSERT_TRUE(!iter->Valid());
966 }
967
968 // testing that iterate_upper_bound prevents iterating over deleted items
969 // if the bound has already reached
970 {
971 options.prefix_extractor = nullptr;
972 DestroyAndReopen(options);
973 ASSERT_OK(Put("a", "0"));
974 ASSERT_OK(Put("b", "0"));
975 ASSERT_OK(Put("b1", "0"));
976 ASSERT_OK(Put("c", "0"));
977 ASSERT_OK(Put("d", "0"));
978 ASSERT_OK(Put("e", "0"));
979 ASSERT_OK(Delete("c"));
980 ASSERT_OK(Delete("d"));
981
982 // base case with no bound
983 ReadOptions ro;
984 ro.iterate_upper_bound = nullptr;
985
986 std::unique_ptr<Iterator> iter(NewIterator(ro));
987
988 iter->Seek("b");
989 ASSERT_TRUE(iter->Valid());
990 ASSERT_EQ(iter->key().compare(Slice("b")), 0);
991
992 iter->Next();
993 ASSERT_TRUE(iter->Valid());
994 ASSERT_EQ(iter->key().compare(("b1")), 0);
995
996 get_perf_context()->Reset();
997 iter->Next();
998
999 ASSERT_TRUE(iter->Valid());
1000 ASSERT_EQ(static_cast<int>(get_perf_context()->internal_delete_skipped_count), 2);
1001
1002 // now testing with iterate_bound
1003 Slice prefix("c");
1004 ro.iterate_upper_bound = &prefix;
1005
1006 iter.reset(NewIterator(ro));
1007
1008 get_perf_context()->Reset();
1009
1010 iter->Seek("b");
1011 ASSERT_TRUE(iter->Valid());
1012 ASSERT_EQ(iter->key().compare(Slice("b")), 0);
1013
1014 iter->Next();
1015 ASSERT_TRUE(iter->Valid());
1016 ASSERT_EQ(iter->key().compare(("b1")), 0);
1017
1018 iter->Next();
1019 // the iteration should stop as soon as the bound key is reached
1020 // even though the key is deleted
1021 // hence internal_delete_skipped_count should be 0
1022 ASSERT_TRUE(!iter->Valid());
1023 ASSERT_EQ(static_cast<int>(get_perf_context()->internal_delete_skipped_count), 0);
1024 }
1025 }
1026
1027 TEST_P(DBIteratorTest, DBIteratorBoundMultiSeek) {
1028 Options options = CurrentOptions();
1029 options.env = env_;
1030 options.create_if_missing = true;
1031 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1032 options.prefix_extractor = nullptr;
1033 DestroyAndReopen(options);
1034 ASSERT_OK(Put("a", "0"));
1035 ASSERT_OK(Put("z", "0"));
1036 ASSERT_OK(Flush());
1037 ASSERT_OK(Put("foo1", "bar1"));
1038 ASSERT_OK(Put("foo2", "bar2"));
1039 ASSERT_OK(Put("foo3", "bar3"));
1040 ASSERT_OK(Put("foo4", "bar4"));
1041
1042 {
1043 std::string up_str = "foo5";
1044 Slice up(up_str);
1045 ReadOptions ro;
1046 ro.iterate_upper_bound = &up;
1047 std::unique_ptr<Iterator> iter(NewIterator(ro));
1048
1049 iter->Seek("foo1");
1050 ASSERT_TRUE(iter->Valid());
1051 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
1052
1053 uint64_t prev_block_cache_hit =
1054 TestGetTickerCount(options, BLOCK_CACHE_HIT);
1055 uint64_t prev_block_cache_miss =
1056 TestGetTickerCount(options, BLOCK_CACHE_MISS);
1057
1058 ASSERT_GT(prev_block_cache_hit + prev_block_cache_miss, 0);
1059
1060 iter->Seek("foo4");
1061 ASSERT_TRUE(iter->Valid());
1062 ASSERT_EQ(iter->key().compare(Slice("foo4")), 0);
1063 ASSERT_EQ(prev_block_cache_hit,
1064 TestGetTickerCount(options, BLOCK_CACHE_HIT));
1065 ASSERT_EQ(prev_block_cache_miss,
1066 TestGetTickerCount(options, BLOCK_CACHE_MISS));
1067
1068 iter->Seek("foo2");
1069 ASSERT_TRUE(iter->Valid());
1070 ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
1071 iter->Next();
1072 ASSERT_TRUE(iter->Valid());
1073 ASSERT_EQ(iter->key().compare(Slice("foo3")), 0);
1074 ASSERT_EQ(prev_block_cache_hit,
1075 TestGetTickerCount(options, BLOCK_CACHE_HIT));
1076 ASSERT_EQ(prev_block_cache_miss,
1077 TestGetTickerCount(options, BLOCK_CACHE_MISS));
1078 }
1079 }
1080 #endif
1081
1082 TEST_P(DBIteratorTest, DBIteratorBoundOptimizationTest) {
1083 for (auto format_version : {2, 3, 4}) {
1084 int upper_bound_hits = 0;
1085 Options options = CurrentOptions();
1086 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1087 "BlockBasedTableIterator:out_of_bound",
1088 [&upper_bound_hits](void*) { upper_bound_hits++; });
1089 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1090 options.env = env_;
1091 options.create_if_missing = true;
1092 options.prefix_extractor = nullptr;
1093 BlockBasedTableOptions table_options;
1094 table_options.format_version = format_version;
1095 table_options.flush_block_policy_factory =
1096 std::make_shared<FlushBlockEveryKeyPolicyFactory>();
1097 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1098
1099 DestroyAndReopen(options);
1100 ASSERT_OK(Put("foo1", "bar1"));
1101 ASSERT_OK(Put("foo2", "bar2"));
1102 ASSERT_OK(Put("foo4", "bar4"));
1103 ASSERT_OK(Flush());
1104
1105 Slice ub("foo3");
1106 ReadOptions ro;
1107 ro.iterate_upper_bound = &ub;
1108
1109 std::unique_ptr<Iterator> iter(NewIterator(ro));
1110
1111 iter->Seek("foo");
1112 ASSERT_TRUE(iter->Valid());
1113 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
1114 ASSERT_EQ(upper_bound_hits, 0);
1115
1116 iter->Next();
1117 ASSERT_TRUE(iter->Valid());
1118 ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
1119 ASSERT_EQ(upper_bound_hits, 0);
1120
1121 iter->Next();
1122 ASSERT_FALSE(iter->Valid());
1123 ASSERT_EQ(upper_bound_hits, 1);
1124 }
1125 }
1126
1127 // Enable kBinarySearchWithFirstKey, do some iterator operations and check that
1128 // they don't do unnecessary block reads.
1129 TEST_P(DBIteratorTest, IndexWithFirstKey) {
1130 for (int tailing = 0; tailing < 2; ++tailing) {
1131 SCOPED_TRACE("tailing = " + std::to_string(tailing));
1132 Options options = CurrentOptions();
1133 options.env = env_;
1134 options.create_if_missing = true;
1135 options.prefix_extractor = nullptr;
1136 options.merge_operator = MergeOperators::CreateStringAppendOperator();
1137 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1138 Statistics* stats = options.statistics.get();
1139 BlockBasedTableOptions table_options;
1140 table_options.index_type =
1141 BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey;
1142 table_options.index_shortening =
1143 BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
1144 table_options.flush_block_policy_factory =
1145 std::make_shared<FlushBlockEveryKeyPolicyFactory>();
1146 table_options.block_cache =
1147 NewLRUCache(8000); // fits all blocks and their cache metadata overhead
1148 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1149
1150 DestroyAndReopen(options);
1151 ASSERT_OK(Merge("a1", "x1"));
1152 ASSERT_OK(Merge("b1", "y1"));
1153 ASSERT_OK(Merge("c0", "z1"));
1154 ASSERT_OK(Flush());
1155 ASSERT_OK(Merge("a2", "x2"));
1156 ASSERT_OK(Merge("b2", "y2"));
1157 ASSERT_OK(Merge("c0", "z2"));
1158 ASSERT_OK(Flush());
1159 ASSERT_OK(Merge("a3", "x3"));
1160 ASSERT_OK(Merge("b3", "y3"));
1161 ASSERT_OK(Merge("c3", "z3"));
1162 ASSERT_OK(Flush());
1163
1164 // Block cache is not important for this test.
1165 // We use BLOCK_CACHE_DATA_* counters just because they're the most readily
1166 // available way of counting block accesses.
1167
1168 ReadOptions ropt;
1169 ropt.tailing = tailing;
1170 std::unique_ptr<Iterator> iter(NewIterator(ropt));
1171
1172 ropt.read_tier = ReadTier::kBlockCacheTier;
1173 std::unique_ptr<Iterator> nonblocking_iter(NewIterator(ropt));
1174
1175 iter->Seek("b10");
1176 ASSERT_TRUE(iter->Valid());
1177 EXPECT_EQ("b2", iter->key().ToString());
1178 EXPECT_EQ("y2", iter->value().ToString());
1179 EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1180
1181 // The cache-only iterator should succeed too, using the blocks pulled into
1182 // the cache by the previous iterator.
1183 nonblocking_iter->Seek("b10");
1184 ASSERT_TRUE(nonblocking_iter->Valid());
1185 EXPECT_EQ("b2", nonblocking_iter->key().ToString());
1186 EXPECT_EQ("y2", nonblocking_iter->value().ToString());
1187 EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1188
1189 // ... but it shouldn't be able to step forward since the next block is
1190 // not in cache yet.
1191 nonblocking_iter->Next();
1192 ASSERT_FALSE(nonblocking_iter->Valid());
1193 ASSERT_TRUE(nonblocking_iter->status().IsIncomplete());
1194
1195 // ... nor should a seek to the next key succeed.
1196 nonblocking_iter->Seek("b20");
1197 ASSERT_FALSE(nonblocking_iter->Valid());
1198 ASSERT_TRUE(nonblocking_iter->status().IsIncomplete());
1199
1200 iter->Next();
1201 ASSERT_TRUE(iter->Valid());
1202 EXPECT_EQ("b3", iter->key().ToString());
1203 EXPECT_EQ("y3", iter->value().ToString());
1204 EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1205 EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1206
1207 // After the blocking iterator loaded the next block, the nonblocking
1208 // iterator's seek should succeed.
1209 nonblocking_iter->Seek("b20");
1210 ASSERT_TRUE(nonblocking_iter->Valid());
1211 EXPECT_EQ("b3", nonblocking_iter->key().ToString());
1212 EXPECT_EQ("y3", nonblocking_iter->value().ToString());
1213 EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1214
1215 iter->Seek("c0");
1216 ASSERT_TRUE(iter->Valid());
1217 EXPECT_EQ("c0", iter->key().ToString());
1218 EXPECT_EQ("z1,z2", iter->value().ToString());
1219 EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1220 EXPECT_EQ(6, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1221
1222 iter->Next();
1223 ASSERT_TRUE(iter->Valid());
1224 EXPECT_EQ("c3", iter->key().ToString());
1225 EXPECT_EQ("z3", iter->value().ToString());
1226 EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1227 EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1228
1229 iter.reset();
1230
1231 // Enable iterate_upper_bound and check that iterator is not trying to read
1232 // blocks that are fully above upper bound.
1233 std::string ub = "b3";
1234 Slice ub_slice(ub);
1235 ropt.iterate_upper_bound = &ub_slice;
1236 iter.reset(NewIterator(ropt));
1237
1238 iter->Seek("b2");
1239 ASSERT_TRUE(iter->Valid());
1240 EXPECT_EQ("b2", iter->key().ToString());
1241 EXPECT_EQ("y2", iter->value().ToString());
1242 EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1243 EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1244
1245 iter->Next();
1246 ASSERT_FALSE(iter->Valid());
1247 EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1248 EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1249 }
1250 }
1251
1252 TEST_P(DBIteratorTest, IndexWithFirstKeyGet) {
1253 Options options = CurrentOptions();
1254 options.env = env_;
1255 options.create_if_missing = true;
1256 options.prefix_extractor = nullptr;
1257 options.merge_operator = MergeOperators::CreateStringAppendOperator();
1258 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1259 Statistics* stats = options.statistics.get();
1260 BlockBasedTableOptions table_options;
1261 table_options.index_type =
1262 BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey;
1263 table_options.index_shortening =
1264 BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
1265 table_options.flush_block_policy_factory =
1266 std::make_shared<FlushBlockEveryKeyPolicyFactory>();
1267 table_options.block_cache = NewLRUCache(1000); // fits all blocks
1268 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1269
1270 DestroyAndReopen(options);
1271 ASSERT_OK(Merge("a", "x1"));
1272 ASSERT_OK(Merge("c", "y1"));
1273 ASSERT_OK(Merge("e", "z1"));
1274 ASSERT_OK(Flush());
1275 ASSERT_OK(Merge("c", "y2"));
1276 ASSERT_OK(Merge("e", "z2"));
1277 ASSERT_OK(Flush());
1278
1279 // Get() between blocks shouldn't read any blocks.
1280 ASSERT_EQ("NOT_FOUND", Get("b"));
1281 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1282 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1283
1284 // Get() of an existing key shouldn't read any unnecessary blocks when there's
1285 // only one key per block.
1286
1287 ASSERT_EQ("y1,y2", Get("c"));
1288 EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1289 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1290
1291 ASSERT_EQ("x1", Get("a"));
1292 EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1293 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1294
1295 EXPECT_EQ(std::vector<std::string>({"NOT_FOUND", "z1,z2"}),
1296 MultiGet({"b", "e"}));
1297 }
1298
1299 // TODO(3.13): fix the issue of Seek() + Prev() which might not necessary
1300 // return the biggest key which is smaller than the seek key.
1301 TEST_P(DBIteratorTest, PrevAfterAndNextAfterMerge) {
1302 Options options;
1303 options.create_if_missing = true;
1304 options.merge_operator = MergeOperators::CreatePutOperator();
1305 options.env = env_;
1306 DestroyAndReopen(options);
1307
1308 // write three entries with different keys using Merge()
1309 WriteOptions wopts;
1310 db_->Merge(wopts, "1", "data1");
1311 db_->Merge(wopts, "2", "data2");
1312 db_->Merge(wopts, "3", "data3");
1313
1314 std::unique_ptr<Iterator> it(NewIterator(ReadOptions()));
1315
1316 it->Seek("2");
1317 ASSERT_TRUE(it->Valid());
1318 ASSERT_EQ("2", it->key().ToString());
1319
1320 it->Prev();
1321 ASSERT_TRUE(it->Valid());
1322 ASSERT_EQ("1", it->key().ToString());
1323
1324 it->SeekForPrev("1");
1325 ASSERT_TRUE(it->Valid());
1326 ASSERT_EQ("1", it->key().ToString());
1327
1328 it->Next();
1329 ASSERT_TRUE(it->Valid());
1330 ASSERT_EQ("2", it->key().ToString());
1331 }
1332
1333 class DBIteratorTestForPinnedData : public DBIteratorTest {
1334 public:
1335 enum TestConfig {
1336 NORMAL,
1337 CLOSE_AND_OPEN,
1338 COMPACT_BEFORE_READ,
1339 FLUSH_EVERY_1000,
1340 MAX
1341 };
1342 DBIteratorTestForPinnedData() : DBIteratorTest() {}
1343 void PinnedDataIteratorRandomized(TestConfig run_config) {
1344 // Generate Random data
1345 Random rnd(301);
1346
1347 int puts = 100000;
1348 int key_pool = static_cast<int>(puts * 0.7);
1349 int key_size = 100;
1350 int val_size = 1000;
1351 int seeks_percentage = 20; // 20% of keys will be used to test seek()
1352 int delete_percentage = 20; // 20% of keys will be deleted
1353 int merge_percentage = 20; // 20% of keys will be added using Merge()
1354
1355 Options options = CurrentOptions();
1356 BlockBasedTableOptions table_options;
1357 table_options.use_delta_encoding = false;
1358 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1359 options.merge_operator = MergeOperators::CreatePutOperator();
1360 DestroyAndReopen(options);
1361
1362 std::vector<std::string> generated_keys(key_pool);
1363 for (int i = 0; i < key_pool; i++) {
1364 generated_keys[i] = rnd.RandomString(key_size);
1365 }
1366
1367 std::map<std::string, std::string> true_data;
1368 std::vector<std::string> random_keys;
1369 std::vector<std::string> deleted_keys;
1370 for (int i = 0; i < puts; i++) {
1371 auto& k = generated_keys[rnd.Next() % key_pool];
1372 auto v = rnd.RandomString(val_size);
1373
1374 // Insert data to true_data map and to DB
1375 true_data[k] = v;
1376 if (rnd.PercentTrue(merge_percentage)) {
1377 ASSERT_OK(db_->Merge(WriteOptions(), k, v));
1378 } else {
1379 ASSERT_OK(Put(k, v));
1380 }
1381
1382 // Pick random keys to be used to test Seek()
1383 if (rnd.PercentTrue(seeks_percentage)) {
1384 random_keys.push_back(k);
1385 }
1386
1387 // Delete some random keys
1388 if (rnd.PercentTrue(delete_percentage)) {
1389 deleted_keys.push_back(k);
1390 true_data.erase(k);
1391 ASSERT_OK(Delete(k));
1392 }
1393
1394 if (run_config == TestConfig::FLUSH_EVERY_1000) {
1395 if (i && i % 1000 == 0) {
1396 Flush();
1397 }
1398 }
1399 }
1400
1401 if (run_config == TestConfig::CLOSE_AND_OPEN) {
1402 Close();
1403 Reopen(options);
1404 } else if (run_config == TestConfig::COMPACT_BEFORE_READ) {
1405 db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
1406 }
1407
1408 ReadOptions ro;
1409 ro.pin_data = true;
1410 auto iter = NewIterator(ro);
1411
1412 {
1413 // Test Seek to random keys
1414 std::vector<Slice> keys_slices;
1415 std::vector<std::string> true_keys;
1416 for (auto& k : random_keys) {
1417 iter->Seek(k);
1418 if (!iter->Valid()) {
1419 ASSERT_EQ(true_data.lower_bound(k), true_data.end());
1420 continue;
1421 }
1422 std::string prop_value;
1423 ASSERT_OK(
1424 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1425 ASSERT_EQ("1", prop_value);
1426 keys_slices.push_back(iter->key());
1427 true_keys.push_back(true_data.lower_bound(k)->first);
1428 }
1429
1430 for (size_t i = 0; i < keys_slices.size(); i++) {
1431 ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
1432 }
1433 }
1434
1435 {
1436 // Test SeekForPrev to random keys
1437 std::vector<Slice> keys_slices;
1438 std::vector<std::string> true_keys;
1439 for (auto& k : random_keys) {
1440 iter->SeekForPrev(k);
1441 if (!iter->Valid()) {
1442 ASSERT_EQ(true_data.upper_bound(k), true_data.begin());
1443 continue;
1444 }
1445 std::string prop_value;
1446 ASSERT_OK(
1447 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1448 ASSERT_EQ("1", prop_value);
1449 keys_slices.push_back(iter->key());
1450 true_keys.push_back((--true_data.upper_bound(k))->first);
1451 }
1452
1453 for (size_t i = 0; i < keys_slices.size(); i++) {
1454 ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
1455 }
1456 }
1457
1458 {
1459 // Test iterating all data forward
1460 std::vector<Slice> all_keys;
1461 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1462 std::string prop_value;
1463 ASSERT_OK(
1464 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1465 ASSERT_EQ("1", prop_value);
1466 all_keys.push_back(iter->key());
1467 }
1468 ASSERT_EQ(all_keys.size(), true_data.size());
1469
1470 // Verify that all keys slices are valid
1471 auto data_iter = true_data.begin();
1472 for (size_t i = 0; i < all_keys.size(); i++) {
1473 ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
1474 data_iter++;
1475 }
1476 }
1477
1478 {
1479 // Test iterating all data backward
1480 std::vector<Slice> all_keys;
1481 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
1482 std::string prop_value;
1483 ASSERT_OK(
1484 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1485 ASSERT_EQ("1", prop_value);
1486 all_keys.push_back(iter->key());
1487 }
1488 ASSERT_EQ(all_keys.size(), true_data.size());
1489
1490 // Verify that all keys slices are valid (backward)
1491 auto data_iter = true_data.rbegin();
1492 for (size_t i = 0; i < all_keys.size(); i++) {
1493 ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
1494 data_iter++;
1495 }
1496 }
1497
1498 delete iter;
1499 }
1500 };
1501
1502 TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedNormal) {
1503 PinnedDataIteratorRandomized(TestConfig::NORMAL);
1504 }
1505
1506 TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedCLoseAndOpen) {
1507 PinnedDataIteratorRandomized(TestConfig::CLOSE_AND_OPEN);
1508 }
1509
1510 TEST_P(DBIteratorTestForPinnedData,
1511 PinnedDataIteratorRandomizedCompactBeforeRead) {
1512 PinnedDataIteratorRandomized(TestConfig::COMPACT_BEFORE_READ);
1513 }
1514
1515 TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedFlush) {
1516 PinnedDataIteratorRandomized(TestConfig::FLUSH_EVERY_1000);
1517 }
1518
1519 #ifndef ROCKSDB_LITE
1520 TEST_P(DBIteratorTest, PinnedDataIteratorMultipleFiles) {
1521 Options options = CurrentOptions();
1522 BlockBasedTableOptions table_options;
1523 table_options.use_delta_encoding = false;
1524 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1525 options.disable_auto_compactions = true;
1526 options.write_buffer_size = 1024 * 1024 * 10; // 10 Mb
1527 DestroyAndReopen(options);
1528
1529 std::map<std::string, std::string> true_data;
1530
1531 // Generate 4 sst files in L2
1532 Random rnd(301);
1533 for (int i = 1; i <= 1000; i++) {
1534 std::string k = Key(i * 3);
1535 std::string v = rnd.RandomString(100);
1536 ASSERT_OK(Put(k, v));
1537 true_data[k] = v;
1538 if (i % 250 == 0) {
1539 ASSERT_OK(Flush());
1540 }
1541 }
1542 ASSERT_EQ(FilesPerLevel(0), "4");
1543 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1544 ASSERT_EQ(FilesPerLevel(0), "0,4");
1545
1546 // Generate 4 sst files in L0
1547 for (int i = 1; i <= 1000; i++) {
1548 std::string k = Key(i * 2);
1549 std::string v = rnd.RandomString(100);
1550 ASSERT_OK(Put(k, v));
1551 true_data[k] = v;
1552 if (i % 250 == 0) {
1553 ASSERT_OK(Flush());
1554 }
1555 }
1556 ASSERT_EQ(FilesPerLevel(0), "4,4");
1557
1558 // Add some keys/values in memtables
1559 for (int i = 1; i <= 1000; i++) {
1560 std::string k = Key(i);
1561 std::string v = rnd.RandomString(100);
1562 ASSERT_OK(Put(k, v));
1563 true_data[k] = v;
1564 }
1565 ASSERT_EQ(FilesPerLevel(0), "4,4");
1566
1567 ReadOptions ro;
1568 ro.pin_data = true;
1569 auto iter = NewIterator(ro);
1570
1571 std::vector<std::pair<Slice, std::string>> results;
1572 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1573 std::string prop_value;
1574 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1575 ASSERT_EQ("1", prop_value);
1576 results.emplace_back(iter->key(), iter->value().ToString());
1577 }
1578
1579 ASSERT_EQ(results.size(), true_data.size());
1580 auto data_iter = true_data.begin();
1581 for (size_t i = 0; i < results.size(); i++, data_iter++) {
1582 auto& kv = results[i];
1583 ASSERT_EQ(kv.first, data_iter->first);
1584 ASSERT_EQ(kv.second, data_iter->second);
1585 }
1586
1587 delete iter;
1588 }
1589 #endif
1590
1591 TEST_P(DBIteratorTest, PinnedDataIteratorMergeOperator) {
1592 Options options = CurrentOptions();
1593 BlockBasedTableOptions table_options;
1594 table_options.use_delta_encoding = false;
1595 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1596 options.merge_operator = MergeOperators::CreateUInt64AddOperator();
1597 DestroyAndReopen(options);
1598
1599 std::string numbers[7];
1600 for (int val = 0; val <= 6; val++) {
1601 PutFixed64(numbers + val, val);
1602 }
1603
1604 // +1 all keys in range [ 0 => 999]
1605 for (int i = 0; i < 1000; i++) {
1606 WriteOptions wo;
1607 ASSERT_OK(db_->Merge(wo, Key(i), numbers[1]));
1608 }
1609
1610 // +2 all keys divisible by 2 in range [ 0 => 999]
1611 for (int i = 0; i < 1000; i += 2) {
1612 WriteOptions wo;
1613 ASSERT_OK(db_->Merge(wo, Key(i), numbers[2]));
1614 }
1615
1616 // +3 all keys divisible by 5 in range [ 0 => 999]
1617 for (int i = 0; i < 1000; i += 5) {
1618 WriteOptions wo;
1619 ASSERT_OK(db_->Merge(wo, Key(i), numbers[3]));
1620 }
1621
1622 ReadOptions ro;
1623 ro.pin_data = true;
1624 auto iter = NewIterator(ro);
1625
1626 std::vector<std::pair<Slice, std::string>> results;
1627 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1628 std::string prop_value;
1629 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1630 ASSERT_EQ("1", prop_value);
1631 results.emplace_back(iter->key(), iter->value().ToString());
1632 }
1633
1634 ASSERT_EQ(results.size(), 1000);
1635 for (size_t i = 0; i < results.size(); i++) {
1636 auto& kv = results[i];
1637 ASSERT_EQ(kv.first, Key(static_cast<int>(i)));
1638 int expected_val = 1;
1639 if (i % 2 == 0) {
1640 expected_val += 2;
1641 }
1642 if (i % 5 == 0) {
1643 expected_val += 3;
1644 }
1645 ASSERT_EQ(kv.second, numbers[expected_val]);
1646 }
1647
1648 delete iter;
1649 }
1650
1651 TEST_P(DBIteratorTest, PinnedDataIteratorReadAfterUpdate) {
1652 Options options = CurrentOptions();
1653 BlockBasedTableOptions table_options;
1654 table_options.use_delta_encoding = false;
1655 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1656 options.write_buffer_size = 100000;
1657 DestroyAndReopen(options);
1658
1659 Random rnd(301);
1660
1661 std::map<std::string, std::string> true_data;
1662 for (int i = 0; i < 1000; i++) {
1663 std::string k = rnd.RandomString(10);
1664 std::string v = rnd.RandomString(1000);
1665 ASSERT_OK(Put(k, v));
1666 true_data[k] = v;
1667 }
1668
1669 ReadOptions ro;
1670 ro.pin_data = true;
1671 auto iter = NewIterator(ro);
1672
1673 // Delete 50% of the keys and update the other 50%
1674 for (auto& kv : true_data) {
1675 if (rnd.OneIn(2)) {
1676 ASSERT_OK(Delete(kv.first));
1677 } else {
1678 std::string new_val = rnd.RandomString(1000);
1679 ASSERT_OK(Put(kv.first, new_val));
1680 }
1681 }
1682
1683 std::vector<std::pair<Slice, std::string>> results;
1684 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1685 std::string prop_value;
1686 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1687 ASSERT_EQ("1", prop_value);
1688 results.emplace_back(iter->key(), iter->value().ToString());
1689 }
1690
1691 auto data_iter = true_data.begin();
1692 for (size_t i = 0; i < results.size(); i++, data_iter++) {
1693 auto& kv = results[i];
1694 ASSERT_EQ(kv.first, data_iter->first);
1695 ASSERT_EQ(kv.second, data_iter->second);
1696 }
1697
1698 delete iter;
1699 }
1700
1701 class SliceTransformLimitedDomainGeneric : public SliceTransform {
1702 const char* Name() const override {
1703 return "SliceTransformLimitedDomainGeneric";
1704 }
1705
1706 Slice Transform(const Slice& src) const override {
1707 return Slice(src.data(), 1);
1708 }
1709
1710 bool InDomain(const Slice& src) const override {
1711 // prefix will be x????
1712 return src.size() >= 1;
1713 }
1714
1715 bool InRange(const Slice& dst) const override {
1716 // prefix will be x????
1717 return dst.size() == 1;
1718 }
1719 };
1720
1721 TEST_P(DBIteratorTest, IterSeekForPrevCrossingFiles) {
1722 Options options = CurrentOptions();
1723 options.prefix_extractor.reset(NewFixedPrefixTransform(1));
1724 options.disable_auto_compactions = true;
1725 // Enable prefix bloom for SST files
1726 BlockBasedTableOptions table_options;
1727 table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
1728 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1729 DestroyAndReopen(options);
1730
1731 ASSERT_OK(Put("a1", "va1"));
1732 ASSERT_OK(Put("a2", "va2"));
1733 ASSERT_OK(Put("a3", "va3"));
1734 ASSERT_OK(Flush());
1735
1736 ASSERT_OK(Put("b1", "vb1"));
1737 ASSERT_OK(Put("b2", "vb2"));
1738 ASSERT_OK(Put("b3", "vb3"));
1739 ASSERT_OK(Flush());
1740
1741 ASSERT_OK(Put("b4", "vb4"));
1742 ASSERT_OK(Put("d1", "vd1"));
1743 ASSERT_OK(Put("d2", "vd2"));
1744 ASSERT_OK(Put("d4", "vd4"));
1745 ASSERT_OK(Flush());
1746
1747 MoveFilesToLevel(1);
1748 {
1749 ReadOptions ro;
1750 Iterator* iter = NewIterator(ro);
1751
1752 iter->SeekForPrev("a4");
1753 ASSERT_EQ(iter->key().ToString(), "a3");
1754 ASSERT_EQ(iter->value().ToString(), "va3");
1755
1756 iter->SeekForPrev("c2");
1757 ASSERT_EQ(iter->key().ToString(), "b3");
1758 iter->SeekForPrev("d3");
1759 ASSERT_EQ(iter->key().ToString(), "d2");
1760 iter->SeekForPrev("b5");
1761 ASSERT_EQ(iter->key().ToString(), "b4");
1762 delete iter;
1763 }
1764
1765 {
1766 ReadOptions ro;
1767 ro.prefix_same_as_start = true;
1768 Iterator* iter = NewIterator(ro);
1769 iter->SeekForPrev("c2");
1770 ASSERT_TRUE(!iter->Valid());
1771 delete iter;
1772 }
1773 }
1774
1775 TEST_P(DBIteratorTest, IterSeekForPrevCrossingFilesCustomPrefixExtractor) {
1776 Options options = CurrentOptions();
1777 options.prefix_extractor =
1778 std::make_shared<SliceTransformLimitedDomainGeneric>();
1779 options.disable_auto_compactions = true;
1780 // Enable prefix bloom for SST files
1781 BlockBasedTableOptions table_options;
1782 table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
1783 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1784 DestroyAndReopen(options);
1785
1786 ASSERT_OK(Put("a1", "va1"));
1787 ASSERT_OK(Put("a2", "va2"));
1788 ASSERT_OK(Put("a3", "va3"));
1789 ASSERT_OK(Flush());
1790
1791 ASSERT_OK(Put("b1", "vb1"));
1792 ASSERT_OK(Put("b2", "vb2"));
1793 ASSERT_OK(Put("b3", "vb3"));
1794 ASSERT_OK(Flush());
1795
1796 ASSERT_OK(Put("b4", "vb4"));
1797 ASSERT_OK(Put("d1", "vd1"));
1798 ASSERT_OK(Put("d2", "vd2"));
1799 ASSERT_OK(Put("d4", "vd4"));
1800 ASSERT_OK(Flush());
1801
1802 MoveFilesToLevel(1);
1803 {
1804 ReadOptions ro;
1805 Iterator* iter = NewIterator(ro);
1806
1807 iter->SeekForPrev("a4");
1808 ASSERT_EQ(iter->key().ToString(), "a3");
1809 ASSERT_EQ(iter->value().ToString(), "va3");
1810
1811 iter->SeekForPrev("c2");
1812 ASSERT_EQ(iter->key().ToString(), "b3");
1813 iter->SeekForPrev("d3");
1814 ASSERT_EQ(iter->key().ToString(), "d2");
1815 iter->SeekForPrev("b5");
1816 ASSERT_EQ(iter->key().ToString(), "b4");
1817 delete iter;
1818 }
1819
1820 {
1821 ReadOptions ro;
1822 ro.prefix_same_as_start = true;
1823 Iterator* iter = NewIterator(ro);
1824 iter->SeekForPrev("c2");
1825 ASSERT_TRUE(!iter->Valid());
1826 delete iter;
1827 }
1828 }
1829
1830 TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocks) {
1831 Options options = CurrentOptions();
1832 BlockBasedTableOptions table_options;
1833 table_options.block_size = 1; // every block will contain one entry
1834 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1835 options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
1836 options.disable_auto_compactions = true;
1837 options.max_sequential_skip_in_iterations = 8;
1838
1839 DestroyAndReopen(options);
1840
1841 // Putting such deletes will force DBIter::Prev() to fallback to a Seek
1842 for (int file_num = 0; file_num < 10; file_num++) {
1843 ASSERT_OK(Delete("key4"));
1844 ASSERT_OK(Flush());
1845 }
1846
1847 // First File containing 5 blocks of puts
1848 ASSERT_OK(Put("key1", "val1.0"));
1849 ASSERT_OK(Put("key2", "val2.0"));
1850 ASSERT_OK(Put("key3", "val3.0"));
1851 ASSERT_OK(Put("key4", "val4.0"));
1852 ASSERT_OK(Put("key5", "val5.0"));
1853 ASSERT_OK(Flush());
1854
1855 // Second file containing 9 blocks of merge operands
1856 ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.1"));
1857 ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.2"));
1858
1859 ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.1"));
1860 ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.2"));
1861 ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.3"));
1862
1863 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.1"));
1864 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.2"));
1865 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.3"));
1866 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.4"));
1867 ASSERT_OK(Flush());
1868
1869 {
1870 ReadOptions ro;
1871 ro.fill_cache = false;
1872 Iterator* iter = NewIterator(ro);
1873
1874 iter->SeekToLast();
1875 ASSERT_EQ(iter->key().ToString(), "key5");
1876 ASSERT_EQ(iter->value().ToString(), "val5.0");
1877
1878 iter->Prev();
1879 ASSERT_EQ(iter->key().ToString(), "key4");
1880 ASSERT_EQ(iter->value().ToString(), "val4.0");
1881
1882 iter->Prev();
1883 ASSERT_EQ(iter->key().ToString(), "key3");
1884 ASSERT_EQ(iter->value().ToString(), "val3.0,val3.1,val3.2,val3.3,val3.4");
1885
1886 iter->Prev();
1887 ASSERT_EQ(iter->key().ToString(), "key2");
1888 ASSERT_EQ(iter->value().ToString(), "val2.0,val2.1,val2.2,val2.3");
1889
1890 iter->Prev();
1891 ASSERT_EQ(iter->key().ToString(), "key1");
1892 ASSERT_EQ(iter->value().ToString(), "val1.0,val1.1,val1.2");
1893
1894 delete iter;
1895 }
1896 }
1897
1898 TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocksRandomized) {
1899 Options options = CurrentOptions();
1900 options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
1901 options.disable_auto_compactions = true;
1902 options.level0_slowdown_writes_trigger = (1 << 30);
1903 options.level0_stop_writes_trigger = (1 << 30);
1904 options.max_sequential_skip_in_iterations = 8;
1905 DestroyAndReopen(options);
1906
1907 const int kNumKeys = 500;
1908 // Small number of merge operands to make sure that DBIter::Prev() don't
1909 // fall back to Seek()
1910 const int kNumMergeOperands = 3;
1911 // Use value size that will make sure that every block contain 1 key
1912 const int kValSize =
1913 static_cast<int>(BlockBasedTableOptions().block_size) * 4;
1914 // Percentage of keys that wont get merge operations
1915 const int kNoMergeOpPercentage = 20;
1916 // Percentage of keys that will be deleted
1917 const int kDeletePercentage = 10;
1918
1919 // For half of the key range we will write multiple deletes first to
1920 // force DBIter::Prev() to fall back to Seek()
1921 for (int file_num = 0; file_num < 10; file_num++) {
1922 for (int i = 0; i < kNumKeys; i += 2) {
1923 ASSERT_OK(Delete(Key(i)));
1924 }
1925 ASSERT_OK(Flush());
1926 }
1927
1928 Random rnd(301);
1929 std::map<std::string, std::string> true_data;
1930 std::string gen_key;
1931 std::string gen_val;
1932
1933 for (int i = 0; i < kNumKeys; i++) {
1934 gen_key = Key(i);
1935 gen_val = rnd.RandomString(kValSize);
1936
1937 ASSERT_OK(Put(gen_key, gen_val));
1938 true_data[gen_key] = gen_val;
1939 }
1940 ASSERT_OK(Flush());
1941
1942 // Separate values and merge operands in different file so that we
1943 // make sure that we don't merge them while flushing but actually
1944 // merge them in the read path
1945 for (int i = 0; i < kNumKeys; i++) {
1946 if (rnd.PercentTrue(kNoMergeOpPercentage)) {
1947 // Dont give merge operations for some keys
1948 continue;
1949 }
1950
1951 for (int j = 0; j < kNumMergeOperands; j++) {
1952 gen_key = Key(i);
1953 gen_val = rnd.RandomString(kValSize);
1954
1955 ASSERT_OK(db_->Merge(WriteOptions(), gen_key, gen_val));
1956 true_data[gen_key] += "," + gen_val;
1957 }
1958 }
1959 ASSERT_OK(Flush());
1960
1961 for (int i = 0; i < kNumKeys; i++) {
1962 if (rnd.PercentTrue(kDeletePercentage)) {
1963 gen_key = Key(i);
1964
1965 ASSERT_OK(Delete(gen_key));
1966 true_data.erase(gen_key);
1967 }
1968 }
1969 ASSERT_OK(Flush());
1970
1971 {
1972 ReadOptions ro;
1973 ro.fill_cache = false;
1974 Iterator* iter = NewIterator(ro);
1975 auto data_iter = true_data.rbegin();
1976
1977 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
1978 ASSERT_EQ(iter->key().ToString(), data_iter->first);
1979 ASSERT_EQ(iter->value().ToString(), data_iter->second);
1980 data_iter++;
1981 }
1982 ASSERT_EQ(data_iter, true_data.rend());
1983
1984 delete iter;
1985 }
1986
1987 {
1988 ReadOptions ro;
1989 ro.fill_cache = false;
1990 Iterator* iter = NewIterator(ro);
1991 auto data_iter = true_data.rbegin();
1992
1993 int entries_right = 0;
1994 std::string seek_key;
1995 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
1996 // Verify key/value of current position
1997 ASSERT_EQ(iter->key().ToString(), data_iter->first);
1998 ASSERT_EQ(iter->value().ToString(), data_iter->second);
1999
2000 bool restore_position_with_seek = rnd.Uniform(2);
2001 if (restore_position_with_seek) {
2002 seek_key = iter->key().ToString();
2003 }
2004
2005 // Do some Next() operations the restore the iterator to orignal position
2006 int next_count =
2007 entries_right > 0 ? rnd.Uniform(std::min(entries_right, 10)) : 0;
2008 for (int i = 0; i < next_count; i++) {
2009 iter->Next();
2010 data_iter--;
2011
2012 ASSERT_EQ(iter->key().ToString(), data_iter->first);
2013 ASSERT_EQ(iter->value().ToString(), data_iter->second);
2014 }
2015
2016 if (restore_position_with_seek) {
2017 // Restore orignal position using Seek()
2018 iter->Seek(seek_key);
2019 for (int i = 0; i < next_count; i++) {
2020 data_iter++;
2021 }
2022
2023 ASSERT_EQ(iter->key().ToString(), data_iter->first);
2024 ASSERT_EQ(iter->value().ToString(), data_iter->second);
2025 } else {
2026 // Restore original position using Prev()
2027 for (int i = 0; i < next_count; i++) {
2028 iter->Prev();
2029 data_iter++;
2030
2031 ASSERT_EQ(iter->key().ToString(), data_iter->first);
2032 ASSERT_EQ(iter->value().ToString(), data_iter->second);
2033 }
2034 }
2035
2036 entries_right++;
2037 data_iter++;
2038 }
2039 ASSERT_EQ(data_iter, true_data.rend());
2040
2041 delete iter;
2042 }
2043 }
2044
2045 TEST_P(DBIteratorTest, IteratorWithLocalStatistics) {
2046 Options options = CurrentOptions();
2047 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2048 DestroyAndReopen(options);
2049
2050 Random rnd(301);
2051 for (int i = 0; i < 1000; i++) {
2052 // Key 10 bytes / Value 10 bytes
2053 ASSERT_OK(Put(rnd.RandomString(10), rnd.RandomString(10)));
2054 }
2055
2056 std::atomic<uint64_t> total_next(0);
2057 std::atomic<uint64_t> total_next_found(0);
2058 std::atomic<uint64_t> total_prev(0);
2059 std::atomic<uint64_t> total_prev_found(0);
2060 std::atomic<uint64_t> total_bytes(0);
2061
2062 std::vector<port::Thread> threads;
2063 std::function<void()> reader_func_next = [&]() {
2064 SetPerfLevel(kEnableCount);
2065 get_perf_context()->Reset();
2066 Iterator* iter = NewIterator(ReadOptions());
2067
2068 iter->SeekToFirst();
2069 // Seek will bump ITER_BYTES_READ
2070 uint64_t bytes = 0;
2071 bytes += iter->key().size();
2072 bytes += iter->value().size();
2073 while (true) {
2074 iter->Next();
2075 total_next++;
2076
2077 if (!iter->Valid()) {
2078 break;
2079 }
2080 total_next_found++;
2081 bytes += iter->key().size();
2082 bytes += iter->value().size();
2083 }
2084
2085 delete iter;
2086 ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes);
2087 SetPerfLevel(kDisable);
2088 total_bytes += bytes;
2089 };
2090
2091 std::function<void()> reader_func_prev = [&]() {
2092 SetPerfLevel(kEnableCount);
2093 Iterator* iter = NewIterator(ReadOptions());
2094
2095 iter->SeekToLast();
2096 // Seek will bump ITER_BYTES_READ
2097 uint64_t bytes = 0;
2098 bytes += iter->key().size();
2099 bytes += iter->value().size();
2100 while (true) {
2101 iter->Prev();
2102 total_prev++;
2103
2104 if (!iter->Valid()) {
2105 break;
2106 }
2107 total_prev_found++;
2108 bytes += iter->key().size();
2109 bytes += iter->value().size();
2110 }
2111
2112 delete iter;
2113 ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes);
2114 SetPerfLevel(kDisable);
2115 total_bytes += bytes;
2116 };
2117
2118 for (int i = 0; i < 10; i++) {
2119 threads.emplace_back(reader_func_next);
2120 }
2121 for (int i = 0; i < 15; i++) {
2122 threads.emplace_back(reader_func_prev);
2123 }
2124
2125 for (auto& t : threads) {
2126 t.join();
2127 }
2128
2129 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), (uint64_t)total_next);
2130 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND),
2131 (uint64_t)total_next_found);
2132 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), (uint64_t)total_prev);
2133 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND),
2134 (uint64_t)total_prev_found);
2135 ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), (uint64_t)total_bytes);
2136
2137 }
2138
2139 TEST_P(DBIteratorTest, ReadAhead) {
2140 Options options;
2141 env_->count_random_reads_ = true;
2142 options.env = env_;
2143 options.disable_auto_compactions = true;
2144 options.write_buffer_size = 4 << 20;
2145 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2146 BlockBasedTableOptions table_options;
2147 table_options.block_size = 1024;
2148 table_options.no_block_cache = true;
2149 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2150 Reopen(options);
2151
2152 std::string value(1024, 'a');
2153 for (int i = 0; i < 100; i++) {
2154 Put(Key(i), value);
2155 }
2156 ASSERT_OK(Flush());
2157 MoveFilesToLevel(2);
2158
2159 for (int i = 0; i < 100; i++) {
2160 Put(Key(i), value);
2161 }
2162 ASSERT_OK(Flush());
2163 MoveFilesToLevel(1);
2164
2165 for (int i = 0; i < 100; i++) {
2166 Put(Key(i), value);
2167 }
2168 ASSERT_OK(Flush());
2169 #ifndef ROCKSDB_LITE
2170 ASSERT_EQ("1,1,1", FilesPerLevel());
2171 #endif // !ROCKSDB_LITE
2172
2173 env_->random_read_bytes_counter_ = 0;
2174 options.statistics->setTickerCount(NO_FILE_OPENS, 0);
2175 ReadOptions read_options;
2176 auto* iter = NewIterator(read_options);
2177 iter->SeekToFirst();
2178 int64_t num_file_opens = TestGetTickerCount(options, NO_FILE_OPENS);
2179 size_t bytes_read = env_->random_read_bytes_counter_;
2180 delete iter;
2181
2182 int64_t num_file_closes = TestGetTickerCount(options, NO_FILE_CLOSES);
2183 env_->random_read_bytes_counter_ = 0;
2184 options.statistics->setTickerCount(NO_FILE_OPENS, 0);
2185 read_options.readahead_size = 1024 * 10;
2186 iter = NewIterator(read_options);
2187 iter->SeekToFirst();
2188 int64_t num_file_opens_readahead = TestGetTickerCount(options, NO_FILE_OPENS);
2189 size_t bytes_read_readahead = env_->random_read_bytes_counter_;
2190 delete iter;
2191 int64_t num_file_closes_readahead =
2192 TestGetTickerCount(options, NO_FILE_CLOSES);
2193 ASSERT_EQ(num_file_opens, num_file_opens_readahead);
2194 ASSERT_EQ(num_file_closes, num_file_closes_readahead);
2195 ASSERT_GT(bytes_read_readahead, bytes_read);
2196 ASSERT_GT(bytes_read_readahead, read_options.readahead_size * 3);
2197
2198 // Verify correctness.
2199 iter = NewIterator(read_options);
2200 int count = 0;
2201 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
2202 ASSERT_EQ(value, iter->value());
2203 count++;
2204 }
2205 ASSERT_EQ(100, count);
2206 for (int i = 0; i < 100; i++) {
2207 iter->Seek(Key(i));
2208 ASSERT_EQ(value, iter->value());
2209 }
2210 delete iter;
2211 }
2212
2213 // Insert a key, create a snapshot iterator, overwrite key lots of times,
2214 // seek to a smaller key. Expect DBIter to fall back to a seek instead of
2215 // going through all the overwrites linearly.
2216 TEST_P(DBIteratorTest, DBIteratorSkipRecentDuplicatesTest) {
2217 Options options = CurrentOptions();
2218 options.env = env_;
2219 options.create_if_missing = true;
2220 options.max_sequential_skip_in_iterations = 3;
2221 options.prefix_extractor = nullptr;
2222 options.write_buffer_size = 1 << 27; // big enough to avoid flush
2223 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2224 DestroyAndReopen(options);
2225
2226 // Insert.
2227 ASSERT_OK(Put("b", "0"));
2228
2229 // Create iterator.
2230 ReadOptions ro;
2231 std::unique_ptr<Iterator> iter(NewIterator(ro));
2232
2233 // Insert a lot.
2234 for (int i = 0; i < 100; ++i) {
2235 ASSERT_OK(Put("b", std::to_string(i + 1).c_str()));
2236 }
2237
2238 #ifndef ROCKSDB_LITE
2239 // Check that memtable wasn't flushed.
2240 std::string val;
2241 ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level0", &val));
2242 EXPECT_EQ("0", val);
2243 #endif
2244
2245 // Seek iterator to a smaller key.
2246 get_perf_context()->Reset();
2247 iter->Seek("a");
2248 ASSERT_TRUE(iter->Valid());
2249 EXPECT_EQ("b", iter->key().ToString());
2250 EXPECT_EQ("0", iter->value().ToString());
2251
2252 // Check that the seek didn't do too much work.
2253 // Checks are not tight, just make sure that everything is well below 100.
2254 EXPECT_LT(get_perf_context()->internal_key_skipped_count, 4);
2255 EXPECT_LT(get_perf_context()->internal_recent_skipped_count, 8);
2256 EXPECT_LT(get_perf_context()->seek_on_memtable_count, 10);
2257 EXPECT_LT(get_perf_context()->next_on_memtable_count, 10);
2258 EXPECT_LT(get_perf_context()->prev_on_memtable_count, 10);
2259
2260 // Check that iterator did something like what we expect.
2261 EXPECT_EQ(get_perf_context()->internal_delete_skipped_count, 0);
2262 EXPECT_EQ(get_perf_context()->internal_merge_count, 0);
2263 EXPECT_GE(get_perf_context()->internal_recent_skipped_count, 2);
2264 EXPECT_GE(get_perf_context()->seek_on_memtable_count, 2);
2265 EXPECT_EQ(1, options.statistics->getTickerCount(
2266 NUMBER_OF_RESEEKS_IN_ITERATION));
2267 }
2268
2269 TEST_P(DBIteratorTest, Refresh) {
2270 ASSERT_OK(Put("x", "y"));
2271
2272 std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
2273 iter->Seek(Slice("a"));
2274 ASSERT_TRUE(iter->Valid());
2275 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2276 iter->Next();
2277 ASSERT_FALSE(iter->Valid());
2278
2279 ASSERT_OK(Put("c", "d"));
2280
2281 iter->Seek(Slice("a"));
2282 ASSERT_TRUE(iter->Valid());
2283 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2284 iter->Next();
2285 ASSERT_FALSE(iter->Valid());
2286
2287 iter->Refresh();
2288
2289 iter->Seek(Slice("a"));
2290 ASSERT_TRUE(iter->Valid());
2291 ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2292 iter->Next();
2293 ASSERT_TRUE(iter->Valid());
2294 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2295 iter->Next();
2296 ASSERT_FALSE(iter->Valid());
2297
2298 dbfull()->Flush(FlushOptions());
2299
2300 ASSERT_OK(Put("m", "n"));
2301
2302 iter->Seek(Slice("a"));
2303 ASSERT_TRUE(iter->Valid());
2304 ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2305 iter->Next();
2306 ASSERT_TRUE(iter->Valid());
2307 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2308 iter->Next();
2309 ASSERT_FALSE(iter->Valid());
2310
2311 iter->Refresh();
2312
2313 iter->Seek(Slice("a"));
2314 ASSERT_TRUE(iter->Valid());
2315 ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2316 iter->Next();
2317 ASSERT_TRUE(iter->Valid());
2318 ASSERT_EQ(iter->key().compare(Slice("m")), 0);
2319 iter->Next();
2320 ASSERT_TRUE(iter->Valid());
2321 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2322 iter->Next();
2323 ASSERT_FALSE(iter->Valid());
2324
2325 iter.reset();
2326 }
2327
2328 TEST_P(DBIteratorTest, RefreshWithSnapshot) {
2329 ASSERT_OK(Put("x", "y"));
2330 const Snapshot* snapshot = db_->GetSnapshot();
2331 ReadOptions options;
2332 options.snapshot = snapshot;
2333 Iterator* iter = NewIterator(options);
2334
2335 iter->Seek(Slice("a"));
2336 ASSERT_TRUE(iter->Valid());
2337 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2338 iter->Next();
2339 ASSERT_FALSE(iter->Valid());
2340
2341 ASSERT_OK(Put("c", "d"));
2342
2343 iter->Seek(Slice("a"));
2344 ASSERT_TRUE(iter->Valid());
2345 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2346 iter->Next();
2347 ASSERT_FALSE(iter->Valid());
2348
2349 Status s;
2350 s = iter->Refresh();
2351 ASSERT_TRUE(s.IsNotSupported());
2352 db_->ReleaseSnapshot(snapshot);
2353 delete iter;
2354 }
2355
2356 TEST_P(DBIteratorTest, CreationFailure) {
2357 SyncPoint::GetInstance()->SetCallBack(
2358 "DBImpl::NewInternalIterator:StatusCallback", [](void* arg) {
2359 *(reinterpret_cast<Status*>(arg)) = Status::Corruption("test status");
2360 });
2361 SyncPoint::GetInstance()->EnableProcessing();
2362
2363 Iterator* iter = NewIterator(ReadOptions());
2364 ASSERT_FALSE(iter->Valid());
2365 ASSERT_TRUE(iter->status().IsCorruption());
2366 delete iter;
2367 }
2368
2369 TEST_P(DBIteratorTest, UpperBoundWithChangeDirection) {
2370 Options options = CurrentOptions();
2371 options.max_sequential_skip_in_iterations = 3;
2372 DestroyAndReopen(options);
2373
2374 // write a bunch of kvs to the database.
2375 ASSERT_OK(Put("a", "1"));
2376 ASSERT_OK(Put("y", "1"));
2377 ASSERT_OK(Put("y1", "1"));
2378 ASSERT_OK(Put("y2", "1"));
2379 ASSERT_OK(Put("y3", "1"));
2380 ASSERT_OK(Put("z", "1"));
2381 ASSERT_OK(Flush());
2382 ASSERT_OK(Put("a", "1"));
2383 ASSERT_OK(Put("z", "1"));
2384 ASSERT_OK(Put("bar", "1"));
2385 ASSERT_OK(Put("foo", "1"));
2386
2387 std::string upper_bound = "x";
2388 Slice ub_slice(upper_bound);
2389 ReadOptions ro;
2390 ro.iterate_upper_bound = &ub_slice;
2391 ro.max_skippable_internal_keys = 1000;
2392
2393 Iterator* iter = NewIterator(ro);
2394 iter->Seek("foo");
2395 ASSERT_TRUE(iter->Valid());
2396 ASSERT_EQ("foo", iter->key().ToString());
2397
2398 iter->Prev();
2399 ASSERT_TRUE(iter->Valid());
2400 ASSERT_OK(iter->status());
2401 ASSERT_EQ("bar", iter->key().ToString());
2402
2403 delete iter;
2404 }
2405
2406 TEST_P(DBIteratorTest, TableFilter) {
2407 ASSERT_OK(Put("a", "1"));
2408 dbfull()->Flush(FlushOptions());
2409 ASSERT_OK(Put("b", "2"));
2410 ASSERT_OK(Put("c", "3"));
2411 dbfull()->Flush(FlushOptions());
2412 ASSERT_OK(Put("d", "4"));
2413 ASSERT_OK(Put("e", "5"));
2414 ASSERT_OK(Put("f", "6"));
2415 dbfull()->Flush(FlushOptions());
2416
2417 // Ensure the table_filter callback is called once for each table.
2418 {
2419 std::set<uint64_t> unseen{1, 2, 3};
2420 ReadOptions opts;
2421 opts.table_filter = [&](const TableProperties& props) {
2422 auto it = unseen.find(props.num_entries);
2423 if (it == unseen.end()) {
2424 ADD_FAILURE() << "saw table properties with an unexpected "
2425 << props.num_entries << " entries";
2426 } else {
2427 unseen.erase(it);
2428 }
2429 return true;
2430 };
2431 auto iter = NewIterator(opts);
2432 iter->SeekToFirst();
2433 ASSERT_EQ(IterStatus(iter), "a->1");
2434 iter->Next();
2435 ASSERT_EQ(IterStatus(iter), "b->2");
2436 iter->Next();
2437 ASSERT_EQ(IterStatus(iter), "c->3");
2438 iter->Next();
2439 ASSERT_EQ(IterStatus(iter), "d->4");
2440 iter->Next();
2441 ASSERT_EQ(IterStatus(iter), "e->5");
2442 iter->Next();
2443 ASSERT_EQ(IterStatus(iter), "f->6");
2444 iter->Next();
2445 ASSERT_FALSE(iter->Valid());
2446 ASSERT_TRUE(unseen.empty());
2447 delete iter;
2448 }
2449
2450 // Ensure returning false in the table_filter hides the keys from that table
2451 // during iteration.
2452 {
2453 ReadOptions opts;
2454 opts.table_filter = [](const TableProperties& props) {
2455 return props.num_entries != 2;
2456 };
2457 auto iter = NewIterator(opts);
2458 iter->SeekToFirst();
2459 ASSERT_EQ(IterStatus(iter), "a->1");
2460 iter->Next();
2461 ASSERT_EQ(IterStatus(iter), "d->4");
2462 iter->Next();
2463 ASSERT_EQ(IterStatus(iter), "e->5");
2464 iter->Next();
2465 ASSERT_EQ(IterStatus(iter), "f->6");
2466 iter->Next();
2467 ASSERT_FALSE(iter->Valid());
2468 delete iter;
2469 }
2470 }
2471
2472 TEST_P(DBIteratorTest, UpperBoundWithPrevReseek) {
2473 Options options = CurrentOptions();
2474 options.max_sequential_skip_in_iterations = 3;
2475 DestroyAndReopen(options);
2476
2477 // write a bunch of kvs to the database.
2478 ASSERT_OK(Put("a", "1"));
2479 ASSERT_OK(Put("y", "1"));
2480 ASSERT_OK(Put("z", "1"));
2481 ASSERT_OK(Flush());
2482 ASSERT_OK(Put("a", "1"));
2483 ASSERT_OK(Put("z", "1"));
2484 ASSERT_OK(Put("bar", "1"));
2485 ASSERT_OK(Put("foo", "1"));
2486 ASSERT_OK(Put("foo", "2"));
2487
2488 ASSERT_OK(Put("foo", "3"));
2489 ASSERT_OK(Put("foo", "4"));
2490 ASSERT_OK(Put("foo", "5"));
2491 const Snapshot* snapshot = db_->GetSnapshot();
2492 ASSERT_OK(Put("foo", "6"));
2493
2494 std::string upper_bound = "x";
2495 Slice ub_slice(upper_bound);
2496 ReadOptions ro;
2497 ro.snapshot = snapshot;
2498 ro.iterate_upper_bound = &ub_slice;
2499
2500 Iterator* iter = NewIterator(ro);
2501 iter->SeekForPrev("goo");
2502 ASSERT_TRUE(iter->Valid());
2503 ASSERT_EQ("foo", iter->key().ToString());
2504 iter->Prev();
2505
2506 ASSERT_TRUE(iter->Valid());
2507 ASSERT_EQ("bar", iter->key().ToString());
2508
2509 delete iter;
2510 db_->ReleaseSnapshot(snapshot);
2511 }
2512
2513 TEST_P(DBIteratorTest, SkipStatistics) {
2514 Options options = CurrentOptions();
2515 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2516 DestroyAndReopen(options);
2517
2518 int skip_count = 0;
2519
2520 // write a bunch of kvs to the database.
2521 ASSERT_OK(Put("a", "1"));
2522 ASSERT_OK(Put("b", "1"));
2523 ASSERT_OK(Put("c", "1"));
2524 ASSERT_OK(Flush());
2525 ASSERT_OK(Put("d", "1"));
2526 ASSERT_OK(Put("e", "1"));
2527 ASSERT_OK(Put("f", "1"));
2528 ASSERT_OK(Put("a", "2"));
2529 ASSERT_OK(Put("b", "2"));
2530 ASSERT_OK(Flush());
2531 ASSERT_OK(Delete("d"));
2532 ASSERT_OK(Delete("e"));
2533 ASSERT_OK(Delete("f"));
2534
2535 Iterator* iter = NewIterator(ReadOptions());
2536 int count = 0;
2537 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
2538 ASSERT_OK(iter->status());
2539 count++;
2540 }
2541 ASSERT_EQ(count, 3);
2542 delete iter;
2543 skip_count += 8; // 3 deletes + 3 original keys + 2 lower in sequence
2544 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2545
2546 iter = NewIterator(ReadOptions());
2547 count = 0;
2548 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
2549 ASSERT_OK(iter->status());
2550 count++;
2551 }
2552 ASSERT_EQ(count, 3);
2553 delete iter;
2554 skip_count += 8; // Same as above, but in reverse order
2555 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2556
2557 ASSERT_OK(Put("aa", "1"));
2558 ASSERT_OK(Put("ab", "1"));
2559 ASSERT_OK(Put("ac", "1"));
2560 ASSERT_OK(Put("ad", "1"));
2561 ASSERT_OK(Flush());
2562 ASSERT_OK(Delete("ab"));
2563 ASSERT_OK(Delete("ac"));
2564 ASSERT_OK(Delete("ad"));
2565
2566 ReadOptions ro;
2567 Slice prefix("b");
2568 ro.iterate_upper_bound = &prefix;
2569
2570 iter = NewIterator(ro);
2571 count = 0;
2572 for(iter->Seek("aa"); iter->Valid(); iter->Next()) {
2573 ASSERT_OK(iter->status());
2574 count++;
2575 }
2576 ASSERT_EQ(count, 1);
2577 delete iter;
2578 skip_count += 6; // 3 deletes + 3 original keys
2579 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2580
2581 iter = NewIterator(ro);
2582 count = 0;
2583 for(iter->SeekToLast(); iter->Valid(); iter->Prev()) {
2584 ASSERT_OK(iter->status());
2585 count++;
2586 }
2587 ASSERT_EQ(count, 2);
2588 delete iter;
2589 // 3 deletes + 3 original keys + lower sequence of "a"
2590 skip_count += 7;
2591 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2592 }
2593
2594 TEST_P(DBIteratorTest, SeekAfterHittingManyInternalKeys) {
2595 Options options = CurrentOptions();
2596 DestroyAndReopen(options);
2597 ReadOptions ropts;
2598 ropts.max_skippable_internal_keys = 2;
2599
2600 Put("1", "val_1");
2601 // Add more tombstones than max_skippable_internal_keys so that Next() fails.
2602 Delete("2");
2603 Delete("3");
2604 Delete("4");
2605 Delete("5");
2606 Put("6", "val_6");
2607
2608 std::unique_ptr<Iterator> iter(NewIterator(ropts));
2609 iter->SeekToFirst();
2610
2611 ASSERT_TRUE(iter->Valid());
2612 ASSERT_EQ(iter->key().ToString(), "1");
2613 ASSERT_EQ(iter->value().ToString(), "val_1");
2614
2615 // This should fail as incomplete due to too many non-visible internal keys on
2616 // the way to the next valid user key.
2617 iter->Next();
2618 ASSERT_TRUE(!iter->Valid());
2619 ASSERT_TRUE(iter->status().IsIncomplete());
2620
2621 // Get the internal key at which Next() failed.
2622 std::string prop_value;
2623 ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
2624 ASSERT_EQ("4", prop_value);
2625
2626 // Create a new iterator to seek to the internal key.
2627 std::unique_ptr<Iterator> iter2(NewIterator(ropts));
2628 iter2->Seek(prop_value);
2629 ASSERT_TRUE(iter2->Valid());
2630 ASSERT_OK(iter2->status());
2631
2632 ASSERT_EQ(iter2->key().ToString(), "6");
2633 ASSERT_EQ(iter2->value().ToString(), "val_6");
2634 }
2635
2636 // Reproduces a former bug where iterator would skip some records when DBIter
2637 // re-seeks subiterator with Incomplete status.
2638 TEST_P(DBIteratorTest, NonBlockingIterationBugRepro) {
2639 Options options = CurrentOptions();
2640 BlockBasedTableOptions table_options;
2641 // Make sure the sst file has more than one block.
2642 table_options.flush_block_policy_factory =
2643 std::make_shared<FlushBlockEveryKeyPolicyFactory>();
2644 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2645 DestroyAndReopen(options);
2646
2647 // Two records in sst file, each in its own block.
2648 Put("b", "");
2649 Put("d", "");
2650 Flush();
2651
2652 // Create a nonblocking iterator before writing to memtable.
2653 ReadOptions ropt;
2654 ropt.read_tier = kBlockCacheTier;
2655 std::unique_ptr<Iterator> iter(NewIterator(ropt));
2656
2657 // Overwrite a key in memtable many times to hit
2658 // max_sequential_skip_in_iterations (which is 8 by default).
2659 for (int i = 0; i < 20; ++i) {
2660 Put("c", "");
2661 }
2662
2663 // Load the second block in sst file into the block cache.
2664 {
2665 std::unique_ptr<Iterator> iter2(NewIterator(ReadOptions()));
2666 iter2->Seek("d");
2667 }
2668
2669 // Finally seek the nonblocking iterator.
2670 iter->Seek("a");
2671 // With the bug, the status used to be OK, and the iterator used to point to
2672 // "d".
2673 EXPECT_TRUE(iter->status().IsIncomplete());
2674 }
2675
2676 TEST_P(DBIteratorTest, SeekBackwardAfterOutOfUpperBound) {
2677 Put("a", "");
2678 Put("b", "");
2679 Flush();
2680
2681 ReadOptions ropt;
2682 Slice ub = "b";
2683 ropt.iterate_upper_bound = &ub;
2684
2685 std::unique_ptr<Iterator> it(dbfull()->NewIterator(ropt));
2686 it->SeekForPrev("a");
2687 ASSERT_TRUE(it->Valid());
2688 ASSERT_OK(it->status());
2689 ASSERT_EQ("a", it->key().ToString());
2690 it->Next();
2691 ASSERT_FALSE(it->Valid());
2692 ASSERT_OK(it->status());
2693 it->SeekForPrev("a");
2694 ASSERT_OK(it->status());
2695
2696 ASSERT_TRUE(it->Valid());
2697 ASSERT_EQ("a", it->key().ToString());
2698 }
2699
2700 TEST_P(DBIteratorTest, AvoidReseekLevelIterator) {
2701 Options options = CurrentOptions();
2702 options.compression = CompressionType::kNoCompression;
2703 BlockBasedTableOptions table_options;
2704 table_options.block_size = 800;
2705 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2706 Reopen(options);
2707
2708 Random rnd(301);
2709 std::string random_str = rnd.RandomString(180);
2710
2711 ASSERT_OK(Put("1", random_str));
2712 ASSERT_OK(Put("2", random_str));
2713 ASSERT_OK(Put("3", random_str));
2714 ASSERT_OK(Put("4", random_str));
2715 // A new block
2716 ASSERT_OK(Put("5", random_str));
2717 ASSERT_OK(Put("6", random_str));
2718 ASSERT_OK(Put("7", random_str));
2719 ASSERT_OK(Flush());
2720 ASSERT_OK(Put("8", random_str));
2721 ASSERT_OK(Put("9", random_str));
2722 ASSERT_OK(Flush());
2723 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2724
2725 int num_find_file_in_level = 0;
2726 int num_idx_blk_seek = 0;
2727 SyncPoint::GetInstance()->SetCallBack(
2728 "LevelIterator::Seek:BeforeFindFile",
2729 [&](void* /*arg*/) { num_find_file_in_level++; });
2730 SyncPoint::GetInstance()->SetCallBack(
2731 "IndexBlockIter::Seek:0", [&](void* /*arg*/) { num_idx_blk_seek++; });
2732 SyncPoint::GetInstance()->EnableProcessing();
2733
2734 {
2735 std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
2736 iter->Seek("1");
2737 ASSERT_TRUE(iter->Valid());
2738 ASSERT_EQ(1, num_find_file_in_level);
2739 ASSERT_EQ(1, num_idx_blk_seek);
2740
2741 iter->Seek("2");
2742 ASSERT_TRUE(iter->Valid());
2743 ASSERT_EQ(1, num_find_file_in_level);
2744 ASSERT_EQ(1, num_idx_blk_seek);
2745
2746 iter->Seek("3");
2747 ASSERT_TRUE(iter->Valid());
2748 ASSERT_EQ(1, num_find_file_in_level);
2749 ASSERT_EQ(1, num_idx_blk_seek);
2750
2751 iter->Next();
2752 ASSERT_TRUE(iter->Valid());
2753 ASSERT_EQ(1, num_find_file_in_level);
2754 ASSERT_EQ(1, num_idx_blk_seek);
2755
2756 iter->Seek("5");
2757 ASSERT_TRUE(iter->Valid());
2758 ASSERT_EQ(1, num_find_file_in_level);
2759 ASSERT_EQ(2, num_idx_blk_seek);
2760
2761 iter->Seek("6");
2762 ASSERT_TRUE(iter->Valid());
2763 ASSERT_EQ(1, num_find_file_in_level);
2764 ASSERT_EQ(2, num_idx_blk_seek);
2765
2766 iter->Seek("7");
2767 ASSERT_TRUE(iter->Valid());
2768 ASSERT_EQ(1, num_find_file_in_level);
2769 ASSERT_EQ(3, num_idx_blk_seek);
2770
2771 iter->Seek("8");
2772 ASSERT_TRUE(iter->Valid());
2773 ASSERT_EQ(2, num_find_file_in_level);
2774 // Still re-seek because "8" is the boundary key, which has
2775 // the same user key as the seek key.
2776 ASSERT_EQ(4, num_idx_blk_seek);
2777
2778 iter->Seek("5");
2779 ASSERT_TRUE(iter->Valid());
2780 ASSERT_EQ(3, num_find_file_in_level);
2781 ASSERT_EQ(5, num_idx_blk_seek);
2782
2783 iter->Next();
2784 ASSERT_TRUE(iter->Valid());
2785 ASSERT_EQ(3, num_find_file_in_level);
2786 ASSERT_EQ(5, num_idx_blk_seek);
2787
2788 // Seek backward never triggers the index block seek to be skipped
2789 iter->Seek("5");
2790 ASSERT_TRUE(iter->Valid());
2791 ASSERT_EQ(3, num_find_file_in_level);
2792 ASSERT_EQ(6, num_idx_blk_seek);
2793 }
2794
2795 SyncPoint::GetInstance()->DisableProcessing();
2796 }
2797
2798 // MyRocks may change iterate bounds before seek. Simply test to make sure such
2799 // usage doesn't break iterator.
2800 TEST_P(DBIteratorTest, IterateBoundChangedBeforeSeek) {
2801 Options options = CurrentOptions();
2802 options.compression = CompressionType::kNoCompression;
2803 BlockBasedTableOptions table_options;
2804 table_options.block_size = 100;
2805 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2806 std::string value(50, 'v');
2807 Reopen(options);
2808 ASSERT_OK(Put("aaa", value));
2809 ASSERT_OK(Flush());
2810 ASSERT_OK(Put("bbb", "v"));
2811 ASSERT_OK(Put("ccc", "v"));
2812 ASSERT_OK(Put("ddd", "v"));
2813 ASSERT_OK(Flush());
2814 ASSERT_OK(Put("eee", "v"));
2815 ASSERT_OK(Flush());
2816 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2817
2818 std::string ub1 = "e";
2819 std::string ub2 = "c";
2820 Slice ub(ub1);
2821 ReadOptions read_opts1;
2822 read_opts1.iterate_upper_bound = &ub;
2823 Iterator* iter = NewIterator(read_opts1);
2824 // Seek and iterate accross block boundary.
2825 iter->Seek("b");
2826 ASSERT_TRUE(iter->Valid());
2827 ASSERT_OK(iter->status());
2828 ASSERT_EQ("bbb", iter->key());
2829 ub = Slice(ub2);
2830 iter->Seek("b");
2831 ASSERT_TRUE(iter->Valid());
2832 ASSERT_OK(iter->status());
2833 ASSERT_EQ("bbb", iter->key());
2834 iter->Next();
2835 ASSERT_FALSE(iter->Valid());
2836 ASSERT_OK(iter->status());
2837 delete iter;
2838
2839 std::string lb1 = "a";
2840 std::string lb2 = "c";
2841 Slice lb(lb1);
2842 ReadOptions read_opts2;
2843 read_opts2.iterate_lower_bound = &lb;
2844 iter = NewIterator(read_opts2);
2845 iter->SeekForPrev("d");
2846 ASSERT_TRUE(iter->Valid());
2847 ASSERT_OK(iter->status());
2848 ASSERT_EQ("ccc", iter->key());
2849 lb = Slice(lb2);
2850 iter->SeekForPrev("d");
2851 ASSERT_TRUE(iter->Valid());
2852 ASSERT_OK(iter->status());
2853 ASSERT_EQ("ccc", iter->key());
2854 iter->Prev();
2855 ASSERT_FALSE(iter->Valid());
2856 ASSERT_OK(iter->status());
2857 delete iter;
2858 }
2859
2860 TEST_P(DBIteratorTest, IterateWithLowerBoundAcrossFileBoundary) {
2861 ASSERT_OK(Put("aaa", "v"));
2862 ASSERT_OK(Put("bbb", "v"));
2863 ASSERT_OK(Flush());
2864 ASSERT_OK(Put("ccc", "v"));
2865 ASSERT_OK(Put("ddd", "v"));
2866 ASSERT_OK(Flush());
2867 // Move both files to bottom level.
2868 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2869 Slice lower_bound("b");
2870 ReadOptions read_opts;
2871 read_opts.iterate_lower_bound = &lower_bound;
2872 std::unique_ptr<Iterator> iter(NewIterator(read_opts));
2873 iter->SeekForPrev("d");
2874 ASSERT_TRUE(iter->Valid());
2875 ASSERT_OK(iter->status());
2876 ASSERT_EQ("ccc", iter->key());
2877 iter->Prev();
2878 ASSERT_TRUE(iter->Valid());
2879 ASSERT_OK(iter->status());
2880 ASSERT_EQ("bbb", iter->key());
2881 iter->Prev();
2882 ASSERT_FALSE(iter->Valid());
2883 ASSERT_OK(iter->status());
2884 }
2885
2886 INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest,
2887 testing::Values(true, false));
2888
2889 // Tests how DBIter work with ReadCallback
2890 class DBIteratorWithReadCallbackTest : public DBIteratorTest {};
2891
2892 TEST_F(DBIteratorWithReadCallbackTest, ReadCallback) {
2893 class TestReadCallback : public ReadCallback {
2894 public:
2895 explicit TestReadCallback(SequenceNumber _max_visible_seq)
2896 : ReadCallback(_max_visible_seq) {}
2897
2898 bool IsVisibleFullCheck(SequenceNumber seq) override {
2899 return seq <= max_visible_seq_;
2900 }
2901 };
2902
2903 ASSERT_OK(Put("foo", "v1"));
2904 ASSERT_OK(Put("foo", "v2"));
2905 ASSERT_OK(Put("foo", "v3"));
2906 ASSERT_OK(Put("a", "va"));
2907 ASSERT_OK(Put("z", "vz"));
2908 SequenceNumber seq1 = db_->GetLatestSequenceNumber();
2909 TestReadCallback callback1(seq1);
2910 ASSERT_OK(Put("foo", "v4"));
2911 ASSERT_OK(Put("foo", "v5"));
2912 ASSERT_OK(Put("bar", "v7"));
2913
2914 SequenceNumber seq2 = db_->GetLatestSequenceNumber();
2915 auto* cfd =
2916 static_cast_with_check<ColumnFamilyHandleImpl>(db_->DefaultColumnFamily())
2917 ->cfd();
2918 // The iterator are suppose to see data before seq1.
2919 Iterator* iter =
2920 dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq2, &callback1);
2921
2922 // Seek
2923 // The latest value of "foo" before seq1 is "v3"
2924 iter->Seek("foo");
2925 ASSERT_TRUE(iter->Valid());
2926 ASSERT_OK(iter->status());
2927 ASSERT_EQ("foo", iter->key());
2928 ASSERT_EQ("v3", iter->value());
2929 // "bar" is not visible to the iterator. It will move on to the next key
2930 // "foo".
2931 iter->Seek("bar");
2932 ASSERT_TRUE(iter->Valid());
2933 ASSERT_OK(iter->status());
2934 ASSERT_EQ("foo", iter->key());
2935 ASSERT_EQ("v3", iter->value());
2936
2937 // Next
2938 // Seek to "a"
2939 iter->Seek("a");
2940 ASSERT_TRUE(iter->Valid());
2941 ASSERT_OK(iter->status());
2942 ASSERT_EQ("va", iter->value());
2943 // "bar" is not visible to the iterator. It will move on to the next key
2944 // "foo".
2945 iter->Next();
2946 ASSERT_TRUE(iter->Valid());
2947 ASSERT_OK(iter->status());
2948 ASSERT_EQ("foo", iter->key());
2949 ASSERT_EQ("v3", iter->value());
2950
2951 // Prev
2952 // Seek to "z"
2953 iter->Seek("z");
2954 ASSERT_TRUE(iter->Valid());
2955 ASSERT_OK(iter->status());
2956 ASSERT_EQ("vz", iter->value());
2957 // The previous key is "foo", which is visible to the iterator.
2958 iter->Prev();
2959 ASSERT_TRUE(iter->Valid());
2960 ASSERT_OK(iter->status());
2961 ASSERT_EQ("foo", iter->key());
2962 ASSERT_EQ("v3", iter->value());
2963 // "bar" is not visible to the iterator. It will move on to the next key "a".
2964 iter->Prev(); // skipping "bar"
2965 ASSERT_TRUE(iter->Valid());
2966 ASSERT_OK(iter->status());
2967 ASSERT_EQ("a", iter->key());
2968 ASSERT_EQ("va", iter->value());
2969
2970 // SeekForPrev
2971 // The previous key is "foo", which is visible to the iterator.
2972 iter->SeekForPrev("y");
2973 ASSERT_TRUE(iter->Valid());
2974 ASSERT_OK(iter->status());
2975 ASSERT_EQ("foo", iter->key());
2976 ASSERT_EQ("v3", iter->value());
2977 // "bar" is not visible to the iterator. It will move on to the next key "a".
2978 iter->SeekForPrev("bar");
2979 ASSERT_TRUE(iter->Valid());
2980 ASSERT_OK(iter->status());
2981 ASSERT_EQ("a", iter->key());
2982 ASSERT_EQ("va", iter->value());
2983
2984 delete iter;
2985
2986 // Prev beyond max_sequential_skip_in_iterations
2987 uint64_t num_versions =
2988 CurrentOptions().max_sequential_skip_in_iterations + 10;
2989 for (uint64_t i = 0; i < num_versions; i++) {
2990 ASSERT_OK(Put("bar", ToString(i)));
2991 }
2992 SequenceNumber seq3 = db_->GetLatestSequenceNumber();
2993 TestReadCallback callback2(seq3);
2994 ASSERT_OK(Put("bar", "v8"));
2995 SequenceNumber seq4 = db_->GetLatestSequenceNumber();
2996
2997 // The iterator is suppose to see data before seq3.
2998 iter = dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq4, &callback2);
2999 // Seek to "z", which is visible.
3000 iter->Seek("z");
3001 ASSERT_TRUE(iter->Valid());
3002 ASSERT_OK(iter->status());
3003 ASSERT_EQ("vz", iter->value());
3004 // Previous key is "foo" and the last value "v5" is visible.
3005 iter->Prev();
3006 ASSERT_TRUE(iter->Valid());
3007 ASSERT_OK(iter->status());
3008 ASSERT_EQ("foo", iter->key());
3009 ASSERT_EQ("v5", iter->value());
3010 // Since the number of values of "bar" is more than
3011 // max_sequential_skip_in_iterations, Prev() will ultimately fallback to
3012 // seek in forward direction. Here we test the fallback seek is correct.
3013 // The last visible value should be (num_versions - 1), as "v8" is not
3014 // visible.
3015 iter->Prev();
3016 ASSERT_TRUE(iter->Valid());
3017 ASSERT_OK(iter->status());
3018 ASSERT_EQ("bar", iter->key());
3019 ASSERT_EQ(ToString(num_versions - 1), iter->value());
3020
3021 delete iter;
3022 }
3023
3024 TEST_F(DBIteratorTest, BackwardIterationOnInplaceUpdateMemtable) {
3025 Options options = CurrentOptions();
3026 options.create_if_missing = true;
3027 options.inplace_update_support = false;
3028 options.env = env_;
3029 DestroyAndReopen(options);
3030 constexpr int kNumKeys = 10;
3031
3032 // Write kNumKeys to WAL.
3033 for (int i = 0; i < kNumKeys; ++i) {
3034 ASSERT_OK(Put(Key(i), "val"));
3035 }
3036 ReadOptions read_opts;
3037 read_opts.total_order_seek = true;
3038 {
3039 std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
3040 int count = 0;
3041 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
3042 ++count;
3043 }
3044 ASSERT_EQ(kNumKeys, count);
3045 }
3046
3047 // Reopen and rebuild the memtable from WAL.
3048 options.create_if_missing = false;
3049 options.avoid_flush_during_recovery = true;
3050 options.inplace_update_support = true;
3051 options.allow_concurrent_memtable_write = false;
3052 Reopen(options);
3053 {
3054 std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
3055 iter->SeekToLast();
3056 // Backward iteration not supported due to inplace_update_support = true.
3057 ASSERT_TRUE(iter->status().IsNotSupported());
3058 ASSERT_FALSE(iter->Valid());
3059 }
3060 }
3061
3062 } // namespace ROCKSDB_NAMESPACE
3063
3064 int main(int argc, char** argv) {
3065 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
3066 ::testing::InitGoogleTest(&argc, argv);
3067 return RUN_ALL_TESTS();
3068 }