]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/db/db_iterator_test.cc
bump version to 18.2.2-pve1
[ceph.git] / ceph / src / rocksdb / db / db_iterator_test.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #include <functional>
11
12 #include "db/arena_wrapped_db_iter.h"
13 #include "db/db_iter.h"
14 #include "db/db_test_util.h"
15 #include "port/port.h"
16 #include "port/stack_trace.h"
17 #include "rocksdb/iostats_context.h"
18 #include "rocksdb/perf_context.h"
19 #include "table/block_based/flush_block_policy.h"
20 #include "util/random.h"
21 #include "utilities/merge_operators/string_append/stringappend2.h"
22
23 namespace ROCKSDB_NAMESPACE {
24
25 // A dumb ReadCallback which saying every key is committed.
26 class DummyReadCallback : public ReadCallback {
27 public:
28 DummyReadCallback() : ReadCallback(kMaxSequenceNumber) {}
29 bool IsVisibleFullCheck(SequenceNumber /*seq*/) override { return true; }
30 void SetSnapshot(SequenceNumber seq) { max_visible_seq_ = seq; }
31 };
32
33 // Test param:
34 // bool: whether to pass read_callback to NewIterator().
35 class DBIteratorTest : public DBTestBase,
36 public testing::WithParamInterface<bool> {
37 public:
38 DBIteratorTest() : DBTestBase("db_iterator_test", /*env_do_fsync=*/true) {}
39
40 Iterator* NewIterator(const ReadOptions& read_options,
41 ColumnFamilyHandle* column_family = nullptr) {
42 if (column_family == nullptr) {
43 column_family = db_->DefaultColumnFamily();
44 }
45 auto* cfd =
46 static_cast_with_check<ColumnFamilyHandleImpl>(column_family)->cfd();
47 SequenceNumber seq = read_options.snapshot != nullptr
48 ? read_options.snapshot->GetSequenceNumber()
49 : db_->GetLatestSequenceNumber();
50 bool use_read_callback = GetParam();
51 DummyReadCallback* read_callback = nullptr;
52 if (use_read_callback) {
53 read_callback = new DummyReadCallback();
54 read_callback->SetSnapshot(seq);
55 InstrumentedMutexLock lock(&mutex_);
56 read_callbacks_.push_back(
57 std::unique_ptr<DummyReadCallback>(read_callback));
58 }
59 return dbfull()->NewIteratorImpl(read_options, cfd, seq, read_callback);
60 }
61
62 private:
63 InstrumentedMutex mutex_;
64 std::vector<std::unique_ptr<DummyReadCallback>> read_callbacks_;
65 };
66
67 TEST_P(DBIteratorTest, IteratorProperty) {
68 // The test needs to be changed if kPersistedTier is supported in iterator.
69 Options options = CurrentOptions();
70 CreateAndReopenWithCF({"pikachu"}, options);
71 ASSERT_OK(Put(1, "1", "2"));
72 ASSERT_OK(Delete(1, "2"));
73 ReadOptions ropt;
74 ropt.pin_data = false;
75 {
76 std::unique_ptr<Iterator> iter(NewIterator(ropt, handles_[1]));
77 iter->SeekToFirst();
78 std::string prop_value;
79 ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value));
80 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
81 ASSERT_EQ("0", prop_value);
82 ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
83 ASSERT_EQ("1", prop_value);
84 iter->Next();
85 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
86 ASSERT_EQ("Iterator is not valid.", prop_value);
87
88 // Get internal key at which the iteration stopped (tombstone in this case).
89 ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
90 ASSERT_EQ("2", prop_value);
91 }
92 Close();
93 }
94
95 TEST_P(DBIteratorTest, PersistedTierOnIterator) {
96 // The test needs to be changed if kPersistedTier is supported in iterator.
97 Options options = CurrentOptions();
98 CreateAndReopenWithCF({"pikachu"}, options);
99 ReadOptions ropt;
100 ropt.read_tier = kPersistedTier;
101
102 auto* iter = db_->NewIterator(ropt, handles_[1]);
103 ASSERT_TRUE(iter->status().IsNotSupported());
104 delete iter;
105
106 std::vector<Iterator*> iters;
107 ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported());
108 Close();
109 }
110
111 TEST_P(DBIteratorTest, NonBlockingIteration) {
112 do {
113 ReadOptions non_blocking_opts, regular_opts;
114 anon::OptionsOverride options_override;
115 options_override.full_block_cache = true;
116 Options options = CurrentOptions(options_override);
117 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
118 non_blocking_opts.read_tier = kBlockCacheTier;
119
120 CreateAndReopenWithCF({"pikachu"}, options);
121 // write one kv to the database.
122 ASSERT_OK(Put(1, "a", "b"));
123
124 // scan using non-blocking iterator. We should find it because
125 // it is in memtable.
126 Iterator* iter = NewIterator(non_blocking_opts, handles_[1]);
127 int count = 0;
128 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
129 ASSERT_OK(iter->status());
130 count++;
131 }
132 ASSERT_EQ(count, 1);
133 delete iter;
134
135 // flush memtable to storage. Now, the key should not be in the
136 // memtable neither in the block cache.
137 ASSERT_OK(Flush(1));
138
139 // verify that a non-blocking iterator does not find any
140 // kvs. Neither does it do any IOs to storage.
141 uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
142 uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
143 iter = NewIterator(non_blocking_opts, handles_[1]);
144 count = 0;
145 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
146 count++;
147 }
148 ASSERT_EQ(count, 0);
149 ASSERT_TRUE(iter->status().IsIncomplete());
150 ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
151 ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
152 delete iter;
153
154 // read in the specified block via a regular get
155 ASSERT_EQ(Get(1, "a"), "b");
156
157 // verify that we can find it via a non-blocking scan
158 numopen = TestGetTickerCount(options, NO_FILE_OPENS);
159 cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
160 iter = NewIterator(non_blocking_opts, handles_[1]);
161 count = 0;
162 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
163 ASSERT_OK(iter->status());
164 count++;
165 }
166 ASSERT_EQ(count, 1);
167 ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
168 ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
169 delete iter;
170
171 // This test verifies block cache behaviors, which is not used by plain
172 // table format.
173 } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipMmapReads));
174 }
175
176 TEST_P(DBIteratorTest, IterSeekBeforePrev) {
177 ASSERT_OK(Put("a", "b"));
178 ASSERT_OK(Put("c", "d"));
179 EXPECT_OK(dbfull()->Flush(FlushOptions()));
180 ASSERT_OK(Put("0", "f"));
181 ASSERT_OK(Put("1", "h"));
182 EXPECT_OK(dbfull()->Flush(FlushOptions()));
183 ASSERT_OK(Put("2", "j"));
184 auto iter = NewIterator(ReadOptions());
185 iter->Seek(Slice("c"));
186 iter->Prev();
187 iter->Seek(Slice("a"));
188 iter->Prev();
189 delete iter;
190 }
191
192 TEST_P(DBIteratorTest, IterReseekNewUpperBound) {
193 Random rnd(301);
194 Options options = CurrentOptions();
195 BlockBasedTableOptions table_options;
196 table_options.block_size = 1024;
197 table_options.block_size_deviation = 50;
198 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
199 options.compression = kNoCompression;
200 Reopen(options);
201
202 ASSERT_OK(Put("a", rnd.RandomString(400)));
203 ASSERT_OK(Put("aabb", rnd.RandomString(400)));
204 ASSERT_OK(Put("aaef", rnd.RandomString(400)));
205 ASSERT_OK(Put("b", rnd.RandomString(400)));
206 EXPECT_OK(dbfull()->Flush(FlushOptions()));
207 ReadOptions opts;
208 Slice ub = Slice("aa");
209 opts.iterate_upper_bound = &ub;
210 auto iter = NewIterator(opts);
211 iter->Seek(Slice("a"));
212 ub = Slice("b");
213 iter->Seek(Slice("aabc"));
214 ASSERT_TRUE(iter->Valid());
215 ASSERT_EQ(iter->key().ToString(), "aaef");
216 delete iter;
217 }
218
219 TEST_P(DBIteratorTest, IterSeekForPrevBeforeNext) {
220 ASSERT_OK(Put("a", "b"));
221 ASSERT_OK(Put("c", "d"));
222 EXPECT_OK(dbfull()->Flush(FlushOptions()));
223 ASSERT_OK(Put("0", "f"));
224 ASSERT_OK(Put("1", "h"));
225 EXPECT_OK(dbfull()->Flush(FlushOptions()));
226 ASSERT_OK(Put("2", "j"));
227 auto iter = NewIterator(ReadOptions());
228 iter->SeekForPrev(Slice("0"));
229 iter->Next();
230 iter->SeekForPrev(Slice("1"));
231 iter->Next();
232 delete iter;
233 }
234
235 namespace {
236 std::string MakeLongKey(size_t length, char c) {
237 return std::string(length, c);
238 }
239 } // anonymous namespace
240
241 TEST_P(DBIteratorTest, IterLongKeys) {
242 ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
243 ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
244 ASSERT_OK(Put("a", "b"));
245 EXPECT_OK(dbfull()->Flush(FlushOptions()));
246 ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
247 ASSERT_OK(Put(MakeLongKey(127, 3), "3"));
248 ASSERT_OK(Put(MakeLongKey(64, 4), "4"));
249 auto iter = NewIterator(ReadOptions());
250
251 // Create a key that needs to be skipped for Seq too new
252 iter->Seek(MakeLongKey(20, 0));
253 ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0");
254 iter->Next();
255 ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
256 iter->Next();
257 ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
258 iter->Next();
259 ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
260 iter->Next();
261 ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4");
262
263 iter->SeekForPrev(MakeLongKey(127, 3));
264 ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
265 iter->Prev();
266 ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
267 iter->Prev();
268 ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
269 delete iter;
270
271 iter = NewIterator(ReadOptions());
272 iter->Seek(MakeLongKey(50, 1));
273 ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
274 iter->Next();
275 ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
276 iter->Next();
277 ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
278 delete iter;
279 }
280
281 TEST_P(DBIteratorTest, IterNextWithNewerSeq) {
282 ASSERT_OK(Put("0", "0"));
283 EXPECT_OK(dbfull()->Flush(FlushOptions()));
284 ASSERT_OK(Put("a", "b"));
285 ASSERT_OK(Put("c", "d"));
286 ASSERT_OK(Put("d", "e"));
287 auto iter = NewIterator(ReadOptions());
288
289 // Create a key that needs to be skipped for Seq too new
290 for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
291 i++) {
292 ASSERT_OK(Put("b", "f"));
293 }
294
295 iter->Seek(Slice("a"));
296 ASSERT_EQ(IterStatus(iter), "a->b");
297 iter->Next();
298 ASSERT_EQ(IterStatus(iter), "c->d");
299 iter->SeekForPrev(Slice("b"));
300 ASSERT_EQ(IterStatus(iter), "a->b");
301 iter->Next();
302 ASSERT_EQ(IterStatus(iter), "c->d");
303
304 delete iter;
305 }
306
307 TEST_P(DBIteratorTest, IterPrevWithNewerSeq) {
308 ASSERT_OK(Put("0", "0"));
309 EXPECT_OK(dbfull()->Flush(FlushOptions()));
310 ASSERT_OK(Put("a", "b"));
311 ASSERT_OK(Put("c", "d"));
312 ASSERT_OK(Put("d", "e"));
313 auto iter = NewIterator(ReadOptions());
314
315 // Create a key that needs to be skipped for Seq too new
316 for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
317 i++) {
318 ASSERT_OK(Put("b", "f"));
319 }
320
321 iter->Seek(Slice("d"));
322 ASSERT_EQ(IterStatus(iter), "d->e");
323 iter->Prev();
324 ASSERT_EQ(IterStatus(iter), "c->d");
325 iter->Prev();
326 ASSERT_EQ(IterStatus(iter), "a->b");
327 iter->Prev();
328 iter->SeekForPrev(Slice("d"));
329 ASSERT_EQ(IterStatus(iter), "d->e");
330 iter->Prev();
331 ASSERT_EQ(IterStatus(iter), "c->d");
332 iter->Prev();
333 ASSERT_EQ(IterStatus(iter), "a->b");
334 iter->Prev();
335 delete iter;
336 }
337
338 TEST_P(DBIteratorTest, IterPrevWithNewerSeq2) {
339 ASSERT_OK(Put("0", "0"));
340 EXPECT_OK(dbfull()->Flush(FlushOptions()));
341 ASSERT_OK(Put("a", "b"));
342 ASSERT_OK(Put("c", "d"));
343 ASSERT_OK(Put("e", "f"));
344 auto iter = NewIterator(ReadOptions());
345 auto iter2 = NewIterator(ReadOptions());
346 iter->Seek(Slice("c"));
347 iter2->SeekForPrev(Slice("d"));
348 ASSERT_EQ(IterStatus(iter), "c->d");
349 ASSERT_EQ(IterStatus(iter2), "c->d");
350
351 // Create a key that needs to be skipped for Seq too new
352 for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
353 i++) {
354 ASSERT_OK(Put("b", "f"));
355 }
356
357 iter->Prev();
358 ASSERT_EQ(IterStatus(iter), "a->b");
359 iter->Prev();
360 iter2->Prev();
361 ASSERT_EQ(IterStatus(iter2), "a->b");
362 iter2->Prev();
363 delete iter;
364 delete iter2;
365 }
366
367 TEST_P(DBIteratorTest, IterEmpty) {
368 do {
369 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
370 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
371
372 iter->SeekToFirst();
373 ASSERT_EQ(IterStatus(iter), "(invalid)");
374
375 iter->SeekToLast();
376 ASSERT_EQ(IterStatus(iter), "(invalid)");
377
378 iter->Seek("foo");
379 ASSERT_EQ(IterStatus(iter), "(invalid)");
380
381 iter->SeekForPrev("foo");
382 ASSERT_EQ(IterStatus(iter), "(invalid)");
383
384 ASSERT_OK(iter->status());
385
386 delete iter;
387 } while (ChangeCompactOptions());
388 }
389
390 TEST_P(DBIteratorTest, IterSingle) {
391 do {
392 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
393 ASSERT_OK(Put(1, "a", "va"));
394 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
395
396 iter->SeekToFirst();
397 ASSERT_EQ(IterStatus(iter), "a->va");
398 iter->Next();
399 ASSERT_EQ(IterStatus(iter), "(invalid)");
400 iter->SeekToFirst();
401 ASSERT_EQ(IterStatus(iter), "a->va");
402 iter->Prev();
403 ASSERT_EQ(IterStatus(iter), "(invalid)");
404
405 iter->SeekToLast();
406 ASSERT_EQ(IterStatus(iter), "a->va");
407 iter->Next();
408 ASSERT_EQ(IterStatus(iter), "(invalid)");
409 iter->SeekToLast();
410 ASSERT_EQ(IterStatus(iter), "a->va");
411 iter->Prev();
412 ASSERT_EQ(IterStatus(iter), "(invalid)");
413
414 iter->Seek("");
415 ASSERT_EQ(IterStatus(iter), "a->va");
416 iter->Next();
417 ASSERT_EQ(IterStatus(iter), "(invalid)");
418 iter->SeekForPrev("");
419 ASSERT_EQ(IterStatus(iter), "(invalid)");
420
421 iter->Seek("a");
422 ASSERT_EQ(IterStatus(iter), "a->va");
423 iter->Next();
424 ASSERT_EQ(IterStatus(iter), "(invalid)");
425 iter->SeekForPrev("a");
426 ASSERT_EQ(IterStatus(iter), "a->va");
427 iter->Prev();
428 ASSERT_EQ(IterStatus(iter), "(invalid)");
429
430 iter->Seek("b");
431 ASSERT_EQ(IterStatus(iter), "(invalid)");
432 iter->SeekForPrev("b");
433 ASSERT_EQ(IterStatus(iter), "a->va");
434 iter->Prev();
435 ASSERT_EQ(IterStatus(iter), "(invalid)");
436
437 delete iter;
438 } while (ChangeCompactOptions());
439 }
440
441 TEST_P(DBIteratorTest, IterMulti) {
442 do {
443 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
444 ASSERT_OK(Put(1, "a", "va"));
445 ASSERT_OK(Put(1, "b", "vb"));
446 ASSERT_OK(Put(1, "c", "vc"));
447 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
448
449 iter->SeekToFirst();
450 ASSERT_EQ(IterStatus(iter), "a->va");
451 iter->Next();
452 ASSERT_EQ(IterStatus(iter), "b->vb");
453 iter->Next();
454 ASSERT_EQ(IterStatus(iter), "c->vc");
455 iter->Next();
456 ASSERT_EQ(IterStatus(iter), "(invalid)");
457 iter->SeekToFirst();
458 ASSERT_EQ(IterStatus(iter), "a->va");
459 iter->Prev();
460 ASSERT_EQ(IterStatus(iter), "(invalid)");
461
462 iter->SeekToLast();
463 ASSERT_EQ(IterStatus(iter), "c->vc");
464 iter->Prev();
465 ASSERT_EQ(IterStatus(iter), "b->vb");
466 iter->Prev();
467 ASSERT_EQ(IterStatus(iter), "a->va");
468 iter->Prev();
469 ASSERT_EQ(IterStatus(iter), "(invalid)");
470 iter->SeekToLast();
471 ASSERT_EQ(IterStatus(iter), "c->vc");
472 iter->Next();
473 ASSERT_EQ(IterStatus(iter), "(invalid)");
474
475 iter->Seek("");
476 ASSERT_EQ(IterStatus(iter), "a->va");
477 iter->Seek("a");
478 ASSERT_EQ(IterStatus(iter), "a->va");
479 iter->Seek("ax");
480 ASSERT_EQ(IterStatus(iter), "b->vb");
481 iter->SeekForPrev("d");
482 ASSERT_EQ(IterStatus(iter), "c->vc");
483 iter->SeekForPrev("c");
484 ASSERT_EQ(IterStatus(iter), "c->vc");
485 iter->SeekForPrev("bx");
486 ASSERT_EQ(IterStatus(iter), "b->vb");
487
488 iter->Seek("b");
489 ASSERT_EQ(IterStatus(iter), "b->vb");
490 iter->Seek("z");
491 ASSERT_EQ(IterStatus(iter), "(invalid)");
492 iter->SeekForPrev("b");
493 ASSERT_EQ(IterStatus(iter), "b->vb");
494 iter->SeekForPrev("");
495 ASSERT_EQ(IterStatus(iter), "(invalid)");
496
497 // Switch from reverse to forward
498 iter->SeekToLast();
499 iter->Prev();
500 iter->Prev();
501 iter->Next();
502 ASSERT_EQ(IterStatus(iter), "b->vb");
503
504 // Switch from forward to reverse
505 iter->SeekToFirst();
506 iter->Next();
507 iter->Next();
508 iter->Prev();
509 ASSERT_EQ(IterStatus(iter), "b->vb");
510
511 // Make sure iter stays at snapshot
512 ASSERT_OK(Put(1, "a", "va2"));
513 ASSERT_OK(Put(1, "a2", "va3"));
514 ASSERT_OK(Put(1, "b", "vb2"));
515 ASSERT_OK(Put(1, "c", "vc2"));
516 ASSERT_OK(Delete(1, "b"));
517 iter->SeekToFirst();
518 ASSERT_EQ(IterStatus(iter), "a->va");
519 iter->Next();
520 ASSERT_EQ(IterStatus(iter), "b->vb");
521 iter->Next();
522 ASSERT_EQ(IterStatus(iter), "c->vc");
523 iter->Next();
524 ASSERT_EQ(IterStatus(iter), "(invalid)");
525 iter->SeekToLast();
526 ASSERT_EQ(IterStatus(iter), "c->vc");
527 iter->Prev();
528 ASSERT_EQ(IterStatus(iter), "b->vb");
529 iter->Prev();
530 ASSERT_EQ(IterStatus(iter), "a->va");
531 iter->Prev();
532 ASSERT_EQ(IterStatus(iter), "(invalid)");
533
534 delete iter;
535 } while (ChangeCompactOptions());
536 }
537
538 // Check that we can skip over a run of user keys
539 // by using reseek rather than sequential scan
540 TEST_P(DBIteratorTest, IterReseek) {
541 anon::OptionsOverride options_override;
542 options_override.skip_policy = kSkipNoSnapshot;
543 Options options = CurrentOptions(options_override);
544 options.max_sequential_skip_in_iterations = 3;
545 options.create_if_missing = true;
546 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
547 DestroyAndReopen(options);
548 CreateAndReopenWithCF({"pikachu"}, options);
549
550 // insert three keys with same userkey and verify that
551 // reseek is not invoked. For each of these test cases,
552 // verify that we can find the next key "b".
553 ASSERT_OK(Put(1, "a", "zero"));
554 ASSERT_OK(Put(1, "a", "one"));
555 ASSERT_OK(Put(1, "a", "two"));
556 ASSERT_OK(Put(1, "b", "bone"));
557 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
558 iter->SeekToFirst();
559 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
560 ASSERT_EQ(IterStatus(iter), "a->two");
561 iter->Next();
562 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
563 ASSERT_EQ(IterStatus(iter), "b->bone");
564 delete iter;
565
566 // insert a total of three keys with same userkey and verify
567 // that reseek is still not invoked.
568 ASSERT_OK(Put(1, "a", "three"));
569 iter = NewIterator(ReadOptions(), handles_[1]);
570 iter->SeekToFirst();
571 ASSERT_EQ(IterStatus(iter), "a->three");
572 iter->Next();
573 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
574 ASSERT_EQ(IterStatus(iter), "b->bone");
575 delete iter;
576
577 // insert a total of four keys with same userkey and verify
578 // that reseek is invoked.
579 ASSERT_OK(Put(1, "a", "four"));
580 iter = NewIterator(ReadOptions(), handles_[1]);
581 iter->SeekToFirst();
582 ASSERT_EQ(IterStatus(iter), "a->four");
583 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
584 iter->Next();
585 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
586 ASSERT_EQ(IterStatus(iter), "b->bone");
587 delete iter;
588
589 // Testing reverse iterator
590 // At this point, we have three versions of "a" and one version of "b".
591 // The reseek statistics is already at 1.
592 int num_reseeks = static_cast<int>(
593 TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
594
595 // Insert another version of b and assert that reseek is not invoked
596 ASSERT_OK(Put(1, "b", "btwo"));
597 iter = NewIterator(ReadOptions(), handles_[1]);
598 iter->SeekToLast();
599 ASSERT_EQ(IterStatus(iter), "b->btwo");
600 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
601 num_reseeks);
602 iter->Prev();
603 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
604 num_reseeks + 1);
605 ASSERT_EQ(IterStatus(iter), "a->four");
606 delete iter;
607
608 // insert two more versions of b. This makes a total of 4 versions
609 // of b and 4 versions of a.
610 ASSERT_OK(Put(1, "b", "bthree"));
611 ASSERT_OK(Put(1, "b", "bfour"));
612 iter = NewIterator(ReadOptions(), handles_[1]);
613 iter->SeekToLast();
614 ASSERT_EQ(IterStatus(iter), "b->bfour");
615 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
616 num_reseeks + 2);
617 iter->Prev();
618
619 // the previous Prev call should have invoked reseek
620 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
621 num_reseeks + 3);
622 ASSERT_EQ(IterStatus(iter), "a->four");
623 delete iter;
624 }
625
626 TEST_F(DBIteratorTest, ReseekUponDirectionChange) {
627 Options options = GetDefaultOptions();
628 options.create_if_missing = true;
629 options.prefix_extractor.reset(NewFixedPrefixTransform(1));
630 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
631 options.merge_operator.reset(
632 new StringAppendTESTOperator(/*delim_char=*/' '));
633 DestroyAndReopen(options);
634 ASSERT_OK(Put("foo", "value"));
635 ASSERT_OK(Put("bar", "value"));
636 {
637 std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
638 it->SeekToLast();
639 it->Prev();
640 it->Next();
641 }
642 ASSERT_EQ(1,
643 options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION));
644
645 const std::string merge_key("good");
646 ASSERT_OK(Put(merge_key, "orig"));
647 ASSERT_OK(Merge(merge_key, "suffix"));
648 {
649 std::unique_ptr<Iterator> it(db_->NewIterator(ReadOptions()));
650 it->Seek(merge_key);
651 ASSERT_TRUE(it->Valid());
652 const uint64_t prev_reseek_count =
653 options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION);
654 it->Prev();
655 ASSERT_EQ(prev_reseek_count + 1, options.statistics->getTickerCount(
656 NUMBER_OF_RESEEKS_IN_ITERATION));
657 }
658 }
659
660 TEST_P(DBIteratorTest, IterSmallAndLargeMix) {
661 do {
662 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
663 ASSERT_OK(Put(1, "a", "va"));
664 ASSERT_OK(Put(1, "b", std::string(100000, 'b')));
665 ASSERT_OK(Put(1, "c", "vc"));
666 ASSERT_OK(Put(1, "d", std::string(100000, 'd')));
667 ASSERT_OK(Put(1, "e", std::string(100000, 'e')));
668
669 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
670
671 iter->SeekToFirst();
672 ASSERT_EQ(IterStatus(iter), "a->va");
673 iter->Next();
674 ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
675 iter->Next();
676 ASSERT_EQ(IterStatus(iter), "c->vc");
677 iter->Next();
678 ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
679 iter->Next();
680 ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
681 iter->Next();
682 ASSERT_EQ(IterStatus(iter), "(invalid)");
683
684 iter->SeekToLast();
685 ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
686 iter->Prev();
687 ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
688 iter->Prev();
689 ASSERT_EQ(IterStatus(iter), "c->vc");
690 iter->Prev();
691 ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
692 iter->Prev();
693 ASSERT_EQ(IterStatus(iter), "a->va");
694 iter->Prev();
695 ASSERT_EQ(IterStatus(iter), "(invalid)");
696
697 delete iter;
698 } while (ChangeCompactOptions());
699 }
700
701 TEST_P(DBIteratorTest, IterMultiWithDelete) {
702 do {
703 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
704 ASSERT_OK(Put(1, "ka", "va"));
705 ASSERT_OK(Put(1, "kb", "vb"));
706 ASSERT_OK(Put(1, "kc", "vc"));
707 ASSERT_OK(Delete(1, "kb"));
708 ASSERT_EQ("NOT_FOUND", Get(1, "kb"));
709
710 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
711 iter->Seek("kc");
712 ASSERT_EQ(IterStatus(iter), "kc->vc");
713 if (!CurrentOptions().merge_operator) {
714 // TODO: merge operator does not support backward iteration yet
715 if (kPlainTableAllBytesPrefix != option_config_ &&
716 kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
717 kHashLinkList != option_config_ &&
718 kHashSkipList != option_config_) { // doesn't support SeekToLast
719 iter->Prev();
720 ASSERT_EQ(IterStatus(iter), "ka->va");
721 }
722 }
723 delete iter;
724 } while (ChangeOptions());
725 }
726
727 TEST_P(DBIteratorTest, IterPrevMaxSkip) {
728 do {
729 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
730 for (int i = 0; i < 2; i++) {
731 ASSERT_OK(Put(1, "key1", "v1"));
732 ASSERT_OK(Put(1, "key2", "v2"));
733 ASSERT_OK(Put(1, "key3", "v3"));
734 ASSERT_OK(Put(1, "key4", "v4"));
735 ASSERT_OK(Put(1, "key5", "v5"));
736 }
737
738 VerifyIterLast("key5->v5", 1);
739
740 ASSERT_OK(Delete(1, "key5"));
741 VerifyIterLast("key4->v4", 1);
742
743 ASSERT_OK(Delete(1, "key4"));
744 VerifyIterLast("key3->v3", 1);
745
746 ASSERT_OK(Delete(1, "key3"));
747 VerifyIterLast("key2->v2", 1);
748
749 ASSERT_OK(Delete(1, "key2"));
750 VerifyIterLast("key1->v1", 1);
751
752 ASSERT_OK(Delete(1, "key1"));
753 VerifyIterLast("(invalid)", 1);
754 } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast));
755 }
756
757 TEST_P(DBIteratorTest, IterWithSnapshot) {
758 anon::OptionsOverride options_override;
759 options_override.skip_policy = kSkipNoSnapshot;
760 do {
761 CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
762 ASSERT_OK(Put(1, "key1", "val1"));
763 ASSERT_OK(Put(1, "key2", "val2"));
764 ASSERT_OK(Put(1, "key3", "val3"));
765 ASSERT_OK(Put(1, "key4", "val4"));
766 ASSERT_OK(Put(1, "key5", "val5"));
767
768 const Snapshot* snapshot = db_->GetSnapshot();
769 ReadOptions options;
770 options.snapshot = snapshot;
771 Iterator* iter = NewIterator(options, handles_[1]);
772
773 ASSERT_OK(Put(1, "key0", "val0"));
774 // Put more values after the snapshot
775 ASSERT_OK(Put(1, "key100", "val100"));
776 ASSERT_OK(Put(1, "key101", "val101"));
777
778 iter->Seek("key5");
779 ASSERT_EQ(IterStatus(iter), "key5->val5");
780 if (!CurrentOptions().merge_operator) {
781 // TODO: merge operator does not support backward iteration yet
782 if (kPlainTableAllBytesPrefix != option_config_ &&
783 kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
784 kHashLinkList != option_config_ && kHashSkipList != option_config_) {
785 iter->Prev();
786 ASSERT_EQ(IterStatus(iter), "key4->val4");
787 iter->Prev();
788 ASSERT_EQ(IterStatus(iter), "key3->val3");
789
790 iter->Next();
791 ASSERT_EQ(IterStatus(iter), "key4->val4");
792 iter->Next();
793 ASSERT_EQ(IterStatus(iter), "key5->val5");
794 }
795 iter->Next();
796 ASSERT_TRUE(!iter->Valid());
797 }
798
799 if (!CurrentOptions().merge_operator) {
800 // TODO(gzh): merge operator does not support backward iteration yet
801 if (kPlainTableAllBytesPrefix != option_config_ &&
802 kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
803 kHashLinkList != option_config_ && kHashSkipList != option_config_) {
804 iter->SeekForPrev("key1");
805 ASSERT_EQ(IterStatus(iter), "key1->val1");
806 iter->Next();
807 ASSERT_EQ(IterStatus(iter), "key2->val2");
808 iter->Next();
809 ASSERT_EQ(IterStatus(iter), "key3->val3");
810 iter->Prev();
811 ASSERT_EQ(IterStatus(iter), "key2->val2");
812 iter->Prev();
813 ASSERT_EQ(IterStatus(iter), "key1->val1");
814 iter->Prev();
815 ASSERT_TRUE(!iter->Valid());
816 }
817 }
818 db_->ReleaseSnapshot(snapshot);
819 delete iter;
820 } while (ChangeOptions());
821 }
822
823 TEST_P(DBIteratorTest, IteratorPinsRef) {
824 do {
825 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
826 ASSERT_OK(Put(1, "foo", "hello"));
827
828 // Get iterator that will yield the current contents of the DB.
829 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
830
831 // Write to force compactions
832 ASSERT_OK(Put(1, "foo", "newvalue1"));
833 for (int i = 0; i < 100; i++) {
834 // 100K values
835 ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v')));
836 }
837 ASSERT_OK(Put(1, "foo", "newvalue2"));
838
839 iter->SeekToFirst();
840 ASSERT_TRUE(iter->Valid());
841 ASSERT_EQ("foo", iter->key().ToString());
842 ASSERT_EQ("hello", iter->value().ToString());
843 iter->Next();
844 ASSERT_TRUE(!iter->Valid());
845 delete iter;
846 } while (ChangeCompactOptions());
847 }
848
849 TEST_P(DBIteratorTest, IteratorDeleteAfterCfDelete) {
850 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
851
852 ASSERT_OK(Put(1, "foo", "delete-cf-then-delete-iter"));
853 ASSERT_OK(Put(1, "hello", "value2"));
854
855 ColumnFamilyHandle* cf = handles_[1];
856 ReadOptions ro;
857
858 auto* iter = db_->NewIterator(ro, cf);
859 iter->SeekToFirst();
860 ASSERT_EQ(IterStatus(iter), "foo->delete-cf-then-delete-iter");
861
862 // delete CF handle
863 EXPECT_OK(db_->DestroyColumnFamilyHandle(cf));
864 handles_.erase(std::begin(handles_) + 1);
865
866 // delete Iterator after CF handle is deleted
867 iter->Next();
868 ASSERT_EQ(IterStatus(iter), "hello->value2");
869 delete iter;
870 }
871
872 TEST_P(DBIteratorTest, IteratorDeleteAfterCfDrop) {
873 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
874
875 ASSERT_OK(Put(1, "foo", "drop-cf-then-delete-iter"));
876
877 ReadOptions ro;
878 ColumnFamilyHandle* cf = handles_[1];
879
880 auto* iter = db_->NewIterator(ro, cf);
881 iter->SeekToFirst();
882 ASSERT_EQ(IterStatus(iter), "foo->drop-cf-then-delete-iter");
883
884 // drop and delete CF
885 EXPECT_OK(db_->DropColumnFamily(cf));
886 EXPECT_OK(db_->DestroyColumnFamilyHandle(cf));
887 handles_.erase(std::begin(handles_) + 1);
888
889 // delete Iterator after CF handle is dropped
890 delete iter;
891 }
892
893 // SetOptions not defined in ROCKSDB LITE
894 #ifndef ROCKSDB_LITE
895 TEST_P(DBIteratorTest, DBIteratorBoundTest) {
896 Options options = CurrentOptions();
897 options.env = env_;
898 options.create_if_missing = true;
899
900 options.prefix_extractor = nullptr;
901 DestroyAndReopen(options);
902 ASSERT_OK(Put("a", "0"));
903 ASSERT_OK(Put("foo", "bar"));
904 ASSERT_OK(Put("foo1", "bar1"));
905 ASSERT_OK(Put("g1", "0"));
906
907 // testing basic case with no iterate_upper_bound and no prefix_extractor
908 {
909 ReadOptions ro;
910 ro.iterate_upper_bound = nullptr;
911
912 std::unique_ptr<Iterator> iter(NewIterator(ro));
913
914 iter->Seek("foo");
915
916 ASSERT_TRUE(iter->Valid());
917 ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
918
919 iter->Next();
920 ASSERT_TRUE(iter->Valid());
921 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
922
923 iter->Next();
924 ASSERT_TRUE(iter->Valid());
925 ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
926
927 iter->SeekForPrev("g1");
928
929 ASSERT_TRUE(iter->Valid());
930 ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
931
932 iter->Prev();
933 ASSERT_TRUE(iter->Valid());
934 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
935
936 iter->Prev();
937 ASSERT_TRUE(iter->Valid());
938 ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
939 }
940
941 // testing iterate_upper_bound and forward iterator
942 // to make sure it stops at bound
943 {
944 ReadOptions ro;
945 // iterate_upper_bound points beyond the last expected entry
946 Slice prefix("foo2");
947 ro.iterate_upper_bound = &prefix;
948
949 std::unique_ptr<Iterator> iter(NewIterator(ro));
950
951 iter->Seek("foo");
952
953 ASSERT_TRUE(iter->Valid());
954 ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
955
956 iter->Next();
957 ASSERT_TRUE(iter->Valid());
958 ASSERT_EQ(iter->key().compare(("foo1")), 0);
959
960 iter->Next();
961 // should stop here...
962 ASSERT_TRUE(!iter->Valid());
963 }
964 // Testing SeekToLast with iterate_upper_bound set
965 {
966 ReadOptions ro;
967
968 Slice prefix("foo");
969 ro.iterate_upper_bound = &prefix;
970
971 std::unique_ptr<Iterator> iter(NewIterator(ro));
972
973 iter->SeekToLast();
974 ASSERT_TRUE(iter->Valid());
975 ASSERT_EQ(iter->key().compare(Slice("a")), 0);
976 }
977
978 // prefix is the first letter of the key
979 ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}}));
980 ASSERT_OK(Put("a", "0"));
981 ASSERT_OK(Put("foo", "bar"));
982 ASSERT_OK(Put("foo1", "bar1"));
983 ASSERT_OK(Put("g1", "0"));
984
985 // testing with iterate_upper_bound and prefix_extractor
986 // Seek target and iterate_upper_bound are not is same prefix
987 // This should be an error
988 {
989 ReadOptions ro;
990 Slice upper_bound("g");
991 ro.iterate_upper_bound = &upper_bound;
992
993 std::unique_ptr<Iterator> iter(NewIterator(ro));
994
995 iter->Seek("foo");
996
997 ASSERT_TRUE(iter->Valid());
998 ASSERT_EQ("foo", iter->key().ToString());
999
1000 iter->Next();
1001 ASSERT_TRUE(iter->Valid());
1002 ASSERT_EQ("foo1", iter->key().ToString());
1003
1004 iter->Next();
1005 ASSERT_TRUE(!iter->Valid());
1006 }
1007
1008 // testing that iterate_upper_bound prevents iterating over deleted items
1009 // if the bound has already reached
1010 {
1011 options.prefix_extractor = nullptr;
1012 DestroyAndReopen(options);
1013 ASSERT_OK(Put("a", "0"));
1014 ASSERT_OK(Put("b", "0"));
1015 ASSERT_OK(Put("b1", "0"));
1016 ASSERT_OK(Put("c", "0"));
1017 ASSERT_OK(Put("d", "0"));
1018 ASSERT_OK(Put("e", "0"));
1019 ASSERT_OK(Delete("c"));
1020 ASSERT_OK(Delete("d"));
1021
1022 // base case with no bound
1023 ReadOptions ro;
1024 ro.iterate_upper_bound = nullptr;
1025
1026 std::unique_ptr<Iterator> iter(NewIterator(ro));
1027
1028 iter->Seek("b");
1029 ASSERT_TRUE(iter->Valid());
1030 ASSERT_EQ(iter->key().compare(Slice("b")), 0);
1031
1032 iter->Next();
1033 ASSERT_TRUE(iter->Valid());
1034 ASSERT_EQ(iter->key().compare(("b1")), 0);
1035
1036 get_perf_context()->Reset();
1037 iter->Next();
1038
1039 ASSERT_TRUE(iter->Valid());
1040 ASSERT_EQ(
1041 static_cast<int>(get_perf_context()->internal_delete_skipped_count), 2);
1042
1043 // now testing with iterate_bound
1044 Slice prefix("c");
1045 ro.iterate_upper_bound = &prefix;
1046
1047 iter.reset(NewIterator(ro));
1048
1049 get_perf_context()->Reset();
1050
1051 iter->Seek("b");
1052 ASSERT_TRUE(iter->Valid());
1053 ASSERT_EQ(iter->key().compare(Slice("b")), 0);
1054
1055 iter->Next();
1056 ASSERT_TRUE(iter->Valid());
1057 ASSERT_EQ(iter->key().compare(("b1")), 0);
1058
1059 iter->Next();
1060 // the iteration should stop as soon as the bound key is reached
1061 // even though the key is deleted
1062 // hence internal_delete_skipped_count should be 0
1063 ASSERT_TRUE(!iter->Valid());
1064 ASSERT_EQ(
1065 static_cast<int>(get_perf_context()->internal_delete_skipped_count), 0);
1066 }
1067 }
1068
1069 TEST_P(DBIteratorTest, DBIteratorBoundMultiSeek) {
1070 Options options = CurrentOptions();
1071 options.env = env_;
1072 options.create_if_missing = true;
1073 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1074 options.prefix_extractor = nullptr;
1075 DestroyAndReopen(options);
1076 ASSERT_OK(Put("a", "0"));
1077 ASSERT_OK(Put("z", "0"));
1078 ASSERT_OK(Flush());
1079 ASSERT_OK(Put("foo1", "bar1"));
1080 ASSERT_OK(Put("foo2", "bar2"));
1081 ASSERT_OK(Put("foo3", "bar3"));
1082 ASSERT_OK(Put("foo4", "bar4"));
1083
1084 {
1085 std::string up_str = "foo5";
1086 Slice up(up_str);
1087 ReadOptions ro;
1088 ro.iterate_upper_bound = &up;
1089 std::unique_ptr<Iterator> iter(NewIterator(ro));
1090
1091 iter->Seek("foo1");
1092 ASSERT_TRUE(iter->Valid());
1093 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
1094
1095 uint64_t prev_block_cache_hit =
1096 TestGetTickerCount(options, BLOCK_CACHE_HIT);
1097 uint64_t prev_block_cache_miss =
1098 TestGetTickerCount(options, BLOCK_CACHE_MISS);
1099
1100 ASSERT_GT(prev_block_cache_hit + prev_block_cache_miss, 0);
1101
1102 iter->Seek("foo4");
1103 ASSERT_TRUE(iter->Valid());
1104 ASSERT_EQ(iter->key().compare(Slice("foo4")), 0);
1105 ASSERT_EQ(prev_block_cache_hit,
1106 TestGetTickerCount(options, BLOCK_CACHE_HIT));
1107 ASSERT_EQ(prev_block_cache_miss,
1108 TestGetTickerCount(options, BLOCK_CACHE_MISS));
1109
1110 iter->Seek("foo2");
1111 ASSERT_TRUE(iter->Valid());
1112 ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
1113 iter->Next();
1114 ASSERT_TRUE(iter->Valid());
1115 ASSERT_EQ(iter->key().compare(Slice("foo3")), 0);
1116 ASSERT_EQ(prev_block_cache_hit,
1117 TestGetTickerCount(options, BLOCK_CACHE_HIT));
1118 ASSERT_EQ(prev_block_cache_miss,
1119 TestGetTickerCount(options, BLOCK_CACHE_MISS));
1120 }
1121 }
1122 #endif
1123
1124 TEST_P(DBIteratorTest, DBIteratorBoundOptimizationTest) {
1125 for (auto format_version : {2, 3, 4}) {
1126 int upper_bound_hits = 0;
1127 Options options = CurrentOptions();
1128 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
1129 "BlockBasedTableIterator:out_of_bound",
1130 [&upper_bound_hits](void*) { upper_bound_hits++; });
1131 ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
1132 options.env = env_;
1133 options.create_if_missing = true;
1134 options.prefix_extractor = nullptr;
1135 BlockBasedTableOptions table_options;
1136 table_options.format_version = format_version;
1137 table_options.flush_block_policy_factory =
1138 std::make_shared<FlushBlockEveryKeyPolicyFactory>();
1139 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1140
1141 DestroyAndReopen(options);
1142 ASSERT_OK(Put("foo1", "bar1"));
1143 ASSERT_OK(Put("foo2", "bar2"));
1144 ASSERT_OK(Put("foo4", "bar4"));
1145 ASSERT_OK(Flush());
1146
1147 Slice ub("foo3");
1148 ReadOptions ro;
1149 ro.iterate_upper_bound = &ub;
1150
1151 std::unique_ptr<Iterator> iter(NewIterator(ro));
1152
1153 iter->Seek("foo");
1154 ASSERT_TRUE(iter->Valid());
1155 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
1156 ASSERT_EQ(upper_bound_hits, 0);
1157
1158 iter->Next();
1159 ASSERT_TRUE(iter->Valid());
1160 ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
1161 ASSERT_EQ(upper_bound_hits, 0);
1162
1163 iter->Next();
1164 ASSERT_FALSE(iter->Valid());
1165 ASSERT_EQ(upper_bound_hits, 1);
1166 }
1167 }
1168
1169 // Enable kBinarySearchWithFirstKey, do some iterator operations and check that
1170 // they don't do unnecessary block reads.
1171 TEST_P(DBIteratorTest, IndexWithFirstKey) {
1172 for (int tailing = 0; tailing < 2; ++tailing) {
1173 SCOPED_TRACE("tailing = " + std::to_string(tailing));
1174 Options options = CurrentOptions();
1175 options.env = env_;
1176 options.create_if_missing = true;
1177 options.prefix_extractor = nullptr;
1178 options.merge_operator = MergeOperators::CreateStringAppendOperator();
1179 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1180 Statistics* stats = options.statistics.get();
1181 BlockBasedTableOptions table_options;
1182 table_options.index_type =
1183 BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey;
1184 table_options.index_shortening =
1185 BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
1186 table_options.flush_block_policy_factory =
1187 std::make_shared<FlushBlockEveryKeyPolicyFactory>();
1188 table_options.block_cache =
1189 NewLRUCache(8000); // fits all blocks and their cache metadata overhead
1190 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1191
1192 DestroyAndReopen(options);
1193 ASSERT_OK(Merge("a1", "x1"));
1194 ASSERT_OK(Merge("b1", "y1"));
1195 ASSERT_OK(Merge("c0", "z1"));
1196 ASSERT_OK(Flush());
1197 ASSERT_OK(Merge("a2", "x2"));
1198 ASSERT_OK(Merge("b2", "y2"));
1199 ASSERT_OK(Merge("c0", "z2"));
1200 ASSERT_OK(Flush());
1201 ASSERT_OK(Merge("a3", "x3"));
1202 ASSERT_OK(Merge("b3", "y3"));
1203 ASSERT_OK(Merge("c3", "z3"));
1204 ASSERT_OK(Flush());
1205
1206 // Block cache is not important for this test.
1207 // We use BLOCK_CACHE_DATA_* counters just because they're the most readily
1208 // available way of counting block accesses.
1209
1210 ReadOptions ropt;
1211 ropt.tailing = tailing;
1212 std::unique_ptr<Iterator> iter(NewIterator(ropt));
1213
1214 ropt.read_tier = ReadTier::kBlockCacheTier;
1215 std::unique_ptr<Iterator> nonblocking_iter(NewIterator(ropt));
1216
1217 iter->Seek("b10");
1218 ASSERT_TRUE(iter->Valid());
1219 EXPECT_EQ("b2", iter->key().ToString());
1220 EXPECT_EQ("y2", iter->value().ToString());
1221 EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1222
1223 // The cache-only iterator should succeed too, using the blocks pulled into
1224 // the cache by the previous iterator.
1225 nonblocking_iter->Seek("b10");
1226 ASSERT_TRUE(nonblocking_iter->Valid());
1227 EXPECT_EQ("b2", nonblocking_iter->key().ToString());
1228 EXPECT_EQ("y2", nonblocking_iter->value().ToString());
1229 EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1230
1231 // ... but it shouldn't be able to step forward since the next block is
1232 // not in cache yet.
1233 nonblocking_iter->Next();
1234 ASSERT_FALSE(nonblocking_iter->Valid());
1235 ASSERT_TRUE(nonblocking_iter->status().IsIncomplete());
1236
1237 // ... nor should a seek to the next key succeed.
1238 nonblocking_iter->Seek("b20");
1239 ASSERT_FALSE(nonblocking_iter->Valid());
1240 ASSERT_TRUE(nonblocking_iter->status().IsIncomplete());
1241
1242 iter->Next();
1243 ASSERT_TRUE(iter->Valid());
1244 EXPECT_EQ("b3", iter->key().ToString());
1245 EXPECT_EQ("y3", iter->value().ToString());
1246 EXPECT_EQ(4, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1247 EXPECT_EQ(1, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1248
1249 // After the blocking iterator loaded the next block, the nonblocking
1250 // iterator's seek should succeed.
1251 nonblocking_iter->Seek("b20");
1252 ASSERT_TRUE(nonblocking_iter->Valid());
1253 EXPECT_EQ("b3", nonblocking_iter->key().ToString());
1254 EXPECT_EQ("y3", nonblocking_iter->value().ToString());
1255 EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1256
1257 iter->Seek("c0");
1258 ASSERT_TRUE(iter->Valid());
1259 EXPECT_EQ("c0", iter->key().ToString());
1260 EXPECT_EQ("z1,z2", iter->value().ToString());
1261 EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1262 EXPECT_EQ(6, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1263
1264 iter->Next();
1265 ASSERT_TRUE(iter->Valid());
1266 EXPECT_EQ("c3", iter->key().ToString());
1267 EXPECT_EQ("z3", iter->value().ToString());
1268 EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1269 EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1270
1271 iter.reset();
1272
1273 // Enable iterate_upper_bound and check that iterator is not trying to read
1274 // blocks that are fully above upper bound.
1275 std::string ub = "b3";
1276 Slice ub_slice(ub);
1277 ropt.iterate_upper_bound = &ub_slice;
1278 iter.reset(NewIterator(ropt));
1279
1280 iter->Seek("b2");
1281 ASSERT_TRUE(iter->Valid());
1282 EXPECT_EQ("b2", iter->key().ToString());
1283 EXPECT_EQ("y2", iter->value().ToString());
1284 EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1285 EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1286
1287 iter->Next();
1288 ASSERT_FALSE(iter->Valid());
1289 EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1290 EXPECT_EQ(7, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1291 }
1292 }
1293
1294 TEST_P(DBIteratorTest, IndexWithFirstKeyGet) {
1295 Options options = CurrentOptions();
1296 options.env = env_;
1297 options.create_if_missing = true;
1298 options.prefix_extractor = nullptr;
1299 options.merge_operator = MergeOperators::CreateStringAppendOperator();
1300 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
1301 Statistics* stats = options.statistics.get();
1302 BlockBasedTableOptions table_options;
1303 table_options.index_type =
1304 BlockBasedTableOptions::IndexType::kBinarySearchWithFirstKey;
1305 table_options.index_shortening =
1306 BlockBasedTableOptions::IndexShorteningMode::kNoShortening;
1307 table_options.flush_block_policy_factory =
1308 std::make_shared<FlushBlockEveryKeyPolicyFactory>();
1309 table_options.block_cache = NewLRUCache(1000); // fits all blocks
1310 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1311
1312 DestroyAndReopen(options);
1313 ASSERT_OK(Merge("a", "x1"));
1314 ASSERT_OK(Merge("c", "y1"));
1315 ASSERT_OK(Merge("e", "z1"));
1316 ASSERT_OK(Flush());
1317 ASSERT_OK(Merge("c", "y2"));
1318 ASSERT_OK(Merge("e", "z2"));
1319 ASSERT_OK(Flush());
1320
1321 // Get() between blocks shouldn't read any blocks.
1322 ASSERT_EQ("NOT_FOUND", Get("b"));
1323 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1324 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1325
1326 // Get() of an existing key shouldn't read any unnecessary blocks when there's
1327 // only one key per block.
1328
1329 ASSERT_EQ("y1,y2", Get("c"));
1330 EXPECT_EQ(2, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1331 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1332
1333 ASSERT_EQ("x1", Get("a"));
1334 EXPECT_EQ(3, stats->getTickerCount(BLOCK_CACHE_DATA_MISS));
1335 EXPECT_EQ(0, stats->getTickerCount(BLOCK_CACHE_DATA_HIT));
1336
1337 EXPECT_EQ(std::vector<std::string>({"NOT_FOUND", "z1,z2"}),
1338 MultiGet({"b", "e"}));
1339 }
1340
1341 // TODO(3.13): fix the issue of Seek() + Prev() which might not necessary
1342 // return the biggest key which is smaller than the seek key.
1343 TEST_P(DBIteratorTest, PrevAfterAndNextAfterMerge) {
1344 Options options;
1345 options.create_if_missing = true;
1346 options.merge_operator = MergeOperators::CreatePutOperator();
1347 options.env = env_;
1348 DestroyAndReopen(options);
1349
1350 // write three entries with different keys using Merge()
1351 WriteOptions wopts;
1352 ASSERT_OK(db_->Merge(wopts, "1", "data1"));
1353 ASSERT_OK(db_->Merge(wopts, "2", "data2"));
1354 ASSERT_OK(db_->Merge(wopts, "3", "data3"));
1355
1356 std::unique_ptr<Iterator> it(NewIterator(ReadOptions()));
1357
1358 it->Seek("2");
1359 ASSERT_TRUE(it->Valid());
1360 ASSERT_EQ("2", it->key().ToString());
1361
1362 it->Prev();
1363 ASSERT_TRUE(it->Valid());
1364 ASSERT_EQ("1", it->key().ToString());
1365
1366 it->SeekForPrev("1");
1367 ASSERT_TRUE(it->Valid());
1368 ASSERT_EQ("1", it->key().ToString());
1369
1370 it->Next();
1371 ASSERT_TRUE(it->Valid());
1372 ASSERT_EQ("2", it->key().ToString());
1373 }
1374
1375 class DBIteratorTestForPinnedData : public DBIteratorTest {
1376 public:
1377 enum TestConfig {
1378 NORMAL,
1379 CLOSE_AND_OPEN,
1380 COMPACT_BEFORE_READ,
1381 FLUSH_EVERY_1000,
1382 MAX
1383 };
1384 DBIteratorTestForPinnedData() : DBIteratorTest() {}
1385 void PinnedDataIteratorRandomized(TestConfig run_config) {
1386 // Generate Random data
1387 Random rnd(301);
1388
1389 int puts = 100000;
1390 int key_pool = static_cast<int>(puts * 0.7);
1391 int key_size = 100;
1392 int val_size = 1000;
1393 int seeks_percentage = 20; // 20% of keys will be used to test seek()
1394 int delete_percentage = 20; // 20% of keys will be deleted
1395 int merge_percentage = 20; // 20% of keys will be added using Merge()
1396
1397 Options options = CurrentOptions();
1398 BlockBasedTableOptions table_options;
1399 table_options.use_delta_encoding = false;
1400 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1401 options.merge_operator = MergeOperators::CreatePutOperator();
1402 DestroyAndReopen(options);
1403
1404 std::vector<std::string> generated_keys(key_pool);
1405 for (int i = 0; i < key_pool; i++) {
1406 generated_keys[i] = rnd.RandomString(key_size);
1407 }
1408
1409 std::map<std::string, std::string> true_data;
1410 std::vector<std::string> random_keys;
1411 std::vector<std::string> deleted_keys;
1412 for (int i = 0; i < puts; i++) {
1413 auto& k = generated_keys[rnd.Next() % key_pool];
1414 auto v = rnd.RandomString(val_size);
1415
1416 // Insert data to true_data map and to DB
1417 true_data[k] = v;
1418 if (rnd.PercentTrue(merge_percentage)) {
1419 ASSERT_OK(db_->Merge(WriteOptions(), k, v));
1420 } else {
1421 ASSERT_OK(Put(k, v));
1422 }
1423
1424 // Pick random keys to be used to test Seek()
1425 if (rnd.PercentTrue(seeks_percentage)) {
1426 random_keys.push_back(k);
1427 }
1428
1429 // Delete some random keys
1430 if (rnd.PercentTrue(delete_percentage)) {
1431 deleted_keys.push_back(k);
1432 true_data.erase(k);
1433 ASSERT_OK(Delete(k));
1434 }
1435
1436 if (run_config == TestConfig::FLUSH_EVERY_1000) {
1437 if (i && i % 1000 == 0) {
1438 ASSERT_OK(Flush());
1439 }
1440 }
1441 }
1442
1443 if (run_config == TestConfig::CLOSE_AND_OPEN) {
1444 Close();
1445 Reopen(options);
1446 } else if (run_config == TestConfig::COMPACT_BEFORE_READ) {
1447 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1448 }
1449
1450 ReadOptions ro;
1451 ro.pin_data = true;
1452 auto iter = NewIterator(ro);
1453
1454 {
1455 // Test Seek to random keys
1456 std::vector<Slice> keys_slices;
1457 std::vector<std::string> true_keys;
1458 for (auto& k : random_keys) {
1459 iter->Seek(k);
1460 if (!iter->Valid()) {
1461 ASSERT_EQ(true_data.lower_bound(k), true_data.end());
1462 continue;
1463 }
1464 std::string prop_value;
1465 ASSERT_OK(
1466 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1467 ASSERT_EQ("1", prop_value);
1468 keys_slices.push_back(iter->key());
1469 true_keys.push_back(true_data.lower_bound(k)->first);
1470 }
1471
1472 for (size_t i = 0; i < keys_slices.size(); i++) {
1473 ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
1474 }
1475 }
1476
1477 {
1478 // Test SeekForPrev to random keys
1479 std::vector<Slice> keys_slices;
1480 std::vector<std::string> true_keys;
1481 for (auto& k : random_keys) {
1482 iter->SeekForPrev(k);
1483 if (!iter->Valid()) {
1484 ASSERT_EQ(true_data.upper_bound(k), true_data.begin());
1485 continue;
1486 }
1487 std::string prop_value;
1488 ASSERT_OK(
1489 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1490 ASSERT_EQ("1", prop_value);
1491 keys_slices.push_back(iter->key());
1492 true_keys.push_back((--true_data.upper_bound(k))->first);
1493 }
1494
1495 for (size_t i = 0; i < keys_slices.size(); i++) {
1496 ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
1497 }
1498 }
1499
1500 {
1501 // Test iterating all data forward
1502 std::vector<Slice> all_keys;
1503 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1504 std::string prop_value;
1505 ASSERT_OK(
1506 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1507 ASSERT_EQ("1", prop_value);
1508 all_keys.push_back(iter->key());
1509 }
1510 ASSERT_EQ(all_keys.size(), true_data.size());
1511
1512 // Verify that all keys slices are valid
1513 auto data_iter = true_data.begin();
1514 for (size_t i = 0; i < all_keys.size(); i++) {
1515 ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
1516 data_iter++;
1517 }
1518 }
1519
1520 {
1521 // Test iterating all data backward
1522 std::vector<Slice> all_keys;
1523 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
1524 std::string prop_value;
1525 ASSERT_OK(
1526 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1527 ASSERT_EQ("1", prop_value);
1528 all_keys.push_back(iter->key());
1529 }
1530 ASSERT_EQ(all_keys.size(), true_data.size());
1531
1532 // Verify that all keys slices are valid (backward)
1533 auto data_iter = true_data.rbegin();
1534 for (size_t i = 0; i < all_keys.size(); i++) {
1535 ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
1536 data_iter++;
1537 }
1538 }
1539
1540 delete iter;
1541 }
1542 };
1543
1544 #if !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
1545 TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedNormal) {
1546 PinnedDataIteratorRandomized(TestConfig::NORMAL);
1547 }
1548 #endif // !defined(ROCKSDB_VALGRIND_RUN) || defined(ROCKSDB_FULL_VALGRIND_RUN)
1549
1550 TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedCLoseAndOpen) {
1551 PinnedDataIteratorRandomized(TestConfig::CLOSE_AND_OPEN);
1552 }
1553
1554 TEST_P(DBIteratorTestForPinnedData,
1555 PinnedDataIteratorRandomizedCompactBeforeRead) {
1556 PinnedDataIteratorRandomized(TestConfig::COMPACT_BEFORE_READ);
1557 }
1558
1559 TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedFlush) {
1560 PinnedDataIteratorRandomized(TestConfig::FLUSH_EVERY_1000);
1561 }
1562
1563 INSTANTIATE_TEST_CASE_P(DBIteratorTestForPinnedDataInstance,
1564 DBIteratorTestForPinnedData,
1565 testing::Values(true, false));
1566
1567 #ifndef ROCKSDB_LITE
1568 TEST_P(DBIteratorTest, PinnedDataIteratorMultipleFiles) {
1569 Options options = CurrentOptions();
1570 BlockBasedTableOptions table_options;
1571 table_options.use_delta_encoding = false;
1572 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1573 options.disable_auto_compactions = true;
1574 options.write_buffer_size = 1024 * 1024 * 10; // 10 Mb
1575 DestroyAndReopen(options);
1576
1577 std::map<std::string, std::string> true_data;
1578
1579 // Generate 4 sst files in L2
1580 Random rnd(301);
1581 for (int i = 1; i <= 1000; i++) {
1582 std::string k = Key(i * 3);
1583 std::string v = rnd.RandomString(100);
1584 ASSERT_OK(Put(k, v));
1585 true_data[k] = v;
1586 if (i % 250 == 0) {
1587 ASSERT_OK(Flush());
1588 }
1589 }
1590 ASSERT_EQ(FilesPerLevel(0), "4");
1591 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1592 ASSERT_EQ(FilesPerLevel(0), "0,4");
1593
1594 // Generate 4 sst files in L0
1595 for (int i = 1; i <= 1000; i++) {
1596 std::string k = Key(i * 2);
1597 std::string v = rnd.RandomString(100);
1598 ASSERT_OK(Put(k, v));
1599 true_data[k] = v;
1600 if (i % 250 == 0) {
1601 ASSERT_OK(Flush());
1602 }
1603 }
1604 ASSERT_EQ(FilesPerLevel(0), "4,4");
1605
1606 // Add some keys/values in memtables
1607 for (int i = 1; i <= 1000; i++) {
1608 std::string k = Key(i);
1609 std::string v = rnd.RandomString(100);
1610 ASSERT_OK(Put(k, v));
1611 true_data[k] = v;
1612 }
1613 ASSERT_EQ(FilesPerLevel(0), "4,4");
1614
1615 ReadOptions ro;
1616 ro.pin_data = true;
1617 auto iter = NewIterator(ro);
1618
1619 std::vector<std::pair<Slice, std::string>> results;
1620 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1621 std::string prop_value;
1622 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1623 ASSERT_EQ("1", prop_value);
1624 results.emplace_back(iter->key(), iter->value().ToString());
1625 }
1626
1627 ASSERT_EQ(results.size(), true_data.size());
1628 auto data_iter = true_data.begin();
1629 for (size_t i = 0; i < results.size(); i++, data_iter++) {
1630 auto& kv = results[i];
1631 ASSERT_EQ(kv.first, data_iter->first);
1632 ASSERT_EQ(kv.second, data_iter->second);
1633 }
1634
1635 delete iter;
1636 }
1637 #endif
1638
1639 TEST_P(DBIteratorTest, PinnedDataIteratorMergeOperator) {
1640 Options options = CurrentOptions();
1641 BlockBasedTableOptions table_options;
1642 table_options.use_delta_encoding = false;
1643 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1644 options.merge_operator = MergeOperators::CreateUInt64AddOperator();
1645 DestroyAndReopen(options);
1646
1647 std::string numbers[7];
1648 for (int val = 0; val <= 6; val++) {
1649 PutFixed64(numbers + val, val);
1650 }
1651
1652 // +1 all keys in range [ 0 => 999]
1653 for (int i = 0; i < 1000; i++) {
1654 WriteOptions wo;
1655 ASSERT_OK(db_->Merge(wo, Key(i), numbers[1]));
1656 }
1657
1658 // +2 all keys divisible by 2 in range [ 0 => 999]
1659 for (int i = 0; i < 1000; i += 2) {
1660 WriteOptions wo;
1661 ASSERT_OK(db_->Merge(wo, Key(i), numbers[2]));
1662 }
1663
1664 // +3 all keys divisible by 5 in range [ 0 => 999]
1665 for (int i = 0; i < 1000; i += 5) {
1666 WriteOptions wo;
1667 ASSERT_OK(db_->Merge(wo, Key(i), numbers[3]));
1668 }
1669
1670 ReadOptions ro;
1671 ro.pin_data = true;
1672 auto iter = NewIterator(ro);
1673
1674 std::vector<std::pair<Slice, std::string>> results;
1675 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1676 std::string prop_value;
1677 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1678 ASSERT_EQ("1", prop_value);
1679 results.emplace_back(iter->key(), iter->value().ToString());
1680 }
1681
1682 ASSERT_EQ(results.size(), 1000);
1683 for (size_t i = 0; i < results.size(); i++) {
1684 auto& kv = results[i];
1685 ASSERT_EQ(kv.first, Key(static_cast<int>(i)));
1686 int expected_val = 1;
1687 if (i % 2 == 0) {
1688 expected_val += 2;
1689 }
1690 if (i % 5 == 0) {
1691 expected_val += 3;
1692 }
1693 ASSERT_EQ(kv.second, numbers[expected_val]);
1694 }
1695
1696 delete iter;
1697 }
1698
1699 TEST_P(DBIteratorTest, PinnedDataIteratorReadAfterUpdate) {
1700 Options options = CurrentOptions();
1701 BlockBasedTableOptions table_options;
1702 table_options.use_delta_encoding = false;
1703 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1704 options.write_buffer_size = 100000;
1705 DestroyAndReopen(options);
1706
1707 Random rnd(301);
1708
1709 std::map<std::string, std::string> true_data;
1710 for (int i = 0; i < 1000; i++) {
1711 std::string k = rnd.RandomString(10);
1712 std::string v = rnd.RandomString(1000);
1713 ASSERT_OK(Put(k, v));
1714 true_data[k] = v;
1715 }
1716
1717 ReadOptions ro;
1718 ro.pin_data = true;
1719 auto iter = NewIterator(ro);
1720
1721 // Delete 50% of the keys and update the other 50%
1722 for (auto& kv : true_data) {
1723 if (rnd.OneIn(2)) {
1724 ASSERT_OK(Delete(kv.first));
1725 } else {
1726 std::string new_val = rnd.RandomString(1000);
1727 ASSERT_OK(Put(kv.first, new_val));
1728 }
1729 }
1730
1731 std::vector<std::pair<Slice, std::string>> results;
1732 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1733 std::string prop_value;
1734 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1735 ASSERT_EQ("1", prop_value);
1736 results.emplace_back(iter->key(), iter->value().ToString());
1737 }
1738
1739 auto data_iter = true_data.begin();
1740 for (size_t i = 0; i < results.size(); i++, data_iter++) {
1741 auto& kv = results[i];
1742 ASSERT_EQ(kv.first, data_iter->first);
1743 ASSERT_EQ(kv.second, data_iter->second);
1744 }
1745
1746 delete iter;
1747 }
1748
1749 class SliceTransformLimitedDomainGeneric : public SliceTransform {
1750 const char* Name() const override {
1751 return "SliceTransformLimitedDomainGeneric";
1752 }
1753
1754 Slice Transform(const Slice& src) const override {
1755 return Slice(src.data(), 1);
1756 }
1757
1758 bool InDomain(const Slice& src) const override {
1759 // prefix will be x????
1760 return src.size() >= 1;
1761 }
1762
1763 bool InRange(const Slice& dst) const override {
1764 // prefix will be x????
1765 return dst.size() == 1;
1766 }
1767 };
1768
1769 TEST_P(DBIteratorTest, IterSeekForPrevCrossingFiles) {
1770 Options options = CurrentOptions();
1771 options.prefix_extractor.reset(NewFixedPrefixTransform(1));
1772 options.disable_auto_compactions = true;
1773 // Enable prefix bloom for SST files
1774 BlockBasedTableOptions table_options;
1775 table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
1776 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1777 DestroyAndReopen(options);
1778
1779 ASSERT_OK(Put("a1", "va1"));
1780 ASSERT_OK(Put("a2", "va2"));
1781 ASSERT_OK(Put("a3", "va3"));
1782 ASSERT_OK(Flush());
1783
1784 ASSERT_OK(Put("b1", "vb1"));
1785 ASSERT_OK(Put("b2", "vb2"));
1786 ASSERT_OK(Put("b3", "vb3"));
1787 ASSERT_OK(Flush());
1788
1789 ASSERT_OK(Put("b4", "vb4"));
1790 ASSERT_OK(Put("d1", "vd1"));
1791 ASSERT_OK(Put("d2", "vd2"));
1792 ASSERT_OK(Put("d4", "vd4"));
1793 ASSERT_OK(Flush());
1794
1795 MoveFilesToLevel(1);
1796 {
1797 ReadOptions ro;
1798 Iterator* iter = NewIterator(ro);
1799
1800 iter->SeekForPrev("a4");
1801 ASSERT_EQ(iter->key().ToString(), "a3");
1802 ASSERT_EQ(iter->value().ToString(), "va3");
1803
1804 iter->SeekForPrev("c2");
1805 ASSERT_EQ(iter->key().ToString(), "b3");
1806 iter->SeekForPrev("d3");
1807 ASSERT_EQ(iter->key().ToString(), "d2");
1808 iter->SeekForPrev("b5");
1809 ASSERT_EQ(iter->key().ToString(), "b4");
1810 delete iter;
1811 }
1812
1813 {
1814 ReadOptions ro;
1815 ro.prefix_same_as_start = true;
1816 Iterator* iter = NewIterator(ro);
1817 iter->SeekForPrev("c2");
1818 ASSERT_TRUE(!iter->Valid());
1819 ASSERT_OK(iter->status());
1820 delete iter;
1821 }
1822 }
1823
1824 TEST_P(DBIteratorTest, IterSeekForPrevCrossingFilesCustomPrefixExtractor) {
1825 Options options = CurrentOptions();
1826 options.prefix_extractor =
1827 std::make_shared<SliceTransformLimitedDomainGeneric>();
1828 options.disable_auto_compactions = true;
1829 // Enable prefix bloom for SST files
1830 BlockBasedTableOptions table_options;
1831 table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
1832 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1833 DestroyAndReopen(options);
1834
1835 ASSERT_OK(Put("a1", "va1"));
1836 ASSERT_OK(Put("a2", "va2"));
1837 ASSERT_OK(Put("a3", "va3"));
1838 ASSERT_OK(Flush());
1839
1840 ASSERT_OK(Put("b1", "vb1"));
1841 ASSERT_OK(Put("b2", "vb2"));
1842 ASSERT_OK(Put("b3", "vb3"));
1843 ASSERT_OK(Flush());
1844
1845 ASSERT_OK(Put("b4", "vb4"));
1846 ASSERT_OK(Put("d1", "vd1"));
1847 ASSERT_OK(Put("d2", "vd2"));
1848 ASSERT_OK(Put("d4", "vd4"));
1849 ASSERT_OK(Flush());
1850
1851 MoveFilesToLevel(1);
1852 {
1853 ReadOptions ro;
1854 Iterator* iter = NewIterator(ro);
1855
1856 iter->SeekForPrev("a4");
1857 ASSERT_EQ(iter->key().ToString(), "a3");
1858 ASSERT_EQ(iter->value().ToString(), "va3");
1859
1860 iter->SeekForPrev("c2");
1861 ASSERT_EQ(iter->key().ToString(), "b3");
1862 iter->SeekForPrev("d3");
1863 ASSERT_EQ(iter->key().ToString(), "d2");
1864 iter->SeekForPrev("b5");
1865 ASSERT_EQ(iter->key().ToString(), "b4");
1866 delete iter;
1867 }
1868
1869 {
1870 ReadOptions ro;
1871 ro.prefix_same_as_start = true;
1872 Iterator* iter = NewIterator(ro);
1873 iter->SeekForPrev("c2");
1874 ASSERT_TRUE(!iter->Valid());
1875 ASSERT_OK(iter->status());
1876 delete iter;
1877 }
1878 }
1879
1880 TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocks) {
1881 Options options = CurrentOptions();
1882 BlockBasedTableOptions table_options;
1883 table_options.block_size = 1; // every block will contain one entry
1884 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1885 options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
1886 options.disable_auto_compactions = true;
1887 options.max_sequential_skip_in_iterations = 8;
1888
1889 DestroyAndReopen(options);
1890
1891 // Putting such deletes will force DBIter::Prev() to fallback to a Seek
1892 for (int file_num = 0; file_num < 10; file_num++) {
1893 ASSERT_OK(Delete("key4"));
1894 ASSERT_OK(Flush());
1895 }
1896
1897 // First File containing 5 blocks of puts
1898 ASSERT_OK(Put("key1", "val1.0"));
1899 ASSERT_OK(Put("key2", "val2.0"));
1900 ASSERT_OK(Put("key3", "val3.0"));
1901 ASSERT_OK(Put("key4", "val4.0"));
1902 ASSERT_OK(Put("key5", "val5.0"));
1903 ASSERT_OK(Flush());
1904
1905 // Second file containing 9 blocks of merge operands
1906 ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.1"));
1907 ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.2"));
1908
1909 ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.1"));
1910 ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.2"));
1911 ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.3"));
1912
1913 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.1"));
1914 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.2"));
1915 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.3"));
1916 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.4"));
1917 ASSERT_OK(Flush());
1918
1919 {
1920 ReadOptions ro;
1921 ro.fill_cache = false;
1922 Iterator* iter = NewIterator(ro);
1923
1924 iter->SeekToLast();
1925 ASSERT_EQ(iter->key().ToString(), "key5");
1926 ASSERT_EQ(iter->value().ToString(), "val5.0");
1927
1928 iter->Prev();
1929 ASSERT_EQ(iter->key().ToString(), "key4");
1930 ASSERT_EQ(iter->value().ToString(), "val4.0");
1931
1932 iter->Prev();
1933 ASSERT_EQ(iter->key().ToString(), "key3");
1934 ASSERT_EQ(iter->value().ToString(), "val3.0,val3.1,val3.2,val3.3,val3.4");
1935
1936 iter->Prev();
1937 ASSERT_EQ(iter->key().ToString(), "key2");
1938 ASSERT_EQ(iter->value().ToString(), "val2.0,val2.1,val2.2,val2.3");
1939
1940 iter->Prev();
1941 ASSERT_EQ(iter->key().ToString(), "key1");
1942 ASSERT_EQ(iter->value().ToString(), "val1.0,val1.1,val1.2");
1943
1944 delete iter;
1945 }
1946 }
1947
1948 TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocksRandomized) {
1949 Options options = CurrentOptions();
1950 options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
1951 options.disable_auto_compactions = true;
1952 options.level0_slowdown_writes_trigger = (1 << 30);
1953 options.level0_stop_writes_trigger = (1 << 30);
1954 options.max_sequential_skip_in_iterations = 8;
1955 DestroyAndReopen(options);
1956
1957 const int kNumKeys = 500;
1958 // Small number of merge operands to make sure that DBIter::Prev() don't
1959 // fall back to Seek()
1960 const int kNumMergeOperands = 3;
1961 // Use value size that will make sure that every block contain 1 key
1962 const int kValSize =
1963 static_cast<int>(BlockBasedTableOptions().block_size) * 4;
1964 // Percentage of keys that wont get merge operations
1965 const int kNoMergeOpPercentage = 20;
1966 // Percentage of keys that will be deleted
1967 const int kDeletePercentage = 10;
1968
1969 // For half of the key range we will write multiple deletes first to
1970 // force DBIter::Prev() to fall back to Seek()
1971 for (int file_num = 0; file_num < 10; file_num++) {
1972 for (int i = 0; i < kNumKeys; i += 2) {
1973 ASSERT_OK(Delete(Key(i)));
1974 }
1975 ASSERT_OK(Flush());
1976 }
1977
1978 Random rnd(301);
1979 std::map<std::string, std::string> true_data;
1980 std::string gen_key;
1981 std::string gen_val;
1982
1983 for (int i = 0; i < kNumKeys; i++) {
1984 gen_key = Key(i);
1985 gen_val = rnd.RandomString(kValSize);
1986
1987 ASSERT_OK(Put(gen_key, gen_val));
1988 true_data[gen_key] = gen_val;
1989 }
1990 ASSERT_OK(Flush());
1991
1992 // Separate values and merge operands in different file so that we
1993 // make sure that we don't merge them while flushing but actually
1994 // merge them in the read path
1995 for (int i = 0; i < kNumKeys; i++) {
1996 if (rnd.PercentTrue(kNoMergeOpPercentage)) {
1997 // Dont give merge operations for some keys
1998 continue;
1999 }
2000
2001 for (int j = 0; j < kNumMergeOperands; j++) {
2002 gen_key = Key(i);
2003 gen_val = rnd.RandomString(kValSize);
2004
2005 ASSERT_OK(db_->Merge(WriteOptions(), gen_key, gen_val));
2006 true_data[gen_key] += "," + gen_val;
2007 }
2008 }
2009 ASSERT_OK(Flush());
2010
2011 for (int i = 0; i < kNumKeys; i++) {
2012 if (rnd.PercentTrue(kDeletePercentage)) {
2013 gen_key = Key(i);
2014
2015 ASSERT_OK(Delete(gen_key));
2016 true_data.erase(gen_key);
2017 }
2018 }
2019 ASSERT_OK(Flush());
2020
2021 {
2022 ReadOptions ro;
2023 ro.fill_cache = false;
2024 Iterator* iter = NewIterator(ro);
2025 auto data_iter = true_data.rbegin();
2026
2027 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
2028 ASSERT_EQ(iter->key().ToString(), data_iter->first);
2029 ASSERT_EQ(iter->value().ToString(), data_iter->second);
2030 data_iter++;
2031 }
2032 ASSERT_EQ(data_iter, true_data.rend());
2033
2034 delete iter;
2035 }
2036
2037 {
2038 ReadOptions ro;
2039 ro.fill_cache = false;
2040 Iterator* iter = NewIterator(ro);
2041 auto data_iter = true_data.rbegin();
2042
2043 int entries_right = 0;
2044 std::string seek_key;
2045 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
2046 // Verify key/value of current position
2047 ASSERT_EQ(iter->key().ToString(), data_iter->first);
2048 ASSERT_EQ(iter->value().ToString(), data_iter->second);
2049
2050 bool restore_position_with_seek = rnd.Uniform(2);
2051 if (restore_position_with_seek) {
2052 seek_key = iter->key().ToString();
2053 }
2054
2055 // Do some Next() operations the restore the iterator to orignal position
2056 int next_count =
2057 entries_right > 0 ? rnd.Uniform(std::min(entries_right, 10)) : 0;
2058 for (int i = 0; i < next_count; i++) {
2059 iter->Next();
2060 data_iter--;
2061
2062 ASSERT_EQ(iter->key().ToString(), data_iter->first);
2063 ASSERT_EQ(iter->value().ToString(), data_iter->second);
2064 }
2065
2066 if (restore_position_with_seek) {
2067 // Restore orignal position using Seek()
2068 iter->Seek(seek_key);
2069 for (int i = 0; i < next_count; i++) {
2070 data_iter++;
2071 }
2072
2073 ASSERT_EQ(iter->key().ToString(), data_iter->first);
2074 ASSERT_EQ(iter->value().ToString(), data_iter->second);
2075 } else {
2076 // Restore original position using Prev()
2077 for (int i = 0; i < next_count; i++) {
2078 iter->Prev();
2079 data_iter++;
2080
2081 ASSERT_EQ(iter->key().ToString(), data_iter->first);
2082 ASSERT_EQ(iter->value().ToString(), data_iter->second);
2083 }
2084 }
2085
2086 entries_right++;
2087 data_iter++;
2088 }
2089 ASSERT_EQ(data_iter, true_data.rend());
2090
2091 delete iter;
2092 }
2093 }
2094
2095 TEST_P(DBIteratorTest, IteratorWithLocalStatistics) {
2096 Options options = CurrentOptions();
2097 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2098 DestroyAndReopen(options);
2099
2100 Random rnd(301);
2101 for (int i = 0; i < 1000; i++) {
2102 // Key 10 bytes / Value 10 bytes
2103 ASSERT_OK(Put(rnd.RandomString(10), rnd.RandomString(10)));
2104 }
2105
2106 std::atomic<uint64_t> total_next(0);
2107 std::atomic<uint64_t> total_next_found(0);
2108 std::atomic<uint64_t> total_prev(0);
2109 std::atomic<uint64_t> total_prev_found(0);
2110 std::atomic<uint64_t> total_bytes(0);
2111
2112 std::vector<port::Thread> threads;
2113 std::function<void()> reader_func_next = [&]() {
2114 SetPerfLevel(kEnableCount);
2115 get_perf_context()->Reset();
2116 Iterator* iter = NewIterator(ReadOptions());
2117
2118 iter->SeekToFirst();
2119 // Seek will bump ITER_BYTES_READ
2120 uint64_t bytes = 0;
2121 bytes += iter->key().size();
2122 bytes += iter->value().size();
2123 while (true) {
2124 iter->Next();
2125 total_next++;
2126
2127 if (!iter->Valid()) {
2128 break;
2129 }
2130 total_next_found++;
2131 bytes += iter->key().size();
2132 bytes += iter->value().size();
2133 }
2134
2135 delete iter;
2136 ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes);
2137 SetPerfLevel(kDisable);
2138 total_bytes += bytes;
2139 };
2140
2141 std::function<void()> reader_func_prev = [&]() {
2142 SetPerfLevel(kEnableCount);
2143 Iterator* iter = NewIterator(ReadOptions());
2144
2145 iter->SeekToLast();
2146 // Seek will bump ITER_BYTES_READ
2147 uint64_t bytes = 0;
2148 bytes += iter->key().size();
2149 bytes += iter->value().size();
2150 while (true) {
2151 iter->Prev();
2152 total_prev++;
2153
2154 if (!iter->Valid()) {
2155 break;
2156 }
2157 total_prev_found++;
2158 bytes += iter->key().size();
2159 bytes += iter->value().size();
2160 }
2161
2162 delete iter;
2163 ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes);
2164 SetPerfLevel(kDisable);
2165 total_bytes += bytes;
2166 };
2167
2168 for (int i = 0; i < 10; i++) {
2169 threads.emplace_back(reader_func_next);
2170 }
2171 for (int i = 0; i < 15; i++) {
2172 threads.emplace_back(reader_func_prev);
2173 }
2174
2175 for (auto& t : threads) {
2176 t.join();
2177 }
2178
2179 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), (uint64_t)total_next);
2180 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND),
2181 (uint64_t)total_next_found);
2182 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), (uint64_t)total_prev);
2183 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND),
2184 (uint64_t)total_prev_found);
2185 ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ),
2186 (uint64_t)total_bytes);
2187 }
2188
2189 TEST_P(DBIteratorTest, ReadAhead) {
2190 Options options;
2191 env_->count_random_reads_ = true;
2192 options.env = env_;
2193 options.disable_auto_compactions = true;
2194 options.write_buffer_size = 4 << 20;
2195 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2196 BlockBasedTableOptions table_options;
2197 table_options.block_size = 1024;
2198 table_options.no_block_cache = true;
2199 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2200 Reopen(options);
2201
2202 std::string value(1024, 'a');
2203 for (int i = 0; i < 100; i++) {
2204 ASSERT_OK(Put(Key(i), value));
2205 }
2206 ASSERT_OK(Flush());
2207 MoveFilesToLevel(2);
2208
2209 for (int i = 0; i < 100; i++) {
2210 ASSERT_OK(Put(Key(i), value));
2211 }
2212 ASSERT_OK(Flush());
2213 MoveFilesToLevel(1);
2214
2215 for (int i = 0; i < 100; i++) {
2216 ASSERT_OK(Put(Key(i), value));
2217 }
2218 ASSERT_OK(Flush());
2219 #ifndef ROCKSDB_LITE
2220 ASSERT_EQ("1,1,1", FilesPerLevel());
2221 #endif // !ROCKSDB_LITE
2222
2223 env_->random_read_bytes_counter_ = 0;
2224 options.statistics->setTickerCount(NO_FILE_OPENS, 0);
2225 ReadOptions read_options;
2226 auto* iter = NewIterator(read_options);
2227 iter->SeekToFirst();
2228 int64_t num_file_opens = TestGetTickerCount(options, NO_FILE_OPENS);
2229 size_t bytes_read = env_->random_read_bytes_counter_;
2230 delete iter;
2231
2232 int64_t num_file_closes = TestGetTickerCount(options, NO_FILE_CLOSES);
2233 env_->random_read_bytes_counter_ = 0;
2234 options.statistics->setTickerCount(NO_FILE_OPENS, 0);
2235 read_options.readahead_size = 1024 * 10;
2236 iter = NewIterator(read_options);
2237 iter->SeekToFirst();
2238 int64_t num_file_opens_readahead = TestGetTickerCount(options, NO_FILE_OPENS);
2239 size_t bytes_read_readahead = env_->random_read_bytes_counter_;
2240 delete iter;
2241 int64_t num_file_closes_readahead =
2242 TestGetTickerCount(options, NO_FILE_CLOSES);
2243 ASSERT_EQ(num_file_opens, num_file_opens_readahead);
2244 ASSERT_EQ(num_file_closes, num_file_closes_readahead);
2245 ASSERT_GT(bytes_read_readahead, bytes_read);
2246 ASSERT_GT(bytes_read_readahead, read_options.readahead_size * 3);
2247
2248 // Verify correctness.
2249 iter = NewIterator(read_options);
2250 int count = 0;
2251 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
2252 ASSERT_EQ(value, iter->value());
2253 count++;
2254 }
2255 ASSERT_EQ(100, count);
2256 for (int i = 0; i < 100; i++) {
2257 iter->Seek(Key(i));
2258 ASSERT_EQ(value, iter->value());
2259 }
2260 delete iter;
2261 }
2262
2263 // Insert a key, create a snapshot iterator, overwrite key lots of times,
2264 // seek to a smaller key. Expect DBIter to fall back to a seek instead of
2265 // going through all the overwrites linearly.
2266 TEST_P(DBIteratorTest, DBIteratorSkipRecentDuplicatesTest) {
2267 Options options = CurrentOptions();
2268 options.env = env_;
2269 options.create_if_missing = true;
2270 options.max_sequential_skip_in_iterations = 3;
2271 options.prefix_extractor = nullptr;
2272 options.write_buffer_size = 1 << 27; // big enough to avoid flush
2273 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2274 DestroyAndReopen(options);
2275
2276 // Insert.
2277 ASSERT_OK(Put("b", "0"));
2278
2279 // Create iterator.
2280 ReadOptions ro;
2281 std::unique_ptr<Iterator> iter(NewIterator(ro));
2282
2283 // Insert a lot.
2284 for (int i = 0; i < 100; ++i) {
2285 ASSERT_OK(Put("b", std::to_string(i + 1).c_str()));
2286 }
2287
2288 #ifndef ROCKSDB_LITE
2289 // Check that memtable wasn't flushed.
2290 std::string val;
2291 ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level0", &val));
2292 EXPECT_EQ("0", val);
2293 #endif
2294
2295 // Seek iterator to a smaller key.
2296 get_perf_context()->Reset();
2297 iter->Seek("a");
2298 ASSERT_TRUE(iter->Valid());
2299 EXPECT_EQ("b", iter->key().ToString());
2300 EXPECT_EQ("0", iter->value().ToString());
2301
2302 // Check that the seek didn't do too much work.
2303 // Checks are not tight, just make sure that everything is well below 100.
2304 EXPECT_LT(get_perf_context()->internal_key_skipped_count, 4);
2305 EXPECT_LT(get_perf_context()->internal_recent_skipped_count, 8);
2306 EXPECT_LT(get_perf_context()->seek_on_memtable_count, 10);
2307 EXPECT_LT(get_perf_context()->next_on_memtable_count, 10);
2308 EXPECT_LT(get_perf_context()->prev_on_memtable_count, 10);
2309
2310 // Check that iterator did something like what we expect.
2311 EXPECT_EQ(get_perf_context()->internal_delete_skipped_count, 0);
2312 EXPECT_EQ(get_perf_context()->internal_merge_count, 0);
2313 EXPECT_GE(get_perf_context()->internal_recent_skipped_count, 2);
2314 EXPECT_GE(get_perf_context()->seek_on_memtable_count, 2);
2315 EXPECT_EQ(1,
2316 options.statistics->getTickerCount(NUMBER_OF_RESEEKS_IN_ITERATION));
2317 }
2318
2319 TEST_P(DBIteratorTest, Refresh) {
2320 ASSERT_OK(Put("x", "y"));
2321
2322 std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
2323 ASSERT_OK(iter->status());
2324 iter->Seek(Slice("a"));
2325 ASSERT_TRUE(iter->Valid());
2326 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2327 iter->Next();
2328 ASSERT_FALSE(iter->Valid());
2329
2330 ASSERT_OK(Put("c", "d"));
2331
2332 iter->Seek(Slice("a"));
2333 ASSERT_TRUE(iter->Valid());
2334 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2335 iter->Next();
2336 ASSERT_FALSE(iter->Valid());
2337
2338 ASSERT_OK(iter->status());
2339 ASSERT_OK(iter->Refresh());
2340
2341 iter->Seek(Slice("a"));
2342 ASSERT_TRUE(iter->Valid());
2343 ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2344 iter->Next();
2345 ASSERT_TRUE(iter->Valid());
2346 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2347 iter->Next();
2348 ASSERT_FALSE(iter->Valid());
2349
2350 EXPECT_OK(dbfull()->Flush(FlushOptions()));
2351
2352 ASSERT_OK(Put("m", "n"));
2353
2354 iter->Seek(Slice("a"));
2355 ASSERT_TRUE(iter->Valid());
2356 ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2357 iter->Next();
2358 ASSERT_TRUE(iter->Valid());
2359 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2360 iter->Next();
2361 ASSERT_FALSE(iter->Valid());
2362
2363 ASSERT_OK(iter->status());
2364 ASSERT_OK(iter->Refresh());
2365
2366 iter->Seek(Slice("a"));
2367 ASSERT_TRUE(iter->Valid());
2368 ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2369 iter->Next();
2370 ASSERT_TRUE(iter->Valid());
2371 ASSERT_EQ(iter->key().compare(Slice("m")), 0);
2372 iter->Next();
2373 ASSERT_TRUE(iter->Valid());
2374 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2375 iter->Next();
2376 ASSERT_FALSE(iter->Valid());
2377
2378 iter.reset();
2379 }
2380
2381 TEST_P(DBIteratorTest, RefreshWithSnapshot) {
2382 ASSERT_OK(Put("x", "y"));
2383 const Snapshot* snapshot = db_->GetSnapshot();
2384 ReadOptions options;
2385 options.snapshot = snapshot;
2386 Iterator* iter = NewIterator(options);
2387 ASSERT_OK(iter->status());
2388
2389 iter->Seek(Slice("a"));
2390 ASSERT_TRUE(iter->Valid());
2391 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2392 iter->Next();
2393 ASSERT_FALSE(iter->Valid());
2394
2395 ASSERT_OK(Put("c", "d"));
2396
2397 iter->Seek(Slice("a"));
2398 ASSERT_TRUE(iter->Valid());
2399 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2400 iter->Next();
2401 ASSERT_FALSE(iter->Valid());
2402
2403 ASSERT_OK(iter->status());
2404 Status s = iter->Refresh();
2405 ASSERT_TRUE(s.IsNotSupported());
2406 db_->ReleaseSnapshot(snapshot);
2407 delete iter;
2408 }
2409
2410 TEST_P(DBIteratorTest, CreationFailure) {
2411 SyncPoint::GetInstance()->SetCallBack(
2412 "DBImpl::NewInternalIterator:StatusCallback", [](void* arg) {
2413 *(reinterpret_cast<Status*>(arg)) = Status::Corruption("test status");
2414 });
2415 SyncPoint::GetInstance()->EnableProcessing();
2416
2417 Iterator* iter = NewIterator(ReadOptions());
2418 ASSERT_FALSE(iter->Valid());
2419 ASSERT_TRUE(iter->status().IsCorruption());
2420 delete iter;
2421 }
2422
2423 TEST_P(DBIteratorTest, UpperBoundWithChangeDirection) {
2424 Options options = CurrentOptions();
2425 options.max_sequential_skip_in_iterations = 3;
2426 DestroyAndReopen(options);
2427
2428 // write a bunch of kvs to the database.
2429 ASSERT_OK(Put("a", "1"));
2430 ASSERT_OK(Put("y", "1"));
2431 ASSERT_OK(Put("y1", "1"));
2432 ASSERT_OK(Put("y2", "1"));
2433 ASSERT_OK(Put("y3", "1"));
2434 ASSERT_OK(Put("z", "1"));
2435 ASSERT_OK(Flush());
2436 ASSERT_OK(Put("a", "1"));
2437 ASSERT_OK(Put("z", "1"));
2438 ASSERT_OK(Put("bar", "1"));
2439 ASSERT_OK(Put("foo", "1"));
2440
2441 std::string upper_bound = "x";
2442 Slice ub_slice(upper_bound);
2443 ReadOptions ro;
2444 ro.iterate_upper_bound = &ub_slice;
2445 ro.max_skippable_internal_keys = 1000;
2446
2447 Iterator* iter = NewIterator(ro);
2448 iter->Seek("foo");
2449 ASSERT_TRUE(iter->Valid());
2450 ASSERT_EQ("foo", iter->key().ToString());
2451
2452 iter->Prev();
2453 ASSERT_TRUE(iter->Valid());
2454 ASSERT_OK(iter->status());
2455 ASSERT_EQ("bar", iter->key().ToString());
2456
2457 delete iter;
2458 }
2459
2460 TEST_P(DBIteratorTest, TableFilter) {
2461 ASSERT_OK(Put("a", "1"));
2462 EXPECT_OK(dbfull()->Flush(FlushOptions()));
2463 ASSERT_OK(Put("b", "2"));
2464 ASSERT_OK(Put("c", "3"));
2465 EXPECT_OK(dbfull()->Flush(FlushOptions()));
2466 ASSERT_OK(Put("d", "4"));
2467 ASSERT_OK(Put("e", "5"));
2468 ASSERT_OK(Put("f", "6"));
2469 EXPECT_OK(dbfull()->Flush(FlushOptions()));
2470
2471 // Ensure the table_filter callback is called once for each table.
2472 {
2473 std::set<uint64_t> unseen{1, 2, 3};
2474 ReadOptions opts;
2475 opts.table_filter = [&](const TableProperties& props) {
2476 auto it = unseen.find(props.num_entries);
2477 if (it == unseen.end()) {
2478 ADD_FAILURE() << "saw table properties with an unexpected "
2479 << props.num_entries << " entries";
2480 } else {
2481 unseen.erase(it);
2482 }
2483 return true;
2484 };
2485 auto iter = NewIterator(opts);
2486 iter->SeekToFirst();
2487 ASSERT_EQ(IterStatus(iter), "a->1");
2488 iter->Next();
2489 ASSERT_EQ(IterStatus(iter), "b->2");
2490 iter->Next();
2491 ASSERT_EQ(IterStatus(iter), "c->3");
2492 iter->Next();
2493 ASSERT_EQ(IterStatus(iter), "d->4");
2494 iter->Next();
2495 ASSERT_EQ(IterStatus(iter), "e->5");
2496 iter->Next();
2497 ASSERT_EQ(IterStatus(iter), "f->6");
2498 iter->Next();
2499 ASSERT_FALSE(iter->Valid());
2500 ASSERT_TRUE(unseen.empty());
2501 delete iter;
2502 }
2503
2504 // Ensure returning false in the table_filter hides the keys from that table
2505 // during iteration.
2506 {
2507 ReadOptions opts;
2508 opts.table_filter = [](const TableProperties& props) {
2509 return props.num_entries != 2;
2510 };
2511 auto iter = NewIterator(opts);
2512 iter->SeekToFirst();
2513 ASSERT_EQ(IterStatus(iter), "a->1");
2514 iter->Next();
2515 ASSERT_EQ(IterStatus(iter), "d->4");
2516 iter->Next();
2517 ASSERT_EQ(IterStatus(iter), "e->5");
2518 iter->Next();
2519 ASSERT_EQ(IterStatus(iter), "f->6");
2520 iter->Next();
2521 ASSERT_FALSE(iter->Valid());
2522 delete iter;
2523 }
2524 }
2525
2526 TEST_P(DBIteratorTest, UpperBoundWithPrevReseek) {
2527 Options options = CurrentOptions();
2528 options.max_sequential_skip_in_iterations = 3;
2529 DestroyAndReopen(options);
2530
2531 // write a bunch of kvs to the database.
2532 ASSERT_OK(Put("a", "1"));
2533 ASSERT_OK(Put("y", "1"));
2534 ASSERT_OK(Put("z", "1"));
2535 ASSERT_OK(Flush());
2536 ASSERT_OK(Put("a", "1"));
2537 ASSERT_OK(Put("z", "1"));
2538 ASSERT_OK(Put("bar", "1"));
2539 ASSERT_OK(Put("foo", "1"));
2540 ASSERT_OK(Put("foo", "2"));
2541
2542 ASSERT_OK(Put("foo", "3"));
2543 ASSERT_OK(Put("foo", "4"));
2544 ASSERT_OK(Put("foo", "5"));
2545 const Snapshot* snapshot = db_->GetSnapshot();
2546 ASSERT_OK(Put("foo", "6"));
2547
2548 std::string upper_bound = "x";
2549 Slice ub_slice(upper_bound);
2550 ReadOptions ro;
2551 ro.snapshot = snapshot;
2552 ro.iterate_upper_bound = &ub_slice;
2553
2554 Iterator* iter = NewIterator(ro);
2555 iter->SeekForPrev("goo");
2556 ASSERT_TRUE(iter->Valid());
2557 ASSERT_EQ("foo", iter->key().ToString());
2558 iter->Prev();
2559
2560 ASSERT_TRUE(iter->Valid());
2561 ASSERT_EQ("bar", iter->key().ToString());
2562
2563 delete iter;
2564 db_->ReleaseSnapshot(snapshot);
2565 }
2566
2567 TEST_P(DBIteratorTest, SkipStatistics) {
2568 Options options = CurrentOptions();
2569 options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
2570 DestroyAndReopen(options);
2571
2572 int skip_count = 0;
2573
2574 // write a bunch of kvs to the database.
2575 ASSERT_OK(Put("a", "1"));
2576 ASSERT_OK(Put("b", "1"));
2577 ASSERT_OK(Put("c", "1"));
2578 ASSERT_OK(Flush());
2579 ASSERT_OK(Put("d", "1"));
2580 ASSERT_OK(Put("e", "1"));
2581 ASSERT_OK(Put("f", "1"));
2582 ASSERT_OK(Put("a", "2"));
2583 ASSERT_OK(Put("b", "2"));
2584 ASSERT_OK(Flush());
2585 ASSERT_OK(Delete("d"));
2586 ASSERT_OK(Delete("e"));
2587 ASSERT_OK(Delete("f"));
2588
2589 Iterator* iter = NewIterator(ReadOptions());
2590 int count = 0;
2591 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
2592 ASSERT_OK(iter->status());
2593 count++;
2594 }
2595 ASSERT_EQ(count, 3);
2596 delete iter;
2597 skip_count += 8; // 3 deletes + 3 original keys + 2 lower in sequence
2598 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2599
2600 iter = NewIterator(ReadOptions());
2601 count = 0;
2602 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
2603 ASSERT_OK(iter->status());
2604 count++;
2605 }
2606 ASSERT_EQ(count, 3);
2607 delete iter;
2608 skip_count += 8; // Same as above, but in reverse order
2609 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2610
2611 ASSERT_OK(Put("aa", "1"));
2612 ASSERT_OK(Put("ab", "1"));
2613 ASSERT_OK(Put("ac", "1"));
2614 ASSERT_OK(Put("ad", "1"));
2615 ASSERT_OK(Flush());
2616 ASSERT_OK(Delete("ab"));
2617 ASSERT_OK(Delete("ac"));
2618 ASSERT_OK(Delete("ad"));
2619
2620 ReadOptions ro;
2621 Slice prefix("b");
2622 ro.iterate_upper_bound = &prefix;
2623
2624 iter = NewIterator(ro);
2625 count = 0;
2626 for (iter->Seek("aa"); iter->Valid(); iter->Next()) {
2627 ASSERT_OK(iter->status());
2628 count++;
2629 }
2630 ASSERT_EQ(count, 1);
2631 delete iter;
2632 skip_count += 6; // 3 deletes + 3 original keys
2633 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2634
2635 iter = NewIterator(ro);
2636 count = 0;
2637 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
2638 ASSERT_OK(iter->status());
2639 count++;
2640 }
2641 ASSERT_EQ(count, 2);
2642 delete iter;
2643 // 3 deletes + 3 original keys + lower sequence of "a"
2644 skip_count += 7;
2645 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2646 }
2647
2648 TEST_P(DBIteratorTest, SeekAfterHittingManyInternalKeys) {
2649 Options options = CurrentOptions();
2650 DestroyAndReopen(options);
2651 ReadOptions ropts;
2652 ropts.max_skippable_internal_keys = 2;
2653
2654 ASSERT_OK(Put("1", "val_1"));
2655 // Add more tombstones than max_skippable_internal_keys so that Next() fails.
2656 ASSERT_OK(Delete("2"));
2657 ASSERT_OK(Delete("3"));
2658 ASSERT_OK(Delete("4"));
2659 ASSERT_OK(Delete("5"));
2660 ASSERT_OK(Put("6", "val_6"));
2661
2662 std::unique_ptr<Iterator> iter(NewIterator(ropts));
2663 iter->SeekToFirst();
2664
2665 ASSERT_TRUE(iter->Valid());
2666 ASSERT_EQ(iter->key().ToString(), "1");
2667 ASSERT_EQ(iter->value().ToString(), "val_1");
2668
2669 // This should fail as incomplete due to too many non-visible internal keys on
2670 // the way to the next valid user key.
2671 iter->Next();
2672 ASSERT_TRUE(!iter->Valid());
2673 ASSERT_TRUE(iter->status().IsIncomplete());
2674
2675 // Get the internal key at which Next() failed.
2676 std::string prop_value;
2677 ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
2678 ASSERT_EQ("4", prop_value);
2679
2680 // Create a new iterator to seek to the internal key.
2681 std::unique_ptr<Iterator> iter2(NewIterator(ropts));
2682 iter2->Seek(prop_value);
2683 ASSERT_TRUE(iter2->Valid());
2684 ASSERT_OK(iter2->status());
2685
2686 ASSERT_EQ(iter2->key().ToString(), "6");
2687 ASSERT_EQ(iter2->value().ToString(), "val_6");
2688 }
2689
2690 // Reproduces a former bug where iterator would skip some records when DBIter
2691 // re-seeks subiterator with Incomplete status.
2692 TEST_P(DBIteratorTest, NonBlockingIterationBugRepro) {
2693 Options options = CurrentOptions();
2694 BlockBasedTableOptions table_options;
2695 // Make sure the sst file has more than one block.
2696 table_options.flush_block_policy_factory =
2697 std::make_shared<FlushBlockEveryKeyPolicyFactory>();
2698 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2699 DestroyAndReopen(options);
2700
2701 // Two records in sst file, each in its own block.
2702 ASSERT_OK(Put("b", ""));
2703 ASSERT_OK(Put("d", ""));
2704 ASSERT_OK(Flush());
2705
2706 // Create a nonblocking iterator before writing to memtable.
2707 ReadOptions ropt;
2708 ropt.read_tier = kBlockCacheTier;
2709 std::unique_ptr<Iterator> iter(NewIterator(ropt));
2710
2711 // Overwrite a key in memtable many times to hit
2712 // max_sequential_skip_in_iterations (which is 8 by default).
2713 for (int i = 0; i < 20; ++i) {
2714 ASSERT_OK(Put("c", ""));
2715 }
2716
2717 // Load the second block in sst file into the block cache.
2718 {
2719 std::unique_ptr<Iterator> iter2(NewIterator(ReadOptions()));
2720 iter2->Seek("d");
2721 }
2722
2723 // Finally seek the nonblocking iterator.
2724 iter->Seek("a");
2725 // With the bug, the status used to be OK, and the iterator used to point to
2726 // "d".
2727 EXPECT_TRUE(iter->status().IsIncomplete());
2728 }
2729
2730 TEST_P(DBIteratorTest, SeekBackwardAfterOutOfUpperBound) {
2731 ASSERT_OK(Put("a", ""));
2732 ASSERT_OK(Put("b", ""));
2733 ASSERT_OK(Flush());
2734
2735 ReadOptions ropt;
2736 Slice ub = "b";
2737 ropt.iterate_upper_bound = &ub;
2738
2739 std::unique_ptr<Iterator> it(dbfull()->NewIterator(ropt));
2740 it->SeekForPrev("a");
2741 ASSERT_TRUE(it->Valid());
2742 ASSERT_OK(it->status());
2743 ASSERT_EQ("a", it->key().ToString());
2744 it->Next();
2745 ASSERT_FALSE(it->Valid());
2746 ASSERT_OK(it->status());
2747 it->SeekForPrev("a");
2748 ASSERT_OK(it->status());
2749
2750 ASSERT_TRUE(it->Valid());
2751 ASSERT_EQ("a", it->key().ToString());
2752 }
2753
2754 TEST_P(DBIteratorTest, AvoidReseekLevelIterator) {
2755 Options options = CurrentOptions();
2756 options.compression = CompressionType::kNoCompression;
2757 BlockBasedTableOptions table_options;
2758 table_options.block_size = 800;
2759 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2760 Reopen(options);
2761
2762 Random rnd(301);
2763 std::string random_str = rnd.RandomString(180);
2764
2765 ASSERT_OK(Put("1", random_str));
2766 ASSERT_OK(Put("2", random_str));
2767 ASSERT_OK(Put("3", random_str));
2768 ASSERT_OK(Put("4", random_str));
2769 // A new block
2770 ASSERT_OK(Put("5", random_str));
2771 ASSERT_OK(Put("6", random_str));
2772 ASSERT_OK(Put("7", random_str));
2773 ASSERT_OK(Flush());
2774 ASSERT_OK(Put("8", random_str));
2775 ASSERT_OK(Put("9", random_str));
2776 ASSERT_OK(Flush());
2777 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2778
2779 int num_find_file_in_level = 0;
2780 int num_idx_blk_seek = 0;
2781 SyncPoint::GetInstance()->SetCallBack(
2782 "LevelIterator::Seek:BeforeFindFile",
2783 [&](void* /*arg*/) { num_find_file_in_level++; });
2784 SyncPoint::GetInstance()->SetCallBack(
2785 "IndexBlockIter::Seek:0", [&](void* /*arg*/) { num_idx_blk_seek++; });
2786 SyncPoint::GetInstance()->EnableProcessing();
2787
2788 {
2789 std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
2790 iter->Seek("1");
2791 ASSERT_TRUE(iter->Valid());
2792 ASSERT_EQ(1, num_find_file_in_level);
2793 ASSERT_EQ(1, num_idx_blk_seek);
2794
2795 iter->Seek("2");
2796 ASSERT_TRUE(iter->Valid());
2797 ASSERT_EQ(1, num_find_file_in_level);
2798 ASSERT_EQ(1, num_idx_blk_seek);
2799
2800 iter->Seek("3");
2801 ASSERT_TRUE(iter->Valid());
2802 ASSERT_EQ(1, num_find_file_in_level);
2803 ASSERT_EQ(1, num_idx_blk_seek);
2804
2805 iter->Next();
2806 ASSERT_TRUE(iter->Valid());
2807 ASSERT_EQ(1, num_find_file_in_level);
2808 ASSERT_EQ(1, num_idx_blk_seek);
2809
2810 iter->Seek("5");
2811 ASSERT_TRUE(iter->Valid());
2812 ASSERT_EQ(1, num_find_file_in_level);
2813 ASSERT_EQ(2, num_idx_blk_seek);
2814
2815 iter->Seek("6");
2816 ASSERT_TRUE(iter->Valid());
2817 ASSERT_EQ(1, num_find_file_in_level);
2818 ASSERT_EQ(2, num_idx_blk_seek);
2819
2820 iter->Seek("7");
2821 ASSERT_TRUE(iter->Valid());
2822 ASSERT_EQ(1, num_find_file_in_level);
2823 ASSERT_EQ(3, num_idx_blk_seek);
2824
2825 iter->Seek("8");
2826 ASSERT_TRUE(iter->Valid());
2827 ASSERT_EQ(2, num_find_file_in_level);
2828 // Still re-seek because "8" is the boundary key, which has
2829 // the same user key as the seek key.
2830 ASSERT_EQ(4, num_idx_blk_seek);
2831
2832 iter->Seek("5");
2833 ASSERT_TRUE(iter->Valid());
2834 ASSERT_EQ(3, num_find_file_in_level);
2835 ASSERT_EQ(5, num_idx_blk_seek);
2836
2837 iter->Next();
2838 ASSERT_TRUE(iter->Valid());
2839 ASSERT_EQ(3, num_find_file_in_level);
2840 ASSERT_EQ(5, num_idx_blk_seek);
2841
2842 // Seek backward never triggers the index block seek to be skipped
2843 iter->Seek("5");
2844 ASSERT_TRUE(iter->Valid());
2845 ASSERT_EQ(3, num_find_file_in_level);
2846 ASSERT_EQ(6, num_idx_blk_seek);
2847 }
2848
2849 SyncPoint::GetInstance()->DisableProcessing();
2850 }
2851
2852 // MyRocks may change iterate bounds before seek. Simply test to make sure such
2853 // usage doesn't break iterator.
2854 TEST_P(DBIteratorTest, IterateBoundChangedBeforeSeek) {
2855 Options options = CurrentOptions();
2856 options.compression = CompressionType::kNoCompression;
2857 BlockBasedTableOptions table_options;
2858 table_options.block_size = 100;
2859 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2860 std::string value(50, 'v');
2861 Reopen(options);
2862 ASSERT_OK(Put("aaa", value));
2863 ASSERT_OK(Flush());
2864 ASSERT_OK(Put("bbb", "v"));
2865 ASSERT_OK(Put("ccc", "v"));
2866 ASSERT_OK(Put("ddd", "v"));
2867 ASSERT_OK(Flush());
2868 ASSERT_OK(Put("eee", "v"));
2869 ASSERT_OK(Flush());
2870 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2871
2872 std::string ub1 = "e";
2873 std::string ub2 = "c";
2874 Slice ub(ub1);
2875 ReadOptions read_opts1;
2876 read_opts1.iterate_upper_bound = &ub;
2877 Iterator* iter = NewIterator(read_opts1);
2878 // Seek and iterate accross block boundary.
2879 iter->Seek("b");
2880 ASSERT_TRUE(iter->Valid());
2881 ASSERT_OK(iter->status());
2882 ASSERT_EQ("bbb", iter->key());
2883 ub = Slice(ub2);
2884 iter->Seek("b");
2885 ASSERT_TRUE(iter->Valid());
2886 ASSERT_OK(iter->status());
2887 ASSERT_EQ("bbb", iter->key());
2888 iter->Next();
2889 ASSERT_FALSE(iter->Valid());
2890 ASSERT_OK(iter->status());
2891 delete iter;
2892
2893 std::string lb1 = "a";
2894 std::string lb2 = "c";
2895 Slice lb(lb1);
2896 ReadOptions read_opts2;
2897 read_opts2.iterate_lower_bound = &lb;
2898 iter = NewIterator(read_opts2);
2899 iter->SeekForPrev("d");
2900 ASSERT_TRUE(iter->Valid());
2901 ASSERT_OK(iter->status());
2902 ASSERT_EQ("ccc", iter->key());
2903 lb = Slice(lb2);
2904 iter->SeekForPrev("d");
2905 ASSERT_TRUE(iter->Valid());
2906 ASSERT_OK(iter->status());
2907 ASSERT_EQ("ccc", iter->key());
2908 iter->Prev();
2909 ASSERT_FALSE(iter->Valid());
2910 ASSERT_OK(iter->status());
2911 delete iter;
2912 }
2913
2914 TEST_P(DBIteratorTest, IterateWithLowerBoundAcrossFileBoundary) {
2915 ASSERT_OK(Put("aaa", "v"));
2916 ASSERT_OK(Put("bbb", "v"));
2917 ASSERT_OK(Flush());
2918 ASSERT_OK(Put("ccc", "v"));
2919 ASSERT_OK(Put("ddd", "v"));
2920 ASSERT_OK(Flush());
2921 // Move both files to bottom level.
2922 ASSERT_OK(dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr));
2923 Slice lower_bound("b");
2924 ReadOptions read_opts;
2925 read_opts.iterate_lower_bound = &lower_bound;
2926 std::unique_ptr<Iterator> iter(NewIterator(read_opts));
2927 iter->SeekForPrev("d");
2928 ASSERT_TRUE(iter->Valid());
2929 ASSERT_OK(iter->status());
2930 ASSERT_EQ("ccc", iter->key());
2931 iter->Prev();
2932 ASSERT_TRUE(iter->Valid());
2933 ASSERT_OK(iter->status());
2934 ASSERT_EQ("bbb", iter->key());
2935 iter->Prev();
2936 ASSERT_FALSE(iter->Valid());
2937 ASSERT_OK(iter->status());
2938 }
2939
2940 TEST_P(DBIteratorTest, Blob) {
2941 Options options = CurrentOptions();
2942 options.enable_blob_files = true;
2943 options.max_sequential_skip_in_iterations = 2;
2944 options.statistics = CreateDBStatistics();
2945
2946 Reopen(options);
2947
2948 // Note: we have 4 KVs (3 of which are hidden) for key "b" and
2949 // max_sequential_skip_in_iterations is set to 2. Thus, we need to do a reseek
2950 // anytime we move from "b" to "c" or vice versa.
2951 ASSERT_OK(Put("a", "va"));
2952 ASSERT_OK(Flush());
2953 ASSERT_OK(Put("b", "vb0"));
2954 ASSERT_OK(Flush());
2955 ASSERT_OK(Put("b", "vb1"));
2956 ASSERT_OK(Flush());
2957 ASSERT_OK(Put("b", "vb2"));
2958 ASSERT_OK(Flush());
2959 ASSERT_OK(Put("b", "vb3"));
2960 ASSERT_OK(Flush());
2961 ASSERT_OK(Put("c", "vc"));
2962 ASSERT_OK(Flush());
2963
2964 std::unique_ptr<Iterator> iter_guard(NewIterator(ReadOptions()));
2965 Iterator* const iter = iter_guard.get();
2966
2967 iter->SeekToFirst();
2968 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
2969 ASSERT_EQ(IterStatus(iter), "a->va");
2970 iter->Next();
2971 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
2972 ASSERT_EQ(IterStatus(iter), "b->vb3");
2973 iter->Next();
2974 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
2975 ASSERT_EQ(IterStatus(iter), "c->vc");
2976 iter->Next();
2977 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
2978 ASSERT_EQ(IterStatus(iter), "(invalid)");
2979 iter->SeekToFirst();
2980 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
2981 ASSERT_EQ(IterStatus(iter), "a->va");
2982 iter->Prev();
2983 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
2984 ASSERT_EQ(IterStatus(iter), "(invalid)");
2985
2986 iter->SeekToLast();
2987 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
2988 ASSERT_EQ(IterStatus(iter), "c->vc");
2989 iter->Prev();
2990 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
2991 ASSERT_EQ(IterStatus(iter), "b->vb3");
2992 iter->Prev();
2993 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
2994 ASSERT_EQ(IterStatus(iter), "a->va");
2995 iter->Prev();
2996 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
2997 ASSERT_EQ(IterStatus(iter), "(invalid)");
2998 iter->SeekToLast();
2999 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
3000 ASSERT_EQ(IterStatus(iter), "c->vc");
3001 iter->Next();
3002 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
3003 ASSERT_EQ(IterStatus(iter), "(invalid)");
3004
3005 iter->Seek("");
3006 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
3007 ASSERT_EQ(IterStatus(iter), "a->va");
3008 iter->Seek("a");
3009 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
3010 ASSERT_EQ(IterStatus(iter), "a->va");
3011 iter->Seek("ax");
3012 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
3013 ASSERT_EQ(IterStatus(iter), "b->vb3");
3014
3015 iter->SeekForPrev("d");
3016 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
3017 ASSERT_EQ(IterStatus(iter), "c->vc");
3018 iter->SeekForPrev("c");
3019 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 2);
3020 ASSERT_EQ(IterStatus(iter), "c->vc");
3021 iter->SeekForPrev("bx");
3022 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 3);
3023 ASSERT_EQ(IterStatus(iter), "b->vb3");
3024
3025 iter->Seek("b");
3026 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 3);
3027 ASSERT_EQ(IterStatus(iter), "b->vb3");
3028 iter->Seek("z");
3029 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 3);
3030 ASSERT_EQ(IterStatus(iter), "(invalid)");
3031 iter->SeekForPrev("b");
3032 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 4);
3033 ASSERT_EQ(IterStatus(iter), "b->vb3");
3034 iter->SeekForPrev("");
3035 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 4);
3036 ASSERT_EQ(IterStatus(iter), "(invalid)");
3037
3038 // Switch from reverse to forward
3039 iter->SeekToLast();
3040 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 4);
3041 iter->Prev();
3042 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 5);
3043 iter->Prev();
3044 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 5);
3045 iter->Next();
3046 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 6);
3047 ASSERT_EQ(IterStatus(iter), "b->vb3");
3048
3049 // Switch from forward to reverse
3050 iter->SeekToFirst();
3051 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 6);
3052 iter->Next();
3053 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 6);
3054 iter->Next();
3055 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 7);
3056 iter->Prev();
3057 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 8);
3058 ASSERT_EQ(IterStatus(iter), "b->vb3");
3059 }
3060
3061 INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest,
3062 testing::Values(true, false));
3063
3064 // Tests how DBIter work with ReadCallback
3065 class DBIteratorWithReadCallbackTest : public DBIteratorTest {};
3066
3067 TEST_F(DBIteratorWithReadCallbackTest, ReadCallback) {
3068 class TestReadCallback : public ReadCallback {
3069 public:
3070 explicit TestReadCallback(SequenceNumber _max_visible_seq)
3071 : ReadCallback(_max_visible_seq) {}
3072
3073 bool IsVisibleFullCheck(SequenceNumber seq) override {
3074 return seq <= max_visible_seq_;
3075 }
3076 };
3077
3078 ASSERT_OK(Put("foo", "v1"));
3079 ASSERT_OK(Put("foo", "v2"));
3080 ASSERT_OK(Put("foo", "v3"));
3081 ASSERT_OK(Put("a", "va"));
3082 ASSERT_OK(Put("z", "vz"));
3083 SequenceNumber seq1 = db_->GetLatestSequenceNumber();
3084 TestReadCallback callback1(seq1);
3085 ASSERT_OK(Put("foo", "v4"));
3086 ASSERT_OK(Put("foo", "v5"));
3087 ASSERT_OK(Put("bar", "v7"));
3088
3089 SequenceNumber seq2 = db_->GetLatestSequenceNumber();
3090 auto* cfd =
3091 static_cast_with_check<ColumnFamilyHandleImpl>(db_->DefaultColumnFamily())
3092 ->cfd();
3093 // The iterator are suppose to see data before seq1.
3094 Iterator* iter =
3095 dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq2, &callback1);
3096
3097 // Seek
3098 // The latest value of "foo" before seq1 is "v3"
3099 iter->Seek("foo");
3100 ASSERT_TRUE(iter->Valid());
3101 ASSERT_OK(iter->status());
3102 ASSERT_EQ("foo", iter->key());
3103 ASSERT_EQ("v3", iter->value());
3104 // "bar" is not visible to the iterator. It will move on to the next key
3105 // "foo".
3106 iter->Seek("bar");
3107 ASSERT_TRUE(iter->Valid());
3108 ASSERT_OK(iter->status());
3109 ASSERT_EQ("foo", iter->key());
3110 ASSERT_EQ("v3", iter->value());
3111
3112 // Next
3113 // Seek to "a"
3114 iter->Seek("a");
3115 ASSERT_TRUE(iter->Valid());
3116 ASSERT_OK(iter->status());
3117 ASSERT_EQ("va", iter->value());
3118 // "bar" is not visible to the iterator. It will move on to the next key
3119 // "foo".
3120 iter->Next();
3121 ASSERT_TRUE(iter->Valid());
3122 ASSERT_OK(iter->status());
3123 ASSERT_EQ("foo", iter->key());
3124 ASSERT_EQ("v3", iter->value());
3125
3126 // Prev
3127 // Seek to "z"
3128 iter->Seek("z");
3129 ASSERT_TRUE(iter->Valid());
3130 ASSERT_OK(iter->status());
3131 ASSERT_EQ("vz", iter->value());
3132 // The previous key is "foo", which is visible to the iterator.
3133 iter->Prev();
3134 ASSERT_TRUE(iter->Valid());
3135 ASSERT_OK(iter->status());
3136 ASSERT_EQ("foo", iter->key());
3137 ASSERT_EQ("v3", iter->value());
3138 // "bar" is not visible to the iterator. It will move on to the next key "a".
3139 iter->Prev(); // skipping "bar"
3140 ASSERT_TRUE(iter->Valid());
3141 ASSERT_OK(iter->status());
3142 ASSERT_EQ("a", iter->key());
3143 ASSERT_EQ("va", iter->value());
3144
3145 // SeekForPrev
3146 // The previous key is "foo", which is visible to the iterator.
3147 iter->SeekForPrev("y");
3148 ASSERT_TRUE(iter->Valid());
3149 ASSERT_OK(iter->status());
3150 ASSERT_EQ("foo", iter->key());
3151 ASSERT_EQ("v3", iter->value());
3152 // "bar" is not visible to the iterator. It will move on to the next key "a".
3153 iter->SeekForPrev("bar");
3154 ASSERT_TRUE(iter->Valid());
3155 ASSERT_OK(iter->status());
3156 ASSERT_EQ("a", iter->key());
3157 ASSERT_EQ("va", iter->value());
3158
3159 delete iter;
3160
3161 // Prev beyond max_sequential_skip_in_iterations
3162 uint64_t num_versions =
3163 CurrentOptions().max_sequential_skip_in_iterations + 10;
3164 for (uint64_t i = 0; i < num_versions; i++) {
3165 ASSERT_OK(Put("bar", std::to_string(i)));
3166 }
3167 SequenceNumber seq3 = db_->GetLatestSequenceNumber();
3168 TestReadCallback callback2(seq3);
3169 ASSERT_OK(Put("bar", "v8"));
3170 SequenceNumber seq4 = db_->GetLatestSequenceNumber();
3171
3172 // The iterator is suppose to see data before seq3.
3173 iter = dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq4, &callback2);
3174 // Seek to "z", which is visible.
3175 iter->Seek("z");
3176 ASSERT_TRUE(iter->Valid());
3177 ASSERT_OK(iter->status());
3178 ASSERT_EQ("vz", iter->value());
3179 // Previous key is "foo" and the last value "v5" is visible.
3180 iter->Prev();
3181 ASSERT_TRUE(iter->Valid());
3182 ASSERT_OK(iter->status());
3183 ASSERT_EQ("foo", iter->key());
3184 ASSERT_EQ("v5", iter->value());
3185 // Since the number of values of "bar" is more than
3186 // max_sequential_skip_in_iterations, Prev() will ultimately fallback to
3187 // seek in forward direction. Here we test the fallback seek is correct.
3188 // The last visible value should be (num_versions - 1), as "v8" is not
3189 // visible.
3190 iter->Prev();
3191 ASSERT_TRUE(iter->Valid());
3192 ASSERT_OK(iter->status());
3193 ASSERT_EQ("bar", iter->key());
3194 ASSERT_EQ(std::to_string(num_versions - 1), iter->value());
3195
3196 delete iter;
3197 }
3198
3199 TEST_F(DBIteratorTest, BackwardIterationOnInplaceUpdateMemtable) {
3200 Options options = CurrentOptions();
3201 options.create_if_missing = true;
3202 options.inplace_update_support = false;
3203 options.env = env_;
3204 DestroyAndReopen(options);
3205 constexpr int kNumKeys = 10;
3206
3207 // Write kNumKeys to WAL.
3208 for (int i = 0; i < kNumKeys; ++i) {
3209 ASSERT_OK(Put(Key(i), "val"));
3210 }
3211 ReadOptions read_opts;
3212 read_opts.total_order_seek = true;
3213 {
3214 std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
3215 int count = 0;
3216 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
3217 ++count;
3218 }
3219 ASSERT_EQ(kNumKeys, count);
3220 }
3221
3222 // Reopen and rebuild the memtable from WAL.
3223 options.create_if_missing = false;
3224 options.avoid_flush_during_recovery = true;
3225 options.inplace_update_support = true;
3226 options.allow_concurrent_memtable_write = false;
3227 Reopen(options);
3228 {
3229 std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
3230 iter->SeekToLast();
3231 // Backward iteration not supported due to inplace_update_support = true.
3232 ASSERT_TRUE(iter->status().IsNotSupported());
3233 ASSERT_FALSE(iter->Valid());
3234 }
3235 }
3236
3237 TEST_F(DBIteratorTest, IteratorRefreshReturnSV) {
3238 Options options = CurrentOptions();
3239 options.disable_auto_compactions = true;
3240 DestroyAndReopen(options);
3241 ASSERT_OK(
3242 db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(), "a", "z"));
3243 std::unique_ptr<Iterator> iter{db_->NewIterator(ReadOptions())};
3244 SyncPoint::GetInstance()->SetCallBack(
3245 "ArenaWrappedDBIter::Refresh:SV", [&](void*) {
3246 ASSERT_OK(db_->Put(WriteOptions(), "dummy", "new SV"));
3247 // This makes the local SV obselete.
3248 ASSERT_OK(Flush());
3249 SyncPoint::GetInstance()->DisableProcessing();
3250 });
3251 SyncPoint::GetInstance()->EnableProcessing();
3252 ASSERT_OK(iter->Refresh());
3253 iter.reset();
3254 // iter used to not cleanup SV, so the Close() below would hit an assertion
3255 // error.
3256 Close();
3257 }
3258
3259 } // namespace ROCKSDB_NAMESPACE
3260
3261 int main(int argc, char** argv) {
3262 ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
3263 ::testing::InitGoogleTest(&argc, argv);
3264 return RUN_ALL_TESTS();
3265 }