]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/db/db_iterator_test.cc
import 14.2.4 nautilus point release
[ceph.git] / ceph / src / rocksdb / db / db_iterator_test.cc
CommitLineData
7c673cae 1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
11fdf7f2
TL
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
7c673cae
FG
5//
6// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7// Use of this source code is governed by a BSD-style license that can be
8// found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10#include <functional>
11
11fdf7f2 12#include "db/db_iter.h"
7c673cae
FG
13#include "db/db_test_util.h"
14#include "port/port.h"
15#include "port/stack_trace.h"
16#include "rocksdb/iostats_context.h"
17#include "rocksdb/perf_context.h"
18
19namespace rocksdb {
20
11fdf7f2
TL
21// A dumb ReadCallback which saying every key is committed.
22class DummyReadCallback : public ReadCallback {
494da23a
TL
23 public:
24 DummyReadCallback() : ReadCallback(kMaxSequenceNumber) {}
25 bool IsVisibleFullCheck(SequenceNumber /*seq*/) override { return true; }
26 void SetSnapshot(SequenceNumber seq) { max_visible_seq_ = seq; }
11fdf7f2
TL
27};
28
29// Test param:
30// bool: whether to pass read_callback to NewIterator().
31class DBIteratorTest : public DBTestBase,
32 public testing::WithParamInterface<bool> {
7c673cae
FG
33 public:
34 DBIteratorTest() : DBTestBase("/db_iterator_test") {}
11fdf7f2
TL
35
36 Iterator* NewIterator(const ReadOptions& read_options,
37 ColumnFamilyHandle* column_family = nullptr) {
38 if (column_family == nullptr) {
39 column_family = db_->DefaultColumnFamily();
40 }
41 auto* cfd = reinterpret_cast<ColumnFamilyHandleImpl*>(column_family)->cfd();
42 SequenceNumber seq = read_options.snapshot != nullptr
43 ? read_options.snapshot->GetSequenceNumber()
44 : db_->GetLatestSequenceNumber();
45 bool use_read_callback = GetParam();
494da23a
TL
46 DummyReadCallback* read_callback = nullptr;
47 if (use_read_callback) {
48 read_callback = new DummyReadCallback();
49 read_callback->SetSnapshot(seq);
50 InstrumentedMutexLock lock(&mutex_);
51 read_callbacks_.push_back(
52 std::unique_ptr<DummyReadCallback>(read_callback));
53 }
11fdf7f2
TL
54 return dbfull()->NewIteratorImpl(read_options, cfd, seq, read_callback);
55 }
56
57 private:
494da23a
TL
58 InstrumentedMutex mutex_;
59 std::vector<std::unique_ptr<DummyReadCallback>> read_callbacks_;
11fdf7f2
TL
60};
61
62class FlushBlockEveryKeyPolicy : public FlushBlockPolicy {
63 public:
494da23a 64 bool Update(const Slice& /*key*/, const Slice& /*value*/) override {
11fdf7f2
TL
65 if (!start_) {
66 start_ = true;
67 return false;
68 }
69 return true;
70 }
494da23a 71
11fdf7f2
TL
72 private:
73 bool start_ = false;
7c673cae
FG
74};
75
11fdf7f2
TL
76class FlushBlockEveryKeyPolicyFactory : public FlushBlockPolicyFactory {
77 public:
78 explicit FlushBlockEveryKeyPolicyFactory() {}
79
80 const char* Name() const override {
81 return "FlushBlockEveryKeyPolicyFactory";
82 }
83
84 FlushBlockPolicy* NewFlushBlockPolicy(
85 const BlockBasedTableOptions& /*table_options*/,
86 const BlockBuilder& /*data_block_builder*/) const override {
87 return new FlushBlockEveryKeyPolicy;
88 }
89};
90
91TEST_P(DBIteratorTest, IteratorProperty) {
7c673cae
FG
92 // The test needs to be changed if kPersistedTier is supported in iterator.
93 Options options = CurrentOptions();
94 CreateAndReopenWithCF({"pikachu"}, options);
95 Put(1, "1", "2");
11fdf7f2 96 Delete(1, "2");
7c673cae
FG
97 ReadOptions ropt;
98 ropt.pin_data = false;
99 {
494da23a 100 std::unique_ptr<Iterator> iter(NewIterator(ropt, handles_[1]));
7c673cae
FG
101 iter->SeekToFirst();
102 std::string prop_value;
103 ASSERT_NOK(iter->GetProperty("non_existing.value", &prop_value));
104 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
105 ASSERT_EQ("0", prop_value);
11fdf7f2
TL
106 ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
107 ASSERT_EQ("1", prop_value);
7c673cae
FG
108 iter->Next();
109 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
110 ASSERT_EQ("Iterator is not valid.", prop_value);
11fdf7f2
TL
111
112 // Get internal key at which the iteration stopped (tombstone in this case).
113 ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
114 ASSERT_EQ("2", prop_value);
7c673cae
FG
115 }
116 Close();
117}
118
11fdf7f2 119TEST_P(DBIteratorTest, PersistedTierOnIterator) {
7c673cae
FG
120 // The test needs to be changed if kPersistedTier is supported in iterator.
121 Options options = CurrentOptions();
122 CreateAndReopenWithCF({"pikachu"}, options);
123 ReadOptions ropt;
124 ropt.read_tier = kPersistedTier;
125
126 auto* iter = db_->NewIterator(ropt, handles_[1]);
127 ASSERT_TRUE(iter->status().IsNotSupported());
128 delete iter;
129
130 std::vector<Iterator*> iters;
131 ASSERT_TRUE(db_->NewIterators(ropt, {handles_[1]}, &iters).IsNotSupported());
132 Close();
133}
134
11fdf7f2 135TEST_P(DBIteratorTest, NonBlockingIteration) {
7c673cae
FG
136 do {
137 ReadOptions non_blocking_opts, regular_opts;
138 Options options = CurrentOptions();
139 options.statistics = rocksdb::CreateDBStatistics();
140 non_blocking_opts.read_tier = kBlockCacheTier;
141 CreateAndReopenWithCF({"pikachu"}, options);
142 // write one kv to the database.
143 ASSERT_OK(Put(1, "a", "b"));
144
145 // scan using non-blocking iterator. We should find it because
146 // it is in memtable.
11fdf7f2 147 Iterator* iter = NewIterator(non_blocking_opts, handles_[1]);
7c673cae
FG
148 int count = 0;
149 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
150 ASSERT_OK(iter->status());
151 count++;
152 }
153 ASSERT_EQ(count, 1);
154 delete iter;
155
156 // flush memtable to storage. Now, the key should not be in the
157 // memtable neither in the block cache.
158 ASSERT_OK(Flush(1));
159
160 // verify that a non-blocking iterator does not find any
161 // kvs. Neither does it do any IOs to storage.
162 uint64_t numopen = TestGetTickerCount(options, NO_FILE_OPENS);
163 uint64_t cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
11fdf7f2 164 iter = NewIterator(non_blocking_opts, handles_[1]);
7c673cae
FG
165 count = 0;
166 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
167 count++;
168 }
169 ASSERT_EQ(count, 0);
170 ASSERT_TRUE(iter->status().IsIncomplete());
171 ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
172 ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
173 delete iter;
174
175 // read in the specified block via a regular get
176 ASSERT_EQ(Get(1, "a"), "b");
177
178 // verify that we can find it via a non-blocking scan
179 numopen = TestGetTickerCount(options, NO_FILE_OPENS);
180 cache_added = TestGetTickerCount(options, BLOCK_CACHE_ADD);
11fdf7f2 181 iter = NewIterator(non_blocking_opts, handles_[1]);
7c673cae
FG
182 count = 0;
183 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
184 ASSERT_OK(iter->status());
185 count++;
186 }
187 ASSERT_EQ(count, 1);
188 ASSERT_EQ(numopen, TestGetTickerCount(options, NO_FILE_OPENS));
189 ASSERT_EQ(cache_added, TestGetTickerCount(options, BLOCK_CACHE_ADD));
190 delete iter;
191
192 // This test verifies block cache behaviors, which is not used by plain
193 // table format.
494da23a 194 } while (ChangeOptions(kSkipPlainTable | kSkipNoSeekToLast | kSkipMmapReads));
7c673cae
FG
195}
196
11fdf7f2 197TEST_P(DBIteratorTest, IterSeekBeforePrev) {
7c673cae
FG
198 ASSERT_OK(Put("a", "b"));
199 ASSERT_OK(Put("c", "d"));
200 dbfull()->Flush(FlushOptions());
201 ASSERT_OK(Put("0", "f"));
202 ASSERT_OK(Put("1", "h"));
203 dbfull()->Flush(FlushOptions());
204 ASSERT_OK(Put("2", "j"));
11fdf7f2 205 auto iter = NewIterator(ReadOptions());
7c673cae
FG
206 iter->Seek(Slice("c"));
207 iter->Prev();
208 iter->Seek(Slice("a"));
209 iter->Prev();
210 delete iter;
211}
212
11fdf7f2 213TEST_P(DBIteratorTest, IterSeekForPrevBeforeNext) {
7c673cae
FG
214 ASSERT_OK(Put("a", "b"));
215 ASSERT_OK(Put("c", "d"));
216 dbfull()->Flush(FlushOptions());
217 ASSERT_OK(Put("0", "f"));
218 ASSERT_OK(Put("1", "h"));
219 dbfull()->Flush(FlushOptions());
220 ASSERT_OK(Put("2", "j"));
11fdf7f2 221 auto iter = NewIterator(ReadOptions());
7c673cae
FG
222 iter->SeekForPrev(Slice("0"));
223 iter->Next();
224 iter->SeekForPrev(Slice("1"));
225 iter->Next();
226 delete iter;
227}
228
229namespace {
230std::string MakeLongKey(size_t length, char c) {
231 return std::string(length, c);
232}
233} // namespace
234
11fdf7f2 235TEST_P(DBIteratorTest, IterLongKeys) {
7c673cae
FG
236 ASSERT_OK(Put(MakeLongKey(20, 0), "0"));
237 ASSERT_OK(Put(MakeLongKey(32, 2), "2"));
238 ASSERT_OK(Put("a", "b"));
239 dbfull()->Flush(FlushOptions());
240 ASSERT_OK(Put(MakeLongKey(50, 1), "1"));
241 ASSERT_OK(Put(MakeLongKey(127, 3), "3"));
242 ASSERT_OK(Put(MakeLongKey(64, 4), "4"));
11fdf7f2 243 auto iter = NewIterator(ReadOptions());
7c673cae
FG
244
245 // Create a key that needs to be skipped for Seq too new
246 iter->Seek(MakeLongKey(20, 0));
247 ASSERT_EQ(IterStatus(iter), MakeLongKey(20, 0) + "->0");
248 iter->Next();
249 ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
250 iter->Next();
251 ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
252 iter->Next();
253 ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
254 iter->Next();
255 ASSERT_EQ(IterStatus(iter), MakeLongKey(64, 4) + "->4");
256
257 iter->SeekForPrev(MakeLongKey(127, 3));
258 ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
259 iter->Prev();
260 ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
261 iter->Prev();
262 ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
263 delete iter;
264
11fdf7f2 265 iter = NewIterator(ReadOptions());
7c673cae
FG
266 iter->Seek(MakeLongKey(50, 1));
267 ASSERT_EQ(IterStatus(iter), MakeLongKey(50, 1) + "->1");
268 iter->Next();
269 ASSERT_EQ(IterStatus(iter), MakeLongKey(32, 2) + "->2");
270 iter->Next();
271 ASSERT_EQ(IterStatus(iter), MakeLongKey(127, 3) + "->3");
272 delete iter;
273}
274
11fdf7f2 275TEST_P(DBIteratorTest, IterNextWithNewerSeq) {
7c673cae
FG
276 ASSERT_OK(Put("0", "0"));
277 dbfull()->Flush(FlushOptions());
278 ASSERT_OK(Put("a", "b"));
279 ASSERT_OK(Put("c", "d"));
280 ASSERT_OK(Put("d", "e"));
11fdf7f2 281 auto iter = NewIterator(ReadOptions());
7c673cae
FG
282
283 // Create a key that needs to be skipped for Seq too new
284 for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
285 i++) {
286 ASSERT_OK(Put("b", "f"));
287 }
288
289 iter->Seek(Slice("a"));
290 ASSERT_EQ(IterStatus(iter), "a->b");
291 iter->Next();
292 ASSERT_EQ(IterStatus(iter), "c->d");
293 iter->SeekForPrev(Slice("b"));
294 ASSERT_EQ(IterStatus(iter), "a->b");
295 iter->Next();
296 ASSERT_EQ(IterStatus(iter), "c->d");
297
298 delete iter;
299}
300
11fdf7f2 301TEST_P(DBIteratorTest, IterPrevWithNewerSeq) {
7c673cae
FG
302 ASSERT_OK(Put("0", "0"));
303 dbfull()->Flush(FlushOptions());
304 ASSERT_OK(Put("a", "b"));
305 ASSERT_OK(Put("c", "d"));
306 ASSERT_OK(Put("d", "e"));
11fdf7f2 307 auto iter = NewIterator(ReadOptions());
7c673cae
FG
308
309 // Create a key that needs to be skipped for Seq too new
310 for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
311 i++) {
312 ASSERT_OK(Put("b", "f"));
313 }
314
315 iter->Seek(Slice("d"));
316 ASSERT_EQ(IterStatus(iter), "d->e");
317 iter->Prev();
318 ASSERT_EQ(IterStatus(iter), "c->d");
319 iter->Prev();
320 ASSERT_EQ(IterStatus(iter), "a->b");
321 iter->Prev();
322 iter->SeekForPrev(Slice("d"));
323 ASSERT_EQ(IterStatus(iter), "d->e");
324 iter->Prev();
325 ASSERT_EQ(IterStatus(iter), "c->d");
326 iter->Prev();
327 ASSERT_EQ(IterStatus(iter), "a->b");
328 iter->Prev();
329 delete iter;
330}
331
11fdf7f2 332TEST_P(DBIteratorTest, IterPrevWithNewerSeq2) {
7c673cae
FG
333 ASSERT_OK(Put("0", "0"));
334 dbfull()->Flush(FlushOptions());
335 ASSERT_OK(Put("a", "b"));
336 ASSERT_OK(Put("c", "d"));
337 ASSERT_OK(Put("e", "f"));
11fdf7f2
TL
338 auto iter = NewIterator(ReadOptions());
339 auto iter2 = NewIterator(ReadOptions());
7c673cae
FG
340 iter->Seek(Slice("c"));
341 iter2->SeekForPrev(Slice("d"));
342 ASSERT_EQ(IterStatus(iter), "c->d");
343 ASSERT_EQ(IterStatus(iter2), "c->d");
344
345 // Create a key that needs to be skipped for Seq too new
346 for (uint64_t i = 0; i < last_options_.max_sequential_skip_in_iterations + 1;
347 i++) {
348 ASSERT_OK(Put("b", "f"));
349 }
350
351 iter->Prev();
352 ASSERT_EQ(IterStatus(iter), "a->b");
353 iter->Prev();
354 iter2->Prev();
355 ASSERT_EQ(IterStatus(iter2), "a->b");
356 iter2->Prev();
357 delete iter;
358 delete iter2;
359}
360
11fdf7f2 361TEST_P(DBIteratorTest, IterEmpty) {
7c673cae
FG
362 do {
363 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
11fdf7f2 364 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
7c673cae
FG
365
366 iter->SeekToFirst();
367 ASSERT_EQ(IterStatus(iter), "(invalid)");
368
369 iter->SeekToLast();
370 ASSERT_EQ(IterStatus(iter), "(invalid)");
371
372 iter->Seek("foo");
373 ASSERT_EQ(IterStatus(iter), "(invalid)");
374
375 iter->SeekForPrev("foo");
376 ASSERT_EQ(IterStatus(iter), "(invalid)");
377
378 delete iter;
379 } while (ChangeCompactOptions());
380}
381
11fdf7f2 382TEST_P(DBIteratorTest, IterSingle) {
7c673cae
FG
383 do {
384 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
385 ASSERT_OK(Put(1, "a", "va"));
11fdf7f2 386 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
7c673cae
FG
387
388 iter->SeekToFirst();
389 ASSERT_EQ(IterStatus(iter), "a->va");
390 iter->Next();
391 ASSERT_EQ(IterStatus(iter), "(invalid)");
392 iter->SeekToFirst();
393 ASSERT_EQ(IterStatus(iter), "a->va");
394 iter->Prev();
395 ASSERT_EQ(IterStatus(iter), "(invalid)");
396
397 iter->SeekToLast();
398 ASSERT_EQ(IterStatus(iter), "a->va");
399 iter->Next();
400 ASSERT_EQ(IterStatus(iter), "(invalid)");
401 iter->SeekToLast();
402 ASSERT_EQ(IterStatus(iter), "a->va");
403 iter->Prev();
404 ASSERT_EQ(IterStatus(iter), "(invalid)");
405
406 iter->Seek("");
407 ASSERT_EQ(IterStatus(iter), "a->va");
408 iter->Next();
409 ASSERT_EQ(IterStatus(iter), "(invalid)");
410 iter->SeekForPrev("");
411 ASSERT_EQ(IterStatus(iter), "(invalid)");
412
413 iter->Seek("a");
414 ASSERT_EQ(IterStatus(iter), "a->va");
415 iter->Next();
416 ASSERT_EQ(IterStatus(iter), "(invalid)");
417 iter->SeekForPrev("a");
418 ASSERT_EQ(IterStatus(iter), "a->va");
419 iter->Prev();
420 ASSERT_EQ(IterStatus(iter), "(invalid)");
421
422 iter->Seek("b");
423 ASSERT_EQ(IterStatus(iter), "(invalid)");
424 iter->SeekForPrev("b");
425 ASSERT_EQ(IterStatus(iter), "a->va");
426 iter->Prev();
427 ASSERT_EQ(IterStatus(iter), "(invalid)");
428
429 delete iter;
430 } while (ChangeCompactOptions());
431}
432
11fdf7f2 433TEST_P(DBIteratorTest, IterMulti) {
7c673cae
FG
434 do {
435 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
436 ASSERT_OK(Put(1, "a", "va"));
437 ASSERT_OK(Put(1, "b", "vb"));
438 ASSERT_OK(Put(1, "c", "vc"));
11fdf7f2 439 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
7c673cae
FG
440
441 iter->SeekToFirst();
442 ASSERT_EQ(IterStatus(iter), "a->va");
443 iter->Next();
444 ASSERT_EQ(IterStatus(iter), "b->vb");
445 iter->Next();
446 ASSERT_EQ(IterStatus(iter), "c->vc");
447 iter->Next();
448 ASSERT_EQ(IterStatus(iter), "(invalid)");
449 iter->SeekToFirst();
450 ASSERT_EQ(IterStatus(iter), "a->va");
451 iter->Prev();
452 ASSERT_EQ(IterStatus(iter), "(invalid)");
453
454 iter->SeekToLast();
455 ASSERT_EQ(IterStatus(iter), "c->vc");
456 iter->Prev();
457 ASSERT_EQ(IterStatus(iter), "b->vb");
458 iter->Prev();
459 ASSERT_EQ(IterStatus(iter), "a->va");
460 iter->Prev();
461 ASSERT_EQ(IterStatus(iter), "(invalid)");
462 iter->SeekToLast();
463 ASSERT_EQ(IterStatus(iter), "c->vc");
464 iter->Next();
465 ASSERT_EQ(IterStatus(iter), "(invalid)");
466
467 iter->Seek("");
468 ASSERT_EQ(IterStatus(iter), "a->va");
469 iter->Seek("a");
470 ASSERT_EQ(IterStatus(iter), "a->va");
471 iter->Seek("ax");
472 ASSERT_EQ(IterStatus(iter), "b->vb");
473 iter->SeekForPrev("d");
474 ASSERT_EQ(IterStatus(iter), "c->vc");
475 iter->SeekForPrev("c");
476 ASSERT_EQ(IterStatus(iter), "c->vc");
477 iter->SeekForPrev("bx");
478 ASSERT_EQ(IterStatus(iter), "b->vb");
479
480 iter->Seek("b");
481 ASSERT_EQ(IterStatus(iter), "b->vb");
482 iter->Seek("z");
483 ASSERT_EQ(IterStatus(iter), "(invalid)");
484 iter->SeekForPrev("b");
485 ASSERT_EQ(IterStatus(iter), "b->vb");
486 iter->SeekForPrev("");
487 ASSERT_EQ(IterStatus(iter), "(invalid)");
488
489 // Switch from reverse to forward
490 iter->SeekToLast();
491 iter->Prev();
492 iter->Prev();
493 iter->Next();
494 ASSERT_EQ(IterStatus(iter), "b->vb");
495
496 // Switch from forward to reverse
497 iter->SeekToFirst();
498 iter->Next();
499 iter->Next();
500 iter->Prev();
501 ASSERT_EQ(IterStatus(iter), "b->vb");
502
503 // Make sure iter stays at snapshot
504 ASSERT_OK(Put(1, "a", "va2"));
505 ASSERT_OK(Put(1, "a2", "va3"));
506 ASSERT_OK(Put(1, "b", "vb2"));
507 ASSERT_OK(Put(1, "c", "vc2"));
508 ASSERT_OK(Delete(1, "b"));
509 iter->SeekToFirst();
510 ASSERT_EQ(IterStatus(iter), "a->va");
511 iter->Next();
512 ASSERT_EQ(IterStatus(iter), "b->vb");
513 iter->Next();
514 ASSERT_EQ(IterStatus(iter), "c->vc");
515 iter->Next();
516 ASSERT_EQ(IterStatus(iter), "(invalid)");
517 iter->SeekToLast();
518 ASSERT_EQ(IterStatus(iter), "c->vc");
519 iter->Prev();
520 ASSERT_EQ(IterStatus(iter), "b->vb");
521 iter->Prev();
522 ASSERT_EQ(IterStatus(iter), "a->va");
523 iter->Prev();
524 ASSERT_EQ(IterStatus(iter), "(invalid)");
525
526 delete iter;
527 } while (ChangeCompactOptions());
528}
529
530// Check that we can skip over a run of user keys
531// by using reseek rather than sequential scan
11fdf7f2 532TEST_P(DBIteratorTest, IterReseek) {
7c673cae
FG
533 anon::OptionsOverride options_override;
534 options_override.skip_policy = kSkipNoSnapshot;
535 Options options = CurrentOptions(options_override);
536 options.max_sequential_skip_in_iterations = 3;
537 options.create_if_missing = true;
538 options.statistics = rocksdb::CreateDBStatistics();
539 DestroyAndReopen(options);
540 CreateAndReopenWithCF({"pikachu"}, options);
541
542 // insert three keys with same userkey and verify that
543 // reseek is not invoked. For each of these test cases,
544 // verify that we can find the next key "b".
545 ASSERT_OK(Put(1, "a", "zero"));
546 ASSERT_OK(Put(1, "a", "one"));
547 ASSERT_OK(Put(1, "a", "two"));
548 ASSERT_OK(Put(1, "b", "bone"));
11fdf7f2 549 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
7c673cae
FG
550 iter->SeekToFirst();
551 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
552 ASSERT_EQ(IterStatus(iter), "a->two");
553 iter->Next();
554 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
555 ASSERT_EQ(IterStatus(iter), "b->bone");
556 delete iter;
557
558 // insert a total of three keys with same userkey and verify
559 // that reseek is still not invoked.
560 ASSERT_OK(Put(1, "a", "three"));
11fdf7f2 561 iter = NewIterator(ReadOptions(), handles_[1]);
7c673cae
FG
562 iter->SeekToFirst();
563 ASSERT_EQ(IterStatus(iter), "a->three");
564 iter->Next();
565 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
566 ASSERT_EQ(IterStatus(iter), "b->bone");
567 delete iter;
568
569 // insert a total of four keys with same userkey and verify
570 // that reseek is invoked.
571 ASSERT_OK(Put(1, "a", "four"));
11fdf7f2 572 iter = NewIterator(ReadOptions(), handles_[1]);
7c673cae
FG
573 iter->SeekToFirst();
574 ASSERT_EQ(IterStatus(iter), "a->four");
575 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 0);
576 iter->Next();
577 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION), 1);
578 ASSERT_EQ(IterStatus(iter), "b->bone");
579 delete iter;
580
581 // Testing reverse iterator
582 // At this point, we have three versions of "a" and one version of "b".
583 // The reseek statistics is already at 1.
584 int num_reseeks = static_cast<int>(
585 TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION));
586
587 // Insert another version of b and assert that reseek is not invoked
588 ASSERT_OK(Put(1, "b", "btwo"));
11fdf7f2 589 iter = NewIterator(ReadOptions(), handles_[1]);
7c673cae
FG
590 iter->SeekToLast();
591 ASSERT_EQ(IterStatus(iter), "b->btwo");
592 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
593 num_reseeks);
594 iter->Prev();
595 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
596 num_reseeks + 1);
597 ASSERT_EQ(IterStatus(iter), "a->four");
598 delete iter;
599
600 // insert two more versions of b. This makes a total of 4 versions
601 // of b and 4 versions of a.
602 ASSERT_OK(Put(1, "b", "bthree"));
603 ASSERT_OK(Put(1, "b", "bfour"));
11fdf7f2 604 iter = NewIterator(ReadOptions(), handles_[1]);
7c673cae
FG
605 iter->SeekToLast();
606 ASSERT_EQ(IterStatus(iter), "b->bfour");
607 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
608 num_reseeks + 2);
609 iter->Prev();
610
611 // the previous Prev call should have invoked reseek
612 ASSERT_EQ(TestGetTickerCount(options, NUMBER_OF_RESEEKS_IN_ITERATION),
613 num_reseeks + 3);
614 ASSERT_EQ(IterStatus(iter), "a->four");
615 delete iter;
616}
617
11fdf7f2 618TEST_P(DBIteratorTest, IterSmallAndLargeMix) {
7c673cae
FG
619 do {
620 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
621 ASSERT_OK(Put(1, "a", "va"));
622 ASSERT_OK(Put(1, "b", std::string(100000, 'b')));
623 ASSERT_OK(Put(1, "c", "vc"));
624 ASSERT_OK(Put(1, "d", std::string(100000, 'd')));
625 ASSERT_OK(Put(1, "e", std::string(100000, 'e')));
626
11fdf7f2 627 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
7c673cae
FG
628
629 iter->SeekToFirst();
630 ASSERT_EQ(IterStatus(iter), "a->va");
631 iter->Next();
632 ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
633 iter->Next();
634 ASSERT_EQ(IterStatus(iter), "c->vc");
635 iter->Next();
636 ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
637 iter->Next();
638 ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
639 iter->Next();
640 ASSERT_EQ(IterStatus(iter), "(invalid)");
641
642 iter->SeekToLast();
643 ASSERT_EQ(IterStatus(iter), "e->" + std::string(100000, 'e'));
644 iter->Prev();
645 ASSERT_EQ(IterStatus(iter), "d->" + std::string(100000, 'd'));
646 iter->Prev();
647 ASSERT_EQ(IterStatus(iter), "c->vc");
648 iter->Prev();
649 ASSERT_EQ(IterStatus(iter), "b->" + std::string(100000, 'b'));
650 iter->Prev();
651 ASSERT_EQ(IterStatus(iter), "a->va");
652 iter->Prev();
653 ASSERT_EQ(IterStatus(iter), "(invalid)");
654
655 delete iter;
656 } while (ChangeCompactOptions());
657}
658
11fdf7f2 659TEST_P(DBIteratorTest, IterMultiWithDelete) {
7c673cae
FG
660 do {
661 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
662 ASSERT_OK(Put(1, "ka", "va"));
663 ASSERT_OK(Put(1, "kb", "vb"));
664 ASSERT_OK(Put(1, "kc", "vc"));
665 ASSERT_OK(Delete(1, "kb"));
666 ASSERT_EQ("NOT_FOUND", Get(1, "kb"));
667
11fdf7f2 668 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
7c673cae
FG
669 iter->Seek("kc");
670 ASSERT_EQ(IterStatus(iter), "kc->vc");
671 if (!CurrentOptions().merge_operator) {
672 // TODO: merge operator does not support backward iteration yet
673 if (kPlainTableAllBytesPrefix != option_config_ &&
674 kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
675 kHashLinkList != option_config_ &&
676 kHashSkipList != option_config_) { // doesn't support SeekToLast
677 iter->Prev();
678 ASSERT_EQ(IterStatus(iter), "ka->va");
679 }
680 }
681 delete iter;
682 } while (ChangeOptions());
683}
684
11fdf7f2 685TEST_P(DBIteratorTest, IterPrevMaxSkip) {
7c673cae
FG
686 do {
687 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
688 for (int i = 0; i < 2; i++) {
689 ASSERT_OK(Put(1, "key1", "v1"));
690 ASSERT_OK(Put(1, "key2", "v2"));
691 ASSERT_OK(Put(1, "key3", "v3"));
692 ASSERT_OK(Put(1, "key4", "v4"));
693 ASSERT_OK(Put(1, "key5", "v5"));
694 }
695
696 VerifyIterLast("key5->v5", 1);
697
698 ASSERT_OK(Delete(1, "key5"));
699 VerifyIterLast("key4->v4", 1);
700
701 ASSERT_OK(Delete(1, "key4"));
702 VerifyIterLast("key3->v3", 1);
703
704 ASSERT_OK(Delete(1, "key3"));
705 VerifyIterLast("key2->v2", 1);
706
707 ASSERT_OK(Delete(1, "key2"));
708 VerifyIterLast("key1->v1", 1);
709
710 ASSERT_OK(Delete(1, "key1"));
711 VerifyIterLast("(invalid)", 1);
712 } while (ChangeOptions(kSkipMergePut | kSkipNoSeekToLast));
713}
714
11fdf7f2 715TEST_P(DBIteratorTest, IterWithSnapshot) {
7c673cae
FG
716 anon::OptionsOverride options_override;
717 options_override.skip_policy = kSkipNoSnapshot;
718 do {
719 CreateAndReopenWithCF({"pikachu"}, CurrentOptions(options_override));
720 ASSERT_OK(Put(1, "key1", "val1"));
721 ASSERT_OK(Put(1, "key2", "val2"));
722 ASSERT_OK(Put(1, "key3", "val3"));
723 ASSERT_OK(Put(1, "key4", "val4"));
724 ASSERT_OK(Put(1, "key5", "val5"));
725
726 const Snapshot* snapshot = db_->GetSnapshot();
727 ReadOptions options;
728 options.snapshot = snapshot;
11fdf7f2 729 Iterator* iter = NewIterator(options, handles_[1]);
7c673cae
FG
730
731 ASSERT_OK(Put(1, "key0", "val0"));
732 // Put more values after the snapshot
733 ASSERT_OK(Put(1, "key100", "val100"));
734 ASSERT_OK(Put(1, "key101", "val101"));
735
736 iter->Seek("key5");
737 ASSERT_EQ(IterStatus(iter), "key5->val5");
738 if (!CurrentOptions().merge_operator) {
739 // TODO: merge operator does not support backward iteration yet
740 if (kPlainTableAllBytesPrefix != option_config_ &&
741 kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
742 kHashLinkList != option_config_ && kHashSkipList != option_config_) {
743 iter->Prev();
744 ASSERT_EQ(IterStatus(iter), "key4->val4");
745 iter->Prev();
746 ASSERT_EQ(IterStatus(iter), "key3->val3");
747
748 iter->Next();
749 ASSERT_EQ(IterStatus(iter), "key4->val4");
750 iter->Next();
751 ASSERT_EQ(IterStatus(iter), "key5->val5");
752 }
753 iter->Next();
754 ASSERT_TRUE(!iter->Valid());
755 }
756
757 if (!CurrentOptions().merge_operator) {
758 // TODO(gzh): merge operator does not support backward iteration yet
759 if (kPlainTableAllBytesPrefix != option_config_ &&
760 kBlockBasedTableWithWholeKeyHashIndex != option_config_ &&
761 kHashLinkList != option_config_ && kHashSkipList != option_config_) {
762 iter->SeekForPrev("key1");
763 ASSERT_EQ(IterStatus(iter), "key1->val1");
764 iter->Next();
765 ASSERT_EQ(IterStatus(iter), "key2->val2");
766 iter->Next();
767 ASSERT_EQ(IterStatus(iter), "key3->val3");
768 iter->Prev();
769 ASSERT_EQ(IterStatus(iter), "key2->val2");
770 iter->Prev();
771 ASSERT_EQ(IterStatus(iter), "key1->val1");
772 iter->Prev();
773 ASSERT_TRUE(!iter->Valid());
774 }
775 }
776 db_->ReleaseSnapshot(snapshot);
777 delete iter;
494da23a 778 } while (ChangeOptions());
7c673cae
FG
779}
780
11fdf7f2 781TEST_P(DBIteratorTest, IteratorPinsRef) {
7c673cae
FG
782 do {
783 CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
784 Put(1, "foo", "hello");
785
786 // Get iterator that will yield the current contents of the DB.
11fdf7f2 787 Iterator* iter = NewIterator(ReadOptions(), handles_[1]);
7c673cae
FG
788
789 // Write to force compactions
790 Put(1, "foo", "newvalue1");
791 for (int i = 0; i < 100; i++) {
792 // 100K values
793 ASSERT_OK(Put(1, Key(i), Key(i) + std::string(100000, 'v')));
794 }
795 Put(1, "foo", "newvalue2");
796
797 iter->SeekToFirst();
798 ASSERT_TRUE(iter->Valid());
799 ASSERT_EQ("foo", iter->key().ToString());
800 ASSERT_EQ("hello", iter->value().ToString());
801 iter->Next();
802 ASSERT_TRUE(!iter->Valid());
803 delete iter;
804 } while (ChangeCompactOptions());
805}
806
11fdf7f2
TL
807// SetOptions not defined in ROCKSDB LITE
808#ifndef ROCKSDB_LITE
809TEST_P(DBIteratorTest, DBIteratorBoundTest) {
7c673cae
FG
810 Options options = CurrentOptions();
811 options.env = env_;
812 options.create_if_missing = true;
813
814 options.prefix_extractor = nullptr;
815 DestroyAndReopen(options);
816 ASSERT_OK(Put("a", "0"));
817 ASSERT_OK(Put("foo", "bar"));
818 ASSERT_OK(Put("foo1", "bar1"));
819 ASSERT_OK(Put("g1", "0"));
820
821 // testing basic case with no iterate_upper_bound and no prefix_extractor
822 {
823 ReadOptions ro;
824 ro.iterate_upper_bound = nullptr;
825
11fdf7f2 826 std::unique_ptr<Iterator> iter(NewIterator(ro));
7c673cae
FG
827
828 iter->Seek("foo");
829
830 ASSERT_TRUE(iter->Valid());
831 ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
832
833 iter->Next();
834 ASSERT_TRUE(iter->Valid());
835 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
836
837 iter->Next();
838 ASSERT_TRUE(iter->Valid());
839 ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
840
841 iter->SeekForPrev("g1");
842
843 ASSERT_TRUE(iter->Valid());
844 ASSERT_EQ(iter->key().compare(Slice("g1")), 0);
845
846 iter->Prev();
847 ASSERT_TRUE(iter->Valid());
848 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
849
850 iter->Prev();
851 ASSERT_TRUE(iter->Valid());
852 ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
853 }
854
855 // testing iterate_upper_bound and forward iterator
856 // to make sure it stops at bound
857 {
858 ReadOptions ro;
859 // iterate_upper_bound points beyond the last expected entry
860 Slice prefix("foo2");
861 ro.iterate_upper_bound = &prefix;
862
11fdf7f2 863 std::unique_ptr<Iterator> iter(NewIterator(ro));
7c673cae
FG
864
865 iter->Seek("foo");
866
867 ASSERT_TRUE(iter->Valid());
868 ASSERT_EQ(iter->key().compare(Slice("foo")), 0);
869
870 iter->Next();
871 ASSERT_TRUE(iter->Valid());
872 ASSERT_EQ(iter->key().compare(("foo1")), 0);
873
874 iter->Next();
875 // should stop here...
876 ASSERT_TRUE(!iter->Valid());
877 }
878 // Testing SeekToLast with iterate_upper_bound set
879 {
880 ReadOptions ro;
881
882 Slice prefix("foo");
883 ro.iterate_upper_bound = &prefix;
884
11fdf7f2 885 std::unique_ptr<Iterator> iter(NewIterator(ro));
7c673cae
FG
886
887 iter->SeekToLast();
888 ASSERT_TRUE(iter->Valid());
889 ASSERT_EQ(iter->key().compare(Slice("a")), 0);
890 }
891
892 // prefix is the first letter of the key
11fdf7f2 893 ASSERT_OK(dbfull()->SetOptions({{"prefix_extractor", "fixed:1"}}));
7c673cae
FG
894 ASSERT_OK(Put("a", "0"));
895 ASSERT_OK(Put("foo", "bar"));
896 ASSERT_OK(Put("foo1", "bar1"));
897 ASSERT_OK(Put("g1", "0"));
898
899 // testing with iterate_upper_bound and prefix_extractor
900 // Seek target and iterate_upper_bound are not is same prefix
901 // This should be an error
902 {
903 ReadOptions ro;
904 Slice upper_bound("g");
905 ro.iterate_upper_bound = &upper_bound;
906
11fdf7f2 907 std::unique_ptr<Iterator> iter(NewIterator(ro));
7c673cae
FG
908
909 iter->Seek("foo");
910
911 ASSERT_TRUE(iter->Valid());
912 ASSERT_EQ("foo", iter->key().ToString());
913
914 iter->Next();
915 ASSERT_TRUE(iter->Valid());
916 ASSERT_EQ("foo1", iter->key().ToString());
917
918 iter->Next();
919 ASSERT_TRUE(!iter->Valid());
920 }
921
922 // testing that iterate_upper_bound prevents iterating over deleted items
923 // if the bound has already reached
924 {
925 options.prefix_extractor = nullptr;
926 DestroyAndReopen(options);
927 ASSERT_OK(Put("a", "0"));
928 ASSERT_OK(Put("b", "0"));
929 ASSERT_OK(Put("b1", "0"));
930 ASSERT_OK(Put("c", "0"));
931 ASSERT_OK(Put("d", "0"));
932 ASSERT_OK(Put("e", "0"));
933 ASSERT_OK(Delete("c"));
934 ASSERT_OK(Delete("d"));
935
936 // base case with no bound
937 ReadOptions ro;
938 ro.iterate_upper_bound = nullptr;
939
11fdf7f2 940 std::unique_ptr<Iterator> iter(NewIterator(ro));
7c673cae
FG
941
942 iter->Seek("b");
943 ASSERT_TRUE(iter->Valid());
944 ASSERT_EQ(iter->key().compare(Slice("b")), 0);
945
946 iter->Next();
947 ASSERT_TRUE(iter->Valid());
948 ASSERT_EQ(iter->key().compare(("b1")), 0);
949
11fdf7f2 950 get_perf_context()->Reset();
7c673cae
FG
951 iter->Next();
952
953 ASSERT_TRUE(iter->Valid());
11fdf7f2 954 ASSERT_EQ(static_cast<int>(get_perf_context()->internal_delete_skipped_count), 2);
7c673cae
FG
955
956 // now testing with iterate_bound
957 Slice prefix("c");
958 ro.iterate_upper_bound = &prefix;
959
11fdf7f2 960 iter.reset(NewIterator(ro));
7c673cae 961
11fdf7f2 962 get_perf_context()->Reset();
7c673cae
FG
963
964 iter->Seek("b");
965 ASSERT_TRUE(iter->Valid());
966 ASSERT_EQ(iter->key().compare(Slice("b")), 0);
967
968 iter->Next();
969 ASSERT_TRUE(iter->Valid());
970 ASSERT_EQ(iter->key().compare(("b1")), 0);
971
972 iter->Next();
973 // the iteration should stop as soon as the bound key is reached
974 // even though the key is deleted
975 // hence internal_delete_skipped_count should be 0
976 ASSERT_TRUE(!iter->Valid());
11fdf7f2 977 ASSERT_EQ(static_cast<int>(get_perf_context()->internal_delete_skipped_count), 0);
7c673cae
FG
978 }
979}
980
11fdf7f2
TL
981TEST_P(DBIteratorTest, DBIteratorBoundMultiSeek) {
982 Options options = CurrentOptions();
983 options.env = env_;
984 options.create_if_missing = true;
985 options.statistics = rocksdb::CreateDBStatistics();
986 options.prefix_extractor = nullptr;
987 DestroyAndReopen(options);
988 ASSERT_OK(Put("a", "0"));
989 ASSERT_OK(Put("z", "0"));
990 ASSERT_OK(Flush());
991 ASSERT_OK(Put("foo1", "bar1"));
992 ASSERT_OK(Put("foo2", "bar2"));
993 ASSERT_OK(Put("foo3", "bar3"));
994 ASSERT_OK(Put("foo4", "bar4"));
995
996 {
997 std::string up_str = "foo5";
998 Slice up(up_str);
999 ReadOptions ro;
1000 ro.iterate_upper_bound = &up;
1001 std::unique_ptr<Iterator> iter(NewIterator(ro));
1002
1003 iter->Seek("foo1");
1004 ASSERT_TRUE(iter->Valid());
1005 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
1006
1007 uint64_t prev_block_cache_hit =
1008 TestGetTickerCount(options, BLOCK_CACHE_HIT);
1009 uint64_t prev_block_cache_miss =
1010 TestGetTickerCount(options, BLOCK_CACHE_MISS);
1011
1012 ASSERT_GT(prev_block_cache_hit + prev_block_cache_miss, 0);
1013
1014 iter->Seek("foo4");
1015 ASSERT_TRUE(iter->Valid());
1016 ASSERT_EQ(iter->key().compare(Slice("foo4")), 0);
1017 ASSERT_EQ(prev_block_cache_hit,
1018 TestGetTickerCount(options, BLOCK_CACHE_HIT));
1019 ASSERT_EQ(prev_block_cache_miss,
1020 TestGetTickerCount(options, BLOCK_CACHE_MISS));
1021
1022 iter->Seek("foo2");
1023 ASSERT_TRUE(iter->Valid());
1024 ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
1025 iter->Next();
1026 ASSERT_TRUE(iter->Valid());
1027 ASSERT_EQ(iter->key().compare(Slice("foo3")), 0);
1028 ASSERT_EQ(prev_block_cache_hit,
1029 TestGetTickerCount(options, BLOCK_CACHE_HIT));
1030 ASSERT_EQ(prev_block_cache_miss,
1031 TestGetTickerCount(options, BLOCK_CACHE_MISS));
1032 }
1033}
1034#endif
1035
1036TEST_P(DBIteratorTest, DBIteratorBoundOptimizationTest) {
1037 int upper_bound_hits = 0;
1038 Options options = CurrentOptions();
1039 rocksdb::SyncPoint::GetInstance()->SetCallBack(
1040 "BlockBasedTable::BlockEntryIteratorState::KeyReachedUpperBound",
1041 [&upper_bound_hits](void* arg) {
1042 assert(arg != nullptr);
1043 upper_bound_hits += (*static_cast<bool*>(arg) ? 1 : 0);
1044 });
1045 rocksdb::SyncPoint::GetInstance()->EnableProcessing();
1046 options.env = env_;
1047 options.create_if_missing = true;
1048 options.prefix_extractor = nullptr;
1049 BlockBasedTableOptions table_options;
1050 table_options.flush_block_policy_factory =
1051 std::make_shared<FlushBlockEveryKeyPolicyFactory>();
1052 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1053
1054 DestroyAndReopen(options);
1055 ASSERT_OK(Put("foo1", "bar1"));
1056 ASSERT_OK(Put("foo2", "bar2"));
1057 ASSERT_OK(Put("foo4", "bar4"));
1058 ASSERT_OK(Flush());
1059
1060 Slice ub("foo3");
1061 ReadOptions ro;
1062 ro.iterate_upper_bound = &ub;
1063
1064 std::unique_ptr<Iterator> iter(NewIterator(ro));
1065
1066 iter->Seek("foo");
1067 ASSERT_TRUE(iter->Valid());
1068 ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
1069 ASSERT_EQ(upper_bound_hits, 0);
1070
1071 iter->Next();
1072 ASSERT_TRUE(iter->Valid());
1073 ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
1074 ASSERT_EQ(upper_bound_hits, 0);
1075
1076 iter->Next();
1077 ASSERT_FALSE(iter->Valid());
1078 ASSERT_EQ(upper_bound_hits, 1);
1079}
7c673cae
FG
1080// TODO(3.13): fix the issue of Seek() + Prev() which might not necessary
1081// return the biggest key which is smaller than the seek key.
11fdf7f2 1082TEST_P(DBIteratorTest, PrevAfterAndNextAfterMerge) {
7c673cae
FG
1083 Options options;
1084 options.create_if_missing = true;
1085 options.merge_operator = MergeOperators::CreatePutOperator();
1086 options.env = env_;
1087 DestroyAndReopen(options);
1088
1089 // write three entries with different keys using Merge()
1090 WriteOptions wopts;
1091 db_->Merge(wopts, "1", "data1");
1092 db_->Merge(wopts, "2", "data2");
1093 db_->Merge(wopts, "3", "data3");
1094
11fdf7f2 1095 std::unique_ptr<Iterator> it(NewIterator(ReadOptions()));
7c673cae
FG
1096
1097 it->Seek("2");
1098 ASSERT_TRUE(it->Valid());
1099 ASSERT_EQ("2", it->key().ToString());
1100
1101 it->Prev();
1102 ASSERT_TRUE(it->Valid());
1103 ASSERT_EQ("1", it->key().ToString());
1104
1105 it->SeekForPrev("1");
1106 ASSERT_TRUE(it->Valid());
1107 ASSERT_EQ("1", it->key().ToString());
1108
1109 it->Next();
1110 ASSERT_TRUE(it->Valid());
1111 ASSERT_EQ("2", it->key().ToString());
1112}
1113
11fdf7f2
TL
1114class DBIteratorTestForPinnedData : public DBIteratorTest {
1115 public:
7c673cae
FG
1116 enum TestConfig {
1117 NORMAL,
1118 CLOSE_AND_OPEN,
1119 COMPACT_BEFORE_READ,
1120 FLUSH_EVERY_1000,
1121 MAX
1122 };
11fdf7f2
TL
1123 DBIteratorTestForPinnedData() : DBIteratorTest() {}
1124 void PinnedDataIteratorRandomized(TestConfig run_config) {
1125 // Generate Random data
1126 Random rnd(301);
1127
1128 int puts = 100000;
1129 int key_pool = static_cast<int>(puts * 0.7);
1130 int key_size = 100;
1131 int val_size = 1000;
1132 int seeks_percentage = 20; // 20% of keys will be used to test seek()
1133 int delete_percentage = 20; // 20% of keys will be deleted
1134 int merge_percentage = 20; // 20% of keys will be added using Merge()
7c673cae 1135
7c673cae
FG
1136 Options options = CurrentOptions();
1137 BlockBasedTableOptions table_options;
1138 table_options.use_delta_encoding = false;
1139 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1140 options.merge_operator = MergeOperators::CreatePutOperator();
1141 DestroyAndReopen(options);
1142
1143 std::vector<std::string> generated_keys(key_pool);
1144 for (int i = 0; i < key_pool; i++) {
1145 generated_keys[i] = RandomString(&rnd, key_size);
1146 }
1147
1148 std::map<std::string, std::string> true_data;
1149 std::vector<std::string> random_keys;
1150 std::vector<std::string> deleted_keys;
1151 for (int i = 0; i < puts; i++) {
1152 auto& k = generated_keys[rnd.Next() % key_pool];
1153 auto v = RandomString(&rnd, val_size);
1154
1155 // Insert data to true_data map and to DB
1156 true_data[k] = v;
1157 if (rnd.OneIn(static_cast<int>(100.0 / merge_percentage))) {
1158 ASSERT_OK(db_->Merge(WriteOptions(), k, v));
1159 } else {
1160 ASSERT_OK(Put(k, v));
1161 }
1162
1163 // Pick random keys to be used to test Seek()
1164 if (rnd.OneIn(static_cast<int>(100.0 / seeks_percentage))) {
1165 random_keys.push_back(k);
1166 }
1167
1168 // Delete some random keys
1169 if (rnd.OneIn(static_cast<int>(100.0 / delete_percentage))) {
1170 deleted_keys.push_back(k);
1171 true_data.erase(k);
1172 ASSERT_OK(Delete(k));
1173 }
1174
1175 if (run_config == TestConfig::FLUSH_EVERY_1000) {
1176 if (i && i % 1000 == 0) {
1177 Flush();
1178 }
1179 }
1180 }
1181
1182 if (run_config == TestConfig::CLOSE_AND_OPEN) {
1183 Close();
1184 Reopen(options);
1185 } else if (run_config == TestConfig::COMPACT_BEFORE_READ) {
1186 db_->CompactRange(CompactRangeOptions(), nullptr, nullptr);
1187 }
1188
1189 ReadOptions ro;
1190 ro.pin_data = true;
11fdf7f2 1191 auto iter = NewIterator(ro);
7c673cae
FG
1192
1193 {
1194 // Test Seek to random keys
1195 std::vector<Slice> keys_slices;
1196 std::vector<std::string> true_keys;
1197 for (auto& k : random_keys) {
1198 iter->Seek(k);
1199 if (!iter->Valid()) {
1200 ASSERT_EQ(true_data.lower_bound(k), true_data.end());
1201 continue;
1202 }
1203 std::string prop_value;
1204 ASSERT_OK(
1205 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1206 ASSERT_EQ("1", prop_value);
1207 keys_slices.push_back(iter->key());
1208 true_keys.push_back(true_data.lower_bound(k)->first);
1209 }
1210
1211 for (size_t i = 0; i < keys_slices.size(); i++) {
1212 ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
1213 }
1214 }
1215
1216 {
1217 // Test SeekForPrev to random keys
1218 std::vector<Slice> keys_slices;
1219 std::vector<std::string> true_keys;
1220 for (auto& k : random_keys) {
1221 iter->SeekForPrev(k);
1222 if (!iter->Valid()) {
1223 ASSERT_EQ(true_data.upper_bound(k), true_data.begin());
1224 continue;
1225 }
1226 std::string prop_value;
1227 ASSERT_OK(
1228 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1229 ASSERT_EQ("1", prop_value);
1230 keys_slices.push_back(iter->key());
1231 true_keys.push_back((--true_data.upper_bound(k))->first);
1232 }
1233
1234 for (size_t i = 0; i < keys_slices.size(); i++) {
1235 ASSERT_EQ(keys_slices[i].ToString(), true_keys[i]);
1236 }
1237 }
1238
1239 {
1240 // Test iterating all data forward
1241 std::vector<Slice> all_keys;
1242 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1243 std::string prop_value;
1244 ASSERT_OK(
1245 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1246 ASSERT_EQ("1", prop_value);
1247 all_keys.push_back(iter->key());
1248 }
1249 ASSERT_EQ(all_keys.size(), true_data.size());
1250
1251 // Verify that all keys slices are valid
1252 auto data_iter = true_data.begin();
1253 for (size_t i = 0; i < all_keys.size(); i++) {
1254 ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
1255 data_iter++;
1256 }
1257 }
1258
1259 {
1260 // Test iterating all data backward
1261 std::vector<Slice> all_keys;
1262 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
1263 std::string prop_value;
1264 ASSERT_OK(
1265 iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1266 ASSERT_EQ("1", prop_value);
1267 all_keys.push_back(iter->key());
1268 }
1269 ASSERT_EQ(all_keys.size(), true_data.size());
1270
1271 // Verify that all keys slices are valid (backward)
1272 auto data_iter = true_data.rbegin();
1273 for (size_t i = 0; i < all_keys.size(); i++) {
1274 ASSERT_EQ(all_keys[i].ToString(), data_iter->first);
1275 data_iter++;
1276 }
1277 }
1278
1279 delete iter;
11fdf7f2
TL
1280}
1281};
1282
1283TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedNormal) {
1284 PinnedDataIteratorRandomized(TestConfig::NORMAL);
1285}
1286
1287TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedCLoseAndOpen) {
1288 PinnedDataIteratorRandomized(TestConfig::CLOSE_AND_OPEN);
1289}
1290
1291TEST_P(DBIteratorTestForPinnedData,
1292 PinnedDataIteratorRandomizedCompactBeforeRead) {
1293 PinnedDataIteratorRandomized(TestConfig::COMPACT_BEFORE_READ);
1294}
1295
1296TEST_P(DBIteratorTestForPinnedData, PinnedDataIteratorRandomizedFlush) {
1297 PinnedDataIteratorRandomized(TestConfig::FLUSH_EVERY_1000);
7c673cae
FG
1298}
1299
1300#ifndef ROCKSDB_LITE
11fdf7f2 1301TEST_P(DBIteratorTest, PinnedDataIteratorMultipleFiles) {
7c673cae
FG
1302 Options options = CurrentOptions();
1303 BlockBasedTableOptions table_options;
1304 table_options.use_delta_encoding = false;
1305 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1306 options.disable_auto_compactions = true;
1307 options.write_buffer_size = 1024 * 1024 * 10; // 10 Mb
1308 DestroyAndReopen(options);
1309
1310 std::map<std::string, std::string> true_data;
1311
1312 // Generate 4 sst files in L2
1313 Random rnd(301);
1314 for (int i = 1; i <= 1000; i++) {
1315 std::string k = Key(i * 3);
1316 std::string v = RandomString(&rnd, 100);
1317 ASSERT_OK(Put(k, v));
1318 true_data[k] = v;
1319 if (i % 250 == 0) {
1320 ASSERT_OK(Flush());
1321 }
1322 }
1323 ASSERT_EQ(FilesPerLevel(0), "4");
1324 ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
1325 ASSERT_EQ(FilesPerLevel(0), "0,4");
1326
1327 // Generate 4 sst files in L0
1328 for (int i = 1; i <= 1000; i++) {
1329 std::string k = Key(i * 2);
1330 std::string v = RandomString(&rnd, 100);
1331 ASSERT_OK(Put(k, v));
1332 true_data[k] = v;
1333 if (i % 250 == 0) {
1334 ASSERT_OK(Flush());
1335 }
1336 }
1337 ASSERT_EQ(FilesPerLevel(0), "4,4");
1338
1339 // Add some keys/values in memtables
1340 for (int i = 1; i <= 1000; i++) {
1341 std::string k = Key(i);
1342 std::string v = RandomString(&rnd, 100);
1343 ASSERT_OK(Put(k, v));
1344 true_data[k] = v;
1345 }
1346 ASSERT_EQ(FilesPerLevel(0), "4,4");
1347
1348 ReadOptions ro;
1349 ro.pin_data = true;
11fdf7f2 1350 auto iter = NewIterator(ro);
7c673cae
FG
1351
1352 std::vector<std::pair<Slice, std::string>> results;
1353 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1354 std::string prop_value;
1355 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1356 ASSERT_EQ("1", prop_value);
1357 results.emplace_back(iter->key(), iter->value().ToString());
1358 }
1359
1360 ASSERT_EQ(results.size(), true_data.size());
1361 auto data_iter = true_data.begin();
1362 for (size_t i = 0; i < results.size(); i++, data_iter++) {
1363 auto& kv = results[i];
1364 ASSERT_EQ(kv.first, data_iter->first);
1365 ASSERT_EQ(kv.second, data_iter->second);
1366 }
1367
1368 delete iter;
1369}
1370#endif
1371
11fdf7f2 1372TEST_P(DBIteratorTest, PinnedDataIteratorMergeOperator) {
7c673cae
FG
1373 Options options = CurrentOptions();
1374 BlockBasedTableOptions table_options;
1375 table_options.use_delta_encoding = false;
1376 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1377 options.merge_operator = MergeOperators::CreateUInt64AddOperator();
1378 DestroyAndReopen(options);
1379
1380 std::string numbers[7];
1381 for (int val = 0; val <= 6; val++) {
1382 PutFixed64(numbers + val, val);
1383 }
1384
1385 // +1 all keys in range [ 0 => 999]
1386 for (int i = 0; i < 1000; i++) {
1387 WriteOptions wo;
1388 ASSERT_OK(db_->Merge(wo, Key(i), numbers[1]));
1389 }
1390
1391 // +2 all keys divisible by 2 in range [ 0 => 999]
1392 for (int i = 0; i < 1000; i += 2) {
1393 WriteOptions wo;
1394 ASSERT_OK(db_->Merge(wo, Key(i), numbers[2]));
1395 }
1396
1397 // +3 all keys divisible by 5 in range [ 0 => 999]
1398 for (int i = 0; i < 1000; i += 5) {
1399 WriteOptions wo;
1400 ASSERT_OK(db_->Merge(wo, Key(i), numbers[3]));
1401 }
1402
1403 ReadOptions ro;
1404 ro.pin_data = true;
11fdf7f2 1405 auto iter = NewIterator(ro);
7c673cae
FG
1406
1407 std::vector<std::pair<Slice, std::string>> results;
1408 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1409 std::string prop_value;
1410 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1411 ASSERT_EQ("1", prop_value);
1412 results.emplace_back(iter->key(), iter->value().ToString());
1413 }
1414
1415 ASSERT_EQ(results.size(), 1000);
1416 for (size_t i = 0; i < results.size(); i++) {
1417 auto& kv = results[i];
1418 ASSERT_EQ(kv.first, Key(static_cast<int>(i)));
1419 int expected_val = 1;
1420 if (i % 2 == 0) {
1421 expected_val += 2;
1422 }
1423 if (i % 5 == 0) {
1424 expected_val += 3;
1425 }
1426 ASSERT_EQ(kv.second, numbers[expected_val]);
1427 }
1428
1429 delete iter;
1430}
1431
11fdf7f2 1432TEST_P(DBIteratorTest, PinnedDataIteratorReadAfterUpdate) {
7c673cae
FG
1433 Options options = CurrentOptions();
1434 BlockBasedTableOptions table_options;
1435 table_options.use_delta_encoding = false;
1436 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1437 options.write_buffer_size = 100000;
1438 DestroyAndReopen(options);
1439
1440 Random rnd(301);
1441
1442 std::map<std::string, std::string> true_data;
1443 for (int i = 0; i < 1000; i++) {
1444 std::string k = RandomString(&rnd, 10);
1445 std::string v = RandomString(&rnd, 1000);
1446 ASSERT_OK(Put(k, v));
1447 true_data[k] = v;
1448 }
1449
1450 ReadOptions ro;
1451 ro.pin_data = true;
11fdf7f2 1452 auto iter = NewIterator(ro);
7c673cae
FG
1453
1454 // Delete 50% of the keys and update the other 50%
1455 for (auto& kv : true_data) {
1456 if (rnd.OneIn(2)) {
1457 ASSERT_OK(Delete(kv.first));
1458 } else {
1459 std::string new_val = RandomString(&rnd, 1000);
1460 ASSERT_OK(Put(kv.first, new_val));
1461 }
1462 }
1463
1464 std::vector<std::pair<Slice, std::string>> results;
1465 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1466 std::string prop_value;
1467 ASSERT_OK(iter->GetProperty("rocksdb.iterator.is-key-pinned", &prop_value));
1468 ASSERT_EQ("1", prop_value);
1469 results.emplace_back(iter->key(), iter->value().ToString());
1470 }
1471
1472 auto data_iter = true_data.begin();
1473 for (size_t i = 0; i < results.size(); i++, data_iter++) {
1474 auto& kv = results[i];
1475 ASSERT_EQ(kv.first, data_iter->first);
1476 ASSERT_EQ(kv.second, data_iter->second);
1477 }
1478
1479 delete iter;
1480}
1481
11fdf7f2
TL
1482class SliceTransformLimitedDomainGeneric : public SliceTransform {
1483 const char* Name() const override {
1484 return "SliceTransformLimitedDomainGeneric";
1485 }
1486
1487 Slice Transform(const Slice& src) const override {
1488 return Slice(src.data(), 1);
1489 }
1490
1491 bool InDomain(const Slice& src) const override {
1492 // prefix will be x????
1493 return src.size() >= 1;
1494 }
1495
1496 bool InRange(const Slice& dst) const override {
1497 // prefix will be x????
1498 return dst.size() == 1;
1499 }
1500};
1501
1502TEST_P(DBIteratorTest, IterSeekForPrevCrossingFiles) {
7c673cae
FG
1503 Options options = CurrentOptions();
1504 options.prefix_extractor.reset(NewFixedPrefixTransform(1));
1505 options.disable_auto_compactions = true;
1506 // Enable prefix bloom for SST files
1507 BlockBasedTableOptions table_options;
1508 table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
1509 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1510 DestroyAndReopen(options);
1511
1512 ASSERT_OK(Put("a1", "va1"));
1513 ASSERT_OK(Put("a2", "va2"));
1514 ASSERT_OK(Put("a3", "va3"));
1515 ASSERT_OK(Flush());
1516
1517 ASSERT_OK(Put("b1", "vb1"));
1518 ASSERT_OK(Put("b2", "vb2"));
1519 ASSERT_OK(Put("b3", "vb3"));
1520 ASSERT_OK(Flush());
1521
1522 ASSERT_OK(Put("b4", "vb4"));
1523 ASSERT_OK(Put("d1", "vd1"));
1524 ASSERT_OK(Put("d2", "vd2"));
1525 ASSERT_OK(Put("d4", "vd4"));
1526 ASSERT_OK(Flush());
1527
1528 MoveFilesToLevel(1);
1529 {
1530 ReadOptions ro;
11fdf7f2
TL
1531 Iterator* iter = NewIterator(ro);
1532
1533 iter->SeekForPrev("a4");
1534 ASSERT_EQ(iter->key().ToString(), "a3");
1535 ASSERT_EQ(iter->value().ToString(), "va3");
1536
1537 iter->SeekForPrev("c2");
1538 ASSERT_EQ(iter->key().ToString(), "b3");
1539 iter->SeekForPrev("d3");
1540 ASSERT_EQ(iter->key().ToString(), "d2");
1541 iter->SeekForPrev("b5");
1542 ASSERT_EQ(iter->key().ToString(), "b4");
1543 delete iter;
1544 }
1545
1546 {
1547 ReadOptions ro;
1548 ro.prefix_same_as_start = true;
1549 Iterator* iter = NewIterator(ro);
1550 iter->SeekForPrev("c2");
1551 ASSERT_TRUE(!iter->Valid());
1552 delete iter;
1553 }
1554}
1555
1556TEST_P(DBIteratorTest, IterSeekForPrevCrossingFilesCustomPrefixExtractor) {
1557 Options options = CurrentOptions();
1558 options.prefix_extractor =
1559 std::make_shared<SliceTransformLimitedDomainGeneric>();
1560 options.disable_auto_compactions = true;
1561 // Enable prefix bloom for SST files
1562 BlockBasedTableOptions table_options;
1563 table_options.filter_policy.reset(NewBloomFilterPolicy(10, true));
1564 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1565 DestroyAndReopen(options);
1566
1567 ASSERT_OK(Put("a1", "va1"));
1568 ASSERT_OK(Put("a2", "va2"));
1569 ASSERT_OK(Put("a3", "va3"));
1570 ASSERT_OK(Flush());
1571
1572 ASSERT_OK(Put("b1", "vb1"));
1573 ASSERT_OK(Put("b2", "vb2"));
1574 ASSERT_OK(Put("b3", "vb3"));
1575 ASSERT_OK(Flush());
1576
1577 ASSERT_OK(Put("b4", "vb4"));
1578 ASSERT_OK(Put("d1", "vd1"));
1579 ASSERT_OK(Put("d2", "vd2"));
1580 ASSERT_OK(Put("d4", "vd4"));
1581 ASSERT_OK(Flush());
1582
1583 MoveFilesToLevel(1);
1584 {
1585 ReadOptions ro;
1586 Iterator* iter = NewIterator(ro);
7c673cae
FG
1587
1588 iter->SeekForPrev("a4");
1589 ASSERT_EQ(iter->key().ToString(), "a3");
1590 ASSERT_EQ(iter->value().ToString(), "va3");
1591
1592 iter->SeekForPrev("c2");
1593 ASSERT_EQ(iter->key().ToString(), "b3");
1594 iter->SeekForPrev("d3");
1595 ASSERT_EQ(iter->key().ToString(), "d2");
1596 iter->SeekForPrev("b5");
1597 ASSERT_EQ(iter->key().ToString(), "b4");
1598 delete iter;
1599 }
1600
1601 {
1602 ReadOptions ro;
1603 ro.prefix_same_as_start = true;
11fdf7f2 1604 Iterator* iter = NewIterator(ro);
7c673cae
FG
1605 iter->SeekForPrev("c2");
1606 ASSERT_TRUE(!iter->Valid());
1607 delete iter;
1608 }
1609}
1610
11fdf7f2 1611TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocks) {
7c673cae
FG
1612 Options options = CurrentOptions();
1613 BlockBasedTableOptions table_options;
1614 table_options.block_size = 1; // every block will contain one entry
1615 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
1616 options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
1617 options.disable_auto_compactions = true;
1618 options.max_sequential_skip_in_iterations = 8;
1619
1620 DestroyAndReopen(options);
1621
1622 // Putting such deletes will force DBIter::Prev() to fallback to a Seek
1623 for (int file_num = 0; file_num < 10; file_num++) {
1624 ASSERT_OK(Delete("key4"));
1625 ASSERT_OK(Flush());
1626 }
1627
1628 // First File containing 5 blocks of puts
1629 ASSERT_OK(Put("key1", "val1.0"));
1630 ASSERT_OK(Put("key2", "val2.0"));
1631 ASSERT_OK(Put("key3", "val3.0"));
1632 ASSERT_OK(Put("key4", "val4.0"));
1633 ASSERT_OK(Put("key5", "val5.0"));
1634 ASSERT_OK(Flush());
1635
1636 // Second file containing 9 blocks of merge operands
1637 ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.1"));
1638 ASSERT_OK(db_->Merge(WriteOptions(), "key1", "val1.2"));
1639
1640 ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.1"));
1641 ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.2"));
1642 ASSERT_OK(db_->Merge(WriteOptions(), "key2", "val2.3"));
1643
1644 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.1"));
1645 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.2"));
1646 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.3"));
1647 ASSERT_OK(db_->Merge(WriteOptions(), "key3", "val3.4"));
1648 ASSERT_OK(Flush());
1649
1650 {
1651 ReadOptions ro;
1652 ro.fill_cache = false;
11fdf7f2 1653 Iterator* iter = NewIterator(ro);
7c673cae
FG
1654
1655 iter->SeekToLast();
1656 ASSERT_EQ(iter->key().ToString(), "key5");
1657 ASSERT_EQ(iter->value().ToString(), "val5.0");
1658
1659 iter->Prev();
1660 ASSERT_EQ(iter->key().ToString(), "key4");
1661 ASSERT_EQ(iter->value().ToString(), "val4.0");
1662
1663 iter->Prev();
1664 ASSERT_EQ(iter->key().ToString(), "key3");
1665 ASSERT_EQ(iter->value().ToString(), "val3.0,val3.1,val3.2,val3.3,val3.4");
1666
1667 iter->Prev();
1668 ASSERT_EQ(iter->key().ToString(), "key2");
1669 ASSERT_EQ(iter->value().ToString(), "val2.0,val2.1,val2.2,val2.3");
1670
1671 iter->Prev();
1672 ASSERT_EQ(iter->key().ToString(), "key1");
1673 ASSERT_EQ(iter->value().ToString(), "val1.0,val1.1,val1.2");
1674
1675 delete iter;
1676 }
1677}
1678
11fdf7f2 1679TEST_P(DBIteratorTest, IterPrevKeyCrossingBlocksRandomized) {
7c673cae
FG
1680 Options options = CurrentOptions();
1681 options.merge_operator = MergeOperators::CreateStringAppendTESTOperator();
1682 options.disable_auto_compactions = true;
1683 options.level0_slowdown_writes_trigger = (1 << 30);
1684 options.level0_stop_writes_trigger = (1 << 30);
1685 options.max_sequential_skip_in_iterations = 8;
1686 DestroyAndReopen(options);
1687
1688 const int kNumKeys = 500;
1689 // Small number of merge operands to make sure that DBIter::Prev() dont
1690 // fall back to Seek()
1691 const int kNumMergeOperands = 3;
1692 // Use value size that will make sure that every block contain 1 key
1693 const int kValSize =
1694 static_cast<int>(BlockBasedTableOptions().block_size) * 4;
1695 // Percentage of keys that wont get merge operations
1696 const int kNoMergeOpPercentage = 20;
1697 // Percentage of keys that will be deleted
1698 const int kDeletePercentage = 10;
1699
1700 // For half of the key range we will write multiple deletes first to
1701 // force DBIter::Prev() to fall back to Seek()
1702 for (int file_num = 0; file_num < 10; file_num++) {
1703 for (int i = 0; i < kNumKeys; i += 2) {
1704 ASSERT_OK(Delete(Key(i)));
1705 }
1706 ASSERT_OK(Flush());
1707 }
1708
1709 Random rnd(301);
1710 std::map<std::string, std::string> true_data;
1711 std::string gen_key;
1712 std::string gen_val;
1713
1714 for (int i = 0; i < kNumKeys; i++) {
1715 gen_key = Key(i);
1716 gen_val = RandomString(&rnd, kValSize);
1717
1718 ASSERT_OK(Put(gen_key, gen_val));
1719 true_data[gen_key] = gen_val;
1720 }
1721 ASSERT_OK(Flush());
1722
1723 // Separate values and merge operands in different file so that we
1724 // make sure that we dont merge them while flushing but actually
1725 // merge them in the read path
1726 for (int i = 0; i < kNumKeys; i++) {
1727 if (rnd.OneIn(static_cast<int>(100.0 / kNoMergeOpPercentage))) {
1728 // Dont give merge operations for some keys
1729 continue;
1730 }
1731
1732 for (int j = 0; j < kNumMergeOperands; j++) {
1733 gen_key = Key(i);
1734 gen_val = RandomString(&rnd, kValSize);
1735
1736 ASSERT_OK(db_->Merge(WriteOptions(), gen_key, gen_val));
1737 true_data[gen_key] += "," + gen_val;
1738 }
1739 }
1740 ASSERT_OK(Flush());
1741
1742 for (int i = 0; i < kNumKeys; i++) {
1743 if (rnd.OneIn(static_cast<int>(100.0 / kDeletePercentage))) {
1744 gen_key = Key(i);
1745
1746 ASSERT_OK(Delete(gen_key));
1747 true_data.erase(gen_key);
1748 }
1749 }
1750 ASSERT_OK(Flush());
1751
1752 {
1753 ReadOptions ro;
1754 ro.fill_cache = false;
11fdf7f2 1755 Iterator* iter = NewIterator(ro);
7c673cae
FG
1756 auto data_iter = true_data.rbegin();
1757
1758 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
1759 ASSERT_EQ(iter->key().ToString(), data_iter->first);
1760 ASSERT_EQ(iter->value().ToString(), data_iter->second);
1761 data_iter++;
1762 }
1763 ASSERT_EQ(data_iter, true_data.rend());
1764
1765 delete iter;
1766 }
1767
1768 {
1769 ReadOptions ro;
1770 ro.fill_cache = false;
11fdf7f2 1771 Iterator* iter = NewIterator(ro);
7c673cae
FG
1772 auto data_iter = true_data.rbegin();
1773
1774 int entries_right = 0;
1775 std::string seek_key;
1776 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
1777 // Verify key/value of current position
1778 ASSERT_EQ(iter->key().ToString(), data_iter->first);
1779 ASSERT_EQ(iter->value().ToString(), data_iter->second);
1780
1781 bool restore_position_with_seek = rnd.Uniform(2);
1782 if (restore_position_with_seek) {
1783 seek_key = iter->key().ToString();
1784 }
1785
1786 // Do some Next() operations the restore the iterator to orignal position
1787 int next_count =
1788 entries_right > 0 ? rnd.Uniform(std::min(entries_right, 10)) : 0;
1789 for (int i = 0; i < next_count; i++) {
1790 iter->Next();
1791 data_iter--;
1792
1793 ASSERT_EQ(iter->key().ToString(), data_iter->first);
1794 ASSERT_EQ(iter->value().ToString(), data_iter->second);
1795 }
1796
1797 if (restore_position_with_seek) {
1798 // Restore orignal position using Seek()
1799 iter->Seek(seek_key);
1800 for (int i = 0; i < next_count; i++) {
1801 data_iter++;
1802 }
1803
1804 ASSERT_EQ(iter->key().ToString(), data_iter->first);
1805 ASSERT_EQ(iter->value().ToString(), data_iter->second);
1806 } else {
1807 // Restore original position using Prev()
1808 for (int i = 0; i < next_count; i++) {
1809 iter->Prev();
1810 data_iter++;
1811
1812 ASSERT_EQ(iter->key().ToString(), data_iter->first);
1813 ASSERT_EQ(iter->value().ToString(), data_iter->second);
1814 }
1815 }
1816
1817 entries_right++;
1818 data_iter++;
1819 }
1820 ASSERT_EQ(data_iter, true_data.rend());
1821
1822 delete iter;
1823 }
1824}
1825
11fdf7f2 1826TEST_P(DBIteratorTest, IteratorWithLocalStatistics) {
7c673cae
FG
1827 Options options = CurrentOptions();
1828 options.statistics = rocksdb::CreateDBStatistics();
1829 DestroyAndReopen(options);
1830
1831 Random rnd(301);
1832 for (int i = 0; i < 1000; i++) {
1833 // Key 10 bytes / Value 10 bytes
1834 ASSERT_OK(Put(RandomString(&rnd, 10), RandomString(&rnd, 10)));
1835 }
1836
1837 std::atomic<uint64_t> total_next(0);
1838 std::atomic<uint64_t> total_next_found(0);
1839 std::atomic<uint64_t> total_prev(0);
1840 std::atomic<uint64_t> total_prev_found(0);
1841 std::atomic<uint64_t> total_bytes(0);
1842
1843 std::vector<port::Thread> threads;
1844 std::function<void()> reader_func_next = [&]() {
11fdf7f2
TL
1845 SetPerfLevel(kEnableCount);
1846 get_perf_context()->Reset();
1847 Iterator* iter = NewIterator(ReadOptions());
7c673cae
FG
1848
1849 iter->SeekToFirst();
1850 // Seek will bump ITER_BYTES_READ
11fdf7f2
TL
1851 uint64_t bytes = 0;
1852 bytes += iter->key().size();
1853 bytes += iter->value().size();
7c673cae
FG
1854 while (true) {
1855 iter->Next();
1856 total_next++;
1857
1858 if (!iter->Valid()) {
1859 break;
1860 }
1861 total_next_found++;
11fdf7f2
TL
1862 bytes += iter->key().size();
1863 bytes += iter->value().size();
7c673cae
FG
1864 }
1865
1866 delete iter;
11fdf7f2
TL
1867 ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes);
1868 SetPerfLevel(kDisable);
1869 total_bytes += bytes;
7c673cae
FG
1870 };
1871
1872 std::function<void()> reader_func_prev = [&]() {
11fdf7f2
TL
1873 SetPerfLevel(kEnableCount);
1874 Iterator* iter = NewIterator(ReadOptions());
7c673cae
FG
1875
1876 iter->SeekToLast();
1877 // Seek will bump ITER_BYTES_READ
11fdf7f2
TL
1878 uint64_t bytes = 0;
1879 bytes += iter->key().size();
1880 bytes += iter->value().size();
7c673cae
FG
1881 while (true) {
1882 iter->Prev();
1883 total_prev++;
1884
1885 if (!iter->Valid()) {
1886 break;
1887 }
1888 total_prev_found++;
11fdf7f2
TL
1889 bytes += iter->key().size();
1890 bytes += iter->value().size();
7c673cae
FG
1891 }
1892
1893 delete iter;
11fdf7f2
TL
1894 ASSERT_EQ(bytes, get_perf_context()->iter_read_bytes);
1895 SetPerfLevel(kDisable);
1896 total_bytes += bytes;
7c673cae
FG
1897 };
1898
1899 for (int i = 0; i < 10; i++) {
1900 threads.emplace_back(reader_func_next);
1901 }
1902 for (int i = 0; i < 15; i++) {
1903 threads.emplace_back(reader_func_prev);
1904 }
1905
1906 for (auto& t : threads) {
1907 t.join();
1908 }
1909
1910 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT), (uint64_t)total_next);
1911 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_NEXT_FOUND),
1912 (uint64_t)total_next_found);
1913 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV), (uint64_t)total_prev);
1914 ASSERT_EQ(TestGetTickerCount(options, NUMBER_DB_PREV_FOUND),
1915 (uint64_t)total_prev_found);
1916 ASSERT_EQ(TestGetTickerCount(options, ITER_BYTES_READ), (uint64_t)total_bytes);
1917
1918}
1919
11fdf7f2 1920TEST_P(DBIteratorTest, ReadAhead) {
7c673cae
FG
1921 Options options;
1922 env_->count_random_reads_ = true;
1923 options.env = env_;
1924 options.disable_auto_compactions = true;
1925 options.write_buffer_size = 4 << 20;
1926 options.statistics = rocksdb::CreateDBStatistics();
1927 BlockBasedTableOptions table_options;
1928 table_options.block_size = 1024;
1929 table_options.no_block_cache = true;
1930 options.table_factory.reset(new BlockBasedTableFactory(table_options));
1931 Reopen(options);
1932
1933 std::string value(1024, 'a');
1934 for (int i = 0; i < 100; i++) {
1935 Put(Key(i), value);
1936 }
1937 ASSERT_OK(Flush());
1938 MoveFilesToLevel(2);
1939
1940 for (int i = 0; i < 100; i++) {
1941 Put(Key(i), value);
1942 }
1943 ASSERT_OK(Flush());
1944 MoveFilesToLevel(1);
1945
1946 for (int i = 0; i < 100; i++) {
1947 Put(Key(i), value);
1948 }
1949 ASSERT_OK(Flush());
1950#ifndef ROCKSDB_LITE
1951 ASSERT_EQ("1,1,1", FilesPerLevel());
1952#endif // !ROCKSDB_LITE
1953
1954 env_->random_read_bytes_counter_ = 0;
1955 options.statistics->setTickerCount(NO_FILE_OPENS, 0);
1956 ReadOptions read_options;
11fdf7f2 1957 auto* iter = NewIterator(read_options);
7c673cae
FG
1958 iter->SeekToFirst();
1959 int64_t num_file_opens = TestGetTickerCount(options, NO_FILE_OPENS);
1960 size_t bytes_read = env_->random_read_bytes_counter_;
1961 delete iter;
1962
494da23a 1963 int64_t num_file_closes = TestGetTickerCount(options, NO_FILE_CLOSES);
7c673cae
FG
1964 env_->random_read_bytes_counter_ = 0;
1965 options.statistics->setTickerCount(NO_FILE_OPENS, 0);
1966 read_options.readahead_size = 1024 * 10;
11fdf7f2 1967 iter = NewIterator(read_options);
7c673cae
FG
1968 iter->SeekToFirst();
1969 int64_t num_file_opens_readahead = TestGetTickerCount(options, NO_FILE_OPENS);
1970 size_t bytes_read_readahead = env_->random_read_bytes_counter_;
1971 delete iter;
494da23a
TL
1972 int64_t num_file_closes_readahead =
1973 TestGetTickerCount(options, NO_FILE_CLOSES);
7c673cae 1974 ASSERT_EQ(num_file_opens + 3, num_file_opens_readahead);
494da23a 1975 ASSERT_EQ(num_file_closes + 3, num_file_closes_readahead);
7c673cae
FG
1976 ASSERT_GT(bytes_read_readahead, bytes_read);
1977 ASSERT_GT(bytes_read_readahead, read_options.readahead_size * 3);
1978
1979 // Verify correctness.
11fdf7f2 1980 iter = NewIterator(read_options);
7c673cae
FG
1981 int count = 0;
1982 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
1983 ASSERT_EQ(value, iter->value());
1984 count++;
1985 }
1986 ASSERT_EQ(100, count);
1987 for (int i = 0; i < 100; i++) {
1988 iter->Seek(Key(i));
1989 ASSERT_EQ(value, iter->value());
1990 }
1991 delete iter;
1992}
1993
1994// Insert a key, create a snapshot iterator, overwrite key lots of times,
1995// seek to a smaller key. Expect DBIter to fall back to a seek instead of
1996// going through all the overwrites linearly.
11fdf7f2 1997TEST_P(DBIteratorTest, DBIteratorSkipRecentDuplicatesTest) {
7c673cae
FG
1998 Options options = CurrentOptions();
1999 options.env = env_;
2000 options.create_if_missing = true;
2001 options.max_sequential_skip_in_iterations = 3;
2002 options.prefix_extractor = nullptr;
2003 options.write_buffer_size = 1 << 27; // big enough to avoid flush
2004 options.statistics = rocksdb::CreateDBStatistics();
2005 DestroyAndReopen(options);
2006
2007 // Insert.
2008 ASSERT_OK(Put("b", "0"));
2009
2010 // Create iterator.
2011 ReadOptions ro;
11fdf7f2 2012 std::unique_ptr<Iterator> iter(NewIterator(ro));
7c673cae
FG
2013
2014 // Insert a lot.
2015 for (int i = 0; i < 100; ++i) {
2016 ASSERT_OK(Put("b", std::to_string(i + 1).c_str()));
2017 }
2018
2019#ifndef ROCKSDB_LITE
2020 // Check that memtable wasn't flushed.
2021 std::string val;
2022 ASSERT_TRUE(db_->GetProperty("rocksdb.num-files-at-level0", &val));
2023 EXPECT_EQ("0", val);
2024#endif
2025
2026 // Seek iterator to a smaller key.
11fdf7f2 2027 get_perf_context()->Reset();
7c673cae
FG
2028 iter->Seek("a");
2029 ASSERT_TRUE(iter->Valid());
2030 EXPECT_EQ("b", iter->key().ToString());
2031 EXPECT_EQ("0", iter->value().ToString());
2032
2033 // Check that the seek didn't do too much work.
2034 // Checks are not tight, just make sure that everything is well below 100.
11fdf7f2
TL
2035 EXPECT_LT(get_perf_context()->internal_key_skipped_count, 4);
2036 EXPECT_LT(get_perf_context()->internal_recent_skipped_count, 8);
2037 EXPECT_LT(get_perf_context()->seek_on_memtable_count, 10);
2038 EXPECT_LT(get_perf_context()->next_on_memtable_count, 10);
2039 EXPECT_LT(get_perf_context()->prev_on_memtable_count, 10);
7c673cae
FG
2040
2041 // Check that iterator did something like what we expect.
11fdf7f2
TL
2042 EXPECT_EQ(get_perf_context()->internal_delete_skipped_count, 0);
2043 EXPECT_EQ(get_perf_context()->internal_merge_count, 0);
2044 EXPECT_GE(get_perf_context()->internal_recent_skipped_count, 2);
2045 EXPECT_GE(get_perf_context()->seek_on_memtable_count, 2);
7c673cae
FG
2046 EXPECT_EQ(1, options.statistics->getTickerCount(
2047 NUMBER_OF_RESEEKS_IN_ITERATION));
2048}
2049
11fdf7f2
TL
2050TEST_P(DBIteratorTest, Refresh) {
2051 ASSERT_OK(Put("x", "y"));
2052
2053 std::unique_ptr<Iterator> iter(NewIterator(ReadOptions()));
2054 iter->Seek(Slice("a"));
2055 ASSERT_TRUE(iter->Valid());
2056 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2057 iter->Next();
2058 ASSERT_FALSE(iter->Valid());
2059
2060 ASSERT_OK(Put("c", "d"));
2061
2062 iter->Seek(Slice("a"));
2063 ASSERT_TRUE(iter->Valid());
2064 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2065 iter->Next();
2066 ASSERT_FALSE(iter->Valid());
2067
2068 iter->Refresh();
2069
2070 iter->Seek(Slice("a"));
2071 ASSERT_TRUE(iter->Valid());
2072 ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2073 iter->Next();
2074 ASSERT_TRUE(iter->Valid());
2075 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2076 iter->Next();
2077 ASSERT_FALSE(iter->Valid());
2078
2079 dbfull()->Flush(FlushOptions());
2080
2081 ASSERT_OK(Put("m", "n"));
2082
2083 iter->Seek(Slice("a"));
2084 ASSERT_TRUE(iter->Valid());
2085 ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2086 iter->Next();
2087 ASSERT_TRUE(iter->Valid());
2088 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2089 iter->Next();
2090 ASSERT_FALSE(iter->Valid());
2091
2092 iter->Refresh();
2093
2094 iter->Seek(Slice("a"));
2095 ASSERT_TRUE(iter->Valid());
2096 ASSERT_EQ(iter->key().compare(Slice("c")), 0);
2097 iter->Next();
2098 ASSERT_TRUE(iter->Valid());
2099 ASSERT_EQ(iter->key().compare(Slice("m")), 0);
2100 iter->Next();
2101 ASSERT_TRUE(iter->Valid());
2102 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2103 iter->Next();
2104 ASSERT_FALSE(iter->Valid());
2105
2106 iter.reset();
2107}
2108
2109TEST_P(DBIteratorTest, RefreshWithSnapshot) {
2110 ASSERT_OK(Put("x", "y"));
2111 const Snapshot* snapshot = db_->GetSnapshot();
2112 ReadOptions options;
2113 options.snapshot = snapshot;
2114 Iterator* iter = NewIterator(options);
2115
2116 iter->Seek(Slice("a"));
2117 ASSERT_TRUE(iter->Valid());
2118 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2119 iter->Next();
2120 ASSERT_FALSE(iter->Valid());
2121
2122 ASSERT_OK(Put("c", "d"));
2123
2124 iter->Seek(Slice("a"));
2125 ASSERT_TRUE(iter->Valid());
2126 ASSERT_EQ(iter->key().compare(Slice("x")), 0);
2127 iter->Next();
2128 ASSERT_FALSE(iter->Valid());
2129
2130 Status s;
2131 s = iter->Refresh();
2132 ASSERT_TRUE(s.IsNotSupported());
2133 db_->ReleaseSnapshot(snapshot);
2134 delete iter;
2135}
2136
2137TEST_P(DBIteratorTest, CreationFailure) {
2138 SyncPoint::GetInstance()->SetCallBack(
2139 "DBImpl::NewInternalIterator:StatusCallback", [](void* arg) {
2140 *(reinterpret_cast<Status*>(arg)) = Status::Corruption("test status");
2141 });
2142 SyncPoint::GetInstance()->EnableProcessing();
2143
2144 Iterator* iter = NewIterator(ReadOptions());
2145 ASSERT_FALSE(iter->Valid());
2146 ASSERT_TRUE(iter->status().IsCorruption());
2147 delete iter;
2148}
2149
2150TEST_P(DBIteratorTest, UpperBoundWithChangeDirection) {
2151 Options options = CurrentOptions();
2152 options.max_sequential_skip_in_iterations = 3;
2153 DestroyAndReopen(options);
2154
2155 // write a bunch of kvs to the database.
2156 ASSERT_OK(Put("a", "1"));
2157 ASSERT_OK(Put("y", "1"));
2158 ASSERT_OK(Put("y1", "1"));
2159 ASSERT_OK(Put("y2", "1"));
2160 ASSERT_OK(Put("y3", "1"));
2161 ASSERT_OK(Put("z", "1"));
2162 ASSERT_OK(Flush());
2163 ASSERT_OK(Put("a", "1"));
2164 ASSERT_OK(Put("z", "1"));
2165 ASSERT_OK(Put("bar", "1"));
2166 ASSERT_OK(Put("foo", "1"));
2167
2168 std::string upper_bound = "x";
2169 Slice ub_slice(upper_bound);
2170 ReadOptions ro;
2171 ro.iterate_upper_bound = &ub_slice;
2172 ro.max_skippable_internal_keys = 1000;
2173
2174 Iterator* iter = NewIterator(ro);
2175 iter->Seek("foo");
2176 ASSERT_TRUE(iter->Valid());
2177 ASSERT_EQ("foo", iter->key().ToString());
2178
2179 iter->Prev();
2180 ASSERT_TRUE(iter->Valid());
2181 ASSERT_OK(iter->status());
2182 ASSERT_EQ("bar", iter->key().ToString());
2183
2184 delete iter;
2185}
2186
2187TEST_P(DBIteratorTest, TableFilter) {
2188 ASSERT_OK(Put("a", "1"));
2189 dbfull()->Flush(FlushOptions());
2190 ASSERT_OK(Put("b", "2"));
2191 ASSERT_OK(Put("c", "3"));
2192 dbfull()->Flush(FlushOptions());
2193 ASSERT_OK(Put("d", "4"));
2194 ASSERT_OK(Put("e", "5"));
2195 ASSERT_OK(Put("f", "6"));
2196 dbfull()->Flush(FlushOptions());
2197
2198 // Ensure the table_filter callback is called once for each table.
2199 {
2200 std::set<uint64_t> unseen{1, 2, 3};
2201 ReadOptions opts;
2202 opts.table_filter = [&](const TableProperties& props) {
2203 auto it = unseen.find(props.num_entries);
2204 if (it == unseen.end()) {
2205 ADD_FAILURE() << "saw table properties with an unexpected "
2206 << props.num_entries << " entries";
2207 } else {
2208 unseen.erase(it);
2209 }
2210 return true;
2211 };
2212 auto iter = NewIterator(opts);
2213 iter->SeekToFirst();
2214 ASSERT_EQ(IterStatus(iter), "a->1");
2215 iter->Next();
2216 ASSERT_EQ(IterStatus(iter), "b->2");
2217 iter->Next();
2218 ASSERT_EQ(IterStatus(iter), "c->3");
2219 iter->Next();
2220 ASSERT_EQ(IterStatus(iter), "d->4");
2221 iter->Next();
2222 ASSERT_EQ(IterStatus(iter), "e->5");
2223 iter->Next();
2224 ASSERT_EQ(IterStatus(iter), "f->6");
2225 iter->Next();
2226 ASSERT_FALSE(iter->Valid());
2227 ASSERT_TRUE(unseen.empty());
2228 delete iter;
2229 }
2230
2231 // Ensure returning false in the table_filter hides the keys from that table
2232 // during iteration.
2233 {
2234 ReadOptions opts;
2235 opts.table_filter = [](const TableProperties& props) {
2236 return props.num_entries != 2;
2237 };
2238 auto iter = NewIterator(opts);
2239 iter->SeekToFirst();
2240 ASSERT_EQ(IterStatus(iter), "a->1");
2241 iter->Next();
2242 ASSERT_EQ(IterStatus(iter), "d->4");
2243 iter->Next();
2244 ASSERT_EQ(IterStatus(iter), "e->5");
2245 iter->Next();
2246 ASSERT_EQ(IterStatus(iter), "f->6");
2247 iter->Next();
2248 ASSERT_FALSE(iter->Valid());
2249 delete iter;
2250 }
2251}
2252
2253TEST_P(DBIteratorTest, UpperBoundWithPrevReseek) {
2254 Options options = CurrentOptions();
2255 options.max_sequential_skip_in_iterations = 3;
2256 DestroyAndReopen(options);
2257
2258 // write a bunch of kvs to the database.
2259 ASSERT_OK(Put("a", "1"));
2260 ASSERT_OK(Put("y", "1"));
2261 ASSERT_OK(Put("z", "1"));
2262 ASSERT_OK(Flush());
2263 ASSERT_OK(Put("a", "1"));
2264 ASSERT_OK(Put("z", "1"));
2265 ASSERT_OK(Put("bar", "1"));
2266 ASSERT_OK(Put("foo", "1"));
2267 ASSERT_OK(Put("foo", "2"));
2268
2269 ASSERT_OK(Put("foo", "3"));
2270 ASSERT_OK(Put("foo", "4"));
2271 ASSERT_OK(Put("foo", "5"));
2272 const Snapshot* snapshot = db_->GetSnapshot();
2273 ASSERT_OK(Put("foo", "6"));
2274
2275 std::string upper_bound = "x";
2276 Slice ub_slice(upper_bound);
2277 ReadOptions ro;
2278 ro.snapshot = snapshot;
2279 ro.iterate_upper_bound = &ub_slice;
2280
2281 Iterator* iter = NewIterator(ro);
2282 iter->SeekForPrev("goo");
2283 ASSERT_TRUE(iter->Valid());
2284 ASSERT_EQ("foo", iter->key().ToString());
2285 iter->Prev();
2286
2287 ASSERT_TRUE(iter->Valid());
2288 ASSERT_EQ("bar", iter->key().ToString());
2289
2290 delete iter;
2291 db_->ReleaseSnapshot(snapshot);
2292}
2293
2294TEST_P(DBIteratorTest, SkipStatistics) {
2295 Options options = CurrentOptions();
2296 options.statistics = rocksdb::CreateDBStatistics();
2297 DestroyAndReopen(options);
2298
2299 int skip_count = 0;
2300
2301 // write a bunch of kvs to the database.
2302 ASSERT_OK(Put("a", "1"));
2303 ASSERT_OK(Put("b", "1"));
2304 ASSERT_OK(Put("c", "1"));
2305 ASSERT_OK(Flush());
2306 ASSERT_OK(Put("d", "1"));
2307 ASSERT_OK(Put("e", "1"));
2308 ASSERT_OK(Put("f", "1"));
2309 ASSERT_OK(Put("a", "2"));
2310 ASSERT_OK(Put("b", "2"));
2311 ASSERT_OK(Flush());
2312 ASSERT_OK(Delete("d"));
2313 ASSERT_OK(Delete("e"));
2314 ASSERT_OK(Delete("f"));
2315
2316 Iterator* iter = NewIterator(ReadOptions());
2317 int count = 0;
2318 for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
2319 ASSERT_OK(iter->status());
2320 count++;
2321 }
2322 ASSERT_EQ(count, 3);
2323 delete iter;
2324 skip_count += 8; // 3 deletes + 3 original keys + 2 lower in sequence
2325 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2326
2327 iter = NewIterator(ReadOptions());
2328 count = 0;
2329 for (iter->SeekToLast(); iter->Valid(); iter->Prev()) {
2330 ASSERT_OK(iter->status());
2331 count++;
2332 }
2333 ASSERT_EQ(count, 3);
2334 delete iter;
2335 skip_count += 8; // Same as above, but in reverse order
2336 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2337
2338 ASSERT_OK(Put("aa", "1"));
2339 ASSERT_OK(Put("ab", "1"));
2340 ASSERT_OK(Put("ac", "1"));
2341 ASSERT_OK(Put("ad", "1"));
2342 ASSERT_OK(Flush());
2343 ASSERT_OK(Delete("ab"));
2344 ASSERT_OK(Delete("ac"));
2345 ASSERT_OK(Delete("ad"));
2346
2347 ReadOptions ro;
2348 Slice prefix("b");
2349 ro.iterate_upper_bound = &prefix;
2350
2351 iter = NewIterator(ro);
2352 count = 0;
2353 for(iter->Seek("aa"); iter->Valid(); iter->Next()) {
2354 ASSERT_OK(iter->status());
2355 count++;
2356 }
2357 ASSERT_EQ(count, 1);
2358 delete iter;
2359 skip_count += 6; // 3 deletes + 3 original keys
2360 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2361
2362 iter = NewIterator(ro);
2363 count = 0;
2364 for(iter->SeekToLast(); iter->Valid(); iter->Prev()) {
2365 ASSERT_OK(iter->status());
2366 count++;
2367 }
2368 ASSERT_EQ(count, 2);
2369 delete iter;
2370 // 3 deletes + 3 original keys + lower sequence of "a"
2371 skip_count += 7;
2372 ASSERT_EQ(skip_count, TestGetTickerCount(options, NUMBER_ITER_SKIP));
2373}
2374
2375TEST_P(DBIteratorTest, SeekAfterHittingManyInternalKeys) {
2376 Options options = CurrentOptions();
2377 DestroyAndReopen(options);
2378 ReadOptions ropts;
2379 ropts.max_skippable_internal_keys = 2;
2380
2381 Put("1", "val_1");
2382 // Add more tombstones than max_skippable_internal_keys so that Next() fails.
2383 Delete("2");
2384 Delete("3");
2385 Delete("4");
2386 Delete("5");
2387 Put("6", "val_6");
2388
494da23a 2389 std::unique_ptr<Iterator> iter(NewIterator(ropts));
11fdf7f2
TL
2390 iter->SeekToFirst();
2391
2392 ASSERT_TRUE(iter->Valid());
2393 ASSERT_EQ(iter->key().ToString(), "1");
2394 ASSERT_EQ(iter->value().ToString(), "val_1");
2395
2396 // This should fail as incomplete due to too many non-visible internal keys on
2397 // the way to the next valid user key.
2398 iter->Next();
2399 ASSERT_TRUE(!iter->Valid());
2400 ASSERT_TRUE(iter->status().IsIncomplete());
2401
2402 // Get the internal key at which Next() failed.
2403 std::string prop_value;
2404 ASSERT_OK(iter->GetProperty("rocksdb.iterator.internal-key", &prop_value));
2405 ASSERT_EQ("4", prop_value);
2406
2407 // Create a new iterator to seek to the internal key.
494da23a 2408 std::unique_ptr<Iterator> iter2(NewIterator(ropts));
11fdf7f2
TL
2409 iter2->Seek(prop_value);
2410 ASSERT_TRUE(iter2->Valid());
2411 ASSERT_OK(iter2->status());
2412
2413 ASSERT_EQ(iter2->key().ToString(), "6");
2414 ASSERT_EQ(iter2->value().ToString(), "val_6");
2415}
2416
2417// Reproduces a former bug where iterator would skip some records when DBIter
2418// re-seeks subiterator with Incomplete status.
2419TEST_P(DBIteratorTest, NonBlockingIterationBugRepro) {
2420 Options options = CurrentOptions();
2421 BlockBasedTableOptions table_options;
2422 // Make sure the sst file has more than one block.
2423 table_options.flush_block_policy_factory =
2424 std::make_shared<FlushBlockEveryKeyPolicyFactory>();
2425 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
2426 DestroyAndReopen(options);
2427
2428 // Two records in sst file, each in its own block.
2429 Put("b", "");
2430 Put("d", "");
2431 Flush();
2432
2433 // Create a nonblocking iterator before writing to memtable.
2434 ReadOptions ropt;
2435 ropt.read_tier = kBlockCacheTier;
494da23a 2436 std::unique_ptr<Iterator> iter(NewIterator(ropt));
11fdf7f2
TL
2437
2438 // Overwrite a key in memtable many times to hit
2439 // max_sequential_skip_in_iterations (which is 8 by default).
2440 for (int i = 0; i < 20; ++i) {
2441 Put("c", "");
2442 }
2443
2444 // Load the second block in sst file into the block cache.
2445 {
494da23a 2446 std::unique_ptr<Iterator> iter2(NewIterator(ReadOptions()));
11fdf7f2
TL
2447 iter2->Seek("d");
2448 }
2449
2450 // Finally seek the nonblocking iterator.
2451 iter->Seek("a");
2452 // With the bug, the status used to be OK, and the iterator used to point to
2453 // "d".
2454 EXPECT_TRUE(iter->status().IsIncomplete());
2455}
2456
2457TEST_P(DBIteratorTest, SeekBackwardAfterOutOfUpperBound) {
2458 Put("a", "");
2459 Put("b", "");
2460 Flush();
2461
2462 ReadOptions ropt;
2463 Slice ub = "b";
2464 ropt.iterate_upper_bound = &ub;
2465
2466 std::unique_ptr<Iterator> it(dbfull()->NewIterator(ropt));
2467 it->SeekForPrev("a");
2468 ASSERT_TRUE(it->Valid());
2469 ASSERT_OK(it->status());
2470 ASSERT_EQ("a", it->key().ToString());
2471 it->Next();
2472 ASSERT_FALSE(it->Valid());
2473 ASSERT_OK(it->status());
2474 it->SeekForPrev("a");
2475 ASSERT_OK(it->status());
2476
2477 ASSERT_TRUE(it->Valid());
2478 ASSERT_EQ("a", it->key().ToString());
2479}
2480
2481INSTANTIATE_TEST_CASE_P(DBIteratorTestInstance, DBIteratorTest,
2482 testing::Values(true, false));
2483
2484// Tests how DBIter work with ReadCallback
2485class DBIteratorWithReadCallbackTest : public DBIteratorTest {};
2486
2487TEST_F(DBIteratorWithReadCallbackTest, ReadCallback) {
2488 class TestReadCallback : public ReadCallback {
2489 public:
494da23a
TL
2490 explicit TestReadCallback(SequenceNumber _max_visible_seq)
2491 : ReadCallback(_max_visible_seq) {}
11fdf7f2 2492
494da23a
TL
2493 bool IsVisibleFullCheck(SequenceNumber seq) override {
2494 return seq <= max_visible_seq_;
11fdf7f2 2495 }
11fdf7f2
TL
2496 };
2497
2498 ASSERT_OK(Put("foo", "v1"));
2499 ASSERT_OK(Put("foo", "v2"));
2500 ASSERT_OK(Put("foo", "v3"));
2501 ASSERT_OK(Put("a", "va"));
2502 ASSERT_OK(Put("z", "vz"));
2503 SequenceNumber seq1 = db_->GetLatestSequenceNumber();
2504 TestReadCallback callback1(seq1);
2505 ASSERT_OK(Put("foo", "v4"));
2506 ASSERT_OK(Put("foo", "v5"));
2507 ASSERT_OK(Put("bar", "v7"));
2508
2509 SequenceNumber seq2 = db_->GetLatestSequenceNumber();
2510 auto* cfd =
2511 reinterpret_cast<ColumnFamilyHandleImpl*>(db_->DefaultColumnFamily())
2512 ->cfd();
2513 // The iterator are suppose to see data before seq1.
2514 Iterator* iter =
2515 dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq2, &callback1);
2516
2517 // Seek
2518 // The latest value of "foo" before seq1 is "v3"
2519 iter->Seek("foo");
2520 ASSERT_TRUE(iter->Valid());
2521 ASSERT_OK(iter->status());
2522 ASSERT_EQ("foo", iter->key());
2523 ASSERT_EQ("v3", iter->value());
2524 // "bar" is not visible to the iterator. It will move on to the next key
2525 // "foo".
2526 iter->Seek("bar");
2527 ASSERT_TRUE(iter->Valid());
2528 ASSERT_OK(iter->status());
2529 ASSERT_EQ("foo", iter->key());
2530 ASSERT_EQ("v3", iter->value());
2531
2532 // Next
2533 // Seek to "a"
2534 iter->Seek("a");
2535 ASSERT_TRUE(iter->Valid());
2536 ASSERT_OK(iter->status());
2537 ASSERT_EQ("va", iter->value());
2538 // "bar" is not visible to the iterator. It will move on to the next key
2539 // "foo".
2540 iter->Next();
2541 ASSERT_TRUE(iter->Valid());
2542 ASSERT_OK(iter->status());
2543 ASSERT_EQ("foo", iter->key());
2544 ASSERT_EQ("v3", iter->value());
2545
2546 // Prev
2547 // Seek to "z"
2548 iter->Seek("z");
2549 ASSERT_TRUE(iter->Valid());
2550 ASSERT_OK(iter->status());
2551 ASSERT_EQ("vz", iter->value());
2552 // The previous key is "foo", which is visible to the iterator.
2553 iter->Prev();
2554 ASSERT_TRUE(iter->Valid());
2555 ASSERT_OK(iter->status());
2556 ASSERT_EQ("foo", iter->key());
2557 ASSERT_EQ("v3", iter->value());
2558 // "bar" is not visible to the iterator. It will move on to the next key "a".
2559 iter->Prev(); // skipping "bar"
2560 ASSERT_TRUE(iter->Valid());
2561 ASSERT_OK(iter->status());
2562 ASSERT_EQ("a", iter->key());
2563 ASSERT_EQ("va", iter->value());
2564
2565 // SeekForPrev
2566 // The previous key is "foo", which is visible to the iterator.
2567 iter->SeekForPrev("y");
2568 ASSERT_TRUE(iter->Valid());
2569 ASSERT_OK(iter->status());
2570 ASSERT_EQ("foo", iter->key());
2571 ASSERT_EQ("v3", iter->value());
2572 // "bar" is not visible to the iterator. It will move on to the next key "a".
2573 iter->SeekForPrev("bar");
2574 ASSERT_TRUE(iter->Valid());
2575 ASSERT_OK(iter->status());
2576 ASSERT_EQ("a", iter->key());
2577 ASSERT_EQ("va", iter->value());
2578
2579 delete iter;
2580
2581 // Prev beyond max_sequential_skip_in_iterations
2582 uint64_t num_versions =
2583 CurrentOptions().max_sequential_skip_in_iterations + 10;
2584 for (uint64_t i = 0; i < num_versions; i++) {
2585 ASSERT_OK(Put("bar", ToString(i)));
2586 }
2587 SequenceNumber seq3 = db_->GetLatestSequenceNumber();
2588 TestReadCallback callback2(seq3);
2589 ASSERT_OK(Put("bar", "v8"));
2590 SequenceNumber seq4 = db_->GetLatestSequenceNumber();
2591
2592 // The iterator is suppose to see data before seq3.
2593 iter = dbfull()->NewIteratorImpl(ReadOptions(), cfd, seq4, &callback2);
2594 // Seek to "z", which is visible.
2595 iter->Seek("z");
2596 ASSERT_TRUE(iter->Valid());
2597 ASSERT_OK(iter->status());
2598 ASSERT_EQ("vz", iter->value());
2599 // Previous key is "foo" and the last value "v5" is visible.
2600 iter->Prev();
2601 ASSERT_TRUE(iter->Valid());
2602 ASSERT_OK(iter->status());
2603 ASSERT_EQ("foo", iter->key());
2604 ASSERT_EQ("v5", iter->value());
2605 // Since the number of values of "bar" is more than
2606 // max_sequential_skip_in_iterations, Prev() will ultimately fallback to
2607 // seek in forward direction. Here we test the fallback seek is correct.
2608 // The last visible value should be (num_versions - 1), as "v8" is not
2609 // visible.
2610 iter->Prev();
2611 ASSERT_TRUE(iter->Valid());
2612 ASSERT_OK(iter->status());
2613 ASSERT_EQ("bar", iter->key());
2614 ASSERT_EQ(ToString(num_versions - 1), iter->value());
2615
2616 delete iter;
2617}
2618
7c673cae
FG
2619} // namespace rocksdb
2620
2621int main(int argc, char** argv) {
2622 rocksdb::port::InstallStackTraceHandler();
2623 ::testing::InitGoogleTest(&argc, argv);
2624 return RUN_ALL_TESTS();
2625}