]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/db/db_iter_stress_test.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / rocksdb / db / db_iter_stress_test.cc
CommitLineData
11fdf7f2
TL
1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
5
6#include "db/db_iter.h"
7#include "db/dbformat.h"
8#include "rocksdb/comparator.h"
9#include "rocksdb/options.h"
10#include "rocksdb/slice.h"
f67539c2 11#include "test_util/testharness.h"
11fdf7f2
TL
12#include "util/random.h"
13#include "util/string_util.h"
11fdf7f2
TL
14#include "utilities/merge_operators.h"
15
16#ifdef GFLAGS
17
18#include "util/gflags_compat.h"
19
20using GFLAGS_NAMESPACE::ParseCommandLineFlags;
21
22DEFINE_bool(verbose, false,
23 "Print huge, detailed trace. Intended for debugging failures.");
24
25#else
26
27void ParseCommandLineFlags(int*, char***, bool) {}
28bool FLAGS_verbose = false;
29
30#endif
31
f67539c2 32namespace ROCKSDB_NAMESPACE {
11fdf7f2
TL
33
34class DBIteratorStressTest : public testing::Test {
35 public:
36 Env* env_;
37
38 DBIteratorStressTest() : env_(Env::Default()) {}
39};
40
41namespace {
42
43struct Entry {
44 std::string key;
45 ValueType type; // kTypeValue, kTypeDeletion, kTypeMerge
46 uint64_t sequence;
47 std::string ikey; // internal key, made from `key`, `sequence` and `type`
48 std::string value;
49 // If false, we'll pretend that this entry doesn't exist.
50 bool visible = true;
51
52 bool operator<(const Entry& e) const {
53 if (key != e.key) return key < e.key;
54 return std::tie(sequence, type) > std::tie(e.sequence, e.type);
55 }
56};
57
58struct Data {
59 std::vector<Entry> entries;
60
61 // Indices in `entries` with `visible` = false.
62 std::vector<size_t> hidden;
63 // Keys of entries whose `visible` changed since the last seek of iterators.
64 std::set<std::string> recently_touched_keys;
65};
66
67struct StressTestIterator : public InternalIterator {
68 Data* data;
69 Random64* rnd;
70 InternalKeyComparator cmp;
71
72 // Each operation will return error with this probability...
73 double error_probability = 0;
74 // ... and add/remove entries with this probability.
75 double mutation_probability = 0;
76 // The probability of adding vs removing entries will be chosen so that the
77 // amount of removed entries stays somewhat close to this number.
78 double target_hidden_fraction = 0;
79 // If true, print all mutations to stdout for debugging.
80 bool trace = false;
81
82 int iter = -1;
83 Status status_;
84
85 StressTestIterator(Data* _data, Random64* _rnd, const Comparator* _cmp)
86 : data(_data), rnd(_rnd), cmp(_cmp) {}
87
88 bool Valid() const override {
89 if (iter >= 0 && iter < (int)data->entries.size()) {
90 assert(status_.ok());
91 return true;
92 }
93 return false;
94 }
95
96 Status status() const override { return status_; }
97
98 bool MaybeFail() {
99 if (rnd->Next() >=
20effc67
TL
100 static_cast<double>(std::numeric_limits<uint64_t>::max()) *
101 error_probability) {
11fdf7f2
TL
102 return false;
103 }
104 if (rnd->Next() % 2) {
105 status_ = Status::Incomplete("test");
106 } else {
107 status_ = Status::IOError("test");
108 }
109 if (trace) {
110 std::cout << "injecting " << status_.ToString() << std::endl;
111 }
112 iter = -1;
113 return true;
114 }
115
116 void MaybeMutate() {
117 if (rnd->Next() >=
20effc67
TL
118 static_cast<double>(std::numeric_limits<uint64_t>::max()) *
119 mutation_probability) {
11fdf7f2
TL
120 return;
121 }
122 do {
123 // If too many entries are hidden, hide less, otherwise hide more.
124 double hide_probability =
125 data->hidden.size() > data->entries.size() * target_hidden_fraction
126 ? 1. / 3
127 : 2. / 3;
128 if (data->hidden.empty()) {
129 hide_probability = 1;
130 }
20effc67
TL
131 bool do_hide = rnd->Next() <
132 static_cast<double>(std::numeric_limits<uint64_t>::max()) *
133 hide_probability;
11fdf7f2
TL
134 if (do_hide) {
135 // Hide a random entry.
136 size_t idx = rnd->Next() % data->entries.size();
137 Entry& e = data->entries[idx];
138 if (e.visible) {
139 if (trace) {
140 std::cout << "hiding idx " << idx << std::endl;
141 }
142 e.visible = false;
143 data->hidden.push_back(idx);
144 data->recently_touched_keys.insert(e.key);
145 } else {
146 // Already hidden. Let's go unhide something instead, just because
147 // it's easy and it doesn't really matter what we do.
148 do_hide = false;
149 }
150 }
151 if (!do_hide) {
152 // Unhide a random entry.
153 size_t hi = rnd->Next() % data->hidden.size();
154 size_t idx = data->hidden[hi];
155 if (trace) {
156 std::cout << "unhiding idx " << idx << std::endl;
157 }
158 Entry& e = data->entries[idx];
159 assert(!e.visible);
160 e.visible = true;
161 data->hidden[hi] = data->hidden.back();
162 data->hidden.pop_back();
163 data->recently_touched_keys.insert(e.key);
164 }
165 } while (rnd->Next() % 3 != 0); // do 3 mutations on average
166 }
167
168 void SkipForward() {
169 while (iter < (int)data->entries.size() && !data->entries[iter].visible) {
170 ++iter;
171 }
172 }
173 void SkipBackward() {
174 while (iter >= 0 && !data->entries[iter].visible) {
175 --iter;
176 }
177 }
178
179 void SeekToFirst() override {
180 if (MaybeFail()) return;
181 MaybeMutate();
182
183 status_ = Status::OK();
184 iter = 0;
185 SkipForward();
186 }
187 void SeekToLast() override {
188 if (MaybeFail()) return;
189 MaybeMutate();
190
191 status_ = Status::OK();
192 iter = (int)data->entries.size() - 1;
193 SkipBackward();
194 }
195
196 void Seek(const Slice& target) override {
197 if (MaybeFail()) return;
198 MaybeMutate();
199
200 status_ = Status::OK();
201 // Binary search.
202 auto it = std::partition_point(
203 data->entries.begin(), data->entries.end(),
204 [&](const Entry& e) { return cmp.Compare(e.ikey, target) < 0; });
205 iter = (int)(it - data->entries.begin());
206 SkipForward();
207 }
208 void SeekForPrev(const Slice& target) override {
209 if (MaybeFail()) return;
210 MaybeMutate();
211
212 status_ = Status::OK();
213 // Binary search.
214 auto it = std::partition_point(
215 data->entries.begin(), data->entries.end(),
216 [&](const Entry& e) { return cmp.Compare(e.ikey, target) <= 0; });
217 iter = (int)(it - data->entries.begin());
218 --iter;
219 SkipBackward();
220 }
221
222 void Next() override {
223 assert(Valid());
224 if (MaybeFail()) return;
225 MaybeMutate();
226 ++iter;
227 SkipForward();
228 }
229 void Prev() override {
230 assert(Valid());
231 if (MaybeFail()) return;
232 MaybeMutate();
233 --iter;
234 SkipBackward();
235 }
236
237 Slice key() const override {
238 assert(Valid());
239 return data->entries[iter].ikey;
240 }
241 Slice value() const override {
242 assert(Valid());
243 return data->entries[iter].value;
244 }
245
246 bool IsKeyPinned() const override { return true; }
247 bool IsValuePinned() const override { return true; }
248};
249
250// A small reimplementation of DBIter, supporting only some of the features,
251// and doing everything in O(log n).
252// Skips all keys that are in recently_touched_keys.
253struct ReferenceIterator {
254 Data* data;
255 uint64_t sequence; // ignore entries with sequence number below this
256
257 bool valid = false;
258 std::string key;
259 std::string value;
260
261 ReferenceIterator(Data* _data, uint64_t _sequence)
262 : data(_data), sequence(_sequence) {}
263
264 bool Valid() const { return valid; }
265
266 // Finds the first entry with key
267 // greater/less/greater-or-equal/less-or-equal than `key`, depending on
268 // arguments: if `skip`, inequality is strict; if `forward`, it's
269 // greater/greater-or-equal, otherwise less/less-or-equal.
270 // Sets `key` to the result.
271 // If no such key exists, returns false. Doesn't check `visible`.
272 bool FindNextKey(bool skip, bool forward) {
273 valid = false;
274 auto it = std::partition_point(data->entries.begin(), data->entries.end(),
275 [&](const Entry& e) {
276 if (forward != skip) {
277 return e.key < key;
278 } else {
279 return e.key <= key;
280 }
281 });
282 if (forward) {
283 if (it != data->entries.end()) {
284 key = it->key;
285 return true;
286 }
287 } else {
288 if (it != data->entries.begin()) {
289 --it;
290 key = it->key;
291 return true;
292 }
293 }
294 return false;
295 }
296
297 bool FindValueForCurrentKey() {
298 if (data->recently_touched_keys.count(key)) {
299 return false;
300 }
301
302 // Find the first entry for the key. The caller promises that it exists.
303 auto it = std::partition_point(data->entries.begin(), data->entries.end(),
304 [&](const Entry& e) {
305 if (e.key != key) {
306 return e.key < key;
307 }
308 return e.sequence > sequence;
309 });
310
311 // Find the first visible entry.
312 for (;; ++it) {
313 if (it == data->entries.end()) {
314 return false;
315 }
316 Entry& e = *it;
317 if (e.key != key) {
318 return false;
319 }
320 assert(e.sequence <= sequence);
321 if (!e.visible) continue;
322 if (e.type == kTypeDeletion) {
323 return false;
324 }
325 if (e.type == kTypeValue) {
326 value = e.value;
327 valid = true;
328 return true;
329 }
330 assert(e.type == kTypeMerge);
331 break;
332 }
333
334 // Collect merge operands.
335 std::vector<Slice> operands;
336 for (; it != data->entries.end(); ++it) {
337 Entry& e = *it;
338 if (e.key != key) {
339 break;
340 }
341 assert(e.sequence <= sequence);
342 if (!e.visible) continue;
343 if (e.type == kTypeDeletion) {
344 break;
345 }
346 operands.push_back(e.value);
347 if (e.type == kTypeValue) {
348 break;
349 }
350 }
351
352 // Do a merge.
353 value = operands.back().ToString();
354 for (int i = (int)operands.size() - 2; i >= 0; --i) {
355 value.append(",");
356 value.append(operands[i].data(), operands[i].size());
357 }
358
359 valid = true;
360 return true;
361 }
362
363 // Start at `key` and move until we encounter a valid value.
364 // `forward` defines the direction of movement.
365 // If `skip` is true, we're looking for key not equal to `key`.
366 void DoTheThing(bool skip, bool forward) {
367 while (FindNextKey(skip, forward) && !FindValueForCurrentKey()) {
368 skip = true;
369 }
370 }
371
372 void Seek(const Slice& target) {
373 key = target.ToString();
374 DoTheThing(false, true);
375 }
376 void SeekForPrev(const Slice& target) {
377 key = target.ToString();
378 DoTheThing(false, false);
379 }
380 void SeekToFirst() { Seek(""); }
381 void SeekToLast() {
382 key = data->entries.back().key;
383 DoTheThing(false, false);
384 }
385 void Next() {
386 assert(Valid());
387 DoTheThing(true, true);
388 }
389 void Prev() {
390 assert(Valid());
391 DoTheThing(true, false);
392 }
393};
394
395} // namespace
396
397// Use an internal iterator that sometimes returns errors and sometimes
398// adds/removes entries on the fly. Do random operations on a DBIter and
399// check results.
400// TODO: can be improved for more coverage:
401// * Override IsKeyPinned() and IsValuePinned() to actually use
402// PinnedIteratorManager and check that there's no use-after free.
403// * Try different combinations of prefix_extractor, total_order_seek,
404// prefix_same_as_start, iterate_lower_bound, iterate_upper_bound.
405TEST_F(DBIteratorStressTest, StressTest) {
406 // We use a deterministic RNG, and everything happens in a single thread.
407 Random64 rnd(826909345792864532ll);
408
409 auto gen_key = [&](int max_key) {
410 assert(max_key > 0);
411 int len = 0;
412 int a = max_key;
413 while (a) {
414 a /= 10;
415 ++len;
416 }
417 std::string s = ToString(rnd.Next() % static_cast<uint64_t>(max_key));
418 s.insert(0, len - (int)s.size(), '0');
419 return s;
420 };
421
422 Options options;
423 options.merge_operator = MergeOperators::CreateFromStringId("stringappend");
424 ReadOptions ropt;
425
426 size_t num_matching = 0;
427 size_t num_at_end = 0;
428 size_t num_not_ok = 0;
429 size_t num_recently_removed = 0;
430
431 // Number of iterations for each combination of parameters
432 // (there are ~250 of those).
433 // Tweak this to change the test run time.
434 // As of the time of writing, the test takes ~4 seconds for value of 5000.
435 const int num_iterations = 5000;
436 // Enable this to print all the operations for debugging.
437 bool trace = FLAGS_verbose;
438
439 for (int num_entries : {5, 10, 100}) {
440 for (double key_space : {0.1, 1.0, 3.0}) {
441 for (ValueType prevalent_entry_type :
442 {kTypeValue, kTypeDeletion, kTypeMerge}) {
443 for (double error_probability : {0.01, 0.1}) {
444 for (double mutation_probability : {0.01, 0.5}) {
445 for (double target_hidden_fraction : {0.1, 0.5}) {
446 std::string trace_str =
447 "entries: " + ToString(num_entries) +
448 ", key_space: " + ToString(key_space) +
449 ", error_probability: " + ToString(error_probability) +
450 ", mutation_probability: " + ToString(mutation_probability) +
451 ", target_hidden_fraction: " +
452 ToString(target_hidden_fraction);
453 SCOPED_TRACE(trace_str);
454 if (trace) {
455 std::cout << trace_str << std::endl;
456 }
457
458 // Generate data.
459 Data data;
460 int max_key = (int)(num_entries * key_space) + 1;
461 for (int i = 0; i < num_entries; ++i) {
462 Entry e;
463 e.key = gen_key(max_key);
464 if (rnd.Next() % 10 != 0) {
465 e.type = prevalent_entry_type;
466 } else {
467 const ValueType types[] = {kTypeValue, kTypeDeletion,
468 kTypeMerge};
469 e.type =
470 types[rnd.Next() % (sizeof(types) / sizeof(types[0]))];
471 }
472 e.sequence = i;
473 e.value = "v" + ToString(i);
474 ParsedInternalKey internal_key(e.key, e.sequence, e.type);
475 AppendInternalKey(&e.ikey, internal_key);
476
477 data.entries.push_back(e);
478 }
479 std::sort(data.entries.begin(), data.entries.end());
480 if (trace) {
481 std::cout << "entries:";
482 for (size_t i = 0; i < data.entries.size(); ++i) {
483 Entry& e = data.entries[i];
484 std::cout
485 << "\n idx " << i << ": \"" << e.key << "\": \""
486 << e.value << "\" seq: " << e.sequence << " type: "
487 << (e.type == kTypeValue
488 ? "val"
489 : e.type == kTypeDeletion ? "del" : "merge");
490 }
491 std::cout << std::endl;
492 }
493
494 std::unique_ptr<Iterator> db_iter;
495 std::unique_ptr<ReferenceIterator> ref_iter;
496 for (int iteration = 0; iteration < num_iterations; ++iteration) {
497 SCOPED_TRACE(iteration);
498 // Create a new iterator every ~30 operations.
499 if (db_iter == nullptr || rnd.Next() % 30 == 0) {
500 uint64_t sequence = rnd.Next() % (data.entries.size() + 2);
501 ref_iter.reset(new ReferenceIterator(&data, sequence));
502 if (trace) {
503 std::cout << "new iterator, seq: " << sequence << std::endl;
504 }
505
506 auto internal_iter =
507 new StressTestIterator(&data, &rnd, BytewiseComparator());
508 internal_iter->error_probability = error_probability;
509 internal_iter->mutation_probability = mutation_probability;
510 internal_iter->target_hidden_fraction =
511 target_hidden_fraction;
512 internal_iter->trace = trace;
513 db_iter.reset(NewDBIterator(
514 env_, ropt, ImmutableCFOptions(options),
515 MutableCFOptions(options), BytewiseComparator(),
516 internal_iter, sequence,
517 options.max_sequential_skip_in_iterations,
518 nullptr /*read_callback*/));
519 }
520
521 // Do a random operation. It's important to do it on ref_it
522 // later than on db_iter to make sure ref_it sees the correct
523 // recently_touched_keys.
524 std::string old_key;
525 bool forward = rnd.Next() % 2 > 0;
526 // Do Next()/Prev() ~90% of the time.
527 bool seek = !ref_iter->Valid() || rnd.Next() % 10 == 0;
528 if (trace) {
529 std::cout << iteration << ": ";
530 }
531
532 if (!seek) {
533 assert(db_iter->Valid());
534 old_key = ref_iter->key;
535 if (trace) {
536 std::cout << (forward ? "Next" : "Prev") << std::endl;
537 }
538
539 if (forward) {
540 db_iter->Next();
541 ref_iter->Next();
542 } else {
543 db_iter->Prev();
544 ref_iter->Prev();
545 }
546 } else {
547 data.recently_touched_keys.clear();
548 // Do SeekToFirst less often than Seek.
549 if (rnd.Next() % 4 == 0) {
550 if (trace) {
551 std::cout << (forward ? "SeekToFirst" : "SeekToLast")
552 << std::endl;
553 }
554
555 if (forward) {
556 old_key = "";
557 db_iter->SeekToFirst();
558 ref_iter->SeekToFirst();
559 } else {
560 old_key = data.entries.back().key;
561 db_iter->SeekToLast();
562 ref_iter->SeekToLast();
563 }
564 } else {
565 old_key = gen_key(max_key);
566 if (trace) {
567 std::cout << (forward ? "Seek" : "SeekForPrev") << " \""
568 << old_key << '"' << std::endl;
569 }
570 if (forward) {
571 db_iter->Seek(old_key);
572 ref_iter->Seek(old_key);
573 } else {
574 db_iter->SeekForPrev(old_key);
575 ref_iter->SeekForPrev(old_key);
576 }
577 }
578 }
579
580 // Check the result.
581 if (db_iter->Valid()) {
582 ASSERT_TRUE(db_iter->status().ok());
583 if (data.recently_touched_keys.count(
584 db_iter->key().ToString())) {
585 // Ended on a key that may have been mutated during the
586 // operation. Reference iterator skips such keys, so we
587 // can't check the exact result.
588
589 // Check that the key moved in the right direction.
590 if (forward) {
591 if (seek)
592 ASSERT_GE(db_iter->key().ToString(), old_key);
593 else
594 ASSERT_GT(db_iter->key().ToString(), old_key);
595 } else {
596 if (seek)
597 ASSERT_LE(db_iter->key().ToString(), old_key);
598 else
599 ASSERT_LT(db_iter->key().ToString(), old_key);
600 }
601
602 if (ref_iter->Valid()) {
603 // Check that DBIter didn't miss any non-mutated key.
604 if (forward) {
605 ASSERT_LT(db_iter->key().ToString(), ref_iter->key);
606 } else {
607 ASSERT_GT(db_iter->key().ToString(), ref_iter->key);
608 }
609 }
610 // Tell the next iteration of the loop to reseek the
611 // iterators.
612 ref_iter->valid = false;
613
614 ++num_recently_removed;
615 } else {
616 ASSERT_TRUE(ref_iter->Valid());
617 ASSERT_EQ(ref_iter->key, db_iter->key().ToString());
618 ASSERT_EQ(ref_iter->value, db_iter->value());
619 ++num_matching;
620 }
621 } else if (db_iter->status().ok()) {
622 ASSERT_FALSE(ref_iter->Valid());
623 ++num_at_end;
624 } else {
625 // Non-ok status. Nothing to check here.
626 // Tell the next iteration of the loop to reseek the
627 // iterators.
628 ref_iter->valid = false;
629 ++num_not_ok;
630 }
631 }
632 }
633 }
634 }
635 }
636 }
637 }
638
639 // Check that all cases were hit many times.
640 EXPECT_GT(num_matching, 10000);
641 EXPECT_GT(num_at_end, 10000);
642 EXPECT_GT(num_not_ok, 10000);
643 EXPECT_GT(num_recently_removed, 10000);
644
645 std::cout << "stats:\n exact matches: " << num_matching
646 << "\n end reached: " << num_at_end
647 << "\n non-ok status: " << num_not_ok
648 << "\n mutated on the fly: " << num_recently_removed << std::endl;
649}
650
f67539c2 651} // namespace ROCKSDB_NAMESPACE
11fdf7f2
TL
652
653int main(int argc, char** argv) {
654 ::testing::InitGoogleTest(&argc, argv);
655 ParseCommandLineFlags(&argc, &argv, true);
656 return RUN_ALL_TESTS();
657}