]> git.proxmox.com Git - ceph.git/blame - ceph/src/rocksdb/db/db_iter_stress_test.cc
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / rocksdb / db / db_iter_stress_test.cc
CommitLineData
11fdf7f2
TL
1// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2// This source code is licensed under both the GPLv2 (found in the
3// COPYING file in the root directory) and Apache 2.0 License
4// (found in the LICENSE.Apache file in the root directory).
5
6#include "db/db_iter.h"
7#include "db/dbformat.h"
8#include "rocksdb/comparator.h"
9#include "rocksdb/options.h"
10#include "rocksdb/slice.h"
f67539c2 11#include "test_util/testharness.h"
11fdf7f2
TL
12#include "util/random.h"
13#include "util/string_util.h"
11fdf7f2
TL
14#include "utilities/merge_operators.h"
15
16#ifdef GFLAGS
17
18#include "util/gflags_compat.h"
19
20using GFLAGS_NAMESPACE::ParseCommandLineFlags;
21
22DEFINE_bool(verbose, false,
23 "Print huge, detailed trace. Intended for debugging failures.");
24
25#else
26
27void ParseCommandLineFlags(int*, char***, bool) {}
28bool FLAGS_verbose = false;
29
30#endif
31
f67539c2 32namespace ROCKSDB_NAMESPACE {
11fdf7f2
TL
33
34class DBIteratorStressTest : public testing::Test {
35 public:
36 Env* env_;
37
38 DBIteratorStressTest() : env_(Env::Default()) {}
39};
40
41namespace {
42
43struct Entry {
44 std::string key;
45 ValueType type; // kTypeValue, kTypeDeletion, kTypeMerge
46 uint64_t sequence;
47 std::string ikey; // internal key, made from `key`, `sequence` and `type`
48 std::string value;
49 // If false, we'll pretend that this entry doesn't exist.
50 bool visible = true;
51
52 bool operator<(const Entry& e) const {
53 if (key != e.key) return key < e.key;
54 return std::tie(sequence, type) > std::tie(e.sequence, e.type);
55 }
56};
57
58struct Data {
59 std::vector<Entry> entries;
60
61 // Indices in `entries` with `visible` = false.
62 std::vector<size_t> hidden;
63 // Keys of entries whose `visible` changed since the last seek of iterators.
64 std::set<std::string> recently_touched_keys;
65};
66
67struct StressTestIterator : public InternalIterator {
68 Data* data;
69 Random64* rnd;
70 InternalKeyComparator cmp;
71
72 // Each operation will return error with this probability...
73 double error_probability = 0;
74 // ... and add/remove entries with this probability.
75 double mutation_probability = 0;
76 // The probability of adding vs removing entries will be chosen so that the
77 // amount of removed entries stays somewhat close to this number.
78 double target_hidden_fraction = 0;
79 // If true, print all mutations to stdout for debugging.
80 bool trace = false;
81
82 int iter = -1;
83 Status status_;
84
85 StressTestIterator(Data* _data, Random64* _rnd, const Comparator* _cmp)
86 : data(_data), rnd(_rnd), cmp(_cmp) {}
87
88 bool Valid() const override {
89 if (iter >= 0 && iter < (int)data->entries.size()) {
90 assert(status_.ok());
91 return true;
92 }
93 return false;
94 }
95
96 Status status() const override { return status_; }
97
98 bool MaybeFail() {
99 if (rnd->Next() >=
100 std::numeric_limits<uint64_t>::max() * error_probability) {
101 return false;
102 }
103 if (rnd->Next() % 2) {
104 status_ = Status::Incomplete("test");
105 } else {
106 status_ = Status::IOError("test");
107 }
108 if (trace) {
109 std::cout << "injecting " << status_.ToString() << std::endl;
110 }
111 iter = -1;
112 return true;
113 }
114
115 void MaybeMutate() {
116 if (rnd->Next() >=
117 std::numeric_limits<uint64_t>::max() * mutation_probability) {
118 return;
119 }
120 do {
121 // If too many entries are hidden, hide less, otherwise hide more.
122 double hide_probability =
123 data->hidden.size() > data->entries.size() * target_hidden_fraction
124 ? 1. / 3
125 : 2. / 3;
126 if (data->hidden.empty()) {
127 hide_probability = 1;
128 }
129 bool do_hide =
130 rnd->Next() < std::numeric_limits<uint64_t>::max() * hide_probability;
131 if (do_hide) {
132 // Hide a random entry.
133 size_t idx = rnd->Next() % data->entries.size();
134 Entry& e = data->entries[idx];
135 if (e.visible) {
136 if (trace) {
137 std::cout << "hiding idx " << idx << std::endl;
138 }
139 e.visible = false;
140 data->hidden.push_back(idx);
141 data->recently_touched_keys.insert(e.key);
142 } else {
143 // Already hidden. Let's go unhide something instead, just because
144 // it's easy and it doesn't really matter what we do.
145 do_hide = false;
146 }
147 }
148 if (!do_hide) {
149 // Unhide a random entry.
150 size_t hi = rnd->Next() % data->hidden.size();
151 size_t idx = data->hidden[hi];
152 if (trace) {
153 std::cout << "unhiding idx " << idx << std::endl;
154 }
155 Entry& e = data->entries[idx];
156 assert(!e.visible);
157 e.visible = true;
158 data->hidden[hi] = data->hidden.back();
159 data->hidden.pop_back();
160 data->recently_touched_keys.insert(e.key);
161 }
162 } while (rnd->Next() % 3 != 0); // do 3 mutations on average
163 }
164
165 void SkipForward() {
166 while (iter < (int)data->entries.size() && !data->entries[iter].visible) {
167 ++iter;
168 }
169 }
170 void SkipBackward() {
171 while (iter >= 0 && !data->entries[iter].visible) {
172 --iter;
173 }
174 }
175
176 void SeekToFirst() override {
177 if (MaybeFail()) return;
178 MaybeMutate();
179
180 status_ = Status::OK();
181 iter = 0;
182 SkipForward();
183 }
184 void SeekToLast() override {
185 if (MaybeFail()) return;
186 MaybeMutate();
187
188 status_ = Status::OK();
189 iter = (int)data->entries.size() - 1;
190 SkipBackward();
191 }
192
193 void Seek(const Slice& target) override {
194 if (MaybeFail()) return;
195 MaybeMutate();
196
197 status_ = Status::OK();
198 // Binary search.
199 auto it = std::partition_point(
200 data->entries.begin(), data->entries.end(),
201 [&](const Entry& e) { return cmp.Compare(e.ikey, target) < 0; });
202 iter = (int)(it - data->entries.begin());
203 SkipForward();
204 }
205 void SeekForPrev(const Slice& target) override {
206 if (MaybeFail()) return;
207 MaybeMutate();
208
209 status_ = Status::OK();
210 // Binary search.
211 auto it = std::partition_point(
212 data->entries.begin(), data->entries.end(),
213 [&](const Entry& e) { return cmp.Compare(e.ikey, target) <= 0; });
214 iter = (int)(it - data->entries.begin());
215 --iter;
216 SkipBackward();
217 }
218
219 void Next() override {
220 assert(Valid());
221 if (MaybeFail()) return;
222 MaybeMutate();
223 ++iter;
224 SkipForward();
225 }
226 void Prev() override {
227 assert(Valid());
228 if (MaybeFail()) return;
229 MaybeMutate();
230 --iter;
231 SkipBackward();
232 }
233
234 Slice key() const override {
235 assert(Valid());
236 return data->entries[iter].ikey;
237 }
238 Slice value() const override {
239 assert(Valid());
240 return data->entries[iter].value;
241 }
242
243 bool IsKeyPinned() const override { return true; }
244 bool IsValuePinned() const override { return true; }
245};
246
247// A small reimplementation of DBIter, supporting only some of the features,
248// and doing everything in O(log n).
249// Skips all keys that are in recently_touched_keys.
250struct ReferenceIterator {
251 Data* data;
252 uint64_t sequence; // ignore entries with sequence number below this
253
254 bool valid = false;
255 std::string key;
256 std::string value;
257
258 ReferenceIterator(Data* _data, uint64_t _sequence)
259 : data(_data), sequence(_sequence) {}
260
261 bool Valid() const { return valid; }
262
263 // Finds the first entry with key
264 // greater/less/greater-or-equal/less-or-equal than `key`, depending on
265 // arguments: if `skip`, inequality is strict; if `forward`, it's
266 // greater/greater-or-equal, otherwise less/less-or-equal.
267 // Sets `key` to the result.
268 // If no such key exists, returns false. Doesn't check `visible`.
269 bool FindNextKey(bool skip, bool forward) {
270 valid = false;
271 auto it = std::partition_point(data->entries.begin(), data->entries.end(),
272 [&](const Entry& e) {
273 if (forward != skip) {
274 return e.key < key;
275 } else {
276 return e.key <= key;
277 }
278 });
279 if (forward) {
280 if (it != data->entries.end()) {
281 key = it->key;
282 return true;
283 }
284 } else {
285 if (it != data->entries.begin()) {
286 --it;
287 key = it->key;
288 return true;
289 }
290 }
291 return false;
292 }
293
294 bool FindValueForCurrentKey() {
295 if (data->recently_touched_keys.count(key)) {
296 return false;
297 }
298
299 // Find the first entry for the key. The caller promises that it exists.
300 auto it = std::partition_point(data->entries.begin(), data->entries.end(),
301 [&](const Entry& e) {
302 if (e.key != key) {
303 return e.key < key;
304 }
305 return e.sequence > sequence;
306 });
307
308 // Find the first visible entry.
309 for (;; ++it) {
310 if (it == data->entries.end()) {
311 return false;
312 }
313 Entry& e = *it;
314 if (e.key != key) {
315 return false;
316 }
317 assert(e.sequence <= sequence);
318 if (!e.visible) continue;
319 if (e.type == kTypeDeletion) {
320 return false;
321 }
322 if (e.type == kTypeValue) {
323 value = e.value;
324 valid = true;
325 return true;
326 }
327 assert(e.type == kTypeMerge);
328 break;
329 }
330
331 // Collect merge operands.
332 std::vector<Slice> operands;
333 for (; it != data->entries.end(); ++it) {
334 Entry& e = *it;
335 if (e.key != key) {
336 break;
337 }
338 assert(e.sequence <= sequence);
339 if (!e.visible) continue;
340 if (e.type == kTypeDeletion) {
341 break;
342 }
343 operands.push_back(e.value);
344 if (e.type == kTypeValue) {
345 break;
346 }
347 }
348
349 // Do a merge.
350 value = operands.back().ToString();
351 for (int i = (int)operands.size() - 2; i >= 0; --i) {
352 value.append(",");
353 value.append(operands[i].data(), operands[i].size());
354 }
355
356 valid = true;
357 return true;
358 }
359
360 // Start at `key` and move until we encounter a valid value.
361 // `forward` defines the direction of movement.
362 // If `skip` is true, we're looking for key not equal to `key`.
363 void DoTheThing(bool skip, bool forward) {
364 while (FindNextKey(skip, forward) && !FindValueForCurrentKey()) {
365 skip = true;
366 }
367 }
368
369 void Seek(const Slice& target) {
370 key = target.ToString();
371 DoTheThing(false, true);
372 }
373 void SeekForPrev(const Slice& target) {
374 key = target.ToString();
375 DoTheThing(false, false);
376 }
377 void SeekToFirst() { Seek(""); }
378 void SeekToLast() {
379 key = data->entries.back().key;
380 DoTheThing(false, false);
381 }
382 void Next() {
383 assert(Valid());
384 DoTheThing(true, true);
385 }
386 void Prev() {
387 assert(Valid());
388 DoTheThing(true, false);
389 }
390};
391
392} // namespace
393
394// Use an internal iterator that sometimes returns errors and sometimes
395// adds/removes entries on the fly. Do random operations on a DBIter and
396// check results.
397// TODO: can be improved for more coverage:
398// * Override IsKeyPinned() and IsValuePinned() to actually use
399// PinnedIteratorManager and check that there's no use-after free.
400// * Try different combinations of prefix_extractor, total_order_seek,
401// prefix_same_as_start, iterate_lower_bound, iterate_upper_bound.
402TEST_F(DBIteratorStressTest, StressTest) {
403 // We use a deterministic RNG, and everything happens in a single thread.
404 Random64 rnd(826909345792864532ll);
405
406 auto gen_key = [&](int max_key) {
407 assert(max_key > 0);
408 int len = 0;
409 int a = max_key;
410 while (a) {
411 a /= 10;
412 ++len;
413 }
414 std::string s = ToString(rnd.Next() % static_cast<uint64_t>(max_key));
415 s.insert(0, len - (int)s.size(), '0');
416 return s;
417 };
418
419 Options options;
420 options.merge_operator = MergeOperators::CreateFromStringId("stringappend");
421 ReadOptions ropt;
422
423 size_t num_matching = 0;
424 size_t num_at_end = 0;
425 size_t num_not_ok = 0;
426 size_t num_recently_removed = 0;
427
428 // Number of iterations for each combination of parameters
429 // (there are ~250 of those).
430 // Tweak this to change the test run time.
431 // As of the time of writing, the test takes ~4 seconds for value of 5000.
432 const int num_iterations = 5000;
433 // Enable this to print all the operations for debugging.
434 bool trace = FLAGS_verbose;
435
436 for (int num_entries : {5, 10, 100}) {
437 for (double key_space : {0.1, 1.0, 3.0}) {
438 for (ValueType prevalent_entry_type :
439 {kTypeValue, kTypeDeletion, kTypeMerge}) {
440 for (double error_probability : {0.01, 0.1}) {
441 for (double mutation_probability : {0.01, 0.5}) {
442 for (double target_hidden_fraction : {0.1, 0.5}) {
443 std::string trace_str =
444 "entries: " + ToString(num_entries) +
445 ", key_space: " + ToString(key_space) +
446 ", error_probability: " + ToString(error_probability) +
447 ", mutation_probability: " + ToString(mutation_probability) +
448 ", target_hidden_fraction: " +
449 ToString(target_hidden_fraction);
450 SCOPED_TRACE(trace_str);
451 if (trace) {
452 std::cout << trace_str << std::endl;
453 }
454
455 // Generate data.
456 Data data;
457 int max_key = (int)(num_entries * key_space) + 1;
458 for (int i = 0; i < num_entries; ++i) {
459 Entry e;
460 e.key = gen_key(max_key);
461 if (rnd.Next() % 10 != 0) {
462 e.type = prevalent_entry_type;
463 } else {
464 const ValueType types[] = {kTypeValue, kTypeDeletion,
465 kTypeMerge};
466 e.type =
467 types[rnd.Next() % (sizeof(types) / sizeof(types[0]))];
468 }
469 e.sequence = i;
470 e.value = "v" + ToString(i);
471 ParsedInternalKey internal_key(e.key, e.sequence, e.type);
472 AppendInternalKey(&e.ikey, internal_key);
473
474 data.entries.push_back(e);
475 }
476 std::sort(data.entries.begin(), data.entries.end());
477 if (trace) {
478 std::cout << "entries:";
479 for (size_t i = 0; i < data.entries.size(); ++i) {
480 Entry& e = data.entries[i];
481 std::cout
482 << "\n idx " << i << ": \"" << e.key << "\": \""
483 << e.value << "\" seq: " << e.sequence << " type: "
484 << (e.type == kTypeValue
485 ? "val"
486 : e.type == kTypeDeletion ? "del" : "merge");
487 }
488 std::cout << std::endl;
489 }
490
491 std::unique_ptr<Iterator> db_iter;
492 std::unique_ptr<ReferenceIterator> ref_iter;
493 for (int iteration = 0; iteration < num_iterations; ++iteration) {
494 SCOPED_TRACE(iteration);
495 // Create a new iterator every ~30 operations.
496 if (db_iter == nullptr || rnd.Next() % 30 == 0) {
497 uint64_t sequence = rnd.Next() % (data.entries.size() + 2);
498 ref_iter.reset(new ReferenceIterator(&data, sequence));
499 if (trace) {
500 std::cout << "new iterator, seq: " << sequence << std::endl;
501 }
502
503 auto internal_iter =
504 new StressTestIterator(&data, &rnd, BytewiseComparator());
505 internal_iter->error_probability = error_probability;
506 internal_iter->mutation_probability = mutation_probability;
507 internal_iter->target_hidden_fraction =
508 target_hidden_fraction;
509 internal_iter->trace = trace;
510 db_iter.reset(NewDBIterator(
511 env_, ropt, ImmutableCFOptions(options),
512 MutableCFOptions(options), BytewiseComparator(),
513 internal_iter, sequence,
514 options.max_sequential_skip_in_iterations,
515 nullptr /*read_callback*/));
516 }
517
518 // Do a random operation. It's important to do it on ref_it
519 // later than on db_iter to make sure ref_it sees the correct
520 // recently_touched_keys.
521 std::string old_key;
522 bool forward = rnd.Next() % 2 > 0;
523 // Do Next()/Prev() ~90% of the time.
524 bool seek = !ref_iter->Valid() || rnd.Next() % 10 == 0;
525 if (trace) {
526 std::cout << iteration << ": ";
527 }
528
529 if (!seek) {
530 assert(db_iter->Valid());
531 old_key = ref_iter->key;
532 if (trace) {
533 std::cout << (forward ? "Next" : "Prev") << std::endl;
534 }
535
536 if (forward) {
537 db_iter->Next();
538 ref_iter->Next();
539 } else {
540 db_iter->Prev();
541 ref_iter->Prev();
542 }
543 } else {
544 data.recently_touched_keys.clear();
545 // Do SeekToFirst less often than Seek.
546 if (rnd.Next() % 4 == 0) {
547 if (trace) {
548 std::cout << (forward ? "SeekToFirst" : "SeekToLast")
549 << std::endl;
550 }
551
552 if (forward) {
553 old_key = "";
554 db_iter->SeekToFirst();
555 ref_iter->SeekToFirst();
556 } else {
557 old_key = data.entries.back().key;
558 db_iter->SeekToLast();
559 ref_iter->SeekToLast();
560 }
561 } else {
562 old_key = gen_key(max_key);
563 if (trace) {
564 std::cout << (forward ? "Seek" : "SeekForPrev") << " \""
565 << old_key << '"' << std::endl;
566 }
567 if (forward) {
568 db_iter->Seek(old_key);
569 ref_iter->Seek(old_key);
570 } else {
571 db_iter->SeekForPrev(old_key);
572 ref_iter->SeekForPrev(old_key);
573 }
574 }
575 }
576
577 // Check the result.
578 if (db_iter->Valid()) {
579 ASSERT_TRUE(db_iter->status().ok());
580 if (data.recently_touched_keys.count(
581 db_iter->key().ToString())) {
582 // Ended on a key that may have been mutated during the
583 // operation. Reference iterator skips such keys, so we
584 // can't check the exact result.
585
586 // Check that the key moved in the right direction.
587 if (forward) {
588 if (seek)
589 ASSERT_GE(db_iter->key().ToString(), old_key);
590 else
591 ASSERT_GT(db_iter->key().ToString(), old_key);
592 } else {
593 if (seek)
594 ASSERT_LE(db_iter->key().ToString(), old_key);
595 else
596 ASSERT_LT(db_iter->key().ToString(), old_key);
597 }
598
599 if (ref_iter->Valid()) {
600 // Check that DBIter didn't miss any non-mutated key.
601 if (forward) {
602 ASSERT_LT(db_iter->key().ToString(), ref_iter->key);
603 } else {
604 ASSERT_GT(db_iter->key().ToString(), ref_iter->key);
605 }
606 }
607 // Tell the next iteration of the loop to reseek the
608 // iterators.
609 ref_iter->valid = false;
610
611 ++num_recently_removed;
612 } else {
613 ASSERT_TRUE(ref_iter->Valid());
614 ASSERT_EQ(ref_iter->key, db_iter->key().ToString());
615 ASSERT_EQ(ref_iter->value, db_iter->value());
616 ++num_matching;
617 }
618 } else if (db_iter->status().ok()) {
619 ASSERT_FALSE(ref_iter->Valid());
620 ++num_at_end;
621 } else {
622 // Non-ok status. Nothing to check here.
623 // Tell the next iteration of the loop to reseek the
624 // iterators.
625 ref_iter->valid = false;
626 ++num_not_ok;
627 }
628 }
629 }
630 }
631 }
632 }
633 }
634 }
635
636 // Check that all cases were hit many times.
637 EXPECT_GT(num_matching, 10000);
638 EXPECT_GT(num_at_end, 10000);
639 EXPECT_GT(num_not_ok, 10000);
640 EXPECT_GT(num_recently_removed, 10000);
641
642 std::cout << "stats:\n exact matches: " << num_matching
643 << "\n end reached: " << num_at_end
644 << "\n non-ok status: " << num_not_ok
645 << "\n mutated on the fly: " << num_recently_removed << std::endl;
646}
647
f67539c2 648} // namespace ROCKSDB_NAMESPACE
11fdf7f2
TL
649
650int main(int argc, char** argv) {
651 ::testing::InitGoogleTest(&argc, argv);
652 ParseCommandLineFlags(&argc, &argv, true);
653 return RUN_ALL_TESTS();
654}