// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "db/db_test_util.h"
#include "port/stack_trace.h"
#include "util/random.h"

namespace ROCKSDB_NAMESPACE {

class DBIOFailureTest : public DBTestBase {
 public:
  DBIOFailureTest()
      : DBTestBase("/db_io_failure_test", /*env_do_fsync=*/true) {}
};
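
// env_ (inherited from DBTestBase) is a SpecialEnv defined in db_test_util.h;
// the tests below rely on its fault-injection knobs (drop_writes_, no_space_,
// non_writeable_rate_, and the manifest/log error flags) to simulate I/O
// failures.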

#ifndef ROCKSDB_LITE
// Check that number of files does not grow when writes are dropped
TEST_F(DBIOFailureTest, DropWrites) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    options.paranoid_checks = false;
    Reopen(options);

    ASSERT_OK(Put("foo", "v1"));
    ASSERT_EQ("v1", Get("foo"));
    Compact("a", "z");
    const size_t num_files = CountFiles();
    // Force out-of-space errors
    env_->drop_writes_.store(true, std::memory_order_release);
    env_->sleep_counter_.Reset();
    env_->SetMockSleep();
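    // SetMockSleep() (SpecialEnv) avoids real sleeping while sleep_counter_
    // still records SleepForMicroseconds() calls, so the assertion after the
    // loop can verify that compaction backed off after the injected errors.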
    for (int i = 0; i < 5; i++) {
      if (option_config_ != kUniversalCompactionMultiLevel &&
          option_config_ != kUniversalSubcompactions) {
        for (int level = 0; level < dbfull()->NumberLevels(); level++) {
          if (level > 0 && level == dbfull()->NumberLevels() - 1) {
            break;
          }
          dbfull()->TEST_CompactRange(level, nullptr, nullptr, nullptr,
                                      true /* disallow trivial move */);
        }
      } else {
        dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
      }
    }

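    // Each failed compaction attempt should bump the accumulated background
    // error count exposed through the "rocksdb.background-errors" property.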
    std::string property_value;
    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
    ASSERT_EQ("5", property_value);

    env_->drop_writes_.store(false, std::memory_order_release);
    ASSERT_LT(CountFiles(), num_files + 3);

    // Check that compaction attempts slept after errors
    // TODO @krad: Figure out why ASSERT_EQ 5 keeps failing in certain compiler
    // versions
    ASSERT_GE(env_->sleep_counter_.Read(), 4);
  } while (ChangeCompactOptions());
}

// Check background error counter bumped on flush failures.
TEST_F(DBIOFailureTest, DropWritesFlush) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    options.max_background_flushes = 1;
    Reopen(options);

    ASSERT_OK(Put("foo", "v1"));
    // Force out-of-space errors
    env_->drop_writes_.store(true, std::memory_order_release);

    std::string property_value;
    // Background error count is 0 now.
    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
    ASSERT_EQ("0", property_value);

    dbfull()->TEST_FlushMemTable(true);

    ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
    ASSERT_EQ("1", property_value);

    env_->drop_writes_.store(false, std::memory_order_release);
  } while (ChangeCompactOptions());
}

// Check that CompactRange() returns failure if there is not enough space left
// on device
TEST_F(DBIOFailureTest, NoSpaceCompactRange) {
  do {
    Options options = CurrentOptions();
    options.env = env_;
    options.disable_auto_compactions = true;
    Reopen(options);

    // generate 5 tables
    for (int i = 0; i < 5; ++i) {
      ASSERT_OK(Put(Key(i), Key(i) + "v"));
      ASSERT_OK(Flush());
    }

    // Force out-of-space errors
    env_->no_space_.store(true, std::memory_order_release);

    Status s = dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
                                           true /* disallow trivial move */);
    ASSERT_TRUE(s.IsIOError());
    ASSERT_TRUE(s.IsNoSpace());

    env_->no_space_.store(false, std::memory_order_release);
  } while (ChangeCompactOptions());
}
#endif  // ROCKSDB_LITE

TEST_F(DBIOFailureTest, NonWritableFileSystem) {
  do {
    Options options = CurrentOptions();
    options.write_buffer_size = 4096;
    options.arena_block_size = 4096;
    options.env = env_;
    Reopen(options);
    ASSERT_OK(Put("foo", "v1"));
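    // Setting non_writeable_rate_ to 100 should make (roughly) every attempt
    // to create a writable file fail, so the large puts below cannot be
    // persisted.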
    env_->non_writeable_rate_.store(100);
    std::string big(100000, 'x');
    int errors = 0;
    for (int i = 0; i < 20; i++) {
      if (!Put("foo", big).ok()) {
        errors++;
        env_->SleepForMicroseconds(100000);
      }
    }
    ASSERT_GT(errors, 0);
    env_->non_writeable_rate_.store(0);
  } while (ChangeCompactOptions());
}

#ifndef ROCKSDB_LITE
TEST_F(DBIOFailureTest, ManifestWriteError) {
  // Test for the following problem:
  // (a) Compaction produces file F
  // (b) Log record containing F is written to MANIFEST file, but Sync() fails
  // (c) GC deletes F
  // (d) After reopening DB, reads fail since deleted F is named in log record

  // We iterate twice. In the second iteration, everything is the
  // same except the log record never makes it to the MANIFEST file.
  for (int iter = 0; iter < 2; iter++) {
    std::atomic<bool>* error_type = (iter == 0) ? &env_->manifest_sync_error_
                                                : &env_->manifest_write_error_;
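    // iter == 0 fails the MANIFEST Sync(); iter == 1 fails the MANIFEST write
    // itself, so the log record never reaches the file at all.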

    // Insert foo=>bar mapping
    Options options = CurrentOptions();
    options.env = env_;
    options.create_if_missing = true;
    options.error_if_exists = false;
    options.paranoid_checks = true;
    DestroyAndReopen(options);
    ASSERT_OK(Put("foo", "bar"));
    ASSERT_EQ("bar", Get("foo"));

    // Memtable compaction (will succeed)
    Flush();
    ASSERT_EQ("bar", Get("foo"));
    const int last = 2;
    MoveFilesToLevel(2);
    ASSERT_EQ(NumTableFilesAtLevel(last), 1);  // foo=>bar is now in last level

    // Merging compaction (will fail)
    error_type->store(true, std::memory_order_release);
    dbfull()->TEST_CompactRange(last, nullptr, nullptr);  // Should fail
    ASSERT_EQ("bar", Get("foo"));

    error_type->store(false, std::memory_order_release);

    // Since paranoid_checks=true, writes should fail
    ASSERT_NOK(Put("foo2", "bar2"));

    // Recovery: should not lose data
    ASSERT_EQ("bar", Get("foo"));

    // Try again with paranoid_checks=false
    Close();
    options.paranoid_checks = false;
    Reopen(options);

    // Merging compaction (will fail)
    error_type->store(true, std::memory_order_release);
    dbfull()->TEST_CompactRange(last, nullptr, nullptr);  // Should fail
    ASSERT_EQ("bar", Get("foo"));

    // Recovery: should not lose data
    error_type->store(false, std::memory_order_release);
    Reopen(options);
    ASSERT_EQ("bar", Get("foo"));

    // Since paranoid_checks=false, writes should succeed
    ASSERT_OK(Put("foo2", "bar2"));
    ASSERT_EQ("bar", Get("foo"));
    ASSERT_EQ("bar2", Get("foo2"));
  }
}

TEST_F(DBIOFailureTest, PutFailsParanoid) {
  // Test the following:
  // (a) A random put fails in paranoid mode (simulate by sync fail)
  // (b) All other puts have to fail, even if writes would succeed
  // (c) All of that should happen ONLY if paranoid_checks = true

  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.error_if_exists = false;
  options.paranoid_checks = true;
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);
  Status s;

  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo1", "bar1"));
  // simulate error
  env_->log_write_error_.store(true, std::memory_order_release);
  s = Put(1, "foo2", "bar2");
  ASSERT_TRUE(!s.ok());
  env_->log_write_error_.store(false, std::memory_order_release);
  s = Put(1, "foo3", "bar3");
  // the next put should fail, too
  ASSERT_TRUE(!s.ok());
  // but we're still able to read
  ASSERT_EQ("bar", Get(1, "foo"));

  // do the same thing with paranoid checks off
  options.paranoid_checks = false;
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);

  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo1", "bar1"));
  // simulate error
  env_->log_write_error_.store(true, std::memory_order_release);
  s = Put(1, "foo2", "bar2");
  ASSERT_TRUE(!s.ok());
  env_->log_write_error_.store(false, std::memory_order_release);
  s = Put(1, "foo3", "bar3");
  // the next put should NOT fail
  ASSERT_TRUE(s.ok());
}
#if !(defined NDEBUG) || !defined(OS_WIN)
TEST_F(DBIOFailureTest, FlushSstRangeSyncError) {
  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.error_if_exists = false;
  options.paranoid_checks = true;
  options.write_buffer_size = 256 * 1024 * 1024;
  options.writable_file_max_buffer_size = 128 * 1024;
  options.bytes_per_sync = 128 * 1024;
  options.level0_file_num_compaction_trigger = 4;
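  // SpecialSkipListFactory(N) (from db_test_util.h) reports the memtable as
  // full after roughly N entries, so the puts below trigger an automatic
  // flush without an explicit Flush() call.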
  options.memtable_factory.reset(new SpecialSkipListFactory(10));
  BlockBasedTableOptions table_options;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));

  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);
  Status s;

  std::atomic<int> range_sync_called(0);
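  // The sync point hook below fails only the first RangeSync() call on the
  // SST being flushed: `arg` is the Status* that the file writer will
  // observe, so overwriting it with an IOError makes the flush fail partway
  // through.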
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "SpecialEnv::SStableFile::RangeSync", [&](void* arg) {
        if (range_sync_called.fetch_add(1) == 0) {
          Status* st = static_cast<Status*>(arg);
          *st = Status::IOError("range sync dummy error");
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  Random rnd(301);
  std::string rnd_str =
      rnd.RandomString(static_cast<int>(options.bytes_per_sync / 2));
  std::string rnd_str_512kb = rnd.RandomString(512 * 1024);

  ASSERT_OK(Put(1, "foo", "bar"));
  // First 1MB doesn't get range synced
  ASSERT_OK(Put(1, "foo0_0", rnd_str_512kb));
  ASSERT_OK(Put(1, "foo0_1", rnd_str_512kb));
  ASSERT_OK(Put(1, "foo1_1", rnd_str));
  ASSERT_OK(Put(1, "foo1_2", rnd_str));
  ASSERT_OK(Put(1, "foo1_3", rnd_str));
  ASSERT_OK(Put(1, "foo2", "bar"));
  ASSERT_OK(Put(1, "foo3_1", rnd_str));
  ASSERT_OK(Put(1, "foo3_2", rnd_str));
  ASSERT_OK(Put(1, "foo3_3", rnd_str));
  ASSERT_OK(Put(1, "foo4", "bar"));
  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);

  // Following writes should fail as flush failed.
  ASSERT_NOK(Put(1, "foo2", "bar3"));
  ASSERT_EQ("bar", Get(1, "foo"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ASSERT_GE(1, range_sync_called.load());

  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_EQ("bar", Get(1, "foo"));
}

TEST_F(DBIOFailureTest, CompactSstRangeSyncError) {
  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.error_if_exists = false;
  options.paranoid_checks = true;
  options.write_buffer_size = 256 * 1024 * 1024;
  options.writable_file_max_buffer_size = 128 * 1024;
  options.bytes_per_sync = 128 * 1024;
  options.level0_file_num_compaction_trigger = 2;
  options.target_file_size_base = 256 * 1024 * 1024;
  options.disable_auto_compactions = true;
  BlockBasedTableOptions table_options;
  table_options.filter_policy.reset(NewBloomFilterPolicy(10));
  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);
  Status s;

  Random rnd(301);
  std::string rnd_str =
      rnd.RandomString(static_cast<int>(options.bytes_per_sync / 2));
  std::string rnd_str_512kb = rnd.RandomString(512 * 1024);

  ASSERT_OK(Put(1, "foo", "bar"));
  // First 1MB doesn't get range synced
  ASSERT_OK(Put(1, "foo0_0", rnd_str_512kb));
  ASSERT_OK(Put(1, "foo0_1", rnd_str_512kb));
  ASSERT_OK(Put(1, "foo1_1", rnd_str));
  ASSERT_OK(Put(1, "foo1_2", rnd_str));
  ASSERT_OK(Put(1, "foo1_3", rnd_str));
  Flush(1);
  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo3_1", rnd_str));
  ASSERT_OK(Put(1, "foo3_2", rnd_str));
  ASSERT_OK(Put(1, "foo3_3", rnd_str));
  ASSERT_OK(Put(1, "foo4", "bar"));
  Flush(1);
  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);

  std::atomic<int> range_sync_called(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "SpecialEnv::SStableFile::RangeSync", [&](void* arg) {
        if (range_sync_called.fetch_add(1) == 0) {
          Status* st = static_cast<Status*>(arg);
          *st = Status::IOError("range sync dummy error");
        }
      });
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

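  // Re-enabling auto compactions kicks off the pending compaction of the two
  // L0 files; its output file write then hits the injected RangeSync error.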
  ASSERT_OK(dbfull()->SetOptions(handles_[1],
                                 {
                                     {"disable_auto_compactions", "false"},
                                 }));
  dbfull()->TEST_WaitForCompact();

  // Following writes should fail as the compaction failed.
  ASSERT_NOK(Put(1, "foo2", "bar3"));
  ASSERT_EQ("bar", Get(1, "foo"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
  ASSERT_GE(1, range_sync_called.load());

  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_EQ("bar", Get(1, "foo"));
}

TEST_F(DBIOFailureTest, FlushSstCloseError) {
  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.error_if_exists = false;
  options.paranoid_checks = true;
  options.level0_file_num_compaction_trigger = 4;
  options.memtable_factory.reset(new SpecialSkipListFactory(2));

  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);
  Status s;
  std::atomic<int> close_called(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "SpecialEnv::SStableFile::Close", [&](void* arg) {
        if (close_called.fetch_add(1) == 0) {
          Status* st = static_cast<Status*>(arg);
          *st = Status::IOError("close dummy error");
        }
      });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo1", "bar1"));
  ASSERT_OK(Put(1, "foo", "bar2"));
  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);

  // Following writes should fail as flush failed.
  ASSERT_NOK(Put(1, "foo2", "bar3"));
  ASSERT_EQ("bar2", Get(1, "foo"));
  ASSERT_EQ("bar1", Get(1, "foo1"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_EQ("bar2", Get(1, "foo"));
  ASSERT_EQ("bar1", Get(1, "foo1"));
}

TEST_F(DBIOFailureTest, CompactionSstCloseError) {
  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.error_if_exists = false;
  options.paranoid_checks = true;
  options.level0_file_num_compaction_trigger = 2;
  options.disable_auto_compactions = true;

  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);
  Status s;

  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo2", "bar"));
  Flush(1);
  ASSERT_OK(Put(1, "foo", "bar2"));
  ASSERT_OK(Put(1, "foo2", "bar"));
  Flush(1);
  ASSERT_OK(Put(1, "foo", "bar3"));
  ASSERT_OK(Put(1, "foo2", "bar"));
  Flush(1);
  dbfull()->TEST_WaitForCompact();

  std::atomic<int> close_called(0);
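  // Unlike the flush tests above, the injected Close() failure here hits the
  // compaction output file, so the error surfaces through the compaction
  // path.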
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "SpecialEnv::SStableFile::Close", [&](void* arg) {
        if (close_called.fetch_add(1) == 0) {
          Status* st = static_cast<Status*>(arg);
          *st = Status::IOError("close dummy error");
        }
      });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(dbfull()->SetOptions(handles_[1],
                                 {
                                     {"disable_auto_compactions", "false"},
                                 }));
  dbfull()->TEST_WaitForCompact();

  // Following writes should fail as compaction failed.
  ASSERT_NOK(Put(1, "foo2", "bar3"));
  ASSERT_EQ("bar3", Get(1, "foo"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_EQ("bar3", Get(1, "foo"));
}

TEST_F(DBIOFailureTest, FlushSstSyncError) {
  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.error_if_exists = false;
  options.paranoid_checks = true;
  options.use_fsync = false;
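  // With use_fsync=false the table file is synced via Sync() rather than
  // Fsync(), which is presumably why the hook below targets
  // SpecialEnv::SStableFile::Sync.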
  options.level0_file_num_compaction_trigger = 4;
  options.memtable_factory.reset(new SpecialSkipListFactory(2));

  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);
  Status s;
  std::atomic<int> sync_called(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "SpecialEnv::SStableFile::Sync", [&](void* arg) {
        if (sync_called.fetch_add(1) == 0) {
          Status* st = static_cast<Status*>(arg);
          *st = Status::IOError("sync dummy error");
        }
      });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();

  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo1", "bar1"));
  ASSERT_OK(Put(1, "foo", "bar2"));
  dbfull()->TEST_WaitForFlushMemTable(handles_[1]);

  // Following writes should fail as flush failed.
  ASSERT_NOK(Put(1, "foo2", "bar3"));
  ASSERT_EQ("bar2", Get(1, "foo"));
  ASSERT_EQ("bar1", Get(1, "foo1"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_EQ("bar2", Get(1, "foo"));
  ASSERT_EQ("bar1", Get(1, "foo1"));
}

TEST_F(DBIOFailureTest, CompactionSstSyncError) {
  Options options = CurrentOptions();
  options.env = env_;
  options.create_if_missing = true;
  options.error_if_exists = false;
  options.paranoid_checks = true;
  options.level0_file_num_compaction_trigger = 2;
  options.disable_auto_compactions = true;
  options.use_fsync = false;

  DestroyAndReopen(options);
  CreateAndReopenWithCF({"pikachu"}, options);
  Status s;

  ASSERT_OK(Put(1, "foo", "bar"));
  ASSERT_OK(Put(1, "foo2", "bar"));
  Flush(1);
  ASSERT_OK(Put(1, "foo", "bar2"));
  ASSERT_OK(Put(1, "foo2", "bar"));
  Flush(1);
  ASSERT_OK(Put(1, "foo", "bar3"));
  ASSERT_OK(Put(1, "foo2", "bar"));
  Flush(1);
  dbfull()->TEST_WaitForCompact();

  std::atomic<int> sync_called(0);
  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->SetCallBack(
      "SpecialEnv::SStableFile::Sync", [&](void* arg) {
        if (sync_called.fetch_add(1) == 0) {
          Status* st = static_cast<Status*>(arg);
          *st = Status::IOError("sync dummy error");
        }
      });

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
  ASSERT_OK(dbfull()->SetOptions(handles_[1],
                                 {
                                     {"disable_auto_compactions", "false"},
                                 }));
  dbfull()->TEST_WaitForCompact();

  // Following writes should fail as compaction failed.
  ASSERT_NOK(Put(1, "foo2", "bar3"));
  ASSERT_EQ("bar3", Get(1, "foo"));

  ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();

  ReopenWithColumnFamilies({"default", "pikachu"}, options);
  ASSERT_EQ("bar3", Get(1, "foo"));
}
#endif  // !(defined NDEBUG) || !defined(OS_WIN)
#endif  // ROCKSDB_LITE
}  // namespace ROCKSDB_NAMESPACE

int main(int argc, char** argv) {
  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}