]> git.proxmox.com Git - ceph.git/blob - ceph/src/rocksdb/db/db_io_failure_test.cc
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / rocksdb / db / db_io_failure_test.cc
1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9
10 #include "db/db_test_util.h"
11 #include "port/stack_trace.h"
12
13 namespace rocksdb {
14
// Fixture for I/O fault-injection tests. DBTestBase provides env_, whose
// fault knobs (drop_writes_, no_space_, log_write_error_, ...) the tests
// below toggle to simulate device and filesystem failures.
class DBIOFailureTest : public DBTestBase {
 public:
  DBIOFailureTest() : DBTestBase("/db_io_failure_test") {}
};
19
20 #ifndef ROCKSDB_LITE
21 // Check that number of files does not grow when writes are dropped
22 TEST_F(DBIOFailureTest, DropWrites) {
23 do {
24 Options options = CurrentOptions();
25 options.env = env_;
26 options.paranoid_checks = false;
27 Reopen(options);
28
29 ASSERT_OK(Put("foo", "v1"));
30 ASSERT_EQ("v1", Get("foo"));
31 Compact("a", "z");
32 const size_t num_files = CountFiles();
33 // Force out-of-space errors
34 env_->drop_writes_.store(true, std::memory_order_release);
35 env_->sleep_counter_.Reset();
36 env_->no_slowdown_ = true;
37 for (int i = 0; i < 5; i++) {
38 if (option_config_ != kUniversalCompactionMultiLevel &&
39 option_config_ != kUniversalSubcompactions) {
40 for (int level = 0; level < dbfull()->NumberLevels(); level++) {
41 if (level > 0 && level == dbfull()->NumberLevels() - 1) {
42 break;
43 }
44 dbfull()->TEST_CompactRange(level, nullptr, nullptr, nullptr,
45 true /* disallow trivial move */);
46 }
47 } else {
48 dbfull()->CompactRange(CompactRangeOptions(), nullptr, nullptr);
49 }
50 }
51
52 std::string property_value;
53 ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
54 ASSERT_EQ("5", property_value);
55
56 env_->drop_writes_.store(false, std::memory_order_release);
57 ASSERT_LT(CountFiles(), num_files + 3);
58
59 // Check that compaction attempts slept after errors
60 // TODO @krad: Figure out why ASSERT_EQ 5 keeps failing in certain compiler
61 // versions
62 ASSERT_GE(env_->sleep_counter_.Read(), 4);
63 } while (ChangeCompactOptions());
64 }
65
66 // Check background error counter bumped on flush failures.
67 TEST_F(DBIOFailureTest, DropWritesFlush) {
68 do {
69 Options options = CurrentOptions();
70 options.env = env_;
71 options.max_background_flushes = 1;
72 Reopen(options);
73
74 ASSERT_OK(Put("foo", "v1"));
75 // Force out-of-space errors
76 env_->drop_writes_.store(true, std::memory_order_release);
77
78 std::string property_value;
79 // Background error count is 0 now.
80 ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
81 ASSERT_EQ("0", property_value);
82
83 dbfull()->TEST_FlushMemTable(true);
84
85 ASSERT_TRUE(db_->GetProperty("rocksdb.background-errors", &property_value));
86 ASSERT_EQ("1", property_value);
87
88 env_->drop_writes_.store(false, std::memory_order_release);
89 } while (ChangeCompactOptions());
90 }
91 #endif // ROCKSDB_LITE
92
93 // Check that CompactRange() returns failure if there is not enough space left
94 // on device
95 TEST_F(DBIOFailureTest, NoSpaceCompactRange) {
96 do {
97 Options options = CurrentOptions();
98 options.env = env_;
99 options.disable_auto_compactions = true;
100 Reopen(options);
101
102 // generate 5 tables
103 for (int i = 0; i < 5; ++i) {
104 ASSERT_OK(Put(Key(i), Key(i) + "v"));
105 ASSERT_OK(Flush());
106 }
107
108 // Force out-of-space errors
109 env_->no_space_.store(true, std::memory_order_release);
110
111 Status s = dbfull()->TEST_CompactRange(0, nullptr, nullptr, nullptr,
112 true /* disallow trivial move */);
113 ASSERT_TRUE(s.IsIOError());
114 ASSERT_TRUE(s.IsNoSpace());
115
116 env_->no_space_.store(false, std::memory_order_release);
117 } while (ChangeCompactOptions());
118 }
119
120 TEST_F(DBIOFailureTest, NonWritableFileSystem) {
121 do {
122 Options options = CurrentOptions();
123 options.write_buffer_size = 4096;
124 options.arena_block_size = 4096;
125 options.env = env_;
126 Reopen(options);
127 ASSERT_OK(Put("foo", "v1"));
128 env_->non_writeable_rate_.store(100);
129 std::string big(100000, 'x');
130 int errors = 0;
131 for (int i = 0; i < 20; i++) {
132 if (!Put("foo", big).ok()) {
133 errors++;
134 env_->SleepForMicroseconds(100000);
135 }
136 }
137 ASSERT_GT(errors, 0);
138 env_->non_writeable_rate_.store(0);
139 } while (ChangeCompactOptions());
140 }
141
142 #ifndef ROCKSDB_LITE
TEST_F(DBIOFailureTest, ManifestWriteError) {
  // Test for the following problem:
  // (a) Compaction produces file F
  // (b) Log record containing F is written to MANIFEST file, but Sync() fails
  // (c) GC deletes F
  // (d) After reopening DB, reads fail since deleted F is named in log record

  // We iterate twice. In the second iteration, everything is the
  // same except the log record never makes it to the MANIFEST file.
  for (int iter = 0; iter < 2; iter++) {
    // iter 0 fails the MANIFEST Sync(); iter 1 fails the MANIFEST write.
    std::atomic<bool>* error_type = (iter == 0) ? &env_->manifest_sync_error_
                                                : &env_->manifest_write_error_;

    // Insert foo=>bar mapping
    Options options = CurrentOptions();
    options.env = env_;
    options.create_if_missing = true;
    options.error_if_exists = false;
    options.paranoid_checks = true;
    DestroyAndReopen(options);
    ASSERT_OK(Put("foo", "bar"));
    ASSERT_EQ("bar", Get("foo"));

    // Memtable compaction (will succeed)
    Flush();
    ASSERT_EQ("bar", Get("foo"));
    // Push the file to the last level so the next compaction must rewrite it
    // (and therefore must log a new file to the MANIFEST).
    const int last = 2;
    MoveFilesToLevel(2);
    ASSERT_EQ(NumTableFilesAtLevel(last), 1);  // foo=>bar is now in last level

    // Merging compaction (will fail)
    error_type->store(true, std::memory_order_release);
    dbfull()->TEST_CompactRange(last, nullptr, nullptr);  // Should fail
    ASSERT_EQ("bar", Get("foo"));

    error_type->store(false, std::memory_order_release);

    // Since paranoid_checks=true, writes should fail
    ASSERT_NOK(Put("foo2", "bar2"));

    // Recovery: should not lose data
    ASSERT_EQ("bar", Get("foo"));

    // Try again with paranoid_checks=false
    Close();
    options.paranoid_checks = false;
    Reopen(options);

    // Merging compaction (will fail)
    error_type->store(true, std::memory_order_release);
    dbfull()->TEST_CompactRange(last, nullptr, nullptr);  // Should fail
    ASSERT_EQ("bar", Get("foo"));

    // Recovery: should not lose data
    error_type->store(false, std::memory_order_release);
    Reopen(options);
    ASSERT_EQ("bar", Get("foo"));

    // Since paranoid_checks=false, writes should succeed
    ASSERT_OK(Put("foo2", "bar2"));
    ASSERT_EQ("bar", Get("foo"));
    ASSERT_EQ("bar2", Get("foo2"));
  }
}
207
208 TEST_F(DBIOFailureTest, PutFailsParanoid) {
209 // Test the following:
210 // (a) A random put fails in paranoid mode (simulate by sync fail)
211 // (b) All other puts have to fail, even if writes would succeed
212 // (c) All of that should happen ONLY if paranoid_checks = true
213
214 Options options = CurrentOptions();
215 options.env = env_;
216 options.create_if_missing = true;
217 options.error_if_exists = false;
218 options.paranoid_checks = true;
219 DestroyAndReopen(options);
220 CreateAndReopenWithCF({"pikachu"}, options);
221 Status s;
222
223 ASSERT_OK(Put(1, "foo", "bar"));
224 ASSERT_OK(Put(1, "foo1", "bar1"));
225 // simulate error
226 env_->log_write_error_.store(true, std::memory_order_release);
227 s = Put(1, "foo2", "bar2");
228 ASSERT_TRUE(!s.ok());
229 env_->log_write_error_.store(false, std::memory_order_release);
230 s = Put(1, "foo3", "bar3");
231 // the next put should fail, too
232 ASSERT_TRUE(!s.ok());
233 // but we're still able to read
234 ASSERT_EQ("bar", Get(1, "foo"));
235
236 // do the same thing with paranoid checks off
237 options.paranoid_checks = false;
238 DestroyAndReopen(options);
239 CreateAndReopenWithCF({"pikachu"}, options);
240
241 ASSERT_OK(Put(1, "foo", "bar"));
242 ASSERT_OK(Put(1, "foo1", "bar1"));
243 // simulate error
244 env_->log_write_error_.store(true, std::memory_order_release);
245 s = Put(1, "foo2", "bar2");
246 ASSERT_TRUE(!s.ok());
247 env_->log_write_error_.store(false, std::memory_order_release);
248 s = Put(1, "foo3", "bar3");
249 // the next put should NOT fail
250 ASSERT_TRUE(s.ok());
251 }
252 #if !(defined NDEBUG) || !defined(OS_WIN)
253 TEST_F(DBIOFailureTest, FlushSstRangeSyncError) {
254 Options options = CurrentOptions();
255 options.env = env_;
256 options.create_if_missing = true;
257 options.error_if_exists = false;
258 options.paranoid_checks = true;
259 options.write_buffer_size = 256 * 1024 * 1024;
260 options.writable_file_max_buffer_size = 128 * 1024;
261 options.bytes_per_sync = 128 * 1024;
262 options.level0_file_num_compaction_trigger = 4;
263 options.memtable_factory.reset(new SpecialSkipListFactory(10));
264 BlockBasedTableOptions table_options;
265 table_options.filter_policy.reset(NewBloomFilterPolicy(10));
266 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
267
268 DestroyAndReopen(options);
269 CreateAndReopenWithCF({"pikachu"}, options);
270 Status s;
271
272 std::atomic<int> range_sync_called(0);
273 rocksdb::SyncPoint::GetInstance()->SetCallBack(
274 "SpecialEnv::SStableFile::RangeSync", [&](void* arg) {
275 if (range_sync_called.fetch_add(1) == 0) {
276 Status* st = static_cast<Status*>(arg);
277 *st = Status::IOError("range sync dummy error");
278 }
279 });
280 rocksdb::SyncPoint::GetInstance()->EnableProcessing();
281
282 Random rnd(301);
283 std::string rnd_str =
284 RandomString(&rnd, static_cast<int>(options.bytes_per_sync / 2));
285 std::string rnd_str_512kb = RandomString(&rnd, 512 * 1024);
286
287 ASSERT_OK(Put(1, "foo", "bar"));
288 // First 1MB doesn't get range synced
289 ASSERT_OK(Put(1, "foo0_0", rnd_str_512kb));
290 ASSERT_OK(Put(1, "foo0_1", rnd_str_512kb));
291 ASSERT_OK(Put(1, "foo1_1", rnd_str));
292 ASSERT_OK(Put(1, "foo1_2", rnd_str));
293 ASSERT_OK(Put(1, "foo1_3", rnd_str));
294 ASSERT_OK(Put(1, "foo2", "bar"));
295 ASSERT_OK(Put(1, "foo3_1", rnd_str));
296 ASSERT_OK(Put(1, "foo3_2", rnd_str));
297 ASSERT_OK(Put(1, "foo3_3", rnd_str));
298 ASSERT_OK(Put(1, "foo4", "bar"));
299 dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
300
301 // Following writes should fail as flush failed.
302 ASSERT_NOK(Put(1, "foo2", "bar3"));
303 ASSERT_EQ("bar", Get(1, "foo"));
304
305 rocksdb::SyncPoint::GetInstance()->DisableProcessing();
306 ASSERT_GE(1, range_sync_called.load());
307
308 ReopenWithColumnFamilies({"default", "pikachu"}, options);
309 ASSERT_EQ("bar", Get(1, "foo"));
310 }
311
312 TEST_F(DBIOFailureTest, CompactSstRangeSyncError) {
313 Options options = CurrentOptions();
314 options.env = env_;
315 options.create_if_missing = true;
316 options.error_if_exists = false;
317 options.paranoid_checks = true;
318 options.write_buffer_size = 256 * 1024 * 1024;
319 options.writable_file_max_buffer_size = 128 * 1024;
320 options.bytes_per_sync = 128 * 1024;
321 options.level0_file_num_compaction_trigger = 2;
322 options.target_file_size_base = 256 * 1024 * 1024;
323 options.disable_auto_compactions = true;
324 BlockBasedTableOptions table_options;
325 table_options.filter_policy.reset(NewBloomFilterPolicy(10));
326 options.table_factory.reset(NewBlockBasedTableFactory(table_options));
327 DestroyAndReopen(options);
328 CreateAndReopenWithCF({"pikachu"}, options);
329 Status s;
330
331 Random rnd(301);
332 std::string rnd_str =
333 RandomString(&rnd, static_cast<int>(options.bytes_per_sync / 2));
334 std::string rnd_str_512kb = RandomString(&rnd, 512 * 1024);
335
336 ASSERT_OK(Put(1, "foo", "bar"));
337 // First 1MB doesn't get range synced
338 ASSERT_OK(Put(1, "foo0_0", rnd_str_512kb));
339 ASSERT_OK(Put(1, "foo0_1", rnd_str_512kb));
340 ASSERT_OK(Put(1, "foo1_1", rnd_str));
341 ASSERT_OK(Put(1, "foo1_2", rnd_str));
342 ASSERT_OK(Put(1, "foo1_3", rnd_str));
343 Flush(1);
344 ASSERT_OK(Put(1, "foo", "bar"));
345 ASSERT_OK(Put(1, "foo3_1", rnd_str));
346 ASSERT_OK(Put(1, "foo3_2", rnd_str));
347 ASSERT_OK(Put(1, "foo3_3", rnd_str));
348 ASSERT_OK(Put(1, "foo4", "bar"));
349 Flush(1);
350 dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
351
352 std::atomic<int> range_sync_called(0);
353 rocksdb::SyncPoint::GetInstance()->SetCallBack(
354 "SpecialEnv::SStableFile::RangeSync", [&](void* arg) {
355 if (range_sync_called.fetch_add(1) == 0) {
356 Status* st = static_cast<Status*>(arg);
357 *st = Status::IOError("range sync dummy error");
358 }
359 });
360 rocksdb::SyncPoint::GetInstance()->EnableProcessing();
361
362 ASSERT_OK(dbfull()->SetOptions(handles_[1],
363 {
364 {"disable_auto_compactions", "false"},
365 }));
366 dbfull()->TEST_WaitForCompact();
367
368 // Following writes should fail as flush failed.
369 ASSERT_NOK(Put(1, "foo2", "bar3"));
370 ASSERT_EQ("bar", Get(1, "foo"));
371
372 rocksdb::SyncPoint::GetInstance()->DisableProcessing();
373 ASSERT_GE(1, range_sync_called.load());
374
375 ReopenWithColumnFamilies({"default", "pikachu"}, options);
376 ASSERT_EQ("bar", Get(1, "foo"));
377 }
378
379 TEST_F(DBIOFailureTest, FlushSstCloseError) {
380 Options options = CurrentOptions();
381 options.env = env_;
382 options.create_if_missing = true;
383 options.error_if_exists = false;
384 options.paranoid_checks = true;
385 options.level0_file_num_compaction_trigger = 4;
386 options.memtable_factory.reset(new SpecialSkipListFactory(2));
387
388 DestroyAndReopen(options);
389 CreateAndReopenWithCF({"pikachu"}, options);
390 Status s;
391 std::atomic<int> close_called(0);
392 rocksdb::SyncPoint::GetInstance()->SetCallBack(
393 "SpecialEnv::SStableFile::Close", [&](void* arg) {
394 if (close_called.fetch_add(1) == 0) {
395 Status* st = static_cast<Status*>(arg);
396 *st = Status::IOError("close dummy error");
397 }
398 });
399
400 rocksdb::SyncPoint::GetInstance()->EnableProcessing();
401
402 ASSERT_OK(Put(1, "foo", "bar"));
403 ASSERT_OK(Put(1, "foo1", "bar1"));
404 ASSERT_OK(Put(1, "foo", "bar2"));
405 dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
406
407 // Following writes should fail as flush failed.
408 ASSERT_NOK(Put(1, "foo2", "bar3"));
409 ASSERT_EQ("bar2", Get(1, "foo"));
410 ASSERT_EQ("bar1", Get(1, "foo1"));
411
412 rocksdb::SyncPoint::GetInstance()->DisableProcessing();
413
414 ReopenWithColumnFamilies({"default", "pikachu"}, options);
415 ASSERT_EQ("bar2", Get(1, "foo"));
416 ASSERT_EQ("bar1", Get(1, "foo1"));
417 }
418
419 TEST_F(DBIOFailureTest, CompactionSstCloseError) {
420 Options options = CurrentOptions();
421 options.env = env_;
422 options.create_if_missing = true;
423 options.error_if_exists = false;
424 options.paranoid_checks = true;
425 options.level0_file_num_compaction_trigger = 2;
426 options.disable_auto_compactions = true;
427
428 DestroyAndReopen(options);
429 CreateAndReopenWithCF({"pikachu"}, options);
430 Status s;
431
432 ASSERT_OK(Put(1, "foo", "bar"));
433 ASSERT_OK(Put(1, "foo2", "bar"));
434 Flush(1);
435 ASSERT_OK(Put(1, "foo", "bar2"));
436 ASSERT_OK(Put(1, "foo2", "bar"));
437 Flush(1);
438 ASSERT_OK(Put(1, "foo", "bar3"));
439 ASSERT_OK(Put(1, "foo2", "bar"));
440 Flush(1);
441 dbfull()->TEST_WaitForCompact();
442
443 std::atomic<int> close_called(0);
444 rocksdb::SyncPoint::GetInstance()->SetCallBack(
445 "SpecialEnv::SStableFile::Close", [&](void* arg) {
446 if (close_called.fetch_add(1) == 0) {
447 Status* st = static_cast<Status*>(arg);
448 *st = Status::IOError("close dummy error");
449 }
450 });
451
452 rocksdb::SyncPoint::GetInstance()->EnableProcessing();
453 ASSERT_OK(dbfull()->SetOptions(handles_[1],
454 {
455 {"disable_auto_compactions", "false"},
456 }));
457 dbfull()->TEST_WaitForCompact();
458
459 // Following writes should fail as compaction failed.
460 ASSERT_NOK(Put(1, "foo2", "bar3"));
461 ASSERT_EQ("bar3", Get(1, "foo"));
462
463 rocksdb::SyncPoint::GetInstance()->DisableProcessing();
464
465 ReopenWithColumnFamilies({"default", "pikachu"}, options);
466 ASSERT_EQ("bar3", Get(1, "foo"));
467 }
468
469 TEST_F(DBIOFailureTest, FlushSstSyncError) {
470 Options options = CurrentOptions();
471 options.env = env_;
472 options.create_if_missing = true;
473 options.error_if_exists = false;
474 options.paranoid_checks = true;
475 options.use_fsync = false;
476 options.level0_file_num_compaction_trigger = 4;
477 options.memtable_factory.reset(new SpecialSkipListFactory(2));
478
479 DestroyAndReopen(options);
480 CreateAndReopenWithCF({"pikachu"}, options);
481 Status s;
482 std::atomic<int> sync_called(0);
483 rocksdb::SyncPoint::GetInstance()->SetCallBack(
484 "SpecialEnv::SStableFile::Sync", [&](void* arg) {
485 if (sync_called.fetch_add(1) == 0) {
486 Status* st = static_cast<Status*>(arg);
487 *st = Status::IOError("sync dummy error");
488 }
489 });
490
491 rocksdb::SyncPoint::GetInstance()->EnableProcessing();
492
493 ASSERT_OK(Put(1, "foo", "bar"));
494 ASSERT_OK(Put(1, "foo1", "bar1"));
495 ASSERT_OK(Put(1, "foo", "bar2"));
496 dbfull()->TEST_WaitForFlushMemTable(handles_[1]);
497
498 // Following writes should fail as flush failed.
499 ASSERT_NOK(Put(1, "foo2", "bar3"));
500 ASSERT_EQ("bar2", Get(1, "foo"));
501 ASSERT_EQ("bar1", Get(1, "foo1"));
502
503 rocksdb::SyncPoint::GetInstance()->DisableProcessing();
504
505 ReopenWithColumnFamilies({"default", "pikachu"}, options);
506 ASSERT_EQ("bar2", Get(1, "foo"));
507 ASSERT_EQ("bar1", Get(1, "foo1"));
508 }
509
510 TEST_F(DBIOFailureTest, CompactionSstSyncError) {
511 Options options = CurrentOptions();
512 options.env = env_;
513 options.create_if_missing = true;
514 options.error_if_exists = false;
515 options.paranoid_checks = true;
516 options.level0_file_num_compaction_trigger = 2;
517 options.disable_auto_compactions = true;
518 options.use_fsync = false;
519
520 DestroyAndReopen(options);
521 CreateAndReopenWithCF({"pikachu"}, options);
522 Status s;
523
524 ASSERT_OK(Put(1, "foo", "bar"));
525 ASSERT_OK(Put(1, "foo2", "bar"));
526 Flush(1);
527 ASSERT_OK(Put(1, "foo", "bar2"));
528 ASSERT_OK(Put(1, "foo2", "bar"));
529 Flush(1);
530 ASSERT_OK(Put(1, "foo", "bar3"));
531 ASSERT_OK(Put(1, "foo2", "bar"));
532 Flush(1);
533 dbfull()->TEST_WaitForCompact();
534
535 std::atomic<int> sync_called(0);
536 rocksdb::SyncPoint::GetInstance()->SetCallBack(
537 "SpecialEnv::SStableFile::Sync", [&](void* arg) {
538 if (sync_called.fetch_add(1) == 0) {
539 Status* st = static_cast<Status*>(arg);
540 *st = Status::IOError("close dummy error");
541 }
542 });
543
544 rocksdb::SyncPoint::GetInstance()->EnableProcessing();
545 ASSERT_OK(dbfull()->SetOptions(handles_[1],
546 {
547 {"disable_auto_compactions", "false"},
548 }));
549 dbfull()->TEST_WaitForCompact();
550
551 // Following writes should fail as compaction failed.
552 ASSERT_NOK(Put(1, "foo2", "bar3"));
553 ASSERT_EQ("bar3", Get(1, "foo"));
554
555 rocksdb::SyncPoint::GetInstance()->DisableProcessing();
556
557 ReopenWithColumnFamilies({"default", "pikachu"}, options);
558 ASSERT_EQ("bar3", Get(1, "foo"));
559 }
560 #endif // !(defined NDEBUG) || !defined(OS_WIN)
561 #endif // ROCKSDB_LITE
562 } // namespace rocksdb
563
// Test entry point: install a crash backtrace handler, then run all tests.
int main(int argc, char** argv) {
  rocksdb::port::InstallStackTraceHandler();
  ::testing::InitGoogleTest(&argc, argv);
  return RUN_ALL_TESTS();
}