]>
Commit | Line | Data |
---|---|---|
1 | // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. | |
2 | // This source code is licensed under the BSD-style license found in the | |
3 | // LICENSE file in the root directory of this source tree. An additional grant | |
4 | // of patent rights can be found in the PATENTS file in the same directory. | |
5 | // | |
6 | // Copyright (c) 2011 The LevelDB Authors. All rights reserved. | |
7 | // Use of this source code is governed by a BSD-style license that can be | |
8 | // found in the LICENSE file. See the AUTHORS file for names of contributors. | |
9 | ||
10 | #ifndef ROCKSDB_LITE | |
11 | ||
12 | #include "rocksdb/db.h" | |
13 | ||
14 | #include <errno.h> | |
15 | #include <fcntl.h> | |
16 | #include <inttypes.h> | |
17 | #include <sys/stat.h> | |
18 | #include <sys/types.h> | |
19 | #include "db/db_impl.h" | |
20 | #include "db/log_format.h" | |
21 | #include "db/version_set.h" | |
22 | #include "rocksdb/cache.h" | |
23 | #include "rocksdb/env.h" | |
24 | #include "rocksdb/table.h" | |
25 | #include "rocksdb/write_batch.h" | |
26 | #include "util/filename.h" | |
27 | #include "util/string_util.h" | |
28 | #include "util/testharness.h" | |
29 | #include "util/testutil.h" | |
30 | ||
31 | namespace rocksdb { | |
32 | ||
// Length, in bytes, of every value produced by CorruptionTest::Value().
// A namespace-scope const already has internal linkage.
const int kValueSize = 1000;
34 | ||
35 | class CorruptionTest : public testing::Test { | |
36 | public: | |
37 | test::ErrorEnv env_; | |
38 | std::string dbname_; | |
39 | shared_ptr<Cache> tiny_cache_; | |
40 | Options options_; | |
41 | DB* db_; | |
42 | ||
43 | CorruptionTest() { | |
44 | // If LRU cache shard bit is smaller than 2 (or -1 which will automatically | |
45 | // set it to 0), test SequenceNumberRecovery will fail, likely because of a | |
46 | // bug in recovery code. Keep it 4 for now to make the test passes. | |
47 | tiny_cache_ = NewLRUCache(100, 4); | |
48 | options_.wal_recovery_mode = WALRecoveryMode::kTolerateCorruptedTailRecords; | |
49 | options_.env = &env_; | |
50 | dbname_ = test::TmpDir() + "/corruption_test"; | |
51 | DestroyDB(dbname_, options_); | |
52 | ||
53 | db_ = nullptr; | |
54 | options_.create_if_missing = true; | |
55 | BlockBasedTableOptions table_options; | |
56 | table_options.block_size_deviation = 0; // make unit test pass for now | |
57 | options_.table_factory.reset(NewBlockBasedTableFactory(table_options)); | |
58 | Reopen(); | |
59 | options_.create_if_missing = false; | |
60 | } | |
61 | ||
62 | ~CorruptionTest() { | |
63 | delete db_; | |
64 | DestroyDB(dbname_, Options()); | |
65 | } | |
66 | ||
67 | void CloseDb() { | |
68 | delete db_; | |
69 | db_ = nullptr; | |
70 | } | |
71 | ||
72 | Status TryReopen(Options* options = nullptr) { | |
73 | delete db_; | |
74 | db_ = nullptr; | |
75 | Options opt = (options ? *options : options_); | |
76 | opt.env = &env_; | |
77 | opt.arena_block_size = 4096; | |
78 | BlockBasedTableOptions table_options; | |
79 | table_options.block_cache = tiny_cache_; | |
80 | table_options.block_size_deviation = 0; | |
81 | opt.table_factory.reset(NewBlockBasedTableFactory(table_options)); | |
82 | return DB::Open(opt, dbname_, &db_); | |
83 | } | |
84 | ||
85 | void Reopen(Options* options = nullptr) { | |
86 | ASSERT_OK(TryReopen(options)); | |
87 | } | |
88 | ||
89 | void RepairDB() { | |
90 | delete db_; | |
91 | db_ = nullptr; | |
92 | ASSERT_OK(::rocksdb::RepairDB(dbname_, options_)); | |
93 | } | |
94 | ||
95 | void Build(int n, int flush_every = 0) { | |
96 | std::string key_space, value_space; | |
97 | WriteBatch batch; | |
98 | for (int i = 0; i < n; i++) { | |
99 | if (flush_every != 0 && i != 0 && i % flush_every == 0) { | |
100 | DBImpl* dbi = reinterpret_cast<DBImpl*>(db_); | |
101 | dbi->TEST_FlushMemTable(); | |
102 | } | |
103 | //if ((i % 100) == 0) fprintf(stderr, "@ %d of %d\n", i, n); | |
104 | Slice key = Key(i, &key_space); | |
105 | batch.Clear(); | |
106 | batch.Put(key, Value(i, &value_space)); | |
107 | ASSERT_OK(db_->Write(WriteOptions(), &batch)); | |
108 | } | |
109 | } | |
110 | ||
111 | void Check(int min_expected, int max_expected) { | |
112 | uint64_t next_expected = 0; | |
113 | uint64_t missed = 0; | |
114 | int bad_keys = 0; | |
115 | int bad_values = 0; | |
116 | int correct = 0; | |
117 | std::string value_space; | |
118 | // Do not verify checksums. If we verify checksums then the | |
119 | // db itself will raise errors because data is corrupted. | |
120 | // Instead, we want the reads to be successful and this test | |
121 | // will detect whether the appropriate corruptions have | |
122 | // occurred. | |
123 | Iterator* iter = db_->NewIterator(ReadOptions(false, true)); | |
124 | for (iter->SeekToFirst(); iter->Valid(); iter->Next()) { | |
125 | uint64_t key; | |
126 | Slice in(iter->key()); | |
127 | if (!ConsumeDecimalNumber(&in, &key) || | |
128 | !in.empty() || | |
129 | key < next_expected) { | |
130 | bad_keys++; | |
131 | continue; | |
132 | } | |
133 | missed += (key - next_expected); | |
134 | next_expected = key + 1; | |
135 | if (iter->value() != Value(static_cast<int>(key), &value_space)) { | |
136 | bad_values++; | |
137 | } else { | |
138 | correct++; | |
139 | } | |
140 | } | |
141 | delete iter; | |
142 | ||
143 | fprintf(stderr, | |
144 | "expected=%d..%d; got=%d; bad_keys=%d; bad_values=%d; missed=%llu\n", | |
145 | min_expected, max_expected, correct, bad_keys, bad_values, | |
146 | static_cast<unsigned long long>(missed)); | |
147 | ASSERT_LE(min_expected, correct); | |
148 | ASSERT_GE(max_expected, correct); | |
149 | } | |
150 | ||
151 | void CorruptFile(const std::string& fname, int offset, int bytes_to_corrupt) { | |
152 | struct stat sbuf; | |
153 | if (stat(fname.c_str(), &sbuf) != 0) { | |
154 | const char* msg = strerror(errno); | |
155 | ASSERT_TRUE(false) << fname << ": " << msg; | |
156 | } | |
157 | ||
158 | if (offset < 0) { | |
159 | // Relative to end of file; make it absolute | |
160 | if (-offset > sbuf.st_size) { | |
161 | offset = 0; | |
162 | } else { | |
163 | offset = static_cast<int>(sbuf.st_size + offset); | |
164 | } | |
165 | } | |
166 | if (offset > sbuf.st_size) { | |
167 | offset = static_cast<int>(sbuf.st_size); | |
168 | } | |
169 | if (offset + bytes_to_corrupt > sbuf.st_size) { | |
170 | bytes_to_corrupt = static_cast<int>(sbuf.st_size - offset); | |
171 | } | |
172 | ||
173 | // Do it | |
174 | std::string contents; | |
175 | Status s = ReadFileToString(Env::Default(), fname, &contents); | |
176 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
177 | for (int i = 0; i < bytes_to_corrupt; i++) { | |
178 | contents[i + offset] ^= 0x80; | |
179 | } | |
180 | s = WriteStringToFile(Env::Default(), contents, fname); | |
181 | ASSERT_TRUE(s.ok()) << s.ToString(); | |
182 | } | |
183 | ||
184 | void Corrupt(FileType filetype, int offset, int bytes_to_corrupt) { | |
185 | // Pick file to corrupt | |
186 | std::vector<std::string> filenames; | |
187 | ASSERT_OK(env_.GetChildren(dbname_, &filenames)); | |
188 | uint64_t number; | |
189 | FileType type; | |
190 | std::string fname; | |
191 | int picked_number = -1; | |
192 | for (size_t i = 0; i < filenames.size(); i++) { | |
193 | if (ParseFileName(filenames[i], &number, &type) && | |
194 | type == filetype && | |
195 | static_cast<int>(number) > picked_number) { // Pick latest file | |
196 | fname = dbname_ + "/" + filenames[i]; | |
197 | picked_number = static_cast<int>(number); | |
198 | } | |
199 | } | |
200 | ASSERT_TRUE(!fname.empty()) << filetype; | |
201 | ||
202 | CorruptFile(fname, offset, bytes_to_corrupt); | |
203 | } | |
204 | ||
205 | // corrupts exactly one file at level `level`. if no file found at level, | |
206 | // asserts | |
207 | void CorruptTableFileAtLevel(int level, int offset, int bytes_to_corrupt) { | |
208 | std::vector<LiveFileMetaData> metadata; | |
209 | db_->GetLiveFilesMetaData(&metadata); | |
210 | for (const auto& m : metadata) { | |
211 | if (m.level == level) { | |
212 | CorruptFile(dbname_ + "/" + m.name, offset, bytes_to_corrupt); | |
213 | return; | |
214 | } | |
215 | } | |
216 | ASSERT_TRUE(false) << "no file found at level"; | |
217 | } | |
218 | ||
219 | ||
220 | int Property(const std::string& name) { | |
221 | std::string property; | |
222 | int result; | |
223 | if (db_->GetProperty(name, &property) && | |
224 | sscanf(property.c_str(), "%d", &result) == 1) { | |
225 | return result; | |
226 | } else { | |
227 | return -1; | |
228 | } | |
229 | } | |
230 | ||
231 | // Return the ith key | |
232 | Slice Key(int i, std::string* storage) { | |
233 | char buf[100]; | |
234 | snprintf(buf, sizeof(buf), "%016d", i); | |
235 | storage->assign(buf, strlen(buf)); | |
236 | return Slice(*storage); | |
237 | } | |
238 | ||
239 | // Return the value to associate with the specified key | |
240 | Slice Value(int k, std::string* storage) { | |
241 | if (k == 0) { | |
242 | // Ugh. Random seed of 0 used to produce no entropy. This code | |
243 | // preserves the implementation that was in place when all of the | |
244 | // magic values in this file were picked. | |
245 | *storage = std::string(kValueSize, ' '); | |
246 | return Slice(*storage); | |
247 | } else { | |
248 | Random r(k); | |
249 | return test::RandomString(&r, kValueSize, storage); | |
250 | } | |
251 | } | |
252 | }; | |
253 | ||
254 | TEST_F(CorruptionTest, Recovery) { | |
255 | Build(100); | |
256 | Check(100, 100); | |
257 | #ifdef OS_WIN | |
258 | // On Wndows OS Disk cache does not behave properly | |
259 | // We do not call FlushBuffers on every Flush. If we do not close | |
260 | // the log file prior to the corruption we end up with the first | |
261 | // block not corrupted but only the second. However, under the debugger | |
262 | // things work just fine but never pass when running normally | |
263 | // For that reason people may want to run with unbuffered I/O. That option | |
264 | // is not available for WAL though. | |
265 | CloseDb(); | |
266 | #endif | |
267 | Corrupt(kLogFile, 19, 1); // WriteBatch tag for first record | |
268 | Corrupt(kLogFile, log::kBlockSize + 1000, 1); // Somewhere in second block | |
269 | ASSERT_TRUE(!TryReopen().ok()); | |
270 | options_.paranoid_checks = false; | |
271 | Reopen(&options_); | |
272 | ||
273 | // The 64 records in the first two log blocks are completely lost. | |
274 | Check(36, 36); | |
275 | } | |
276 | ||
277 | TEST_F(CorruptionTest, RecoverWriteError) { | |
278 | env_.writable_file_error_ = true; | |
279 | Status s = TryReopen(); | |
280 | ASSERT_TRUE(!s.ok()); | |
281 | } | |
282 | ||
283 | TEST_F(CorruptionTest, NewFileErrorDuringWrite) { | |
284 | // Do enough writing to force minor compaction | |
285 | env_.writable_file_error_ = true; | |
286 | const int num = | |
287 | static_cast<int>(3 + (Options().write_buffer_size / kValueSize)); | |
288 | std::string value_storage; | |
289 | Status s; | |
290 | bool failed = false; | |
291 | for (int i = 0; i < num; i++) { | |
292 | WriteBatch batch; | |
293 | batch.Put("a", Value(100, &value_storage)); | |
294 | s = db_->Write(WriteOptions(), &batch); | |
295 | if (!s.ok()) { | |
296 | failed = true; | |
297 | } | |
298 | ASSERT_TRUE(!failed || !s.ok()); | |
299 | } | |
300 | ASSERT_TRUE(!s.ok()); | |
301 | ASSERT_GE(env_.num_writable_file_errors_, 1); | |
302 | env_.writable_file_error_ = false; | |
303 | Reopen(); | |
304 | } | |
305 | ||
306 | TEST_F(CorruptionTest, TableFile) { | |
307 | Build(100); | |
308 | DBImpl* dbi = reinterpret_cast<DBImpl*>(db_); | |
309 | dbi->TEST_FlushMemTable(); | |
310 | dbi->TEST_CompactRange(0, nullptr, nullptr); | |
311 | dbi->TEST_CompactRange(1, nullptr, nullptr); | |
312 | ||
313 | Corrupt(kTableFile, 100, 1); | |
314 | Check(99, 99); | |
315 | } | |
316 | ||
317 | TEST_F(CorruptionTest, TableFileIndexData) { | |
318 | Options options; | |
319 | // very big, we'll trigger flushes manually | |
320 | options.write_buffer_size = 100 * 1024 * 1024; | |
321 | Reopen(&options); | |
322 | // build 2 tables, flush at 5000 | |
323 | Build(10000, 5000); | |
324 | DBImpl* dbi = reinterpret_cast<DBImpl*>(db_); | |
325 | dbi->TEST_FlushMemTable(); | |
326 | ||
327 | // corrupt an index block of an entire file | |
328 | Corrupt(kTableFile, -2000, 500); | |
329 | Reopen(); | |
330 | // one full file should be readable, since only one was corrupted | |
331 | // the other file should be fully non-readable, since index was corrupted | |
332 | Check(5000, 5000); | |
333 | } | |
334 | ||
335 | TEST_F(CorruptionTest, MissingDescriptor) { | |
336 | Build(1000); | |
337 | RepairDB(); | |
338 | Reopen(); | |
339 | Check(1000, 1000); | |
340 | } | |
341 | ||
342 | TEST_F(CorruptionTest, SequenceNumberRecovery) { | |
343 | ASSERT_OK(db_->Put(WriteOptions(), "foo", "v1")); | |
344 | ASSERT_OK(db_->Put(WriteOptions(), "foo", "v2")); | |
345 | ASSERT_OK(db_->Put(WriteOptions(), "foo", "v3")); | |
346 | ASSERT_OK(db_->Put(WriteOptions(), "foo", "v4")); | |
347 | ASSERT_OK(db_->Put(WriteOptions(), "foo", "v5")); | |
348 | RepairDB(); | |
349 | Reopen(); | |
350 | std::string v; | |
351 | ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); | |
352 | ASSERT_EQ("v5", v); | |
353 | // Write something. If sequence number was not recovered properly, | |
354 | // it will be hidden by an earlier write. | |
355 | ASSERT_OK(db_->Put(WriteOptions(), "foo", "v6")); | |
356 | ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); | |
357 | ASSERT_EQ("v6", v); | |
358 | Reopen(); | |
359 | ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); | |
360 | ASSERT_EQ("v6", v); | |
361 | } | |
362 | ||
363 | TEST_F(CorruptionTest, CorruptedDescriptor) { | |
364 | ASSERT_OK(db_->Put(WriteOptions(), "foo", "hello")); | |
365 | DBImpl* dbi = reinterpret_cast<DBImpl*>(db_); | |
366 | dbi->TEST_FlushMemTable(); | |
367 | dbi->TEST_CompactRange(0, nullptr, nullptr); | |
368 | ||
369 | Corrupt(kDescriptorFile, 0, 1000); | |
370 | Status s = TryReopen(); | |
371 | ASSERT_TRUE(!s.ok()); | |
372 | ||
373 | RepairDB(); | |
374 | Reopen(); | |
375 | std::string v; | |
376 | ASSERT_OK(db_->Get(ReadOptions(), "foo", &v)); | |
377 | ASSERT_EQ("hello", v); | |
378 | } | |
379 | ||
380 | TEST_F(CorruptionTest, CompactionInputError) { | |
381 | Options options; | |
382 | Reopen(&options); | |
383 | Build(10); | |
384 | DBImpl* dbi = reinterpret_cast<DBImpl*>(db_); | |
385 | dbi->TEST_FlushMemTable(); | |
386 | dbi->TEST_CompactRange(0, nullptr, nullptr); | |
387 | dbi->TEST_CompactRange(1, nullptr, nullptr); | |
388 | ASSERT_EQ(1, Property("rocksdb.num-files-at-level2")); | |
389 | ||
390 | Corrupt(kTableFile, 100, 1); | |
391 | Check(9, 9); | |
392 | ||
393 | // Force compactions by writing lots of values | |
394 | Build(10000); | |
395 | Check(10000, 10000); | |
396 | } | |
397 | ||
398 | TEST_F(CorruptionTest, CompactionInputErrorParanoid) { | |
399 | Options options; | |
400 | options.paranoid_checks = true; | |
401 | options.write_buffer_size = 131072; | |
402 | options.max_write_buffer_number = 2; | |
403 | Reopen(&options); | |
404 | DBImpl* dbi = reinterpret_cast<DBImpl*>(db_); | |
405 | ||
406 | // Fill levels >= 1 | |
407 | for (int level = 1; level < dbi->NumberLevels(); level++) { | |
408 | dbi->Put(WriteOptions(), "", "begin"); | |
409 | dbi->Put(WriteOptions(), "~", "end"); | |
410 | dbi->TEST_FlushMemTable(); | |
411 | for (int comp_level = 0; comp_level < dbi->NumberLevels() - level; | |
412 | ++comp_level) { | |
413 | dbi->TEST_CompactRange(comp_level, nullptr, nullptr); | |
414 | } | |
415 | } | |
416 | ||
417 | Reopen(&options); | |
418 | ||
419 | dbi = reinterpret_cast<DBImpl*>(db_); | |
420 | Build(10); | |
421 | dbi->TEST_FlushMemTable(); | |
422 | dbi->TEST_WaitForCompact(); | |
423 | ASSERT_EQ(1, Property("rocksdb.num-files-at-level0")); | |
424 | ||
425 | CorruptTableFileAtLevel(0, 100, 1); | |
426 | Check(9, 9); | |
427 | ||
428 | // Write must eventually fail because of corrupted table | |
429 | Status s; | |
430 | std::string tmp1, tmp2; | |
431 | bool failed = false; | |
432 | for (int i = 0; i < 10000; i++) { | |
433 | s = db_->Put(WriteOptions(), Key(i, &tmp1), Value(i, &tmp2)); | |
434 | if (!s.ok()) { | |
435 | failed = true; | |
436 | } | |
437 | // if one write failed, every subsequent write must fail, too | |
438 | ASSERT_TRUE(!failed || !s.ok()) << "write did not fail in a corrupted db"; | |
439 | } | |
440 | ASSERT_TRUE(!s.ok()) << "write did not fail in corrupted paranoid db"; | |
441 | } | |
442 | ||
443 | TEST_F(CorruptionTest, UnrelatedKeys) { | |
444 | Build(10); | |
445 | DBImpl* dbi = reinterpret_cast<DBImpl*>(db_); | |
446 | dbi->TEST_FlushMemTable(); | |
447 | Corrupt(kTableFile, 100, 1); | |
448 | ||
449 | std::string tmp1, tmp2; | |
450 | ASSERT_OK(db_->Put(WriteOptions(), Key(1000, &tmp1), Value(1000, &tmp2))); | |
451 | std::string v; | |
452 | ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v)); | |
453 | ASSERT_EQ(Value(1000, &tmp2).ToString(), v); | |
454 | dbi->TEST_FlushMemTable(); | |
455 | ASSERT_OK(db_->Get(ReadOptions(), Key(1000, &tmp1), &v)); | |
456 | ASSERT_EQ(Value(1000, &tmp2).ToString(), v); | |
457 | } | |
458 | ||
459 | TEST_F(CorruptionTest, FileSystemStateCorrupted) { | |
460 | for (int iter = 0; iter < 2; ++iter) { | |
461 | Options options; | |
462 | options.paranoid_checks = true; | |
463 | options.create_if_missing = true; | |
464 | Reopen(&options); | |
465 | Build(10); | |
466 | ASSERT_OK(db_->Flush(FlushOptions())); | |
467 | DBImpl* dbi = reinterpret_cast<DBImpl*>(db_); | |
468 | std::vector<LiveFileMetaData> metadata; | |
469 | dbi->GetLiveFilesMetaData(&metadata); | |
470 | ASSERT_GT(metadata.size(), size_t(0)); | |
471 | std::string filename = dbname_ + metadata[0].name; | |
472 | ||
473 | delete db_; | |
474 | db_ = nullptr; | |
475 | ||
476 | if (iter == 0) { // corrupt file size | |
477 | unique_ptr<WritableFile> file; | |
478 | env_.NewWritableFile(filename, &file, EnvOptions()); | |
479 | file->Append(Slice("corrupted sst")); | |
480 | file.reset(); | |
481 | } else { // delete the file | |
482 | env_.DeleteFile(filename); | |
483 | } | |
484 | ||
485 | Status x = TryReopen(&options); | |
486 | ASSERT_TRUE(x.IsCorruption()); | |
487 | DestroyDB(dbname_, options_); | |
488 | Reopen(&options); | |
489 | } | |
490 | } | |
491 | ||
492 | } // namespace rocksdb | |
493 | ||
494 | int main(int argc, char** argv) { | |
495 | ::testing::InitGoogleTest(&argc, argv); | |
496 | return RUN_ALL_TESTS(); | |
497 | } | |
498 | ||
499 | #else | |
500 | #include <stdio.h> | |
501 | ||
// Entry point used in ROCKSDB_LITE builds: prints a skip notice to stderr
// and reports success without running any test.
int main(int argc, char** argv) {
  (void)argc;
  (void)argv;
  fputs("SKIPPED as RepairDB() is not supported in ROCKSDB_LITE\n", stderr);
  return 0;
}
506 | ||
507 | #endif // !ROCKSDB_LITE |