1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
13 #include "global/global_init.h"
14 #include "common/ceph_argparse.h"
15 #include "include/stringify.h"
16 #include "include/scope_guard.h"
17 #include "common/errno.h"
18 #include <gtest/gtest.h>
20 #include "os/bluestore/BlueFS.h"
// Allocates `size` bytes and fills them with pseudo-random data.
//
// The engine is default-constructed on every call, so two calls produce the
// same byte sequence: callers get reproducible filler data, not
// unpredictable randomness.
//
// size: number of bytes to allocate and fill (0 is allowed).
// Returns an owning pointer to the filled buffer.
std::unique_ptr<char[]> gen_buffer(uint64_t size)
{
    std::unique_ptr<char[]> buffer = std::make_unique<char[]>(size);
    // The result type of independent_bits_engine has to be unsigned short or
    // wider; `unsigned char` is not a permitted UIntType. We still request
    // only CHAR_BIT bits per draw, so every value fits in a single byte.
    std::independent_bits_engine<std::default_random_engine, CHAR_BIT, unsigned short> e;
    std::generate(buffer.get(), buffer.get()+size, std::ref(e));
    return buffer;
}
34 TempBdev(uint64_t size
)
35 : path
{get_temp_bdev(size
)}
40 const std::string path
;
42 static string
get_temp_bdev(uint64_t size
)
45 string fn
= "ceph_test_bluefs.tmp.block." + stringify(getpid())
46 + "." + stringify(++n
);
47 int fd
= ::open(fn
.c_str(), O_CREAT
|O_RDWR
|O_TRUNC
, 0644);
49 int r
= ::ftruncate(fd
, size
);
54 static void rm_temp_bdev(string f
)
61 std::stack
<std::pair
<std::string
, std::string
>> saved_settings
;
64 ConfSaver(ConfigProxy
& conf
) : conf(conf
) {
65 conf
._clear_safe_to_start_threads();
68 conf
._clear_safe_to_start_threads();
69 while(saved_settings
.size() > 0) {
70 auto& e
= saved_settings
.top();
71 conf
.set_val_or_die(e
.first
, e
.second
);
74 conf
.set_safe_to_start_threads();
75 conf
.apply_changes(nullptr);
77 void SetVal(const char* key
, const char* val
) {
78 std::string
skey(key
);
80 conf
.get_val(skey
, &prev_val
);
81 conf
.set_val_or_die(skey
, val
);
82 saved_settings
.emplace(skey
, prev_val
);
85 conf
.set_safe_to_start_threads();
86 conf
.apply_changes(nullptr);
91 uint64_t size
= 1048576 * 128;
94 BlueFS
fs(g_ceph_context
);
95 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
96 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
99 TEST(BlueFS
, mkfs_mount
) {
100 uint64_t size
= 1048576 * 128;
102 BlueFS
fs(g_ceph_context
);
103 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
105 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
106 ASSERT_EQ(0, fs
.mount());
107 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
108 ASSERT_EQ(fs
.get_total(BlueFS::BDEV_DB
), size
- 1048576);
109 ASSERT_LT(fs
.get_free(BlueFS::BDEV_DB
), size
- 1048576);
113 TEST(BlueFS
, write_read
) {
114 uint64_t size
= 1048576 * 128;
116 BlueFS
fs(g_ceph_context
);
117 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
119 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
120 ASSERT_EQ(0, fs
.mount());
121 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
123 BlueFS::FileWriter
*h
;
124 ASSERT_EQ(0, fs
.mkdir("dir"));
125 ASSERT_EQ(0, fs
.open_for_write("dir", "file", &h
, false));
133 BlueFS::FileReader
*h
;
134 ASSERT_EQ(0, fs
.open_for_read("dir", "file", &h
));
136 ASSERT_EQ(9, fs
.read(h
, 0, 1024, &bl
, NULL
));
137 ASSERT_EQ(0, strncmp("foobarbaz", bl
.c_str(), 9));
143 TEST(BlueFS
, small_appends
) {
144 uint64_t size
= 1048576 * 128;
146 BlueFS
fs(g_ceph_context
);
147 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
149 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
150 ASSERT_EQ(0, fs
.mount());
151 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
153 BlueFS::FileWriter
*h
;
154 ASSERT_EQ(0, fs
.mkdir("dir"));
155 ASSERT_EQ(0, fs
.open_for_write("dir", "file", &h
, false));
156 for (unsigned i
= 0; i
< 10000; ++i
) {
157 h
->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
163 BlueFS::FileWriter
*h
;
164 ASSERT_EQ(0, fs
.open_for_write("dir", "file_sync", &h
, false));
165 for (unsigned i
= 0; i
< 1000; ++i
) {
166 h
->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
167 ASSERT_EQ(0, fs
.fsync(h
));
174 TEST(BlueFS
, very_large_write
) {
175 // we'll write a ~5G file, so allocate more than that for the whole fs
176 uint64_t size
= 1048576 * 1024 * 6ull;
178 BlueFS
fs(g_ceph_context
);
180 bool old
= g_ceph_context
->_conf
.get_val
<bool>("bluefs_buffered_io");
181 g_ceph_context
->_conf
.set_val("bluefs_buffered_io", "false");
182 uint64_t total_written
= 0;
184 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
186 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
187 ASSERT_EQ(0, fs
.mount());
188 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
189 char buf
[1048571]; // this is biggish, but intentionally not evenly aligned
190 for (unsigned i
= 0; i
< sizeof(buf
); ++i
) {
194 BlueFS::FileWriter
*h
;
195 ASSERT_EQ(0, fs
.mkdir("dir"));
196 ASSERT_EQ(0, fs
.open_for_write("dir", "bigfile", &h
, false));
197 for (unsigned i
= 0; i
< 3*1024*1048576ull / sizeof(buf
); ++i
) {
198 h
->append(buf
, sizeof(buf
));
199 total_written
+= sizeof(buf
);
202 for (unsigned i
= 0; i
< 2*1024*1048576ull / sizeof(buf
); ++i
) {
203 h
->append(buf
, sizeof(buf
));
204 total_written
+= sizeof(buf
);
210 BlueFS::FileReader
*h
;
211 ASSERT_EQ(0, fs
.open_for_read("dir", "bigfile", &h
));
213 ASSERT_EQ(h
->file
->fnode
.size
, total_written
);
214 for (unsigned i
= 0; i
< 3*1024*1048576ull / sizeof(buf
); ++i
) {
216 fs
.read(h
, i
* sizeof(buf
), sizeof(buf
), &bl
, NULL
);
217 int r
= memcmp(buf
, bl
.c_str(), sizeof(buf
));
219 cerr
<< "read got mismatch at offset " << i
*sizeof(buf
) << " r " << r
224 for (unsigned i
= 0; i
< 2*1024*1048576ull / sizeof(buf
); ++i
) {
226 fs
.read(h
, i
* sizeof(buf
), sizeof(buf
), &bl
, NULL
);
227 int r
= memcmp(buf
, bl
.c_str(), sizeof(buf
));
229 cerr
<< "read got mismatch at offset " << i
*sizeof(buf
) << " r " << r
235 ASSERT_EQ(0, fs
.open_for_read("dir", "bigfile", &h
));
236 ASSERT_EQ(h
->file
->fnode
.size
, total_written
);
237 unique_ptr
<char> huge_buf(new char[h
->file
->fnode
.size
]);
238 auto l
= h
->file
->fnode
.size
;
239 int64_t r
= fs
.read(h
, 0, l
, NULL
, huge_buf
.get());
245 g_ceph_context
->_conf
.set_val("bluefs_buffered_io", stringify((int)old
));
248 TEST(BlueFS
, very_large_write2
) {
249 // we'll write a ~5G file, so allocate more than that for the whole fs
250 uint64_t size_full
= 1048576 * 1024 * 6ull;
251 uint64_t size
= 1048576 * 1024 * 5ull;
252 TempBdev bdev
{ size_full
};
253 BlueFS
fs(g_ceph_context
);
255 bool old
= g_ceph_context
->_conf
.get_val
<bool>("bluefs_buffered_io");
256 g_ceph_context
->_conf
.set_val("bluefs_buffered_io", "false");
257 uint64_t total_written
= 0;
259 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
261 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
262 ASSERT_EQ(0, fs
.mount());
263 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
265 char fill_arr
[1 << 20]; // 1M
266 for (size_t i
= 0; i
< sizeof(fill_arr
); ++i
) {
267 fill_arr
[i
] = (char)i
;
269 std::unique_ptr
<char[]> buf
;
270 buf
.reset(new char[size
]);
271 for (size_t i
= 0; i
< size
; i
+= sizeof(fill_arr
)) {
272 memcpy(buf
.get() + i
, fill_arr
, sizeof(fill_arr
));
275 BlueFS::FileWriter
* h
;
276 ASSERT_EQ(0, fs
.mkdir("dir"));
277 ASSERT_EQ(0, fs
.open_for_write("dir", "bigfile", &h
, false));
278 fs
.append_try_flush(h
, buf
.get(), size
);
279 total_written
= size
;
283 memset(buf
.get(), 0, size
);
285 BlueFS::FileReader
* h
;
286 ASSERT_EQ(0, fs
.open_for_read("dir", "bigfile", &h
));
287 ASSERT_EQ(h
->file
->fnode
.size
, total_written
);
288 auto l
= h
->file
->fnode
.size
;
289 int64_t r
= fs
.read(h
, 0, l
, NULL
, buf
.get());
291 for (size_t i
= 0; i
< size
; i
+= sizeof(fill_arr
)) {
292 ceph_assert(memcmp(buf
.get() + i
, fill_arr
, sizeof(fill_arr
)) == 0);
298 g_ceph_context
->_conf
.set_val("bluefs_buffered_io", stringify((int)old
));
// Size in bytes of each random buffer the writer helpers generate and append
// (passed to gen_buffer() and buffer::claim_char()). A typed constant instead
// of a preprocessor macro, so it obeys scope and type checking.
constexpr int ALLOC_SIZE = 4096;
303 void write_data(BlueFS
&fs
, uint64_t rationed_bytes
)
306 uint64_t written_bytes
= 0;
307 rationed_bytes
-= ALLOC_SIZE
;
310 ss
<< std::this_thread::get_id();
311 dir
.append(ss
.str());
313 dir
.append(to_string(j
));
314 ASSERT_EQ(0, fs
.mkdir(dir
));
316 string file
= "file.";
317 file
.append(to_string(j
));
318 BlueFS::FileWriter
*h
;
319 ASSERT_EQ(0, fs
.open_for_write(dir
, file
, &h
, false));
320 ASSERT_NE(nullptr, h
);
321 auto sg
= make_scope_guard([&fs
, h
] { fs
.close_writer(h
); });
323 std::unique_ptr
<char[]> buf
= gen_buffer(ALLOC_SIZE
);
324 bufferptr bp
= buffer::claim_char(ALLOC_SIZE
, buf
.get());
326 h
->append(bl
.c_str(), bl
.length());
331 written_bytes
+= g_conf()->bluefs_alloc_size
;
333 if ((rationed_bytes
- written_bytes
) <= g_conf()->bluefs_alloc_size
) {
339 void create_single_file(BlueFS
&fs
)
341 BlueFS::FileWriter
*h
;
343 string dir
= "dir.test";
344 ASSERT_EQ(0, fs
.mkdir(dir
));
345 string file
= "testfile";
346 ASSERT_EQ(0, fs
.open_for_write(dir
, file
, &h
, false));
348 std::unique_ptr
<char[]> buf
= gen_buffer(ALLOC_SIZE
);
349 bufferptr bp
= buffer::claim_char(ALLOC_SIZE
, buf
.get());
351 h
->append(bl
.c_str(), bl
.length());
356 void write_single_file(BlueFS
&fs
, uint64_t rationed_bytes
)
359 const string dir
= "dir.test";
360 const string file
= "testfile";
361 uint64_t written_bytes
= 0;
362 rationed_bytes
-= ALLOC_SIZE
;
364 BlueFS::FileWriter
*h
;
365 ASSERT_EQ(0, fs
.open_for_write(dir
, file
, &h
, false));
366 ASSERT_NE(nullptr, h
);
367 auto sg
= make_scope_guard([&fs
, h
] { fs
.close_writer(h
); });
369 std::unique_ptr
<char[]> buf
= gen_buffer(ALLOC_SIZE
);
370 bufferptr bp
= buffer::claim_char(ALLOC_SIZE
, buf
.get());
372 h
->append(bl
.c_str(), bl
.length());
377 written_bytes
+= g_conf()->bluefs_alloc_size
;
378 if ((rationed_bytes
- written_bytes
) <= g_conf()->bluefs_alloc_size
) {
// Flag consulted by sync_fs(), which the tests launch on separate
// std::thread instances.
// NOTE(review): this is a plain bool. If it is assigned while a sync thread
// is reading it, that is a data race; std::atomic<bool> would be the safe
// choice. Confirm against the code that sets the flag.
bool writes_done{false};
386 void sync_fs(BlueFS
&fs
)
389 if (writes_done
== true)
391 fs
.sync_metadata(false);
397 void do_join(std::thread
& t
)
402 void join_all(std::vector
<std::thread
>& v
)
404 std::for_each(v
.begin(),v
.end(),do_join
);
// Thread counts used by the multi-threaded flush, compaction and replay
// tests. Typed constants instead of preprocessor macros; names and values are
// unchanged.
constexpr int NUM_WRITERS = 3;
constexpr int NUM_SYNC_THREADS = 1;

constexpr int NUM_SINGLE_FILE_WRITERS = 1;
constexpr int NUM_MULTIPLE_FILE_WRITERS = 2;
413 TEST(BlueFS
, test_flush_1
) {
414 uint64_t size
= 1048576 * 128;
416 g_ceph_context
->_conf
.set_val(
419 g_ceph_context
->_conf
.apply_changes(nullptr);
421 BlueFS
fs(g_ceph_context
);
422 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
424 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
425 ASSERT_EQ(0, fs
.mount());
426 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
428 std::vector
<std::thread
> write_thread_multiple
;
429 uint64_t effective_size
= size
- (32 * 1048576); // leaving the last 32 MB for log compaction
430 uint64_t per_thread_bytes
= (effective_size
/(NUM_MULTIPLE_FILE_WRITERS
+ NUM_SINGLE_FILE_WRITERS
));
431 for (int i
=0; i
<NUM_MULTIPLE_FILE_WRITERS
; i
++) {
432 write_thread_multiple
.push_back(std::thread(write_data
, std::ref(fs
), per_thread_bytes
));
435 create_single_file(fs
);
436 std::vector
<std::thread
> write_thread_single
;
437 for (int i
=0; i
<NUM_SINGLE_FILE_WRITERS
; i
++) {
438 write_thread_single
.push_back(std::thread(write_single_file
, std::ref(fs
), per_thread_bytes
));
441 join_all(write_thread_single
);
442 join_all(write_thread_multiple
);
447 TEST(BlueFS
, test_flush_2
) {
448 uint64_t size
= 1048576 * 256;
450 g_ceph_context
->_conf
.set_val(
453 g_ceph_context
->_conf
.apply_changes(nullptr);
455 BlueFS
fs(g_ceph_context
);
456 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
458 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
459 ASSERT_EQ(0, fs
.mount());
460 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
462 uint64_t effective_size
= size
- (128 * 1048576); // leaving the last 128 MB for log compaction
463 uint64_t per_thread_bytes
= (effective_size
/(NUM_WRITERS
));
464 std::vector
<std::thread
> write_thread_multiple
;
465 for (int i
=0; i
<NUM_WRITERS
; i
++) {
466 write_thread_multiple
.push_back(std::thread(write_data
, std::ref(fs
), per_thread_bytes
));
469 join_all(write_thread_multiple
);
474 TEST(BlueFS
, test_flush_3
) {
475 uint64_t size
= 1048576 * 256;
477 g_ceph_context
->_conf
.set_val(
480 g_ceph_context
->_conf
.apply_changes(nullptr);
482 BlueFS
fs(g_ceph_context
);
483 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
485 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
486 ASSERT_EQ(0, fs
.mount());
487 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
489 std::vector
<std::thread
> write_threads
;
490 uint64_t effective_size
= size
- (64 * 1048576); // leaving the last 64 MB for log compaction
491 uint64_t per_thread_bytes
= (effective_size
/(NUM_WRITERS
));
492 for (int i
=0; i
<NUM_WRITERS
; i
++) {
493 write_threads
.push_back(std::thread(write_data
, std::ref(fs
), per_thread_bytes
));
496 std::vector
<std::thread
> sync_threads
;
497 for (int i
=0; i
<NUM_SYNC_THREADS
; i
++) {
498 sync_threads
.push_back(std::thread(sync_fs
, std::ref(fs
)));
501 join_all(write_threads
);
503 join_all(sync_threads
);
508 TEST(BlueFS
, test_simple_compaction_sync
) {
509 g_ceph_context
->_conf
.set_val(
510 "bluefs_compact_log_sync",
512 uint64_t size
= 1048576 * 128;
515 BlueFS
fs(g_ceph_context
);
516 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
518 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
519 ASSERT_EQ(0, fs
.mount());
520 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
522 for (int i
=0; i
<10; i
++) {
524 dir
.append(to_string(i
));
525 ASSERT_EQ(0, fs
.mkdir(dir
));
526 for (int j
=0; j
<10; j
++) {
527 string file
= "file.";
528 file
.append(to_string(j
));
529 BlueFS::FileWriter
*h
;
530 ASSERT_EQ(0, fs
.open_for_write(dir
, file
, &h
, false));
531 ASSERT_NE(nullptr, h
);
532 auto sg
= make_scope_guard([&fs
, h
] { fs
.close_writer(h
); });
534 std::unique_ptr
<char[]> buf
= gen_buffer(4096);
535 bufferptr bp
= buffer::claim_char(4096, buf
.get());
537 h
->append(bl
.c_str(), bl
.length());
543 for (int i
=0; i
<10; i
+=2) {
545 dir
.append(to_string(i
));
546 for (int j
=0; j
<10; j
++) {
547 string file
= "file.";
548 file
.append(to_string(j
));
549 fs
.unlink(dir
, file
);
550 fs
.sync_metadata(false);
552 ASSERT_EQ(0, fs
.rmdir(dir
));
553 fs
.sync_metadata(false);
560 TEST(BlueFS
, test_simple_compaction_async
) {
561 g_ceph_context
->_conf
.set_val(
562 "bluefs_compact_log_sync",
564 uint64_t size
= 1048576 * 128;
567 BlueFS
fs(g_ceph_context
);
568 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
570 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
571 ASSERT_EQ(0, fs
.mount());
572 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
574 for (int i
=0; i
<10; i
++) {
576 dir
.append(to_string(i
));
577 ASSERT_EQ(0, fs
.mkdir(dir
));
578 for (int j
=0; j
<10; j
++) {
579 string file
= "file.";
580 file
.append(to_string(j
));
581 BlueFS::FileWriter
*h
;
582 ASSERT_EQ(0, fs
.open_for_write(dir
, file
, &h
, false));
583 ASSERT_NE(nullptr, h
);
584 auto sg
= make_scope_guard([&fs
, h
] { fs
.close_writer(h
); });
586 std::unique_ptr
<char[]> buf
= gen_buffer(4096);
587 bufferptr bp
= buffer::claim_char(4096, buf
.get());
589 h
->append(bl
.c_str(), bl
.length());
595 for (int i
=0; i
<10; i
+=2) {
597 dir
.append(to_string(i
));
598 for (int j
=0; j
<10; j
++) {
599 string file
= "file.";
600 file
.append(to_string(j
));
601 fs
.unlink(dir
, file
);
602 fs
.sync_metadata(false);
604 ASSERT_EQ(0, fs
.rmdir(dir
));
605 fs
.sync_metadata(false);
612 TEST(BlueFS
, test_compaction_sync
) {
613 uint64_t size
= 1048576 * 128;
615 g_ceph_context
->_conf
.set_val(
618 g_ceph_context
->_conf
.set_val(
619 "bluefs_compact_log_sync",
621 const char* canary_dir
= "dir.after_compact_test";
622 const char* canary_file
= "file.after_compact_test";
623 const char* canary_data
= "some random data";
625 BlueFS
fs(g_ceph_context
);
626 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
628 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
629 ASSERT_EQ(0, fs
.mount());
630 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
632 std::vector
<std::thread
> write_threads
;
633 uint64_t effective_size
= size
- (32 * 1048576); // leaving the last 32 MB for log compaction
634 uint64_t per_thread_bytes
= (effective_size
/(NUM_WRITERS
));
635 for (int i
=0; i
<NUM_WRITERS
; i
++) {
636 write_threads
.push_back(std::thread(write_data
, std::ref(fs
), per_thread_bytes
));
639 std::vector
<std::thread
> sync_threads
;
640 for (int i
=0; i
<NUM_SYNC_THREADS
; i
++) {
641 sync_threads
.push_back(std::thread(sync_fs
, std::ref(fs
)));
644 join_all(write_threads
);
646 join_all(sync_threads
);
650 ASSERT_EQ(0, fs
.mkdir(canary_dir
));
651 BlueFS::FileWriter
*h
;
652 ASSERT_EQ(0, fs
.open_for_write(canary_dir
, canary_file
, &h
, false));
653 ASSERT_NE(nullptr, h
);
654 auto sg
= make_scope_guard([&fs
, h
] { fs
.close_writer(h
); });
655 h
->append(canary_data
, strlen(canary_data
));
664 BlueFS::FileReader
*h
;
665 ASSERT_EQ(0, fs
.open_for_read(canary_dir
, canary_file
, &h
));
666 ASSERT_NE(nullptr, h
);
668 ASSERT_EQ(strlen(canary_data
), fs
.read(h
, 0, 1024, &bl
, NULL
));
669 std::cout
<< bl
.c_str() << std::endl
;
670 ASSERT_EQ(0, strncmp(canary_data
, bl
.c_str(), strlen(canary_data
)));
676 TEST(BlueFS
, test_compaction_async
) {
677 uint64_t size
= 1048576 * 128;
679 g_ceph_context
->_conf
.set_val(
682 g_ceph_context
->_conf
.set_val(
683 "bluefs_compact_log_sync",
685 const char* canary_dir
= "dir.after_compact_test";
686 const char* canary_file
= "file.after_compact_test";
687 const char* canary_data
= "some random data";
689 BlueFS
fs(g_ceph_context
);
690 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
692 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
693 ASSERT_EQ(0, fs
.mount());
694 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
696 std::vector
<std::thread
> write_threads
;
697 uint64_t effective_size
= size
- (32 * 1048576); // leaving the last 32 MB for log compaction
698 uint64_t per_thread_bytes
= (effective_size
/(NUM_WRITERS
));
699 for (int i
=0; i
<NUM_WRITERS
; i
++) {
700 write_threads
.push_back(std::thread(write_data
, std::ref(fs
), per_thread_bytes
));
703 std::vector
<std::thread
> sync_threads
;
704 for (int i
=0; i
<NUM_SYNC_THREADS
; i
++) {
705 sync_threads
.push_back(std::thread(sync_fs
, std::ref(fs
)));
708 join_all(write_threads
);
710 join_all(sync_threads
);
714 ASSERT_EQ(0, fs
.mkdir(canary_dir
));
715 BlueFS::FileWriter
*h
;
716 ASSERT_EQ(0, fs
.open_for_write(canary_dir
, canary_file
, &h
, false));
717 ASSERT_NE(nullptr, h
);
718 auto sg
= make_scope_guard([&fs
, h
] { fs
.close_writer(h
); });
719 h
->append(canary_data
, strlen(canary_data
));
728 BlueFS::FileReader
*h
;
729 ASSERT_EQ(0, fs
.open_for_read(canary_dir
, canary_file
, &h
));
730 ASSERT_NE(nullptr, h
);
732 ASSERT_EQ(strlen(canary_data
), fs
.read(h
, 0, 1024, &bl
, NULL
));
733 std::cout
<< bl
.c_str() << std::endl
;
734 ASSERT_EQ(0, strncmp(canary_data
, bl
.c_str(), strlen(canary_data
)));
740 TEST(BlueFS
, test_replay
) {
741 uint64_t size
= 1048576 * 128;
743 g_ceph_context
->_conf
.set_val(
746 g_ceph_context
->_conf
.set_val(
747 "bluefs_compact_log_sync",
750 BlueFS
fs(g_ceph_context
);
751 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
753 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
754 ASSERT_EQ(0, fs
.mount());
755 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
757 std::vector
<std::thread
> write_threads
;
758 uint64_t effective_size
= size
- (32 * 1048576); // leaving the last 32 MB for log compaction
759 uint64_t per_thread_bytes
= (effective_size
/(NUM_WRITERS
));
760 for (int i
=0; i
<NUM_WRITERS
; i
++) {
761 write_threads
.push_back(std::thread(write_data
, std::ref(fs
), per_thread_bytes
));
764 std::vector
<std::thread
> sync_threads
;
765 for (int i
=0; i
<NUM_SYNC_THREADS
; i
++) {
766 sync_threads
.push_back(std::thread(sync_fs
, std::ref(fs
)));
769 join_all(write_threads
);
771 join_all(sync_threads
);
775 // remount and check that the log can be replayed safely
776 ASSERT_EQ(0, fs
.mount());
777 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
781 TEST(BlueFS
, test_replay_growth
) {
782 uint64_t size
= 1048576LL * (2 * 1024 + 128);
785 ConfSaver
conf(g_ceph_context
->_conf
);
786 conf
.SetVal("bluefs_alloc_size", "4096");
787 conf
.SetVal("bluefs_shared_alloc_size", "4096");
788 conf
.SetVal("bluefs_compact_log_sync", "false");
789 conf
.SetVal("bluefs_min_log_runway", "32768");
790 conf
.SetVal("bluefs_max_log_runway", "65536");
791 conf
.SetVal("bluefs_allocator", "stupid");
792 conf
.SetVal("bluefs_sync_write", "true");
795 BlueFS
fs(g_ceph_context
);
796 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
798 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
799 ASSERT_EQ(0, fs
.mount());
800 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
801 ASSERT_EQ(0, fs
.mkdir("dir"));
804 BlueFS::FileWriter
*h
;
805 ASSERT_EQ(0, fs
.open_for_write("dir", "file", &h
, false));
806 for (size_t i
= 0; i
< 10000; i
++) {
807 h
->append(data
, 2000);
811 fs
.umount(true); //do not compact on exit!
813 // remount and check that the log can be replayed safely
814 ASSERT_EQ(0, fs
.mount());
815 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
819 TEST(BlueFS
, test_tracker_50965
) {
820 uint64_t size_wal
= 1048576 * 64;
821 TempBdev bdev_wal
{size_wal
};
822 uint64_t size_db
= 1048576 * 128;
823 TempBdev bdev_db
{size_db
};
824 uint64_t size_slow
= 1048576 * 256;
825 TempBdev bdev_slow
{size_slow
};
827 ConfSaver
conf(g_ceph_context
->_conf
);
828 conf
.SetVal("bluefs_min_flush_size", "65536");
831 BlueFS
fs(g_ceph_context
);
832 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_WAL
, bdev_wal
.path
, false, 0));
833 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev_db
.path
, false, 0));
834 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_SLOW
, bdev_slow
.path
, false, 0));
836 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, true, true }));
837 ASSERT_EQ(0, fs
.mount());
838 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, true, true }));
840 string dir_slow
= "dir.slow";
841 ASSERT_EQ(0, fs
.mkdir(dir_slow
));
842 string dir_db
= "dir_db";
843 ASSERT_EQ(0, fs
.mkdir(dir_db
));
845 string file_slow
= "file";
846 BlueFS::FileWriter
*h_slow
;
847 ASSERT_EQ(0, fs
.open_for_write(dir_slow
, file_slow
, &h_slow
, false));
848 ASSERT_NE(nullptr, h_slow
);
850 string file_db
= "file";
851 BlueFS::FileWriter
*h_db
;
852 ASSERT_EQ(0, fs
.open_for_write(dir_db
, file_db
, &h_db
, false));
853 ASSERT_NE(nullptr, h_db
);
856 std::unique_ptr
<char[]> buf1
= gen_buffer(70000);
857 bufferptr bp1
= buffer::claim_char(70000, buf1
.get());
859 h_slow
->append(bl1
.c_str(), bl1
.length());
862 uint64_t h_slow_dirty_seq_1
= fs
.debug_get_dirty_seq(h_slow
);
865 std::unique_ptr
<char[]> buf2
= gen_buffer(1000);
866 bufferptr bp2
= buffer::claim_char(1000, buf2
.get());
868 h_db
->append(bl2
.c_str(), bl2
.length());
871 uint64_t h_slow_dirty_seq_2
= fs
.debug_get_dirty_seq(h_slow
);
872 bool h_slow_dev_dirty
= fs
.debug_get_is_dev_dirty(h_slow
, BlueFS::BDEV_SLOW
);
874 //problem if allocations are stable in log but slow device is not flushed yet
875 ASSERT_FALSE(h_slow_dirty_seq_1
!= 0 &&
876 h_slow_dirty_seq_2
== 0 &&
877 h_slow_dev_dirty
== true);
879 fs
.close_writer(h_slow
);
880 fs
.close_writer(h_db
);
885 TEST(BlueFS
, test_truncate_stable_53129
) {
887 ConfSaver
conf(g_ceph_context
->_conf
);
888 conf
.SetVal("bluefs_min_flush_size", "65536");
891 uint64_t size_wal
= 1048576 * 64;
892 TempBdev bdev_wal
{size_wal
};
893 uint64_t size_db
= 1048576 * 128;
894 TempBdev bdev_db
{size_db
};
895 uint64_t size_slow
= 1048576 * 256;
896 TempBdev bdev_slow
{size_slow
};
898 BlueFS
fs(g_ceph_context
);
899 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_WAL
, bdev_wal
.path
, false, 0));
900 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev_db
.path
, false, 0));
901 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_SLOW
, bdev_slow
.path
, false, 0));
903 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, true, true }));
904 ASSERT_EQ(0, fs
.mount());
905 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, true, true }));
907 string dir_slow
= "dir.slow";
908 ASSERT_EQ(0, fs
.mkdir(dir_slow
));
909 string dir_db
= "dir_db";
910 ASSERT_EQ(0, fs
.mkdir(dir_db
));
912 string file_slow
= "file";
913 BlueFS::FileWriter
*h_slow
;
914 ASSERT_EQ(0, fs
.open_for_write(dir_slow
, file_slow
, &h_slow
, false));
915 ASSERT_NE(nullptr, h_slow
);
917 string file_db
= "file";
918 BlueFS::FileWriter
*h_db
;
919 ASSERT_EQ(0, fs
.open_for_write(dir_db
, file_db
, &h_db
, false));
920 ASSERT_NE(nullptr, h_db
);
923 std::unique_ptr
<char[]> buf1
= gen_buffer(70000);
924 bufferptr bp1
= buffer::claim_char(70000, buf1
.get());
927 h_slow
->append(bl1
.c_str(), bl1
.length());
929 // and truncate to 60000 bytes
930 fs
.truncate(h_slow
, 60000);
932 // write something to file on DB device
934 std::unique_ptr
<char[]> buf2
= gen_buffer(1000);
935 bufferptr bp2
= buffer::claim_char(1000, buf2
.get());
937 h_db
->append(bl2
.c_str(), bl2
.length());
938 // and force bluefs log to flush
941 // This is the actual test point.
942 // We completed truncate, and we expect
943 // - size to be 60000
944 // - data to be stable on slow device
946 // - size = 0 or file does not exist
947 // - dev_dirty is irrelevant
948 bool h_slow_dev_dirty
= fs
.debug_get_is_dev_dirty(h_slow
, BlueFS::BDEV_SLOW
);
949 // Imagine power goes down here.
951 fs
.close_writer(h_slow
);
952 fs
.close_writer(h_db
);
956 ASSERT_EQ(0, fs
.mount());
957 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, true, true }));
961 ASSERT_EQ(0, fs
.stat("dir.slow", "file", &size
, &mtime
));
962 // check file size 60000
963 ASSERT_EQ(size
, 60000);
964 // check that dev_dirty was false (data stable on media)
965 ASSERT_EQ(h_slow_dev_dirty
, false);
970 TEST(BlueFS
, test_update_ino1_delta_after_replay
) {
971 uint64_t size
= 1048576LL * (2 * 1024 + 128);
974 ConfSaver
conf(g_ceph_context
->_conf
);
975 conf
.SetVal("bluefs_alloc_size", "4096");
976 conf
.SetVal("bluefs_shared_alloc_size", "4096");
977 conf
.SetVal("bluefs_compact_log_sync", "false");
978 conf
.SetVal("bluefs_min_log_runway", "32768");
979 conf
.SetVal("bluefs_max_log_runway", "65536");
980 conf
.SetVal("bluefs_allocator", "stupid");
983 BlueFS
fs(g_ceph_context
);
984 ASSERT_EQ(0, fs
.add_block_device(BlueFS::BDEV_DB
, bdev
.path
, false, 1048576));
986 ASSERT_EQ(0, fs
.mkfs(fsid
, { BlueFS::BDEV_DB
, false, false }));
987 ASSERT_EQ(0, fs
.mount());
988 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
989 ASSERT_EQ(0, fs
.mkdir("dir"));
992 BlueFS::FileWriter
*h
;
993 ASSERT_EQ(0, fs
.open_for_write("dir", "file", &h
, false));
994 for (size_t i
= 0; i
< 100; i
++) {
995 h
->append(data
, 2000);
999 fs
.umount(true); //do not compact on exit!
1001 ASSERT_EQ(0, fs
.mount());
1002 ASSERT_EQ(0, fs
.open_for_write("dir", "file2", &h
, false));
1003 for (size_t i
= 0; i
< 100; i
++) {
1004 h
->append(data
, 2000);
1010 // remount and check that the log can be replayed safely
1011 ASSERT_EQ(0, fs
.mount());
1012 ASSERT_EQ(0, fs
.maybe_verify_layout({ BlueFS::BDEV_DB
, false, false }));
1016 int main(int argc
, char **argv
) {
1017 auto args
= argv_to_vec(argc
, argv
);
1018 map
<string
,string
> defaults
= {
1019 { "debug_bluefs", "1/20" },
1020 { "debug_bdev", "1/20" }
1023 auto cct
= global_init(&defaults
, args
, CEPH_ENTITY_TYPE_CLIENT
,
1024 CODE_ENVIRONMENT_UTILITY
,
1025 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE
);
1026 common_init_finish(g_ceph_context
);
1027 g_ceph_context
->_conf
.set_val(
1028 "enable_experimental_unrecoverable_data_corrupting_features",
1030 g_ceph_context
->_conf
.apply_changes(nullptr);
1032 ::testing::InitGoogleTest(&argc
, argv
);
1033 return RUN_ALL_TESTS();