#include <random>
#include <thread>
#include <stack>
+#include <gtest/gtest.h>
#include "global/global_init.h"
#include "common/ceph_argparse.h"
#include "include/stringify.h"
#include "include/scope_guard.h"
#include "common/errno.h"
-#include <gtest/gtest.h>
+#include "os/bluestore/Allocator.h"
#include "os/bluestore/BlueFS.h"
+using namespace std;
+
std::unique_ptr<char[]> gen_buffer(uint64_t size)
{
std::unique_ptr<char[]> buffer = std::make_unique<char[]>(size);
delete h;
ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
ASSERT_EQ(h->file->fnode.size, total_written);
- unique_ptr<char> huge_buf(new char[h->file->fnode.size]);
+ auto huge_buf = std::make_unique<char[]>(h->file->fnode.size);
auto l = h->file->fnode.size;
int64_t r = fs.read(h, 0, l, NULL, huge_buf.get());
ASSERT_EQ(r, l);
g_ceph_context->_conf.set_val(
"bluefs_compact_log_sync",
"true");
+ const char* canary_dir = "dir.after_compact_test";
+ const char* canary_file = "file.after_compact_test";
+ const char* canary_data = "some random data";
BlueFS fs(g_ceph_context);
ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
writes_done = true;
join_all(sync_threads);
fs.compact_log();
+
+ {
+ ASSERT_EQ(0, fs.mkdir(canary_dir));
+ BlueFS::FileWriter *h;
+ ASSERT_EQ(0, fs.open_for_write(canary_dir, canary_file, &h, false));
+ ASSERT_NE(nullptr, h);
+ auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
+ h->append(canary_data, strlen(canary_data));
+ int r = fs.fsync(h);
+ ASSERT_EQ(r, 0);
+ }
+ }
+ fs.umount();
+
+ fs.mount();
+ {
+ BlueFS::FileReader *h;
+ ASSERT_EQ(0, fs.open_for_read(canary_dir, canary_file, &h));
+ ASSERT_NE(nullptr, h);
+ bufferlist bl;
+ ASSERT_EQ(strlen(canary_data), fs.read(h, 0, 1024, &bl, NULL));
+ std::cout << bl.c_str() << std::endl;
+ ASSERT_EQ(0, strncmp(canary_data, bl.c_str(), strlen(canary_data)));
+ delete h;
}
fs.umount();
}
g_ceph_context->_conf.set_val(
"bluefs_compact_log_sync",
"false");
+ const char* canary_dir = "dir.after_compact_test";
+ const char* canary_file = "file.after_compact_test";
+ const char* canary_data = "some random data";
BlueFS fs(g_ceph_context);
ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
writes_done = true;
join_all(sync_threads);
fs.compact_log();
+
+ {
+ ASSERT_EQ(0, fs.mkdir(canary_dir));
+ BlueFS::FileWriter *h;
+ ASSERT_EQ(0, fs.open_for_write(canary_dir, canary_file, &h, false));
+ ASSERT_NE(nullptr, h);
+ auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
+ h->append(canary_data, strlen(canary_data));
+ int r = fs.fsync(h);
+ ASSERT_EQ(r, 0);
+ }
+ }
+ fs.umount();
+
+ fs.mount();
+ {
+ BlueFS::FileReader *h;
+ ASSERT_EQ(0, fs.open_for_read(canary_dir, canary_file, &h));
+ ASSERT_NE(nullptr, h);
+ bufferlist bl;
+ ASSERT_EQ(strlen(canary_data), fs.read(h, 0, 1024, &bl, NULL));
+ std::cout << bl.c_str() << std::endl;
+ ASSERT_EQ(0, strncmp(canary_data, bl.c_str(), strlen(canary_data)));
+ delete h;
}
fs.umount();
}
fs.umount();
}
-int main(int argc, char **argv) {
- vector<const char*> args;
- argv_to_vec(argc, (const char **)argv, args);
+// Builds a three-device BlueFS (WAL 64 MiB, DB 128 MiB, SLOW 256 MiB), appends
+// and flushes data to a file under "dir.slow", then fsyncs an unrelated file
+// under "dir_db" and inspects the dirty-tracking state of the first file.
+// NOTE(review): the numeric suffix presumably refers to tracker issue 50965 —
+// confirm.
+TEST(BlueFS, test_tracker_50965) {
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false, 0));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  // NOTE(review): the directory names presumably steer placement onto the
+  // slow / db devices — confirm against the BlueFS placement rules.
+  const std::string dir_slow = "dir.slow";
+  ASSERT_EQ(0, fs.mkdir(dir_slow));
+  const std::string dir_db = "dir_db";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+  const std::string file_slow = "file";
+  BlueFS::FileWriter *h_slow = nullptr;
+  ASSERT_EQ(0, fs.open_for_write(dir_slow, file_slow, &h_slow, false));
+  ASSERT_NE(nullptr, h_slow);
+
+  const std::string file_db = "file";
+  BlueFS::FileWriter *h_db = nullptr;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, file_db, &h_db, false));
+  ASSERT_NE(nullptr, h_db);
+
+  // 70000 bytes is above the bluefs_min_flush_size (65536) configured above.
+  bufferlist bl1;
+  std::unique_ptr<char[]> buf1 = gen_buffer(70000);
+  bufferptr bp1 = buffer::claim_char(70000, buf1.get());
+  bl1.push_back(bp1);
+  h_slow->append(bl1.c_str(), bl1.length());
+  fs.flush(h_slow);
+
+  // Dirty sequence of the slow file right after its flush.
+  uint64_t h_slow_dirty_seq_1 = fs.debug_get_dirty_seq(h_slow);
+
+  // Write to the other file and fsync it; the result is checked so that a
+  // failed fsync cannot silently invalidate the observations below.
+  bufferlist bl2;
+  std::unique_ptr<char[]> buf2 = gen_buffer(1000);
+  bufferptr bp2 = buffer::claim_char(1000, buf2.get());
+  bl2.push_back(bp2);
+  h_db->append(bl2.c_str(), bl2.length());
+  ASSERT_EQ(0, fs.fsync(h_db));
+
+  // Re-sample the slow file's state after the unrelated fsync.
+  uint64_t h_slow_dirty_seq_2 = fs.debug_get_dirty_seq(h_slow);
+  bool h_slow_dev_dirty = fs.debug_get_is_dev_dirty(h_slow, BlueFS::BDEV_SLOW);
+
+  // problem if allocations are stable in log but slow device is not flushed yet
+  ASSERT_FALSE(h_slow_dirty_seq_1 != 0 &&
+               h_slow_dirty_seq_2 == 0 &&
+               h_slow_dev_dirty);
+
+  fs.close_writer(h_slow);
+  fs.close_writer(h_db);
+
+  fs.umount();
+}
+
+// Appends 70000 bytes to a file under "dir.slow", flushes, truncates it to
+// 60000 bytes, then forces a log flush through an fsync of a second file.
+// After a umount/mount cycle the file must report 60000 bytes, and the
+// dev-dirty flag sampled before the umount must have been false.
+// NOTE(review): the numeric suffix presumably refers to tracker issue 53129 —
+// confirm.
+TEST(BlueFS, test_truncate_stable_53129) {
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_min_flush_size", "65536");
+  conf.ApplyChanges();
+
+  uint64_t size_wal = 1048576 * 64;
+  TempBdev bdev_wal{size_wal};
+  uint64_t size_db = 1048576 * 128;
+  TempBdev bdev_db{size_db};
+  uint64_t size_slow = 1048576 * 256;
+  TempBdev bdev_slow{size_slow};
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false, 0));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  const std::string dir_slow = "dir.slow";
+  ASSERT_EQ(0, fs.mkdir(dir_slow));
+  const std::string dir_db = "dir_db";
+  ASSERT_EQ(0, fs.mkdir(dir_db));
+
+  const std::string file_slow = "file";
+  BlueFS::FileWriter *h_slow = nullptr;
+  ASSERT_EQ(0, fs.open_for_write(dir_slow, file_slow, &h_slow, false));
+  ASSERT_NE(nullptr, h_slow);
+
+  const std::string file_db = "file";
+  BlueFS::FileWriter *h_db = nullptr;
+  ASSERT_EQ(0, fs.open_for_write(dir_db, file_db, &h_db, false));
+  ASSERT_NE(nullptr, h_db);
+
+  bufferlist bl1;
+  std::unique_ptr<char[]> buf1 = gen_buffer(70000);
+  bufferptr bp1 = buffer::claim_char(70000, buf1.get());
+  bl1.push_back(bp1);
+  // add 70000 bytes
+  h_slow->append(bl1.c_str(), bl1.length());
+  fs.flush(h_slow);
+  // and truncate to 60000 bytes
+  fs.truncate(h_slow, 60000);
+
+  // write something to file on DB device
+  bufferlist bl2;
+  std::unique_ptr<char[]> buf2 = gen_buffer(1000);
+  bufferptr bp2 = buffer::claim_char(1000, buf2.get());
+  bl2.push_back(bp2);
+  h_db->append(bl2.c_str(), bl2.length());
+  // and force bluefs log to flush; a failed fsync would make the checks
+  // below meaningless, so its result is asserted
+  ASSERT_EQ(0, fs.fsync(h_db));
+
+  // This is the actual test point.
+  // We completed truncate, and we expect
+  // - size to be 60000
+  // - data to be stable on slow device
+  // OR
+  // - size = 0 or file does not exist
+  // - dev_dirty is irrelevant
+  bool h_slow_dev_dirty = fs.debug_get_is_dev_dirty(h_slow, BlueFS::BDEV_SLOW);
+  // Imagine power goes down here.
+
+  fs.close_writer(h_slow);
+  fs.close_writer(h_db);
+
+  fs.umount();
+
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
+
+  // Initialised so that a failing stat() never leaves an indeterminate value.
+  uint64_t size = 0;
+  utime_t mtime;
+  ASSERT_EQ(0, fs.stat("dir.slow", "file", &size, &mtime));
+  // check file size 60000 (unsigned literal avoids a signed/unsigned compare)
+  ASSERT_EQ(size, 60000u);
+  // check that dev_dirty was false (data stable on media)
+  ASSERT_FALSE(h_slow_dev_dirty);
+
+  fs.umount();
+}
+
+// Writes a series of small fsync'ed appends with 4 KiB allocation units and a
+// small log runway, unmounts with umount(true) (no compaction on exit, per the
+// inline note), remounts and writes a second file the same way, and finally
+// remounts once more to verify that the log still replays and the layout
+// checks out.
+TEST(BlueFS, test_update_ino1_delta_after_replay) {
+  uint64_t size = 1048576LL * (2 * 1024 + 128);
+  TempBdev bdev{size};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_alloc_size", "4096");
+  conf.SetVal("bluefs_shared_alloc_size", "4096");
+  conf.SetVal("bluefs_compact_log_sync", "false");
+  conf.SetVal("bluefs_min_log_runway", "32768");
+  conf.SetVal("bluefs_max_log_runway", "65536");
+  conf.SetVal("bluefs_allocator", "stupid");
+  conf.ApplyChanges();
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
+  ASSERT_EQ(0, fs.mkdir("dir"));
+
+  // Zero-initialised payload: the content is irrelevant to the test, but
+  // appending an uninitialised buffer reads indeterminate memory.
+  char data[2000] = {};
+  BlueFS::FileWriter *h = nullptr;
+  ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
+  ASSERT_NE(nullptr, h);
+  for (size_t i = 0; i < 100; i++) {
+    h->append(data, sizeof(data));
+    ASSERT_EQ(0, fs.fsync(h));
+  }
+  fs.close_writer(h);
+  fs.umount(true); //do not compact on exit!
+
+  ASSERT_EQ(0, fs.mount());
+  h = nullptr;
+  ASSERT_EQ(0, fs.open_for_write("dir", "file2", &h, false));
+  ASSERT_NE(nullptr, h);
+  for (size_t i = 0; i < 100; i++) {
+    h->append(data, sizeof(data));
+    ASSERT_EQ(0, fs.fsync(h));
+  }
+  fs.close_writer(h);
+  fs.umount();
+
+  // remount and check that the log can be replayed safely
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
+  fs.umount();
+}
+
+// Runs BlueFS with a small dedicated DB device plus a SLOW device backed by a
+// shared allocator (4 KiB unit, whole device marked free). Creates 10x10 small
+// files, removes every second directory, compacts the log, and checks that
+// the l_bluefs_alloc_shared_dev_fallbacks counter is non-zero and that the
+// file count survives a umount/mount cycle.
+TEST(BlueFS, test_shared_alloc) {
+  uint64_t size = 1048576 * 128;
+  TempBdev bdev_slow{size};
+  uint64_t size_db = 1048576 * 8;
+  TempBdev bdev_db{size_db};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_shared_alloc_size", "1048576");
+
+  bluefs_shared_alloc_context_t shared_alloc;
+  uint64_t shared_alloc_unit = 4096;
+  shared_alloc.set(
+    Allocator::create(g_ceph_context, g_ceph_context->_conf->bluefs_allocator,
+                      size, shared_alloc_unit, 0, 0, "test shared allocator"),
+    shared_alloc_unit);
+  shared_alloc.a->init_add_free(0, size);
+
+  BlueFS fs(g_ceph_context);
+  // DB device is fully utilized
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, size_db - 0x1000));
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0,
+                                   &shared_alloc));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
+
+  // Populate: dir.0 .. dir.9, each holding file.0 .. file.9 of 4096 bytes.
+  for (int i = 0; i < 10; i++) {
+    std::string dir = "dir.";
+    dir.append(std::to_string(i));
+    ASSERT_EQ(0, fs.mkdir(dir));
+    for (int j = 0; j < 10; j++) {
+      std::string file = "file.";
+      file.append(std::to_string(j));
+      BlueFS::FileWriter *h = nullptr;
+      ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
+      ASSERT_NE(nullptr, h);
+      // Closes the writer on every exit path, including a failed assertion.
+      auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
+      bufferlist bl;
+      std::unique_ptr<char[]> buf = gen_buffer(4096);
+      bufferptr bp = buffer::claim_char(4096, buf.get());
+      bl.push_back(bp);
+      h->append(bl.c_str(), bl.length());
+      ASSERT_EQ(0, fs.fsync(h));
+    }
+  }
+
+  // Remove the even-numbered directories and everything inside them.
+  for (int i = 0; i < 10; i += 2) {
+    std::string dir = "dir.";
+    dir.append(std::to_string(i));
+    for (int j = 0; j < 10; j++) {
+      std::string file = "file.";
+      file.append(std::to_string(j));
+      fs.unlink(dir, file);
+      fs.sync_metadata(false);
+    }
+    ASSERT_EQ(0, fs.rmdir(dir));
+    fs.sync_metadata(false);
+  }
+
+  fs.compact_log();
+  auto *logger = fs.get_perf_counters();
+  ASSERT_NE(logger->get(l_bluefs_alloc_shared_dev_fallbacks), 0);
+  auto num_files = logger->get(l_bluefs_num_files);
+  fs.umount();
+  // A failed remount must fail the test rather than be silently ignored.
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(num_files, logger->get(l_bluefs_num_files));
+  fs.umount();
+}
+// Runs BlueFS on a single device backed by a shared allocator whose free
+// space is deliberately fragmented: one contiguous region up front, then
+// alternating free/used main_unit-sized (4 KiB) pieces, while BlueFS is
+// configured with a 1 MiB shared allocation unit. After creating and removing
+// files and compacting the log, the l_bluefs_alloc_shared_size_fallbacks
+// counter must be non-zero and the file count must survive a remount.
+TEST(BlueFS, test_shared_alloc_sparse) {
+  uint64_t size = 1048576 * 128 * 2;
+  uint64_t main_unit = 4096;
+  uint64_t bluefs_alloc_unit = 1048576;
+  TempBdev bdev_slow{size};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_shared_alloc_size",
+              stringify(bluefs_alloc_unit).c_str());
+
+  bluefs_shared_alloc_context_t shared_alloc;
+  shared_alloc.set(
+    Allocator::create(g_ceph_context, g_ceph_context->_conf->bluefs_allocator,
+                      size, main_unit, 0, 0, "test shared allocator"),
+    main_unit);
+  // prepare sparse free space but let's have a continuous chunk at
+  // the beginning to fit initial log's fnode into superblock,
+  // we don't have any tricks to deal with sparse allocations
+  // (and hence long fnode) at mkfs
+  shared_alloc.a->init_add_free(bluefs_alloc_unit, 4 * bluefs_alloc_unit);
+  // From 5 * bluefs_alloc_unit onward only every other main_unit is free.
+  for (uint64_t i = 5 * bluefs_alloc_unit; i < size; i += 2 * main_unit) {
+    shared_alloc.a->init_add_free(i, main_unit);
+  }
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_slow.path, false, 0,
+                                   &shared_alloc));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
+
+  // Populate: dir.0 .. dir.9, each holding file.0 .. file.9 of 4096 bytes.
+  for (int i = 0; i < 10; i++) {
+    std::string dir = "dir.";
+    dir.append(std::to_string(i));
+    ASSERT_EQ(0, fs.mkdir(dir));
+    for (int j = 0; j < 10; j++) {
+      std::string file = "file.";
+      file.append(std::to_string(j));
+      BlueFS::FileWriter *h = nullptr;
+      ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
+      ASSERT_NE(nullptr, h);
+      // Closes the writer on every exit path, including a failed assertion.
+      auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
+      bufferlist bl;
+      std::unique_ptr<char[]> buf = gen_buffer(4096);
+      bufferptr bp = buffer::claim_char(4096, buf.get());
+      bl.push_back(bp);
+      h->append(bl.c_str(), bl.length());
+      ASSERT_EQ(0, fs.fsync(h));
+    }
+  }
+
+  // Remove the even-numbered directories and everything inside them.
+  for (int i = 0; i < 10; i += 2) {
+    std::string dir = "dir.";
+    dir.append(std::to_string(i));
+    for (int j = 0; j < 10; j++) {
+      std::string file = "file.";
+      file.append(std::to_string(j));
+      fs.unlink(dir, file);
+      fs.sync_metadata(false);
+    }
+    ASSERT_EQ(0, fs.rmdir(dir));
+    fs.sync_metadata(false);
+  }
+
+  fs.compact_log();
+  auto *logger = fs.get_perf_counters();
+  ASSERT_NE(logger->get(l_bluefs_alloc_shared_size_fallbacks), 0);
+  auto num_files = logger->get(l_bluefs_num_files);
+  fs.umount();
+
+  // A failed remount must fail the test rather than be silently ignored.
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(num_files, logger->get(l_bluefs_num_files));
+  fs.umount();
+}
+
+// Runs BlueFS on a single device backed by a shared allocator where the
+// BlueFS shared allocation unit equals the allocator's own unit (4096).
+// After creating and removing files and compacting the log, both shared
+// fallback counters must stay at zero and the file count must survive a
+// remount.
+TEST(BlueFS, test_4k_shared_alloc) {
+  uint64_t size = 1048576 * 128 * 2;
+  uint64_t main_unit = 4096;
+  uint64_t bluefs_alloc_unit = main_unit;
+  TempBdev bdev_slow{size};
+
+  ConfSaver conf(g_ceph_context->_conf);
+  conf.SetVal("bluefs_shared_alloc_size",
+              stringify(bluefs_alloc_unit).c_str());
+
+  bluefs_shared_alloc_context_t shared_alloc;
+  shared_alloc.set(
+    Allocator::create(g_ceph_context, g_ceph_context->_conf->bluefs_allocator,
+                      size, main_unit, 0, 0, "test shared allocator"),
+    main_unit);
+  // Everything except the first allocation unit is marked free.
+  shared_alloc.a->init_add_free(bluefs_alloc_unit, size - bluefs_alloc_unit);
+
+  BlueFS fs(g_ceph_context);
+  ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_slow.path, false, 0,
+                                   &shared_alloc));
+  uuid_d fsid;
+  ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
+
+  // Populate: dir.0 .. dir.9, each holding file.0 .. file.9 of 4096 bytes.
+  for (int i = 0; i < 10; i++) {
+    std::string dir = "dir.";
+    dir.append(std::to_string(i));
+    ASSERT_EQ(0, fs.mkdir(dir));
+    for (int j = 0; j < 10; j++) {
+      std::string file = "file.";
+      file.append(std::to_string(j));
+      BlueFS::FileWriter *h = nullptr;
+      ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
+      ASSERT_NE(nullptr, h);
+      // Closes the writer on every exit path, including a failed assertion.
+      auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
+      bufferlist bl;
+      std::unique_ptr<char[]> buf = gen_buffer(4096);
+      bufferptr bp = buffer::claim_char(4096, buf.get());
+      bl.push_back(bp);
+      h->append(bl.c_str(), bl.length());
+      ASSERT_EQ(0, fs.fsync(h));
+    }
+  }
+
+  // Remove the even-numbered directories and everything inside them.
+  for (int i = 0; i < 10; i += 2) {
+    std::string dir = "dir.";
+    dir.append(std::to_string(i));
+    for (int j = 0; j < 10; j++) {
+      std::string file = "file.";
+      file.append(std::to_string(j));
+      fs.unlink(dir, file);
+      fs.sync_metadata(false);
+    }
+    ASSERT_EQ(0, fs.rmdir(dir));
+    fs.sync_metadata(false);
+  }
+
+  fs.compact_log();
+  auto *logger = fs.get_perf_counters();
+  ASSERT_EQ(logger->get(l_bluefs_alloc_shared_dev_fallbacks), 0);
+  ASSERT_EQ(logger->get(l_bluefs_alloc_shared_size_fallbacks), 0);
+  auto num_files = logger->get(l_bluefs_num_files);
+  fs.umount();
+
+  // A failed remount must fail the test rather than be silently ignored.
+  ASSERT_EQ(0, fs.mount());
+  ASSERT_EQ(num_files, logger->get(l_bluefs_num_files));
+  fs.umount();
+}
+
+int main(int argc, char **argv) {
+ auto args = argv_to_vec(argc, argv);
map<string,string> defaults = {
{ "debug_bluefs", "1/20" },
{ "debug_bdev", "1/20" }