]> git.proxmox.com Git - ceph.git/blame - ceph/src/test/objectstore/test_bluefs.cc
import ceph quincy 17.2.6
[ceph.git] / ceph / src / test / objectstore / test_bluefs.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <atomic>
#include <iostream>
#include <random>
#include <stack>
#include <thread>

#include <gtest/gtest.h>
7c673cae
FG
14#include "global/global_init.h"
15#include "common/ceph_argparse.h"
16#include "include/stringify.h"
11fdf7f2 17#include "include/scope_guard.h"
7c673cae 18#include "common/errno.h"
7c673cae 19
39ae355f 20#include "os/bluestore/Allocator.h"
7c673cae
FG
21#include "os/bluestore/BlueFS.h"
22
20effc67
TL
23using namespace std;
24
// Allocate `size` bytes on the heap and fill them with the output of a
// default-constructed (i.e. default-seeded) engine, so two calls with the
// same size produce the same bytes.
std::unique_ptr<char[]> gen_buffer(uint64_t size)
{
  using byte_engine =
    std::independent_bits_engine<std::default_random_engine, CHAR_BIT, unsigned char>;

  auto out = std::make_unique<char[]>(size);
  byte_engine rng;
  for (uint64_t pos = 0; pos < size; ++pos) {
    out[pos] = rng();
  }
  return out;
}
32
9f95a23c
TL
33class TempBdev {
34public:
35 TempBdev(uint64_t size)
36 : path{get_temp_bdev(size)}
37 {}
38 ~TempBdev() {
39 rm_temp_bdev(path);
40 }
41 const std::string path;
42private:
43 static string get_temp_bdev(uint64_t size)
44 {
45 static int n = 0;
46 string fn = "ceph_test_bluefs.tmp.block." + stringify(getpid())
47 + "." + stringify(++n);
48 int fd = ::open(fn.c_str(), O_CREAT|O_RDWR|O_TRUNC, 0644);
49 ceph_assert(fd >= 0);
50 int r = ::ftruncate(fd, size);
51 ceph_assert(r >= 0);
52 ::close(fd);
53 return fn;
54 }
55 static void rm_temp_bdev(string f)
56 {
57 ::unlink(f.c_str());
58 }
59};
7c673cae 60
f6b5b4d7
TL
61class ConfSaver {
62 std::stack<std::pair<std::string, std::string>> saved_settings;
63 ConfigProxy& conf;
64public:
65 ConfSaver(ConfigProxy& conf) : conf(conf) {
66 conf._clear_safe_to_start_threads();
67 };
68 ~ConfSaver() {
69 conf._clear_safe_to_start_threads();
70 while(saved_settings.size() > 0) {
71 auto& e = saved_settings.top();
72 conf.set_val_or_die(e.first, e.second);
73 saved_settings.pop();
74 }
75 conf.set_safe_to_start_threads();
76 conf.apply_changes(nullptr);
77 }
78 void SetVal(const char* key, const char* val) {
79 std::string skey(key);
80 std::string prev_val;
81 conf.get_val(skey, &prev_val);
82 conf.set_val_or_die(skey, val);
83 saved_settings.emplace(skey, prev_val);
84 }
85 void ApplyChanges() {
86 conf.set_safe_to_start_threads();
87 conf.apply_changes(nullptr);
88 }
89};
90
7c673cae
FG
91TEST(BlueFS, mkfs) {
92 uint64_t size = 1048576 * 128;
9f95a23c 93 TempBdev bdev{size};
7c673cae
FG
94 uuid_d fsid;
95 BlueFS fs(g_ceph_context);
f67539c2 96 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
9f95a23c 97 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae
FG
98}
99
100TEST(BlueFS, mkfs_mount) {
101 uint64_t size = 1048576 * 128;
9f95a23c 102 TempBdev bdev{size};
7c673cae 103 BlueFS fs(g_ceph_context);
f67539c2 104 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 105 uuid_d fsid;
9f95a23c 106 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 107 ASSERT_EQ(0, fs.mount());
9f95a23c 108 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
109 ASSERT_EQ(fs.get_total(BlueFS::BDEV_DB), size - 1048576);
110 ASSERT_LT(fs.get_free(BlueFS::BDEV_DB), size - 1048576);
111 fs.umount();
9f95a23c
TL
112}
113
7c673cae
FG
114TEST(BlueFS, write_read) {
115 uint64_t size = 1048576 * 128;
9f95a23c 116 TempBdev bdev{size};
7c673cae 117 BlueFS fs(g_ceph_context);
f67539c2 118 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 119 uuid_d fsid;
9f95a23c 120 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 121 ASSERT_EQ(0, fs.mount());
9f95a23c 122 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
123 {
124 BlueFS::FileWriter *h;
125 ASSERT_EQ(0, fs.mkdir("dir"));
126 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
127 h->append("foo", 3);
128 h->append("bar", 3);
129 h->append("baz", 3);
130 fs.fsync(h);
131 fs.close_writer(h);
132 }
133 {
134 BlueFS::FileReader *h;
135 ASSERT_EQ(0, fs.open_for_read("dir", "file", &h));
136 bufferlist bl;
f67539c2 137 ASSERT_EQ(9, fs.read(h, 0, 1024, &bl, NULL));
7c673cae
FG
138 ASSERT_EQ(0, strncmp("foobarbaz", bl.c_str(), 9));
139 delete h;
140 }
141 fs.umount();
7c673cae
FG
142}
143
144TEST(BlueFS, small_appends) {
145 uint64_t size = 1048576 * 128;
9f95a23c 146 TempBdev bdev{size};
7c673cae 147 BlueFS fs(g_ceph_context);
f67539c2 148 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 149 uuid_d fsid;
9f95a23c 150 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 151 ASSERT_EQ(0, fs.mount());
9f95a23c 152 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
153 {
154 BlueFS::FileWriter *h;
155 ASSERT_EQ(0, fs.mkdir("dir"));
156 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
157 for (unsigned i = 0; i < 10000; ++i) {
158 h->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
159 }
160 fs.fsync(h);
161 fs.close_writer(h);
162 }
163 {
164 BlueFS::FileWriter *h;
165 ASSERT_EQ(0, fs.open_for_write("dir", "file_sync", &h, false));
166 for (unsigned i = 0; i < 1000; ++i) {
167 h->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
31f18b77 168 ASSERT_EQ(0, fs.fsync(h));
7c673cae
FG
169 }
170 fs.close_writer(h);
171 }
172 fs.umount();
7c673cae
FG
173}
174
494da23a 175TEST(BlueFS, very_large_write) {
adb31ebb 176 // we'll write a ~5G file, so allocate more than that for the whole fs
cd265ab1 177 uint64_t size = 1048576 * 1024 * 6ull;
9f95a23c 178 TempBdev bdev{size};
494da23a
TL
179 BlueFS fs(g_ceph_context);
180
181 bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io");
182 g_ceph_context->_conf.set_val("bluefs_buffered_io", "false");
adb31ebb 183 uint64_t total_written = 0;
494da23a 184
f67539c2 185 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
494da23a 186 uuid_d fsid;
9f95a23c 187 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
494da23a 188 ASSERT_EQ(0, fs.mount());
9f95a23c 189 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
494da23a
TL
190 char buf[1048571]; // this is biggish, but intentionally not evenly aligned
191 for (unsigned i = 0; i < sizeof(buf); ++i) {
192 buf[i] = i;
193 }
194 {
195 BlueFS::FileWriter *h;
196 ASSERT_EQ(0, fs.mkdir("dir"));
197 ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false));
198 for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) {
199 h->append(buf, sizeof(buf));
adb31ebb
TL
200 total_written += sizeof(buf);
201 }
202 fs.fsync(h);
203 for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) {
204 h->append(buf, sizeof(buf));
205 total_written += sizeof(buf);
494da23a
TL
206 }
207 fs.fsync(h);
208 fs.close_writer(h);
209 }
210 {
211 BlueFS::FileReader *h;
212 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
213 bufferlist bl;
adb31ebb 214 ASSERT_EQ(h->file->fnode.size, total_written);
494da23a
TL
215 for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) {
216 bl.clear();
f67539c2 217 fs.read(h, i * sizeof(buf), sizeof(buf), &bl, NULL);
494da23a
TL
218 int r = memcmp(buf, bl.c_str(), sizeof(buf));
219 if (r) {
220 cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r
221 << std::endl;
222 }
223 ASSERT_EQ(0, r);
224 }
adb31ebb
TL
225 for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) {
226 bl.clear();
f67539c2 227 fs.read(h, i * sizeof(buf), sizeof(buf), &bl, NULL);
adb31ebb
TL
228 int r = memcmp(buf, bl.c_str(), sizeof(buf));
229 if (r) {
230 cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r
231 << std::endl;
232 }
233 ASSERT_EQ(0, r);
234 }
235 delete h;
236 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
237 ASSERT_EQ(h->file->fnode.size, total_written);
39ae355f 238 auto huge_buf = std::make_unique<char[]>(h->file->fnode.size);
adb31ebb 239 auto l = h->file->fnode.size;
f67539c2
TL
240 int64_t r = fs.read(h, 0, l, NULL, huge_buf.get());
241 ASSERT_EQ(r, l);
cd265ab1
TL
242 delete h;
243 }
244 fs.umount();
245
246 g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old));
247}
248
249TEST(BlueFS, very_large_write2) {
250 // we'll write a ~5G file, so allocate more than that for the whole fs
251 uint64_t size_full = 1048576 * 1024 * 6ull;
252 uint64_t size = 1048576 * 1024 * 5ull;
253 TempBdev bdev{ size_full };
254 BlueFS fs(g_ceph_context);
255
256 bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io");
257 g_ceph_context->_conf.set_val("bluefs_buffered_io", "false");
258 uint64_t total_written = 0;
259
260 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
cd265ab1
TL
261 uuid_d fsid;
262 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
263 ASSERT_EQ(0, fs.mount());
264 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
265
266 char fill_arr[1 << 20]; // 1M
267 for (size_t i = 0; i < sizeof(fill_arr); ++i) {
268 fill_arr[i] = (char)i;
269 }
270 std::unique_ptr<char[]> buf;
271 buf.reset(new char[size]);
272 for (size_t i = 0; i < size; i += sizeof(fill_arr)) {
273 memcpy(buf.get() + i, fill_arr, sizeof(fill_arr));
274 }
275 {
276 BlueFS::FileWriter* h;
277 ASSERT_EQ(0, fs.mkdir("dir"));
278 ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false));
279 fs.append_try_flush(h, buf.get(), size);
280 total_written = size;
281 fs.fsync(h);
282 fs.close_writer(h);
283 }
284 memset(buf.get(), 0, size);
285 {
286 BlueFS::FileReader* h;
287 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
288 ASSERT_EQ(h->file->fnode.size, total_written);
289 auto l = h->file->fnode.size;
f67539c2
TL
290 int64_t r = fs.read(h, 0, l, NULL, buf.get());
291 ASSERT_EQ(r, l);
cd265ab1
TL
292 for (size_t i = 0; i < size; i += sizeof(fill_arr)) {
293 ceph_assert(memcmp(buf.get() + i, fill_arr, sizeof(fill_arr)) == 0);
294 }
494da23a
TL
295 delete h;
296 }
297 fs.umount();
298
299 g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old));
494da23a
TL
300}
301
7c673cae
FG
302#define ALLOC_SIZE 4096
303
304void write_data(BlueFS &fs, uint64_t rationed_bytes)
305{
7c673cae
FG
306 int j=0, r=0;
307 uint64_t written_bytes = 0;
308 rationed_bytes -= ALLOC_SIZE;
309 stringstream ss;
310 string dir = "dir.";
311 ss << std::this_thread::get_id();
312 dir.append(ss.str());
313 dir.append(".");
314 dir.append(to_string(j));
315 ASSERT_EQ(0, fs.mkdir(dir));
316 while (1) {
317 string file = "file.";
318 file.append(to_string(j));
11fdf7f2 319 BlueFS::FileWriter *h;
7c673cae 320 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
321 ASSERT_NE(nullptr, h);
322 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 323 bufferlist bl;
11fdf7f2
TL
324 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
325 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
7c673cae
FG
326 bl.push_back(bp);
327 h->append(bl.c_str(), bl.length());
328 r = fs.fsync(h);
329 if (r < 0) {
7c673cae
FG
330 break;
331 }
11fdf7f2 332 written_bytes += g_conf()->bluefs_alloc_size;
7c673cae 333 j++;
11fdf7f2 334 if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) {
7c673cae
FG
335 break;
336 }
337 }
338}
339
340void create_single_file(BlueFS &fs)
341{
342 BlueFS::FileWriter *h;
343 stringstream ss;
344 string dir = "dir.test";
345 ASSERT_EQ(0, fs.mkdir(dir));
346 string file = "testfile";
347 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
348 bufferlist bl;
11fdf7f2
TL
349 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
350 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
7c673cae
FG
351 bl.push_back(bp);
352 h->append(bl.c_str(), bl.length());
353 fs.fsync(h);
354 fs.close_writer(h);
355}
356
357void write_single_file(BlueFS &fs, uint64_t rationed_bytes)
358{
7c673cae 359 stringstream ss;
11fdf7f2
TL
360 const string dir = "dir.test";
361 const string file = "testfile";
7c673cae
FG
362 uint64_t written_bytes = 0;
363 rationed_bytes -= ALLOC_SIZE;
364 while (1) {
11fdf7f2 365 BlueFS::FileWriter *h;
7c673cae 366 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
367 ASSERT_NE(nullptr, h);
368 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 369 bufferlist bl;
11fdf7f2
TL
370 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
371 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
7c673cae
FG
372 bl.push_back(bp);
373 h->append(bl.c_str(), bl.length());
11fdf7f2 374 int r = fs.fsync(h);
7c673cae 375 if (r < 0) {
7c673cae
FG
376 break;
377 }
11fdf7f2
TL
378 written_bytes += g_conf()->bluefs_alloc_size;
379 if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) {
7c673cae
FG
380 break;
381 }
382 }
383}
384
385bool writes_done = false;
386
387void sync_fs(BlueFS &fs)
388{
389 while (1) {
390 if (writes_done == true)
391 break;
1911f103 392 fs.sync_metadata(false);
7c673cae
FG
393 sleep(1);
394 }
395}
396
397
// Wait for a single thread to finish.
void do_join(std::thread& t)
{
  t.join();
}

// Wait for every thread in `v`, in container order.
void join_all(std::vector<std::thread>& v)
{
  for (std::thread& worker : v) {
    do_join(worker);
  }
}
407
408#define NUM_WRITERS 3
409#define NUM_SYNC_THREADS 1
410
411#define NUM_SINGLE_FILE_WRITERS 1
412#define NUM_MULTIPLE_FILE_WRITERS 2
413
414TEST(BlueFS, test_flush_1) {
415 uint64_t size = 1048576 * 128;
9f95a23c 416 TempBdev bdev{size};
11fdf7f2 417 g_ceph_context->_conf.set_val(
7c673cae
FG
418 "bluefs_alloc_size",
419 "65536");
11fdf7f2 420 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
421
422 BlueFS fs(g_ceph_context);
f67539c2 423 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 424 uuid_d fsid;
9f95a23c 425 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 426 ASSERT_EQ(0, fs.mount());
9f95a23c 427 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
428 {
429 std::vector<std::thread> write_thread_multiple;
430 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
431 uint64_t per_thread_bytes = (effective_size/(NUM_MULTIPLE_FILE_WRITERS + NUM_SINGLE_FILE_WRITERS));
432 for (int i=0; i<NUM_MULTIPLE_FILE_WRITERS ; i++) {
433 write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
434 }
435
436 create_single_file(fs);
437 std::vector<std::thread> write_thread_single;
438 for (int i=0; i<NUM_SINGLE_FILE_WRITERS; i++) {
439 write_thread_single.push_back(std::thread(write_single_file, std::ref(fs), per_thread_bytes));
440 }
441
442 join_all(write_thread_single);
443 join_all(write_thread_multiple);
444 }
445 fs.umount();
7c673cae
FG
446}
447
448TEST(BlueFS, test_flush_2) {
449 uint64_t size = 1048576 * 256;
9f95a23c 450 TempBdev bdev{size};
11fdf7f2 451 g_ceph_context->_conf.set_val(
7c673cae
FG
452 "bluefs_alloc_size",
453 "65536");
11fdf7f2 454 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
455
456 BlueFS fs(g_ceph_context);
f67539c2 457 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 458 uuid_d fsid;
9f95a23c 459 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 460 ASSERT_EQ(0, fs.mount());
9f95a23c 461 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
462 {
463 uint64_t effective_size = size - (128 * 1048576); // leaving the last 32 MB for log compaction
464 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
465 std::vector<std::thread> write_thread_multiple;
466 for (int i=0; i<NUM_WRITERS; i++) {
467 write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
468 }
469
470 join_all(write_thread_multiple);
471 }
472 fs.umount();
7c673cae
FG
473}
474
475TEST(BlueFS, test_flush_3) {
476 uint64_t size = 1048576 * 256;
9f95a23c 477 TempBdev bdev{size};
11fdf7f2 478 g_ceph_context->_conf.set_val(
7c673cae
FG
479 "bluefs_alloc_size",
480 "65536");
11fdf7f2 481 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
482
483 BlueFS fs(g_ceph_context);
f67539c2 484 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 485 uuid_d fsid;
9f95a23c 486 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 487 ASSERT_EQ(0, fs.mount());
9f95a23c 488 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
489 {
490 std::vector<std::thread> write_threads;
491 uint64_t effective_size = size - (64 * 1048576); // leaving the last 11 MB for log compaction
492 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
493 for (int i=0; i<NUM_WRITERS; i++) {
494 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
495 }
496
497 std::vector<std::thread> sync_threads;
498 for (int i=0; i<NUM_SYNC_THREADS; i++) {
499 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
500 }
501
502 join_all(write_threads);
503 writes_done = true;
504 join_all(sync_threads);
505 }
506 fs.umount();
7c673cae
FG
507}
508
509TEST(BlueFS, test_simple_compaction_sync) {
11fdf7f2 510 g_ceph_context->_conf.set_val(
7c673cae
FG
511 "bluefs_compact_log_sync",
512 "true");
513 uint64_t size = 1048576 * 128;
9f95a23c 514 TempBdev bdev{size};
7c673cae
FG
515
516 BlueFS fs(g_ceph_context);
f67539c2 517 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 518 uuid_d fsid;
9f95a23c 519 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 520 ASSERT_EQ(0, fs.mount());
9f95a23c 521 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae 522 {
7c673cae
FG
523 for (int i=0; i<10; i++) {
524 string dir = "dir.";
525 dir.append(to_string(i));
526 ASSERT_EQ(0, fs.mkdir(dir));
527 for (int j=0; j<10; j++) {
528 string file = "file.";
529 file.append(to_string(j));
11fdf7f2 530 BlueFS::FileWriter *h;
7c673cae 531 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
532 ASSERT_NE(nullptr, h);
533 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 534 bufferlist bl;
11fdf7f2
TL
535 std::unique_ptr<char[]> buf = gen_buffer(4096);
536 bufferptr bp = buffer::claim_char(4096, buf.get());
7c673cae
FG
537 bl.push_back(bp);
538 h->append(bl.c_str(), bl.length());
539 fs.fsync(h);
7c673cae
FG
540 }
541 }
542 }
7c673cae
FG
543 {
544 for (int i=0; i<10; i+=2) {
545 string dir = "dir.";
546 dir.append(to_string(i));
11fdf7f2 547 for (int j=0; j<10; j++) {
7c673cae
FG
548 string file = "file.";
549 file.append(to_string(j));
550 fs.unlink(dir, file);
1911f103 551 fs.sync_metadata(false);
7c673cae 552 }
11fdf7f2 553 ASSERT_EQ(0, fs.rmdir(dir));
1911f103 554 fs.sync_metadata(false);
7c673cae
FG
555 }
556 }
557 fs.compact_log();
558 fs.umount();
7c673cae
FG
559}
560
561TEST(BlueFS, test_simple_compaction_async) {
11fdf7f2 562 g_ceph_context->_conf.set_val(
7c673cae
FG
563 "bluefs_compact_log_sync",
564 "false");
565 uint64_t size = 1048576 * 128;
9f95a23c 566 TempBdev bdev{size};
7c673cae
FG
567
568 BlueFS fs(g_ceph_context);
f67539c2 569 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 570 uuid_d fsid;
9f95a23c 571 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 572 ASSERT_EQ(0, fs.mount());
9f95a23c 573 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae 574 {
7c673cae
FG
575 for (int i=0; i<10; i++) {
576 string dir = "dir.";
577 dir.append(to_string(i));
578 ASSERT_EQ(0, fs.mkdir(dir));
579 for (int j=0; j<10; j++) {
580 string file = "file.";
581 file.append(to_string(j));
11fdf7f2 582 BlueFS::FileWriter *h;
7c673cae 583 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
584 ASSERT_NE(nullptr, h);
585 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 586 bufferlist bl;
11fdf7f2
TL
587 std::unique_ptr<char[]> buf = gen_buffer(4096);
588 bufferptr bp = buffer::claim_char(4096, buf.get());
7c673cae
FG
589 bl.push_back(bp);
590 h->append(bl.c_str(), bl.length());
591 fs.fsync(h);
7c673cae
FG
592 }
593 }
594 }
7c673cae
FG
595 {
596 for (int i=0; i<10; i+=2) {
597 string dir = "dir.";
598 dir.append(to_string(i));
11fdf7f2 599 for (int j=0; j<10; j++) {
7c673cae
FG
600 string file = "file.";
601 file.append(to_string(j));
602 fs.unlink(dir, file);
1911f103 603 fs.sync_metadata(false);
7c673cae 604 }
11fdf7f2 605 ASSERT_EQ(0, fs.rmdir(dir));
1911f103 606 fs.sync_metadata(false);
7c673cae
FG
607 }
608 }
609 fs.compact_log();
610 fs.umount();
7c673cae
FG
611}
612
613TEST(BlueFS, test_compaction_sync) {
614 uint64_t size = 1048576 * 128;
9f95a23c 615 TempBdev bdev{size};
11fdf7f2 616 g_ceph_context->_conf.set_val(
7c673cae
FG
617 "bluefs_alloc_size",
618 "65536");
11fdf7f2 619 g_ceph_context->_conf.set_val(
7c673cae
FG
620 "bluefs_compact_log_sync",
621 "true");
1d09f67e
TL
622 const char* canary_dir = "dir.after_compact_test";
623 const char* canary_file = "file.after_compact_test";
624 const char* canary_data = "some random data";
7c673cae
FG
625
626 BlueFS fs(g_ceph_context);
f67539c2 627 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 628 uuid_d fsid;
9f95a23c 629 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 630 ASSERT_EQ(0, fs.mount());
9f95a23c 631 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
632 {
633 std::vector<std::thread> write_threads;
634 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
635 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
636 for (int i=0; i<NUM_WRITERS; i++) {
637 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
638 }
639
640 std::vector<std::thread> sync_threads;
641 for (int i=0; i<NUM_SYNC_THREADS; i++) {
642 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
643 }
644
645 join_all(write_threads);
646 writes_done = true;
647 join_all(sync_threads);
648 fs.compact_log();
1d09f67e
TL
649
650 {
651 ASSERT_EQ(0, fs.mkdir(canary_dir));
652 BlueFS::FileWriter *h;
653 ASSERT_EQ(0, fs.open_for_write(canary_dir, canary_file, &h, false));
654 ASSERT_NE(nullptr, h);
655 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
656 h->append(canary_data, strlen(canary_data));
657 int r = fs.fsync(h);
658 ASSERT_EQ(r, 0);
659 }
660 }
661 fs.umount();
662
663 fs.mount();
664 {
665 BlueFS::FileReader *h;
666 ASSERT_EQ(0, fs.open_for_read(canary_dir, canary_file, &h));
667 ASSERT_NE(nullptr, h);
668 bufferlist bl;
669 ASSERT_EQ(strlen(canary_data), fs.read(h, 0, 1024, &bl, NULL));
670 std::cout << bl.c_str() << std::endl;
671 ASSERT_EQ(0, strncmp(canary_data, bl.c_str(), strlen(canary_data)));
672 delete h;
7c673cae
FG
673 }
674 fs.umount();
7c673cae
FG
675}
676
677TEST(BlueFS, test_compaction_async) {
678 uint64_t size = 1048576 * 128;
9f95a23c 679 TempBdev bdev{size};
11fdf7f2 680 g_ceph_context->_conf.set_val(
7c673cae
FG
681 "bluefs_alloc_size",
682 "65536");
11fdf7f2 683 g_ceph_context->_conf.set_val(
7c673cae
FG
684 "bluefs_compact_log_sync",
685 "false");
1d09f67e
TL
686 const char* canary_dir = "dir.after_compact_test";
687 const char* canary_file = "file.after_compact_test";
688 const char* canary_data = "some random data";
7c673cae
FG
689
690 BlueFS fs(g_ceph_context);
f67539c2 691 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 692 uuid_d fsid;
9f95a23c 693 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 694 ASSERT_EQ(0, fs.mount());
9f95a23c 695 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
696 {
697 std::vector<std::thread> write_threads;
698 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
699 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
700 for (int i=0; i<NUM_WRITERS; i++) {
701 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
702 }
703
704 std::vector<std::thread> sync_threads;
705 for (int i=0; i<NUM_SYNC_THREADS; i++) {
706 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
707 }
708
709 join_all(write_threads);
710 writes_done = true;
711 join_all(sync_threads);
712 fs.compact_log();
1d09f67e
TL
713
714 {
715 ASSERT_EQ(0, fs.mkdir(canary_dir));
716 BlueFS::FileWriter *h;
717 ASSERT_EQ(0, fs.open_for_write(canary_dir, canary_file, &h, false));
718 ASSERT_NE(nullptr, h);
719 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
720 h->append(canary_data, strlen(canary_data));
721 int r = fs.fsync(h);
722 ASSERT_EQ(r, 0);
723 }
724 }
725 fs.umount();
726
727 fs.mount();
728 {
729 BlueFS::FileReader *h;
730 ASSERT_EQ(0, fs.open_for_read(canary_dir, canary_file, &h));
731 ASSERT_NE(nullptr, h);
732 bufferlist bl;
733 ASSERT_EQ(strlen(canary_data), fs.read(h, 0, 1024, &bl, NULL));
734 std::cout << bl.c_str() << std::endl;
735 ASSERT_EQ(0, strncmp(canary_data, bl.c_str(), strlen(canary_data)));
736 delete h;
7c673cae
FG
737 }
738 fs.umount();
7c673cae
FG
739}
740
741TEST(BlueFS, test_replay) {
742 uint64_t size = 1048576 * 128;
9f95a23c 743 TempBdev bdev{size};
11fdf7f2 744 g_ceph_context->_conf.set_val(
7c673cae
FG
745 "bluefs_alloc_size",
746 "65536");
11fdf7f2 747 g_ceph_context->_conf.set_val(
7c673cae
FG
748 "bluefs_compact_log_sync",
749 "false");
750
751 BlueFS fs(g_ceph_context);
f67539c2 752 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 753 uuid_d fsid;
9f95a23c 754 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 755 ASSERT_EQ(0, fs.mount());
9f95a23c 756 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
757 {
758 std::vector<std::thread> write_threads;
759 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
760 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
761 for (int i=0; i<NUM_WRITERS; i++) {
762 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
763 }
764
765 std::vector<std::thread> sync_threads;
766 for (int i=0; i<NUM_SYNC_THREADS; i++) {
767 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
768 }
769
770 join_all(write_threads);
771 writes_done = true;
772 join_all(sync_threads);
773 fs.compact_log();
774 }
775 fs.umount();
776 // remount and check log can replay safe?
31f18b77 777 ASSERT_EQ(0, fs.mount());
9f95a23c 778 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae 779 fs.umount();
7c673cae
FG
780}
781
f6b5b4d7
TL
782TEST(BlueFS, test_replay_growth) {
783 uint64_t size = 1048576LL * (2 * 1024 + 128);
784 TempBdev bdev{size};
785
786 ConfSaver conf(g_ceph_context->_conf);
787 conf.SetVal("bluefs_alloc_size", "4096");
788 conf.SetVal("bluefs_shared_alloc_size", "4096");
789 conf.SetVal("bluefs_compact_log_sync", "false");
790 conf.SetVal("bluefs_min_log_runway", "32768");
791 conf.SetVal("bluefs_max_log_runway", "65536");
792 conf.SetVal("bluefs_allocator", "stupid");
793 conf.SetVal("bluefs_sync_write", "true");
794 conf.ApplyChanges();
795
796 BlueFS fs(g_ceph_context);
f67539c2 797 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
f6b5b4d7
TL
798 uuid_d fsid;
799 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
800 ASSERT_EQ(0, fs.mount());
801 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
802 ASSERT_EQ(0, fs.mkdir("dir"));
803
804 char data[2000];
805 BlueFS::FileWriter *h;
806 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
807 for (size_t i = 0; i < 10000; i++) {
808 h->append(data, 2000);
809 fs.fsync(h);
810 }
811 fs.close_writer(h);
812 fs.umount(true); //do not compact on exit!
813
814 // remount and check log can replay safe?
815 ASSERT_EQ(0, fs.mount());
816 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
817 fs.umount();
818}
819
522d829b
TL
820TEST(BlueFS, test_tracker_50965) {
821 uint64_t size_wal = 1048576 * 64;
822 TempBdev bdev_wal{size_wal};
823 uint64_t size_db = 1048576 * 128;
824 TempBdev bdev_db{size_db};
825 uint64_t size_slow = 1048576 * 256;
826 TempBdev bdev_slow{size_slow};
827
828 ConfSaver conf(g_ceph_context->_conf);
829 conf.SetVal("bluefs_min_flush_size", "65536");
830 conf.ApplyChanges();
831
832 BlueFS fs(g_ceph_context);
833 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false, 0));
834 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0));
835 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0));
836 uuid_d fsid;
837 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
838 ASSERT_EQ(0, fs.mount());
839 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
840
841 string dir_slow = "dir.slow";
842 ASSERT_EQ(0, fs.mkdir(dir_slow));
843 string dir_db = "dir_db";
844 ASSERT_EQ(0, fs.mkdir(dir_db));
845
846 string file_slow = "file";
847 BlueFS::FileWriter *h_slow;
848 ASSERT_EQ(0, fs.open_for_write(dir_slow, file_slow, &h_slow, false));
849 ASSERT_NE(nullptr, h_slow);
850
851 string file_db = "file";
852 BlueFS::FileWriter *h_db;
853 ASSERT_EQ(0, fs.open_for_write(dir_db, file_db, &h_db, false));
854 ASSERT_NE(nullptr, h_db);
855
856 bufferlist bl1;
857 std::unique_ptr<char[]> buf1 = gen_buffer(70000);
858 bufferptr bp1 = buffer::claim_char(70000, buf1.get());
859 bl1.push_back(bp1);
860 h_slow->append(bl1.c_str(), bl1.length());
861 fs.flush(h_slow);
862
863 uint64_t h_slow_dirty_seq_1 = fs.debug_get_dirty_seq(h_slow);
864
865 bufferlist bl2;
866 std::unique_ptr<char[]> buf2 = gen_buffer(1000);
867 bufferptr bp2 = buffer::claim_char(1000, buf2.get());
868 bl2.push_back(bp2);
869 h_db->append(bl2.c_str(), bl2.length());
870 fs.fsync(h_db);
871
872 uint64_t h_slow_dirty_seq_2 = fs.debug_get_dirty_seq(h_slow);
873 bool h_slow_dev_dirty = fs.debug_get_is_dev_dirty(h_slow, BlueFS::BDEV_SLOW);
874
875 //problem if allocations are stable in log but slow device is not flushed yet
876 ASSERT_FALSE(h_slow_dirty_seq_1 != 0 &&
877 h_slow_dirty_seq_2 == 0 &&
878 h_slow_dev_dirty == true);
879
880 fs.close_writer(h_slow);
881 fs.close_writer(h_db);
882
883 fs.umount();
884}
885
20effc67
TL
886TEST(BlueFS, test_truncate_stable_53129) {
887
888 ConfSaver conf(g_ceph_context->_conf);
889 conf.SetVal("bluefs_min_flush_size", "65536");
890 conf.ApplyChanges();
891
892 uint64_t size_wal = 1048576 * 64;
893 TempBdev bdev_wal{size_wal};
894 uint64_t size_db = 1048576 * 128;
895 TempBdev bdev_db{size_db};
896 uint64_t size_slow = 1048576 * 256;
897 TempBdev bdev_slow{size_slow};
898
899 BlueFS fs(g_ceph_context);
900 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false, 0));
901 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0));
902 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0));
903 uuid_d fsid;
904 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
905 ASSERT_EQ(0, fs.mount());
906 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
907
908 string dir_slow = "dir.slow";
909 ASSERT_EQ(0, fs.mkdir(dir_slow));
910 string dir_db = "dir_db";
911 ASSERT_EQ(0, fs.mkdir(dir_db));
912
913 string file_slow = "file";
914 BlueFS::FileWriter *h_slow;
915 ASSERT_EQ(0, fs.open_for_write(dir_slow, file_slow, &h_slow, false));
916 ASSERT_NE(nullptr, h_slow);
917
918 string file_db = "file";
919 BlueFS::FileWriter *h_db;
920 ASSERT_EQ(0, fs.open_for_write(dir_db, file_db, &h_db, false));
921 ASSERT_NE(nullptr, h_db);
922
923 bufferlist bl1;
924 std::unique_ptr<char[]> buf1 = gen_buffer(70000);
925 bufferptr bp1 = buffer::claim_char(70000, buf1.get());
926 bl1.push_back(bp1);
927 // add 70000 bytes
928 h_slow->append(bl1.c_str(), bl1.length());
929 fs.flush(h_slow);
930 // and truncate to 60000 bytes
931 fs.truncate(h_slow, 60000);
932
933 // write something to file on DB device
934 bufferlist bl2;
935 std::unique_ptr<char[]> buf2 = gen_buffer(1000);
936 bufferptr bp2 = buffer::claim_char(1000, buf2.get());
937 bl2.push_back(bp2);
938 h_db->append(bl2.c_str(), bl2.length());
939 // and force bluefs log to flush
940 fs.fsync(h_db);
941
942 // This is the actual test point.
943 // We completed truncate, and we expect
944 // - size to be 60000
945 // - data to be stable on slow device
946 // OR
947 // - size = 0 or file does not exist
948 // - dev_dirty is irrelevant
949 bool h_slow_dev_dirty = fs.debug_get_is_dev_dirty(h_slow, BlueFS::BDEV_SLOW);
950 // Imagine power goes down here.
951
952 fs.close_writer(h_slow);
953 fs.close_writer(h_db);
954
955 fs.umount();
956
957 ASSERT_EQ(0, fs.mount());
958 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
959
960 uint64_t size;
961 utime_t mtime;
962 ASSERT_EQ(0, fs.stat("dir.slow", "file", &size, &mtime));
963 // check file size 60000
964 ASSERT_EQ(size, 60000);
965 // check that dev_dirty was false (data stable on media)
966 ASSERT_EQ(h_slow_dev_dirty, false);
967
968 fs.umount();
969}
970
971TEST(BlueFS, test_update_ino1_delta_after_replay) {
972 uint64_t size = 1048576LL * (2 * 1024 + 128);
973 TempBdev bdev{size};
974
975 ConfSaver conf(g_ceph_context->_conf);
976 conf.SetVal("bluefs_alloc_size", "4096");
977 conf.SetVal("bluefs_shared_alloc_size", "4096");
978 conf.SetVal("bluefs_compact_log_sync", "false");
979 conf.SetVal("bluefs_min_log_runway", "32768");
980 conf.SetVal("bluefs_max_log_runway", "65536");
981 conf.SetVal("bluefs_allocator", "stupid");
982 conf.ApplyChanges();
983
984 BlueFS fs(g_ceph_context);
985 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
986 uuid_d fsid;
987 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
988 ASSERT_EQ(0, fs.mount());
989 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
990 ASSERT_EQ(0, fs.mkdir("dir"));
991
992 char data[2000];
993 BlueFS::FileWriter *h;
994 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
995 for (size_t i = 0; i < 100; i++) {
996 h->append(data, 2000);
997 fs.fsync(h);
998 }
999 fs.close_writer(h);
1000 fs.umount(true); //do not compact on exit!
7c673cae 1001
20effc67
TL
1002 ASSERT_EQ(0, fs.mount());
1003 ASSERT_EQ(0, fs.open_for_write("dir", "file2", &h, false));
1004 for (size_t i = 0; i < 100; i++) {
1005 h->append(data, 2000);
1006 fs.fsync(h);
1007 }
1008 fs.close_writer(h);
1009 fs.umount();
1010
1011 // remount and check log can replay safe?
1012 ASSERT_EQ(0, fs.mount());
1013 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
1014 fs.umount();
1015}
1016
39ae355f
TL
1017TEST(BlueFS, test_shared_alloc) {
1018 uint64_t size = 1048576 * 128;
1019 TempBdev bdev_slow{size};
1020 uint64_t size_db = 1048576 * 8;
1021 TempBdev bdev_db{size_db};
1022
1023 ConfSaver conf(g_ceph_context->_conf);
1024 conf.SetVal("bluefs_shared_alloc_size", "1048576");
1025
1026 bluefs_shared_alloc_context_t shared_alloc;
1027 uint64_t shared_alloc_unit = 4096;
1028 shared_alloc.set(
1029 Allocator::create(g_ceph_context, g_ceph_context->_conf->bluefs_allocator,
1030 size, shared_alloc_unit, 0, 0, "test shared allocator"),
1031 shared_alloc_unit);
1032 shared_alloc.a->init_add_free(0, size);
1033
1034 BlueFS fs(g_ceph_context);
1035 // DB device is fully utilized
1036 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, size_db - 0x1000));
1037 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0,
1038 &shared_alloc));
1039 uuid_d fsid;
1040 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
1041 ASSERT_EQ(0, fs.mount());
1042 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
1043 {
1044 for (int i=0; i<10; i++) {
1045 string dir = "dir.";
1046 dir.append(to_string(i));
1047 ASSERT_EQ(0, fs.mkdir(dir));
1048 for (int j=0; j<10; j++) {
1049 string file = "file.";
1050 file.append(to_string(j));
1051 BlueFS::FileWriter *h;
1052 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
1053 ASSERT_NE(nullptr, h);
1054 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
1055 bufferlist bl;
1056 std::unique_ptr<char[]> buf = gen_buffer(4096);
1057 bufferptr bp = buffer::claim_char(4096, buf.get());
1058 bl.push_back(bp);
1059 h->append(bl.c_str(), bl.length());
1060 fs.fsync(h);
1061 }
1062 }
1063 }
1064 {
1065 for (int i=0; i<10; i+=2) {
1066 string dir = "dir.";
1067 dir.append(to_string(i));
1068 for (int j=0; j<10; j++) {
1069 string file = "file.";
1070 file.append(to_string(j));
1071 fs.unlink(dir, file);
1072 fs.sync_metadata(false);
1073 }
1074 ASSERT_EQ(0, fs.rmdir(dir));
1075 fs.sync_metadata(false);
1076 }
1077 }
1078 fs.compact_log();
1079 auto *logger = fs.get_perf_counters();
1080 ASSERT_NE(logger->get(l_bluefs_alloc_shared_dev_fallbacks), 0);
1081 auto num_files = logger->get(l_bluefs_num_files);
1082 fs.umount();
1083 fs.mount();
1084 ASSERT_EQ(num_files, logger->get(l_bluefs_num_files));
1085 fs.umount();
1086}
1087
1088TEST(BlueFS, test_shared_alloc_sparse) {
1089 uint64_t size = 1048576 * 128 * 2;
1090 uint64_t main_unit = 4096;
1091 uint64_t bluefs_alloc_unit = 1048576;
1092 TempBdev bdev_slow{size};
1093
1094 ConfSaver conf(g_ceph_context->_conf);
1095 conf.SetVal("bluefs_shared_alloc_size",
1096 stringify(bluefs_alloc_unit).c_str());
1097
1098 bluefs_shared_alloc_context_t shared_alloc;
1099 shared_alloc.set(
1100 Allocator::create(g_ceph_context, g_ceph_context->_conf->bluefs_allocator,
1101 size, main_unit, 0, 0, "test shared allocator"),
1102 main_unit);
1103 // prepare sparse free space but let's have a continuous chunk at
1104 // the beginning to fit initial log's fnode into superblock,
1105 // we don't have any tricks to deal with sparse allocations
1106 // (and hence long fnode) at mkfs
1107 shared_alloc.a->init_add_free(bluefs_alloc_unit, 4 * bluefs_alloc_unit);
1108 for(uint64_t i = 5 * bluefs_alloc_unit; i < size; i += 2 * main_unit) {
1109 shared_alloc.a->init_add_free(i, main_unit);
1110 }
1111
1112 BlueFS fs(g_ceph_context);
1113 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_slow.path, false, 0,
1114 &shared_alloc));
1115 uuid_d fsid;
1116 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
1117 ASSERT_EQ(0, fs.mount());
1118 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
1119 {
1120 for (int i=0; i<10; i++) {
1121 string dir = "dir.";
1122 dir.append(to_string(i));
1123 ASSERT_EQ(0, fs.mkdir(dir));
1124 for (int j=0; j<10; j++) {
1125 string file = "file.";
1126 file.append(to_string(j));
1127 BlueFS::FileWriter *h;
1128 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
1129 ASSERT_NE(nullptr, h);
1130 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
1131 bufferlist bl;
1132 std::unique_ptr<char[]> buf = gen_buffer(4096);
1133 bufferptr bp = buffer::claim_char(4096, buf.get());
1134 bl.push_back(bp);
1135 h->append(bl.c_str(), bl.length());
1136 fs.fsync(h);
1137 }
1138 }
1139 }
1140 {
1141 for (int i=0; i<10; i+=2) {
1142 string dir = "dir.";
1143 dir.append(to_string(i));
1144 for (int j=0; j<10; j++) {
1145 string file = "file.";
1146 file.append(to_string(j));
1147 fs.unlink(dir, file);
1148 fs.sync_metadata(false);
1149 }
1150 ASSERT_EQ(0, fs.rmdir(dir));
1151 fs.sync_metadata(false);
1152 }
1153 }
1154 fs.compact_log();
1155 auto *logger = fs.get_perf_counters();
1156 ASSERT_NE(logger->get(l_bluefs_alloc_shared_size_fallbacks), 0);
1157 auto num_files = logger->get(l_bluefs_num_files);
1158 fs.umount();
1159
1160 fs.mount();
1161 ASSERT_EQ(num_files, logger->get(l_bluefs_num_files));
1162 fs.umount();
1163}
1164
1165TEST(BlueFS, test_4k_shared_alloc) {
1166 uint64_t size = 1048576 * 128 * 2;
1167 uint64_t main_unit = 4096;
1168 uint64_t bluefs_alloc_unit = main_unit;
1169 TempBdev bdev_slow{size};
1170
1171 ConfSaver conf(g_ceph_context->_conf);
1172 conf.SetVal("bluefs_shared_alloc_size",
1173 stringify(bluefs_alloc_unit).c_str());
1174
1175 bluefs_shared_alloc_context_t shared_alloc;
1176 shared_alloc.set(
1177 Allocator::create(g_ceph_context, g_ceph_context->_conf->bluefs_allocator,
1178 size, main_unit, 0, 0, "test shared allocator"),
1179 main_unit);
1180 shared_alloc.a->init_add_free(bluefs_alloc_unit, size - bluefs_alloc_unit);
1181
1182 BlueFS fs(g_ceph_context);
1183 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_slow.path, false, 0,
1184 &shared_alloc));
1185 uuid_d fsid;
1186 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
1187 ASSERT_EQ(0, fs.mount());
1188 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
1189 {
1190 for (int i=0; i<10; i++) {
1191 string dir = "dir.";
1192 dir.append(to_string(i));
1193 ASSERT_EQ(0, fs.mkdir(dir));
1194 for (int j=0; j<10; j++) {
1195 string file = "file.";
1196 file.append(to_string(j));
1197 BlueFS::FileWriter *h;
1198 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
1199 ASSERT_NE(nullptr, h);
1200 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
1201 bufferlist bl;
1202 std::unique_ptr<char[]> buf = gen_buffer(4096);
1203 bufferptr bp = buffer::claim_char(4096, buf.get());
1204 bl.push_back(bp);
1205 h->append(bl.c_str(), bl.length());
1206 fs.fsync(h);
1207 }
1208 }
1209 }
1210 {
1211 for (int i=0; i<10; i+=2) {
1212 string dir = "dir.";
1213 dir.append(to_string(i));
1214 for (int j=0; j<10; j++) {
1215 string file = "file.";
1216 file.append(to_string(j));
1217 fs.unlink(dir, file);
1218 fs.sync_metadata(false);
1219 }
1220 ASSERT_EQ(0, fs.rmdir(dir));
1221 fs.sync_metadata(false);
1222 }
1223 }
1224 fs.compact_log();
1225 auto *logger = fs.get_perf_counters();
1226 ASSERT_EQ(logger->get(l_bluefs_alloc_shared_dev_fallbacks), 0);
1227 ASSERT_EQ(logger->get(l_bluefs_alloc_shared_size_fallbacks), 0);
1228 auto num_files = logger->get(l_bluefs_num_files);
1229 fs.umount();
1230
1231 fs.mount();
1232 ASSERT_EQ(num_files, logger->get(l_bluefs_num_files));
1233 fs.umount();
1234}
1235
20effc67
TL
1236int main(int argc, char **argv) {
1237 auto args = argv_to_vec(argc, argv);
11fdf7f2
TL
1238 map<string,string> defaults = {
1239 { "debug_bluefs", "1/20" },
1240 { "debug_bdev", "1/20" }
1241 };
7c673cae 1242
11fdf7f2 1243 auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT,
7c673cae 1244 CODE_ENVIRONMENT_UTILITY,
11fdf7f2 1245 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
7c673cae 1246 common_init_finish(g_ceph_context);
11fdf7f2 1247 g_ceph_context->_conf.set_val(
7c673cae
FG
1248 "enable_experimental_unrecoverable_data_corrupting_features",
1249 "*");
11fdf7f2 1250 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
1251
1252 ::testing::InitGoogleTest(&argc, argv);
1253 return RUN_ALL_TESTS();
1254}