]> git.proxmox.com Git - ceph.git/blame - ceph/src/test/objectstore/test_bluefs.cc
use the buster suite for getting the source package for now
[ceph.git] / ceph / src / test / objectstore / test_bluefs.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <time.h>
#include <fcntl.h>
#include <unistd.h>
#include <atomic>
#include <random>
#include <thread>
#include <stack>

#include "global/global_init.h"
#include "common/ceph_argparse.h"
#include "include/stringify.h"
#include "include/scope_guard.h"
#include "common/errno.h"
#include <gtest/gtest.h>

#include "os/bluestore/BlueFS.h"
21
// Returns a newly allocated buffer of `size` bytes filled with pseudo-random
// data.  The engine is default-constructed on every call, so each call
// produces the same byte sequence.
std::unique_ptr<char[]> gen_buffer(uint64_t size)
{
  using byte_engine =
    std::independent_bits_engine<std::default_random_engine, CHAR_BIT, unsigned char>;

  auto data = std::make_unique<char[]>(size);
  byte_engine rng;
  char* const first = data.get();
  char* const last = first + size;
  std::generate(first, last, std::ref(rng));
  return data;
}
29
9f95a23c
TL
30class TempBdev {
31public:
32 TempBdev(uint64_t size)
33 : path{get_temp_bdev(size)}
34 {}
35 ~TempBdev() {
36 rm_temp_bdev(path);
37 }
38 const std::string path;
39private:
40 static string get_temp_bdev(uint64_t size)
41 {
42 static int n = 0;
43 string fn = "ceph_test_bluefs.tmp.block." + stringify(getpid())
44 + "." + stringify(++n);
45 int fd = ::open(fn.c_str(), O_CREAT|O_RDWR|O_TRUNC, 0644);
46 ceph_assert(fd >= 0);
47 int r = ::ftruncate(fd, size);
48 ceph_assert(r >= 0);
49 ::close(fd);
50 return fn;
51 }
52 static void rm_temp_bdev(string f)
53 {
54 ::unlink(f.c_str());
55 }
56};
7c673cae 57
f6b5b4d7
TL
58class ConfSaver {
59 std::stack<std::pair<std::string, std::string>> saved_settings;
60 ConfigProxy& conf;
61public:
62 ConfSaver(ConfigProxy& conf) : conf(conf) {
63 conf._clear_safe_to_start_threads();
64 };
65 ~ConfSaver() {
66 conf._clear_safe_to_start_threads();
67 while(saved_settings.size() > 0) {
68 auto& e = saved_settings.top();
69 conf.set_val_or_die(e.first, e.second);
70 saved_settings.pop();
71 }
72 conf.set_safe_to_start_threads();
73 conf.apply_changes(nullptr);
74 }
75 void SetVal(const char* key, const char* val) {
76 std::string skey(key);
77 std::string prev_val;
78 conf.get_val(skey, &prev_val);
79 conf.set_val_or_die(skey, val);
80 saved_settings.emplace(skey, prev_val);
81 }
82 void ApplyChanges() {
83 conf.set_safe_to_start_threads();
84 conf.apply_changes(nullptr);
85 }
86};
87
7c673cae
FG
88TEST(BlueFS, mkfs) {
89 uint64_t size = 1048576 * 128;
9f95a23c 90 TempBdev bdev{size};
7c673cae
FG
91 uuid_d fsid;
92 BlueFS fs(g_ceph_context);
f67539c2 93 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
9f95a23c 94 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae
FG
95}
96
97TEST(BlueFS, mkfs_mount) {
98 uint64_t size = 1048576 * 128;
9f95a23c 99 TempBdev bdev{size};
7c673cae 100 BlueFS fs(g_ceph_context);
f67539c2 101 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 102 uuid_d fsid;
9f95a23c 103 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 104 ASSERT_EQ(0, fs.mount());
9f95a23c 105 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
106 ASSERT_EQ(fs.get_total(BlueFS::BDEV_DB), size - 1048576);
107 ASSERT_LT(fs.get_free(BlueFS::BDEV_DB), size - 1048576);
108 fs.umount();
9f95a23c
TL
109}
110
7c673cae
FG
111TEST(BlueFS, write_read) {
112 uint64_t size = 1048576 * 128;
9f95a23c 113 TempBdev bdev{size};
7c673cae 114 BlueFS fs(g_ceph_context);
f67539c2 115 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 116 uuid_d fsid;
9f95a23c 117 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 118 ASSERT_EQ(0, fs.mount());
9f95a23c 119 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
120 {
121 BlueFS::FileWriter *h;
122 ASSERT_EQ(0, fs.mkdir("dir"));
123 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
124 h->append("foo", 3);
125 h->append("bar", 3);
126 h->append("baz", 3);
127 fs.fsync(h);
128 fs.close_writer(h);
129 }
130 {
131 BlueFS::FileReader *h;
132 ASSERT_EQ(0, fs.open_for_read("dir", "file", &h));
133 bufferlist bl;
f67539c2 134 ASSERT_EQ(9, fs.read(h, 0, 1024, &bl, NULL));
7c673cae
FG
135 ASSERT_EQ(0, strncmp("foobarbaz", bl.c_str(), 9));
136 delete h;
137 }
138 fs.umount();
7c673cae
FG
139}
140
141TEST(BlueFS, small_appends) {
142 uint64_t size = 1048576 * 128;
9f95a23c 143 TempBdev bdev{size};
7c673cae 144 BlueFS fs(g_ceph_context);
f67539c2 145 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 146 uuid_d fsid;
9f95a23c 147 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 148 ASSERT_EQ(0, fs.mount());
9f95a23c 149 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
150 {
151 BlueFS::FileWriter *h;
152 ASSERT_EQ(0, fs.mkdir("dir"));
153 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
154 for (unsigned i = 0; i < 10000; ++i) {
155 h->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
156 }
157 fs.fsync(h);
158 fs.close_writer(h);
159 }
160 {
161 BlueFS::FileWriter *h;
162 ASSERT_EQ(0, fs.open_for_write("dir", "file_sync", &h, false));
163 for (unsigned i = 0; i < 1000; ++i) {
164 h->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
31f18b77 165 ASSERT_EQ(0, fs.fsync(h));
7c673cae
FG
166 }
167 fs.close_writer(h);
168 }
169 fs.umount();
7c673cae
FG
170}
171
494da23a 172TEST(BlueFS, very_large_write) {
adb31ebb 173 // we'll write a ~5G file, so allocate more than that for the whole fs
cd265ab1 174 uint64_t size = 1048576 * 1024 * 6ull;
9f95a23c 175 TempBdev bdev{size};
494da23a
TL
176 BlueFS fs(g_ceph_context);
177
178 bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io");
179 g_ceph_context->_conf.set_val("bluefs_buffered_io", "false");
adb31ebb 180 uint64_t total_written = 0;
494da23a 181
f67539c2 182 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
494da23a 183 uuid_d fsid;
9f95a23c 184 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
494da23a 185 ASSERT_EQ(0, fs.mount());
9f95a23c 186 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
494da23a
TL
187 char buf[1048571]; // this is biggish, but intentionally not evenly aligned
188 for (unsigned i = 0; i < sizeof(buf); ++i) {
189 buf[i] = i;
190 }
191 {
192 BlueFS::FileWriter *h;
193 ASSERT_EQ(0, fs.mkdir("dir"));
194 ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false));
195 for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) {
196 h->append(buf, sizeof(buf));
adb31ebb
TL
197 total_written += sizeof(buf);
198 }
199 fs.fsync(h);
200 for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) {
201 h->append(buf, sizeof(buf));
202 total_written += sizeof(buf);
494da23a
TL
203 }
204 fs.fsync(h);
205 fs.close_writer(h);
206 }
207 {
208 BlueFS::FileReader *h;
209 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
210 bufferlist bl;
adb31ebb 211 ASSERT_EQ(h->file->fnode.size, total_written);
494da23a
TL
212 for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) {
213 bl.clear();
f67539c2 214 fs.read(h, i * sizeof(buf), sizeof(buf), &bl, NULL);
494da23a
TL
215 int r = memcmp(buf, bl.c_str(), sizeof(buf));
216 if (r) {
217 cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r
218 << std::endl;
219 }
220 ASSERT_EQ(0, r);
221 }
adb31ebb
TL
222 for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) {
223 bl.clear();
f67539c2 224 fs.read(h, i * sizeof(buf), sizeof(buf), &bl, NULL);
adb31ebb
TL
225 int r = memcmp(buf, bl.c_str(), sizeof(buf));
226 if (r) {
227 cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r
228 << std::endl;
229 }
230 ASSERT_EQ(0, r);
231 }
232 delete h;
233 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
234 ASSERT_EQ(h->file->fnode.size, total_written);
235 unique_ptr<char> huge_buf(new char[h->file->fnode.size]);
236 auto l = h->file->fnode.size;
f67539c2
TL
237 int64_t r = fs.read(h, 0, l, NULL, huge_buf.get());
238 ASSERT_EQ(r, l);
cd265ab1
TL
239 delete h;
240 }
241 fs.umount();
242
243 g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old));
244}
245
246TEST(BlueFS, very_large_write2) {
247 // we'll write a ~5G file, so allocate more than that for the whole fs
248 uint64_t size_full = 1048576 * 1024 * 6ull;
249 uint64_t size = 1048576 * 1024 * 5ull;
250 TempBdev bdev{ size_full };
251 BlueFS fs(g_ceph_context);
252
253 bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io");
254 g_ceph_context->_conf.set_val("bluefs_buffered_io", "false");
255 uint64_t total_written = 0;
256
257 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
cd265ab1
TL
258 uuid_d fsid;
259 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
260 ASSERT_EQ(0, fs.mount());
261 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
262
263 char fill_arr[1 << 20]; // 1M
264 for (size_t i = 0; i < sizeof(fill_arr); ++i) {
265 fill_arr[i] = (char)i;
266 }
267 std::unique_ptr<char[]> buf;
268 buf.reset(new char[size]);
269 for (size_t i = 0; i < size; i += sizeof(fill_arr)) {
270 memcpy(buf.get() + i, fill_arr, sizeof(fill_arr));
271 }
272 {
273 BlueFS::FileWriter* h;
274 ASSERT_EQ(0, fs.mkdir("dir"));
275 ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false));
276 fs.append_try_flush(h, buf.get(), size);
277 total_written = size;
278 fs.fsync(h);
279 fs.close_writer(h);
280 }
281 memset(buf.get(), 0, size);
282 {
283 BlueFS::FileReader* h;
284 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
285 ASSERT_EQ(h->file->fnode.size, total_written);
286 auto l = h->file->fnode.size;
f67539c2
TL
287 int64_t r = fs.read(h, 0, l, NULL, buf.get());
288 ASSERT_EQ(r, l);
cd265ab1
TL
289 for (size_t i = 0; i < size; i += sizeof(fill_arr)) {
290 ceph_assert(memcmp(buf.get() + i, fill_arr, sizeof(fill_arr)) == 0);
291 }
494da23a
TL
292 delete h;
293 }
294 fs.umount();
295
296 g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old));
494da23a
TL
297}
298
7c673cae
FG
299#define ALLOC_SIZE 4096
300
301void write_data(BlueFS &fs, uint64_t rationed_bytes)
302{
7c673cae
FG
303 int j=0, r=0;
304 uint64_t written_bytes = 0;
305 rationed_bytes -= ALLOC_SIZE;
306 stringstream ss;
307 string dir = "dir.";
308 ss << std::this_thread::get_id();
309 dir.append(ss.str());
310 dir.append(".");
311 dir.append(to_string(j));
312 ASSERT_EQ(0, fs.mkdir(dir));
313 while (1) {
314 string file = "file.";
315 file.append(to_string(j));
11fdf7f2 316 BlueFS::FileWriter *h;
7c673cae 317 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
318 ASSERT_NE(nullptr, h);
319 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 320 bufferlist bl;
11fdf7f2
TL
321 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
322 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
7c673cae
FG
323 bl.push_back(bp);
324 h->append(bl.c_str(), bl.length());
325 r = fs.fsync(h);
326 if (r < 0) {
7c673cae
FG
327 break;
328 }
11fdf7f2 329 written_bytes += g_conf()->bluefs_alloc_size;
7c673cae 330 j++;
11fdf7f2 331 if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) {
7c673cae
FG
332 break;
333 }
334 }
335}
336
337void create_single_file(BlueFS &fs)
338{
339 BlueFS::FileWriter *h;
340 stringstream ss;
341 string dir = "dir.test";
342 ASSERT_EQ(0, fs.mkdir(dir));
343 string file = "testfile";
344 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
345 bufferlist bl;
11fdf7f2
TL
346 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
347 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
7c673cae
FG
348 bl.push_back(bp);
349 h->append(bl.c_str(), bl.length());
350 fs.fsync(h);
351 fs.close_writer(h);
352}
353
354void write_single_file(BlueFS &fs, uint64_t rationed_bytes)
355{
7c673cae 356 stringstream ss;
11fdf7f2
TL
357 const string dir = "dir.test";
358 const string file = "testfile";
7c673cae
FG
359 uint64_t written_bytes = 0;
360 rationed_bytes -= ALLOC_SIZE;
361 while (1) {
11fdf7f2 362 BlueFS::FileWriter *h;
7c673cae 363 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
364 ASSERT_NE(nullptr, h);
365 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 366 bufferlist bl;
11fdf7f2
TL
367 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
368 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
7c673cae
FG
369 bl.push_back(bp);
370 h->append(bl.c_str(), bl.length());
11fdf7f2 371 int r = fs.fsync(h);
7c673cae 372 if (r < 0) {
7c673cae
FG
373 break;
374 }
11fdf7f2
TL
375 written_bytes += g_conf()->bluefs_alloc_size;
376 if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) {
7c673cae
FG
377 break;
378 }
379 }
380}
381
382bool writes_done = false;
383
384void sync_fs(BlueFS &fs)
385{
386 while (1) {
387 if (writes_done == true)
388 break;
1911f103 389 fs.sync_metadata(false);
7c673cae
FG
390 sleep(1);
391 }
392}
393
394
// Waits for a single thread to finish.
void do_join(std::thread& t)
{
  t.join();
}

// Waits for every thread in `v`, front to back.
void join_all(std::vector<std::thread>& v)
{
  for (std::thread& worker : v) {
    do_join(worker);
  }
}
404
405#define NUM_WRITERS 3
406#define NUM_SYNC_THREADS 1
407
408#define NUM_SINGLE_FILE_WRITERS 1
409#define NUM_MULTIPLE_FILE_WRITERS 2
410
411TEST(BlueFS, test_flush_1) {
412 uint64_t size = 1048576 * 128;
9f95a23c 413 TempBdev bdev{size};
11fdf7f2 414 g_ceph_context->_conf.set_val(
7c673cae
FG
415 "bluefs_alloc_size",
416 "65536");
11fdf7f2 417 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
418
419 BlueFS fs(g_ceph_context);
f67539c2 420 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 421 uuid_d fsid;
9f95a23c 422 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 423 ASSERT_EQ(0, fs.mount());
9f95a23c 424 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
425 {
426 std::vector<std::thread> write_thread_multiple;
427 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
428 uint64_t per_thread_bytes = (effective_size/(NUM_MULTIPLE_FILE_WRITERS + NUM_SINGLE_FILE_WRITERS));
429 for (int i=0; i<NUM_MULTIPLE_FILE_WRITERS ; i++) {
430 write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
431 }
432
433 create_single_file(fs);
434 std::vector<std::thread> write_thread_single;
435 for (int i=0; i<NUM_SINGLE_FILE_WRITERS; i++) {
436 write_thread_single.push_back(std::thread(write_single_file, std::ref(fs), per_thread_bytes));
437 }
438
439 join_all(write_thread_single);
440 join_all(write_thread_multiple);
441 }
442 fs.umount();
7c673cae
FG
443}
444
445TEST(BlueFS, test_flush_2) {
446 uint64_t size = 1048576 * 256;
9f95a23c 447 TempBdev bdev{size};
11fdf7f2 448 g_ceph_context->_conf.set_val(
7c673cae
FG
449 "bluefs_alloc_size",
450 "65536");
11fdf7f2 451 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
452
453 BlueFS fs(g_ceph_context);
f67539c2 454 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 455 uuid_d fsid;
9f95a23c 456 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 457 ASSERT_EQ(0, fs.mount());
9f95a23c 458 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
459 {
460 uint64_t effective_size = size - (128 * 1048576); // leaving the last 32 MB for log compaction
461 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
462 std::vector<std::thread> write_thread_multiple;
463 for (int i=0; i<NUM_WRITERS; i++) {
464 write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
465 }
466
467 join_all(write_thread_multiple);
468 }
469 fs.umount();
7c673cae
FG
470}
471
472TEST(BlueFS, test_flush_3) {
473 uint64_t size = 1048576 * 256;
9f95a23c 474 TempBdev bdev{size};
11fdf7f2 475 g_ceph_context->_conf.set_val(
7c673cae
FG
476 "bluefs_alloc_size",
477 "65536");
11fdf7f2 478 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
479
480 BlueFS fs(g_ceph_context);
f67539c2 481 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 482 uuid_d fsid;
9f95a23c 483 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 484 ASSERT_EQ(0, fs.mount());
9f95a23c 485 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
486 {
487 std::vector<std::thread> write_threads;
488 uint64_t effective_size = size - (64 * 1048576); // leaving the last 11 MB for log compaction
489 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
490 for (int i=0; i<NUM_WRITERS; i++) {
491 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
492 }
493
494 std::vector<std::thread> sync_threads;
495 for (int i=0; i<NUM_SYNC_THREADS; i++) {
496 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
497 }
498
499 join_all(write_threads);
500 writes_done = true;
501 join_all(sync_threads);
502 }
503 fs.umount();
7c673cae
FG
504}
505
506TEST(BlueFS, test_simple_compaction_sync) {
11fdf7f2 507 g_ceph_context->_conf.set_val(
7c673cae
FG
508 "bluefs_compact_log_sync",
509 "true");
510 uint64_t size = 1048576 * 128;
9f95a23c 511 TempBdev bdev{size};
7c673cae
FG
512
513 BlueFS fs(g_ceph_context);
f67539c2 514 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 515 uuid_d fsid;
9f95a23c 516 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 517 ASSERT_EQ(0, fs.mount());
9f95a23c 518 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae 519 {
7c673cae
FG
520 for (int i=0; i<10; i++) {
521 string dir = "dir.";
522 dir.append(to_string(i));
523 ASSERT_EQ(0, fs.mkdir(dir));
524 for (int j=0; j<10; j++) {
525 string file = "file.";
526 file.append(to_string(j));
11fdf7f2 527 BlueFS::FileWriter *h;
7c673cae 528 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
529 ASSERT_NE(nullptr, h);
530 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 531 bufferlist bl;
11fdf7f2
TL
532 std::unique_ptr<char[]> buf = gen_buffer(4096);
533 bufferptr bp = buffer::claim_char(4096, buf.get());
7c673cae
FG
534 bl.push_back(bp);
535 h->append(bl.c_str(), bl.length());
536 fs.fsync(h);
7c673cae
FG
537 }
538 }
539 }
7c673cae
FG
540 {
541 for (int i=0; i<10; i+=2) {
542 string dir = "dir.";
543 dir.append(to_string(i));
11fdf7f2 544 for (int j=0; j<10; j++) {
7c673cae
FG
545 string file = "file.";
546 file.append(to_string(j));
547 fs.unlink(dir, file);
1911f103 548 fs.sync_metadata(false);
7c673cae 549 }
11fdf7f2 550 ASSERT_EQ(0, fs.rmdir(dir));
1911f103 551 fs.sync_metadata(false);
7c673cae
FG
552 }
553 }
554 fs.compact_log();
555 fs.umount();
7c673cae
FG
556}
557
558TEST(BlueFS, test_simple_compaction_async) {
11fdf7f2 559 g_ceph_context->_conf.set_val(
7c673cae
FG
560 "bluefs_compact_log_sync",
561 "false");
562 uint64_t size = 1048576 * 128;
9f95a23c 563 TempBdev bdev{size};
7c673cae
FG
564
565 BlueFS fs(g_ceph_context);
f67539c2 566 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 567 uuid_d fsid;
9f95a23c 568 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 569 ASSERT_EQ(0, fs.mount());
9f95a23c 570 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae 571 {
7c673cae
FG
572 for (int i=0; i<10; i++) {
573 string dir = "dir.";
574 dir.append(to_string(i));
575 ASSERT_EQ(0, fs.mkdir(dir));
576 for (int j=0; j<10; j++) {
577 string file = "file.";
578 file.append(to_string(j));
11fdf7f2 579 BlueFS::FileWriter *h;
7c673cae 580 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
581 ASSERT_NE(nullptr, h);
582 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 583 bufferlist bl;
11fdf7f2
TL
584 std::unique_ptr<char[]> buf = gen_buffer(4096);
585 bufferptr bp = buffer::claim_char(4096, buf.get());
7c673cae
FG
586 bl.push_back(bp);
587 h->append(bl.c_str(), bl.length());
588 fs.fsync(h);
7c673cae
FG
589 }
590 }
591 }
7c673cae
FG
592 {
593 for (int i=0; i<10; i+=2) {
594 string dir = "dir.";
595 dir.append(to_string(i));
11fdf7f2 596 for (int j=0; j<10; j++) {
7c673cae
FG
597 string file = "file.";
598 file.append(to_string(j));
599 fs.unlink(dir, file);
1911f103 600 fs.sync_metadata(false);
7c673cae 601 }
11fdf7f2 602 ASSERT_EQ(0, fs.rmdir(dir));
1911f103 603 fs.sync_metadata(false);
7c673cae
FG
604 }
605 }
606 fs.compact_log();
607 fs.umount();
7c673cae
FG
608}
609
610TEST(BlueFS, test_compaction_sync) {
611 uint64_t size = 1048576 * 128;
9f95a23c 612 TempBdev bdev{size};
11fdf7f2 613 g_ceph_context->_conf.set_val(
7c673cae
FG
614 "bluefs_alloc_size",
615 "65536");
11fdf7f2 616 g_ceph_context->_conf.set_val(
7c673cae
FG
617 "bluefs_compact_log_sync",
618 "true");
619
620 BlueFS fs(g_ceph_context);
f67539c2 621 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 622 uuid_d fsid;
9f95a23c 623 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 624 ASSERT_EQ(0, fs.mount());
9f95a23c 625 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
626 {
627 std::vector<std::thread> write_threads;
628 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
629 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
630 for (int i=0; i<NUM_WRITERS; i++) {
631 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
632 }
633
634 std::vector<std::thread> sync_threads;
635 for (int i=0; i<NUM_SYNC_THREADS; i++) {
636 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
637 }
638
639 join_all(write_threads);
640 writes_done = true;
641 join_all(sync_threads);
642 fs.compact_log();
643 }
644 fs.umount();
7c673cae
FG
645}
646
647TEST(BlueFS, test_compaction_async) {
648 uint64_t size = 1048576 * 128;
9f95a23c 649 TempBdev bdev{size};
11fdf7f2 650 g_ceph_context->_conf.set_val(
7c673cae
FG
651 "bluefs_alloc_size",
652 "65536");
11fdf7f2 653 g_ceph_context->_conf.set_val(
7c673cae
FG
654 "bluefs_compact_log_sync",
655 "false");
656
657 BlueFS fs(g_ceph_context);
f67539c2 658 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 659 uuid_d fsid;
9f95a23c 660 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 661 ASSERT_EQ(0, fs.mount());
9f95a23c 662 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
663 {
664 std::vector<std::thread> write_threads;
665 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
666 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
667 for (int i=0; i<NUM_WRITERS; i++) {
668 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
669 }
670
671 std::vector<std::thread> sync_threads;
672 for (int i=0; i<NUM_SYNC_THREADS; i++) {
673 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
674 }
675
676 join_all(write_threads);
677 writes_done = true;
678 join_all(sync_threads);
679 fs.compact_log();
680 }
681 fs.umount();
7c673cae
FG
682}
683
684TEST(BlueFS, test_replay) {
685 uint64_t size = 1048576 * 128;
9f95a23c 686 TempBdev bdev{size};
11fdf7f2 687 g_ceph_context->_conf.set_val(
7c673cae
FG
688 "bluefs_alloc_size",
689 "65536");
11fdf7f2 690 g_ceph_context->_conf.set_val(
7c673cae
FG
691 "bluefs_compact_log_sync",
692 "false");
693
694 BlueFS fs(g_ceph_context);
f67539c2 695 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 696 uuid_d fsid;
9f95a23c 697 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 698 ASSERT_EQ(0, fs.mount());
9f95a23c 699 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
700 {
701 std::vector<std::thread> write_threads;
702 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
703 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
704 for (int i=0; i<NUM_WRITERS; i++) {
705 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
706 }
707
708 std::vector<std::thread> sync_threads;
709 for (int i=0; i<NUM_SYNC_THREADS; i++) {
710 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
711 }
712
713 join_all(write_threads);
714 writes_done = true;
715 join_all(sync_threads);
716 fs.compact_log();
717 }
718 fs.umount();
719 // remount and check log can replay safe?
31f18b77 720 ASSERT_EQ(0, fs.mount());
9f95a23c 721 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae 722 fs.umount();
7c673cae
FG
723}
724
f6b5b4d7
TL
725TEST(BlueFS, test_replay_growth) {
726 uint64_t size = 1048576LL * (2 * 1024 + 128);
727 TempBdev bdev{size};
728
729 ConfSaver conf(g_ceph_context->_conf);
730 conf.SetVal("bluefs_alloc_size", "4096");
731 conf.SetVal("bluefs_shared_alloc_size", "4096");
732 conf.SetVal("bluefs_compact_log_sync", "false");
733 conf.SetVal("bluefs_min_log_runway", "32768");
734 conf.SetVal("bluefs_max_log_runway", "65536");
735 conf.SetVal("bluefs_allocator", "stupid");
736 conf.SetVal("bluefs_sync_write", "true");
737 conf.ApplyChanges();
738
739 BlueFS fs(g_ceph_context);
f67539c2 740 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
f6b5b4d7
TL
741 uuid_d fsid;
742 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
743 ASSERT_EQ(0, fs.mount());
744 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
745 ASSERT_EQ(0, fs.mkdir("dir"));
746
747 char data[2000];
748 BlueFS::FileWriter *h;
749 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
750 for (size_t i = 0; i < 10000; i++) {
751 h->append(data, 2000);
752 fs.fsync(h);
753 }
754 fs.close_writer(h);
755 fs.umount(true); //do not compact on exit!
756
757 // remount and check log can replay safe?
758 ASSERT_EQ(0, fs.mount());
759 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
760 fs.umount();
761}
762
7c673cae
FG
763int main(int argc, char **argv) {
764 vector<const char*> args;
765 argv_to_vec(argc, (const char **)argv, args);
7c673cae 766
11fdf7f2
TL
767 map<string,string> defaults = {
768 { "debug_bluefs", "1/20" },
769 { "debug_bdev", "1/20" }
770 };
7c673cae 771
11fdf7f2 772 auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT,
7c673cae 773 CODE_ENVIRONMENT_UTILITY,
11fdf7f2 774 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
7c673cae 775 common_init_finish(g_ceph_context);
11fdf7f2 776 g_ceph_context->_conf.set_val(
7c673cae
FG
777 "enable_experimental_unrecoverable_data_corrupting_features",
778 "*");
11fdf7f2 779 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
780
781 ::testing::InitGoogleTest(&argc, argv);
782 return RUN_ALL_TESTS();
783}