]> git.proxmox.com Git - ceph.git/blame - ceph/src/test/objectstore/test_bluefs.cc
update patches for quincy beta
[ceph.git] / ceph / src / test / objectstore / test_bluefs.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <atomic>
#include <iostream>
#include <random>
#include <stack>
#include <thread>

#include "global/global_init.h"
#include "common/ceph_argparse.h"
#include "include/stringify.h"
#include "include/scope_guard.h"

#include "common/errno.h"
#include <gtest/gtest.h>

#include "os/bluestore/BlueFS.h"
21
20effc67
TL
22using namespace std;
23
/// Allocate `size` bytes on the heap and fill them with pseudo-random data.
///
/// The engine is default-constructed on every call and never seeded, so two
/// calls with the same `size` produce identical contents.
///
/// @param size number of bytes to allocate and fill
/// @return owning pointer to the filled buffer
std::unique_ptr<char[]> gen_buffer(uint64_t size)
{
  std::unique_ptr<char[]> buffer = std::make_unique<char[]>(size);
  // The standard only allows unsigned short/int/long/long long as the
  // UIntType of an engine adaptor; instantiating it with `unsigned char` is
  // undefined and some standard libraries reject it outright.  The adaptor
  // still emits CHAR_BIT bits per draw, so each value fits in one byte and is
  // narrowed explicitly when stored.
  std::independent_bits_engine<std::default_random_engine, CHAR_BIT, unsigned short> e;
  std::generate(buffer.get(), buffer.get() + size,
                [&e] { return static_cast<char>(e()); });
  return buffer;
}
31
9f95a23c
TL
32class TempBdev {
33public:
34 TempBdev(uint64_t size)
35 : path{get_temp_bdev(size)}
36 {}
37 ~TempBdev() {
38 rm_temp_bdev(path);
39 }
40 const std::string path;
41private:
42 static string get_temp_bdev(uint64_t size)
43 {
44 static int n = 0;
45 string fn = "ceph_test_bluefs.tmp.block." + stringify(getpid())
46 + "." + stringify(++n);
47 int fd = ::open(fn.c_str(), O_CREAT|O_RDWR|O_TRUNC, 0644);
48 ceph_assert(fd >= 0);
49 int r = ::ftruncate(fd, size);
50 ceph_assert(r >= 0);
51 ::close(fd);
52 return fn;
53 }
54 static void rm_temp_bdev(string f)
55 {
56 ::unlink(f.c_str());
57 }
58};
7c673cae 59
f6b5b4d7
TL
60class ConfSaver {
61 std::stack<std::pair<std::string, std::string>> saved_settings;
62 ConfigProxy& conf;
63public:
64 ConfSaver(ConfigProxy& conf) : conf(conf) {
65 conf._clear_safe_to_start_threads();
66 };
67 ~ConfSaver() {
68 conf._clear_safe_to_start_threads();
69 while(saved_settings.size() > 0) {
70 auto& e = saved_settings.top();
71 conf.set_val_or_die(e.first, e.second);
72 saved_settings.pop();
73 }
74 conf.set_safe_to_start_threads();
75 conf.apply_changes(nullptr);
76 }
77 void SetVal(const char* key, const char* val) {
78 std::string skey(key);
79 std::string prev_val;
80 conf.get_val(skey, &prev_val);
81 conf.set_val_or_die(skey, val);
82 saved_settings.emplace(skey, prev_val);
83 }
84 void ApplyChanges() {
85 conf.set_safe_to_start_threads();
86 conf.apply_changes(nullptr);
87 }
88};
89
7c673cae
FG
90TEST(BlueFS, mkfs) {
91 uint64_t size = 1048576 * 128;
9f95a23c 92 TempBdev bdev{size};
7c673cae
FG
93 uuid_d fsid;
94 BlueFS fs(g_ceph_context);
f67539c2 95 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
9f95a23c 96 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae
FG
97}
98
99TEST(BlueFS, mkfs_mount) {
100 uint64_t size = 1048576 * 128;
9f95a23c 101 TempBdev bdev{size};
7c673cae 102 BlueFS fs(g_ceph_context);
f67539c2 103 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 104 uuid_d fsid;
9f95a23c 105 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 106 ASSERT_EQ(0, fs.mount());
9f95a23c 107 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
108 ASSERT_EQ(fs.get_total(BlueFS::BDEV_DB), size - 1048576);
109 ASSERT_LT(fs.get_free(BlueFS::BDEV_DB), size - 1048576);
110 fs.umount();
9f95a23c
TL
111}
112
7c673cae
FG
113TEST(BlueFS, write_read) {
114 uint64_t size = 1048576 * 128;
9f95a23c 115 TempBdev bdev{size};
7c673cae 116 BlueFS fs(g_ceph_context);
f67539c2 117 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 118 uuid_d fsid;
9f95a23c 119 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 120 ASSERT_EQ(0, fs.mount());
9f95a23c 121 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
122 {
123 BlueFS::FileWriter *h;
124 ASSERT_EQ(0, fs.mkdir("dir"));
125 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
126 h->append("foo", 3);
127 h->append("bar", 3);
128 h->append("baz", 3);
129 fs.fsync(h);
130 fs.close_writer(h);
131 }
132 {
133 BlueFS::FileReader *h;
134 ASSERT_EQ(0, fs.open_for_read("dir", "file", &h));
135 bufferlist bl;
f67539c2 136 ASSERT_EQ(9, fs.read(h, 0, 1024, &bl, NULL));
7c673cae
FG
137 ASSERT_EQ(0, strncmp("foobarbaz", bl.c_str(), 9));
138 delete h;
139 }
140 fs.umount();
7c673cae
FG
141}
142
143TEST(BlueFS, small_appends) {
144 uint64_t size = 1048576 * 128;
9f95a23c 145 TempBdev bdev{size};
7c673cae 146 BlueFS fs(g_ceph_context);
f67539c2 147 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 148 uuid_d fsid;
9f95a23c 149 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 150 ASSERT_EQ(0, fs.mount());
9f95a23c 151 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
152 {
153 BlueFS::FileWriter *h;
154 ASSERT_EQ(0, fs.mkdir("dir"));
155 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
156 for (unsigned i = 0; i < 10000; ++i) {
157 h->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
158 }
159 fs.fsync(h);
160 fs.close_writer(h);
161 }
162 {
163 BlueFS::FileWriter *h;
164 ASSERT_EQ(0, fs.open_for_write("dir", "file_sync", &h, false));
165 for (unsigned i = 0; i < 1000; ++i) {
166 h->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
31f18b77 167 ASSERT_EQ(0, fs.fsync(h));
7c673cae
FG
168 }
169 fs.close_writer(h);
170 }
171 fs.umount();
7c673cae
FG
172}
173
494da23a 174TEST(BlueFS, very_large_write) {
adb31ebb 175 // we'll write a ~5G file, so allocate more than that for the whole fs
cd265ab1 176 uint64_t size = 1048576 * 1024 * 6ull;
9f95a23c 177 TempBdev bdev{size};
494da23a
TL
178 BlueFS fs(g_ceph_context);
179
180 bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io");
181 g_ceph_context->_conf.set_val("bluefs_buffered_io", "false");
adb31ebb 182 uint64_t total_written = 0;
494da23a 183
f67539c2 184 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
494da23a 185 uuid_d fsid;
9f95a23c 186 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
494da23a 187 ASSERT_EQ(0, fs.mount());
9f95a23c 188 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
494da23a
TL
189 char buf[1048571]; // this is biggish, but intentionally not evenly aligned
190 for (unsigned i = 0; i < sizeof(buf); ++i) {
191 buf[i] = i;
192 }
193 {
194 BlueFS::FileWriter *h;
195 ASSERT_EQ(0, fs.mkdir("dir"));
196 ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false));
197 for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) {
198 h->append(buf, sizeof(buf));
adb31ebb
TL
199 total_written += sizeof(buf);
200 }
201 fs.fsync(h);
202 for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) {
203 h->append(buf, sizeof(buf));
204 total_written += sizeof(buf);
494da23a
TL
205 }
206 fs.fsync(h);
207 fs.close_writer(h);
208 }
209 {
210 BlueFS::FileReader *h;
211 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
212 bufferlist bl;
adb31ebb 213 ASSERT_EQ(h->file->fnode.size, total_written);
494da23a
TL
214 for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) {
215 bl.clear();
f67539c2 216 fs.read(h, i * sizeof(buf), sizeof(buf), &bl, NULL);
494da23a
TL
217 int r = memcmp(buf, bl.c_str(), sizeof(buf));
218 if (r) {
219 cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r
220 << std::endl;
221 }
222 ASSERT_EQ(0, r);
223 }
adb31ebb
TL
224 for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) {
225 bl.clear();
f67539c2 226 fs.read(h, i * sizeof(buf), sizeof(buf), &bl, NULL);
adb31ebb
TL
227 int r = memcmp(buf, bl.c_str(), sizeof(buf));
228 if (r) {
229 cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r
230 << std::endl;
231 }
232 ASSERT_EQ(0, r);
233 }
234 delete h;
235 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
236 ASSERT_EQ(h->file->fnode.size, total_written);
237 unique_ptr<char> huge_buf(new char[h->file->fnode.size]);
238 auto l = h->file->fnode.size;
f67539c2
TL
239 int64_t r = fs.read(h, 0, l, NULL, huge_buf.get());
240 ASSERT_EQ(r, l);
cd265ab1
TL
241 delete h;
242 }
243 fs.umount();
244
245 g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old));
246}
247
248TEST(BlueFS, very_large_write2) {
249 // we'll write a ~5G file, so allocate more than that for the whole fs
250 uint64_t size_full = 1048576 * 1024 * 6ull;
251 uint64_t size = 1048576 * 1024 * 5ull;
252 TempBdev bdev{ size_full };
253 BlueFS fs(g_ceph_context);
254
255 bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io");
256 g_ceph_context->_conf.set_val("bluefs_buffered_io", "false");
257 uint64_t total_written = 0;
258
259 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
cd265ab1
TL
260 uuid_d fsid;
261 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
262 ASSERT_EQ(0, fs.mount());
263 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
264
265 char fill_arr[1 << 20]; // 1M
266 for (size_t i = 0; i < sizeof(fill_arr); ++i) {
267 fill_arr[i] = (char)i;
268 }
269 std::unique_ptr<char[]> buf;
270 buf.reset(new char[size]);
271 for (size_t i = 0; i < size; i += sizeof(fill_arr)) {
272 memcpy(buf.get() + i, fill_arr, sizeof(fill_arr));
273 }
274 {
275 BlueFS::FileWriter* h;
276 ASSERT_EQ(0, fs.mkdir("dir"));
277 ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false));
278 fs.append_try_flush(h, buf.get(), size);
279 total_written = size;
280 fs.fsync(h);
281 fs.close_writer(h);
282 }
283 memset(buf.get(), 0, size);
284 {
285 BlueFS::FileReader* h;
286 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
287 ASSERT_EQ(h->file->fnode.size, total_written);
288 auto l = h->file->fnode.size;
f67539c2
TL
289 int64_t r = fs.read(h, 0, l, NULL, buf.get());
290 ASSERT_EQ(r, l);
cd265ab1
TL
291 for (size_t i = 0; i < size; i += sizeof(fill_arr)) {
292 ceph_assert(memcmp(buf.get() + i, fill_arr, sizeof(fill_arr)) == 0);
293 }
494da23a
TL
294 delete h;
295 }
296 fs.umount();
297
298 g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old));
494da23a
TL
299}
300
7c673cae
FG
// Size in bytes of the random buffer that write_data(), create_single_file()
// and write_single_file() append on each pass.
#define ALLOC_SIZE 4096
302
303void write_data(BlueFS &fs, uint64_t rationed_bytes)
304{
7c673cae
FG
305 int j=0, r=0;
306 uint64_t written_bytes = 0;
307 rationed_bytes -= ALLOC_SIZE;
308 stringstream ss;
309 string dir = "dir.";
310 ss << std::this_thread::get_id();
311 dir.append(ss.str());
312 dir.append(".");
313 dir.append(to_string(j));
314 ASSERT_EQ(0, fs.mkdir(dir));
315 while (1) {
316 string file = "file.";
317 file.append(to_string(j));
11fdf7f2 318 BlueFS::FileWriter *h;
7c673cae 319 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
320 ASSERT_NE(nullptr, h);
321 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 322 bufferlist bl;
11fdf7f2
TL
323 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
324 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
7c673cae
FG
325 bl.push_back(bp);
326 h->append(bl.c_str(), bl.length());
327 r = fs.fsync(h);
328 if (r < 0) {
7c673cae
FG
329 break;
330 }
11fdf7f2 331 written_bytes += g_conf()->bluefs_alloc_size;
7c673cae 332 j++;
11fdf7f2 333 if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) {
7c673cae
FG
334 break;
335 }
336 }
337}
338
339void create_single_file(BlueFS &fs)
340{
341 BlueFS::FileWriter *h;
342 stringstream ss;
343 string dir = "dir.test";
344 ASSERT_EQ(0, fs.mkdir(dir));
345 string file = "testfile";
346 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
347 bufferlist bl;
11fdf7f2
TL
348 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
349 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
7c673cae
FG
350 bl.push_back(bp);
351 h->append(bl.c_str(), bl.length());
352 fs.fsync(h);
353 fs.close_writer(h);
354}
355
356void write_single_file(BlueFS &fs, uint64_t rationed_bytes)
357{
7c673cae 358 stringstream ss;
11fdf7f2
TL
359 const string dir = "dir.test";
360 const string file = "testfile";
7c673cae
FG
361 uint64_t written_bytes = 0;
362 rationed_bytes -= ALLOC_SIZE;
363 while (1) {
11fdf7f2 364 BlueFS::FileWriter *h;
7c673cae 365 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
366 ASSERT_NE(nullptr, h);
367 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 368 bufferlist bl;
11fdf7f2
TL
369 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
370 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
7c673cae
FG
371 bl.push_back(bp);
372 h->append(bl.c_str(), bl.length());
11fdf7f2 373 int r = fs.fsync(h);
7c673cae 374 if (r < 0) {
7c673cae
FG
375 break;
376 }
11fdf7f2
TL
377 written_bytes += g_conf()->bluefs_alloc_size;
378 if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) {
7c673cae
FG
379 break;
380 }
381 }
382}
383
384bool writes_done = false;
385
386void sync_fs(BlueFS &fs)
387{
388 while (1) {
389 if (writes_done == true)
390 break;
1911f103 391 fs.sync_metadata(false);
7c673cae
FG
392 sleep(1);
393 }
394}
395
396
// Block until thread `t` has finished.
void do_join(std::thread& t)
{
  t.join();
}
401
// Join every thread in `v`, front to back.
void join_all(std::vector<std::thread>& v)
{
  for (std::thread& worker : v) {
    worker.join();
  }
}
406
// Thread counts for the multi-threaded tests.
#define NUM_WRITERS 3        // write_data() threads per test
#define NUM_SYNC_THREADS 1   // sync_fs() threads per test

// test_flush_1 splits its writers between the two helper kinds.
#define NUM_SINGLE_FILE_WRITERS 1    // write_single_file() threads
#define NUM_MULTIPLE_FILE_WRITERS 2  // write_data() threads
412
413TEST(BlueFS, test_flush_1) {
414 uint64_t size = 1048576 * 128;
9f95a23c 415 TempBdev bdev{size};
11fdf7f2 416 g_ceph_context->_conf.set_val(
7c673cae
FG
417 "bluefs_alloc_size",
418 "65536");
11fdf7f2 419 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
420
421 BlueFS fs(g_ceph_context);
f67539c2 422 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 423 uuid_d fsid;
9f95a23c 424 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 425 ASSERT_EQ(0, fs.mount());
9f95a23c 426 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
427 {
428 std::vector<std::thread> write_thread_multiple;
429 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
430 uint64_t per_thread_bytes = (effective_size/(NUM_MULTIPLE_FILE_WRITERS + NUM_SINGLE_FILE_WRITERS));
431 for (int i=0; i<NUM_MULTIPLE_FILE_WRITERS ; i++) {
432 write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
433 }
434
435 create_single_file(fs);
436 std::vector<std::thread> write_thread_single;
437 for (int i=0; i<NUM_SINGLE_FILE_WRITERS; i++) {
438 write_thread_single.push_back(std::thread(write_single_file, std::ref(fs), per_thread_bytes));
439 }
440
441 join_all(write_thread_single);
442 join_all(write_thread_multiple);
443 }
444 fs.umount();
7c673cae
FG
445}
446
447TEST(BlueFS, test_flush_2) {
448 uint64_t size = 1048576 * 256;
9f95a23c 449 TempBdev bdev{size};
11fdf7f2 450 g_ceph_context->_conf.set_val(
7c673cae
FG
451 "bluefs_alloc_size",
452 "65536");
11fdf7f2 453 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
454
455 BlueFS fs(g_ceph_context);
f67539c2 456 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 457 uuid_d fsid;
9f95a23c 458 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 459 ASSERT_EQ(0, fs.mount());
9f95a23c 460 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
461 {
462 uint64_t effective_size = size - (128 * 1048576); // leaving the last 32 MB for log compaction
463 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
464 std::vector<std::thread> write_thread_multiple;
465 for (int i=0; i<NUM_WRITERS; i++) {
466 write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
467 }
468
469 join_all(write_thread_multiple);
470 }
471 fs.umount();
7c673cae
FG
472}
473
474TEST(BlueFS, test_flush_3) {
475 uint64_t size = 1048576 * 256;
9f95a23c 476 TempBdev bdev{size};
11fdf7f2 477 g_ceph_context->_conf.set_val(
7c673cae
FG
478 "bluefs_alloc_size",
479 "65536");
11fdf7f2 480 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
481
482 BlueFS fs(g_ceph_context);
f67539c2 483 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 484 uuid_d fsid;
9f95a23c 485 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 486 ASSERT_EQ(0, fs.mount());
9f95a23c 487 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
488 {
489 std::vector<std::thread> write_threads;
490 uint64_t effective_size = size - (64 * 1048576); // leaving the last 11 MB for log compaction
491 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
492 for (int i=0; i<NUM_WRITERS; i++) {
493 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
494 }
495
496 std::vector<std::thread> sync_threads;
497 for (int i=0; i<NUM_SYNC_THREADS; i++) {
498 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
499 }
500
501 join_all(write_threads);
502 writes_done = true;
503 join_all(sync_threads);
504 }
505 fs.umount();
7c673cae
FG
506}
507
508TEST(BlueFS, test_simple_compaction_sync) {
11fdf7f2 509 g_ceph_context->_conf.set_val(
7c673cae
FG
510 "bluefs_compact_log_sync",
511 "true");
512 uint64_t size = 1048576 * 128;
9f95a23c 513 TempBdev bdev{size};
7c673cae
FG
514
515 BlueFS fs(g_ceph_context);
f67539c2 516 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 517 uuid_d fsid;
9f95a23c 518 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 519 ASSERT_EQ(0, fs.mount());
9f95a23c 520 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae 521 {
7c673cae
FG
522 for (int i=0; i<10; i++) {
523 string dir = "dir.";
524 dir.append(to_string(i));
525 ASSERT_EQ(0, fs.mkdir(dir));
526 for (int j=0; j<10; j++) {
527 string file = "file.";
528 file.append(to_string(j));
11fdf7f2 529 BlueFS::FileWriter *h;
7c673cae 530 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
531 ASSERT_NE(nullptr, h);
532 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 533 bufferlist bl;
11fdf7f2
TL
534 std::unique_ptr<char[]> buf = gen_buffer(4096);
535 bufferptr bp = buffer::claim_char(4096, buf.get());
7c673cae
FG
536 bl.push_back(bp);
537 h->append(bl.c_str(), bl.length());
538 fs.fsync(h);
7c673cae
FG
539 }
540 }
541 }
7c673cae
FG
542 {
543 for (int i=0; i<10; i+=2) {
544 string dir = "dir.";
545 dir.append(to_string(i));
11fdf7f2 546 for (int j=0; j<10; j++) {
7c673cae
FG
547 string file = "file.";
548 file.append(to_string(j));
549 fs.unlink(dir, file);
1911f103 550 fs.sync_metadata(false);
7c673cae 551 }
11fdf7f2 552 ASSERT_EQ(0, fs.rmdir(dir));
1911f103 553 fs.sync_metadata(false);
7c673cae
FG
554 }
555 }
556 fs.compact_log();
557 fs.umount();
7c673cae
FG
558}
559
560TEST(BlueFS, test_simple_compaction_async) {
11fdf7f2 561 g_ceph_context->_conf.set_val(
7c673cae
FG
562 "bluefs_compact_log_sync",
563 "false");
564 uint64_t size = 1048576 * 128;
9f95a23c 565 TempBdev bdev{size};
7c673cae
FG
566
567 BlueFS fs(g_ceph_context);
f67539c2 568 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 569 uuid_d fsid;
9f95a23c 570 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 571 ASSERT_EQ(0, fs.mount());
9f95a23c 572 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae 573 {
7c673cae
FG
574 for (int i=0; i<10; i++) {
575 string dir = "dir.";
576 dir.append(to_string(i));
577 ASSERT_EQ(0, fs.mkdir(dir));
578 for (int j=0; j<10; j++) {
579 string file = "file.";
580 file.append(to_string(j));
11fdf7f2 581 BlueFS::FileWriter *h;
7c673cae 582 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
583 ASSERT_NE(nullptr, h);
584 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 585 bufferlist bl;
11fdf7f2
TL
586 std::unique_ptr<char[]> buf = gen_buffer(4096);
587 bufferptr bp = buffer::claim_char(4096, buf.get());
7c673cae
FG
588 bl.push_back(bp);
589 h->append(bl.c_str(), bl.length());
590 fs.fsync(h);
7c673cae
FG
591 }
592 }
593 }
7c673cae
FG
594 {
595 for (int i=0; i<10; i+=2) {
596 string dir = "dir.";
597 dir.append(to_string(i));
11fdf7f2 598 for (int j=0; j<10; j++) {
7c673cae
FG
599 string file = "file.";
600 file.append(to_string(j));
601 fs.unlink(dir, file);
1911f103 602 fs.sync_metadata(false);
7c673cae 603 }
11fdf7f2 604 ASSERT_EQ(0, fs.rmdir(dir));
1911f103 605 fs.sync_metadata(false);
7c673cae
FG
606 }
607 }
608 fs.compact_log();
609 fs.umount();
7c673cae
FG
610}
611
612TEST(BlueFS, test_compaction_sync) {
613 uint64_t size = 1048576 * 128;
9f95a23c 614 TempBdev bdev{size};
11fdf7f2 615 g_ceph_context->_conf.set_val(
7c673cae
FG
616 "bluefs_alloc_size",
617 "65536");
11fdf7f2 618 g_ceph_context->_conf.set_val(
7c673cae
FG
619 "bluefs_compact_log_sync",
620 "true");
621
622 BlueFS fs(g_ceph_context);
f67539c2 623 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 624 uuid_d fsid;
9f95a23c 625 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 626 ASSERT_EQ(0, fs.mount());
9f95a23c 627 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
628 {
629 std::vector<std::thread> write_threads;
630 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
631 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
632 for (int i=0; i<NUM_WRITERS; i++) {
633 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
634 }
635
636 std::vector<std::thread> sync_threads;
637 for (int i=0; i<NUM_SYNC_THREADS; i++) {
638 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
639 }
640
641 join_all(write_threads);
642 writes_done = true;
643 join_all(sync_threads);
644 fs.compact_log();
645 }
646 fs.umount();
7c673cae
FG
647}
648
649TEST(BlueFS, test_compaction_async) {
650 uint64_t size = 1048576 * 128;
9f95a23c 651 TempBdev bdev{size};
11fdf7f2 652 g_ceph_context->_conf.set_val(
7c673cae
FG
653 "bluefs_alloc_size",
654 "65536");
11fdf7f2 655 g_ceph_context->_conf.set_val(
7c673cae
FG
656 "bluefs_compact_log_sync",
657 "false");
658
659 BlueFS fs(g_ceph_context);
f67539c2 660 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 661 uuid_d fsid;
9f95a23c 662 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 663 ASSERT_EQ(0, fs.mount());
9f95a23c 664 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
665 {
666 std::vector<std::thread> write_threads;
667 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
668 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
669 for (int i=0; i<NUM_WRITERS; i++) {
670 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
671 }
672
673 std::vector<std::thread> sync_threads;
674 for (int i=0; i<NUM_SYNC_THREADS; i++) {
675 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
676 }
677
678 join_all(write_threads);
679 writes_done = true;
680 join_all(sync_threads);
681 fs.compact_log();
682 }
683 fs.umount();
7c673cae
FG
684}
685
686TEST(BlueFS, test_replay) {
687 uint64_t size = 1048576 * 128;
9f95a23c 688 TempBdev bdev{size};
11fdf7f2 689 g_ceph_context->_conf.set_val(
7c673cae
FG
690 "bluefs_alloc_size",
691 "65536");
11fdf7f2 692 g_ceph_context->_conf.set_val(
7c673cae
FG
693 "bluefs_compact_log_sync",
694 "false");
695
696 BlueFS fs(g_ceph_context);
f67539c2 697 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
7c673cae 698 uuid_d fsid;
9f95a23c 699 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 700 ASSERT_EQ(0, fs.mount());
9f95a23c 701 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
702 {
703 std::vector<std::thread> write_threads;
704 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
705 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
706 for (int i=0; i<NUM_WRITERS; i++) {
707 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
708 }
709
710 std::vector<std::thread> sync_threads;
711 for (int i=0; i<NUM_SYNC_THREADS; i++) {
712 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
713 }
714
715 join_all(write_threads);
716 writes_done = true;
717 join_all(sync_threads);
718 fs.compact_log();
719 }
720 fs.umount();
721 // remount and check log can replay safe?
31f18b77 722 ASSERT_EQ(0, fs.mount());
9f95a23c 723 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae 724 fs.umount();
7c673cae
FG
725}
726
f6b5b4d7
TL
727TEST(BlueFS, test_replay_growth) {
728 uint64_t size = 1048576LL * (2 * 1024 + 128);
729 TempBdev bdev{size};
730
731 ConfSaver conf(g_ceph_context->_conf);
732 conf.SetVal("bluefs_alloc_size", "4096");
733 conf.SetVal("bluefs_shared_alloc_size", "4096");
734 conf.SetVal("bluefs_compact_log_sync", "false");
735 conf.SetVal("bluefs_min_log_runway", "32768");
736 conf.SetVal("bluefs_max_log_runway", "65536");
737 conf.SetVal("bluefs_allocator", "stupid");
738 conf.SetVal("bluefs_sync_write", "true");
739 conf.ApplyChanges();
740
741 BlueFS fs(g_ceph_context);
f67539c2 742 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
f6b5b4d7
TL
743 uuid_d fsid;
744 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
745 ASSERT_EQ(0, fs.mount());
746 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
747 ASSERT_EQ(0, fs.mkdir("dir"));
748
749 char data[2000];
750 BlueFS::FileWriter *h;
751 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
752 for (size_t i = 0; i < 10000; i++) {
753 h->append(data, 2000);
754 fs.fsync(h);
755 }
756 fs.close_writer(h);
757 fs.umount(true); //do not compact on exit!
758
759 // remount and check log can replay safe?
760 ASSERT_EQ(0, fs.mount());
761 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
762 fs.umount();
763}
764
522d829b
TL
765TEST(BlueFS, test_tracker_50965) {
766 uint64_t size_wal = 1048576 * 64;
767 TempBdev bdev_wal{size_wal};
768 uint64_t size_db = 1048576 * 128;
769 TempBdev bdev_db{size_db};
770 uint64_t size_slow = 1048576 * 256;
771 TempBdev bdev_slow{size_slow};
772
773 ConfSaver conf(g_ceph_context->_conf);
774 conf.SetVal("bluefs_min_flush_size", "65536");
775 conf.ApplyChanges();
776
777 BlueFS fs(g_ceph_context);
778 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false, 0));
779 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0));
780 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0));
781 uuid_d fsid;
782 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
783 ASSERT_EQ(0, fs.mount());
784 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
785
786 string dir_slow = "dir.slow";
787 ASSERT_EQ(0, fs.mkdir(dir_slow));
788 string dir_db = "dir_db";
789 ASSERT_EQ(0, fs.mkdir(dir_db));
790
791 string file_slow = "file";
792 BlueFS::FileWriter *h_slow;
793 ASSERT_EQ(0, fs.open_for_write(dir_slow, file_slow, &h_slow, false));
794 ASSERT_NE(nullptr, h_slow);
795
796 string file_db = "file";
797 BlueFS::FileWriter *h_db;
798 ASSERT_EQ(0, fs.open_for_write(dir_db, file_db, &h_db, false));
799 ASSERT_NE(nullptr, h_db);
800
801 bufferlist bl1;
802 std::unique_ptr<char[]> buf1 = gen_buffer(70000);
803 bufferptr bp1 = buffer::claim_char(70000, buf1.get());
804 bl1.push_back(bp1);
805 h_slow->append(bl1.c_str(), bl1.length());
806 fs.flush(h_slow);
807
808 uint64_t h_slow_dirty_seq_1 = fs.debug_get_dirty_seq(h_slow);
809
810 bufferlist bl2;
811 std::unique_ptr<char[]> buf2 = gen_buffer(1000);
812 bufferptr bp2 = buffer::claim_char(1000, buf2.get());
813 bl2.push_back(bp2);
814 h_db->append(bl2.c_str(), bl2.length());
815 fs.fsync(h_db);
816
817 uint64_t h_slow_dirty_seq_2 = fs.debug_get_dirty_seq(h_slow);
818 bool h_slow_dev_dirty = fs.debug_get_is_dev_dirty(h_slow, BlueFS::BDEV_SLOW);
819
820 //problem if allocations are stable in log but slow device is not flushed yet
821 ASSERT_FALSE(h_slow_dirty_seq_1 != 0 &&
822 h_slow_dirty_seq_2 == 0 &&
823 h_slow_dev_dirty == true);
824
825 fs.close_writer(h_slow);
826 fs.close_writer(h_db);
827
828 fs.umount();
829}
830
20effc67
TL
831TEST(BlueFS, test_truncate_stable_53129) {
832
833 ConfSaver conf(g_ceph_context->_conf);
834 conf.SetVal("bluefs_min_flush_size", "65536");
835 conf.ApplyChanges();
836
837 uint64_t size_wal = 1048576 * 64;
838 TempBdev bdev_wal{size_wal};
839 uint64_t size_db = 1048576 * 128;
840 TempBdev bdev_db{size_db};
841 uint64_t size_slow = 1048576 * 256;
842 TempBdev bdev_slow{size_slow};
843
844 BlueFS fs(g_ceph_context);
845 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false, 0));
846 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0));
847 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0));
848 uuid_d fsid;
849 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
850 ASSERT_EQ(0, fs.mount());
851 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
852
853 string dir_slow = "dir.slow";
854 ASSERT_EQ(0, fs.mkdir(dir_slow));
855 string dir_db = "dir_db";
856 ASSERT_EQ(0, fs.mkdir(dir_db));
857
858 string file_slow = "file";
859 BlueFS::FileWriter *h_slow;
860 ASSERT_EQ(0, fs.open_for_write(dir_slow, file_slow, &h_slow, false));
861 ASSERT_NE(nullptr, h_slow);
862
863 string file_db = "file";
864 BlueFS::FileWriter *h_db;
865 ASSERT_EQ(0, fs.open_for_write(dir_db, file_db, &h_db, false));
866 ASSERT_NE(nullptr, h_db);
867
868 bufferlist bl1;
869 std::unique_ptr<char[]> buf1 = gen_buffer(70000);
870 bufferptr bp1 = buffer::claim_char(70000, buf1.get());
871 bl1.push_back(bp1);
872 // add 70000 bytes
873 h_slow->append(bl1.c_str(), bl1.length());
874 fs.flush(h_slow);
875 // and truncate to 60000 bytes
876 fs.truncate(h_slow, 60000);
877
878 // write something to file on DB device
879 bufferlist bl2;
880 std::unique_ptr<char[]> buf2 = gen_buffer(1000);
881 bufferptr bp2 = buffer::claim_char(1000, buf2.get());
882 bl2.push_back(bp2);
883 h_db->append(bl2.c_str(), bl2.length());
884 // and force bluefs log to flush
885 fs.fsync(h_db);
886
887 // This is the actual test point.
888 // We completed truncate, and we expect
889 // - size to be 60000
890 // - data to be stable on slow device
891 // OR
892 // - size = 0 or file does not exist
893 // - dev_dirty is irrelevant
894 bool h_slow_dev_dirty = fs.debug_get_is_dev_dirty(h_slow, BlueFS::BDEV_SLOW);
895 // Imagine power goes down here.
896
897 fs.close_writer(h_slow);
898 fs.close_writer(h_db);
899
900 fs.umount();
901
902 ASSERT_EQ(0, fs.mount());
903 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
904
905 uint64_t size;
906 utime_t mtime;
907 ASSERT_EQ(0, fs.stat("dir.slow", "file", &size, &mtime));
908 // check file size 60000
909 ASSERT_EQ(size, 60000);
910 // check that dev_dirty was false (data stable on media)
911 ASSERT_EQ(h_slow_dev_dirty, false);
912
913 fs.umount();
914}
915
916TEST(BlueFS, test_update_ino1_delta_after_replay) {
917 uint64_t size = 1048576LL * (2 * 1024 + 128);
918 TempBdev bdev{size};
919
920 ConfSaver conf(g_ceph_context->_conf);
921 conf.SetVal("bluefs_alloc_size", "4096");
922 conf.SetVal("bluefs_shared_alloc_size", "4096");
923 conf.SetVal("bluefs_compact_log_sync", "false");
924 conf.SetVal("bluefs_min_log_runway", "32768");
925 conf.SetVal("bluefs_max_log_runway", "65536");
926 conf.SetVal("bluefs_allocator", "stupid");
927 conf.ApplyChanges();
928
929 BlueFS fs(g_ceph_context);
930 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
931 uuid_d fsid;
932 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
933 ASSERT_EQ(0, fs.mount());
934 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
935 ASSERT_EQ(0, fs.mkdir("dir"));
936
937 char data[2000];
938 BlueFS::FileWriter *h;
939 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
940 for (size_t i = 0; i < 100; i++) {
941 h->append(data, 2000);
942 fs.fsync(h);
943 }
944 fs.close_writer(h);
945 fs.umount(true); //do not compact on exit!
7c673cae 946
20effc67
TL
947 ASSERT_EQ(0, fs.mount());
948 ASSERT_EQ(0, fs.open_for_write("dir", "file2", &h, false));
949 for (size_t i = 0; i < 100; i++) {
950 h->append(data, 2000);
951 fs.fsync(h);
952 }
953 fs.close_writer(h);
954 fs.umount();
955
956 // remount and check log can replay safe?
957 ASSERT_EQ(0, fs.mount());
958 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
959 fs.umount();
960}
961
962int main(int argc, char **argv) {
963 auto args = argv_to_vec(argc, argv);
11fdf7f2
TL
964 map<string,string> defaults = {
965 { "debug_bluefs", "1/20" },
966 { "debug_bdev", "1/20" }
967 };
7c673cae 968
11fdf7f2 969 auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT,
7c673cae 970 CODE_ENVIRONMENT_UTILITY,
11fdf7f2 971 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
7c673cae 972 common_init_finish(g_ceph_context);
11fdf7f2 973 g_ceph_context->_conf.set_val(
7c673cae
FG
974 "enable_experimental_unrecoverable_data_corrupting_features",
975 "*");
11fdf7f2 976 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
977
978 ::testing::InitGoogleTest(&argc, argv);
979 return RUN_ALL_TESTS();
980}