]> git.proxmox.com Git - ceph.git/blob - ceph/src/test/objectstore/test_bluefs.cc
2a84b27c9992e22c59e1e6a182e0099860f33b2c
[ceph.git] / ceph / src / test / objectstore / test_bluefs.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <atomic>
#include <iostream>
#include <random>
#include <stack>
#include <thread>
#include "global/global_init.h"
#include "common/ceph_argparse.h"
#include "include/stringify.h"
#include "include/scope_guard.h"
#include "common/errno.h"
#include <gtest/gtest.h>

#include "os/bluestore/BlueFS.h"
21
22 using namespace std;
23
// Allocates `size` bytes and fills them from a default-constructed bit engine.
// The engine is created afresh on every call and is never explicitly seeded.
// Ownership of the allocation is returned to the caller.
std::unique_ptr<char[]> gen_buffer(uint64_t size)
{
  using byte_engine =
    std::independent_bits_engine<std::default_random_engine, CHAR_BIT, unsigned char>;

  auto filled = std::make_unique<char[]>(size);
  byte_engine rng;
  for (uint64_t pos = 0; pos < size; ++pos) {
    filled[pos] = rng();
  }
  return filled;
}
31
32 class TempBdev {
33 public:
34 TempBdev(uint64_t size)
35 : path{get_temp_bdev(size)}
36 {}
37 ~TempBdev() {
38 rm_temp_bdev(path);
39 }
40 const std::string path;
41 private:
42 static string get_temp_bdev(uint64_t size)
43 {
44 static int n = 0;
45 string fn = "ceph_test_bluefs.tmp.block." + stringify(getpid())
46 + "." + stringify(++n);
47 int fd = ::open(fn.c_str(), O_CREAT|O_RDWR|O_TRUNC, 0644);
48 ceph_assert(fd >= 0);
49 int r = ::ftruncate(fd, size);
50 ceph_assert(r >= 0);
51 ::close(fd);
52 return fn;
53 }
54 static void rm_temp_bdev(string f)
55 {
56 ::unlink(f.c_str());
57 }
58 };
59
60 class ConfSaver {
61 std::stack<std::pair<std::string, std::string>> saved_settings;
62 ConfigProxy& conf;
63 public:
64 ConfSaver(ConfigProxy& conf) : conf(conf) {
65 conf._clear_safe_to_start_threads();
66 };
67 ~ConfSaver() {
68 conf._clear_safe_to_start_threads();
69 while(saved_settings.size() > 0) {
70 auto& e = saved_settings.top();
71 conf.set_val_or_die(e.first, e.second);
72 saved_settings.pop();
73 }
74 conf.set_safe_to_start_threads();
75 conf.apply_changes(nullptr);
76 }
77 void SetVal(const char* key, const char* val) {
78 std::string skey(key);
79 std::string prev_val;
80 conf.get_val(skey, &prev_val);
81 conf.set_val_or_die(skey, val);
82 saved_settings.emplace(skey, prev_val);
83 }
84 void ApplyChanges() {
85 conf.set_safe_to_start_threads();
86 conf.apply_changes(nullptr);
87 }
88 };
89
90 TEST(BlueFS, mkfs) {
91 uint64_t size = 1048576 * 128;
92 TempBdev bdev{size};
93 uuid_d fsid;
94 BlueFS fs(g_ceph_context);
95 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
96 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
97 }
98
99 TEST(BlueFS, mkfs_mount) {
100 uint64_t size = 1048576 * 128;
101 TempBdev bdev{size};
102 BlueFS fs(g_ceph_context);
103 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
104 uuid_d fsid;
105 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
106 ASSERT_EQ(0, fs.mount());
107 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
108 ASSERT_EQ(fs.get_total(BlueFS::BDEV_DB), size - 1048576);
109 ASSERT_LT(fs.get_free(BlueFS::BDEV_DB), size - 1048576);
110 fs.umount();
111 }
112
113 TEST(BlueFS, write_read) {
114 uint64_t size = 1048576 * 128;
115 TempBdev bdev{size};
116 BlueFS fs(g_ceph_context);
117 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
118 uuid_d fsid;
119 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
120 ASSERT_EQ(0, fs.mount());
121 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
122 {
123 BlueFS::FileWriter *h;
124 ASSERT_EQ(0, fs.mkdir("dir"));
125 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
126 h->append("foo", 3);
127 h->append("bar", 3);
128 h->append("baz", 3);
129 fs.fsync(h);
130 fs.close_writer(h);
131 }
132 {
133 BlueFS::FileReader *h;
134 ASSERT_EQ(0, fs.open_for_read("dir", "file", &h));
135 bufferlist bl;
136 ASSERT_EQ(9, fs.read(h, 0, 1024, &bl, NULL));
137 ASSERT_EQ(0, strncmp("foobarbaz", bl.c_str(), 9));
138 delete h;
139 }
140 fs.umount();
141 }
142
143 TEST(BlueFS, small_appends) {
144 uint64_t size = 1048576 * 128;
145 TempBdev bdev{size};
146 BlueFS fs(g_ceph_context);
147 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
148 uuid_d fsid;
149 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
150 ASSERT_EQ(0, fs.mount());
151 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
152 {
153 BlueFS::FileWriter *h;
154 ASSERT_EQ(0, fs.mkdir("dir"));
155 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
156 for (unsigned i = 0; i < 10000; ++i) {
157 h->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
158 }
159 fs.fsync(h);
160 fs.close_writer(h);
161 }
162 {
163 BlueFS::FileWriter *h;
164 ASSERT_EQ(0, fs.open_for_write("dir", "file_sync", &h, false));
165 for (unsigned i = 0; i < 1000; ++i) {
166 h->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
167 ASSERT_EQ(0, fs.fsync(h));
168 }
169 fs.close_writer(h);
170 }
171 fs.umount();
172 }
173
174 TEST(BlueFS, very_large_write) {
175 // we'll write a ~5G file, so allocate more than that for the whole fs
176 uint64_t size = 1048576 * 1024 * 6ull;
177 TempBdev bdev{size};
178 BlueFS fs(g_ceph_context);
179
180 bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io");
181 g_ceph_context->_conf.set_val("bluefs_buffered_io", "false");
182 uint64_t total_written = 0;
183
184 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
185 uuid_d fsid;
186 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
187 ASSERT_EQ(0, fs.mount());
188 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
189 char buf[1048571]; // this is biggish, but intentionally not evenly aligned
190 for (unsigned i = 0; i < sizeof(buf); ++i) {
191 buf[i] = i;
192 }
193 {
194 BlueFS::FileWriter *h;
195 ASSERT_EQ(0, fs.mkdir("dir"));
196 ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false));
197 for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) {
198 h->append(buf, sizeof(buf));
199 total_written += sizeof(buf);
200 }
201 fs.fsync(h);
202 for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) {
203 h->append(buf, sizeof(buf));
204 total_written += sizeof(buf);
205 }
206 fs.fsync(h);
207 fs.close_writer(h);
208 }
209 {
210 BlueFS::FileReader *h;
211 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
212 bufferlist bl;
213 ASSERT_EQ(h->file->fnode.size, total_written);
214 for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) {
215 bl.clear();
216 fs.read(h, i * sizeof(buf), sizeof(buf), &bl, NULL);
217 int r = memcmp(buf, bl.c_str(), sizeof(buf));
218 if (r) {
219 cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r
220 << std::endl;
221 }
222 ASSERT_EQ(0, r);
223 }
224 for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) {
225 bl.clear();
226 fs.read(h, i * sizeof(buf), sizeof(buf), &bl, NULL);
227 int r = memcmp(buf, bl.c_str(), sizeof(buf));
228 if (r) {
229 cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r
230 << std::endl;
231 }
232 ASSERT_EQ(0, r);
233 }
234 delete h;
235 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
236 ASSERT_EQ(h->file->fnode.size, total_written);
237 unique_ptr<char> huge_buf(new char[h->file->fnode.size]);
238 auto l = h->file->fnode.size;
239 int64_t r = fs.read(h, 0, l, NULL, huge_buf.get());
240 ASSERT_EQ(r, l);
241 delete h;
242 }
243 fs.umount();
244
245 g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old));
246 }
247
248 TEST(BlueFS, very_large_write2) {
249 // we'll write a ~5G file, so allocate more than that for the whole fs
250 uint64_t size_full = 1048576 * 1024 * 6ull;
251 uint64_t size = 1048576 * 1024 * 5ull;
252 TempBdev bdev{ size_full };
253 BlueFS fs(g_ceph_context);
254
255 bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io");
256 g_ceph_context->_conf.set_val("bluefs_buffered_io", "false");
257 uint64_t total_written = 0;
258
259 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
260 uuid_d fsid;
261 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
262 ASSERT_EQ(0, fs.mount());
263 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
264
265 char fill_arr[1 << 20]; // 1M
266 for (size_t i = 0; i < sizeof(fill_arr); ++i) {
267 fill_arr[i] = (char)i;
268 }
269 std::unique_ptr<char[]> buf;
270 buf.reset(new char[size]);
271 for (size_t i = 0; i < size; i += sizeof(fill_arr)) {
272 memcpy(buf.get() + i, fill_arr, sizeof(fill_arr));
273 }
274 {
275 BlueFS::FileWriter* h;
276 ASSERT_EQ(0, fs.mkdir("dir"));
277 ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false));
278 fs.append_try_flush(h, buf.get(), size);
279 total_written = size;
280 fs.fsync(h);
281 fs.close_writer(h);
282 }
283 memset(buf.get(), 0, size);
284 {
285 BlueFS::FileReader* h;
286 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
287 ASSERT_EQ(h->file->fnode.size, total_written);
288 auto l = h->file->fnode.size;
289 int64_t r = fs.read(h, 0, l, NULL, buf.get());
290 ASSERT_EQ(r, l);
291 for (size_t i = 0; i < size; i += sizeof(fill_arr)) {
292 ceph_assert(memcmp(buf.get() + i, fill_arr, sizeof(fill_arr)) == 0);
293 }
294 delete h;
295 }
296 fs.umount();
297
298 g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old));
299 }
300
// Size in bytes of the buffer each writer helper appends per iteration, and
// the amount those helpers hold back from their byte ration.
// A typed constant instead of a macro; `int` matches the type the literal had.
constexpr int ALLOC_SIZE = 4096;
302
303 void write_data(BlueFS &fs, uint64_t rationed_bytes)
304 {
305 int j=0, r=0;
306 uint64_t written_bytes = 0;
307 rationed_bytes -= ALLOC_SIZE;
308 stringstream ss;
309 string dir = "dir.";
310 ss << std::this_thread::get_id();
311 dir.append(ss.str());
312 dir.append(".");
313 dir.append(to_string(j));
314 ASSERT_EQ(0, fs.mkdir(dir));
315 while (1) {
316 string file = "file.";
317 file.append(to_string(j));
318 BlueFS::FileWriter *h;
319 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
320 ASSERT_NE(nullptr, h);
321 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
322 bufferlist bl;
323 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
324 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
325 bl.push_back(bp);
326 h->append(bl.c_str(), bl.length());
327 r = fs.fsync(h);
328 if (r < 0) {
329 break;
330 }
331 written_bytes += g_conf()->bluefs_alloc_size;
332 j++;
333 if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) {
334 break;
335 }
336 }
337 }
338
339 void create_single_file(BlueFS &fs)
340 {
341 BlueFS::FileWriter *h;
342 stringstream ss;
343 string dir = "dir.test";
344 ASSERT_EQ(0, fs.mkdir(dir));
345 string file = "testfile";
346 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
347 bufferlist bl;
348 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
349 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
350 bl.push_back(bp);
351 h->append(bl.c_str(), bl.length());
352 fs.fsync(h);
353 fs.close_writer(h);
354 }
355
356 void write_single_file(BlueFS &fs, uint64_t rationed_bytes)
357 {
358 stringstream ss;
359 const string dir = "dir.test";
360 const string file = "testfile";
361 uint64_t written_bytes = 0;
362 rationed_bytes -= ALLOC_SIZE;
363 while (1) {
364 BlueFS::FileWriter *h;
365 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
366 ASSERT_NE(nullptr, h);
367 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
368 bufferlist bl;
369 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
370 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
371 bl.push_back(bp);
372 h->append(bl.c_str(), bl.length());
373 int r = fs.fsync(h);
374 if (r < 0) {
375 break;
376 }
377 written_bytes += g_conf()->bluefs_alloc_size;
378 if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) {
379 break;
380 }
381 }
382 }
383
384 bool writes_done = false;
385
386 void sync_fs(BlueFS &fs)
387 {
388 while (1) {
389 if (writes_done == true)
390 break;
391 fs.sync_metadata(false);
392 sleep(1);
393 }
394 }
395
396
// Blocks until thread `t` has finished.
void do_join(std::thread& t)
{
  t.join();
}

// Joins every thread in `v`, front to back. The threads stay in the vector
// afterwards, in the joined (non-joinable) state.
void join_all(std::vector<std::thread>& v)
{
  for (std::thread& worker : v) {
    do_join(worker);
  }
}
406
// Thread counts used by the multi-threaded tests. Typed constants instead of
// macros; `int` matches the type the literals had.
constexpr int NUM_WRITERS = 3;       // write_data threads per test
constexpr int NUM_SYNC_THREADS = 1;  // sync_fs threads per test

// test_flush_1 splits its writers by kind.
constexpr int NUM_SINGLE_FILE_WRITERS = 1;    // write_single_file threads
constexpr int NUM_MULTIPLE_FILE_WRITERS = 2;  // write_data threads
412
413 TEST(BlueFS, test_flush_1) {
414 uint64_t size = 1048576 * 128;
415 TempBdev bdev{size};
416 g_ceph_context->_conf.set_val(
417 "bluefs_alloc_size",
418 "65536");
419 g_ceph_context->_conf.apply_changes(nullptr);
420
421 BlueFS fs(g_ceph_context);
422 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
423 uuid_d fsid;
424 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
425 ASSERT_EQ(0, fs.mount());
426 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
427 {
428 std::vector<std::thread> write_thread_multiple;
429 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
430 uint64_t per_thread_bytes = (effective_size/(NUM_MULTIPLE_FILE_WRITERS + NUM_SINGLE_FILE_WRITERS));
431 for (int i=0; i<NUM_MULTIPLE_FILE_WRITERS ; i++) {
432 write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
433 }
434
435 create_single_file(fs);
436 std::vector<std::thread> write_thread_single;
437 for (int i=0; i<NUM_SINGLE_FILE_WRITERS; i++) {
438 write_thread_single.push_back(std::thread(write_single_file, std::ref(fs), per_thread_bytes));
439 }
440
441 join_all(write_thread_single);
442 join_all(write_thread_multiple);
443 }
444 fs.umount();
445 }
446
447 TEST(BlueFS, test_flush_2) {
448 uint64_t size = 1048576 * 256;
449 TempBdev bdev{size};
450 g_ceph_context->_conf.set_val(
451 "bluefs_alloc_size",
452 "65536");
453 g_ceph_context->_conf.apply_changes(nullptr);
454
455 BlueFS fs(g_ceph_context);
456 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
457 uuid_d fsid;
458 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
459 ASSERT_EQ(0, fs.mount());
460 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
461 {
462 uint64_t effective_size = size - (128 * 1048576); // leaving the last 32 MB for log compaction
463 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
464 std::vector<std::thread> write_thread_multiple;
465 for (int i=0; i<NUM_WRITERS; i++) {
466 write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
467 }
468
469 join_all(write_thread_multiple);
470 }
471 fs.umount();
472 }
473
474 TEST(BlueFS, test_flush_3) {
475 uint64_t size = 1048576 * 256;
476 TempBdev bdev{size};
477 g_ceph_context->_conf.set_val(
478 "bluefs_alloc_size",
479 "65536");
480 g_ceph_context->_conf.apply_changes(nullptr);
481
482 BlueFS fs(g_ceph_context);
483 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
484 uuid_d fsid;
485 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
486 ASSERT_EQ(0, fs.mount());
487 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
488 {
489 std::vector<std::thread> write_threads;
490 uint64_t effective_size = size - (64 * 1048576); // leaving the last 11 MB for log compaction
491 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
492 for (int i=0; i<NUM_WRITERS; i++) {
493 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
494 }
495
496 std::vector<std::thread> sync_threads;
497 for (int i=0; i<NUM_SYNC_THREADS; i++) {
498 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
499 }
500
501 join_all(write_threads);
502 writes_done = true;
503 join_all(sync_threads);
504 }
505 fs.umount();
506 }
507
508 TEST(BlueFS, test_simple_compaction_sync) {
509 g_ceph_context->_conf.set_val(
510 "bluefs_compact_log_sync",
511 "true");
512 uint64_t size = 1048576 * 128;
513 TempBdev bdev{size};
514
515 BlueFS fs(g_ceph_context);
516 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
517 uuid_d fsid;
518 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
519 ASSERT_EQ(0, fs.mount());
520 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
521 {
522 for (int i=0; i<10; i++) {
523 string dir = "dir.";
524 dir.append(to_string(i));
525 ASSERT_EQ(0, fs.mkdir(dir));
526 for (int j=0; j<10; j++) {
527 string file = "file.";
528 file.append(to_string(j));
529 BlueFS::FileWriter *h;
530 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
531 ASSERT_NE(nullptr, h);
532 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
533 bufferlist bl;
534 std::unique_ptr<char[]> buf = gen_buffer(4096);
535 bufferptr bp = buffer::claim_char(4096, buf.get());
536 bl.push_back(bp);
537 h->append(bl.c_str(), bl.length());
538 fs.fsync(h);
539 }
540 }
541 }
542 {
543 for (int i=0; i<10; i+=2) {
544 string dir = "dir.";
545 dir.append(to_string(i));
546 for (int j=0; j<10; j++) {
547 string file = "file.";
548 file.append(to_string(j));
549 fs.unlink(dir, file);
550 fs.sync_metadata(false);
551 }
552 ASSERT_EQ(0, fs.rmdir(dir));
553 fs.sync_metadata(false);
554 }
555 }
556 fs.compact_log();
557 fs.umount();
558 }
559
560 TEST(BlueFS, test_simple_compaction_async) {
561 g_ceph_context->_conf.set_val(
562 "bluefs_compact_log_sync",
563 "false");
564 uint64_t size = 1048576 * 128;
565 TempBdev bdev{size};
566
567 BlueFS fs(g_ceph_context);
568 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
569 uuid_d fsid;
570 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
571 ASSERT_EQ(0, fs.mount());
572 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
573 {
574 for (int i=0; i<10; i++) {
575 string dir = "dir.";
576 dir.append(to_string(i));
577 ASSERT_EQ(0, fs.mkdir(dir));
578 for (int j=0; j<10; j++) {
579 string file = "file.";
580 file.append(to_string(j));
581 BlueFS::FileWriter *h;
582 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
583 ASSERT_NE(nullptr, h);
584 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
585 bufferlist bl;
586 std::unique_ptr<char[]> buf = gen_buffer(4096);
587 bufferptr bp = buffer::claim_char(4096, buf.get());
588 bl.push_back(bp);
589 h->append(bl.c_str(), bl.length());
590 fs.fsync(h);
591 }
592 }
593 }
594 {
595 for (int i=0; i<10; i+=2) {
596 string dir = "dir.";
597 dir.append(to_string(i));
598 for (int j=0; j<10; j++) {
599 string file = "file.";
600 file.append(to_string(j));
601 fs.unlink(dir, file);
602 fs.sync_metadata(false);
603 }
604 ASSERT_EQ(0, fs.rmdir(dir));
605 fs.sync_metadata(false);
606 }
607 }
608 fs.compact_log();
609 fs.umount();
610 }
611
612 TEST(BlueFS, test_compaction_sync) {
613 uint64_t size = 1048576 * 128;
614 TempBdev bdev{size};
615 g_ceph_context->_conf.set_val(
616 "bluefs_alloc_size",
617 "65536");
618 g_ceph_context->_conf.set_val(
619 "bluefs_compact_log_sync",
620 "true");
621 const char* canary_dir = "dir.after_compact_test";
622 const char* canary_file = "file.after_compact_test";
623 const char* canary_data = "some random data";
624
625 BlueFS fs(g_ceph_context);
626 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
627 uuid_d fsid;
628 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
629 ASSERT_EQ(0, fs.mount());
630 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
631 {
632 std::vector<std::thread> write_threads;
633 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
634 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
635 for (int i=0; i<NUM_WRITERS; i++) {
636 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
637 }
638
639 std::vector<std::thread> sync_threads;
640 for (int i=0; i<NUM_SYNC_THREADS; i++) {
641 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
642 }
643
644 join_all(write_threads);
645 writes_done = true;
646 join_all(sync_threads);
647 fs.compact_log();
648
649 {
650 ASSERT_EQ(0, fs.mkdir(canary_dir));
651 BlueFS::FileWriter *h;
652 ASSERT_EQ(0, fs.open_for_write(canary_dir, canary_file, &h, false));
653 ASSERT_NE(nullptr, h);
654 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
655 h->append(canary_data, strlen(canary_data));
656 int r = fs.fsync(h);
657 ASSERT_EQ(r, 0);
658 }
659 }
660 fs.umount();
661
662 fs.mount();
663 {
664 BlueFS::FileReader *h;
665 ASSERT_EQ(0, fs.open_for_read(canary_dir, canary_file, &h));
666 ASSERT_NE(nullptr, h);
667 bufferlist bl;
668 ASSERT_EQ(strlen(canary_data), fs.read(h, 0, 1024, &bl, NULL));
669 std::cout << bl.c_str() << std::endl;
670 ASSERT_EQ(0, strncmp(canary_data, bl.c_str(), strlen(canary_data)));
671 delete h;
672 }
673 fs.umount();
674 }
675
676 TEST(BlueFS, test_compaction_async) {
677 uint64_t size = 1048576 * 128;
678 TempBdev bdev{size};
679 g_ceph_context->_conf.set_val(
680 "bluefs_alloc_size",
681 "65536");
682 g_ceph_context->_conf.set_val(
683 "bluefs_compact_log_sync",
684 "false");
685 const char* canary_dir = "dir.after_compact_test";
686 const char* canary_file = "file.after_compact_test";
687 const char* canary_data = "some random data";
688
689 BlueFS fs(g_ceph_context);
690 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
691 uuid_d fsid;
692 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
693 ASSERT_EQ(0, fs.mount());
694 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
695 {
696 std::vector<std::thread> write_threads;
697 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
698 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
699 for (int i=0; i<NUM_WRITERS; i++) {
700 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
701 }
702
703 std::vector<std::thread> sync_threads;
704 for (int i=0; i<NUM_SYNC_THREADS; i++) {
705 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
706 }
707
708 join_all(write_threads);
709 writes_done = true;
710 join_all(sync_threads);
711 fs.compact_log();
712
713 {
714 ASSERT_EQ(0, fs.mkdir(canary_dir));
715 BlueFS::FileWriter *h;
716 ASSERT_EQ(0, fs.open_for_write(canary_dir, canary_file, &h, false));
717 ASSERT_NE(nullptr, h);
718 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
719 h->append(canary_data, strlen(canary_data));
720 int r = fs.fsync(h);
721 ASSERT_EQ(r, 0);
722 }
723 }
724 fs.umount();
725
726 fs.mount();
727 {
728 BlueFS::FileReader *h;
729 ASSERT_EQ(0, fs.open_for_read(canary_dir, canary_file, &h));
730 ASSERT_NE(nullptr, h);
731 bufferlist bl;
732 ASSERT_EQ(strlen(canary_data), fs.read(h, 0, 1024, &bl, NULL));
733 std::cout << bl.c_str() << std::endl;
734 ASSERT_EQ(0, strncmp(canary_data, bl.c_str(), strlen(canary_data)));
735 delete h;
736 }
737 fs.umount();
738 }
739
740 TEST(BlueFS, test_replay) {
741 uint64_t size = 1048576 * 128;
742 TempBdev bdev{size};
743 g_ceph_context->_conf.set_val(
744 "bluefs_alloc_size",
745 "65536");
746 g_ceph_context->_conf.set_val(
747 "bluefs_compact_log_sync",
748 "false");
749
750 BlueFS fs(g_ceph_context);
751 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
752 uuid_d fsid;
753 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
754 ASSERT_EQ(0, fs.mount());
755 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
756 {
757 std::vector<std::thread> write_threads;
758 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
759 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
760 for (int i=0; i<NUM_WRITERS; i++) {
761 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
762 }
763
764 std::vector<std::thread> sync_threads;
765 for (int i=0; i<NUM_SYNC_THREADS; i++) {
766 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
767 }
768
769 join_all(write_threads);
770 writes_done = true;
771 join_all(sync_threads);
772 fs.compact_log();
773 }
774 fs.umount();
775 // remount and check log can replay safe?
776 ASSERT_EQ(0, fs.mount());
777 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
778 fs.umount();
779 }
780
781 TEST(BlueFS, test_replay_growth) {
782 uint64_t size = 1048576LL * (2 * 1024 + 128);
783 TempBdev bdev{size};
784
785 ConfSaver conf(g_ceph_context->_conf);
786 conf.SetVal("bluefs_alloc_size", "4096");
787 conf.SetVal("bluefs_shared_alloc_size", "4096");
788 conf.SetVal("bluefs_compact_log_sync", "false");
789 conf.SetVal("bluefs_min_log_runway", "32768");
790 conf.SetVal("bluefs_max_log_runway", "65536");
791 conf.SetVal("bluefs_allocator", "stupid");
792 conf.SetVal("bluefs_sync_write", "true");
793 conf.ApplyChanges();
794
795 BlueFS fs(g_ceph_context);
796 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
797 uuid_d fsid;
798 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
799 ASSERT_EQ(0, fs.mount());
800 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
801 ASSERT_EQ(0, fs.mkdir("dir"));
802
803 char data[2000];
804 BlueFS::FileWriter *h;
805 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
806 for (size_t i = 0; i < 10000; i++) {
807 h->append(data, 2000);
808 fs.fsync(h);
809 }
810 fs.close_writer(h);
811 fs.umount(true); //do not compact on exit!
812
813 // remount and check log can replay safe?
814 ASSERT_EQ(0, fs.mount());
815 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
816 fs.umount();
817 }
818
819 TEST(BlueFS, test_tracker_50965) {
820 uint64_t size_wal = 1048576 * 64;
821 TempBdev bdev_wal{size_wal};
822 uint64_t size_db = 1048576 * 128;
823 TempBdev bdev_db{size_db};
824 uint64_t size_slow = 1048576 * 256;
825 TempBdev bdev_slow{size_slow};
826
827 ConfSaver conf(g_ceph_context->_conf);
828 conf.SetVal("bluefs_min_flush_size", "65536");
829 conf.ApplyChanges();
830
831 BlueFS fs(g_ceph_context);
832 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false, 0));
833 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0));
834 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0));
835 uuid_d fsid;
836 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
837 ASSERT_EQ(0, fs.mount());
838 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
839
840 string dir_slow = "dir.slow";
841 ASSERT_EQ(0, fs.mkdir(dir_slow));
842 string dir_db = "dir_db";
843 ASSERT_EQ(0, fs.mkdir(dir_db));
844
845 string file_slow = "file";
846 BlueFS::FileWriter *h_slow;
847 ASSERT_EQ(0, fs.open_for_write(dir_slow, file_slow, &h_slow, false));
848 ASSERT_NE(nullptr, h_slow);
849
850 string file_db = "file";
851 BlueFS::FileWriter *h_db;
852 ASSERT_EQ(0, fs.open_for_write(dir_db, file_db, &h_db, false));
853 ASSERT_NE(nullptr, h_db);
854
855 bufferlist bl1;
856 std::unique_ptr<char[]> buf1 = gen_buffer(70000);
857 bufferptr bp1 = buffer::claim_char(70000, buf1.get());
858 bl1.push_back(bp1);
859 h_slow->append(bl1.c_str(), bl1.length());
860 fs.flush(h_slow);
861
862 uint64_t h_slow_dirty_seq_1 = fs.debug_get_dirty_seq(h_slow);
863
864 bufferlist bl2;
865 std::unique_ptr<char[]> buf2 = gen_buffer(1000);
866 bufferptr bp2 = buffer::claim_char(1000, buf2.get());
867 bl2.push_back(bp2);
868 h_db->append(bl2.c_str(), bl2.length());
869 fs.fsync(h_db);
870
871 uint64_t h_slow_dirty_seq_2 = fs.debug_get_dirty_seq(h_slow);
872 bool h_slow_dev_dirty = fs.debug_get_is_dev_dirty(h_slow, BlueFS::BDEV_SLOW);
873
874 //problem if allocations are stable in log but slow device is not flushed yet
875 ASSERT_FALSE(h_slow_dirty_seq_1 != 0 &&
876 h_slow_dirty_seq_2 == 0 &&
877 h_slow_dev_dirty == true);
878
879 fs.close_writer(h_slow);
880 fs.close_writer(h_db);
881
882 fs.umount();
883 }
884
885 TEST(BlueFS, test_truncate_stable_53129) {
886
887 ConfSaver conf(g_ceph_context->_conf);
888 conf.SetVal("bluefs_min_flush_size", "65536");
889 conf.ApplyChanges();
890
891 uint64_t size_wal = 1048576 * 64;
892 TempBdev bdev_wal{size_wal};
893 uint64_t size_db = 1048576 * 128;
894 TempBdev bdev_db{size_db};
895 uint64_t size_slow = 1048576 * 256;
896 TempBdev bdev_slow{size_slow};
897
898 BlueFS fs(g_ceph_context);
899 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false, 0));
900 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0));
901 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0));
902 uuid_d fsid;
903 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
904 ASSERT_EQ(0, fs.mount());
905 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
906
907 string dir_slow = "dir.slow";
908 ASSERT_EQ(0, fs.mkdir(dir_slow));
909 string dir_db = "dir_db";
910 ASSERT_EQ(0, fs.mkdir(dir_db));
911
912 string file_slow = "file";
913 BlueFS::FileWriter *h_slow;
914 ASSERT_EQ(0, fs.open_for_write(dir_slow, file_slow, &h_slow, false));
915 ASSERT_NE(nullptr, h_slow);
916
917 string file_db = "file";
918 BlueFS::FileWriter *h_db;
919 ASSERT_EQ(0, fs.open_for_write(dir_db, file_db, &h_db, false));
920 ASSERT_NE(nullptr, h_db);
921
922 bufferlist bl1;
923 std::unique_ptr<char[]> buf1 = gen_buffer(70000);
924 bufferptr bp1 = buffer::claim_char(70000, buf1.get());
925 bl1.push_back(bp1);
926 // add 70000 bytes
927 h_slow->append(bl1.c_str(), bl1.length());
928 fs.flush(h_slow);
929 // and truncate to 60000 bytes
930 fs.truncate(h_slow, 60000);
931
932 // write something to file on DB device
933 bufferlist bl2;
934 std::unique_ptr<char[]> buf2 = gen_buffer(1000);
935 bufferptr bp2 = buffer::claim_char(1000, buf2.get());
936 bl2.push_back(bp2);
937 h_db->append(bl2.c_str(), bl2.length());
938 // and force bluefs log to flush
939 fs.fsync(h_db);
940
941 // This is the actual test point.
942 // We completed truncate, and we expect
943 // - size to be 60000
944 // - data to be stable on slow device
945 // OR
946 // - size = 0 or file does not exist
947 // - dev_dirty is irrelevant
948 bool h_slow_dev_dirty = fs.debug_get_is_dev_dirty(h_slow, BlueFS::BDEV_SLOW);
949 // Imagine power goes down here.
950
951 fs.close_writer(h_slow);
952 fs.close_writer(h_db);
953
954 fs.umount();
955
956 ASSERT_EQ(0, fs.mount());
957 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
958
959 uint64_t size;
960 utime_t mtime;
961 ASSERT_EQ(0, fs.stat("dir.slow", "file", &size, &mtime));
962 // check file size 60000
963 ASSERT_EQ(size, 60000);
964 // check that dev_dirty was false (data stable on media)
965 ASSERT_EQ(h_slow_dev_dirty, false);
966
967 fs.umount();
968 }
969
970 TEST(BlueFS, test_update_ino1_delta_after_replay) {
971 uint64_t size = 1048576LL * (2 * 1024 + 128);
972 TempBdev bdev{size};
973
974 ConfSaver conf(g_ceph_context->_conf);
975 conf.SetVal("bluefs_alloc_size", "4096");
976 conf.SetVal("bluefs_shared_alloc_size", "4096");
977 conf.SetVal("bluefs_compact_log_sync", "false");
978 conf.SetVal("bluefs_min_log_runway", "32768");
979 conf.SetVal("bluefs_max_log_runway", "65536");
980 conf.SetVal("bluefs_allocator", "stupid");
981 conf.ApplyChanges();
982
983 BlueFS fs(g_ceph_context);
984 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
985 uuid_d fsid;
986 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
987 ASSERT_EQ(0, fs.mount());
988 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
989 ASSERT_EQ(0, fs.mkdir("dir"));
990
991 char data[2000];
992 BlueFS::FileWriter *h;
993 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
994 for (size_t i = 0; i < 100; i++) {
995 h->append(data, 2000);
996 fs.fsync(h);
997 }
998 fs.close_writer(h);
999 fs.umount(true); //do not compact on exit!
1000
1001 ASSERT_EQ(0, fs.mount());
1002 ASSERT_EQ(0, fs.open_for_write("dir", "file2", &h, false));
1003 for (size_t i = 0; i < 100; i++) {
1004 h->append(data, 2000);
1005 fs.fsync(h);
1006 }
1007 fs.close_writer(h);
1008 fs.umount();
1009
1010 // remount and check log can replay safe?
1011 ASSERT_EQ(0, fs.mount());
1012 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
1013 fs.umount();
1014 }
1015
1016 int main(int argc, char **argv) {
1017 auto args = argv_to_vec(argc, argv);
1018 map<string,string> defaults = {
1019 { "debug_bluefs", "1/20" },
1020 { "debug_bdev", "1/20" }
1021 };
1022
1023 auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT,
1024 CODE_ENVIRONMENT_UTILITY,
1025 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
1026 common_init_finish(g_ceph_context);
1027 g_ceph_context->_conf.set_val(
1028 "enable_experimental_unrecoverable_data_corrupting_features",
1029 "*");
1030 g_ceph_context->_conf.apply_changes(nullptr);
1031
1032 ::testing::InitGoogleTest(&argc, argv);
1033 return RUN_ALL_TESTS();
1034 }