]> git.proxmox.com Git - ceph.git/blob - ceph/src/test/objectstore/test_bluefs.cc
3484b4401e48b4b099eedb377a86575c3f38e0d7
[ceph.git] / ceph / src / test / objectstore / test_bluefs.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <atomic>
#include <iostream>
#include <random>
#include <stack>
#include <thread>

#include "global/global_init.h"
#include "common/ceph_argparse.h"
#include "include/stringify.h"
#include "include/scope_guard.h"
#include "common/errno.h"
#include <gtest/gtest.h>

#include "os/bluestore/BlueFS.h"
21
// Allocates `size` bytes and fills them with pseudo-random data.
//
// The engine is default-constructed on every call (it is never seeded), so
// two calls with the same size produce the same byte sequence.
std::unique_ptr<char[]> gen_buffer(uint64_t size)
{
  std::unique_ptr<char[]> buffer = std::make_unique<char[]>(size);
  // The result type of independent_bits_engine has to be unsigned short, int,
  // long or long long; unsigned char is not a permitted UIntType and some
  // standard libraries refuse to instantiate it. The CHAR_BIT width still
  // limits every generated value to a single byte.
  std::independent_bits_engine<std::default_random_engine, CHAR_BIT, unsigned int> e;
  std::generate(buffer.get(), buffer.get() + size, std::ref(e));
  return buffer;
}
29
30 class TempBdev {
31 public:
32 TempBdev(uint64_t size)
33 : path{get_temp_bdev(size)}
34 {}
35 ~TempBdev() {
36 rm_temp_bdev(path);
37 }
38 const std::string path;
39 private:
40 static string get_temp_bdev(uint64_t size)
41 {
42 static int n = 0;
43 string fn = "ceph_test_bluefs.tmp.block." + stringify(getpid())
44 + "." + stringify(++n);
45 int fd = ::open(fn.c_str(), O_CREAT|O_RDWR|O_TRUNC, 0644);
46 ceph_assert(fd >= 0);
47 int r = ::ftruncate(fd, size);
48 ceph_assert(r >= 0);
49 ::close(fd);
50 return fn;
51 }
52 static void rm_temp_bdev(string f)
53 {
54 ::unlink(f.c_str());
55 }
56 };
57
58 class ConfSaver {
59 std::stack<std::pair<std::string, std::string>> saved_settings;
60 ConfigProxy& conf;
61 public:
62 ConfSaver(ConfigProxy& conf) : conf(conf) {
63 conf._clear_safe_to_start_threads();
64 };
65 ~ConfSaver() {
66 conf._clear_safe_to_start_threads();
67 while(saved_settings.size() > 0) {
68 auto& e = saved_settings.top();
69 conf.set_val_or_die(e.first, e.second);
70 saved_settings.pop();
71 }
72 conf.set_safe_to_start_threads();
73 conf.apply_changes(nullptr);
74 }
75 void SetVal(const char* key, const char* val) {
76 std::string skey(key);
77 std::string prev_val;
78 conf.get_val(skey, &prev_val);
79 conf.set_val_or_die(skey, val);
80 saved_settings.emplace(skey, prev_val);
81 }
82 void ApplyChanges() {
83 conf.set_safe_to_start_threads();
84 conf.apply_changes(nullptr);
85 }
86 };
87
88 TEST(BlueFS, mkfs) {
89 uint64_t size = 1048576 * 128;
90 TempBdev bdev{size};
91 uuid_d fsid;
92 BlueFS fs(g_ceph_context);
93 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
94 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
95 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
96 }
97
98 TEST(BlueFS, mkfs_mount) {
99 uint64_t size = 1048576 * 128;
100 TempBdev bdev{size};
101 BlueFS fs(g_ceph_context);
102 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
103 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
104 uuid_d fsid;
105 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
106 ASSERT_EQ(0, fs.mount());
107 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
108 ASSERT_EQ(fs.get_total(BlueFS::BDEV_DB), size - 1048576);
109 ASSERT_LT(fs.get_free(BlueFS::BDEV_DB), size - 1048576);
110 fs.umount();
111 }
112
113 TEST(BlueFS, mkfs_mount_duplicate_gift) {
114 uint64_t size = 1048576 * 128;
115 TempBdev bdev{ size };
116 bluefs_extent_t dup_ext;
117 {
118 BlueFS fs(g_ceph_context);
119 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
120 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
121 uuid_d fsid;
122 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
123 ASSERT_EQ(0, fs.mount());
124
125 {
126 BlueFS::FileWriter *h;
127 ASSERT_EQ(0, fs.mkdir("dir"));
128 ASSERT_EQ(0, fs.open_for_write("dir", "file1", &h, false));
129 h->append("foo", 3);
130 h->append("bar", 3);
131 h->append("baz", 3);
132 fs.fsync(h);
133 ceph_assert(h->file->fnode.extents.size() > 0);
134 dup_ext = h->file->fnode.extents[0];
135 ceph_assert(dup_ext.bdev == BlueFS::BDEV_DB);
136 fs.close_writer(h);
137 }
138
139 fs.umount();
140 }
141
142 {
143 BlueFS fs(g_ceph_context);
144 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
145 ASSERT_EQ(0, fs.mount());
146 // free allocation presumably allocated for file1
147 std::cout << "duplicate extent: " << std::hex
148 << dup_ext.offset << "~" << dup_ext.length
149 << std::dec << std::endl;
150 fs.debug_inject_duplicate_gift(BlueFS::BDEV_DB, dup_ext.offset, dup_ext.length);
151 {
152 // overwrite file1 with file2
153 BlueFS::FileWriter *h;
154 ASSERT_EQ(0, fs.open_for_write("dir", "file2", &h, false));
155 h->append("foo", 3);
156 h->append("bar", 3);
157 h->append("baz", 3);
158 fs.fsync(h);
159 fs.close_writer(h);
160 }
161 fs.umount();
162 }
163
164 g_ceph_context->_conf.set_val_or_die("bluefs_log_replay_check_allocations", "true");
165 g_ceph_context->_conf.apply_changes(nullptr);
166
167 {
168 // this should fail
169 BlueFS fs(g_ceph_context);
170 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
171 ASSERT_NE(0, fs.mount());
172 }
173 }
174
175 TEST(BlueFS, write_read) {
176 uint64_t size = 1048576 * 128;
177 TempBdev bdev{size};
178 BlueFS fs(g_ceph_context);
179 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
180 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
181 uuid_d fsid;
182 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
183 ASSERT_EQ(0, fs.mount());
184 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
185 {
186 BlueFS::FileWriter *h;
187 ASSERT_EQ(0, fs.mkdir("dir"));
188 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
189 h->append("foo", 3);
190 h->append("bar", 3);
191 h->append("baz", 3);
192 fs.fsync(h);
193 fs.close_writer(h);
194 }
195 {
196 BlueFS::FileReader *h;
197 ASSERT_EQ(0, fs.open_for_read("dir", "file", &h));
198 bufferlist bl;
199 BlueFS::FileReaderBuffer buf(4096);
200 ASSERT_EQ(9, fs.read(h, &buf, 0, 1024, &bl, NULL));
201 ASSERT_EQ(0, strncmp("foobarbaz", bl.c_str(), 9));
202 delete h;
203 }
204 fs.umount();
205 }
206
207 TEST(BlueFS, small_appends) {
208 uint64_t size = 1048576 * 128;
209 TempBdev bdev{size};
210 BlueFS fs(g_ceph_context);
211 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
212 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
213 uuid_d fsid;
214 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
215 ASSERT_EQ(0, fs.mount());
216 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
217 {
218 BlueFS::FileWriter *h;
219 ASSERT_EQ(0, fs.mkdir("dir"));
220 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
221 for (unsigned i = 0; i < 10000; ++i) {
222 h->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
223 }
224 fs.fsync(h);
225 fs.close_writer(h);
226 }
227 {
228 BlueFS::FileWriter *h;
229 ASSERT_EQ(0, fs.open_for_write("dir", "file_sync", &h, false));
230 for (unsigned i = 0; i < 1000; ++i) {
231 h->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
232 ASSERT_EQ(0, fs.fsync(h));
233 }
234 fs.close_writer(h);
235 }
236 fs.umount();
237 }
238
239 TEST(BlueFS, very_large_write) {
240 // we'll write a ~5G file, so allocate more than that for the whole fs
241 uint64_t size = 1048576 * 1024 * 8ull;
242 TempBdev bdev{size};
243 BlueFS fs(g_ceph_context);
244
245 bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io");
246 g_ceph_context->_conf.set_val("bluefs_buffered_io", "false");
247 uint64_t total_written = 0;
248
249 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
250 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
251 uuid_d fsid;
252 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
253 ASSERT_EQ(0, fs.mount());
254 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
255 char buf[1048571]; // this is biggish, but intentionally not evenly aligned
256 for (unsigned i = 0; i < sizeof(buf); ++i) {
257 buf[i] = i;
258 }
259 {
260 BlueFS::FileWriter *h;
261 ASSERT_EQ(0, fs.mkdir("dir"));
262 ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false));
263 for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) {
264 h->append(buf, sizeof(buf));
265 total_written += sizeof(buf);
266 }
267 fs.fsync(h);
268 for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) {
269 h->append(buf, sizeof(buf));
270 total_written += sizeof(buf);
271 }
272 fs.fsync(h);
273 fs.close_writer(h);
274 }
275 {
276 BlueFS::FileReader *h;
277 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
278 bufferlist bl;
279 BlueFS::FileReaderBuffer readbuf(10485760);
280 ASSERT_EQ(h->file->fnode.size, total_written);
281 for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) {
282 bl.clear();
283 fs.read(h, &readbuf, i * sizeof(buf), sizeof(buf), &bl, NULL);
284 int r = memcmp(buf, bl.c_str(), sizeof(buf));
285 if (r) {
286 cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r
287 << std::endl;
288 }
289 ASSERT_EQ(0, r);
290 }
291 for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) {
292 bl.clear();
293 fs.read(h, &readbuf, i * sizeof(buf), sizeof(buf), &bl, NULL);
294 int r = memcmp(buf, bl.c_str(), sizeof(buf));
295 if (r) {
296 cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r
297 << std::endl;
298 }
299 ASSERT_EQ(0, r);
300 }
301 delete h;
302 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
303 ASSERT_EQ(h->file->fnode.size, total_written);
304 unique_ptr<char> huge_buf(new char[h->file->fnode.size]);
305 auto l = h->file->fnode.size;
306 int64_t r = fs.read(h, &readbuf, 0, l, NULL, huge_buf.get());
307 ASSERT_EQ(r, l);
308 delete h;
309 }
310 fs.umount();
311
312 g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old));
313 }
314
// Size in bytes of the random payload the writer helpers below append per
// file write; write_data() and write_single_file() also subtract it once from
// the byte ration they are given.
#define ALLOC_SIZE 4096
316
317 void write_data(BlueFS &fs, uint64_t rationed_bytes)
318 {
319 int j=0, r=0;
320 uint64_t written_bytes = 0;
321 rationed_bytes -= ALLOC_SIZE;
322 stringstream ss;
323 string dir = "dir.";
324 ss << std::this_thread::get_id();
325 dir.append(ss.str());
326 dir.append(".");
327 dir.append(to_string(j));
328 ASSERT_EQ(0, fs.mkdir(dir));
329 while (1) {
330 string file = "file.";
331 file.append(to_string(j));
332 BlueFS::FileWriter *h;
333 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
334 ASSERT_NE(nullptr, h);
335 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
336 bufferlist bl;
337 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
338 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
339 bl.push_back(bp);
340 h->append(bl.c_str(), bl.length());
341 r = fs.fsync(h);
342 if (r < 0) {
343 break;
344 }
345 written_bytes += g_conf()->bluefs_alloc_size;
346 j++;
347 if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) {
348 break;
349 }
350 }
351 }
352
353 void create_single_file(BlueFS &fs)
354 {
355 BlueFS::FileWriter *h;
356 stringstream ss;
357 string dir = "dir.test";
358 ASSERT_EQ(0, fs.mkdir(dir));
359 string file = "testfile";
360 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
361 bufferlist bl;
362 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
363 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
364 bl.push_back(bp);
365 h->append(bl.c_str(), bl.length());
366 fs.fsync(h);
367 fs.close_writer(h);
368 }
369
370 void write_single_file(BlueFS &fs, uint64_t rationed_bytes)
371 {
372 stringstream ss;
373 const string dir = "dir.test";
374 const string file = "testfile";
375 uint64_t written_bytes = 0;
376 rationed_bytes -= ALLOC_SIZE;
377 while (1) {
378 BlueFS::FileWriter *h;
379 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
380 ASSERT_NE(nullptr, h);
381 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
382 bufferlist bl;
383 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
384 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
385 bl.push_back(bp);
386 h->append(bl.c_str(), bl.length());
387 int r = fs.fsync(h);
388 if (r < 0) {
389 break;
390 }
391 written_bytes += g_conf()->bluefs_alloc_size;
392 if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) {
393 break;
394 }
395 }
396 }
397
398 bool writes_done = false;
399
400 void sync_fs(BlueFS &fs)
401 {
402 while (1) {
403 if (writes_done == true)
404 break;
405 fs.sync_metadata(false);
406 sleep(1);
407 }
408 }
409
410
// Blocks until thread `t` has finished; usable as a for_each callback.
void do_join(std::thread& t)
{
  t.join();
}
415
416 void join_all(std::vector<std::thread>& v)
417 {
418 std::for_each(v.begin(),v.end(),do_join);
419 }
420
// Number of write_data() threads started by the multi-threaded tests below.
#define NUM_WRITERS 3
// Number of sync_fs() threads started next to the writers.
#define NUM_SYNC_THREADS 1

// Thread split used by test_flush_1: write_single_file() threads ...
#define NUM_SINGLE_FILE_WRITERS 1
// ... and write_data() threads.
#define NUM_MULTIPLE_FILE_WRITERS 2
426
427 TEST(BlueFS, test_flush_1) {
428 uint64_t size = 1048576 * 128;
429 TempBdev bdev{size};
430 g_ceph_context->_conf.set_val(
431 "bluefs_alloc_size",
432 "65536");
433 g_ceph_context->_conf.apply_changes(nullptr);
434
435 BlueFS fs(g_ceph_context);
436 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
437 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
438 uuid_d fsid;
439 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
440 ASSERT_EQ(0, fs.mount());
441 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
442 {
443 std::vector<std::thread> write_thread_multiple;
444 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
445 uint64_t per_thread_bytes = (effective_size/(NUM_MULTIPLE_FILE_WRITERS + NUM_SINGLE_FILE_WRITERS));
446 for (int i=0; i<NUM_MULTIPLE_FILE_WRITERS ; i++) {
447 write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
448 }
449
450 create_single_file(fs);
451 std::vector<std::thread> write_thread_single;
452 for (int i=0; i<NUM_SINGLE_FILE_WRITERS; i++) {
453 write_thread_single.push_back(std::thread(write_single_file, std::ref(fs), per_thread_bytes));
454 }
455
456 join_all(write_thread_single);
457 join_all(write_thread_multiple);
458 }
459 fs.umount();
460 }
461
462 TEST(BlueFS, test_flush_2) {
463 uint64_t size = 1048576 * 256;
464 TempBdev bdev{size};
465 g_ceph_context->_conf.set_val(
466 "bluefs_alloc_size",
467 "65536");
468 g_ceph_context->_conf.apply_changes(nullptr);
469
470 BlueFS fs(g_ceph_context);
471 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
472 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
473 uuid_d fsid;
474 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
475 ASSERT_EQ(0, fs.mount());
476 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
477 {
478 uint64_t effective_size = size - (128 * 1048576); // leaving the last 32 MB for log compaction
479 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
480 std::vector<std::thread> write_thread_multiple;
481 for (int i=0; i<NUM_WRITERS; i++) {
482 write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
483 }
484
485 join_all(write_thread_multiple);
486 }
487 fs.umount();
488 }
489
490 TEST(BlueFS, test_flush_3) {
491 uint64_t size = 1048576 * 256;
492 TempBdev bdev{size};
493 g_ceph_context->_conf.set_val(
494 "bluefs_alloc_size",
495 "65536");
496 g_ceph_context->_conf.apply_changes(nullptr);
497
498 BlueFS fs(g_ceph_context);
499 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
500 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
501 uuid_d fsid;
502 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
503 ASSERT_EQ(0, fs.mount());
504 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
505 {
506 std::vector<std::thread> write_threads;
507 uint64_t effective_size = size - (64 * 1048576); // leaving the last 11 MB for log compaction
508 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
509 for (int i=0; i<NUM_WRITERS; i++) {
510 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
511 }
512
513 std::vector<std::thread> sync_threads;
514 for (int i=0; i<NUM_SYNC_THREADS; i++) {
515 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
516 }
517
518 join_all(write_threads);
519 writes_done = true;
520 join_all(sync_threads);
521 }
522 fs.umount();
523 }
524
525 TEST(BlueFS, test_simple_compaction_sync) {
526 g_ceph_context->_conf.set_val(
527 "bluefs_compact_log_sync",
528 "true");
529 uint64_t size = 1048576 * 128;
530 TempBdev bdev{size};
531
532 BlueFS fs(g_ceph_context);
533 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
534 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
535 uuid_d fsid;
536 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
537 ASSERT_EQ(0, fs.mount());
538 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
539 {
540 for (int i=0; i<10; i++) {
541 string dir = "dir.";
542 dir.append(to_string(i));
543 ASSERT_EQ(0, fs.mkdir(dir));
544 for (int j=0; j<10; j++) {
545 string file = "file.";
546 file.append(to_string(j));
547 BlueFS::FileWriter *h;
548 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
549 ASSERT_NE(nullptr, h);
550 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
551 bufferlist bl;
552 std::unique_ptr<char[]> buf = gen_buffer(4096);
553 bufferptr bp = buffer::claim_char(4096, buf.get());
554 bl.push_back(bp);
555 h->append(bl.c_str(), bl.length());
556 fs.fsync(h);
557 }
558 }
559 }
560 {
561 for (int i=0; i<10; i+=2) {
562 string dir = "dir.";
563 dir.append(to_string(i));
564 for (int j=0; j<10; j++) {
565 string file = "file.";
566 file.append(to_string(j));
567 fs.unlink(dir, file);
568 fs.sync_metadata(false);
569 }
570 ASSERT_EQ(0, fs.rmdir(dir));
571 fs.sync_metadata(false);
572 }
573 }
574 fs.compact_log();
575 fs.umount();
576 }
577
578 TEST(BlueFS, test_simple_compaction_async) {
579 g_ceph_context->_conf.set_val(
580 "bluefs_compact_log_sync",
581 "false");
582 uint64_t size = 1048576 * 128;
583 TempBdev bdev{size};
584
585 BlueFS fs(g_ceph_context);
586 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
587 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
588 uuid_d fsid;
589 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
590 ASSERT_EQ(0, fs.mount());
591 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
592 {
593 for (int i=0; i<10; i++) {
594 string dir = "dir.";
595 dir.append(to_string(i));
596 ASSERT_EQ(0, fs.mkdir(dir));
597 for (int j=0; j<10; j++) {
598 string file = "file.";
599 file.append(to_string(j));
600 BlueFS::FileWriter *h;
601 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
602 ASSERT_NE(nullptr, h);
603 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
604 bufferlist bl;
605 std::unique_ptr<char[]> buf = gen_buffer(4096);
606 bufferptr bp = buffer::claim_char(4096, buf.get());
607 bl.push_back(bp);
608 h->append(bl.c_str(), bl.length());
609 fs.fsync(h);
610 }
611 }
612 }
613 {
614 for (int i=0; i<10; i+=2) {
615 string dir = "dir.";
616 dir.append(to_string(i));
617 for (int j=0; j<10; j++) {
618 string file = "file.";
619 file.append(to_string(j));
620 fs.unlink(dir, file);
621 fs.sync_metadata(false);
622 }
623 ASSERT_EQ(0, fs.rmdir(dir));
624 fs.sync_metadata(false);
625 }
626 }
627 fs.compact_log();
628 fs.umount();
629 }
630
631 TEST(BlueFS, test_compaction_sync) {
632 uint64_t size = 1048576 * 128;
633 TempBdev bdev{size};
634 g_ceph_context->_conf.set_val(
635 "bluefs_alloc_size",
636 "65536");
637 g_ceph_context->_conf.set_val(
638 "bluefs_compact_log_sync",
639 "true");
640
641 BlueFS fs(g_ceph_context);
642 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
643 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
644 uuid_d fsid;
645 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
646 ASSERT_EQ(0, fs.mount());
647 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
648 {
649 std::vector<std::thread> write_threads;
650 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
651 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
652 for (int i=0; i<NUM_WRITERS; i++) {
653 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
654 }
655
656 std::vector<std::thread> sync_threads;
657 for (int i=0; i<NUM_SYNC_THREADS; i++) {
658 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
659 }
660
661 join_all(write_threads);
662 writes_done = true;
663 join_all(sync_threads);
664 fs.compact_log();
665 }
666 fs.umount();
667 }
668
669 TEST(BlueFS, test_compaction_async) {
670 uint64_t size = 1048576 * 128;
671 TempBdev bdev{size};
672 g_ceph_context->_conf.set_val(
673 "bluefs_alloc_size",
674 "65536");
675 g_ceph_context->_conf.set_val(
676 "bluefs_compact_log_sync",
677 "false");
678
679 BlueFS fs(g_ceph_context);
680 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
681 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
682 uuid_d fsid;
683 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
684 ASSERT_EQ(0, fs.mount());
685 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
686 {
687 std::vector<std::thread> write_threads;
688 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
689 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
690 for (int i=0; i<NUM_WRITERS; i++) {
691 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
692 }
693
694 std::vector<std::thread> sync_threads;
695 for (int i=0; i<NUM_SYNC_THREADS; i++) {
696 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
697 }
698
699 join_all(write_threads);
700 writes_done = true;
701 join_all(sync_threads);
702 fs.compact_log();
703 }
704 fs.umount();
705 }
706
707 TEST(BlueFS, test_replay) {
708 uint64_t size = 1048576 * 128;
709 TempBdev bdev{size};
710 g_ceph_context->_conf.set_val(
711 "bluefs_alloc_size",
712 "65536");
713 g_ceph_context->_conf.set_val(
714 "bluefs_compact_log_sync",
715 "false");
716
717 BlueFS fs(g_ceph_context);
718 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
719 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
720 uuid_d fsid;
721 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
722 ASSERT_EQ(0, fs.mount());
723 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
724 {
725 std::vector<std::thread> write_threads;
726 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
727 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
728 for (int i=0; i<NUM_WRITERS; i++) {
729 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
730 }
731
732 std::vector<std::thread> sync_threads;
733 for (int i=0; i<NUM_SYNC_THREADS; i++) {
734 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
735 }
736
737 join_all(write_threads);
738 writes_done = true;
739 join_all(sync_threads);
740 fs.compact_log();
741 }
742 fs.umount();
743 // remount and check log can replay safe?
744 ASSERT_EQ(0, fs.mount());
745 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
746 fs.umount();
747 }
748
749 TEST(BlueFS, test_replay_growth) {
750 uint64_t size = 1048576LL * (2 * 1024 + 128);
751 TempBdev bdev{size};
752
753 ConfSaver conf(g_ceph_context->_conf);
754 conf.SetVal("bluefs_alloc_size", "4096");
755 conf.SetVal("bluefs_shared_alloc_size", "4096");
756 conf.SetVal("bluefs_compact_log_sync", "false");
757 conf.SetVal("bluefs_min_log_runway", "32768");
758 conf.SetVal("bluefs_max_log_runway", "65536");
759 conf.SetVal("bluefs_allocator", "stupid");
760 conf.SetVal("bluefs_sync_write", "true");
761 conf.ApplyChanges();
762
763 BlueFS fs(g_ceph_context);
764 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
765 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
766 uuid_d fsid;
767 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
768 ASSERT_EQ(0, fs.mount());
769 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
770 ASSERT_EQ(0, fs.mkdir("dir"));
771
772 char data[2000];
773 BlueFS::FileWriter *h;
774 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
775 for (size_t i = 0; i < 10000; i++) {
776 h->append(data, 2000);
777 fs.fsync(h);
778 }
779 fs.close_writer(h);
780 fs.umount(true); //do not compact on exit!
781
782 // remount and check log can replay safe?
783 ASSERT_EQ(0, fs.mount());
784 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
785 fs.umount();
786 }
787
788 int main(int argc, char **argv) {
789 vector<const char*> args;
790 argv_to_vec(argc, (const char **)argv, args);
791
792 map<string,string> defaults = {
793 { "debug_bluefs", "1/20" },
794 { "debug_bdev", "1/20" }
795 };
796
797 auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT,
798 CODE_ENVIRONMENT_UTILITY,
799 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
800 common_init_finish(g_ceph_context);
801 g_ceph_context->_conf.set_val(
802 "enable_experimental_unrecoverable_data_corrupting_features",
803 "*");
804 g_ceph_context->_conf.apply_changes(nullptr);
805
806 ::testing::InitGoogleTest(&argc, argv);
807 return RUN_ALL_TESTS();
808 }