]> git.proxmox.com Git - ceph.git/blame - ceph/src/test/objectstore/test_bluefs.cc
import ceph 15.2.14
[ceph.git] / ceph / src / test / objectstore / test_bluefs.cc
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <atomic>
#include <iostream>
#include <random>
#include <stack>
#include <thread>
7c673cae
FG
13#include "global/global_init.h"
14#include "common/ceph_argparse.h"
15#include "include/stringify.h"
11fdf7f2 16#include "include/scope_guard.h"
7c673cae
FG
17#include "common/errno.h"
18#include <gtest/gtest.h>
19
20#include "os/bluestore/BlueFS.h"
21
/// Allocate a buffer of `size` bytes and fill it with pseudo-random data.
///
/// The engine is default-constructed on every call, so every call produces
/// the same byte sequence: the content is arbitrary filler, not a source of
/// per-call randomness.
///
/// @param size  number of bytes to allocate and fill
/// @return      owning pointer to the filled buffer
std::unique_ptr<char[]> gen_buffer(uint64_t size)
{
  std::unique_ptr<char[]> buffer = std::make_unique<char[]>(size);
  // The standard only permits unsigned short/int/long/long long as the
  // result type of a random engine adaptor; unsigned char is not on that
  // list.  Ask for CHAR_BIT bits in an unsigned short and narrow instead.
  std::independent_bits_engine<std::default_random_engine, CHAR_BIT, unsigned short> e;
  std::generate(buffer.get(), buffer.get() + size,
                [&e] { return static_cast<char>(e()); });
  return buffer;
}
29
9f95a23c
TL
30class TempBdev {
31public:
32 TempBdev(uint64_t size)
33 : path{get_temp_bdev(size)}
34 {}
35 ~TempBdev() {
36 rm_temp_bdev(path);
37 }
38 const std::string path;
39private:
40 static string get_temp_bdev(uint64_t size)
41 {
42 static int n = 0;
43 string fn = "ceph_test_bluefs.tmp.block." + stringify(getpid())
44 + "." + stringify(++n);
45 int fd = ::open(fn.c_str(), O_CREAT|O_RDWR|O_TRUNC, 0644);
46 ceph_assert(fd >= 0);
47 int r = ::ftruncate(fd, size);
48 ceph_assert(r >= 0);
49 ::close(fd);
50 return fn;
51 }
52 static void rm_temp_bdev(string f)
53 {
54 ::unlink(f.c_str());
55 }
56};
7c673cae 57
f6b5b4d7
TL
58class ConfSaver {
59 std::stack<std::pair<std::string, std::string>> saved_settings;
60 ConfigProxy& conf;
61public:
62 ConfSaver(ConfigProxy& conf) : conf(conf) {
63 conf._clear_safe_to_start_threads();
64 };
65 ~ConfSaver() {
66 conf._clear_safe_to_start_threads();
67 while(saved_settings.size() > 0) {
68 auto& e = saved_settings.top();
69 conf.set_val_or_die(e.first, e.second);
70 saved_settings.pop();
71 }
72 conf.set_safe_to_start_threads();
73 conf.apply_changes(nullptr);
74 }
75 void SetVal(const char* key, const char* val) {
76 std::string skey(key);
77 std::string prev_val;
78 conf.get_val(skey, &prev_val);
79 conf.set_val_or_die(skey, val);
80 saved_settings.emplace(skey, prev_val);
81 }
82 void ApplyChanges() {
83 conf.set_safe_to_start_threads();
84 conf.apply_changes(nullptr);
85 }
86};
87
7c673cae
FG
88TEST(BlueFS, mkfs) {
89 uint64_t size = 1048576 * 128;
9f95a23c 90 TempBdev bdev{size};
7c673cae
FG
91 uuid_d fsid;
92 BlueFS fs(g_ceph_context);
9f95a23c 93 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
7c673cae 94 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
9f95a23c 95 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae
FG
96}
97
98TEST(BlueFS, mkfs_mount) {
99 uint64_t size = 1048576 * 128;
9f95a23c 100 TempBdev bdev{size};
7c673cae 101 BlueFS fs(g_ceph_context);
9f95a23c 102 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
7c673cae
FG
103 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
104 uuid_d fsid;
9f95a23c 105 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 106 ASSERT_EQ(0, fs.mount());
9f95a23c 107 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
108 ASSERT_EQ(fs.get_total(BlueFS::BDEV_DB), size - 1048576);
109 ASSERT_LT(fs.get_free(BlueFS::BDEV_DB), size - 1048576);
110 fs.umount();
9f95a23c
TL
111}
112
113TEST(BlueFS, mkfs_mount_duplicate_gift) {
114 uint64_t size = 1048576 * 128;
115 TempBdev bdev{ size };
e306af50 116 bluefs_extent_t dup_ext;
9f95a23c
TL
117 {
118 BlueFS fs(g_ceph_context);
119 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
120 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
121 uuid_d fsid;
122 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
123 ASSERT_EQ(0, fs.mount());
124
125 {
126 BlueFS::FileWriter *h;
127 ASSERT_EQ(0, fs.mkdir("dir"));
128 ASSERT_EQ(0, fs.open_for_write("dir", "file1", &h, false));
129 h->append("foo", 3);
130 h->append("bar", 3);
131 h->append("baz", 3);
132 fs.fsync(h);
e306af50
TL
133 ceph_assert(h->file->fnode.extents.size() > 0);
134 dup_ext = h->file->fnode.extents[0];
135 ceph_assert(dup_ext.bdev == BlueFS::BDEV_DB);
9f95a23c
TL
136 fs.close_writer(h);
137 }
138
139 fs.umount();
140 }
141
142 {
143 BlueFS fs(g_ceph_context);
144 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
145 ASSERT_EQ(0, fs.mount());
146 // free allocation presumably allocated for file1
e306af50
TL
147 std::cout << "duplicate extent: " << std::hex
148 << dup_ext.offset << "~" << dup_ext.length
149 << std::dec << std::endl;
150 fs.debug_inject_duplicate_gift(BlueFS::BDEV_DB, dup_ext.offset, dup_ext.length);
9f95a23c
TL
151 {
152 // overwrite file1 with file2
153 BlueFS::FileWriter *h;
154 ASSERT_EQ(0, fs.open_for_write("dir", "file2", &h, false));
155 h->append("foo", 3);
156 h->append("bar", 3);
157 h->append("baz", 3);
158 fs.fsync(h);
159 fs.close_writer(h);
160 }
161 fs.umount();
162 }
163
164 g_ceph_context->_conf.set_val_or_die("bluefs_log_replay_check_allocations", "true");
165 g_ceph_context->_conf.apply_changes(nullptr);
166
167 {
168 // this should fail
169 BlueFS fs(g_ceph_context);
170 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
171 ASSERT_NE(0, fs.mount());
172 }
7c673cae
FG
173}
174
175TEST(BlueFS, write_read) {
176 uint64_t size = 1048576 * 128;
9f95a23c 177 TempBdev bdev{size};
7c673cae 178 BlueFS fs(g_ceph_context);
9f95a23c 179 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
7c673cae
FG
180 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
181 uuid_d fsid;
9f95a23c 182 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 183 ASSERT_EQ(0, fs.mount());
9f95a23c 184 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
185 {
186 BlueFS::FileWriter *h;
187 ASSERT_EQ(0, fs.mkdir("dir"));
188 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
189 h->append("foo", 3);
190 h->append("bar", 3);
191 h->append("baz", 3);
192 fs.fsync(h);
193 fs.close_writer(h);
194 }
195 {
196 BlueFS::FileReader *h;
197 ASSERT_EQ(0, fs.open_for_read("dir", "file", &h));
198 bufferlist bl;
199 BlueFS::FileReaderBuffer buf(4096);
200 ASSERT_EQ(9, fs.read(h, &buf, 0, 1024, &bl, NULL));
201 ASSERT_EQ(0, strncmp("foobarbaz", bl.c_str(), 9));
202 delete h;
203 }
204 fs.umount();
7c673cae
FG
205}
206
207TEST(BlueFS, small_appends) {
208 uint64_t size = 1048576 * 128;
9f95a23c 209 TempBdev bdev{size};
7c673cae 210 BlueFS fs(g_ceph_context);
9f95a23c 211 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
7c673cae
FG
212 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
213 uuid_d fsid;
9f95a23c 214 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 215 ASSERT_EQ(0, fs.mount());
9f95a23c 216 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
217 {
218 BlueFS::FileWriter *h;
219 ASSERT_EQ(0, fs.mkdir("dir"));
220 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
221 for (unsigned i = 0; i < 10000; ++i) {
222 h->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
223 }
224 fs.fsync(h);
225 fs.close_writer(h);
226 }
227 {
228 BlueFS::FileWriter *h;
229 ASSERT_EQ(0, fs.open_for_write("dir", "file_sync", &h, false));
230 for (unsigned i = 0; i < 1000; ++i) {
231 h->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
31f18b77 232 ASSERT_EQ(0, fs.fsync(h));
7c673cae
FG
233 }
234 fs.close_writer(h);
235 }
236 fs.umount();
7c673cae
FG
237}
238
494da23a 239TEST(BlueFS, very_large_write) {
adb31ebb 240 // we'll write a ~5G file, so allocate more than that for the whole fs
cd265ab1 241 uint64_t size = 1048576 * 1024 * 6ull;
9f95a23c 242 TempBdev bdev{size};
494da23a
TL
243 BlueFS fs(g_ceph_context);
244
245 bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io");
246 g_ceph_context->_conf.set_val("bluefs_buffered_io", "false");
adb31ebb 247 uint64_t total_written = 0;
494da23a 248
9f95a23c 249 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
494da23a
TL
250 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
251 uuid_d fsid;
9f95a23c 252 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
494da23a 253 ASSERT_EQ(0, fs.mount());
9f95a23c 254 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
494da23a
TL
255 char buf[1048571]; // this is biggish, but intentionally not evenly aligned
256 for (unsigned i = 0; i < sizeof(buf); ++i) {
257 buf[i] = i;
258 }
259 {
260 BlueFS::FileWriter *h;
261 ASSERT_EQ(0, fs.mkdir("dir"));
262 ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false));
263 for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) {
264 h->append(buf, sizeof(buf));
adb31ebb
TL
265 total_written += sizeof(buf);
266 }
267 fs.fsync(h);
268 for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) {
269 h->append(buf, sizeof(buf));
270 total_written += sizeof(buf);
494da23a
TL
271 }
272 fs.fsync(h);
273 fs.close_writer(h);
274 }
275 {
276 BlueFS::FileReader *h;
277 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
278 bufferlist bl;
279 BlueFS::FileReaderBuffer readbuf(10485760);
adb31ebb 280 ASSERT_EQ(h->file->fnode.size, total_written);
494da23a
TL
281 for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) {
282 bl.clear();
283 fs.read(h, &readbuf, i * sizeof(buf), sizeof(buf), &bl, NULL);
284 int r = memcmp(buf, bl.c_str(), sizeof(buf));
285 if (r) {
286 cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r
287 << std::endl;
288 }
289 ASSERT_EQ(0, r);
290 }
adb31ebb
TL
291 for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) {
292 bl.clear();
293 fs.read(h, &readbuf, i * sizeof(buf), sizeof(buf), &bl, NULL);
294 int r = memcmp(buf, bl.c_str(), sizeof(buf));
295 if (r) {
296 cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r
297 << std::endl;
298 }
299 ASSERT_EQ(0, r);
300 }
301 delete h;
302 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
303 ASSERT_EQ(h->file->fnode.size, total_written);
304 unique_ptr<char> huge_buf(new char[h->file->fnode.size]);
305 auto l = h->file->fnode.size;
306 int64_t r = fs.read(h, &readbuf, 0, l, NULL, huge_buf.get());
cd265ab1
TL
307 ASSERT_EQ(r, (int64_t)l);
308 delete h;
309 }
310 fs.umount();
311
312 g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old));
313}
314
315TEST(BlueFS, very_large_write2) {
316 // we'll write a ~5G file, so allocate more than that for the whole fs
317 uint64_t size_full = 1048576 * 1024 * 6ull;
318 uint64_t size = 1048576 * 1024 * 5ull;
319 TempBdev bdev{ size_full };
320 BlueFS fs(g_ceph_context);
321
322 bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io");
323 g_ceph_context->_conf.set_val("bluefs_buffered_io", "false");
324 uint64_t total_written = 0;
325
326 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576));
327 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size_full - 1048576);
328 uuid_d fsid;
329 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
330 ASSERT_EQ(0, fs.mount());
331 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
332
333 char fill_arr[1 << 20]; // 1M
334 for (size_t i = 0; i < sizeof(fill_arr); ++i) {
335 fill_arr[i] = (char)i;
336 }
337 std::unique_ptr<char[]> buf;
338 buf.reset(new char[size]);
339 for (size_t i = 0; i < size; i += sizeof(fill_arr)) {
340 memcpy(buf.get() + i, fill_arr, sizeof(fill_arr));
341 }
342 {
343 BlueFS::FileWriter* h;
344 ASSERT_EQ(0, fs.mkdir("dir"));
345 ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false));
346 fs.append_try_flush(h, buf.get(), size);
347 total_written = size;
348 fs.fsync(h);
349 fs.close_writer(h);
350 }
351 memset(buf.get(), 0, size);
352 {
353 BlueFS::FileReader* h;
354 ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h));
355 ASSERT_EQ(h->file->fnode.size, total_written);
356 auto l = h->file->fnode.size;
357 BlueFS::FileReaderBuffer readbuf(10485760);
358 int64_t r = fs.read(h, &readbuf, 0, l, NULL, buf.get());
359 ASSERT_EQ(r, (int64_t)l);
360 for (size_t i = 0; i < size; i += sizeof(fill_arr)) {
361 ceph_assert(memcmp(buf.get() + i, fill_arr, sizeof(fill_arr)) == 0);
362 }
494da23a
TL
363 delete h;
364 }
365 fs.umount();
366
367 g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old));
494da23a
TL
368}
369
7c673cae
FG
370#define ALLOC_SIZE 4096
371
372void write_data(BlueFS &fs, uint64_t rationed_bytes)
373{
7c673cae
FG
374 int j=0, r=0;
375 uint64_t written_bytes = 0;
376 rationed_bytes -= ALLOC_SIZE;
377 stringstream ss;
378 string dir = "dir.";
379 ss << std::this_thread::get_id();
380 dir.append(ss.str());
381 dir.append(".");
382 dir.append(to_string(j));
383 ASSERT_EQ(0, fs.mkdir(dir));
384 while (1) {
385 string file = "file.";
386 file.append(to_string(j));
11fdf7f2 387 BlueFS::FileWriter *h;
7c673cae 388 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
389 ASSERT_NE(nullptr, h);
390 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 391 bufferlist bl;
11fdf7f2
TL
392 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
393 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
7c673cae
FG
394 bl.push_back(bp);
395 h->append(bl.c_str(), bl.length());
396 r = fs.fsync(h);
397 if (r < 0) {
7c673cae
FG
398 break;
399 }
11fdf7f2 400 written_bytes += g_conf()->bluefs_alloc_size;
7c673cae 401 j++;
11fdf7f2 402 if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) {
7c673cae
FG
403 break;
404 }
405 }
406}
407
408void create_single_file(BlueFS &fs)
409{
410 BlueFS::FileWriter *h;
411 stringstream ss;
412 string dir = "dir.test";
413 ASSERT_EQ(0, fs.mkdir(dir));
414 string file = "testfile";
415 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
416 bufferlist bl;
11fdf7f2
TL
417 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
418 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
7c673cae
FG
419 bl.push_back(bp);
420 h->append(bl.c_str(), bl.length());
421 fs.fsync(h);
422 fs.close_writer(h);
423}
424
425void write_single_file(BlueFS &fs, uint64_t rationed_bytes)
426{
7c673cae 427 stringstream ss;
11fdf7f2
TL
428 const string dir = "dir.test";
429 const string file = "testfile";
7c673cae
FG
430 uint64_t written_bytes = 0;
431 rationed_bytes -= ALLOC_SIZE;
432 while (1) {
11fdf7f2 433 BlueFS::FileWriter *h;
7c673cae 434 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
435 ASSERT_NE(nullptr, h);
436 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 437 bufferlist bl;
11fdf7f2
TL
438 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
439 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
7c673cae
FG
440 bl.push_back(bp);
441 h->append(bl.c_str(), bl.length());
11fdf7f2 442 int r = fs.fsync(h);
7c673cae 443 if (r < 0) {
7c673cae
FG
444 break;
445 }
11fdf7f2
TL
446 written_bytes += g_conf()->bluefs_alloc_size;
447 if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) {
7c673cae
FG
448 break;
449 }
450 }
451}
452
453bool writes_done = false;
454
455void sync_fs(BlueFS &fs)
456{
457 while (1) {
458 if (writes_done == true)
459 break;
1911f103 460 fs.sync_metadata(false);
7c673cae
FG
461 sleep(1);
462 }
463}
464
465
/// Block until thread `t` has finished.
void do_join(std::thread& t)
{
  t.join();
}

/// Block until every thread in `v` has finished, in container order.
void join_all(std::vector<std::thread>& v)
{
  for (std::thread& worker : v) {
    do_join(worker);
  }
}
475
476#define NUM_WRITERS 3
477#define NUM_SYNC_THREADS 1
478
479#define NUM_SINGLE_FILE_WRITERS 1
480#define NUM_MULTIPLE_FILE_WRITERS 2
481
482TEST(BlueFS, test_flush_1) {
483 uint64_t size = 1048576 * 128;
9f95a23c 484 TempBdev bdev{size};
11fdf7f2 485 g_ceph_context->_conf.set_val(
7c673cae
FG
486 "bluefs_alloc_size",
487 "65536");
11fdf7f2 488 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
489
490 BlueFS fs(g_ceph_context);
9f95a23c 491 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
7c673cae
FG
492 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
493 uuid_d fsid;
9f95a23c 494 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 495 ASSERT_EQ(0, fs.mount());
9f95a23c 496 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
497 {
498 std::vector<std::thread> write_thread_multiple;
499 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
500 uint64_t per_thread_bytes = (effective_size/(NUM_MULTIPLE_FILE_WRITERS + NUM_SINGLE_FILE_WRITERS));
501 for (int i=0; i<NUM_MULTIPLE_FILE_WRITERS ; i++) {
502 write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
503 }
504
505 create_single_file(fs);
506 std::vector<std::thread> write_thread_single;
507 for (int i=0; i<NUM_SINGLE_FILE_WRITERS; i++) {
508 write_thread_single.push_back(std::thread(write_single_file, std::ref(fs), per_thread_bytes));
509 }
510
511 join_all(write_thread_single);
512 join_all(write_thread_multiple);
513 }
514 fs.umount();
7c673cae
FG
515}
516
517TEST(BlueFS, test_flush_2) {
518 uint64_t size = 1048576 * 256;
9f95a23c 519 TempBdev bdev{size};
11fdf7f2 520 g_ceph_context->_conf.set_val(
7c673cae
FG
521 "bluefs_alloc_size",
522 "65536");
11fdf7f2 523 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
524
525 BlueFS fs(g_ceph_context);
9f95a23c 526 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
7c673cae
FG
527 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
528 uuid_d fsid;
9f95a23c 529 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 530 ASSERT_EQ(0, fs.mount());
9f95a23c 531 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
532 {
533 uint64_t effective_size = size - (128 * 1048576); // leaving the last 32 MB for log compaction
534 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
535 std::vector<std::thread> write_thread_multiple;
536 for (int i=0; i<NUM_WRITERS; i++) {
537 write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
538 }
539
540 join_all(write_thread_multiple);
541 }
542 fs.umount();
7c673cae
FG
543}
544
545TEST(BlueFS, test_flush_3) {
546 uint64_t size = 1048576 * 256;
9f95a23c 547 TempBdev bdev{size};
11fdf7f2 548 g_ceph_context->_conf.set_val(
7c673cae
FG
549 "bluefs_alloc_size",
550 "65536");
11fdf7f2 551 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
552
553 BlueFS fs(g_ceph_context);
9f95a23c 554 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
7c673cae
FG
555 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
556 uuid_d fsid;
9f95a23c 557 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 558 ASSERT_EQ(0, fs.mount());
9f95a23c 559 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
560 {
561 std::vector<std::thread> write_threads;
562 uint64_t effective_size = size - (64 * 1048576); // leaving the last 11 MB for log compaction
563 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
564 for (int i=0; i<NUM_WRITERS; i++) {
565 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
566 }
567
568 std::vector<std::thread> sync_threads;
569 for (int i=0; i<NUM_SYNC_THREADS; i++) {
570 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
571 }
572
573 join_all(write_threads);
574 writes_done = true;
575 join_all(sync_threads);
576 }
577 fs.umount();
7c673cae
FG
578}
579
580TEST(BlueFS, test_simple_compaction_sync) {
11fdf7f2 581 g_ceph_context->_conf.set_val(
7c673cae
FG
582 "bluefs_compact_log_sync",
583 "true");
584 uint64_t size = 1048576 * 128;
9f95a23c 585 TempBdev bdev{size};
7c673cae
FG
586
587 BlueFS fs(g_ceph_context);
9f95a23c 588 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
7c673cae
FG
589 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
590 uuid_d fsid;
9f95a23c 591 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 592 ASSERT_EQ(0, fs.mount());
9f95a23c 593 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae 594 {
7c673cae
FG
595 for (int i=0; i<10; i++) {
596 string dir = "dir.";
597 dir.append(to_string(i));
598 ASSERT_EQ(0, fs.mkdir(dir));
599 for (int j=0; j<10; j++) {
600 string file = "file.";
601 file.append(to_string(j));
11fdf7f2 602 BlueFS::FileWriter *h;
7c673cae 603 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
604 ASSERT_NE(nullptr, h);
605 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 606 bufferlist bl;
11fdf7f2
TL
607 std::unique_ptr<char[]> buf = gen_buffer(4096);
608 bufferptr bp = buffer::claim_char(4096, buf.get());
7c673cae
FG
609 bl.push_back(bp);
610 h->append(bl.c_str(), bl.length());
611 fs.fsync(h);
7c673cae
FG
612 }
613 }
614 }
7c673cae
FG
615 {
616 for (int i=0; i<10; i+=2) {
617 string dir = "dir.";
618 dir.append(to_string(i));
11fdf7f2 619 for (int j=0; j<10; j++) {
7c673cae
FG
620 string file = "file.";
621 file.append(to_string(j));
622 fs.unlink(dir, file);
1911f103 623 fs.sync_metadata(false);
7c673cae 624 }
11fdf7f2 625 ASSERT_EQ(0, fs.rmdir(dir));
1911f103 626 fs.sync_metadata(false);
7c673cae
FG
627 }
628 }
629 fs.compact_log();
630 fs.umount();
7c673cae
FG
631}
632
633TEST(BlueFS, test_simple_compaction_async) {
11fdf7f2 634 g_ceph_context->_conf.set_val(
7c673cae
FG
635 "bluefs_compact_log_sync",
636 "false");
637 uint64_t size = 1048576 * 128;
9f95a23c 638 TempBdev bdev{size};
7c673cae
FG
639
640 BlueFS fs(g_ceph_context);
9f95a23c 641 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
7c673cae
FG
642 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
643 uuid_d fsid;
9f95a23c 644 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 645 ASSERT_EQ(0, fs.mount());
9f95a23c 646 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae 647 {
7c673cae
FG
648 for (int i=0; i<10; i++) {
649 string dir = "dir.";
650 dir.append(to_string(i));
651 ASSERT_EQ(0, fs.mkdir(dir));
652 for (int j=0; j<10; j++) {
653 string file = "file.";
654 file.append(to_string(j));
11fdf7f2 655 BlueFS::FileWriter *h;
7c673cae 656 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
11fdf7f2
TL
657 ASSERT_NE(nullptr, h);
658 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
7c673cae 659 bufferlist bl;
11fdf7f2
TL
660 std::unique_ptr<char[]> buf = gen_buffer(4096);
661 bufferptr bp = buffer::claim_char(4096, buf.get());
7c673cae
FG
662 bl.push_back(bp);
663 h->append(bl.c_str(), bl.length());
664 fs.fsync(h);
7c673cae
FG
665 }
666 }
667 }
7c673cae
FG
668 {
669 for (int i=0; i<10; i+=2) {
670 string dir = "dir.";
671 dir.append(to_string(i));
11fdf7f2 672 for (int j=0; j<10; j++) {
7c673cae
FG
673 string file = "file.";
674 file.append(to_string(j));
675 fs.unlink(dir, file);
1911f103 676 fs.sync_metadata(false);
7c673cae 677 }
11fdf7f2 678 ASSERT_EQ(0, fs.rmdir(dir));
1911f103 679 fs.sync_metadata(false);
7c673cae
FG
680 }
681 }
682 fs.compact_log();
683 fs.umount();
7c673cae
FG
684}
685
686TEST(BlueFS, test_compaction_sync) {
687 uint64_t size = 1048576 * 128;
9f95a23c 688 TempBdev bdev{size};
11fdf7f2 689 g_ceph_context->_conf.set_val(
7c673cae
FG
690 "bluefs_alloc_size",
691 "65536");
11fdf7f2 692 g_ceph_context->_conf.set_val(
7c673cae
FG
693 "bluefs_compact_log_sync",
694 "true");
695
696 BlueFS fs(g_ceph_context);
9f95a23c 697 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
7c673cae
FG
698 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
699 uuid_d fsid;
9f95a23c 700 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 701 ASSERT_EQ(0, fs.mount());
9f95a23c 702 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
703 {
704 std::vector<std::thread> write_threads;
705 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
706 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
707 for (int i=0; i<NUM_WRITERS; i++) {
708 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
709 }
710
711 std::vector<std::thread> sync_threads;
712 for (int i=0; i<NUM_SYNC_THREADS; i++) {
713 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
714 }
715
716 join_all(write_threads);
717 writes_done = true;
718 join_all(sync_threads);
719 fs.compact_log();
720 }
721 fs.umount();
7c673cae
FG
722}
723
724TEST(BlueFS, test_compaction_async) {
725 uint64_t size = 1048576 * 128;
9f95a23c 726 TempBdev bdev{size};
11fdf7f2 727 g_ceph_context->_conf.set_val(
7c673cae
FG
728 "bluefs_alloc_size",
729 "65536");
11fdf7f2 730 g_ceph_context->_conf.set_val(
7c673cae
FG
731 "bluefs_compact_log_sync",
732 "false");
733
734 BlueFS fs(g_ceph_context);
9f95a23c 735 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
7c673cae
FG
736 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
737 uuid_d fsid;
9f95a23c 738 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 739 ASSERT_EQ(0, fs.mount());
9f95a23c 740 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
741 {
742 std::vector<std::thread> write_threads;
743 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
744 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
745 for (int i=0; i<NUM_WRITERS; i++) {
746 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
747 }
748
749 std::vector<std::thread> sync_threads;
750 for (int i=0; i<NUM_SYNC_THREADS; i++) {
751 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
752 }
753
754 join_all(write_threads);
755 writes_done = true;
756 join_all(sync_threads);
757 fs.compact_log();
758 }
759 fs.umount();
7c673cae
FG
760}
761
762TEST(BlueFS, test_replay) {
763 uint64_t size = 1048576 * 128;
9f95a23c 764 TempBdev bdev{size};
11fdf7f2 765 g_ceph_context->_conf.set_val(
7c673cae
FG
766 "bluefs_alloc_size",
767 "65536");
11fdf7f2 768 g_ceph_context->_conf.set_val(
7c673cae
FG
769 "bluefs_compact_log_sync",
770 "false");
771
772 BlueFS fs(g_ceph_context);
9f95a23c 773 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
7c673cae
FG
774 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
775 uuid_d fsid;
9f95a23c 776 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
7c673cae 777 ASSERT_EQ(0, fs.mount());
9f95a23c 778 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae
FG
779 {
780 std::vector<std::thread> write_threads;
781 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
782 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
783 for (int i=0; i<NUM_WRITERS; i++) {
784 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
785 }
786
787 std::vector<std::thread> sync_threads;
788 for (int i=0; i<NUM_SYNC_THREADS; i++) {
789 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
790 }
791
792 join_all(write_threads);
793 writes_done = true;
794 join_all(sync_threads);
795 fs.compact_log();
796 }
797 fs.umount();
798 // remount and check log can replay safe?
31f18b77 799 ASSERT_EQ(0, fs.mount());
9f95a23c 800 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
7c673cae 801 fs.umount();
7c673cae
FG
802}
803
f6b5b4d7
TL
804TEST(BlueFS, test_replay_growth) {
805 uint64_t size = 1048576LL * (2 * 1024 + 128);
806 TempBdev bdev{size};
807
808 ConfSaver conf(g_ceph_context->_conf);
809 conf.SetVal("bluefs_alloc_size", "4096");
810 conf.SetVal("bluefs_shared_alloc_size", "4096");
811 conf.SetVal("bluefs_compact_log_sync", "false");
812 conf.SetVal("bluefs_min_log_runway", "32768");
813 conf.SetVal("bluefs_max_log_runway", "65536");
814 conf.SetVal("bluefs_allocator", "stupid");
815 conf.SetVal("bluefs_sync_write", "true");
816 conf.ApplyChanges();
817
818 BlueFS fs(g_ceph_context);
819 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false));
820 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
821 uuid_d fsid;
822 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false }));
823 ASSERT_EQ(0, fs.mount());
824 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
825 ASSERT_EQ(0, fs.mkdir("dir"));
826
827 char data[2000];
828 BlueFS::FileWriter *h;
829 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
830 for (size_t i = 0; i < 10000; i++) {
831 h->append(data, 2000);
832 fs.fsync(h);
833 }
834 fs.close_writer(h);
835 fs.umount(true); //do not compact on exit!
836
837 // remount and check log can replay safe?
838 ASSERT_EQ(0, fs.mount());
839 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false }));
840 fs.umount();
841}
842
ec96510d
FG
843TEST(BlueFS, test_tracker_50965) {
844 uint64_t size_wal = 1048576 * 64;
845 TempBdev bdev_wal{size_wal};
846 uint64_t size_db = 1048576 * 128;
847 TempBdev bdev_db{size_db};
848 uint64_t size_slow = 1048576 * 256;
849 TempBdev bdev_slow{size_slow};
850
851 ConfSaver conf(g_ceph_context->_conf);
852 conf.SetVal("bluefs_min_flush_size", "65536");
853 conf.ApplyChanges();
854
855 BlueFS fs(g_ceph_context);
856 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false, 0));
857 fs.add_block_extent(BlueFS::BDEV_WAL, 1048576, size_wal - 1048576);
858 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0));
859 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size_db - 1048576);
860 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0));
861 fs.add_block_extent(BlueFS::BDEV_SLOW, 1048576, size_slow - 1048576);
862 uuid_d fsid;
863 ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true }));
864 ASSERT_EQ(0, fs.mount());
865 ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true }));
866
867 string dir_slow = "dir.slow";
868 ASSERT_EQ(0, fs.mkdir(dir_slow));
869 string dir_db = "dir_db";
870 ASSERT_EQ(0, fs.mkdir(dir_db));
871
872 string file_slow = "file";
873 BlueFS::FileWriter *h_slow;
874 ASSERT_EQ(0, fs.open_for_write(dir_slow, file_slow, &h_slow, false));
875 ASSERT_NE(nullptr, h_slow);
876
877 string file_db = "file";
878 BlueFS::FileWriter *h_db;
879 ASSERT_EQ(0, fs.open_for_write(dir_db, file_db, &h_db, false));
880 ASSERT_NE(nullptr, h_db);
881
882 bufferlist bl1;
883 std::unique_ptr<char[]> buf1 = gen_buffer(70000);
884 bufferptr bp1 = buffer::claim_char(70000, buf1.get());
885 bl1.push_back(bp1);
886 h_slow->append(bl1.c_str(), bl1.length());
887 fs.flush(h_slow);
888
889 uint64_t h_slow_dirty_seq_1 = fs.debug_get_dirty_seq(h_slow);
890
891 bufferlist bl2;
892 std::unique_ptr<char[]> buf2 = gen_buffer(1000);
893 bufferptr bp2 = buffer::claim_char(1000, buf2.get());
894 bl2.push_back(bp2);
895 h_db->append(bl2.c_str(), bl2.length());
896 fs.fsync(h_db);
897
898 uint64_t h_slow_dirty_seq_2 = fs.debug_get_dirty_seq(h_slow);
899 bool h_slow_dev_dirty = fs.debug_get_is_dev_dirty(h_slow, BlueFS::BDEV_SLOW);
900
901 //problem if allocations are stable in log but slow device is not flushed yet
902 ASSERT_FALSE(h_slow_dirty_seq_1 != 0 &&
903 h_slow_dirty_seq_2 == 0 &&
904 h_slow_dev_dirty == true);
905
906 fs.close_writer(h_slow);
907 fs.close_writer(h_db);
908
909 fs.umount();
910}
911
7c673cae
FG
912int main(int argc, char **argv) {
913 vector<const char*> args;
914 argv_to_vec(argc, (const char **)argv, args);
7c673cae 915
11fdf7f2
TL
916 map<string,string> defaults = {
917 { "debug_bluefs", "1/20" },
918 { "debug_bdev", "1/20" }
919 };
7c673cae 920
11fdf7f2 921 auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT,
7c673cae 922 CODE_ENVIRONMENT_UTILITY,
11fdf7f2 923 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
7c673cae 924 common_init_finish(g_ceph_context);
11fdf7f2 925 g_ceph_context->_conf.set_val(
7c673cae
FG
926 "enable_experimental_unrecoverable_data_corrupting_features",
927 "*");
11fdf7f2 928 g_ceph_context->_conf.apply_changes(nullptr);
7c673cae
FG
929
930 ::testing::InitGoogleTest(&argc, argv);
931 return RUN_ALL_TESTS();
932}