]> git.proxmox.com Git - ceph.git/blob - ceph/src/test/objectstore/test_bluefs.cc
build: use dgit for download target
[ceph.git] / ceph / src / test / objectstore / test_bluefs.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <time.h>
#include <fcntl.h>
#include <unistd.h>
#include <atomic>
#include <random>
#include <thread>
#include "global/global_init.h"
#include "common/ceph_argparse.h"
#include "include/stringify.h"
#include "include/scope_guard.h"
#include "common/errno.h"
#include <gtest/gtest.h>

#include "os/bluestore/BlueFS.h"
20
21 string get_temp_bdev(uint64_t size)
22 {
23 static int n = 0;
24 string fn = "ceph_test_bluefs.tmp.block." + stringify(getpid())
25 + "." + stringify(++n);
26 int fd = ::open(fn.c_str(), O_CREAT|O_RDWR|O_TRUNC, 0644);
27 ceph_assert(fd >= 0);
28 int r = ::ftruncate(fd, size);
29 ceph_assert(r >= 0);
30 ::close(fd);
31 return fn;
32 }
33
// Allocate `size` bytes and fill them with pseudo-random data.
// The engine is default-constructed on every call, so two calls with the
// same size produce the same byte sequence.
std::unique_ptr<char[]> gen_buffer(uint64_t size)
{
  using byte_engine =
    std::independent_bits_engine<std::default_random_engine, CHAR_BIT, unsigned char>;

  auto filled = std::make_unique<char[]>(size);
  byte_engine next_byte;
  for (uint64_t pos = 0; pos < size; ++pos) {
    filled[pos] = next_byte();
  }
  return filled;
}
41
42
43 void rm_temp_bdev(string f)
44 {
45 ::unlink(f.c_str());
46 }
47
48 TEST(BlueFS, mkfs) {
49 uint64_t size = 1048576 * 128;
50 string fn = get_temp_bdev(size);
51 uuid_d fsid;
52 BlueFS fs(g_ceph_context);
53 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn, false));
54 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
55 ASSERT_EQ(0, fs.mkfs(fsid));
56 rm_temp_bdev(fn);
57 }
58
59 TEST(BlueFS, mkfs_mount) {
60 uint64_t size = 1048576 * 128;
61 string fn = get_temp_bdev(size);
62 BlueFS fs(g_ceph_context);
63 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn, false));
64 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
65 uuid_d fsid;
66 ASSERT_EQ(0, fs.mkfs(fsid));
67 ASSERT_EQ(0, fs.mount());
68 ASSERT_EQ(fs.get_total(BlueFS::BDEV_DB), size - 1048576);
69 ASSERT_LT(fs.get_free(BlueFS::BDEV_DB), size - 1048576);
70 fs.umount();
71 rm_temp_bdev(fn);
72 }
73
74 TEST(BlueFS, write_read) {
75 uint64_t size = 1048576 * 128;
76 string fn = get_temp_bdev(size);
77 BlueFS fs(g_ceph_context);
78 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn, false));
79 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
80 uuid_d fsid;
81 ASSERT_EQ(0, fs.mkfs(fsid));
82 ASSERT_EQ(0, fs.mount());
83 {
84 BlueFS::FileWriter *h;
85 ASSERT_EQ(0, fs.mkdir("dir"));
86 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
87 h->append("foo", 3);
88 h->append("bar", 3);
89 h->append("baz", 3);
90 fs.fsync(h);
91 fs.close_writer(h);
92 }
93 {
94 BlueFS::FileReader *h;
95 ASSERT_EQ(0, fs.open_for_read("dir", "file", &h));
96 bufferlist bl;
97 BlueFS::FileReaderBuffer buf(4096);
98 ASSERT_EQ(9, fs.read(h, &buf, 0, 1024, &bl, NULL));
99 ASSERT_EQ(0, strncmp("foobarbaz", bl.c_str(), 9));
100 delete h;
101 }
102 fs.umount();
103 rm_temp_bdev(fn);
104 }
105
106 TEST(BlueFS, small_appends) {
107 uint64_t size = 1048576 * 128;
108 string fn = get_temp_bdev(size);
109 BlueFS fs(g_ceph_context);
110 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn, false));
111 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
112 uuid_d fsid;
113 ASSERT_EQ(0, fs.mkfs(fsid));
114 ASSERT_EQ(0, fs.mount());
115 {
116 BlueFS::FileWriter *h;
117 ASSERT_EQ(0, fs.mkdir("dir"));
118 ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false));
119 for (unsigned i = 0; i < 10000; ++i) {
120 h->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
121 }
122 fs.fsync(h);
123 fs.close_writer(h);
124 }
125 {
126 BlueFS::FileWriter *h;
127 ASSERT_EQ(0, fs.open_for_write("dir", "file_sync", &h, false));
128 for (unsigned i = 0; i < 1000; ++i) {
129 h->append("abcdeabcdeabcdeabcdeabcdeabc", 23);
130 ASSERT_EQ(0, fs.fsync(h));
131 }
132 fs.close_writer(h);
133 }
134 fs.umount();
135 rm_temp_bdev(fn);
136 }
137
// Size in bytes of the random payload that write_data(), create_single_file()
// and write_single_file() append on each write; write_data() and
// write_single_file() also subtract it once from their byte ration.
#define ALLOC_SIZE 4096
139
140 void write_data(BlueFS &fs, uint64_t rationed_bytes)
141 {
142 int j=0, r=0;
143 uint64_t written_bytes = 0;
144 rationed_bytes -= ALLOC_SIZE;
145 stringstream ss;
146 string dir = "dir.";
147 ss << std::this_thread::get_id();
148 dir.append(ss.str());
149 dir.append(".");
150 dir.append(to_string(j));
151 ASSERT_EQ(0, fs.mkdir(dir));
152 while (1) {
153 string file = "file.";
154 file.append(to_string(j));
155 BlueFS::FileWriter *h;
156 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
157 ASSERT_NE(nullptr, h);
158 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
159 bufferlist bl;
160 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
161 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
162 bl.push_back(bp);
163 h->append(bl.c_str(), bl.length());
164 r = fs.fsync(h);
165 if (r < 0) {
166 break;
167 }
168 written_bytes += g_conf()->bluefs_alloc_size;
169 j++;
170 if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) {
171 break;
172 }
173 }
174 }
175
176 void create_single_file(BlueFS &fs)
177 {
178 BlueFS::FileWriter *h;
179 stringstream ss;
180 string dir = "dir.test";
181 ASSERT_EQ(0, fs.mkdir(dir));
182 string file = "testfile";
183 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
184 bufferlist bl;
185 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
186 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
187 bl.push_back(bp);
188 h->append(bl.c_str(), bl.length());
189 fs.fsync(h);
190 fs.close_writer(h);
191 }
192
193 void write_single_file(BlueFS &fs, uint64_t rationed_bytes)
194 {
195 stringstream ss;
196 const string dir = "dir.test";
197 const string file = "testfile";
198 uint64_t written_bytes = 0;
199 rationed_bytes -= ALLOC_SIZE;
200 while (1) {
201 BlueFS::FileWriter *h;
202 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
203 ASSERT_NE(nullptr, h);
204 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
205 bufferlist bl;
206 std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE);
207 bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get());
208 bl.push_back(bp);
209 h->append(bl.c_str(), bl.length());
210 int r = fs.fsync(h);
211 if (r < 0) {
212 break;
213 }
214 written_bytes += g_conf()->bluefs_alloc_size;
215 if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) {
216 break;
217 }
218 }
219 }
220
// Stop flag for sync_fs(): a test sets it to true after joining its writer
// threads.  It is written by the test thread and polled by the sync threads,
// so it is atomic rather than a plain bool (which would be a data race).
std::atomic<bool> writes_done{false};
222
223 void sync_fs(BlueFS &fs)
224 {
225 while (1) {
226 if (writes_done == true)
227 break;
228 fs.sync_metadata();
229 sleep(1);
230 }
231 }
232
233
// Block until thread `t` has finished.
void do_join(std::thread& t)
{
  t.join();
}

// Join every thread in `v`, in order.  The threads stay in the vector.
void join_all(std::vector<std::thread>& v)
{
  for (std::thread& worker : v) {
    do_join(worker);
  }
}
243
// Number of write_data() threads started by the tests that use NUM_WRITERS.
#define NUM_WRITERS 3
// Number of sync_fs() threads started alongside those writers.
#define NUM_SYNC_THREADS 1

// Thread mix used by test_flush_1: writers appending to one shared file
// (write_single_file) and writers creating many files (write_data).
#define NUM_SINGLE_FILE_WRITERS 1
#define NUM_MULTIPLE_FILE_WRITERS 2
249
250 TEST(BlueFS, test_flush_1) {
251 uint64_t size = 1048576 * 128;
252 string fn = get_temp_bdev(size);
253 g_ceph_context->_conf.set_val(
254 "bluefs_alloc_size",
255 "65536");
256 g_ceph_context->_conf.apply_changes(nullptr);
257
258 BlueFS fs(g_ceph_context);
259 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn, false));
260 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
261 uuid_d fsid;
262 ASSERT_EQ(0, fs.mkfs(fsid));
263 ASSERT_EQ(0, fs.mount());
264 {
265 std::vector<std::thread> write_thread_multiple;
266 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
267 uint64_t per_thread_bytes = (effective_size/(NUM_MULTIPLE_FILE_WRITERS + NUM_SINGLE_FILE_WRITERS));
268 for (int i=0; i<NUM_MULTIPLE_FILE_WRITERS ; i++) {
269 write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
270 }
271
272 create_single_file(fs);
273 std::vector<std::thread> write_thread_single;
274 for (int i=0; i<NUM_SINGLE_FILE_WRITERS; i++) {
275 write_thread_single.push_back(std::thread(write_single_file, std::ref(fs), per_thread_bytes));
276 }
277
278 join_all(write_thread_single);
279 join_all(write_thread_multiple);
280 }
281 fs.umount();
282 rm_temp_bdev(fn);
283 }
284
285 TEST(BlueFS, test_flush_2) {
286 uint64_t size = 1048576 * 256;
287 string fn = get_temp_bdev(size);
288 g_ceph_context->_conf.set_val(
289 "bluefs_alloc_size",
290 "65536");
291 g_ceph_context->_conf.apply_changes(nullptr);
292
293 BlueFS fs(g_ceph_context);
294 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn, false));
295 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
296 uuid_d fsid;
297 ASSERT_EQ(0, fs.mkfs(fsid));
298 ASSERT_EQ(0, fs.mount());
299 {
300 uint64_t effective_size = size - (128 * 1048576); // leaving the last 32 MB for log compaction
301 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
302 std::vector<std::thread> write_thread_multiple;
303 for (int i=0; i<NUM_WRITERS; i++) {
304 write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
305 }
306
307 join_all(write_thread_multiple);
308 }
309 fs.umount();
310 rm_temp_bdev(fn);
311 }
312
313 TEST(BlueFS, test_flush_3) {
314 uint64_t size = 1048576 * 256;
315 string fn = get_temp_bdev(size);
316 g_ceph_context->_conf.set_val(
317 "bluefs_alloc_size",
318 "65536");
319 g_ceph_context->_conf.apply_changes(nullptr);
320
321 BlueFS fs(g_ceph_context);
322 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn, false));
323 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
324 uuid_d fsid;
325 ASSERT_EQ(0, fs.mkfs(fsid));
326 ASSERT_EQ(0, fs.mount());
327 {
328 std::vector<std::thread> write_threads;
329 uint64_t effective_size = size - (64 * 1048576); // leaving the last 11 MB for log compaction
330 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
331 for (int i=0; i<NUM_WRITERS; i++) {
332 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
333 }
334
335 std::vector<std::thread> sync_threads;
336 for (int i=0; i<NUM_SYNC_THREADS; i++) {
337 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
338 }
339
340 join_all(write_threads);
341 writes_done = true;
342 join_all(sync_threads);
343 }
344 fs.umount();
345 rm_temp_bdev(fn);
346 }
347
348 TEST(BlueFS, test_simple_compaction_sync) {
349 g_ceph_context->_conf.set_val(
350 "bluefs_compact_log_sync",
351 "true");
352 uint64_t size = 1048576 * 128;
353 string fn = get_temp_bdev(size);
354
355 BlueFS fs(g_ceph_context);
356 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn, false));
357 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
358 uuid_d fsid;
359 ASSERT_EQ(0, fs.mkfs(fsid));
360 ASSERT_EQ(0, fs.mount());
361 {
362 for (int i=0; i<10; i++) {
363 string dir = "dir.";
364 dir.append(to_string(i));
365 ASSERT_EQ(0, fs.mkdir(dir));
366 for (int j=0; j<10; j++) {
367 string file = "file.";
368 file.append(to_string(j));
369 BlueFS::FileWriter *h;
370 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
371 ASSERT_NE(nullptr, h);
372 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
373 bufferlist bl;
374 std::unique_ptr<char[]> buf = gen_buffer(4096);
375 bufferptr bp = buffer::claim_char(4096, buf.get());
376 bl.push_back(bp);
377 h->append(bl.c_str(), bl.length());
378 fs.fsync(h);
379 }
380 }
381 }
382 {
383 for (int i=0; i<10; i+=2) {
384 string dir = "dir.";
385 dir.append(to_string(i));
386 for (int j=0; j<10; j++) {
387 string file = "file.";
388 file.append(to_string(j));
389 fs.unlink(dir, file);
390 fs.flush_log();
391 }
392 ASSERT_EQ(0, fs.rmdir(dir));
393 fs.flush_log();
394 }
395 }
396 fs.compact_log();
397 fs.umount();
398 rm_temp_bdev(fn);
399 }
400
401 TEST(BlueFS, test_simple_compaction_async) {
402 g_ceph_context->_conf.set_val(
403 "bluefs_compact_log_sync",
404 "false");
405 uint64_t size = 1048576 * 128;
406 string fn = get_temp_bdev(size);
407
408 BlueFS fs(g_ceph_context);
409 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn, false));
410 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
411 uuid_d fsid;
412 ASSERT_EQ(0, fs.mkfs(fsid));
413 ASSERT_EQ(0, fs.mount());
414 {
415 for (int i=0; i<10; i++) {
416 string dir = "dir.";
417 dir.append(to_string(i));
418 ASSERT_EQ(0, fs.mkdir(dir));
419 for (int j=0; j<10; j++) {
420 string file = "file.";
421 file.append(to_string(j));
422 BlueFS::FileWriter *h;
423 ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false));
424 ASSERT_NE(nullptr, h);
425 auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); });
426 bufferlist bl;
427 std::unique_ptr<char[]> buf = gen_buffer(4096);
428 bufferptr bp = buffer::claim_char(4096, buf.get());
429 bl.push_back(bp);
430 h->append(bl.c_str(), bl.length());
431 fs.fsync(h);
432 }
433 }
434 }
435 {
436 for (int i=0; i<10; i+=2) {
437 string dir = "dir.";
438 dir.append(to_string(i));
439 for (int j=0; j<10; j++) {
440 string file = "file.";
441 file.append(to_string(j));
442 fs.unlink(dir, file);
443 fs.flush_log();
444 }
445 ASSERT_EQ(0, fs.rmdir(dir));
446 fs.flush_log();
447 }
448 }
449 fs.compact_log();
450 fs.umount();
451 rm_temp_bdev(fn);
452 }
453
454 TEST(BlueFS, test_compaction_sync) {
455 uint64_t size = 1048576 * 128;
456 string fn = get_temp_bdev(size);
457 g_ceph_context->_conf.set_val(
458 "bluefs_alloc_size",
459 "65536");
460 g_ceph_context->_conf.set_val(
461 "bluefs_compact_log_sync",
462 "true");
463
464 BlueFS fs(g_ceph_context);
465 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn, false));
466 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
467 uuid_d fsid;
468 ASSERT_EQ(0, fs.mkfs(fsid));
469 ASSERT_EQ(0, fs.mount());
470 {
471 std::vector<std::thread> write_threads;
472 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
473 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
474 for (int i=0; i<NUM_WRITERS; i++) {
475 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
476 }
477
478 std::vector<std::thread> sync_threads;
479 for (int i=0; i<NUM_SYNC_THREADS; i++) {
480 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
481 }
482
483 join_all(write_threads);
484 writes_done = true;
485 join_all(sync_threads);
486 fs.compact_log();
487 }
488 fs.umount();
489 rm_temp_bdev(fn);
490 }
491
492 TEST(BlueFS, test_compaction_async) {
493 uint64_t size = 1048576 * 128;
494 string fn = get_temp_bdev(size);
495 g_ceph_context->_conf.set_val(
496 "bluefs_alloc_size",
497 "65536");
498 g_ceph_context->_conf.set_val(
499 "bluefs_compact_log_sync",
500 "false");
501
502 BlueFS fs(g_ceph_context);
503 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn, false));
504 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
505 uuid_d fsid;
506 ASSERT_EQ(0, fs.mkfs(fsid));
507 ASSERT_EQ(0, fs.mount());
508 {
509 std::vector<std::thread> write_threads;
510 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
511 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
512 for (int i=0; i<NUM_WRITERS; i++) {
513 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
514 }
515
516 std::vector<std::thread> sync_threads;
517 for (int i=0; i<NUM_SYNC_THREADS; i++) {
518 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
519 }
520
521 join_all(write_threads);
522 writes_done = true;
523 join_all(sync_threads);
524 fs.compact_log();
525 }
526 fs.umount();
527 rm_temp_bdev(fn);
528 }
529
530 TEST(BlueFS, test_replay) {
531 uint64_t size = 1048576 * 128;
532 string fn = get_temp_bdev(size);
533 g_ceph_context->_conf.set_val(
534 "bluefs_alloc_size",
535 "65536");
536 g_ceph_context->_conf.set_val(
537 "bluefs_compact_log_sync",
538 "false");
539
540 BlueFS fs(g_ceph_context);
541 ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, fn, false));
542 fs.add_block_extent(BlueFS::BDEV_DB, 1048576, size - 1048576);
543 uuid_d fsid;
544 ASSERT_EQ(0, fs.mkfs(fsid));
545 ASSERT_EQ(0, fs.mount());
546 {
547 std::vector<std::thread> write_threads;
548 uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction
549 uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS));
550 for (int i=0; i<NUM_WRITERS; i++) {
551 write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes));
552 }
553
554 std::vector<std::thread> sync_threads;
555 for (int i=0; i<NUM_SYNC_THREADS; i++) {
556 sync_threads.push_back(std::thread(sync_fs, std::ref(fs)));
557 }
558
559 join_all(write_threads);
560 writes_done = true;
561 join_all(sync_threads);
562 fs.compact_log();
563 }
564 fs.umount();
565 // remount and check log can replay safe?
566 ASSERT_EQ(0, fs.mount());
567 fs.umount();
568 rm_temp_bdev(fn);
569 }
570
571 int main(int argc, char **argv) {
572 vector<const char*> args;
573 argv_to_vec(argc, (const char **)argv, args);
574
575 map<string,string> defaults = {
576 { "debug_bluefs", "1/20" },
577 { "debug_bdev", "1/20" }
578 };
579
580 auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT,
581 CODE_ENVIRONMENT_UTILITY,
582 CINIT_FLAG_NO_DEFAULT_CONFIG_FILE);
583 common_init_finish(g_ceph_context);
584 g_ceph_context->_conf.set_val(
585 "enable_experimental_unrecoverable_data_corrupting_features",
586 "*");
587 g_ceph_context->_conf.apply_changes(nullptr);
588
589 ::testing::InitGoogleTest(&argc, argv);
590 return RUN_ALL_TESTS();
591 }