]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <fcntl.h>
#include <unistd.h>

#include <atomic>
#include <iostream>
#include <random>
#include <stack>
#include <thread>

#include <gtest/gtest.h>

#include "global/global_init.h"
#include "common/ceph_argparse.h"
#include "include/stringify.h"
#include "include/scope_guard.h"
#include "common/errno.h"

#include "os/bluestore/Allocator.h"
#include "os/bluestore/BlueFS.h"
22 | ||
20effc67 TL |
23 | using namespace std; |
24 | ||
// Allocates `size` bytes on the heap and fills them with pseudo-random data.
// The engine is default-constructed (never seeded), so every call produces
// the same byte sequence.
std::unique_ptr<char[]> gen_buffer(uint64_t size)
{
  using byte_engine =
    std::independent_bits_engine<std::default_random_engine, CHAR_BIT, unsigned char>;
  auto bytes = std::make_unique<char[]>(size);
  byte_engine rng;
  for (uint64_t pos = 0; pos < size; ++pos) {
    bytes[pos] = rng();
  }
  return bytes;
}
32 | ||
9f95a23c TL |
33 | class TempBdev { |
34 | public: | |
35 | TempBdev(uint64_t size) | |
36 | : path{get_temp_bdev(size)} | |
37 | {} | |
38 | ~TempBdev() { | |
39 | rm_temp_bdev(path); | |
40 | } | |
41 | const std::string path; | |
42 | private: | |
43 | static string get_temp_bdev(uint64_t size) | |
44 | { | |
45 | static int n = 0; | |
46 | string fn = "ceph_test_bluefs.tmp.block." + stringify(getpid()) | |
47 | + "." + stringify(++n); | |
48 | int fd = ::open(fn.c_str(), O_CREAT|O_RDWR|O_TRUNC, 0644); | |
49 | ceph_assert(fd >= 0); | |
50 | int r = ::ftruncate(fd, size); | |
51 | ceph_assert(r >= 0); | |
52 | ::close(fd); | |
53 | return fn; | |
54 | } | |
55 | static void rm_temp_bdev(string f) | |
56 | { | |
57 | ::unlink(f.c_str()); | |
58 | } | |
59 | }; | |
7c673cae | 60 | |
f6b5b4d7 TL |
61 | class ConfSaver { |
62 | std::stack<std::pair<std::string, std::string>> saved_settings; | |
63 | ConfigProxy& conf; | |
64 | public: | |
65 | ConfSaver(ConfigProxy& conf) : conf(conf) { | |
66 | conf._clear_safe_to_start_threads(); | |
67 | }; | |
68 | ~ConfSaver() { | |
69 | conf._clear_safe_to_start_threads(); | |
70 | while(saved_settings.size() > 0) { | |
71 | auto& e = saved_settings.top(); | |
72 | conf.set_val_or_die(e.first, e.second); | |
73 | saved_settings.pop(); | |
74 | } | |
75 | conf.set_safe_to_start_threads(); | |
76 | conf.apply_changes(nullptr); | |
77 | } | |
78 | void SetVal(const char* key, const char* val) { | |
79 | std::string skey(key); | |
80 | std::string prev_val; | |
81 | conf.get_val(skey, &prev_val); | |
82 | conf.set_val_or_die(skey, val); | |
83 | saved_settings.emplace(skey, prev_val); | |
84 | } | |
85 | void ApplyChanges() { | |
86 | conf.set_safe_to_start_threads(); | |
87 | conf.apply_changes(nullptr); | |
88 | } | |
89 | }; | |
90 | ||
7c673cae FG |
91 | TEST(BlueFS, mkfs) { |
92 | uint64_t size = 1048576 * 128; | |
9f95a23c | 93 | TempBdev bdev{size}; |
7c673cae FG |
94 | uuid_d fsid; |
95 | BlueFS fs(g_ceph_context); | |
f67539c2 | 96 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
9f95a23c | 97 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
98 | } |
99 | ||
100 | TEST(BlueFS, mkfs_mount) { | |
101 | uint64_t size = 1048576 * 128; | |
9f95a23c | 102 | TempBdev bdev{size}; |
7c673cae | 103 | BlueFS fs(g_ceph_context); |
f67539c2 | 104 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 105 | uuid_d fsid; |
9f95a23c | 106 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 107 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 108 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
109 | ASSERT_EQ(fs.get_total(BlueFS::BDEV_DB), size - 1048576); |
110 | ASSERT_LT(fs.get_free(BlueFS::BDEV_DB), size - 1048576); | |
111 | fs.umount(); | |
9f95a23c TL |
112 | } |
113 | ||
7c673cae FG |
114 | TEST(BlueFS, write_read) { |
115 | uint64_t size = 1048576 * 128; | |
9f95a23c | 116 | TempBdev bdev{size}; |
7c673cae | 117 | BlueFS fs(g_ceph_context); |
f67539c2 | 118 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 119 | uuid_d fsid; |
9f95a23c | 120 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 121 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 122 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
123 | { |
124 | BlueFS::FileWriter *h; | |
125 | ASSERT_EQ(0, fs.mkdir("dir")); | |
126 | ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false)); | |
127 | h->append("foo", 3); | |
128 | h->append("bar", 3); | |
129 | h->append("baz", 3); | |
130 | fs.fsync(h); | |
131 | fs.close_writer(h); | |
132 | } | |
133 | { | |
134 | BlueFS::FileReader *h; | |
135 | ASSERT_EQ(0, fs.open_for_read("dir", "file", &h)); | |
136 | bufferlist bl; | |
f67539c2 | 137 | ASSERT_EQ(9, fs.read(h, 0, 1024, &bl, NULL)); |
7c673cae FG |
138 | ASSERT_EQ(0, strncmp("foobarbaz", bl.c_str(), 9)); |
139 | delete h; | |
140 | } | |
141 | fs.umount(); | |
7c673cae FG |
142 | } |
143 | ||
144 | TEST(BlueFS, small_appends) { | |
145 | uint64_t size = 1048576 * 128; | |
9f95a23c | 146 | TempBdev bdev{size}; |
7c673cae | 147 | BlueFS fs(g_ceph_context); |
f67539c2 | 148 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 149 | uuid_d fsid; |
9f95a23c | 150 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 151 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 152 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
153 | { |
154 | BlueFS::FileWriter *h; | |
155 | ASSERT_EQ(0, fs.mkdir("dir")); | |
156 | ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false)); | |
157 | for (unsigned i = 0; i < 10000; ++i) { | |
158 | h->append("abcdeabcdeabcdeabcdeabcdeabc", 23); | |
159 | } | |
160 | fs.fsync(h); | |
161 | fs.close_writer(h); | |
162 | } | |
163 | { | |
164 | BlueFS::FileWriter *h; | |
165 | ASSERT_EQ(0, fs.open_for_write("dir", "file_sync", &h, false)); | |
166 | for (unsigned i = 0; i < 1000; ++i) { | |
167 | h->append("abcdeabcdeabcdeabcdeabcdeabc", 23); | |
31f18b77 | 168 | ASSERT_EQ(0, fs.fsync(h)); |
7c673cae FG |
169 | } |
170 | fs.close_writer(h); | |
171 | } | |
172 | fs.umount(); | |
7c673cae FG |
173 | } |
174 | ||
494da23a | 175 | TEST(BlueFS, very_large_write) { |
adb31ebb | 176 | // we'll write a ~5G file, so allocate more than that for the whole fs |
cd265ab1 | 177 | uint64_t size = 1048576 * 1024 * 6ull; |
9f95a23c | 178 | TempBdev bdev{size}; |
494da23a TL |
179 | BlueFS fs(g_ceph_context); |
180 | ||
181 | bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io"); | |
182 | g_ceph_context->_conf.set_val("bluefs_buffered_io", "false"); | |
adb31ebb | 183 | uint64_t total_written = 0; |
494da23a | 184 | |
f67539c2 | 185 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
494da23a | 186 | uuid_d fsid; |
9f95a23c | 187 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
494da23a | 188 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 189 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
494da23a TL |
190 | char buf[1048571]; // this is biggish, but intentionally not evenly aligned |
191 | for (unsigned i = 0; i < sizeof(buf); ++i) { | |
192 | buf[i] = i; | |
193 | } | |
194 | { | |
195 | BlueFS::FileWriter *h; | |
196 | ASSERT_EQ(0, fs.mkdir("dir")); | |
197 | ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false)); | |
198 | for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) { | |
199 | h->append(buf, sizeof(buf)); | |
adb31ebb TL |
200 | total_written += sizeof(buf); |
201 | } | |
202 | fs.fsync(h); | |
203 | for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) { | |
204 | h->append(buf, sizeof(buf)); | |
205 | total_written += sizeof(buf); | |
494da23a TL |
206 | } |
207 | fs.fsync(h); | |
208 | fs.close_writer(h); | |
209 | } | |
210 | { | |
211 | BlueFS::FileReader *h; | |
212 | ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h)); | |
213 | bufferlist bl; | |
adb31ebb | 214 | ASSERT_EQ(h->file->fnode.size, total_written); |
494da23a TL |
215 | for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) { |
216 | bl.clear(); | |
f67539c2 | 217 | fs.read(h, i * sizeof(buf), sizeof(buf), &bl, NULL); |
494da23a TL |
218 | int r = memcmp(buf, bl.c_str(), sizeof(buf)); |
219 | if (r) { | |
220 | cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r | |
221 | << std::endl; | |
222 | } | |
223 | ASSERT_EQ(0, r); | |
224 | } | |
adb31ebb TL |
225 | for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) { |
226 | bl.clear(); | |
f67539c2 | 227 | fs.read(h, i * sizeof(buf), sizeof(buf), &bl, NULL); |
adb31ebb TL |
228 | int r = memcmp(buf, bl.c_str(), sizeof(buf)); |
229 | if (r) { | |
230 | cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r | |
231 | << std::endl; | |
232 | } | |
233 | ASSERT_EQ(0, r); | |
234 | } | |
235 | delete h; | |
236 | ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h)); | |
237 | ASSERT_EQ(h->file->fnode.size, total_written); | |
39ae355f | 238 | auto huge_buf = std::make_unique<char[]>(h->file->fnode.size); |
adb31ebb | 239 | auto l = h->file->fnode.size; |
f67539c2 TL |
240 | int64_t r = fs.read(h, 0, l, NULL, huge_buf.get()); |
241 | ASSERT_EQ(r, l); | |
cd265ab1 TL |
242 | delete h; |
243 | } | |
244 | fs.umount(); | |
245 | ||
246 | g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old)); | |
247 | } | |
248 | ||
249 | TEST(BlueFS, very_large_write2) { | |
250 | // we'll write a ~5G file, so allocate more than that for the whole fs | |
251 | uint64_t size_full = 1048576 * 1024 * 6ull; | |
252 | uint64_t size = 1048576 * 1024 * 5ull; | |
253 | TempBdev bdev{ size_full }; | |
254 | BlueFS fs(g_ceph_context); | |
255 | ||
256 | bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io"); | |
257 | g_ceph_context->_conf.set_val("bluefs_buffered_io", "false"); | |
258 | uint64_t total_written = 0; | |
259 | ||
260 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); | |
cd265ab1 TL |
261 | uuid_d fsid; |
262 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); | |
263 | ASSERT_EQ(0, fs.mount()); | |
264 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); | |
265 | ||
266 | char fill_arr[1 << 20]; // 1M | |
267 | for (size_t i = 0; i < sizeof(fill_arr); ++i) { | |
268 | fill_arr[i] = (char)i; | |
269 | } | |
270 | std::unique_ptr<char[]> buf; | |
271 | buf.reset(new char[size]); | |
272 | for (size_t i = 0; i < size; i += sizeof(fill_arr)) { | |
273 | memcpy(buf.get() + i, fill_arr, sizeof(fill_arr)); | |
274 | } | |
275 | { | |
276 | BlueFS::FileWriter* h; | |
277 | ASSERT_EQ(0, fs.mkdir("dir")); | |
278 | ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false)); | |
279 | fs.append_try_flush(h, buf.get(), size); | |
280 | total_written = size; | |
281 | fs.fsync(h); | |
282 | fs.close_writer(h); | |
283 | } | |
284 | memset(buf.get(), 0, size); | |
285 | { | |
286 | BlueFS::FileReader* h; | |
287 | ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h)); | |
288 | ASSERT_EQ(h->file->fnode.size, total_written); | |
289 | auto l = h->file->fnode.size; | |
f67539c2 TL |
290 | int64_t r = fs.read(h, 0, l, NULL, buf.get()); |
291 | ASSERT_EQ(r, l); | |
cd265ab1 TL |
292 | for (size_t i = 0; i < size; i += sizeof(fill_arr)) { |
293 | ceph_assert(memcmp(buf.get() + i, fill_arr, sizeof(fill_arr)) == 0); | |
294 | } | |
494da23a TL |
295 | delete h; |
296 | } | |
297 | fs.umount(); | |
298 | ||
299 | g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old)); | |
494da23a TL |
300 | } |
301 | ||
7c673cae FG |
302 | #define ALLOC_SIZE 4096 |
303 | ||
304 | void write_data(BlueFS &fs, uint64_t rationed_bytes) | |
305 | { | |
7c673cae FG |
306 | int j=0, r=0; |
307 | uint64_t written_bytes = 0; | |
308 | rationed_bytes -= ALLOC_SIZE; | |
309 | stringstream ss; | |
310 | string dir = "dir."; | |
311 | ss << std::this_thread::get_id(); | |
312 | dir.append(ss.str()); | |
313 | dir.append("."); | |
314 | dir.append(to_string(j)); | |
315 | ASSERT_EQ(0, fs.mkdir(dir)); | |
316 | while (1) { | |
317 | string file = "file."; | |
318 | file.append(to_string(j)); | |
11fdf7f2 | 319 | BlueFS::FileWriter *h; |
7c673cae | 320 | ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false)); |
11fdf7f2 TL |
321 | ASSERT_NE(nullptr, h); |
322 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
7c673cae | 323 | bufferlist bl; |
11fdf7f2 TL |
324 | std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE); |
325 | bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get()); | |
7c673cae FG |
326 | bl.push_back(bp); |
327 | h->append(bl.c_str(), bl.length()); | |
328 | r = fs.fsync(h); | |
329 | if (r < 0) { | |
7c673cae FG |
330 | break; |
331 | } | |
11fdf7f2 | 332 | written_bytes += g_conf()->bluefs_alloc_size; |
7c673cae | 333 | j++; |
11fdf7f2 | 334 | if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) { |
7c673cae FG |
335 | break; |
336 | } | |
337 | } | |
338 | } | |
339 | ||
340 | void create_single_file(BlueFS &fs) | |
341 | { | |
342 | BlueFS::FileWriter *h; | |
343 | stringstream ss; | |
344 | string dir = "dir.test"; | |
345 | ASSERT_EQ(0, fs.mkdir(dir)); | |
346 | string file = "testfile"; | |
347 | ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false)); | |
348 | bufferlist bl; | |
11fdf7f2 TL |
349 | std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE); |
350 | bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get()); | |
7c673cae FG |
351 | bl.push_back(bp); |
352 | h->append(bl.c_str(), bl.length()); | |
353 | fs.fsync(h); | |
354 | fs.close_writer(h); | |
355 | } | |
356 | ||
357 | void write_single_file(BlueFS &fs, uint64_t rationed_bytes) | |
358 | { | |
7c673cae | 359 | stringstream ss; |
11fdf7f2 TL |
360 | const string dir = "dir.test"; |
361 | const string file = "testfile"; | |
7c673cae FG |
362 | uint64_t written_bytes = 0; |
363 | rationed_bytes -= ALLOC_SIZE; | |
364 | while (1) { | |
11fdf7f2 | 365 | BlueFS::FileWriter *h; |
7c673cae | 366 | ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false)); |
11fdf7f2 TL |
367 | ASSERT_NE(nullptr, h); |
368 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
7c673cae | 369 | bufferlist bl; |
11fdf7f2 TL |
370 | std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE); |
371 | bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get()); | |
7c673cae FG |
372 | bl.push_back(bp); |
373 | h->append(bl.c_str(), bl.length()); | |
11fdf7f2 | 374 | int r = fs.fsync(h); |
7c673cae | 375 | if (r < 0) { |
7c673cae FG |
376 | break; |
377 | } | |
11fdf7f2 TL |
378 | written_bytes += g_conf()->bluefs_alloc_size; |
379 | if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) { | |
7c673cae FG |
380 | break; |
381 | } | |
382 | } | |
383 | } | |
384 | ||
385 | bool writes_done = false; | |
386 | ||
387 | void sync_fs(BlueFS &fs) | |
388 | { | |
389 | while (1) { | |
390 | if (writes_done == true) | |
391 | break; | |
1911f103 | 392 | fs.sync_metadata(false); |
7c673cae FG |
393 | sleep(1); |
394 | } | |
395 | } | |
396 | ||
397 | ||
// Blocks until thread `t` has finished.
void do_join(std::thread& t)
{
  t.join();
}

// Joins every thread in `v`, front to back.
void join_all(std::vector<std::thread>& v)
{
  for (std::thread& worker : v) {
    do_join(worker);
  }
}
407 | ||
408 | #define NUM_WRITERS 3 | |
409 | #define NUM_SYNC_THREADS 1 | |
410 | ||
411 | #define NUM_SINGLE_FILE_WRITERS 1 | |
412 | #define NUM_MULTIPLE_FILE_WRITERS 2 | |
413 | ||
414 | TEST(BlueFS, test_flush_1) { | |
415 | uint64_t size = 1048576 * 128; | |
9f95a23c | 416 | TempBdev bdev{size}; |
11fdf7f2 | 417 | g_ceph_context->_conf.set_val( |
7c673cae FG |
418 | "bluefs_alloc_size", |
419 | "65536"); | |
11fdf7f2 | 420 | g_ceph_context->_conf.apply_changes(nullptr); |
7c673cae FG |
421 | |
422 | BlueFS fs(g_ceph_context); | |
f67539c2 | 423 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 424 | uuid_d fsid; |
9f95a23c | 425 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 426 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 427 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
428 | { |
429 | std::vector<std::thread> write_thread_multiple; | |
430 | uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction | |
431 | uint64_t per_thread_bytes = (effective_size/(NUM_MULTIPLE_FILE_WRITERS + NUM_SINGLE_FILE_WRITERS)); | |
432 | for (int i=0; i<NUM_MULTIPLE_FILE_WRITERS ; i++) { | |
433 | write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes)); | |
434 | } | |
435 | ||
436 | create_single_file(fs); | |
437 | std::vector<std::thread> write_thread_single; | |
438 | for (int i=0; i<NUM_SINGLE_FILE_WRITERS; i++) { | |
439 | write_thread_single.push_back(std::thread(write_single_file, std::ref(fs), per_thread_bytes)); | |
440 | } | |
441 | ||
442 | join_all(write_thread_single); | |
443 | join_all(write_thread_multiple); | |
444 | } | |
445 | fs.umount(); | |
7c673cae FG |
446 | } |
447 | ||
448 | TEST(BlueFS, test_flush_2) { | |
449 | uint64_t size = 1048576 * 256; | |
9f95a23c | 450 | TempBdev bdev{size}; |
11fdf7f2 | 451 | g_ceph_context->_conf.set_val( |
7c673cae FG |
452 | "bluefs_alloc_size", |
453 | "65536"); | |
11fdf7f2 | 454 | g_ceph_context->_conf.apply_changes(nullptr); |
7c673cae FG |
455 | |
456 | BlueFS fs(g_ceph_context); | |
f67539c2 | 457 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 458 | uuid_d fsid; |
9f95a23c | 459 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 460 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 461 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
462 | { |
463 | uint64_t effective_size = size - (128 * 1048576); // leaving the last 32 MB for log compaction | |
464 | uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS)); | |
465 | std::vector<std::thread> write_thread_multiple; | |
466 | for (int i=0; i<NUM_WRITERS; i++) { | |
467 | write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes)); | |
468 | } | |
469 | ||
470 | join_all(write_thread_multiple); | |
471 | } | |
472 | fs.umount(); | |
7c673cae FG |
473 | } |
474 | ||
475 | TEST(BlueFS, test_flush_3) { | |
476 | uint64_t size = 1048576 * 256; | |
9f95a23c | 477 | TempBdev bdev{size}; |
11fdf7f2 | 478 | g_ceph_context->_conf.set_val( |
7c673cae FG |
479 | "bluefs_alloc_size", |
480 | "65536"); | |
11fdf7f2 | 481 | g_ceph_context->_conf.apply_changes(nullptr); |
7c673cae FG |
482 | |
483 | BlueFS fs(g_ceph_context); | |
f67539c2 | 484 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 485 | uuid_d fsid; |
9f95a23c | 486 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 487 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 488 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
489 | { |
490 | std::vector<std::thread> write_threads; | |
491 | uint64_t effective_size = size - (64 * 1048576); // leaving the last 11 MB for log compaction | |
492 | uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS)); | |
493 | for (int i=0; i<NUM_WRITERS; i++) { | |
494 | write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes)); | |
495 | } | |
496 | ||
497 | std::vector<std::thread> sync_threads; | |
498 | for (int i=0; i<NUM_SYNC_THREADS; i++) { | |
499 | sync_threads.push_back(std::thread(sync_fs, std::ref(fs))); | |
500 | } | |
501 | ||
502 | join_all(write_threads); | |
503 | writes_done = true; | |
504 | join_all(sync_threads); | |
505 | } | |
506 | fs.umount(); | |
7c673cae FG |
507 | } |
508 | ||
509 | TEST(BlueFS, test_simple_compaction_sync) { | |
11fdf7f2 | 510 | g_ceph_context->_conf.set_val( |
7c673cae FG |
511 | "bluefs_compact_log_sync", |
512 | "true"); | |
513 | uint64_t size = 1048576 * 128; | |
9f95a23c | 514 | TempBdev bdev{size}; |
7c673cae FG |
515 | |
516 | BlueFS fs(g_ceph_context); | |
f67539c2 | 517 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 518 | uuid_d fsid; |
9f95a23c | 519 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 520 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 521 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae | 522 | { |
7c673cae FG |
523 | for (int i=0; i<10; i++) { |
524 | string dir = "dir."; | |
525 | dir.append(to_string(i)); | |
526 | ASSERT_EQ(0, fs.mkdir(dir)); | |
527 | for (int j=0; j<10; j++) { | |
528 | string file = "file."; | |
529 | file.append(to_string(j)); | |
11fdf7f2 | 530 | BlueFS::FileWriter *h; |
7c673cae | 531 | ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false)); |
11fdf7f2 TL |
532 | ASSERT_NE(nullptr, h); |
533 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
7c673cae | 534 | bufferlist bl; |
11fdf7f2 TL |
535 | std::unique_ptr<char[]> buf = gen_buffer(4096); |
536 | bufferptr bp = buffer::claim_char(4096, buf.get()); | |
7c673cae FG |
537 | bl.push_back(bp); |
538 | h->append(bl.c_str(), bl.length()); | |
539 | fs.fsync(h); | |
7c673cae FG |
540 | } |
541 | } | |
542 | } | |
7c673cae FG |
543 | { |
544 | for (int i=0; i<10; i+=2) { | |
545 | string dir = "dir."; | |
546 | dir.append(to_string(i)); | |
11fdf7f2 | 547 | for (int j=0; j<10; j++) { |
7c673cae FG |
548 | string file = "file."; |
549 | file.append(to_string(j)); | |
550 | fs.unlink(dir, file); | |
1911f103 | 551 | fs.sync_metadata(false); |
7c673cae | 552 | } |
11fdf7f2 | 553 | ASSERT_EQ(0, fs.rmdir(dir)); |
1911f103 | 554 | fs.sync_metadata(false); |
7c673cae FG |
555 | } |
556 | } | |
557 | fs.compact_log(); | |
558 | fs.umount(); | |
7c673cae FG |
559 | } |
560 | ||
561 | TEST(BlueFS, test_simple_compaction_async) { | |
11fdf7f2 | 562 | g_ceph_context->_conf.set_val( |
7c673cae FG |
563 | "bluefs_compact_log_sync", |
564 | "false"); | |
565 | uint64_t size = 1048576 * 128; | |
9f95a23c | 566 | TempBdev bdev{size}; |
7c673cae FG |
567 | |
568 | BlueFS fs(g_ceph_context); | |
f67539c2 | 569 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 570 | uuid_d fsid; |
9f95a23c | 571 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 572 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 573 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae | 574 | { |
7c673cae FG |
575 | for (int i=0; i<10; i++) { |
576 | string dir = "dir."; | |
577 | dir.append(to_string(i)); | |
578 | ASSERT_EQ(0, fs.mkdir(dir)); | |
579 | for (int j=0; j<10; j++) { | |
580 | string file = "file."; | |
581 | file.append(to_string(j)); | |
11fdf7f2 | 582 | BlueFS::FileWriter *h; |
7c673cae | 583 | ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false)); |
11fdf7f2 TL |
584 | ASSERT_NE(nullptr, h); |
585 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
7c673cae | 586 | bufferlist bl; |
11fdf7f2 TL |
587 | std::unique_ptr<char[]> buf = gen_buffer(4096); |
588 | bufferptr bp = buffer::claim_char(4096, buf.get()); | |
7c673cae FG |
589 | bl.push_back(bp); |
590 | h->append(bl.c_str(), bl.length()); | |
591 | fs.fsync(h); | |
7c673cae FG |
592 | } |
593 | } | |
594 | } | |
7c673cae FG |
595 | { |
596 | for (int i=0; i<10; i+=2) { | |
597 | string dir = "dir."; | |
598 | dir.append(to_string(i)); | |
11fdf7f2 | 599 | for (int j=0; j<10; j++) { |
7c673cae FG |
600 | string file = "file."; |
601 | file.append(to_string(j)); | |
602 | fs.unlink(dir, file); | |
1911f103 | 603 | fs.sync_metadata(false); |
7c673cae | 604 | } |
11fdf7f2 | 605 | ASSERT_EQ(0, fs.rmdir(dir)); |
1911f103 | 606 | fs.sync_metadata(false); |
7c673cae FG |
607 | } |
608 | } | |
609 | fs.compact_log(); | |
610 | fs.umount(); | |
7c673cae FG |
611 | } |
612 | ||
613 | TEST(BlueFS, test_compaction_sync) { | |
614 | uint64_t size = 1048576 * 128; | |
9f95a23c | 615 | TempBdev bdev{size}; |
11fdf7f2 | 616 | g_ceph_context->_conf.set_val( |
7c673cae FG |
617 | "bluefs_alloc_size", |
618 | "65536"); | |
11fdf7f2 | 619 | g_ceph_context->_conf.set_val( |
7c673cae FG |
620 | "bluefs_compact_log_sync", |
621 | "true"); | |
1d09f67e TL |
622 | const char* canary_dir = "dir.after_compact_test"; |
623 | const char* canary_file = "file.after_compact_test"; | |
624 | const char* canary_data = "some random data"; | |
7c673cae FG |
625 | |
626 | BlueFS fs(g_ceph_context); | |
f67539c2 | 627 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 628 | uuid_d fsid; |
9f95a23c | 629 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 630 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 631 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
632 | { |
633 | std::vector<std::thread> write_threads; | |
634 | uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction | |
635 | uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS)); | |
636 | for (int i=0; i<NUM_WRITERS; i++) { | |
637 | write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes)); | |
638 | } | |
639 | ||
640 | std::vector<std::thread> sync_threads; | |
641 | for (int i=0; i<NUM_SYNC_THREADS; i++) { | |
642 | sync_threads.push_back(std::thread(sync_fs, std::ref(fs))); | |
643 | } | |
644 | ||
645 | join_all(write_threads); | |
646 | writes_done = true; | |
647 | join_all(sync_threads); | |
648 | fs.compact_log(); | |
1d09f67e TL |
649 | |
650 | { | |
651 | ASSERT_EQ(0, fs.mkdir(canary_dir)); | |
652 | BlueFS::FileWriter *h; | |
653 | ASSERT_EQ(0, fs.open_for_write(canary_dir, canary_file, &h, false)); | |
654 | ASSERT_NE(nullptr, h); | |
655 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
656 | h->append(canary_data, strlen(canary_data)); | |
657 | int r = fs.fsync(h); | |
658 | ASSERT_EQ(r, 0); | |
659 | } | |
660 | } | |
661 | fs.umount(); | |
662 | ||
663 | fs.mount(); | |
664 | { | |
665 | BlueFS::FileReader *h; | |
666 | ASSERT_EQ(0, fs.open_for_read(canary_dir, canary_file, &h)); | |
667 | ASSERT_NE(nullptr, h); | |
668 | bufferlist bl; | |
669 | ASSERT_EQ(strlen(canary_data), fs.read(h, 0, 1024, &bl, NULL)); | |
670 | std::cout << bl.c_str() << std::endl; | |
671 | ASSERT_EQ(0, strncmp(canary_data, bl.c_str(), strlen(canary_data))); | |
672 | delete h; | |
7c673cae FG |
673 | } |
674 | fs.umount(); | |
7c673cae FG |
675 | } |
676 | ||
677 | TEST(BlueFS, test_compaction_async) { | |
678 | uint64_t size = 1048576 * 128; | |
9f95a23c | 679 | TempBdev bdev{size}; |
11fdf7f2 | 680 | g_ceph_context->_conf.set_val( |
7c673cae FG |
681 | "bluefs_alloc_size", |
682 | "65536"); | |
11fdf7f2 | 683 | g_ceph_context->_conf.set_val( |
7c673cae FG |
684 | "bluefs_compact_log_sync", |
685 | "false"); | |
1d09f67e TL |
686 | const char* canary_dir = "dir.after_compact_test"; |
687 | const char* canary_file = "file.after_compact_test"; | |
688 | const char* canary_data = "some random data"; | |
7c673cae FG |
689 | |
690 | BlueFS fs(g_ceph_context); | |
f67539c2 | 691 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 692 | uuid_d fsid; |
9f95a23c | 693 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 694 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 695 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
696 | { |
697 | std::vector<std::thread> write_threads; | |
698 | uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction | |
699 | uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS)); | |
700 | for (int i=0; i<NUM_WRITERS; i++) { | |
701 | write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes)); | |
702 | } | |
703 | ||
704 | std::vector<std::thread> sync_threads; | |
705 | for (int i=0; i<NUM_SYNC_THREADS; i++) { | |
706 | sync_threads.push_back(std::thread(sync_fs, std::ref(fs))); | |
707 | } | |
708 | ||
709 | join_all(write_threads); | |
710 | writes_done = true; | |
711 | join_all(sync_threads); | |
712 | fs.compact_log(); | |
1d09f67e TL |
713 | |
714 | { | |
715 | ASSERT_EQ(0, fs.mkdir(canary_dir)); | |
716 | BlueFS::FileWriter *h; | |
717 | ASSERT_EQ(0, fs.open_for_write(canary_dir, canary_file, &h, false)); | |
718 | ASSERT_NE(nullptr, h); | |
719 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
720 | h->append(canary_data, strlen(canary_data)); | |
721 | int r = fs.fsync(h); | |
722 | ASSERT_EQ(r, 0); | |
723 | } | |
724 | } | |
725 | fs.umount(); | |
726 | ||
727 | fs.mount(); | |
728 | { | |
729 | BlueFS::FileReader *h; | |
730 | ASSERT_EQ(0, fs.open_for_read(canary_dir, canary_file, &h)); | |
731 | ASSERT_NE(nullptr, h); | |
732 | bufferlist bl; | |
733 | ASSERT_EQ(strlen(canary_data), fs.read(h, 0, 1024, &bl, NULL)); | |
734 | std::cout << bl.c_str() << std::endl; | |
735 | ASSERT_EQ(0, strncmp(canary_data, bl.c_str(), strlen(canary_data))); | |
736 | delete h; | |
7c673cae FG |
737 | } |
738 | fs.umount(); | |
7c673cae FG |
739 | } |
740 | ||
741 | TEST(BlueFS, test_replay) { | |
742 | uint64_t size = 1048576 * 128; | |
9f95a23c | 743 | TempBdev bdev{size}; |
11fdf7f2 | 744 | g_ceph_context->_conf.set_val( |
7c673cae FG |
745 | "bluefs_alloc_size", |
746 | "65536"); | |
11fdf7f2 | 747 | g_ceph_context->_conf.set_val( |
7c673cae FG |
748 | "bluefs_compact_log_sync", |
749 | "false"); | |
750 | ||
751 | BlueFS fs(g_ceph_context); | |
f67539c2 | 752 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 753 | uuid_d fsid; |
9f95a23c | 754 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 755 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 756 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
757 | { |
758 | std::vector<std::thread> write_threads; | |
759 | uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction | |
760 | uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS)); | |
761 | for (int i=0; i<NUM_WRITERS; i++) { | |
762 | write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes)); | |
763 | } | |
764 | ||
765 | std::vector<std::thread> sync_threads; | |
766 | for (int i=0; i<NUM_SYNC_THREADS; i++) { | |
767 | sync_threads.push_back(std::thread(sync_fs, std::ref(fs))); | |
768 | } | |
769 | ||
770 | join_all(write_threads); | |
771 | writes_done = true; | |
772 | join_all(sync_threads); | |
773 | fs.compact_log(); | |
774 | } | |
775 | fs.umount(); | |
776 | // remount and check log can replay safe? | |
31f18b77 | 777 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 778 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae | 779 | fs.umount(); |
7c673cae FG |
780 | } |
781 | ||
f6b5b4d7 TL |
782 | TEST(BlueFS, test_replay_growth) { |
783 | uint64_t size = 1048576LL * (2 * 1024 + 128); | |
784 | TempBdev bdev{size}; | |
785 | ||
786 | ConfSaver conf(g_ceph_context->_conf); | |
787 | conf.SetVal("bluefs_alloc_size", "4096"); | |
788 | conf.SetVal("bluefs_shared_alloc_size", "4096"); | |
789 | conf.SetVal("bluefs_compact_log_sync", "false"); | |
790 | conf.SetVal("bluefs_min_log_runway", "32768"); | |
791 | conf.SetVal("bluefs_max_log_runway", "65536"); | |
792 | conf.SetVal("bluefs_allocator", "stupid"); | |
793 | conf.SetVal("bluefs_sync_write", "true"); | |
794 | conf.ApplyChanges(); | |
795 | ||
796 | BlueFS fs(g_ceph_context); | |
f67539c2 | 797 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
f6b5b4d7 TL |
798 | uuid_d fsid; |
799 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); | |
800 | ASSERT_EQ(0, fs.mount()); | |
801 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); | |
802 | ASSERT_EQ(0, fs.mkdir("dir")); | |
803 | ||
804 | char data[2000]; | |
805 | BlueFS::FileWriter *h; | |
806 | ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false)); | |
807 | for (size_t i = 0; i < 10000; i++) { | |
808 | h->append(data, 2000); | |
809 | fs.fsync(h); | |
810 | } | |
811 | fs.close_writer(h); | |
812 | fs.umount(true); //do not compact on exit! | |
813 | ||
814 | // remount and check log can replay safe? | |
815 | ASSERT_EQ(0, fs.mount()); | |
816 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); | |
817 | fs.umount(); | |
818 | } | |
819 | ||
522d829b TL |
820 | TEST(BlueFS, test_tracker_50965) { |
821 | uint64_t size_wal = 1048576 * 64; | |
822 | TempBdev bdev_wal{size_wal}; | |
823 | uint64_t size_db = 1048576 * 128; | |
824 | TempBdev bdev_db{size_db}; | |
825 | uint64_t size_slow = 1048576 * 256; | |
826 | TempBdev bdev_slow{size_slow}; | |
827 | ||
828 | ConfSaver conf(g_ceph_context->_conf); | |
829 | conf.SetVal("bluefs_min_flush_size", "65536"); | |
830 | conf.ApplyChanges(); | |
831 | ||
832 | BlueFS fs(g_ceph_context); | |
833 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false, 0)); | |
834 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0)); | |
835 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0)); | |
836 | uuid_d fsid; | |
837 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true })); | |
838 | ASSERT_EQ(0, fs.mount()); | |
839 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true })); | |
840 | ||
841 | string dir_slow = "dir.slow"; | |
842 | ASSERT_EQ(0, fs.mkdir(dir_slow)); | |
843 | string dir_db = "dir_db"; | |
844 | ASSERT_EQ(0, fs.mkdir(dir_db)); | |
845 | ||
846 | string file_slow = "file"; | |
847 | BlueFS::FileWriter *h_slow; | |
848 | ASSERT_EQ(0, fs.open_for_write(dir_slow, file_slow, &h_slow, false)); | |
849 | ASSERT_NE(nullptr, h_slow); | |
850 | ||
851 | string file_db = "file"; | |
852 | BlueFS::FileWriter *h_db; | |
853 | ASSERT_EQ(0, fs.open_for_write(dir_db, file_db, &h_db, false)); | |
854 | ASSERT_NE(nullptr, h_db); | |
855 | ||
856 | bufferlist bl1; | |
857 | std::unique_ptr<char[]> buf1 = gen_buffer(70000); | |
858 | bufferptr bp1 = buffer::claim_char(70000, buf1.get()); | |
859 | bl1.push_back(bp1); | |
860 | h_slow->append(bl1.c_str(), bl1.length()); | |
861 | fs.flush(h_slow); | |
862 | ||
863 | uint64_t h_slow_dirty_seq_1 = fs.debug_get_dirty_seq(h_slow); | |
864 | ||
865 | bufferlist bl2; | |
866 | std::unique_ptr<char[]> buf2 = gen_buffer(1000); | |
867 | bufferptr bp2 = buffer::claim_char(1000, buf2.get()); | |
868 | bl2.push_back(bp2); | |
869 | h_db->append(bl2.c_str(), bl2.length()); | |
870 | fs.fsync(h_db); | |
871 | ||
872 | uint64_t h_slow_dirty_seq_2 = fs.debug_get_dirty_seq(h_slow); | |
873 | bool h_slow_dev_dirty = fs.debug_get_is_dev_dirty(h_slow, BlueFS::BDEV_SLOW); | |
874 | ||
875 | //problem if allocations are stable in log but slow device is not flushed yet | |
876 | ASSERT_FALSE(h_slow_dirty_seq_1 != 0 && | |
877 | h_slow_dirty_seq_2 == 0 && | |
878 | h_slow_dev_dirty == true); | |
879 | ||
880 | fs.close_writer(h_slow); | |
881 | fs.close_writer(h_db); | |
882 | ||
883 | fs.umount(); | |
884 | } | |
885 | ||
20effc67 TL |
886 | TEST(BlueFS, test_truncate_stable_53129) { |
887 | ||
888 | ConfSaver conf(g_ceph_context->_conf); | |
889 | conf.SetVal("bluefs_min_flush_size", "65536"); | |
890 | conf.ApplyChanges(); | |
891 | ||
892 | uint64_t size_wal = 1048576 * 64; | |
893 | TempBdev bdev_wal{size_wal}; | |
894 | uint64_t size_db = 1048576 * 128; | |
895 | TempBdev bdev_db{size_db}; | |
896 | uint64_t size_slow = 1048576 * 256; | |
897 | TempBdev bdev_slow{size_slow}; | |
898 | ||
899 | BlueFS fs(g_ceph_context); | |
900 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false, 0)); | |
901 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0)); | |
902 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0)); | |
903 | uuid_d fsid; | |
904 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true })); | |
905 | ASSERT_EQ(0, fs.mount()); | |
906 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true })); | |
907 | ||
908 | string dir_slow = "dir.slow"; | |
909 | ASSERT_EQ(0, fs.mkdir(dir_slow)); | |
910 | string dir_db = "dir_db"; | |
911 | ASSERT_EQ(0, fs.mkdir(dir_db)); | |
912 | ||
913 | string file_slow = "file"; | |
914 | BlueFS::FileWriter *h_slow; | |
915 | ASSERT_EQ(0, fs.open_for_write(dir_slow, file_slow, &h_slow, false)); | |
916 | ASSERT_NE(nullptr, h_slow); | |
917 | ||
918 | string file_db = "file"; | |
919 | BlueFS::FileWriter *h_db; | |
920 | ASSERT_EQ(0, fs.open_for_write(dir_db, file_db, &h_db, false)); | |
921 | ASSERT_NE(nullptr, h_db); | |
922 | ||
923 | bufferlist bl1; | |
924 | std::unique_ptr<char[]> buf1 = gen_buffer(70000); | |
925 | bufferptr bp1 = buffer::claim_char(70000, buf1.get()); | |
926 | bl1.push_back(bp1); | |
927 | // add 70000 bytes | |
928 | h_slow->append(bl1.c_str(), bl1.length()); | |
929 | fs.flush(h_slow); | |
930 | // and truncate to 60000 bytes | |
931 | fs.truncate(h_slow, 60000); | |
932 | ||
933 | // write something to file on DB device | |
934 | bufferlist bl2; | |
935 | std::unique_ptr<char[]> buf2 = gen_buffer(1000); | |
936 | bufferptr bp2 = buffer::claim_char(1000, buf2.get()); | |
937 | bl2.push_back(bp2); | |
938 | h_db->append(bl2.c_str(), bl2.length()); | |
939 | // and force bluefs log to flush | |
940 | fs.fsync(h_db); | |
941 | ||
942 | // This is the actual test point. | |
943 | // We completed truncate, and we expect | |
944 | // - size to be 60000 | |
945 | // - data to be stable on slow device | |
946 | // OR | |
947 | // - size = 0 or file does not exist | |
948 | // - dev_dirty is irrelevant | |
949 | bool h_slow_dev_dirty = fs.debug_get_is_dev_dirty(h_slow, BlueFS::BDEV_SLOW); | |
950 | // Imagine power goes down here. | |
951 | ||
952 | fs.close_writer(h_slow); | |
953 | fs.close_writer(h_db); | |
954 | ||
955 | fs.umount(); | |
956 | ||
957 | ASSERT_EQ(0, fs.mount()); | |
958 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true })); | |
959 | ||
960 | uint64_t size; | |
961 | utime_t mtime; | |
962 | ASSERT_EQ(0, fs.stat("dir.slow", "file", &size, &mtime)); | |
963 | // check file size 60000 | |
964 | ASSERT_EQ(size, 60000); | |
965 | // check that dev_dirty was false (data stable on media) | |
966 | ASSERT_EQ(h_slow_dev_dirty, false); | |
967 | ||
968 | fs.umount(); | |
969 | } | |
970 | ||
971 | TEST(BlueFS, test_update_ino1_delta_after_replay) { | |
972 | uint64_t size = 1048576LL * (2 * 1024 + 128); | |
973 | TempBdev bdev{size}; | |
974 | ||
975 | ConfSaver conf(g_ceph_context->_conf); | |
976 | conf.SetVal("bluefs_alloc_size", "4096"); | |
977 | conf.SetVal("bluefs_shared_alloc_size", "4096"); | |
978 | conf.SetVal("bluefs_compact_log_sync", "false"); | |
979 | conf.SetVal("bluefs_min_log_runway", "32768"); | |
980 | conf.SetVal("bluefs_max_log_runway", "65536"); | |
981 | conf.SetVal("bluefs_allocator", "stupid"); | |
982 | conf.ApplyChanges(); | |
983 | ||
984 | BlueFS fs(g_ceph_context); | |
985 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); | |
986 | uuid_d fsid; | |
987 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); | |
988 | ASSERT_EQ(0, fs.mount()); | |
989 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); | |
990 | ASSERT_EQ(0, fs.mkdir("dir")); | |
991 | ||
992 | char data[2000]; | |
993 | BlueFS::FileWriter *h; | |
994 | ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false)); | |
995 | for (size_t i = 0; i < 100; i++) { | |
996 | h->append(data, 2000); | |
997 | fs.fsync(h); | |
998 | } | |
999 | fs.close_writer(h); | |
1000 | fs.umount(true); //do not compact on exit! | |
7c673cae | 1001 | |
20effc67 TL |
1002 | ASSERT_EQ(0, fs.mount()); |
1003 | ASSERT_EQ(0, fs.open_for_write("dir", "file2", &h, false)); | |
1004 | for (size_t i = 0; i < 100; i++) { | |
1005 | h->append(data, 2000); | |
1006 | fs.fsync(h); | |
1007 | } | |
1008 | fs.close_writer(h); | |
1009 | fs.umount(); | |
1010 | ||
1011 | // remount and check log can replay safe? | |
1012 | ASSERT_EQ(0, fs.mount()); | |
1013 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); | |
1014 | fs.umount(); | |
1015 | } | |
1016 | ||
39ae355f TL |
1017 | TEST(BlueFS, test_shared_alloc) { |
1018 | uint64_t size = 1048576 * 128; | |
1019 | TempBdev bdev_slow{size}; | |
1020 | uint64_t size_db = 1048576 * 8; | |
1021 | TempBdev bdev_db{size_db}; | |
1022 | ||
1023 | ConfSaver conf(g_ceph_context->_conf); | |
1024 | conf.SetVal("bluefs_shared_alloc_size", "1048576"); | |
1025 | ||
1026 | bluefs_shared_alloc_context_t shared_alloc; | |
1027 | uint64_t shared_alloc_unit = 4096; | |
1028 | shared_alloc.set( | |
1029 | Allocator::create(g_ceph_context, g_ceph_context->_conf->bluefs_allocator, | |
1030 | size, shared_alloc_unit, 0, 0, "test shared allocator"), | |
1031 | shared_alloc_unit); | |
1032 | shared_alloc.a->init_add_free(0, size); | |
1033 | ||
1034 | BlueFS fs(g_ceph_context); | |
1035 | // DB device is fully utilized | |
1036 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, size_db - 0x1000)); | |
1037 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0, | |
1038 | &shared_alloc)); | |
1039 | uuid_d fsid; | |
1040 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); | |
1041 | ASSERT_EQ(0, fs.mount()); | |
1042 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); | |
1043 | { | |
1044 | for (int i=0; i<10; i++) { | |
1045 | string dir = "dir."; | |
1046 | dir.append(to_string(i)); | |
1047 | ASSERT_EQ(0, fs.mkdir(dir)); | |
1048 | for (int j=0; j<10; j++) { | |
1049 | string file = "file."; | |
1050 | file.append(to_string(j)); | |
1051 | BlueFS::FileWriter *h; | |
1052 | ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false)); | |
1053 | ASSERT_NE(nullptr, h); | |
1054 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
1055 | bufferlist bl; | |
1056 | std::unique_ptr<char[]> buf = gen_buffer(4096); | |
1057 | bufferptr bp = buffer::claim_char(4096, buf.get()); | |
1058 | bl.push_back(bp); | |
1059 | h->append(bl.c_str(), bl.length()); | |
1060 | fs.fsync(h); | |
1061 | } | |
1062 | } | |
1063 | } | |
1064 | { | |
1065 | for (int i=0; i<10; i+=2) { | |
1066 | string dir = "dir."; | |
1067 | dir.append(to_string(i)); | |
1068 | for (int j=0; j<10; j++) { | |
1069 | string file = "file."; | |
1070 | file.append(to_string(j)); | |
1071 | fs.unlink(dir, file); | |
1072 | fs.sync_metadata(false); | |
1073 | } | |
1074 | ASSERT_EQ(0, fs.rmdir(dir)); | |
1075 | fs.sync_metadata(false); | |
1076 | } | |
1077 | } | |
1078 | fs.compact_log(); | |
1079 | auto *logger = fs.get_perf_counters(); | |
1080 | ASSERT_NE(logger->get(l_bluefs_alloc_shared_dev_fallbacks), 0); | |
1081 | auto num_files = logger->get(l_bluefs_num_files); | |
1082 | fs.umount(); | |
1083 | fs.mount(); | |
1084 | ASSERT_EQ(num_files, logger->get(l_bluefs_num_files)); | |
1085 | fs.umount(); | |
1086 | } | |
1087 | ||
1088 | TEST(BlueFS, test_shared_alloc_sparse) { | |
1089 | uint64_t size = 1048576 * 128 * 2; | |
1090 | uint64_t main_unit = 4096; | |
1091 | uint64_t bluefs_alloc_unit = 1048576; | |
1092 | TempBdev bdev_slow{size}; | |
1093 | ||
1094 | ConfSaver conf(g_ceph_context->_conf); | |
1095 | conf.SetVal("bluefs_shared_alloc_size", | |
1096 | stringify(bluefs_alloc_unit).c_str()); | |
1097 | ||
1098 | bluefs_shared_alloc_context_t shared_alloc; | |
1099 | shared_alloc.set( | |
1100 | Allocator::create(g_ceph_context, g_ceph_context->_conf->bluefs_allocator, | |
1101 | size, main_unit, 0, 0, "test shared allocator"), | |
1102 | main_unit); | |
1103 | // prepare sparse free space but let's have a continuous chunk at | |
1104 | // the beginning to fit initial log's fnode into superblock, | |
1105 | // we don't have any tricks to deal with sparse allocations | |
1106 | // (and hence long fnode) at mkfs | |
1107 | shared_alloc.a->init_add_free(bluefs_alloc_unit, 4 * bluefs_alloc_unit); | |
1108 | for(uint64_t i = 5 * bluefs_alloc_unit; i < size; i += 2 * main_unit) { | |
1109 | shared_alloc.a->init_add_free(i, main_unit); | |
1110 | } | |
1111 | ||
1112 | BlueFS fs(g_ceph_context); | |
1113 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_slow.path, false, 0, | |
1114 | &shared_alloc)); | |
1115 | uuid_d fsid; | |
1116 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); | |
1117 | ASSERT_EQ(0, fs.mount()); | |
1118 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); | |
1119 | { | |
1120 | for (int i=0; i<10; i++) { | |
1121 | string dir = "dir."; | |
1122 | dir.append(to_string(i)); | |
1123 | ASSERT_EQ(0, fs.mkdir(dir)); | |
1124 | for (int j=0; j<10; j++) { | |
1125 | string file = "file."; | |
1126 | file.append(to_string(j)); | |
1127 | BlueFS::FileWriter *h; | |
1128 | ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false)); | |
1129 | ASSERT_NE(nullptr, h); | |
1130 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
1131 | bufferlist bl; | |
1132 | std::unique_ptr<char[]> buf = gen_buffer(4096); | |
1133 | bufferptr bp = buffer::claim_char(4096, buf.get()); | |
1134 | bl.push_back(bp); | |
1135 | h->append(bl.c_str(), bl.length()); | |
1136 | fs.fsync(h); | |
1137 | } | |
1138 | } | |
1139 | } | |
1140 | { | |
1141 | for (int i=0; i<10; i+=2) { | |
1142 | string dir = "dir."; | |
1143 | dir.append(to_string(i)); | |
1144 | for (int j=0; j<10; j++) { | |
1145 | string file = "file."; | |
1146 | file.append(to_string(j)); | |
1147 | fs.unlink(dir, file); | |
1148 | fs.sync_metadata(false); | |
1149 | } | |
1150 | ASSERT_EQ(0, fs.rmdir(dir)); | |
1151 | fs.sync_metadata(false); | |
1152 | } | |
1153 | } | |
1154 | fs.compact_log(); | |
1155 | auto *logger = fs.get_perf_counters(); | |
1156 | ASSERT_NE(logger->get(l_bluefs_alloc_shared_size_fallbacks), 0); | |
1157 | auto num_files = logger->get(l_bluefs_num_files); | |
1158 | fs.umount(); | |
1159 | ||
1160 | fs.mount(); | |
1161 | ASSERT_EQ(num_files, logger->get(l_bluefs_num_files)); | |
1162 | fs.umount(); | |
1163 | } | |
1164 | ||
1165 | TEST(BlueFS, test_4k_shared_alloc) { | |
1166 | uint64_t size = 1048576 * 128 * 2; | |
1167 | uint64_t main_unit = 4096; | |
1168 | uint64_t bluefs_alloc_unit = main_unit; | |
1169 | TempBdev bdev_slow{size}; | |
1170 | ||
1171 | ConfSaver conf(g_ceph_context->_conf); | |
1172 | conf.SetVal("bluefs_shared_alloc_size", | |
1173 | stringify(bluefs_alloc_unit).c_str()); | |
1174 | ||
1175 | bluefs_shared_alloc_context_t shared_alloc; | |
1176 | shared_alloc.set( | |
1177 | Allocator::create(g_ceph_context, g_ceph_context->_conf->bluefs_allocator, | |
1178 | size, main_unit, 0, 0, "test shared allocator"), | |
1179 | main_unit); | |
1180 | shared_alloc.a->init_add_free(bluefs_alloc_unit, size - bluefs_alloc_unit); | |
1181 | ||
1182 | BlueFS fs(g_ceph_context); | |
1183 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_slow.path, false, 0, | |
1184 | &shared_alloc)); | |
1185 | uuid_d fsid; | |
1186 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); | |
1187 | ASSERT_EQ(0, fs.mount()); | |
1188 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); | |
1189 | { | |
1190 | for (int i=0; i<10; i++) { | |
1191 | string dir = "dir."; | |
1192 | dir.append(to_string(i)); | |
1193 | ASSERT_EQ(0, fs.mkdir(dir)); | |
1194 | for (int j=0; j<10; j++) { | |
1195 | string file = "file."; | |
1196 | file.append(to_string(j)); | |
1197 | BlueFS::FileWriter *h; | |
1198 | ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false)); | |
1199 | ASSERT_NE(nullptr, h); | |
1200 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
1201 | bufferlist bl; | |
1202 | std::unique_ptr<char[]> buf = gen_buffer(4096); | |
1203 | bufferptr bp = buffer::claim_char(4096, buf.get()); | |
1204 | bl.push_back(bp); | |
1205 | h->append(bl.c_str(), bl.length()); | |
1206 | fs.fsync(h); | |
1207 | } | |
1208 | } | |
1209 | } | |
1210 | { | |
1211 | for (int i=0; i<10; i+=2) { | |
1212 | string dir = "dir."; | |
1213 | dir.append(to_string(i)); | |
1214 | for (int j=0; j<10; j++) { | |
1215 | string file = "file."; | |
1216 | file.append(to_string(j)); | |
1217 | fs.unlink(dir, file); | |
1218 | fs.sync_metadata(false); | |
1219 | } | |
1220 | ASSERT_EQ(0, fs.rmdir(dir)); | |
1221 | fs.sync_metadata(false); | |
1222 | } | |
1223 | } | |
1224 | fs.compact_log(); | |
1225 | auto *logger = fs.get_perf_counters(); | |
1226 | ASSERT_EQ(logger->get(l_bluefs_alloc_shared_dev_fallbacks), 0); | |
1227 | ASSERT_EQ(logger->get(l_bluefs_alloc_shared_size_fallbacks), 0); | |
1228 | auto num_files = logger->get(l_bluefs_num_files); | |
1229 | fs.umount(); | |
1230 | ||
1231 | fs.mount(); | |
1232 | ASSERT_EQ(num_files, logger->get(l_bluefs_num_files)); | |
1233 | fs.umount(); | |
1234 | } | |
1235 | ||
20effc67 TL |
1236 | int main(int argc, char **argv) { |
1237 | auto args = argv_to_vec(argc, argv); | |
11fdf7f2 TL |
1238 | map<string,string> defaults = { |
1239 | { "debug_bluefs", "1/20" }, | |
1240 | { "debug_bdev", "1/20" } | |
1241 | }; | |
7c673cae | 1242 | |
11fdf7f2 | 1243 | auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT, |
7c673cae | 1244 | CODE_ENVIRONMENT_UTILITY, |
11fdf7f2 | 1245 | CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); |
7c673cae | 1246 | common_init_finish(g_ceph_context); |
11fdf7f2 | 1247 | g_ceph_context->_conf.set_val( |
7c673cae FG |
1248 | "enable_experimental_unrecoverable_data_corrupting_features", |
1249 | "*"); | |
11fdf7f2 | 1250 | g_ceph_context->_conf.apply_changes(nullptr); |
7c673cae FG |
1251 | |
1252 | ::testing::InitGoogleTest(&argc, argv); | |
1253 | return RUN_ALL_TESTS(); | |
1254 | } |