]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <fcntl.h>
#include <unistd.h>

#include <atomic>
#include <iostream>
#include <random>
#include <stack>
#include <thread>

#include "global/global_init.h"
#include "common/ceph_argparse.h"
#include "include/stringify.h"
#include "include/scope_guard.h"
#include "common/errno.h"
#include <gtest/gtest.h>

#include "os/bluestore/BlueFS.h"
21 | ||
20effc67 TL |
22 | using namespace std; |
23 | ||
// Allocates `size` bytes and fills them from a default-constructed
// (i.e. default-seeded) byte-wide random engine, so every call yields the
// same byte sequence for a given size.
std::unique_ptr<char[]> gen_buffer(uint64_t size)
{
  using byte_engine =
    std::independent_bits_engine<std::default_random_engine, CHAR_BIT, unsigned char>;

  auto buffer = std::make_unique<char[]>(size);
  byte_engine e;
  char* const last = buffer.get() + size;
  for (char* cur = buffer.get(); cur != last; ++cur) {
    *cur = e();
  }
  return buffer;
}
31 | ||
9f95a23c TL |
32 | class TempBdev { |
33 | public: | |
34 | TempBdev(uint64_t size) | |
35 | : path{get_temp_bdev(size)} | |
36 | {} | |
37 | ~TempBdev() { | |
38 | rm_temp_bdev(path); | |
39 | } | |
40 | const std::string path; | |
41 | private: | |
42 | static string get_temp_bdev(uint64_t size) | |
43 | { | |
44 | static int n = 0; | |
45 | string fn = "ceph_test_bluefs.tmp.block." + stringify(getpid()) | |
46 | + "." + stringify(++n); | |
47 | int fd = ::open(fn.c_str(), O_CREAT|O_RDWR|O_TRUNC, 0644); | |
48 | ceph_assert(fd >= 0); | |
49 | int r = ::ftruncate(fd, size); | |
50 | ceph_assert(r >= 0); | |
51 | ::close(fd); | |
52 | return fn; | |
53 | } | |
54 | static void rm_temp_bdev(string f) | |
55 | { | |
56 | ::unlink(f.c_str()); | |
57 | } | |
58 | }; | |
7c673cae | 59 | |
f6b5b4d7 TL |
60 | class ConfSaver { |
61 | std::stack<std::pair<std::string, std::string>> saved_settings; | |
62 | ConfigProxy& conf; | |
63 | public: | |
64 | ConfSaver(ConfigProxy& conf) : conf(conf) { | |
65 | conf._clear_safe_to_start_threads(); | |
66 | }; | |
67 | ~ConfSaver() { | |
68 | conf._clear_safe_to_start_threads(); | |
69 | while(saved_settings.size() > 0) { | |
70 | auto& e = saved_settings.top(); | |
71 | conf.set_val_or_die(e.first, e.second); | |
72 | saved_settings.pop(); | |
73 | } | |
74 | conf.set_safe_to_start_threads(); | |
75 | conf.apply_changes(nullptr); | |
76 | } | |
77 | void SetVal(const char* key, const char* val) { | |
78 | std::string skey(key); | |
79 | std::string prev_val; | |
80 | conf.get_val(skey, &prev_val); | |
81 | conf.set_val_or_die(skey, val); | |
82 | saved_settings.emplace(skey, prev_val); | |
83 | } | |
84 | void ApplyChanges() { | |
85 | conf.set_safe_to_start_threads(); | |
86 | conf.apply_changes(nullptr); | |
87 | } | |
88 | }; | |
89 | ||
7c673cae FG |
90 | TEST(BlueFS, mkfs) { |
91 | uint64_t size = 1048576 * 128; | |
9f95a23c | 92 | TempBdev bdev{size}; |
7c673cae FG |
93 | uuid_d fsid; |
94 | BlueFS fs(g_ceph_context); | |
f67539c2 | 95 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
9f95a23c | 96 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
97 | } |
98 | ||
99 | TEST(BlueFS, mkfs_mount) { | |
100 | uint64_t size = 1048576 * 128; | |
9f95a23c | 101 | TempBdev bdev{size}; |
7c673cae | 102 | BlueFS fs(g_ceph_context); |
f67539c2 | 103 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 104 | uuid_d fsid; |
9f95a23c | 105 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 106 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 107 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
108 | ASSERT_EQ(fs.get_total(BlueFS::BDEV_DB), size - 1048576); |
109 | ASSERT_LT(fs.get_free(BlueFS::BDEV_DB), size - 1048576); | |
110 | fs.umount(); | |
9f95a23c TL |
111 | } |
112 | ||
7c673cae FG |
113 | TEST(BlueFS, write_read) { |
114 | uint64_t size = 1048576 * 128; | |
9f95a23c | 115 | TempBdev bdev{size}; |
7c673cae | 116 | BlueFS fs(g_ceph_context); |
f67539c2 | 117 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 118 | uuid_d fsid; |
9f95a23c | 119 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 120 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 121 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
122 | { |
123 | BlueFS::FileWriter *h; | |
124 | ASSERT_EQ(0, fs.mkdir("dir")); | |
125 | ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false)); | |
126 | h->append("foo", 3); | |
127 | h->append("bar", 3); | |
128 | h->append("baz", 3); | |
129 | fs.fsync(h); | |
130 | fs.close_writer(h); | |
131 | } | |
132 | { | |
133 | BlueFS::FileReader *h; | |
134 | ASSERT_EQ(0, fs.open_for_read("dir", "file", &h)); | |
135 | bufferlist bl; | |
f67539c2 | 136 | ASSERT_EQ(9, fs.read(h, 0, 1024, &bl, NULL)); |
7c673cae FG |
137 | ASSERT_EQ(0, strncmp("foobarbaz", bl.c_str(), 9)); |
138 | delete h; | |
139 | } | |
140 | fs.umount(); | |
7c673cae FG |
141 | } |
142 | ||
143 | TEST(BlueFS, small_appends) { | |
144 | uint64_t size = 1048576 * 128; | |
9f95a23c | 145 | TempBdev bdev{size}; |
7c673cae | 146 | BlueFS fs(g_ceph_context); |
f67539c2 | 147 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 148 | uuid_d fsid; |
9f95a23c | 149 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 150 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 151 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
152 | { |
153 | BlueFS::FileWriter *h; | |
154 | ASSERT_EQ(0, fs.mkdir("dir")); | |
155 | ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false)); | |
156 | for (unsigned i = 0; i < 10000; ++i) { | |
157 | h->append("abcdeabcdeabcdeabcdeabcdeabc", 23); | |
158 | } | |
159 | fs.fsync(h); | |
160 | fs.close_writer(h); | |
161 | } | |
162 | { | |
163 | BlueFS::FileWriter *h; | |
164 | ASSERT_EQ(0, fs.open_for_write("dir", "file_sync", &h, false)); | |
165 | for (unsigned i = 0; i < 1000; ++i) { | |
166 | h->append("abcdeabcdeabcdeabcdeabcdeabc", 23); | |
31f18b77 | 167 | ASSERT_EQ(0, fs.fsync(h)); |
7c673cae FG |
168 | } |
169 | fs.close_writer(h); | |
170 | } | |
171 | fs.umount(); | |
7c673cae FG |
172 | } |
173 | ||
494da23a | 174 | TEST(BlueFS, very_large_write) { |
adb31ebb | 175 | // we'll write a ~5G file, so allocate more than that for the whole fs |
cd265ab1 | 176 | uint64_t size = 1048576 * 1024 * 6ull; |
9f95a23c | 177 | TempBdev bdev{size}; |
494da23a TL |
178 | BlueFS fs(g_ceph_context); |
179 | ||
180 | bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io"); | |
181 | g_ceph_context->_conf.set_val("bluefs_buffered_io", "false"); | |
adb31ebb | 182 | uint64_t total_written = 0; |
494da23a | 183 | |
f67539c2 | 184 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
494da23a | 185 | uuid_d fsid; |
9f95a23c | 186 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
494da23a | 187 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 188 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
494da23a TL |
189 | char buf[1048571]; // this is biggish, but intentionally not evenly aligned |
190 | for (unsigned i = 0; i < sizeof(buf); ++i) { | |
191 | buf[i] = i; | |
192 | } | |
193 | { | |
194 | BlueFS::FileWriter *h; | |
195 | ASSERT_EQ(0, fs.mkdir("dir")); | |
196 | ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false)); | |
197 | for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) { | |
198 | h->append(buf, sizeof(buf)); | |
adb31ebb TL |
199 | total_written += sizeof(buf); |
200 | } | |
201 | fs.fsync(h); | |
202 | for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) { | |
203 | h->append(buf, sizeof(buf)); | |
204 | total_written += sizeof(buf); | |
494da23a TL |
205 | } |
206 | fs.fsync(h); | |
207 | fs.close_writer(h); | |
208 | } | |
209 | { | |
210 | BlueFS::FileReader *h; | |
211 | ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h)); | |
212 | bufferlist bl; | |
adb31ebb | 213 | ASSERT_EQ(h->file->fnode.size, total_written); |
494da23a TL |
214 | for (unsigned i = 0; i < 3*1024*1048576ull / sizeof(buf); ++i) { |
215 | bl.clear(); | |
f67539c2 | 216 | fs.read(h, i * sizeof(buf), sizeof(buf), &bl, NULL); |
494da23a TL |
217 | int r = memcmp(buf, bl.c_str(), sizeof(buf)); |
218 | if (r) { | |
219 | cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r | |
220 | << std::endl; | |
221 | } | |
222 | ASSERT_EQ(0, r); | |
223 | } | |
adb31ebb TL |
224 | for (unsigned i = 0; i < 2*1024*1048576ull / sizeof(buf); ++i) { |
225 | bl.clear(); | |
f67539c2 | 226 | fs.read(h, i * sizeof(buf), sizeof(buf), &bl, NULL); |
adb31ebb TL |
227 | int r = memcmp(buf, bl.c_str(), sizeof(buf)); |
228 | if (r) { | |
229 | cerr << "read got mismatch at offset " << i*sizeof(buf) << " r " << r | |
230 | << std::endl; | |
231 | } | |
232 | ASSERT_EQ(0, r); | |
233 | } | |
234 | delete h; | |
235 | ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h)); | |
236 | ASSERT_EQ(h->file->fnode.size, total_written); | |
237 | unique_ptr<char> huge_buf(new char[h->file->fnode.size]); | |
238 | auto l = h->file->fnode.size; | |
f67539c2 TL |
239 | int64_t r = fs.read(h, 0, l, NULL, huge_buf.get()); |
240 | ASSERT_EQ(r, l); | |
cd265ab1 TL |
241 | delete h; |
242 | } | |
243 | fs.umount(); | |
244 | ||
245 | g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old)); | |
246 | } | |
247 | ||
248 | TEST(BlueFS, very_large_write2) { | |
249 | // we'll write a ~5G file, so allocate more than that for the whole fs | |
250 | uint64_t size_full = 1048576 * 1024 * 6ull; | |
251 | uint64_t size = 1048576 * 1024 * 5ull; | |
252 | TempBdev bdev{ size_full }; | |
253 | BlueFS fs(g_ceph_context); | |
254 | ||
255 | bool old = g_ceph_context->_conf.get_val<bool>("bluefs_buffered_io"); | |
256 | g_ceph_context->_conf.set_val("bluefs_buffered_io", "false"); | |
257 | uint64_t total_written = 0; | |
258 | ||
259 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); | |
cd265ab1 TL |
260 | uuid_d fsid; |
261 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); | |
262 | ASSERT_EQ(0, fs.mount()); | |
263 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); | |
264 | ||
265 | char fill_arr[1 << 20]; // 1M | |
266 | for (size_t i = 0; i < sizeof(fill_arr); ++i) { | |
267 | fill_arr[i] = (char)i; | |
268 | } | |
269 | std::unique_ptr<char[]> buf; | |
270 | buf.reset(new char[size]); | |
271 | for (size_t i = 0; i < size; i += sizeof(fill_arr)) { | |
272 | memcpy(buf.get() + i, fill_arr, sizeof(fill_arr)); | |
273 | } | |
274 | { | |
275 | BlueFS::FileWriter* h; | |
276 | ASSERT_EQ(0, fs.mkdir("dir")); | |
277 | ASSERT_EQ(0, fs.open_for_write("dir", "bigfile", &h, false)); | |
278 | fs.append_try_flush(h, buf.get(), size); | |
279 | total_written = size; | |
280 | fs.fsync(h); | |
281 | fs.close_writer(h); | |
282 | } | |
283 | memset(buf.get(), 0, size); | |
284 | { | |
285 | BlueFS::FileReader* h; | |
286 | ASSERT_EQ(0, fs.open_for_read("dir", "bigfile", &h)); | |
287 | ASSERT_EQ(h->file->fnode.size, total_written); | |
288 | auto l = h->file->fnode.size; | |
f67539c2 TL |
289 | int64_t r = fs.read(h, 0, l, NULL, buf.get()); |
290 | ASSERT_EQ(r, l); | |
cd265ab1 TL |
291 | for (size_t i = 0; i < size; i += sizeof(fill_arr)) { |
292 | ceph_assert(memcmp(buf.get() + i, fill_arr, sizeof(fill_arr)) == 0); | |
293 | } | |
494da23a TL |
294 | delete h; |
295 | } | |
296 | fs.umount(); | |
297 | ||
298 | g_ceph_context->_conf.set_val("bluefs_buffered_io", stringify((int)old)); | |
494da23a TL |
299 | } |
300 | ||
7c673cae FG |
301 | #define ALLOC_SIZE 4096 |
302 | ||
303 | void write_data(BlueFS &fs, uint64_t rationed_bytes) | |
304 | { | |
7c673cae FG |
305 | int j=0, r=0; |
306 | uint64_t written_bytes = 0; | |
307 | rationed_bytes -= ALLOC_SIZE; | |
308 | stringstream ss; | |
309 | string dir = "dir."; | |
310 | ss << std::this_thread::get_id(); | |
311 | dir.append(ss.str()); | |
312 | dir.append("."); | |
313 | dir.append(to_string(j)); | |
314 | ASSERT_EQ(0, fs.mkdir(dir)); | |
315 | while (1) { | |
316 | string file = "file."; | |
317 | file.append(to_string(j)); | |
11fdf7f2 | 318 | BlueFS::FileWriter *h; |
7c673cae | 319 | ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false)); |
11fdf7f2 TL |
320 | ASSERT_NE(nullptr, h); |
321 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
7c673cae | 322 | bufferlist bl; |
11fdf7f2 TL |
323 | std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE); |
324 | bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get()); | |
7c673cae FG |
325 | bl.push_back(bp); |
326 | h->append(bl.c_str(), bl.length()); | |
327 | r = fs.fsync(h); | |
328 | if (r < 0) { | |
7c673cae FG |
329 | break; |
330 | } | |
11fdf7f2 | 331 | written_bytes += g_conf()->bluefs_alloc_size; |
7c673cae | 332 | j++; |
11fdf7f2 | 333 | if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) { |
7c673cae FG |
334 | break; |
335 | } | |
336 | } | |
337 | } | |
338 | ||
339 | void create_single_file(BlueFS &fs) | |
340 | { | |
341 | BlueFS::FileWriter *h; | |
342 | stringstream ss; | |
343 | string dir = "dir.test"; | |
344 | ASSERT_EQ(0, fs.mkdir(dir)); | |
345 | string file = "testfile"; | |
346 | ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false)); | |
347 | bufferlist bl; | |
11fdf7f2 TL |
348 | std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE); |
349 | bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get()); | |
7c673cae FG |
350 | bl.push_back(bp); |
351 | h->append(bl.c_str(), bl.length()); | |
352 | fs.fsync(h); | |
353 | fs.close_writer(h); | |
354 | } | |
355 | ||
356 | void write_single_file(BlueFS &fs, uint64_t rationed_bytes) | |
357 | { | |
7c673cae | 358 | stringstream ss; |
11fdf7f2 TL |
359 | const string dir = "dir.test"; |
360 | const string file = "testfile"; | |
7c673cae FG |
361 | uint64_t written_bytes = 0; |
362 | rationed_bytes -= ALLOC_SIZE; | |
363 | while (1) { | |
11fdf7f2 | 364 | BlueFS::FileWriter *h; |
7c673cae | 365 | ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false)); |
11fdf7f2 TL |
366 | ASSERT_NE(nullptr, h); |
367 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
7c673cae | 368 | bufferlist bl; |
11fdf7f2 TL |
369 | std::unique_ptr<char[]> buf = gen_buffer(ALLOC_SIZE); |
370 | bufferptr bp = buffer::claim_char(ALLOC_SIZE, buf.get()); | |
7c673cae FG |
371 | bl.push_back(bp); |
372 | h->append(bl.c_str(), bl.length()); | |
11fdf7f2 | 373 | int r = fs.fsync(h); |
7c673cae | 374 | if (r < 0) { |
7c673cae FG |
375 | break; |
376 | } | |
11fdf7f2 TL |
377 | written_bytes += g_conf()->bluefs_alloc_size; |
378 | if ((rationed_bytes - written_bytes) <= g_conf()->bluefs_alloc_size) { | |
7c673cae FG |
379 | break; |
380 | } | |
381 | } | |
382 | } | |
383 | ||
384 | bool writes_done = false; | |
385 | ||
386 | void sync_fs(BlueFS &fs) | |
387 | { | |
388 | while (1) { | |
389 | if (writes_done == true) | |
390 | break; | |
1911f103 | 391 | fs.sync_metadata(false); |
7c673cae FG |
392 | sleep(1); |
393 | } | |
394 | } | |
395 | ||
396 | ||
// Joins a single thread.
void do_join(std::thread& t)
{
  t.join();
}

// Joins every thread in `v`, front to back.
void join_all(std::vector<std::thread>& v)
{
  for (std::thread& worker : v) {
    do_join(worker);
  }
}
406 | ||
407 | #define NUM_WRITERS 3 | |
408 | #define NUM_SYNC_THREADS 1 | |
409 | ||
410 | #define NUM_SINGLE_FILE_WRITERS 1 | |
411 | #define NUM_MULTIPLE_FILE_WRITERS 2 | |
412 | ||
413 | TEST(BlueFS, test_flush_1) { | |
414 | uint64_t size = 1048576 * 128; | |
9f95a23c | 415 | TempBdev bdev{size}; |
11fdf7f2 | 416 | g_ceph_context->_conf.set_val( |
7c673cae FG |
417 | "bluefs_alloc_size", |
418 | "65536"); | |
11fdf7f2 | 419 | g_ceph_context->_conf.apply_changes(nullptr); |
7c673cae FG |
420 | |
421 | BlueFS fs(g_ceph_context); | |
f67539c2 | 422 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 423 | uuid_d fsid; |
9f95a23c | 424 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 425 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 426 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
427 | { |
428 | std::vector<std::thread> write_thread_multiple; | |
429 | uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction | |
430 | uint64_t per_thread_bytes = (effective_size/(NUM_MULTIPLE_FILE_WRITERS + NUM_SINGLE_FILE_WRITERS)); | |
431 | for (int i=0; i<NUM_MULTIPLE_FILE_WRITERS ; i++) { | |
432 | write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes)); | |
433 | } | |
434 | ||
435 | create_single_file(fs); | |
436 | std::vector<std::thread> write_thread_single; | |
437 | for (int i=0; i<NUM_SINGLE_FILE_WRITERS; i++) { | |
438 | write_thread_single.push_back(std::thread(write_single_file, std::ref(fs), per_thread_bytes)); | |
439 | } | |
440 | ||
441 | join_all(write_thread_single); | |
442 | join_all(write_thread_multiple); | |
443 | } | |
444 | fs.umount(); | |
7c673cae FG |
445 | } |
446 | ||
447 | TEST(BlueFS, test_flush_2) { | |
448 | uint64_t size = 1048576 * 256; | |
9f95a23c | 449 | TempBdev bdev{size}; |
11fdf7f2 | 450 | g_ceph_context->_conf.set_val( |
7c673cae FG |
451 | "bluefs_alloc_size", |
452 | "65536"); | |
11fdf7f2 | 453 | g_ceph_context->_conf.apply_changes(nullptr); |
7c673cae FG |
454 | |
455 | BlueFS fs(g_ceph_context); | |
f67539c2 | 456 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 457 | uuid_d fsid; |
9f95a23c | 458 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 459 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 460 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
461 | { |
462 | uint64_t effective_size = size - (128 * 1048576); // leaving the last 32 MB for log compaction | |
463 | uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS)); | |
464 | std::vector<std::thread> write_thread_multiple; | |
465 | for (int i=0; i<NUM_WRITERS; i++) { | |
466 | write_thread_multiple.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes)); | |
467 | } | |
468 | ||
469 | join_all(write_thread_multiple); | |
470 | } | |
471 | fs.umount(); | |
7c673cae FG |
472 | } |
473 | ||
474 | TEST(BlueFS, test_flush_3) { | |
475 | uint64_t size = 1048576 * 256; | |
9f95a23c | 476 | TempBdev bdev{size}; |
11fdf7f2 | 477 | g_ceph_context->_conf.set_val( |
7c673cae FG |
478 | "bluefs_alloc_size", |
479 | "65536"); | |
11fdf7f2 | 480 | g_ceph_context->_conf.apply_changes(nullptr); |
7c673cae FG |
481 | |
482 | BlueFS fs(g_ceph_context); | |
f67539c2 | 483 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 484 | uuid_d fsid; |
9f95a23c | 485 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 486 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 487 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
488 | { |
489 | std::vector<std::thread> write_threads; | |
490 | uint64_t effective_size = size - (64 * 1048576); // leaving the last 11 MB for log compaction | |
491 | uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS)); | |
492 | for (int i=0; i<NUM_WRITERS; i++) { | |
493 | write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes)); | |
494 | } | |
495 | ||
496 | std::vector<std::thread> sync_threads; | |
497 | for (int i=0; i<NUM_SYNC_THREADS; i++) { | |
498 | sync_threads.push_back(std::thread(sync_fs, std::ref(fs))); | |
499 | } | |
500 | ||
501 | join_all(write_threads); | |
502 | writes_done = true; | |
503 | join_all(sync_threads); | |
504 | } | |
505 | fs.umount(); | |
7c673cae FG |
506 | } |
507 | ||
508 | TEST(BlueFS, test_simple_compaction_sync) { | |
11fdf7f2 | 509 | g_ceph_context->_conf.set_val( |
7c673cae FG |
510 | "bluefs_compact_log_sync", |
511 | "true"); | |
512 | uint64_t size = 1048576 * 128; | |
9f95a23c | 513 | TempBdev bdev{size}; |
7c673cae FG |
514 | |
515 | BlueFS fs(g_ceph_context); | |
f67539c2 | 516 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 517 | uuid_d fsid; |
9f95a23c | 518 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 519 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 520 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae | 521 | { |
7c673cae FG |
522 | for (int i=0; i<10; i++) { |
523 | string dir = "dir."; | |
524 | dir.append(to_string(i)); | |
525 | ASSERT_EQ(0, fs.mkdir(dir)); | |
526 | for (int j=0; j<10; j++) { | |
527 | string file = "file."; | |
528 | file.append(to_string(j)); | |
11fdf7f2 | 529 | BlueFS::FileWriter *h; |
7c673cae | 530 | ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false)); |
11fdf7f2 TL |
531 | ASSERT_NE(nullptr, h); |
532 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
7c673cae | 533 | bufferlist bl; |
11fdf7f2 TL |
534 | std::unique_ptr<char[]> buf = gen_buffer(4096); |
535 | bufferptr bp = buffer::claim_char(4096, buf.get()); | |
7c673cae FG |
536 | bl.push_back(bp); |
537 | h->append(bl.c_str(), bl.length()); | |
538 | fs.fsync(h); | |
7c673cae FG |
539 | } |
540 | } | |
541 | } | |
7c673cae FG |
542 | { |
543 | for (int i=0; i<10; i+=2) { | |
544 | string dir = "dir."; | |
545 | dir.append(to_string(i)); | |
11fdf7f2 | 546 | for (int j=0; j<10; j++) { |
7c673cae FG |
547 | string file = "file."; |
548 | file.append(to_string(j)); | |
549 | fs.unlink(dir, file); | |
1911f103 | 550 | fs.sync_metadata(false); |
7c673cae | 551 | } |
11fdf7f2 | 552 | ASSERT_EQ(0, fs.rmdir(dir)); |
1911f103 | 553 | fs.sync_metadata(false); |
7c673cae FG |
554 | } |
555 | } | |
556 | fs.compact_log(); | |
557 | fs.umount(); | |
7c673cae FG |
558 | } |
559 | ||
560 | TEST(BlueFS, test_simple_compaction_async) { | |
11fdf7f2 | 561 | g_ceph_context->_conf.set_val( |
7c673cae FG |
562 | "bluefs_compact_log_sync", |
563 | "false"); | |
564 | uint64_t size = 1048576 * 128; | |
9f95a23c | 565 | TempBdev bdev{size}; |
7c673cae FG |
566 | |
567 | BlueFS fs(g_ceph_context); | |
f67539c2 | 568 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 569 | uuid_d fsid; |
9f95a23c | 570 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 571 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 572 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae | 573 | { |
7c673cae FG |
574 | for (int i=0; i<10; i++) { |
575 | string dir = "dir."; | |
576 | dir.append(to_string(i)); | |
577 | ASSERT_EQ(0, fs.mkdir(dir)); | |
578 | for (int j=0; j<10; j++) { | |
579 | string file = "file."; | |
580 | file.append(to_string(j)); | |
11fdf7f2 | 581 | BlueFS::FileWriter *h; |
7c673cae | 582 | ASSERT_EQ(0, fs.open_for_write(dir, file, &h, false)); |
11fdf7f2 TL |
583 | ASSERT_NE(nullptr, h); |
584 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
7c673cae | 585 | bufferlist bl; |
11fdf7f2 TL |
586 | std::unique_ptr<char[]> buf = gen_buffer(4096); |
587 | bufferptr bp = buffer::claim_char(4096, buf.get()); | |
7c673cae FG |
588 | bl.push_back(bp); |
589 | h->append(bl.c_str(), bl.length()); | |
590 | fs.fsync(h); | |
7c673cae FG |
591 | } |
592 | } | |
593 | } | |
7c673cae FG |
594 | { |
595 | for (int i=0; i<10; i+=2) { | |
596 | string dir = "dir."; | |
597 | dir.append(to_string(i)); | |
11fdf7f2 | 598 | for (int j=0; j<10; j++) { |
7c673cae FG |
599 | string file = "file."; |
600 | file.append(to_string(j)); | |
601 | fs.unlink(dir, file); | |
1911f103 | 602 | fs.sync_metadata(false); |
7c673cae | 603 | } |
11fdf7f2 | 604 | ASSERT_EQ(0, fs.rmdir(dir)); |
1911f103 | 605 | fs.sync_metadata(false); |
7c673cae FG |
606 | } |
607 | } | |
608 | fs.compact_log(); | |
609 | fs.umount(); | |
7c673cae FG |
610 | } |
611 | ||
612 | TEST(BlueFS, test_compaction_sync) { | |
613 | uint64_t size = 1048576 * 128; | |
9f95a23c | 614 | TempBdev bdev{size}; |
11fdf7f2 | 615 | g_ceph_context->_conf.set_val( |
7c673cae FG |
616 | "bluefs_alloc_size", |
617 | "65536"); | |
11fdf7f2 | 618 | g_ceph_context->_conf.set_val( |
7c673cae FG |
619 | "bluefs_compact_log_sync", |
620 | "true"); | |
1d09f67e TL |
621 | const char* canary_dir = "dir.after_compact_test"; |
622 | const char* canary_file = "file.after_compact_test"; | |
623 | const char* canary_data = "some random data"; | |
7c673cae FG |
624 | |
625 | BlueFS fs(g_ceph_context); | |
f67539c2 | 626 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 627 | uuid_d fsid; |
9f95a23c | 628 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 629 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 630 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
631 | { |
632 | std::vector<std::thread> write_threads; | |
633 | uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction | |
634 | uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS)); | |
635 | for (int i=0; i<NUM_WRITERS; i++) { | |
636 | write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes)); | |
637 | } | |
638 | ||
639 | std::vector<std::thread> sync_threads; | |
640 | for (int i=0; i<NUM_SYNC_THREADS; i++) { | |
641 | sync_threads.push_back(std::thread(sync_fs, std::ref(fs))); | |
642 | } | |
643 | ||
644 | join_all(write_threads); | |
645 | writes_done = true; | |
646 | join_all(sync_threads); | |
647 | fs.compact_log(); | |
1d09f67e TL |
648 | |
649 | { | |
650 | ASSERT_EQ(0, fs.mkdir(canary_dir)); | |
651 | BlueFS::FileWriter *h; | |
652 | ASSERT_EQ(0, fs.open_for_write(canary_dir, canary_file, &h, false)); | |
653 | ASSERT_NE(nullptr, h); | |
654 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
655 | h->append(canary_data, strlen(canary_data)); | |
656 | int r = fs.fsync(h); | |
657 | ASSERT_EQ(r, 0); | |
658 | } | |
659 | } | |
660 | fs.umount(); | |
661 | ||
662 | fs.mount(); | |
663 | { | |
664 | BlueFS::FileReader *h; | |
665 | ASSERT_EQ(0, fs.open_for_read(canary_dir, canary_file, &h)); | |
666 | ASSERT_NE(nullptr, h); | |
667 | bufferlist bl; | |
668 | ASSERT_EQ(strlen(canary_data), fs.read(h, 0, 1024, &bl, NULL)); | |
669 | std::cout << bl.c_str() << std::endl; | |
670 | ASSERT_EQ(0, strncmp(canary_data, bl.c_str(), strlen(canary_data))); | |
671 | delete h; | |
7c673cae FG |
672 | } |
673 | fs.umount(); | |
7c673cae FG |
674 | } |
675 | ||
676 | TEST(BlueFS, test_compaction_async) { | |
677 | uint64_t size = 1048576 * 128; | |
9f95a23c | 678 | TempBdev bdev{size}; |
11fdf7f2 | 679 | g_ceph_context->_conf.set_val( |
7c673cae FG |
680 | "bluefs_alloc_size", |
681 | "65536"); | |
11fdf7f2 | 682 | g_ceph_context->_conf.set_val( |
7c673cae FG |
683 | "bluefs_compact_log_sync", |
684 | "false"); | |
1d09f67e TL |
685 | const char* canary_dir = "dir.after_compact_test"; |
686 | const char* canary_file = "file.after_compact_test"; | |
687 | const char* canary_data = "some random data"; | |
7c673cae FG |
688 | |
689 | BlueFS fs(g_ceph_context); | |
f67539c2 | 690 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 691 | uuid_d fsid; |
9f95a23c | 692 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 693 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 694 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
695 | { |
696 | std::vector<std::thread> write_threads; | |
697 | uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction | |
698 | uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS)); | |
699 | for (int i=0; i<NUM_WRITERS; i++) { | |
700 | write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes)); | |
701 | } | |
702 | ||
703 | std::vector<std::thread> sync_threads; | |
704 | for (int i=0; i<NUM_SYNC_THREADS; i++) { | |
705 | sync_threads.push_back(std::thread(sync_fs, std::ref(fs))); | |
706 | } | |
707 | ||
708 | join_all(write_threads); | |
709 | writes_done = true; | |
710 | join_all(sync_threads); | |
711 | fs.compact_log(); | |
1d09f67e TL |
712 | |
713 | { | |
714 | ASSERT_EQ(0, fs.mkdir(canary_dir)); | |
715 | BlueFS::FileWriter *h; | |
716 | ASSERT_EQ(0, fs.open_for_write(canary_dir, canary_file, &h, false)); | |
717 | ASSERT_NE(nullptr, h); | |
718 | auto sg = make_scope_guard([&fs, h] { fs.close_writer(h); }); | |
719 | h->append(canary_data, strlen(canary_data)); | |
720 | int r = fs.fsync(h); | |
721 | ASSERT_EQ(r, 0); | |
722 | } | |
723 | } | |
724 | fs.umount(); | |
725 | ||
726 | fs.mount(); | |
727 | { | |
728 | BlueFS::FileReader *h; | |
729 | ASSERT_EQ(0, fs.open_for_read(canary_dir, canary_file, &h)); | |
730 | ASSERT_NE(nullptr, h); | |
731 | bufferlist bl; | |
732 | ASSERT_EQ(strlen(canary_data), fs.read(h, 0, 1024, &bl, NULL)); | |
733 | std::cout << bl.c_str() << std::endl; | |
734 | ASSERT_EQ(0, strncmp(canary_data, bl.c_str(), strlen(canary_data))); | |
735 | delete h; | |
7c673cae FG |
736 | } |
737 | fs.umount(); | |
7c673cae FG |
738 | } |
739 | ||
740 | TEST(BlueFS, test_replay) { | |
741 | uint64_t size = 1048576 * 128; | |
9f95a23c | 742 | TempBdev bdev{size}; |
11fdf7f2 | 743 | g_ceph_context->_conf.set_val( |
7c673cae FG |
744 | "bluefs_alloc_size", |
745 | "65536"); | |
11fdf7f2 | 746 | g_ceph_context->_conf.set_val( |
7c673cae FG |
747 | "bluefs_compact_log_sync", |
748 | "false"); | |
749 | ||
750 | BlueFS fs(g_ceph_context); | |
f67539c2 | 751 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
7c673cae | 752 | uuid_d fsid; |
9f95a23c | 753 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); |
7c673cae | 754 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 755 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae FG |
756 | { |
757 | std::vector<std::thread> write_threads; | |
758 | uint64_t effective_size = size - (32 * 1048576); // leaving the last 32 MB for log compaction | |
759 | uint64_t per_thread_bytes = (effective_size/(NUM_WRITERS)); | |
760 | for (int i=0; i<NUM_WRITERS; i++) { | |
761 | write_threads.push_back(std::thread(write_data, std::ref(fs), per_thread_bytes)); | |
762 | } | |
763 | ||
764 | std::vector<std::thread> sync_threads; | |
765 | for (int i=0; i<NUM_SYNC_THREADS; i++) { | |
766 | sync_threads.push_back(std::thread(sync_fs, std::ref(fs))); | |
767 | } | |
768 | ||
769 | join_all(write_threads); | |
770 | writes_done = true; | |
771 | join_all(sync_threads); | |
772 | fs.compact_log(); | |
773 | } | |
774 | fs.umount(); | |
775 | // remount and check log can replay safe? | |
31f18b77 | 776 | ASSERT_EQ(0, fs.mount()); |
9f95a23c | 777 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); |
7c673cae | 778 | fs.umount(); |
7c673cae FG |
779 | } |
780 | ||
f6b5b4d7 TL |
781 | TEST(BlueFS, test_replay_growth) { |
782 | uint64_t size = 1048576LL * (2 * 1024 + 128); | |
783 | TempBdev bdev{size}; | |
784 | ||
785 | ConfSaver conf(g_ceph_context->_conf); | |
786 | conf.SetVal("bluefs_alloc_size", "4096"); | |
787 | conf.SetVal("bluefs_shared_alloc_size", "4096"); | |
788 | conf.SetVal("bluefs_compact_log_sync", "false"); | |
789 | conf.SetVal("bluefs_min_log_runway", "32768"); | |
790 | conf.SetVal("bluefs_max_log_runway", "65536"); | |
791 | conf.SetVal("bluefs_allocator", "stupid"); | |
792 | conf.SetVal("bluefs_sync_write", "true"); | |
793 | conf.ApplyChanges(); | |
794 | ||
795 | BlueFS fs(g_ceph_context); | |
f67539c2 | 796 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); |
f6b5b4d7 TL |
797 | uuid_d fsid; |
798 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); | |
799 | ASSERT_EQ(0, fs.mount()); | |
800 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); | |
801 | ASSERT_EQ(0, fs.mkdir("dir")); | |
802 | ||
803 | char data[2000]; | |
804 | BlueFS::FileWriter *h; | |
805 | ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false)); | |
806 | for (size_t i = 0; i < 10000; i++) { | |
807 | h->append(data, 2000); | |
808 | fs.fsync(h); | |
809 | } | |
810 | fs.close_writer(h); | |
811 | fs.umount(true); //do not compact on exit! | |
812 | ||
813 | // remount and check log can replay safe? | |
814 | ASSERT_EQ(0, fs.mount()); | |
815 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); | |
816 | fs.umount(); | |
817 | } | |
818 | ||
522d829b TL |
819 | TEST(BlueFS, test_tracker_50965) { |
820 | uint64_t size_wal = 1048576 * 64; | |
821 | TempBdev bdev_wal{size_wal}; | |
822 | uint64_t size_db = 1048576 * 128; | |
823 | TempBdev bdev_db{size_db}; | |
824 | uint64_t size_slow = 1048576 * 256; | |
825 | TempBdev bdev_slow{size_slow}; | |
826 | ||
827 | ConfSaver conf(g_ceph_context->_conf); | |
828 | conf.SetVal("bluefs_min_flush_size", "65536"); | |
829 | conf.ApplyChanges(); | |
830 | ||
831 | BlueFS fs(g_ceph_context); | |
832 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false, 0)); | |
833 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0)); | |
834 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0)); | |
835 | uuid_d fsid; | |
836 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true })); | |
837 | ASSERT_EQ(0, fs.mount()); | |
838 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true })); | |
839 | ||
840 | string dir_slow = "dir.slow"; | |
841 | ASSERT_EQ(0, fs.mkdir(dir_slow)); | |
842 | string dir_db = "dir_db"; | |
843 | ASSERT_EQ(0, fs.mkdir(dir_db)); | |
844 | ||
845 | string file_slow = "file"; | |
846 | BlueFS::FileWriter *h_slow; | |
847 | ASSERT_EQ(0, fs.open_for_write(dir_slow, file_slow, &h_slow, false)); | |
848 | ASSERT_NE(nullptr, h_slow); | |
849 | ||
850 | string file_db = "file"; | |
851 | BlueFS::FileWriter *h_db; | |
852 | ASSERT_EQ(0, fs.open_for_write(dir_db, file_db, &h_db, false)); | |
853 | ASSERT_NE(nullptr, h_db); | |
854 | ||
855 | bufferlist bl1; | |
856 | std::unique_ptr<char[]> buf1 = gen_buffer(70000); | |
857 | bufferptr bp1 = buffer::claim_char(70000, buf1.get()); | |
858 | bl1.push_back(bp1); | |
859 | h_slow->append(bl1.c_str(), bl1.length()); | |
860 | fs.flush(h_slow); | |
861 | ||
862 | uint64_t h_slow_dirty_seq_1 = fs.debug_get_dirty_seq(h_slow); | |
863 | ||
864 | bufferlist bl2; | |
865 | std::unique_ptr<char[]> buf2 = gen_buffer(1000); | |
866 | bufferptr bp2 = buffer::claim_char(1000, buf2.get()); | |
867 | bl2.push_back(bp2); | |
868 | h_db->append(bl2.c_str(), bl2.length()); | |
869 | fs.fsync(h_db); | |
870 | ||
871 | uint64_t h_slow_dirty_seq_2 = fs.debug_get_dirty_seq(h_slow); | |
872 | bool h_slow_dev_dirty = fs.debug_get_is_dev_dirty(h_slow, BlueFS::BDEV_SLOW); | |
873 | ||
874 | //problem if allocations are stable in log but slow device is not flushed yet | |
875 | ASSERT_FALSE(h_slow_dirty_seq_1 != 0 && | |
876 | h_slow_dirty_seq_2 == 0 && | |
877 | h_slow_dev_dirty == true); | |
878 | ||
879 | fs.close_writer(h_slow); | |
880 | fs.close_writer(h_db); | |
881 | ||
882 | fs.umount(); | |
883 | } | |
884 | ||
20effc67 TL |
885 | TEST(BlueFS, test_truncate_stable_53129) { |
886 | ||
887 | ConfSaver conf(g_ceph_context->_conf); | |
888 | conf.SetVal("bluefs_min_flush_size", "65536"); | |
889 | conf.ApplyChanges(); | |
890 | ||
891 | uint64_t size_wal = 1048576 * 64; | |
892 | TempBdev bdev_wal{size_wal}; | |
893 | uint64_t size_db = 1048576 * 128; | |
894 | TempBdev bdev_db{size_db}; | |
895 | uint64_t size_slow = 1048576 * 256; | |
896 | TempBdev bdev_slow{size_slow}; | |
897 | ||
898 | BlueFS fs(g_ceph_context); | |
899 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_WAL, bdev_wal.path, false, 0)); | |
900 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev_db.path, false, 0)); | |
901 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_SLOW, bdev_slow.path, false, 0)); | |
902 | uuid_d fsid; | |
903 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, true, true })); | |
904 | ASSERT_EQ(0, fs.mount()); | |
905 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true })); | |
906 | ||
907 | string dir_slow = "dir.slow"; | |
908 | ASSERT_EQ(0, fs.mkdir(dir_slow)); | |
909 | string dir_db = "dir_db"; | |
910 | ASSERT_EQ(0, fs.mkdir(dir_db)); | |
911 | ||
912 | string file_slow = "file"; | |
913 | BlueFS::FileWriter *h_slow; | |
914 | ASSERT_EQ(0, fs.open_for_write(dir_slow, file_slow, &h_slow, false)); | |
915 | ASSERT_NE(nullptr, h_slow); | |
916 | ||
917 | string file_db = "file"; | |
918 | BlueFS::FileWriter *h_db; | |
919 | ASSERT_EQ(0, fs.open_for_write(dir_db, file_db, &h_db, false)); | |
920 | ASSERT_NE(nullptr, h_db); | |
921 | ||
922 | bufferlist bl1; | |
923 | std::unique_ptr<char[]> buf1 = gen_buffer(70000); | |
924 | bufferptr bp1 = buffer::claim_char(70000, buf1.get()); | |
925 | bl1.push_back(bp1); | |
926 | // add 70000 bytes | |
927 | h_slow->append(bl1.c_str(), bl1.length()); | |
928 | fs.flush(h_slow); | |
929 | // and truncate to 60000 bytes | |
930 | fs.truncate(h_slow, 60000); | |
931 | ||
932 | // write something to file on DB device | |
933 | bufferlist bl2; | |
934 | std::unique_ptr<char[]> buf2 = gen_buffer(1000); | |
935 | bufferptr bp2 = buffer::claim_char(1000, buf2.get()); | |
936 | bl2.push_back(bp2); | |
937 | h_db->append(bl2.c_str(), bl2.length()); | |
938 | // and force bluefs log to flush | |
939 | fs.fsync(h_db); | |
940 | ||
941 | // This is the actual test point. | |
942 | // We completed truncate, and we expect | |
943 | // - size to be 60000 | |
944 | // - data to be stable on slow device | |
945 | // OR | |
946 | // - size = 0 or file does not exist | |
947 | // - dev_dirty is irrelevant | |
948 | bool h_slow_dev_dirty = fs.debug_get_is_dev_dirty(h_slow, BlueFS::BDEV_SLOW); | |
949 | // Imagine power goes down here. | |
950 | ||
951 | fs.close_writer(h_slow); | |
952 | fs.close_writer(h_db); | |
953 | ||
954 | fs.umount(); | |
955 | ||
956 | ASSERT_EQ(0, fs.mount()); | |
957 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, true, true })); | |
958 | ||
959 | uint64_t size; | |
960 | utime_t mtime; | |
961 | ASSERT_EQ(0, fs.stat("dir.slow", "file", &size, &mtime)); | |
962 | // check file size 60000 | |
963 | ASSERT_EQ(size, 60000); | |
964 | // check that dev_dirty was false (data stable on media) | |
965 | ASSERT_EQ(h_slow_dev_dirty, false); | |
966 | ||
967 | fs.umount(); | |
968 | } | |
969 | ||
970 | TEST(BlueFS, test_update_ino1_delta_after_replay) { | |
971 | uint64_t size = 1048576LL * (2 * 1024 + 128); | |
972 | TempBdev bdev{size}; | |
973 | ||
974 | ConfSaver conf(g_ceph_context->_conf); | |
975 | conf.SetVal("bluefs_alloc_size", "4096"); | |
976 | conf.SetVal("bluefs_shared_alloc_size", "4096"); | |
977 | conf.SetVal("bluefs_compact_log_sync", "false"); | |
978 | conf.SetVal("bluefs_min_log_runway", "32768"); | |
979 | conf.SetVal("bluefs_max_log_runway", "65536"); | |
980 | conf.SetVal("bluefs_allocator", "stupid"); | |
981 | conf.ApplyChanges(); | |
982 | ||
983 | BlueFS fs(g_ceph_context); | |
984 | ASSERT_EQ(0, fs.add_block_device(BlueFS::BDEV_DB, bdev.path, false, 1048576)); | |
985 | uuid_d fsid; | |
986 | ASSERT_EQ(0, fs.mkfs(fsid, { BlueFS::BDEV_DB, false, false })); | |
987 | ASSERT_EQ(0, fs.mount()); | |
988 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); | |
989 | ASSERT_EQ(0, fs.mkdir("dir")); | |
990 | ||
991 | char data[2000]; | |
992 | BlueFS::FileWriter *h; | |
993 | ASSERT_EQ(0, fs.open_for_write("dir", "file", &h, false)); | |
994 | for (size_t i = 0; i < 100; i++) { | |
995 | h->append(data, 2000); | |
996 | fs.fsync(h); | |
997 | } | |
998 | fs.close_writer(h); | |
999 | fs.umount(true); //do not compact on exit! | |
7c673cae | 1000 | |
20effc67 TL |
1001 | ASSERT_EQ(0, fs.mount()); |
1002 | ASSERT_EQ(0, fs.open_for_write("dir", "file2", &h, false)); | |
1003 | for (size_t i = 0; i < 100; i++) { | |
1004 | h->append(data, 2000); | |
1005 | fs.fsync(h); | |
1006 | } | |
1007 | fs.close_writer(h); | |
1008 | fs.umount(); | |
1009 | ||
1010 | // remount and check log can replay safe? | |
1011 | ASSERT_EQ(0, fs.mount()); | |
1012 | ASSERT_EQ(0, fs.maybe_verify_layout({ BlueFS::BDEV_DB, false, false })); | |
1013 | fs.umount(); | |
1014 | } | |
1015 | ||
1016 | int main(int argc, char **argv) { | |
1017 | auto args = argv_to_vec(argc, argv); | |
11fdf7f2 TL |
1018 | map<string,string> defaults = { |
1019 | { "debug_bluefs", "1/20" }, | |
1020 | { "debug_bdev", "1/20" } | |
1021 | }; | |
7c673cae | 1022 | |
11fdf7f2 | 1023 | auto cct = global_init(&defaults, args, CEPH_ENTITY_TYPE_CLIENT, |
7c673cae | 1024 | CODE_ENVIRONMENT_UTILITY, |
11fdf7f2 | 1025 | CINIT_FLAG_NO_DEFAULT_CONFIG_FILE); |
7c673cae | 1026 | common_init_finish(g_ceph_context); |
11fdf7f2 | 1027 | g_ceph_context->_conf.set_val( |
7c673cae FG |
1028 | "enable_experimental_unrecoverable_data_corrupting_features", |
1029 | "*"); | |
11fdf7f2 | 1030 | g_ceph_context->_conf.apply_changes(nullptr); |
7c673cae FG |
1031 | |
1032 | ::testing::InitGoogleTest(&argc, argv); | |
1033 | return RUN_ALL_TESTS(); | |
1034 | } |