]>
Commit | Line | Data |
---|---|---|
20effc67 TL |
1 | // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- |
2 | // vim: ts=8 sw=2 smarttab | |
3 | ||
4 | #include <sys/mman.h> | |
5 | #include <string.h> | |
6 | ||
7 | #include "crimson/common/log.h" | |
8 | ||
9 | #include "include/buffer.h" | |
10 | #include "nvmedevice.h" | |
11 | #include "include/interval_set.h" | |
12 | #include "include/intarith.h" | |
13 | #include "nvme_manager.h" | |
14 | ||
15 | namespace { | |
16 | seastar::logger& logger() { | |
17 | return crimson::get_logger(ceph_subsys_seastore_tm); | |
18 | } | |
19 | } | |
20 | ||
21 | namespace crimson::os::seastore { | |
22 | ||
23 | NVMeManager::write_ertr::future<> NVMeManager::rbm_sync_block_bitmap( | |
24 | rbm_bitmap_block_t &block, blk_id_t block_no) | |
25 | { | |
26 | bufferptr bptr; | |
27 | try { | |
28 | bptr = bufferptr(ceph::buffer::create_page_aligned(block.get_size())); | |
29 | bufferlist bl; | |
30 | encode(block, bl); | |
31 | auto iter = bl.cbegin(); | |
32 | iter.copy(block.get_size(), bptr.c_str()); | |
33 | } catch (const std::exception &e) { | |
34 | logger().error( | |
35 | "rmb_sync_block_bitmap: " | |
36 | "exception creating aligned buffer {}", | |
37 | e | |
38 | ); | |
39 | ceph_assert(0 == "unhandled exception"); | |
40 | } | |
41 | uint64_t bitmap_block_no = convert_block_no_to_bitmap_block(block_no); | |
42 | return device->write(super.start_alloc_area + | |
43 | bitmap_block_no * super.block_size, | |
44 | bptr); | |
45 | } | |
46 | ||
/**
 * Initialize the block-allocation (bitmap) area during mkfs.
 *
 * Three steps:
 *   1. Write the first bitmap block with all meta/reserved blocks (those
 *      before start_data_area) marked as allocated.
 *   2. Clear the bitmap for the whole data range [start, end].
 *   3. If the device size is not a multiple of the per-bitmap-block
 *      capacity, mark the trailing, non-existent block ids in the last
 *      bitmap block as allocated so they can never be handed out.
 */
NVMeManager::mkfs_ertr::future<> NVMeManager::initialize_blk_alloc_area() {
  // First data block id == number of blocks occupied by meta + alloc area.
  auto start = super.start_data_area / super.block_size;
  logger().debug("initialize_alloc_area: start to read at {} ", start);

  /* write allocated bitmap info to rbm meta block */
  rbm_bitmap_block_t b_block(super.block_size);
  alloc_rbm_bitmap_block_buf(b_block);
  // Blocks [0, start) hold superblock + bitmap area: mark them in-use.
  for (uint64_t i = 0; i < start; i++) {
    b_block.set_bit(i);
  }

  // CRC calculation is offloaded to NVMeDevice if data protection is enabled.
  if (device->is_data_protection_enabled() == false) {
    b_block.set_crc();
  }

  return rbm_sync_block_bitmap(b_block,
    super.start_alloc_area / super.block_size
  ).safe_then([this, b_block, start] () mutable {

    /* initialize bitmap blocks as unused */
    auto max = max_block_by_bitmap_block();
    auto max_block = super.size / super.block_size;
    // end is the last block id covered by a whole number of bitmap blocks.
    blk_id_t end = round_up_to(max_block, max) - 1;
    logger().debug(" init start {} end {} ", start, end);
    return rbm_sync_block_bitmap_by_range(
      start,
      end,
      bitmap_op_types_t::ALL_CLEAR
    ).safe_then([this, b_block]() mutable {
      /*
       * Set rest of the block bitmap, which is not used, to 1
       * To do so, we only mark 1 to empty bitmap blocks
       */
      uint64_t na_block_no = super.size/super.block_size;
      uint64_t remain_block = na_block_no % max_block_by_bitmap_block();
      logger().debug(" na_block_no: {}, remain_block: {} ",
                     na_block_no, remain_block);
      if (remain_block) {
        logger().debug(" try to remained write alloc info ");
        // If more than one bitmap block exists, b_block still carries the
        // first block's contents; reset it to a fresh empty bitmap first.
        if (na_block_no > max_block_by_bitmap_block()) {
          b_block.buf.clear();
          alloc_rbm_bitmap_block_buf(b_block);
        }
        // Mark the tail bits (ids beyond the device size) as allocated.
        for (uint64_t i = remain_block; i < max_block_by_bitmap_block(); i++) {
          b_block.set_bit(i);
        }
        b_block.set_crc();
        return rbm_sync_block_bitmap(b_block, na_block_no
        ).handle_error(
          mkfs_ertr::pass_further{},
          crimson::ct_error::assert_all{
            "Invalid error rbm_sync_block_bitmap to update \
            last bitmap block in NVMeManager::initialize_blk_alloc_area"
          }
        );
      }
      return mkfs_ertr::now();
    }).handle_error(
      mkfs_ertr::pass_further{},
      crimson::ct_error::assert_all{
        "Invalid error rbm_sync_block_bitmap \
        in NVMeManager::initialize_blk_alloc_area"
      }
    );
  }).handle_error(
    mkfs_ertr::pass_further{},
    crimson::ct_error::assert_all{
      "Invalid error rbm_sync_block_bitmap_by_range \
      in NVMeManager::initialize_blk_alloc_area"
    }
  );

}
121 | ||
122 | NVMeManager::mkfs_ertr::future<> NVMeManager::mkfs(mkfs_config_t config) | |
123 | { | |
124 | logger().debug("path {}", path); | |
125 | return _open_device(path).safe_then([this, &config]() { | |
126 | blk_paddr_t addr = convert_paddr_to_blk_paddr( | |
127 | config.start, | |
128 | config.block_size, | |
129 | config.blocks_per_segment); | |
130 | return read_rbm_header(addr).safe_then([](auto super) { | |
131 | logger().debug(" already exists "); | |
132 | return mkfs_ertr::now(); | |
133 | }).handle_error( | |
134 | crimson::ct_error::enoent::handle([this, &config] (auto) { | |
135 | super.uuid = uuid_d(); // TODO | |
136 | super.magic = 0xFF; // TODO | |
137 | super.start = convert_paddr_to_blk_paddr( | |
138 | config.start, | |
139 | config.block_size, | |
140 | config.blocks_per_segment); | |
141 | super.end = convert_paddr_to_blk_paddr( | |
142 | config.end, | |
143 | config.block_size, | |
144 | config.blocks_per_segment); | |
145 | super.block_size = config.block_size; | |
146 | super.size = config.total_size; | |
147 | super.free_block_count = config.total_size/config.block_size - 2; | |
148 | super.alloc_area_size = get_alloc_area_size(); | |
149 | super.start_alloc_area = RBM_SUPERBLOCK_SIZE; | |
150 | super.start_data_area = | |
151 | super.start_alloc_area + super.alloc_area_size; | |
152 | super.crc = 0; | |
153 | super.feature |= RBM_BITMAP_BLOCK_CRC; | |
154 | super.blocks_per_segment = config.blocks_per_segment; | |
155 | super.device_id = config.device_id; | |
156 | ||
157 | logger().debug(" super {} ", super); | |
158 | // write super block | |
159 | return write_rbm_header().safe_then([this] { | |
160 | return initialize_blk_alloc_area(); | |
161 | }).handle_error( | |
162 | mkfs_ertr::pass_further{}, | |
163 | crimson::ct_error::assert_all{ | |
164 | "Invalid error write_rbm_header in NVMeManager::mkfs" | |
165 | }); | |
166 | }), | |
167 | mkfs_ertr::pass_further{}, | |
168 | crimson::ct_error::assert_all{ | |
169 | "Invalid error read_rbm_header in NVMeManager::mkfs" | |
170 | } | |
171 | ); | |
172 | }).handle_error( | |
173 | mkfs_ertr::pass_further{}, | |
174 | crimson::ct_error::assert_all{ | |
175 | "Invalid error open_device in NVMeManager::mkfs" | |
176 | }).finally([this] { | |
177 | if (device) { | |
178 | return device->close(); | |
179 | } else { | |
180 | return seastar::now(); | |
181 | } | |
182 | }); | |
183 | } | |
184 | ||
/**
 * Scan the on-disk bitmap for a run of contiguous free blocks covering
 * @size bytes.
 *
 * Reads bitmap blocks one by one from the alloc area and accumulates
 * consecutive free block ids into an interval_set.  If the run is broken,
 * the search restarts from the next free block.  Returns an empty
 * interval_set when not enough contiguous space was found.
 *
 * @param t    transaction (currently unused here — reserved for the future
 *             in-memory allocator; see alloc_extent)
 * @param size requested allocation size in bytes
 * @return interval_set of allocated block ids (empty on failure)
 */
NVMeManager::find_block_ret NVMeManager::find_free_block(Transaction &t, size_t size)
{
  auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
  // do_with keeps the loop state (count, cursor, result set, read buffer)
  // alive across the repeat() iterations.
  return seastar::do_with(uint64_t(0),
    uint64_t(super.start_alloc_area),
    interval_set<blk_id_t>(),
    bp,
    [&, this] (auto &allocated, auto &addr, auto &alloc_extent, auto &bp) mutable {
    return crimson::repeat(
      [&, this] () mutable {
      return device->read(
        addr,
        bp
      ).safe_then(
        [&bp, &addr, size, &allocated, &alloc_extent, this]() mutable {
        logger().debug("find_free_list: allocate {}, addr {}", allocated, addr);
        rbm_bitmap_block_t b_block(super.block_size);
        bufferlist bl_bitmap_block;
        bl_bitmap_block.append(bp);
        decode(b_block, bl_bitmap_block);
        auto max = max_block_by_bitmap_block();
        // Walk the bits of this bitmap block until enough blocks are found.
        for (uint64_t i = 0;
            i < max && (uint64_t)size/super.block_size > allocated; i++) {
          auto block_id = convert_bitmap_block_no_to_block_id(i, addr);
          if (b_block.is_allocated(i)) {
            continue;
          }
          logger().debug("find_free_list: allocated block no {} i {}",
                         convert_bitmap_block_no_to_block_id(i, addr), i);
          if (allocated != 0 && alloc_extent.range_end() != block_id) {
            /*
             * if not continuous block, just restart to find continuous blocks
             * at the next block.
             * in-memory allocator can handle this efficiently.
             */
            allocated = 0;
            alloc_extent.clear(); // a range of block allocation
            logger().debug("find_free_list: rety to find continuous blocks");
            continue;
          }
          allocated += 1;
          alloc_extent.insert(block_id);
        }
        // Advance to the next bitmap block in the alloc area.
        addr += super.block_size;
        logger().debug("find_free_list: allocated: {} alloc_extent {}",
                       allocated, alloc_extent);
        if (((uint64_t)size)/super.block_size == allocated) {
          // Found a large-enough contiguous run.
          return seastar::stop_iteration::yes;
        } else if (addr >= super.start_data_area) {
          // Exhausted the whole bitmap area without success.
          alloc_extent.clear();
          return seastar::stop_iteration::yes;
        }
        return seastar::stop_iteration::no;
      });
    }).safe_then([&allocated, &alloc_extent, size, this] () {
      logger().debug(" allocated: {} size {} ",
                     allocated * super.block_size, size);
      // Defensive: report failure as an empty set.
      if (allocated * super.block_size < size) {
        alloc_extent.clear();
      }
      return find_block_ret(
        find_block_ertr::ready_future_marker{},
        alloc_extent);
    }).handle_error(
      find_block_ertr::pass_further{},
      crimson::ct_error::assert_all{
        "Invalid error in NVMeManager::find_free_block"
      }
    );
  });
}
256 | ||
257 | /* TODO : block allocator */ | |
/* TODO : block allocator */
/**
 * Allocate @size bytes worth of contiguous blocks for transaction @t.
 *
 * Finds free blocks via find_free_block(), records the resulting ranges in
 * the transaction as an rbm_alloc_delta_t (SET), and returns the paddr of
 * the first allocated block.  Fails with enospc when no contiguous run of
 * the requested size exists.
 *
 * @param t    transaction the allocation is recorded into
 * @param size requested size in bytes
 * @return paddr of the start of the allocated extent
 */
NVMeManager::allocate_ret NVMeManager::alloc_extent(
  Transaction &t, size_t size)
{

  /*
   * 1. find free blocks using block allocator
   * 2. add free blocks to transaction
   *    (the free block is reserved state, not stored)
   * 3. link free blocks to onode
   * Due to in-memory block allocator is the next work to do,
   * just read the block bitmap directly to find free blocks.
   *
   */
  return find_free_block(t, size
  ).safe_then([this, &t] (auto alloc_extent) mutable
    -> allocate_ertr::future<paddr_t> {
    logger().debug("after find_free_block: allocated {}", alloc_extent);
    if (!alloc_extent.empty()) {
      // Translate each [block_id, count] interval into a (paddr, length)
      // range and record it as a SET delta on the transaction.
      rbm_alloc_delta_t alloc_info;
      for (auto p : alloc_extent) {
        paddr_t paddr = convert_blk_paddr_to_paddr(
          p.first * super.block_size,
          super.block_size,
          super.blocks_per_segment,
          super.device_id);
        size_t len = p.second * super.block_size;
        alloc_info.alloc_blk_ranges.push_back(std::make_pair(paddr, len));
        alloc_info.op = rbm_alloc_delta_t::op_types_t::SET;
      }
      t.add_rbm_alloc_info_blocks(alloc_info);
    } else {
      return crimson::ct_error::enospc::make();
    }
    // Return the paddr corresponding to the first allocated block id.
    paddr_t paddr = convert_blk_paddr_to_paddr(
      alloc_extent.range_start() * super.block_size,
      super.block_size,
      super.blocks_per_segment,
      super.device_id);
    return allocate_ret(
      allocate_ertr::ready_future_marker{},
      paddr);
  }
  ).handle_error(
    allocate_ertr::pass_further{},
    crimson::ct_error::assert_all{
      "Invalid error find_free_block in NVMeManager::alloc_extent"
    }
  );
}
307 | ||
308 | void NVMeManager::add_free_extent( | |
309 | std::vector<rbm_alloc_delta_t>& v, blk_paddr_t from, size_t len) | |
310 | { | |
311 | ceph_assert(!(len % super.block_size)); | |
312 | paddr_t paddr = convert_blk_paddr_to_paddr( | |
313 | from, | |
314 | super.block_size, | |
315 | super.blocks_per_segment, | |
316 | super.device_id); | |
317 | rbm_alloc_delta_t alloc_info; | |
318 | alloc_info.alloc_blk_ranges.push_back(std::make_pair(paddr, len)); | |
319 | alloc_info.op = rbm_alloc_delta_t::op_types_t::CLEAR; | |
320 | v.push_back(alloc_info); | |
321 | } | |
322 | ||
/**
 * Apply a bitmap operation (@op = all-set / all-clear) to the block-id
 * range [start, end] and persist the affected bitmap blocks.
 *
 * Cases handled:
 *   - fully aligned range: synthesize and write the bitmap blocks directly;
 *   - unaligned front only (single bitmap block): read-modify-write it;
 *   - unaligned front + aligned tail: RMW front, append synthesized blocks;
 *   - unaligned front + middle + unaligned tail: RMW front, synthesize the
 *     middle, then RMW the last bitmap block and write everything at once.
 */
NVMeManager::write_ertr::future<> NVMeManager::rbm_sync_block_bitmap_by_range(
  blk_id_t start, blk_id_t end, bitmap_op_types_t op)
{
  // Device address of the bitmap block containing `start`.
  auto addr = super.start_alloc_area +
              (start / max_block_by_bitmap_block())
              * super.block_size;
  // aligned write
  if (start % max_block_by_bitmap_block() == 0 &&
      end % (max_block_by_bitmap_block() - 1) == 0) {
    auto num_block = num_block_between_blk_ids(start, end);
    bufferlist bl_bitmap_block;
    add_cont_bitmap_blocks_to_buf(bl_bitmap_block, num_block, op);
    return write(
      addr,
      bl_bitmap_block);
  }
  auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
  // try to read first block, then check the block is aligned
  return device->read(
    addr,
    bp).safe_then([bp, start, end, op, addr, this]() {
    rbm_bitmap_block_t b_block(super.block_size);
    bufferlist bl_bitmap_block;
    bl_bitmap_block.append(bp);
    decode(b_block, bl_bitmap_block);
    auto max = max_block_by_bitmap_block();
    // loop_end: either `end` falls inside this bitmap block, or we touch
    // every bit up to the end of this block.
    auto loop_end = end < (start / max + 1) * max ?
                    end % max : max - 1;
    for (uint64_t i = (start % max); i <= loop_end; i++) {
      if (op == bitmap_op_types_t::ALL_SET) {
        b_block.set_bit(i);
      } else {
        b_block.clear_bit(i);
      }
    }
    auto num_block = num_block_between_blk_ids(start, end);
    logger().debug("rbm_sync_block_bitmap_by_range: start {}, end {}, \
    loop_end {}, num_block {}",
    start, end, loop_end, num_block);

    // Re-encode the modified front block.
    bl_bitmap_block.clear();
    encode(b_block, bl_bitmap_block);
    if (num_block == 1) {
      // | front (unaligned) |
      return write(
        addr,
        bl_bitmap_block);
    } else if (!((end + 1) % max)) {
      // | front (unaligned) | middle (aligned) |
      add_cont_bitmap_blocks_to_buf(bl_bitmap_block, num_block - 1, op);
      logger().debug("partially aligned write: addr {} length {}",
                     addr, bl_bitmap_block.length());
      return write(
        addr,
        bl_bitmap_block);
    } else if (num_block > 2) {
      // | front (unaligned) | middle | end (unaligned) |
      // fill up the middle
      add_cont_bitmap_blocks_to_buf(bl_bitmap_block, num_block - 2, op);
    }

    // Device address of the bitmap block containing `end`.
    auto next_addr = super.start_alloc_area +
                     (end / max_block_by_bitmap_block())
                     * super.block_size;
    auto bptr = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
    // | front (unaligned) | middle | end (unaligned) | or
    // | front (unaligned) | end (unaligned) |
    return device->read(
      next_addr,
      bptr).safe_then(
      [bptr, bl_bitmap_block, end, op, addr, this]() mutable {
      rbm_bitmap_block_t b_block(super.block_size);
      bufferlist block;
      block.append(bptr);
      decode(b_block, block);
      auto max = max_block_by_bitmap_block();
      // NOTE(review): (end - (end % max)) % max is always 0, so this loop
      // runs from the first bit of the last bitmap block up to `end % max`
      // — confirm the expression is intentional.
      for (uint64_t i = (end - (end % max)) % max;
          i <= (end % max); i++) {
        if (op == bitmap_op_types_t::ALL_SET) {
          b_block.set_bit(i);
        } else {
          b_block.clear_bit(i);
        }
      }
      logger().debug("start {} end {} ", end - (end % max), end);
      // Append the modified tail block and write the whole run in one I/O.
      bl_bitmap_block.claim_append(block);
      return write(
        addr,
        bl_bitmap_block);
    }).handle_error(
      write_ertr::pass_further{},
      crimson::ct_error::assert_all{
        "Invalid error in NVMeManager::rbm_sync_block_bitmap_by_range"
      }
    );
  }).handle_error(
    write_ertr::pass_further{},
    crimson::ct_error::assert_all{
      "Invalid error in NVMeManager::rbm_sync_block_bitmap_by_range"
    }
  );
}
425 | ||
426 | NVMeManager::abort_allocation_ertr::future<> NVMeManager::abort_allocation( | |
427 | Transaction &t) | |
428 | { | |
429 | /* | |
430 | * TODO: clear all allocation infos associated with transaction in in-memory allocator | |
431 | */ | |
432 | return abort_allocation_ertr::now(); | |
433 | } | |
434 | ||
435 | NVMeManager::write_ertr::future<> NVMeManager::complete_allocation( | |
436 | Transaction &t) | |
437 | { | |
438 | return write_ertr::now(); | |
439 | } | |
440 | ||
441 | NVMeManager::write_ertr::future<> NVMeManager::sync_allocation( | |
442 | std::vector<rbm_alloc_delta_t> &alloc_blocks) | |
443 | { | |
444 | if (alloc_blocks.empty()) { | |
445 | return write_ertr::now(); | |
446 | } | |
447 | return seastar::do_with(move(alloc_blocks), | |
448 | [&, this] (auto &alloc_blocks) mutable { | |
449 | return crimson::do_for_each(alloc_blocks, | |
450 | [this](auto &alloc) { | |
451 | return crimson::do_for_each(alloc.alloc_blk_ranges, | |
452 | [this, &alloc] (auto &range) -> write_ertr::future<> { | |
453 | logger().debug("range {} ~ {}", range.first, range.second); | |
454 | bitmap_op_types_t op = | |
455 | (alloc.op == rbm_alloc_delta_t::op_types_t::SET) ? | |
456 | bitmap_op_types_t::ALL_SET : | |
457 | bitmap_op_types_t::ALL_CLEAR; | |
458 | blk_paddr_t addr = convert_paddr_to_blk_paddr( | |
459 | range.first, | |
460 | super.block_size, | |
461 | super.blocks_per_segment); | |
462 | blk_id_t start = addr / super.block_size; | |
463 | blk_id_t end = start + | |
464 | (round_up_to(range.second, super.block_size)) / super.block_size | |
465 | - 1; | |
466 | return rbm_sync_block_bitmap_by_range( | |
467 | start, | |
468 | end, | |
469 | op); | |
470 | }); | |
471 | }).safe_then([this, &alloc_blocks]() mutable { | |
472 | int alloc_block_count = 0; | |
473 | for (const auto& b : alloc_blocks) { | |
474 | for (auto r : b.alloc_blk_ranges) { | |
475 | if (b.op == rbm_alloc_delta_t::op_types_t::SET) { | |
476 | alloc_block_count += | |
477 | round_up_to(r.second, super.block_size) / super.block_size; | |
478 | logger().debug(" complete alloc block: start {} len {} ", | |
479 | r.first, r.second); | |
480 | } else { | |
481 | alloc_block_count -= | |
482 | round_up_to(r.second, super.block_size) / super.block_size; | |
483 | logger().debug(" complete alloc block: start {} len {} ", | |
484 | r.first, r.second); | |
485 | } | |
486 | } | |
487 | } | |
488 | logger().debug("complete_alloction: complete to allocate {} blocks", | |
489 | alloc_block_count); | |
490 | super.free_block_count -= alloc_block_count; | |
491 | return write_ertr::now(); | |
492 | }); | |
493 | }); | |
494 | } | |
495 | ||
/**
 * Open the device at @path and load the superblock located at @paddr.
 *
 * Validates the magic written by mkfs(), installs the decoded header into
 * `super`, and rebuilds free_block_count by scanning the bitmap blocks.
 *
 * @param path  device path to open
 * @param paddr location of the superblock (translated via current geometry)
 * @return enoent when no valid superblock is found
 */
NVMeManager::open_ertr::future<> NVMeManager::open(
  const std::string &path, paddr_t paddr)
{
  logger().debug("open: path{}", path);

  // NOTE(review): super is only populated below, so block_size /
  // blocks_per_segment used here must already hold valid values
  // (e.g. from a prior mkfs) — confirm callers guarantee this.
  blk_paddr_t addr = convert_paddr_to_blk_paddr(
    paddr,
    super.block_size,
    super.blocks_per_segment);
  return _open_device(path
  ).safe_then([this, addr]() {
    return read_rbm_header(addr).safe_then([&](auto s)
      -> open_ertr::future<> {
      // 0xFF is the magic written by mkfs(); anything else means there is
      // no valid superblock at this address.
      if (s.magic != 0xFF) {
        return crimson::ct_error::enoent::make();
      }
      super = s;
      // Recount free blocks from the persisted bitmap.
      return check_bitmap_blocks().safe_then([]() {
        return open_ertr::now();
      });
    }
    ).handle_error(
      open_ertr::pass_further{},
      crimson::ct_error::assert_all{
        "Invalid error read_rbm_header in NVMeManager::open"
      }
    );
  });
}
525 | ||
526 | NVMeManager::write_ertr::future<> NVMeManager::write( | |
527 | blk_paddr_t addr, | |
528 | bufferptr &bptr) | |
529 | { | |
530 | ceph_assert(device); | |
531 | if (addr > super.end || addr < super.start || | |
532 | bptr.length() > super.end - super.start) { | |
533 | return crimson::ct_error::erange::make(); | |
534 | } | |
535 | return device->write( | |
536 | addr, | |
537 | bptr); | |
538 | } | |
539 | ||
540 | NVMeManager::read_ertr::future<> NVMeManager::read( | |
541 | blk_paddr_t addr, | |
542 | bufferptr &bptr) | |
543 | { | |
544 | ceph_assert(device); | |
545 | if (addr > super.end || addr < super.start || | |
546 | bptr.length() > super.end - super.start) { | |
547 | return crimson::ct_error::erange::make(); | |
548 | } | |
549 | return device->read( | |
550 | addr, | |
551 | bptr); | |
552 | } | |
553 | ||
554 | NVMeManager::close_ertr::future<> NVMeManager::close() | |
555 | { | |
556 | ceph_assert(device); | |
557 | return device->close(); | |
558 | } | |
559 | ||
560 | NVMeManager::open_ertr::future<> NVMeManager::_open_device( | |
561 | const std::string path) | |
562 | { | |
563 | ceph_assert(device); | |
564 | return device->open(path, seastar::open_flags::rw); | |
565 | } | |
566 | ||
567 | NVMeManager::write_ertr::future<> NVMeManager::write_rbm_header() | |
568 | { | |
569 | bufferlist meta_b_header; | |
570 | super.crc = 0; | |
571 | encode(super, meta_b_header); | |
572 | // If NVMeDevice supports data protection, CRC for checksum is not required | |
573 | // NVMeDevice is expected to generate and store checksum internally. | |
574 | // CPU overhead for CRC might be saved. | |
575 | if (device->is_data_protection_enabled()) { | |
576 | super.crc = -1; | |
577 | } | |
578 | else { | |
579 | super.crc = meta_b_header.crc32c(-1); | |
580 | } | |
581 | ||
582 | bufferlist bl; | |
583 | encode(super, bl); | |
584 | auto iter = bl.begin(); | |
585 | auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size)); | |
586 | assert(bl.length() < super.block_size); | |
587 | iter.copy(bl.length(), bp.c_str()); | |
588 | ||
589 | return device->write(super.start, bp); | |
590 | } | |
591 | ||
592 | NVMeManager::read_ertr::future<rbm_metadata_header_t> NVMeManager::read_rbm_header( | |
593 | blk_paddr_t addr) | |
594 | { | |
595 | ceph_assert(device); | |
596 | bufferptr bptr = | |
597 | bufferptr(ceph::buffer::create_page_aligned(RBM_SUPERBLOCK_SIZE)); | |
598 | bptr.zero(); | |
599 | return device->read( | |
600 | addr, | |
601 | bptr | |
602 | ).safe_then([length=bptr.length(), this, bptr]() | |
603 | -> read_ertr::future<rbm_metadata_header_t> { | |
604 | bufferlist bl; | |
605 | bl.append(bptr); | |
606 | auto p = bl.cbegin(); | |
607 | rbm_metadata_header_t super_block; | |
608 | try { | |
609 | decode(super_block, p); | |
610 | } | |
611 | catch (ceph::buffer::error& e) { | |
612 | logger().debug(" read_rbm_header: unable to decode rbm super block {}", | |
613 | e.what()); | |
614 | return crimson::ct_error::enoent::make(); | |
615 | } | |
616 | checksum_t crc = super_block.crc; | |
617 | bufferlist meta_b_header; | |
618 | super_block.crc = 0; | |
619 | encode(super_block, meta_b_header); | |
620 | ||
621 | // Do CRC verification only if data protection is not supported. | |
622 | if (device->is_data_protection_enabled() == false) { | |
623 | if (meta_b_header.crc32c(-1) != crc) { | |
624 | logger().debug(" bad crc on super block, expected {} != actual {} ", | |
625 | meta_b_header.crc32c(-1), crc); | |
626 | return crimson::ct_error::input_output_error::make(); | |
627 | } | |
628 | } | |
629 | logger().debug(" got {} ", super); | |
630 | return read_ertr::future<rbm_metadata_header_t>( | |
631 | read_ertr::ready_future_marker{}, | |
632 | super_block | |
633 | ); | |
634 | ||
635 | }).handle_error( | |
636 | read_ertr::pass_further{}, | |
637 | crimson::ct_error::assert_all{ | |
638 | "Invalid error in NVMeManager::read_rbm_header" | |
639 | } | |
640 | ); | |
641 | } | |
642 | ||
643 | NVMeManager::check_bitmap_blocks_ertr::future<> NVMeManager::check_bitmap_blocks() | |
644 | { | |
645 | auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size)); | |
646 | return seastar::do_with(uint64_t(super.start_alloc_area), uint64_t(0), bp, | |
647 | [&, this] (auto &addr, auto &free_blocks, auto &bp) mutable { | |
648 | return crimson::repeat([&, this] () mutable { | |
649 | return device->read(addr,bp).safe_then( | |
650 | [&bp, &addr, &free_blocks, this]() mutable { | |
651 | logger().debug("verify_bitmap_blocks: addr {}", addr); | |
652 | rbm_bitmap_block_t b_block(super.block_size); | |
653 | bufferlist bl_bitmap_block; | |
654 | bl_bitmap_block.append(bp); | |
655 | decode(b_block, bl_bitmap_block); | |
656 | auto max = max_block_by_bitmap_block(); | |
657 | for (uint64_t i = 0; i < max; i++) { | |
658 | if (!b_block.is_allocated(i)) { | |
659 | free_blocks++; | |
660 | } | |
661 | } | |
662 | addr += super.block_size; | |
663 | if (addr >= super.start_data_area) { | |
664 | return seastar::stop_iteration::yes; | |
665 | } | |
666 | return seastar::stop_iteration::no; | |
667 | }); | |
668 | }).safe_then([&free_blocks, this] () { | |
669 | logger().debug(" free_blocks: {} ", free_blocks); | |
670 | super.free_block_count = free_blocks; | |
671 | return check_bitmap_blocks_ertr::now(); | |
672 | }).handle_error( | |
673 | check_bitmap_blocks_ertr::pass_further{}, | |
674 | crimson::ct_error::assert_all{ | |
675 | "Invalid error in NVMeManager::find_free_block" | |
676 | } | |
677 | ); | |
678 | }); | |
679 | } | |
680 | ||
681 | NVMeManager::write_ertr::future<> NVMeManager::write( | |
682 | blk_paddr_t addr, | |
683 | bufferlist &bl) | |
684 | { | |
685 | ceph_assert(device); | |
686 | bufferptr bptr; | |
687 | try { | |
688 | bptr = bufferptr(ceph::buffer::create_page_aligned(bl.length())); | |
689 | auto iter = bl.cbegin(); | |
690 | iter.copy(bl.length(), bptr.c_str()); | |
691 | } catch (const std::exception &e) { | |
692 | logger().error( | |
693 | "write: " | |
694 | "exception creating aligned buffer {}", | |
695 | e | |
696 | ); | |
697 | ceph_assert(0 == "unhandled exception"); | |
698 | } | |
699 | return device->write( | |
700 | addr, | |
701 | bptr); | |
702 | } | |
703 | ||
704 | std::ostream &operator<<(std::ostream &out, const rbm_metadata_header_t &header) | |
705 | { | |
706 | out << " rbm_metadata_header_t(size=" << header.size | |
707 | << ", block_size=" << header.block_size | |
708 | << ", start=" << header.start | |
709 | << ", end=" << header.end | |
710 | << ", magic=" << header.magic | |
711 | << ", uuid=" << header.uuid | |
712 | << ", free_block_count=" << header.free_block_count | |
713 | << ", alloc_area_size=" << header.alloc_area_size | |
714 | << ", start_alloc_area=" << header.start_alloc_area | |
715 | << ", start_data_area=" << header.start_data_area | |
716 | << ", flag=" << header.flag | |
717 | << ", feature=" << header.feature | |
718 | << ", crc=" << header.crc; | |
719 | return out << ")"; | |
720 | } | |
721 | ||
722 | std::ostream &operator<<(std::ostream &out, | |
723 | const rbm_bitmap_block_header_t &header) | |
724 | { | |
725 | out << " rbm_bitmap_block_header_t(size=" << header.size | |
726 | << ", checksum=" << header.checksum; | |
727 | return out << ")"; | |
728 | } | |
729 | ||
730 | } |