// ceph/src/crimson/os/seastore/segment_manager/zbd.cc (reef 18.2.1)
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include <sys/mman.h>
5 #include <string.h>
6 #include <linux/blkzoned.h>
7
8 #include <fmt/format.h>
9 #include "crimson/os/seastore/segment_manager/zbd.h"
10 #include "crimson/common/config_proxy.h"
11 #include "crimson/os/seastore/logging.h"
12 #include "crimson/common/errorator-loop.h"
13 #include "include/buffer.h"
14
15 SET_SUBSYS(seastore_device);
16
17 #define SECT_SHIFT 9
18 #define RESERVED_ZONES 1
// limit the max padding buf size to 4MB
20 #define MAX_PADDING_SIZE 4194304
21
22 using z_op = crimson::os::seastore::segment_manager::zbd::zone_op;
// Teach fmt how to print zone_op values: each enumerator is rendered as the
// name of the BLK*ZONE ioctl request it corresponds to.
template <> struct fmt::formatter<z_op>: fmt::formatter<std::string_view> {
  template <typename FormatContext>
  auto format(z_op s, FormatContext& ctx) {
    // Fallback text for any value outside the known enumerators.
    std::string_view name = "Unknown";
    switch (s) {
      using enum z_op;
    case OPEN:
      name = "BLKOPENZONE";
      break;
    case FINISH:
      name = "BLKFINISHZONE";
      break;
    case CLOSE:
      name = "BLKCLOSEZONE";
      break;
    case RESET:
      name = "BLKRESETZONE";
      break;
    }
    // Delegate actual output to the string_view formatter base class.
    return formatter<string_view>::format(name, ctx);
  }
};
45
46 namespace crimson::os::seastore::segment_manager::zbd {
47
48 using open_device_ret = ZBDSegmentManager::access_ertr::future<
49 std::pair<seastar::file, seastar::stat_data>>;
50 static open_device_ret open_device(
51 const std::string &path,
52 seastar::open_flags mode)
53 {
54 LOG_PREFIX(ZBDSegmentManager::open_device);
55 return seastar::file_stat(
56 path, seastar::follow_symlink::yes
57 ).then([FNAME, mode, &path](auto stat) mutable {
58 return seastar::open_file_dma(path, mode).then([=](auto file) {
59 DEBUG("open of device {} successful, size {}",
60 path,
61 stat.size);
62 return std::make_pair(file, stat);
63 });
64 }).handle_exception(
65 [FNAME](auto e) -> open_device_ret {
66 ERROR("got error {}",
67 e);
68 return crimson::ct_error::input_output_error::make();
69 }
70 );
71 }
72
// Build the on-disk superblock for a freshly formatted zoned device.
// total_size            - device size in bytes; must equal num_zones * zone_size
// meta                  - seastore metadata blob to embed in the superblock
// data                  - stat data of the device (block size, allocated size)
// zone_size_sectors     - zone size in 512B sectors (BLKGETZONESZ)
// zone_capacity_sectors - writable capacity per zone in 512B sectors
// nr_cnv_zones          - conventional (randomly writable) zones to skip
// num_zones             - total zones reported by the kernel
static zbd_sm_metadata_t make_metadata(
  uint64_t total_size,
  seastore_meta_t meta,
  const seastar::stat_data &data,
  size_t zone_size_sectors,
  size_t zone_capacity_sectors,
  size_t nr_cnv_zones,
  size_t num_zones)
{
  LOG_PREFIX(ZBDSegmentManager::make_metadata);

  // Using only SWR zones in a SMR drive, for now
  auto skipped_zones = RESERVED_ZONES + nr_cnv_zones;
  assert(num_zones > skipped_zones);

  // TODO: support Option::size_t seastore_segment_size
  // to allow zones_per_segment > 1 with striping.
  size_t zone_size = zone_size_sectors << SECT_SHIFT;
  assert(total_size == num_zones * zone_size);
  size_t zone_capacity = zone_capacity_sectors << SECT_SHIFT;
  // One segment per zone for now, so zones_per_segment is always 1.
  size_t segment_size = zone_size;
  size_t zones_per_segment = segment_size / zone_size;
  // Segments are split evenly across shards; remainder segments are unused.
  size_t segments = (num_zones - skipped_zones) / zones_per_segment;
  size_t per_shard_segments = segments / seastar::smp::count;
  size_t available_size = zone_capacity * segments;
  size_t per_shard_available_size = zone_capacity * per_shard_segments;


  WARN("Ignoring configuration values for device and segment size");
  INFO(
    "device size: {}, available size: {}, block size: {}, allocated size: {},"
    " total zones {}, zone size: {}, zone capacity: {},"
    " total segments: {}, zones per segment: {}, segment size: {}"
    " conv zones: {}, swr zones: {}, per shard segments: {}"
    " per shard available size: {}",
    total_size,
    available_size,
    data.block_size,
    data.allocated_size,
    num_zones,
    zone_size,
    zone_capacity,
    segments,
    zones_per_segment,
    zone_capacity * zones_per_segment,
    nr_cnv_zones,
    num_zones - nr_cnv_zones,
    per_shard_segments,
    per_shard_available_size);

  // Each shard's first segment starts after the skipped (reserved +
  // conventional) zones, then shards are laid out back to back.
  std::vector<zbd_shard_info_t> shard_infos(seastar::smp::count);
  for (unsigned int i = 0; i < seastar::smp::count; i++) {
    shard_infos[i].size = per_shard_available_size;
    shard_infos[i].segments = per_shard_segments;
    shard_infos[i].first_segment_offset = zone_size * skipped_zones
      + i * segment_size * per_shard_segments;
    INFO("First segment offset for shard {} is: {}",
	 i, shard_infos[i].first_segment_offset);
  }

  zbd_sm_metadata_t ret = zbd_sm_metadata_t{
    seastar::smp::count,
    segment_size,
    zone_capacity * zones_per_segment,
    zones_per_segment,
    zone_capacity,
    data.block_size,
    zone_size,
    shard_infos,
    meta};
  ret.validate();
  return ret;
}
146
// RAII owner of a BLKREPORTZONE buffer: a blk_zone_report header followed
// by nr_zones blk_zone descriptors (flexible-array layout the ioctl expects).
// Move-only; the moved-from object's hdr becomes nullptr (free(nullptr) is a
// no-op, so double-free is impossible).
struct ZoneReport {
  struct blk_zone_report *hdr;
  explicit ZoneReport(int nr_zones)
    : hdr(static_cast<blk_zone_report *>(malloc(
	sizeof(struct blk_zone_report) + nr_zones * sizeof(struct blk_zone))))
  {
    // malloc can fail; never hand an unchecked nullptr to the ioctl path.
    assert(hdr != nullptr);
  }
  ~ZoneReport() {
    free(hdr);
  }
  ZoneReport(const ZoneReport &) = delete;
  ZoneReport &operator=(const ZoneReport &) = delete;
  ZoneReport &operator=(ZoneReport &&) = delete;
  ZoneReport(ZoneReport &&rhs) noexcept : hdr(rhs.hdr) {
    rhs.hdr = nullptr;
  }
};
160
// Query the device size in bytes via BLKGETSIZE (which reports the size in
// 512B sectors; we shift by SECT_SHIFT to convert).
// NOTE(review): the ioctl return value `ret` is not inspected here; seastar
// presumably surfaces ioctl failures as exceptional futures -- confirm.
static seastar::future<size_t> get_blk_dev_size(
  seastar::file &device)
{
  return seastar::do_with(
    (uint64_t)0,
    [&](auto& size_sects) {
      return device.ioctl(
	BLKGETSIZE,
	(void *)&size_sects
      ).then([&](int ret) {
	// A zero-sized device would make all later geometry math nonsense.
	ceph_assert(size_sects);
	size_t size = size_sects << SECT_SHIFT;
	return seastar::make_ready_future<size_t>(size);
      });
    });
}
177
178 // zone_size should be in 512B sectors
179 static seastar::future<> reset_device(
180 seastar::file &device,
181 uint64_t zone_size_sects,
182 uint64_t nr_zones)
183 {
184 return seastar::do_with(
185 blk_zone_range{},
186 [&, nr_zones, zone_size_sects](auto &range) {
187 range.sector = 0;
188 range.nr_sectors = zone_size_sects * nr_zones;
189 return device.ioctl(
190 BLKRESETZONE,
191 &range
192 ).then([&](int ret){
193 return seastar::now();
194 });
195 }
196 );
197 }
198
// Fetch a full zone report and return the capacity (in 512B sectors) of the
// first reported zone.
// NOTE(review): this assumes zone 0's capacity is representative of the SWR
// zones actually used for segments; on a drive whose leading zones are
// conventional that may not hold -- confirm against callers.
static seastar::future<size_t> get_zone_capacity(
  seastar::file &device,
  uint32_t nr_zones)
{
  return seastar::do_with(
    ZoneReport(nr_zones),
    [&](auto &zr) {
      // Request a report covering every zone starting at sector 0.
      zr.hdr->sector = 0;
      zr.hdr->nr_zones = nr_zones;
      return device.ioctl(
	BLKREPORTZONE,
	zr.hdr
      ).then([&](int ret) {
	return seastar::make_ready_future<size_t>(zr.hdr->zones[0].capacity);
      });
    }
  );
}
217
218 // get the number of conventional zones of SMR HDD,
219 // they are randomly writable and don't respond to zone operations
220 static seastar::future<size_t> get_nr_cnv_zones(
221 seastar::file &device,
222 uint32_t nr_zones)
223 {
224 return seastar::do_with(
225 ZoneReport(nr_zones),
226 [&](auto &zr) {
227 zr.hdr->sector = 0;
228 zr.hdr->nr_zones = nr_zones;
229 return device.ioctl(
230 BLKREPORTZONE,
231 zr.hdr
232 ).then([&, nr_zones](int ret) {
233 size_t cnv_zones = 0;
234 for (uint32_t i = 0; i < nr_zones; i++) {
235 if (zr.hdr->zones[i].type == BLK_ZONE_TYPE_CONVENTIONAL)
236 cnv_zones++;
237 }
238 return seastar::make_ready_future<size_t>(cnv_zones);
239 });
240 }
241 );
242 }
243
244
// DMA-write the whole of `bptr` at byte offset `offset` on `device`.
// Exceptions from dma_write and short writes are both mapped to
// input_output_error.
static write_ertr::future<> do_write(
  seastar::file &device,
  uint64_t offset,
  bufferptr &bptr)
{
  LOG_PREFIX(ZBDSegmentManager::do_write);
  DEBUG("offset {} len {}",
    offset,
    bptr.length());
  return device.dma_write(
    offset,
    bptr.c_str(),
    bptr.length()
  ).handle_exception(
    [FNAME](auto e) -> write_ertr::future<size_t> {
      ERROR("dma_write got error {}",
	    e);
      return crimson::ct_error::input_output_error::make();
    }
  ).then([length = bptr.length()](auto result) -> write_ertr::future<> {
    // A short write is treated as an I/O error rather than retried.
    if (result != length) {
      return crimson::ct_error::input_output_error::make();
    }
    return write_ertr::now();
  });
}
271
// Write a bufferlist at byte offset `offset` using one dma_write per iovec
// batch, issued in parallel. Short writes and exceptions become
// input_output_error. `device_id` is only used for log attribution.
static write_ertr::future<> do_writev(
  device_id_t device_id,
  seastar::file &device,
  uint64_t offset,
  bufferlist&& bl,
  size_t block_size)
{
  LOG_PREFIX(ZBDSegmentManager::do_writev);
  DEBUG("{} offset {} len {}",
    device_id_printer_t{device_id}, offset, bl.length());
  // writev requires each buffer to be aligned to the disks' block
  // size, we need to rebuild here
  bl.rebuild_aligned(block_size);

  // Keep both the iovec table and the bufferlist alive for the duration of
  // the parallel writes; the iovecs point into the bufferlist's memory.
  return seastar::do_with(
    bl.prepare_iovs(),
    std::move(bl),
    [&device, device_id, offset, FNAME](auto& iovs, auto& bl)
  {
    return write_ertr::parallel_for_each(
      iovs,
      [&device, device_id, offset, FNAME](auto& p)
    {
      auto off = offset + p.offset;
      auto len = p.length;
      auto& iov = p.iov;
      DEBUG("{} poffset={}~{} dma_write ...",
            device_id_printer_t{device_id},
            off, len);
      return device.dma_write(off, std::move(iov)
      ).handle_exception(
        [FNAME, device_id, off, len](auto e) -> write_ertr::future<size_t>
      {
        ERROR("{} poffset={}~{} dma_write got error -- {}",
              device_id_printer_t{device_id}, off, len, e);
        return crimson::ct_error::input_output_error::make();
      }).then([FNAME, device_id, off, len](size_t written) -> write_ertr::future<> {
        // Each batch must be written in full; a short write is an error.
        if (written != len) {
          ERROR("{} poffset={}~{} dma_write len={} inconsistent",
                device_id_printer_t{device_id}, off, len, written);
          return crimson::ct_error::input_output_error::make();
        }
        DEBUG("{} poffset={}~{} dma_write done",
              device_id_printer_t{device_id},
              off, len);
        return write_ertr::now();
      });
    });
  });
}
322
323 static ZBDSegmentManager::access_ertr::future<>
324 write_metadata(seastar::file &device, zbd_sm_metadata_t sb)
325 {
326 assert(ceph::encoded_sizeof_bounded<zbd_sm_metadata_t>() <
327 sb.block_size);
328 return seastar::do_with(
329 bufferptr(ceph::buffer::create_page_aligned(sb.block_size)),
330 [=, &device](auto &bp) {
331 LOG_PREFIX(ZBDSegmentManager::write_metadata);
332 DEBUG("block_size {}", sb.block_size);
333 bufferlist bl;
334 encode(sb, bl);
335 auto iter = bl.begin();
336 assert(bl.length() < sb.block_size);
337 DEBUG("buffer length {}", bl.length());
338 iter.copy(bl.length(), bp.c_str());
339 DEBUG("doing writeout");
340 return do_write(device, 0, bp);
341 });
342 }
343
// DMA-read `len` bytes at byte offset `offset` into `bptr`.
// Exceptions from dma_read and short reads are both mapped to
// input_output_error.
static read_ertr::future<> do_read(
  seastar::file &device,
  uint64_t offset,
  size_t len,
  bufferptr &bptr)
{
  LOG_PREFIX(ZBDSegmentManager::do_read);
  // Caller must supply a buffer large enough for the requested length.
  assert(len <= bptr.length());
  DEBUG("offset {} len {}",
    offset,
    len);
  return device.dma_read(
    offset,
    bptr.c_str(),
    len
  ).handle_exception(
    [FNAME](auto e) -> read_ertr::future<size_t> {
      ERROR("dma_read got error {}",
	    e);
      return crimson::ct_error::input_output_error::make();
    }
  ).then([len](auto result) -> read_ertr::future<> {
    // A short read is treated as an I/O error rather than retried.
    if (result != len) {
      return crimson::ct_error::input_output_error::make();
    }
    return read_ertr::now();
  });
}
372
// Read and decode the superblock from block 0 of the device.
// `sd` supplies the device block size used to size the read buffer.
// Throws (via decode/validate) if the on-disk metadata is malformed.
static
ZBDSegmentManager::access_ertr::future<zbd_sm_metadata_t>
read_metadata(seastar::file &device, seastar::stat_data sd)
{
  // The encoded superblock must fit in a single device block.
  assert(ceph::encoded_sizeof_bounded<zbd_sm_metadata_t>() <
	 sd.block_size);
  return seastar::do_with(
    bufferptr(ceph::buffer::create_page_aligned(sd.block_size)),
    [=, &device](auto &bp) {
      return do_read(
	device,
	0,
	bp.length(),
	bp
      ).safe_then([=, &bp] {
	bufferlist bl;
	bl.push_back(bp);
	zbd_sm_metadata_t ret;
	auto bliter = bl.cbegin();
	decode(ret, bliter);
	// Sanity-check the decoded superblock before handing it out.
	ret.validate();
	return ZBDSegmentManager::access_ertr::future<zbd_sm_metadata_t>(
	  ZBDSegmentManager::access_ertr::ready_future_marker{},
	  ret);
      });
    });
}
400
// Mount the device on every shard; any shard_mount failure is fatal
// (asserted) rather than propagated.
ZBDSegmentManager::mount_ret ZBDSegmentManager::mount()
{
  return shard_devices.invoke_on_all([](auto &local_device) {
    return local_device.shard_mount(
    ).handle_error(
      crimson::ct_error::assert_all{
        "Invalid error in ZBDSegmentManager::mount"
      });
  });
}
411
// Per-shard mount: open the device, read the superblock, and cache both the
// whole metadata and this shard's slice of it. The device handle is kept
// open for subsequent reads/writes.
ZBDSegmentManager::mount_ret ZBDSegmentManager::shard_mount()
{
  return open_device(
    device_path, seastar::open_flags::rw
  ).safe_then([=, this](auto p) {
    device = std::move(p.first);
    auto sd = p.second;
    return read_metadata(device, sd);
  }).safe_then([=, this](auto meta){
    shard_info = meta.shard_infos[seastar::this_shard_id()];
    metadata = meta;
    return mount_ertr::now();
  });
}
426
// Format the device: the local (primary) shard writes the superblock, then
// every shard re-reads it to pick up its own shard_info. shard_mkfs errors
// are fatal (asserted).
ZBDSegmentManager::mkfs_ret ZBDSegmentManager::mkfs(
  device_config_t config)
{
  return shard_devices.local().primary_mkfs(config
  ).safe_then([this] {
    return shard_devices.invoke_on_all([](auto &local_device) {
      return local_device.shard_mkfs(
      ).handle_error(
        crimson::ct_error::assert_all{
          "Invalid error in ZBDSegmentManager::mkfs"
        });
    });
  });
}
441
// Primary-shard mkfs: discover the device geometry (zone count, zone size,
// device size, conventional zones, zone capacity), reset all zones, build
// the superblock via make_metadata(), and write it to block 0. The device
// is closed again before returning; shards re-open it in shard_mkfs().
ZBDSegmentManager::mkfs_ret ZBDSegmentManager::primary_mkfs(
  device_config_t config)
{
  LOG_PREFIX(ZBDSegmentManager::primary_mkfs);
  INFO("starting, device_path {}", device_path);
  // do_with keeps all the intermediate geometry values alive across the
  // whole continuation chain below.
  return seastar::do_with(
    seastar::file{},
    seastar::stat_data{},
    zbd_sm_metadata_t{},
    size_t(),
    size_t(),
    size_t(),
    size_t(),
    [=, this]
    (auto &device,
     auto &stat,
     auto &sb,
     auto &zone_size_sects,
     auto &nr_zones,
     auto &size,
     auto &nr_cnv_zones) {
      return open_device(
	device_path,
	seastar::open_flags::rw
      ).safe_then([=, this, &device, &stat, &sb, &zone_size_sects, &nr_zones, &size, &nr_cnv_zones](auto p) {
	device = p.first;
	stat = p.second;
	return device.ioctl(
	  BLKGETNRZONES,
	  (void *)&nr_zones
	).then([&](int ret) {
	  // Zero zones means this is not a zoned device (or the kernel
	  // can't see it as one); bail out with an I/O error.
	  if (nr_zones == 0) {
	    return seastar::make_exception_future<int>(
	      std::system_error(std::make_error_code(std::errc::io_error)));
	  }
	  return device.ioctl(BLKGETZONESZ, (void *)&zone_size_sects);
	}).then([&](int ret) {
	  ceph_assert(zone_size_sects);
	  // Wipe all zones so every write pointer starts at zone start.
	  return reset_device(device, zone_size_sects, nr_zones);
	}).then([&] {
	  return get_blk_dev_size(device);
	}).then([&](auto devsize) {
	  size = devsize;
	  return get_nr_cnv_zones(device, nr_zones);
	}).then([&](auto cnv_zones) {
	  DEBUG("Found {} conventional zones", cnv_zones);
	  nr_cnv_zones = cnv_zones;
	  return get_zone_capacity(device, nr_zones);
	}).then([&, FNAME, config](auto zone_capacity_sects) {
	  ceph_assert(zone_capacity_sects);
	  DEBUG("zone_size in sectors {}, zone_capacity in sectors {}",
		zone_size_sects, zone_capacity_sects);
	  sb = make_metadata(
	    size,
	    config.meta,
	    stat,
	    zone_size_sects,
	    zone_capacity_sects,
	    nr_cnv_zones,
	    nr_zones);
	  metadata = sb;
	  stats.metadata_write.increment(
	    ceph::encoded_sizeof_bounded<zbd_sm_metadata_t>());
	  DEBUG("Wrote to stats.");
	  return write_metadata(device, sb);
	}).finally([&, FNAME] {
	  // Always close the handle, whether the chain succeeded or not.
	  DEBUG("Closing device.");
	  return device.close();
	}).safe_then([FNAME] {
	  DEBUG("Returning from mkfs.");
	  return mkfs_ertr::now();
	});
      });
    });
}
517
// Per-shard mkfs: re-open the device, read back the superblock the primary
// wrote, cache this shard's shard_info, then close the handle again (unlike
// shard_mount, which keeps it open).
ZBDSegmentManager::mkfs_ret ZBDSegmentManager::shard_mkfs()
{
  LOG_PREFIX(ZBDSegmentManager::shard_mkfs);
  INFO("starting, device_path {}", device_path);
  return open_device(
    device_path, seastar::open_flags::rw
  ).safe_then([=, this](auto p) {
    device = std::move(p.first);
    auto sd = p.second;
    return read_metadata(device, sd);
  }).safe_then([=, this](auto meta){
    shard_info = meta.shard_infos[seastar::this_shard_id()];
    metadata = meta;
    return device.close();
  }).safe_then([FNAME] {
    DEBUG("Returning from shard_mkfs.");
    return mkfs_ertr::now();
  });
}
537
538 // Return range of sectors to operate on.
539 struct blk_zone_range make_range(
540 segment_id_t id,
541 size_t segment_size,
542 size_t first_segment_offset)
543 {
544 return blk_zone_range{
545 (id.device_segment_id() * (segment_size >> SECT_SHIFT)
546 + (first_segment_offset >> SECT_SHIFT)),
547 (segment_size >> SECT_SHIFT)
548 };
549 }
550
using blk_zone_op_ertr = crimson::errorator<
  crimson::ct_error::input_output_error>;
using blk_zone_op_ret = blk_zone_op_ertr::future<>;
// Issue a zone-management ioctl (open/finish/close/reset) over the given
// sector range, mapping both ioctl exceptions and nonzero return codes to
// input_output_error.
blk_zone_op_ret blk_zone_op(seastar::file &device,
			    blk_zone_range &range,
			    zone_op op) {
  LOG_PREFIX(ZBDSegmentManager::blk_zone_op);

  unsigned long ioctl_op = 0;
  switch (op) {
    using enum zone_op;
  case OPEN:
    ioctl_op = BLKOPENZONE;
    break;
  case FINISH:
    ioctl_op = BLKFINISHZONE;
    break;
  case RESET:
    ioctl_op = BLKRESETZONE;
    break;
  case CLOSE:
    ioctl_op = BLKCLOSEZONE;
    break;
  default:
    // Unreachable for valid zone_op values; ceph_assert(0) aborts here.
    ERROR("Invalid zone operation {}", op);
    ceph_assert(ioctl_op);
  }

  return device.ioctl(
    ioctl_op,
    &range
  ).then_wrapped([=](auto f) -> blk_zone_op_ret {
    // then_wrapped gives us the raw future so a failed ioctl can be
    // translated instead of propagating as an exception.
    if (f.failed()) {
      ERROR("{} ioctl failed", op);
      return crimson::ct_error::input_output_error::make();
    } else {
      int ret = f.get();
      if (ret == 0) {
	return seastar::now();
      } else {
	ERROR("{} ioctl failed with return code {}", op, ret);
	return crimson::ct_error::input_output_error::make();
      }
    }
  });
}
597
// Explicitly open the zone backing `id` (BLKOPENZONE) and hand back a
// Segment object for it.
ZBDSegmentManager::open_ertr::future<SegmentRef> ZBDSegmentManager::open(
  segment_id_t id)
{
  LOG_PREFIX(ZBDSegmentManager::open);
  return seastar::do_with(
    blk_zone_range{},
    [=, this](auto &range) {
      range = make_range(
	id,
	metadata.segment_size,
	shard_info.first_segment_offset);
      return blk_zone_op(
	device,
	range,
	zone_op::OPEN
      );
    }
  ).safe_then([=, this] {
    DEBUG("segment {}, open successful", id);
    return open_ertr::future<SegmentRef>(
      open_ertr::ready_future_marker{},
      SegmentRef(new ZBDSegment(*this, id))
    );
  });
}
623
// Release a segment by resetting its zone (BLKRESETZONE), which rewinds the
// write pointer and discards the zone's contents.
ZBDSegmentManager::release_ertr::future<> ZBDSegmentManager::release(
  segment_id_t id)
{
  LOG_PREFIX(ZBDSegmentManager::release);
  DEBUG("Resetting zone/segment {}", id);
  return seastar::do_with(
    blk_zone_range{},
    [=, this](auto &range) {
      range = make_range(
	id,
	metadata.segment_size,
	shard_info.first_segment_offset);
      return blk_zone_op(
	device,
	range,
	zone_op::RESET
      );
    }
  ).safe_then([=] {
    DEBUG("segment release successful");
    return release_ertr::now();
  });
}
647
// Read `len` bytes at segment-relative address `addr` into `out`, after
// validating the segment id and that the read stays within the segment's
// writable capacity.
SegmentManager::read_ertr::future<> ZBDSegmentManager::read(
  paddr_t addr,
  size_t len,
  ceph::bufferptr &out)
{
  LOG_PREFIX(ZBDSegmentManager::read);
  auto& seg_addr = addr.as_seg_paddr();
  if (seg_addr.get_segment_id().device_segment_id() >= get_num_segments()) {
    ERROR("invalid segment {}",
      seg_addr.get_segment_id().device_segment_id());
    return crimson::ct_error::invarg::make();
  }

  // Bound the read by zone capacity, not zone size: bytes past the
  // capacity were never writable.
  if (seg_addr.get_segment_off() + len > metadata.segment_capacity) {
    ERROR("invalid read offset {}, len {}",
      addr,
      len);
    return crimson::ct_error::invarg::make();
  }
  return do_read(
    device,
    get_offset(addr),
    len,
    out);
}
673
// Close a segment by finishing its zone (BLKFINISHZONE), making the zone
// read-only until it is reset.
// NOTE(review): write_pointer is currently unused here -- the kernel
// finishes the zone regardless of the caller-tracked pointer.
Segment::close_ertr::future<> ZBDSegmentManager::segment_close(
  segment_id_t id, segment_off_t write_pointer)
{
  LOG_PREFIX(ZBDSegmentManager::segment_close);
  return seastar::do_with(
    blk_zone_range{},
    [=, this](auto &range) {
      range = make_range(
	id,
	metadata.segment_size,
	shard_info.first_segment_offset);
      return blk_zone_op(
	device,
	range,
	zone_op::FINISH
      );
    }
  ).safe_then([=] {
    DEBUG("zone finish successful");
    return Segment::close_ertr::now();
  });
}
696
// Write `bl` at segment-relative address `addr`. The caller (ZBDSegment)
// has already validated the offset against the zone write pointer;
// `ignore_check` is currently unused here.
Segment::write_ertr::future<> ZBDSegmentManager::segment_write(
  paddr_t addr,
  ceph::bufferlist bl,
  bool ignore_check)
{
  LOG_PREFIX(ZBDSegmentManager::segment_write);
  assert(addr.get_device_id() == get_device_id());
  // Zoned devices only accept block-aligned writes.
  assert((bl.length() % metadata.block_size) == 0);
  auto& seg_addr = addr.as_seg_paddr();
  DEBUG("write to segment {} at offset {}, physical offset {}, len {}",
    seg_addr.get_segment_id(),
    seg_addr.get_segment_off(),
    get_offset(addr),
    bl.length());
  stats.data_write.increment(bl.length());
  return do_writev(
    get_device_id(),
    device,
    get_offset(addr),
    std::move(bl),
    metadata.block_size);
}
719
720 device_id_t ZBDSegmentManager::get_device_id() const
721 {
722 return metadata.device_id;
723 };
724
725 secondary_device_set_t& ZBDSegmentManager::get_secondary_devices()
726 {
727 return metadata.secondary_devices;
728 };
729
730 magic_t ZBDSegmentManager::get_magic() const
731 {
732 return metadata.magic;
733 };
734
// Writable capacity of this segment, delegated to the manager's
// superblock-derived segment size.
segment_off_t ZBDSegment::get_write_capacity() const
{
  return manager.get_segment_size();
}
739
// Close the device handle if one is open; a never-opened/already-closed
// manager resolves immediately.
SegmentManager::close_ertr::future<> ZBDSegmentManager::close()
{
  if (device) {
    return device.close();
  }
  return seastar::now();
}
747
// Finish this segment's zone via the manager, passing the locally tracked
// write pointer.
Segment::close_ertr::future<> ZBDSegment::close()
{
  return manager.segment_close(id, write_pointer);
}
752
// Append `bl` at `offset`. Zoned writes are strictly sequential, so the
// offset must equal the tracked write pointer and be block-aligned; writes
// past the zone capacity return enospc.
Segment::write_ertr::future<> ZBDSegment::write(
  segment_off_t offset, ceph::bufferlist bl)
{
  LOG_PREFIX(ZBDSegment::write);
  if (offset != write_pointer || offset % manager.metadata.block_size != 0) {
    ERROR("Segment offset and zone write pointer mismatch. "
          "segment {} segment-offset {} write pointer {}",
          id, offset, write_pointer);
    return crimson::ct_error::invarg::make();
  }
  if (offset + bl.length() > manager.metadata.segment_capacity) {
    return crimson::ct_error::enospc::make();
  }

  // Advance the tracked pointer optimistically before issuing the write.
  write_pointer = offset + bl.length();
  return manager.segment_write(paddr_t::make_seg_paddr(id, offset), bl);
}
770
// Write `padding_bytes` of zeros at the current write pointer, in chunks of
// at most MAX_PADDING_SIZE, looping until the full amount is written.
Segment::write_ertr::future<> ZBDSegment::write_padding_bytes(
  size_t padding_bytes)
{
  LOG_PREFIX(ZBDSegment::write_padding_bytes);
  DEBUG("Writing {} padding bytes to segment {} at wp {}",
        padding_bytes, id, write_pointer);

  // crimson::repeat re-invokes this same lambda, so the mutable by-value
  // capture of padding_bytes persists (and is decremented) across
  // iterations.
  return crimson::repeat([FNAME, padding_bytes, this] () mutable {
    size_t bufsize = 0;
    if (padding_bytes >= MAX_PADDING_SIZE) {
      bufsize = MAX_PADDING_SIZE;
    } else {
      bufsize = padding_bytes;
    }

    padding_bytes -= bufsize;
    bufferptr bp(ceph::buffer::create_page_aligned(bufsize));
    bp.zero();
    bufferlist padd_bl;
    padd_bl.append(bp);
    return write(write_pointer, padd_bl).safe_then([FNAME, padding_bytes, this]() {
      // Stop once the remaining byte count reaches zero.
      if (padding_bytes == 0) {
        return write_ertr::make_ready_future<seastar::stop_iteration>(seastar::stop_iteration::yes);
      } else {
        return write_ertr::make_ready_future<seastar::stop_iteration>(seastar::stop_iteration::no);
      }
    });
  });
}
800
// Advance write pointer, to given offset.
// Since the zone write pointer only moves by writing, the gap is filled
// with zero padding; rewinding (offset < write_pointer) is invalid.
Segment::write_ertr::future<> ZBDSegment::advance_wp(
  segment_off_t offset)
{
  LOG_PREFIX(ZBDSegment::advance_wp);

  DEBUG("Advancing write pointer from {} to {}", write_pointer, offset);
  if (offset < write_pointer) {
    return crimson::ct_error::invarg::make();
  }

  size_t padding_bytes = offset - write_pointer;

  if (padding_bytes == 0) {
    return write_ertr::now();
  }

  // Padding must be a whole number of device blocks.
  assert(padding_bytes % manager.metadata.block_size == 0);

  return write_padding_bytes(padding_bytes);
}
822
823 }