1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
6 #include <linux/blkzoned.h>
8 #include "crimson/os/seastore/segment_manager/zns.h"
9 #include "crimson/common/config_proxy.h"
10 #include "crimson/common/log.h"
11 #include "include/buffer.h"
14 seastar::logger
&logger(){
15 return crimson::get_logger(ceph_subsys_seastore_device
);
19 namespace crimson::os::seastore::segment_manager::zns
{
21 using open_device_ret
= ZNSSegmentManager::access_ertr::future
<
22 std::pair
<seastar::file
, seastar::stat_data
>>;
// Stats `path` (following symlinks), then opens it for DMA access with
// `mode`; on success the continuation yields the (file, stat) pair.  The
// visible error handler converts a failure into
// ct_error::input_output_error.
// NOTE(review): lines of the original body are missing from this listing
// (the logger calls that own the two message strings, and the braces), so
// the exact continuation/handler structure cannot be confirmed from here.
// NOTE(review): `path` is captured by reference in the first continuation;
// confirm the caller keeps the string alive until the future resolves.
23 static open_device_ret
open_device(
24 const std::string
&path
,
25 seastar::open_flags mode
)
27 return seastar::file_stat(
28 path
, seastar::follow_symlink::yes
29 ).then([mode
, &path
](auto stat
) mutable{
30 return seastar::open_file_dma(path
, mode
).then([=](auto file
){
32 "open_device: open successful, size {}",
34 return std::make_pair(file
, stat
);
37 [](auto e
) -> open_device_ret
{
39 "open_device: got error {}",
41 return crimson::ct_error::input_output_error::make();
// Builds the zns_sm_metadata_t describing the device layout.
// Visible steps:
//  - reads "seastore_device_size" and "seastore_segment_size" from config;
//  - uses the configured device size only when the stat'ed size is 0;
//  - zones_per_segment = configured segment size / zone_capacity;
//  - segments = (num_zones - 1) * zones_per_segment.
// NOTE(review): only the `data` parameter is visible in this listing; the
// remaining parameters (zone_capacity, num_zones, ...) and most fields of the
// returned initializer are not shown, so they are not documented here.
// NOTE(review): "CONFIG SIZE" is logged at error level; looks like leftover
// debugging output -- confirm the intended level.
// NOTE(review): the division is integral; if the configured segment size is
// smaller than zone_capacity, zones_per_segment becomes 0 -- verify callers.
46 static zns_sm_metadata_t
make_metadata(
48 const seastar::stat_data
&data
,
53 using crimson::common::get_conf
;
55 auto config_size
= get_conf
<Option::size_t>(
56 "seastore_device_size");
58 size_t size
= (data
.size
== 0) ? config_size
: data
.size
;
60 auto config_segment_size
= get_conf
<Option::size_t>(
61 "seastore_segment_size");
62 logger().error("CONFIG SIZE: {}", config_segment_size
);
63 size_t zones_per_segment
= config_segment_size
/ zone_capacity
;
65 size_t segments
= (num_zones
- 1) * zones_per_segment
;
68 "{}: size {}, block_size {}, allocated_size {}, configured_size {}, "
77 zns_sm_metadata_t ret
= zns_sm_metadata_t
{
80 zone_capacity
* zones_per_segment
,
// Owner of a heap buffer holding a blk_zone_report header followed by room
// for `nr_zones` blk_zone entries (see the sizeof arithmetic in the
// constructor).  Copy construction is deleted; the move constructor adopts
// rhs.hdr.
// NOTE(review): the struct header line, the destructor and the body of the
// move constructor are not visible in this listing -- confirm that hdr is
// released with free() and that rhs.hdr is cleared after a move.
// NOTE(review): the malloc() result is not checked in the visible code.
92 struct blk_zone_report
*hdr
;
93 ZoneReport(int nr_zones
)
94 : hdr((blk_zone_report
*)malloc(
95 sizeof(struct blk_zone_report
) + nr_zones
* sizeof(struct blk_zone
))){;}
99 ZoneReport(const ZoneReport
&) = delete;
100 ZoneReport(ZoneReport
&&rhs
) : hdr(rhs
.hdr
) {
// Helper operating on `device` with a zone range and a ZoneReport kept alive
// by seastar::do_with.  The visible code sizes the range to cover
// zone_size * nr_zones sectors and has a path that returns an already-ready
// future.
// NOTE(review): the remaining parameters and the ioctl calls of the original
// body are missing from this listing; presumably this resets every zone of
// the device -- confirm against the full source.
105 static seastar::future
<> reset_device(
106 seastar::file
&device
,
110 return seastar::do_with(
112 ZoneReport(nr_zones
),
113 [&, nr_zones
] (auto &range
, auto &zr
){
115 range
.nr_sectors
= zone_size
* nr_zones
;
120 return seastar::now();
// Resolves to a size_t taken from the write pointer (`wp`) of the first
// reported zone.  Visible sequence: set up a range covering the first zone
// (sector 0, zone_size sectors), issue BLKREPORTZONE into zr.hdr, issue
// BLKRESETZONE on that range, then return zr.hdr->zones[0].wp.
// NOTE(review): at least one call preceding BLKREPORTZONE is missing from
// this listing; presumably the zone is opened/filled first so that wp
// reflects the capacity -- confirm against the full source.
// NOTE(review): the `ret` values of the ioctl continuations are not used in
// the visible lines; confirm failures are handled somewhere.
// NOTE(review): the report buffer is sized for nr_zones entries although only
// zones[0] is read here.
126 static seastar::future
<size_t> get_zone_capacity(
127 seastar::file
&device
,
131 return seastar::do_with(
133 ZoneReport(nr_zones
),
134 [&] (auto &first_zone_range
, auto &zr
){
135 first_zone_range
.sector
= 0;
136 first_zone_range
.nr_sectors
= zone_size
;
141 return device
.ioctl(BLKREPORTZONE
, zr
.hdr
);
142 }).then([&] (int ret
){
143 return device
.ioctl(BLKRESETZONE
, &first_zone_range
);
144 }).then([&](int ret
){
145 return seastar::make_ready_future
<size_t>(zr
.hdr
->zones
[0].wp
);
// Writes a single buffer to `device` with dma_write.
// Visible behaviour: a dma_write failure is mapped to
// ct_error::input_output_error, and a short write (bytes written differ from
// bptr.length()) is also reported as input_output_error; otherwise the
// returned future resolves successfully.
// NOTE(review): the offset/buffer parameters and the logger call owning the
// message strings are missing from this listing.
151 static write_ertr::future
<> do_write(
152 seastar::file
&device
,
157 "zns: do_write offset {} len {}",
160 return device
.dma_write(
165 [](auto e
) -> write_ertr::future
<size_t> {
167 "do_write: dma_write got error {}",
169 return crimson::ct_error::input_output_error::make();
171 ).then([length
= bptr
.length()](auto result
) -> write_ertr::future
<> {
172 if (result
!= length
) {
173 return crimson::ct_error::input_output_error::make();
175 return write_ertr::now();
// Vectored write of a bufferlist to `device`.
// Visible behaviour: the list is rebuilt so each buffer is aligned to
// block_size, converted to an iovec array, and handed to dma_write.  The
// bufferlist is moved into the final continuation so its buffers stay alive
// for the duration of the I/O.  A dma_write failure or a short write
// (written != bl.length()) yields ct_error::input_output_error.
// NOTE(review): the log message is prefixed "block:" whereas do_write uses
// "zns:" -- looks inherited from another segment manager; confirm.
// NOTE(review): the parameter list beyond `device` is missing from this
// listing.
179 static write_ertr::future
<> do_writev(
180 seastar::file
&device
,
186 "block: do_writev offset {} len {}",
189 // writev requires each buffer to be aligned to the disks' block
190 // size, we need to rebuild here
191 bl
.rebuild_aligned(block_size
);
193 std::vector
<iovec
> iov
;
194 bl
.prepare_iov(&iov
);
195 return device
.dma_write(
199 [](auto e
) -> write_ertr::future
<size_t> {
201 "do_writev: dma_write got error {}",
203 return crimson::ct_error::input_output_error::make();
205 ).then([bl
=std::move(bl
)/* hold the buf until the end of io */](size_t written
)
206 -> write_ertr::future
<> {
207 if (written
!= bl
.length()) {
208 return crimson::ct_error::input_output_error::make();
210 return write_ertr::now();
// Writes the metadata block `sb` to offset 0 of `device`.
// Visible steps: allocate a page-aligned buffer of sb.block_size bytes, copy
// the encoded bytes of a bufferlist `bl` into it (asserting that they fit in
// one block), then call do_write(device, 0, bp).
// NOTE(review): the statements that create and encode `bl`, and the right
// hand side of the first assert, are missing from this listing.
// NOTE(review): "BLOCK SIZE" and the bare length are logged at error level;
// these look like leftover debugging output -- confirm the intended level.
214 static ZNSSegmentManager::access_ertr::future
<>
215 write_metadata(seastar::file
&device
, zns_sm_metadata_t sb
)
217 assert(ceph::encoded_sizeof_bounded
<zns_sm_metadata_t
>() <
219 return seastar::do_with(
220 bufferptr(ceph::buffer::create_page_aligned(sb
.block_size
)),
221 [=, &device
](auto &bp
){
222 logger().error("BLOCK SIZE: {}", sb
.block_size
);
225 auto iter
= bl
.begin();
226 assert(bl
.length() < sb
.block_size
);
227 logger().error("{}", bl
.length());
228 iter
.copy(bl
.length(), bp
.c_str());
229 logger().debug("write_metadata: doing writeout");
230 return do_write(device
, 0, bp
);
// Reads `len` bytes from `device` with dma_read into `bptr`, which must be
// at least `len` bytes long (asserted).
// Visible behaviour: a dma_read failure is mapped to
// ct_error::input_output_error; the final continuation returns
// input_output_error on one path and success on the other.
// NOTE(review): the condition guarding the second error return is missing
// from this listing; presumably it is a short-read check against `len` --
// confirm.  The log prefix "block:" differs from "zns:" used by do_write.
234 static read_ertr::future
<> do_read(
235 seastar::file
&device
,
240 assert(len
<= bptr
.length());
242 "block: do_read offset {} len {}",
245 return device
.dma_read(
250 [](auto e
) -> read_ertr::future
<size_t> {
252 "do_read: dma_read got error {}",
254 return crimson::ct_error::input_output_error::make();
256 ).then([len
](auto result
) -> read_ertr::future
<> {
258 return crimson::ct_error::input_output_error::make();
260 return read_ertr::now();
// Reads the metadata block from `device` and returns it as a
// zns_sm_metadata_t.
// Visible steps: allocate a page-aligned buffer of sd.block_size bytes, and
// in the success continuation construct a zns_sm_metadata_t from a bufferlist
// iterator and return it in a ready access_ertr future.
// NOTE(review): the read call itself, the construction of `bl`, the decode
// statement and the right hand side of the assert are missing from this
// listing.
// NOTE(review): unlike the neighbouring helpers, no `static` is visible on
// this definition -- confirm whether external linkage is intended.
265 ZNSSegmentManager::access_ertr::future
<zns_sm_metadata_t
>
266 read_metadata(seastar::file
&device
, seastar::stat_data sd
)
268 assert(ceph::encoded_sizeof_bounded
<zns_sm_metadata_t
>() <
270 return seastar::do_with(
271 bufferptr(ceph::buffer::create_page_aligned(sd
.block_size
)),
272 [=, &device
](auto &bp
) {
278 ).safe_then([=, &bp
] {
281 zns_sm_metadata_t ret
;
282 auto bliter
= bl
.cbegin();
284 return ZNSSegmentManager::access_ertr::future
<zns_sm_metadata_t
>(
285 ZNSSegmentManager::access_ertr::ready_future_marker
{},
// Mounts the segment manager: opens device_path read/write, stores the
// opened file in `device`, reads the on-disk metadata via read_metadata(),
// and resolves successfully.
// NOTE(review): the statements that derive `sd` from the open result and
// that consume `meta` (presumably assigning the member `metadata`) are
// missing from this listing -- confirm against the full source.
291 ZNSSegmentManager::mount_ret
ZNSSegmentManager::mount()
294 device_path
, seastar::open_flags::rw
295 ).safe_then([=](auto p
) {
296 device
= std::move(p
.first
);
298 return read_metadata(device
, sd
);
299 }).safe_then([=](auto meta
){
301 return mount_ertr::now();
// Formats the device for use by this segment manager.
// Visible sequence, with state kept alive by seastar::do_with: open
// device_path read/write; query the zone size with BLKGETZONESZ; call
// reset_device(); call get_zone_capacity(); account the encoded metadata size
// in stats.metadata_write; write the metadata with write_metadata(); close
// the device; resolve successfully.  One visible path returns an exception
// future carrying std::errc::io_error.
// NOTE(review): several statements are missing from this listing (the
// remaining do_with state, the condition guarding the io_error path, how
// nr_zones and `sb` are computed), so only the steps above are documented.
// NOTE(review): all progress messages are logged at error level ("starting",
// "WROTE TO STATS", "CLOSING DEVICE", "RETURNING FROM MKFS"); these look
// like leftover debugging output -- confirm the intended level.
305 ZNSSegmentManager::mkfs_ret
ZNSSegmentManager::mkfs(
306 segment_manager_config_t config
)
308 logger().error("ZNSSegmentManager::mkfs: starting");
309 return seastar::do_with(
311 seastar::stat_data
{},
315 [=](auto &device
, auto &stat
, auto &sb
, auto &zone_size
, auto &nr_zones
){
316 logger().error("ZNSSegmentManager::mkfs path {}", device_path
);
319 seastar::open_flags::rw
320 ).safe_then([=, &device
, &stat
, &sb
, &zone_size
, &nr_zones
](auto p
){
328 return seastar::make_exception_future
<int>(
329 std::system_error(std::make_error_code(std::errc::io_error
)));
331 return device
.ioctl(BLKGETZONESZ
, (void *)&zone_size
);
332 }).then([&] (int ret
){
333 return reset_device(device
, zone_size
, nr_zones
);
335 return get_zone_capacity(device
, zone_size
, nr_zones
);
336 }).then([&, config
] (auto zone_capacity
){
344 stats
.metadata_write
.increment(
345 ceph::encoded_sizeof_bounded
<zns_sm_metadata_t
>());
346 logger().error("WROTE TO STATS");
347 return write_metadata(device
, sb
);
349 logger().error("CLOSING DEVICE");
350 return device
.close();
352 logger().error("RETURNING FROM MKFS");
353 return mkfs_ertr::now();
// Builds the blk_zone_range for segment `id`.  The first initializer of the
// returned range is
//   id.device_segment_id() * segment_size + first_segment_offset.
// NOTE(review): the leading parameters and the remaining initializer(s) of
// the range are missing from this listing; units (bytes vs. sectors) cannot
// be confirmed from here.
359 struct blk_zone_range
make_range(
363 size_t first_segment_offset
)
365 return blk_zone_range
{
366 (id
.device_segment_id() * segment_size
+ first_segment_offset
),
371 using blk_open_zone_ertr
= crimson::errorator
<
372 crimson::ct_error::input_output_error
>;
373 using blk_open_zone_ret
= blk_open_zone_ertr::future
<>;
// Issues a zone operation on `device` for `range` and converts the outcome
// into blk_open_zone_ret: the visible continuation returns
// ct_error::input_output_error on two paths and an already-ready future on
// the success path.
// NOTE(review): the ioctl call and the conditions selecting each return are
// missing from this listing; presumably this wraps BLKOPENZONE -- confirm.
374 blk_open_zone_ret
blk_open_zone(seastar::file
&device
, blk_zone_range
&range
){
378 ).then_wrapped([=](auto f
) -> blk_open_zone_ret
{
380 return crimson::ct_error::input_output_error::make();
385 return seastar::now();
387 return crimson::ct_error::input_output_error::make();
// Opens the zone(s) backing a segment and returns a SegmentRef for it.
// Visible steps: build state with seastar::do_with using
// metadata.first_segment_offset, call blk_open_zone(), and on success return
// a new ZNSSegment(*this, id) wrapped in a ready open_ertr future.
// NOTE(review): the parameter list and the range construction are missing
// from this listing.
// NOTE(review): the success message is logged at error level -- confirm the
// intended level.
393 ZNSSegmentManager::open_ertr::future
<SegmentRef
> ZNSSegmentManager::open(
396 return seastar::do_with(
403 metadata
.first_segment_offset
);
404 return blk_open_zone(
410 logger().error("open _segment: open successful");
411 return open_ertr::future
<SegmentRef
>(
412 open_ertr::ready_future_marker
{},
413 SegmentRef(new ZNSSegment(*this, id
))
418 using blk_close_zone_ertr
= crimson::errorator
<
419 crimson::ct_error::input_output_error
>;
420 using blk_close_zone_ret
= blk_close_zone_ertr::future
<>;
// Issues a zone operation on `device` for `range` and converts the outcome
// into an errorated future: the visible continuation returns
// ct_error::input_output_error on two paths and an already-ready future on
// the success path.
// NOTE(review): the ioctl call and the conditions selecting each return are
// missing from this listing; presumably this wraps BLKCLOSEZONE -- confirm.
// NOTE(review): the continuation is annotated with blk_open_zone_ret rather
// than blk_close_zone_ret.  Both aliases name a future over the same
// single-error errorator, so this is cosmetic, but it looks like a
// copy/paste slip.
421 blk_close_zone_ret
blk_close_zone(
422 seastar::file
&device
,
423 blk_zone_range
&range
)
428 ).then_wrapped([=](auto f
) -> blk_open_zone_ret
{
430 return crimson::ct_error::input_output_error::make();
435 return seastar::now();
437 return crimson::ct_error::input_output_error::make();
// Releases a segment.  Visible steps: build state with seastar::do_with
// using metadata.first_segment_offset, call blk_close_zone(), then log and
// resolve successfully.
// NOTE(review): the parameter list and the range construction are missing
// from this listing.  Only a zone close is visible; whether the zone is also
// reset cannot be determined from here.
// NOTE(review): the success message is logged at error level -- confirm the
// intended level.
443 ZNSSegmentManager::release_ertr::future
<> ZNSSegmentManager::release(
446 return seastar::do_with(
453 metadata
.first_segment_offset
);
454 return blk_close_zone(
460 logger().error("release _segment: release successful");
461 return release_ertr::now();
// Reads from a segment address into `out`.
// Visible validation: the segment's device_segment_id must be below
// get_num_segments(), and segment offset + len must not exceed
// metadata.zone_size; either violation yields ct_error::invarg.
// NOTE(review): the leading parameters, the logger calls owning the message
// strings, and the actual read after validation are missing from this
// listing.
// NOTE(review): the range check here uses metadata.zone_size whereas
// ZNSSegment::write bounds writes by metadata.segment_size -- confirm which
// limit is intended.
465 SegmentManager::read_ertr::future
<> ZNSSegmentManager::read(
468 ceph::bufferptr
&out
)
470 auto& seg_addr
= addr
.as_seg_paddr();
471 if (seg_addr
.get_segment_id().device_segment_id() >= get_num_segments()) {
473 "ZNSSegmentManager::read: invalid segment {}",
475 return crimson::ct_error::invarg::make();
478 if (seg_addr
.get_segment_off() + len
> metadata
.zone_size
) {
480 "ZNSSegmentManager::read: invalid offset {}~{}!",
483 return crimson::ct_error::invarg::make();
// Closes the zone(s) backing segment `id`.  Visible steps: build state with
// seastar::do_with using metadata.first_segment_offset, call
// blk_close_zone(), then log and resolve successfully.
// NOTE(review): the range construction is missing from this listing, and
// `write_pointer` is not referenced in any visible line -- confirm whether it
// is used.
// NOTE(review): the success message reads "open _segment: open successful"
// although this is the close path, and it is logged at error level; it looks
// copied from ZNSSegmentManager::open.
492 Segment::close_ertr::future
<> ZNSSegmentManager::segment_close(
493 segment_id_t id
, segment_off_t write_pointer
)
495 return seastar::do_with(
502 metadata
.first_segment_offset
);
503 return blk_close_zone(
509 logger().error("open _segment: open successful");
510 return Segment::close_ertr::now();
// Writes `bl` at segment address `addr`.
// Visible steps: assert that addr belongs to this device and that the
// buffer length is a multiple of metadata.block_size, log the target, add
// the length to stats.data_write, and issue the write (the trailing
// metadata.block_size argument is visible).
// NOTE(review): the parameter list, the physical-offset computation and the
// name of the write call are missing from this listing.
// NOTE(review): the log message is prefixed "BlockSegmentManager::" although
// this is ZNSSegmentManager -- looks inherited from another segment manager.
514 Segment::write_ertr::future
<> ZNSSegmentManager::segment_write(
519 assert(addr
.get_device_id() == get_device_id());
520 assert((bl
.length() % metadata
.block_size
) == 0);
521 auto& seg_addr
= addr
.as_seg_paddr();
523 "BlockSegmentManager::segment_write: "
524 "segment_write to segment {} at offset {}, physical offset {}, len {}",
525 seg_addr
.get_segment_id(),
526 seg_addr
.get_segment_off(),
529 stats
.data_write
.increment(bl
.length());
534 metadata
.block_size
);
537 device_id_t
ZNSSegmentManager::get_device_id() const
539 return metadata
.device_id
;
542 secondary_device_set_t
& ZNSSegmentManager::get_secondary_devices()
544 return metadata
.secondary_devices
;
// Builds a device_spec_t from this manager's metadata, copying the magic,
// device type and device id fields.
// NOTE(review): the statement returning `spec` is not visible in this
// listing; presumably the function ends with `return spec;` -- confirm.
547 device_spec_t
ZNSSegmentManager::get_device_spec() const
549 auto spec
= device_spec_t();
550 spec
.magic
= metadata
.magic
;
551 spec
.dtype
= metadata
.dtype
;
552 spec
.id
= metadata
.device_id
;
556 magic_t
ZNSSegmentManager::get_magic() const
558 return metadata
.magic
;
561 segment_off_t
ZNSSegment::get_write_capacity() const
563 return manager
.get_segment_size();
// Closes the segment manager.  Two returns are visible: one closes the
// underlying `device`, the other resolves immediately without doing anything.
// NOTE(review): the condition selecting between them is missing from this
// listing; presumably the device is closed only when it is open -- confirm.
566 SegmentManager::close_ertr::future
<> ZNSSegmentManager::close()
569 return device
.close();
571 return seastar::now();
574 Segment::close_ertr::future
<> ZNSSegment::close()
576 return manager
.segment_close(id
, write_pointer
);
// Appends `bl` to this segment at `offset`.
// Visible behaviour:
//  - returns ct_error::invarg when offset is behind write_pointer or is not
//    a multiple of the manager's metadata.block_size;
//  - returns ct_error::enospc when offset + bl.length() exceeds the manager's
//    metadata.segment_size;
//  - otherwise sets write_pointer to offset + bl.length() and forwards the
//    write to manager.segment_write() at the segment address (id, offset).
// NOTE(review): the logger call owning the message strings and the braces
// are missing from this listing.
// NOTE(review): write_pointer is updated before the write is issued; confirm
// that a failed write leaving the pointer advanced is acceptable.
579 Segment::write_ertr::future
<> ZNSSegment::write(
580 segment_off_t offset
, ceph::bufferlist bl
)
582 if (offset
< write_pointer
|| offset
% manager
.metadata
.block_size
!= 0) {
584 "ZNSSegmentManager::ZNSSegment::write: "
585 "invalid segment write on segment {} to offset {}",
588 return crimson::ct_error::invarg::make();
590 if (offset
+ bl
.length() > manager
.metadata
.segment_size
)
591 return crimson::ct_error::enospc::make();
593 write_pointer
= offset
+ bl
.length();
594 return manager
.segment_write(paddr_t::make_seg_paddr(id
, offset
), bl
);