1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
9 #include "crimson/common/log.h"
10 #include "crimson/common/errorator-loop.h"
12 #include "include/buffer.h"
13 #include "rbm_device.h"
14 #include "nvme_block_device.h"
15 #include "block_rb_manager.h"
17 namespace crimson::os::seastore::random_block_device
{
18 #include "crimson/os/seastore/logging.h"
19 SET_SUBSYS(seastore_device
);
// RBMDevice::do_primary_mkfs
// Fills in the `super` metadata header (block size from `st`, the
// RBM_BITMAP_BLOCK_CRC feature bit, the supplied config and journal_size),
// divides super.size into shard_num block-aligned shards, then opens the
// device and calls write_rbm_header().
// `st` is presumably the result of stat_device() (the error text below names
// it); the call itself is not visible in this listing.
// NOTE(review): this listing appears to have dropped source lines (the
// embedded line numbers skip, e.g. 24-25, 29, 32, 48); verify against the
// original file before relying on the exact control flow.
21 RBMDevice::mkfs_ret
RBMDevice::do_primary_mkfs(device_config_t config
,
22 int shard_num
, size_t journal_size
) {
23 LOG_PREFIX(RBMDevice::do_primary_mkfs
);
26 mkfs_ertr::pass_further
{},
27 crimson::ct_error::assert_all
{
28 "Invalid error stat_device in RBMDevice::do_primary_mkfs"}
30 [this, FNAME
, config
=std::move(config
), shard_num
, journal_size
](auto st
) {
31 super
.block_size
= st
.block_size
;
33 super
.feature
|= RBM_BITMAP_BLOCK_CRC
;
34 super
.config
= std::move(config
);
35 super
.journal_size
= journal_size
;
// Sanity checks: the journal must be non-empty and no larger than the device,
// and there must be at least one shard.
36 ceph_assert_always(super
.journal_size
> 0);
37 ceph_assert_always(super
.size
>= super
.journal_size
);
38 ceph_assert_always(shard_num
> 0);
40 std::vector
<rbm_shard_info_t
> shard_infos(shard_num
);
41 for (int i
= 0; i
< shard_num
; i
++) {
// Per-shard size: an equal share of super.size, rounded down to a multiple of
// super.block_size. Shards are laid out back to back starting at offset 0.
42 uint64_t aligned_size
=
43 (super
.size
/ shard_num
) -
44 ((super
.size
/ shard_num
) % super
.block_size
);
45 shard_infos
[i
].size
= aligned_size
;
46 shard_infos
[i
].start_offset
= i
* aligned_size
;
// NOTE(review): plain assert() here, unlike the ceph_assert_always() checks
// above; presumably compiled out under NDEBUG - confirm this is intended.
47 assert(shard_infos
[i
].size
> super
.journal_size
);
49 super
.shard_infos
= shard_infos
;
50 super
.shard_num
= shard_num
;
// Keep the entry for the shard this code runs on.
// NOTE(review): assumes seastar::this_shard_id() < shard_num; no bounds check
// is visible here - confirm.
51 shard_info
= shard_infos
[seastar::this_shard_id()];
52 DEBUG("super {} ", super
);
// Open the device read-write with dsync before writing the header.
55 return open(get_device_path(),
56 seastar::open_flags::rw
| seastar::open_flags::dsync
58 mkfs_ertr::pass_further
{},
59 crimson::ct_error::assert_all
{
60 "Invalid error open in RBMDevice::do_primary_mkfs"}
62 return write_rbm_header(
66 mkfs_ertr::pass_further
{},
67 crimson::ct_error::assert_all
{
68 "Invalid error write_rbm_header in RBMDevice::do_primary_mkfs"
// RBMDevice::write_rbm_header
// Encodes `super`, computes a crc32c over that encoding into super.crc, copies
// the bufferlist `bl` into a page-aligned buffer of super.block_size bytes and
// writes that buffer at RBM_START_ADDRESS.
// NOTE(review): the declaration/encoding of `bl` and the full if/else around
// the CRC assignment are not visible in this listing (embedded line numbers
// 83-84 and 86-89 are absent); presumably `bl` holds the re-encoded `super`
// - confirm against the original file.
74 write_ertr::future
<> RBMDevice::write_rbm_header()
76 bufferlist meta_b_header
;
78 encode(super
, meta_b_header
);
79 // If NVMeDevice supports data protection, CRC for checksum is not required
80 // NVMeDevice is expected to generate and store checksum internally.
81 // CPU overhead for CRC might be saved.
82 if (is_data_protection_enabled()) {
85 super
.crc
= meta_b_header
.crc32c(-1);
90 auto iter
= bl
.begin();
91 auto bp
= bufferptr(ceph::buffer::create_page_aligned(super
.block_size
));
// The copy below writes bl.length() bytes into bp, which holds
// super.block_size bytes; this assert is the only size check visible here.
92 assert(bl
.length() < super
.block_size
);
93 iter
.copy(bl
.length(), bp
.c_str());
94 return write(RBM_START_ADDRESS
, std::move(bp
));
// RBMDevice::read_rbm_header
// Allocates a page-aligned buffer of super.block_size bytes, decodes an
// rbm_metadata_header_t from the bufferlist `bl`, checks its CRC when data
// protection is disabled, and returns a ready future of
// rbm_metadata_header_t.
// Visible error results: enoent when decoding throws ceph::buffer::error,
// input_output_error on CRC mismatch.
// NOTE(review): the parameter list, the read call and the construction of `bl`
// are not visible in this listing (embedded line numbers skip); `addr`
// (captured below) is presumably the device address to read - confirm.
97 read_ertr::future
<rbm_metadata_header_t
> RBMDevice::read_rbm_header(
100 LOG_PREFIX(RBMDevice::read_rbm_header
);
// super.block_size must already be set by the caller; it sizes the buffer.
101 assert(super
.block_size
> 0);
102 return seastar::do_with(
103 bufferptr(ceph::buffer::create_page_aligned(super
.block_size
)),
104 [this, addr
, FNAME
](auto &bptr
) {
108 ).safe_then([length
=bptr
.length(), this, bptr
, FNAME
]()
109 -> read_ertr::future
<rbm_metadata_header_t
> {
112 auto p
= bl
.cbegin();
113 rbm_metadata_header_t super_block
;
115 decode(super_block
, p
);
117 catch (ceph::buffer::error
& e
) {
118 DEBUG("read_rbm_header: unable to decode rbm super block {}",
120 return crimson::ct_error::enoent::make();
// Remember the stored CRC, then re-encode the header so a checksum can be
// recomputed over it.
122 checksum_t crc
= super_block
.crc
;
123 bufferlist meta_b_header
;
125 encode(super_block
, meta_b_header
);
126 assert(ceph::encoded_sizeof
<rbm_metadata_header_t
>(super_block
) <
127 super_block
.block_size
);
129 // Do CRC verification only if data protection is not supported.
130 if (is_data_protection_enabled() == false) {
131 if (meta_b_header
.crc32c(-1) != crc
) {
// NOTE(review): the first argument is the recomputed crc32c and the second is
// the stored crc; check that this matches the "expected"/"actual" labels.
132 DEBUG("bad crc on super block, expected {} != actual {} ",
133 meta_b_header
.crc32c(-1), crc
);
134 return crimson::ct_error::input_output_error::make();
// presumably the data-protection branch: the stored crc is required to be
// (checksum_t)-1 - TODO confirm (the enclosing else is not visible here).
137 ceph_assert_always(crc
== (checksum_t
)-1);
// Store the saved crc value in the decoded header.
139 super_block
.crc
= crc
;
141 DEBUG("got {} ", super
);
142 return read_ertr::future
<rbm_metadata_header_t
>(
143 read_ertr::ready_future_marker
{},
// RBMDevice::do_shard_mount
// Opens the device read-write with dsync, copies st.block_size into
// super.block_size, reads the RBM header from RBM_START_ADDRESS and stores the
// shard_infos entry for the current shard in shard_info.
// `st` is presumably the result of stat_device() (the error text below names
// it); the call itself is not visible in this listing.
// NOTE(review): some source lines appear to be missing from this listing
// (embedded line numbers skip, e.g. 154-156, 168, 170-172).
150 RBMDevice::mount_ret
RBMDevice::do_shard_mount()
152 return open(get_device_path(),
153 seastar::open_flags::rw
| seastar::open_flags::dsync
157 mount_ertr::pass_further
{},
158 crimson::ct_error::assert_all
{
159 "Invalid error stat_device in RBMDevice::do_shard_mount"}
160 ).safe_then([this](auto st
) {
161 assert(st
.block_size
> 0);
// read_rbm_header() sizes its buffer from super.block_size, so set it first.
162 super
.block_size
= st
.block_size
;
163 return read_rbm_header(RBM_START_ADDRESS
164 ).safe_then([this](auto s
) {
165 LOG_PREFIX(RBMDevice::do_shard_mount
);
// NOTE(review): indexes s.shard_infos by seastar::this_shard_id() with no
// visible bounds check; assumes the header holds an entry for this shard.
166 shard_info
= s
.shard_infos
[seastar::this_shard_id()];
167 INFO("{} read {}", device_id_printer_t
{get_device_id()}, shard_info
);
169 return seastar::now();
173 mount_ertr::pass_further
{},
174 crimson::ct_error::assert_all
{
175 "Invalid error mount in RBMDevice::do_shard_mount"}
// create_test_ephemeral
// Returns an EphemeralRBMDeviceRef wrapping a new EphemeralRBMDevice
// constructed with (journal_size + data_size +
// RBMDevice::get_shard_reserved_size(), EphemeralRBMDevice::TEST_BLOCK_SIZE).
// presumably the two constructor arguments are total size and block size -
// the constructor is not visible here.
179 EphemeralRBMDeviceRef
create_test_ephemeral(uint64_t journal_size
, uint64_t data_size
) {
180 return EphemeralRBMDeviceRef(
181 new EphemeralRBMDevice(journal_size
+ data_size
+
182 random_block_device::RBMDevice::get_shard_reserved_size(),
183 EphemeralRBMDevice::TEST_BLOCK_SIZE
));
// EphemeralRBMDevice::open
// Visible here: an early return of open_ertr::now(), a log message about
// initializing the test memory device, mapping flags PROT_READ | PROT_WRITE
// and MAP_SHARED | MAP_ANONYMOUS, a memset of `buf` to zero over `size` bytes,
// and a final open_ertr::now().
// NOTE(review): the condition guarding the early return and the mapping call
// itself (presumably mmap) are not visible in this listing; in_path and mode
// are not referenced in the visible lines.
186 open_ertr::future
<> EphemeralRBMDevice::open(
187 const std::string
&in_path
,
188 seastar::open_flags mode
) {
189 LOG_PREFIX(EphemeralRBMDevice::open
);
191 return open_ertr::now();
195 "Initializing test memory device {}",
201 PROT_READ
| PROT_WRITE
, MAP_SHARED
| MAP_ANONYMOUS
,
// Zero-fill the whole buffer before reporting success.
207 ::memset(buf
, 0, size
);
208 return open_ertr::now();
// EphemeralRBMDevice::write
// Copies bptr.length() bytes from bptr into the in-memory buffer at
// buf + offset and returns write_ertr::now().
// NOTE(review): the parameter list is not visible in this listing, and no
// bounds check of offset + bptr.length() against the buffer size is visible
// either - confirm one exists in the original file.
211 write_ertr::future
<> EphemeralRBMDevice::write(
215 LOG_PREFIX(EphemeralRBMDevice::write
);
218 "EphemeralRBMDevice: write offset {} len {}",
222 ::memcpy(buf
+ offset
, bptr
.c_str(), bptr
.length());
224 return write_ertr::now();
// EphemeralRBMDevice::read
// Fills bptr (from position 0, bptr.length() bytes) with data taken from the
// in-memory buffer at buf + offset and returns read_ertr::now().
// NOTE(review): the parameter list and any bounds checks are not visible in
// this listing.
227 read_ertr::future
<> EphemeralRBMDevice::read(
230 LOG_PREFIX(EphemeralRBMDevice::read
);
233 "EphemeralRBMDevice: read offset {} len {}",
237 bptr
.copy_in(0, bptr
.length(), buf
+ offset
);
238 return read_ertr::now();
// EphemeralRBMDevice::close
// Returns close_ertr::now().
// NOTE(review): embedded line 243 is absent from this listing, so any other
// work done on close is not visible here.
241 Device::close_ertr::future
<> EphemeralRBMDevice::close() {
242 LOG_PREFIX(EphemeralRBMDevice::close
);
244 return close_ertr::now();
// EphemeralRBMDevice::writev
// Copies the full contents of the bufferlist `bl` (bl.length() bytes) into the
// in-memory buffer at buf + offset and returns write_ertr::now().
// NOTE(review): the parameter list and any bounds checks are not visible in
// this listing.
247 write_ertr::future
<> EphemeralRBMDevice::writev(
251 LOG_PREFIX(EphemeralRBMDevice::writev
);
254 "EphemeralRBMDevice: write offset {} len {}",
258 bl
.begin().copy(bl
.length(), buf
+ offset
);
259 return write_ertr::now();
// EphemeralRBMDevice::mount
// Delegates to do_shard_mount().
262 EphemeralRBMDevice::mount_ret
EphemeralRBMDevice::mount() {
263 return do_shard_mount();
// EphemeralRBMDevice::mkfs
// Delegates to do_primary_mkfs() with shard_num = 1 and
// DEFAULT_TEST_CBJOURNAL_SIZE as the journal size.
// NOTE(review): `config` is a by-value parameter that is passed on by copy;
// std::move(config) would avoid the extra copy if device_config_t is costly
// to copy - consider.
266 EphemeralRBMDevice::mkfs_ret
EphemeralRBMDevice::mkfs(device_config_t config
) {
267 return do_primary_mkfs(config
, 1, DEFAULT_TEST_CBJOURNAL_SIZE
);