]> git.proxmox.com Git - ceph.git/blob - ceph/src/crimson/os/seastore/random_block_manager/rbm_device.cc
update ceph source to reef 18.2.1
[ceph.git] / ceph / src / crimson / os / seastore / random_block_manager / rbm_device.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include <sys/mman.h>
5 #include <string.h>
6
7 #include <fcntl.h>
8
9 #include "crimson/common/log.h"
10 #include "crimson/common/errorator-loop.h"
11
12 #include "include/buffer.h"
13 #include "rbm_device.h"
14 #include "nvme_block_device.h"
15 #include "block_rb_manager.h"
16
17 namespace crimson::os::seastore::random_block_device {
18 #include "crimson/os/seastore/logging.h"
19 SET_SUBSYS(seastore_device);
20
21 RBMDevice::mkfs_ret RBMDevice::do_primary_mkfs(device_config_t config,
22 int shard_num, size_t journal_size) {
23 LOG_PREFIX(RBMDevice::do_primary_mkfs);
24 return stat_device(
25 ).handle_error(
26 mkfs_ertr::pass_further{},
27 crimson::ct_error::assert_all{
28 "Invalid error stat_device in RBMDevice::do_primary_mkfs"}
29 ).safe_then(
30 [this, FNAME, config=std::move(config), shard_num, journal_size](auto st) {
31 super.block_size = st.block_size;
32 super.size = st.size;
33 super.feature |= RBM_BITMAP_BLOCK_CRC;
34 super.config = std::move(config);
35 super.journal_size = journal_size;
36 ceph_assert_always(super.journal_size > 0);
37 ceph_assert_always(super.size >= super.journal_size);
38 ceph_assert_always(shard_num > 0);
39
40 std::vector<rbm_shard_info_t> shard_infos(shard_num);
41 for (int i = 0; i < shard_num; i++) {
42 uint64_t aligned_size =
43 (super.size / shard_num) -
44 ((super.size / shard_num) % super.block_size);
45 shard_infos[i].size = aligned_size;
46 shard_infos[i].start_offset = i * aligned_size;
47 assert(shard_infos[i].size > super.journal_size);
48 }
49 super.shard_infos = shard_infos;
50 super.shard_num = shard_num;
51 shard_info = shard_infos[seastar::this_shard_id()];
52 DEBUG("super {} ", super);
53
54 // write super block
55 return open(get_device_path(),
56 seastar::open_flags::rw | seastar::open_flags::dsync
57 ).handle_error(
58 mkfs_ertr::pass_further{},
59 crimson::ct_error::assert_all{
60 "Invalid error open in RBMDevice::do_primary_mkfs"}
61 ).safe_then([this] {
62 return write_rbm_header(
63 ).safe_then([this] {
64 return close();
65 }).handle_error(
66 mkfs_ertr::pass_further{},
67 crimson::ct_error::assert_all{
68 "Invalid error write_rbm_header in RBMDevice::do_primary_mkfs"
69 });
70 });
71 });
72 }
73
74 write_ertr::future<> RBMDevice::write_rbm_header()
75 {
76 bufferlist meta_b_header;
77 super.crc = 0;
78 encode(super, meta_b_header);
79 // If NVMeDevice supports data protection, CRC for checksum is not required
80 // NVMeDevice is expected to generate and store checksum internally.
81 // CPU overhead for CRC might be saved.
82 if (is_data_protection_enabled()) {
83 super.crc = -1;
84 } else {
85 super.crc = meta_b_header.crc32c(-1);
86 }
87
88 bufferlist bl;
89 encode(super, bl);
90 auto iter = bl.begin();
91 auto bp = bufferptr(ceph::buffer::create_page_aligned(super.block_size));
92 assert(bl.length() < super.block_size);
93 iter.copy(bl.length(), bp.c_str());
94 return write(RBM_START_ADDRESS, std::move(bp));
95 }
96
97 read_ertr::future<rbm_metadata_header_t> RBMDevice::read_rbm_header(
98 rbm_abs_addr addr)
99 {
100 LOG_PREFIX(RBMDevice::read_rbm_header);
101 assert(super.block_size > 0);
102 return seastar::do_with(
103 bufferptr(ceph::buffer::create_page_aligned(super.block_size)),
104 [this, addr, FNAME](auto &bptr) {
105 return read(
106 addr,
107 bptr
108 ).safe_then([length=bptr.length(), this, bptr, FNAME]()
109 -> read_ertr::future<rbm_metadata_header_t> {
110 bufferlist bl;
111 bl.append(bptr);
112 auto p = bl.cbegin();
113 rbm_metadata_header_t super_block;
114 try {
115 decode(super_block, p);
116 }
117 catch (ceph::buffer::error& e) {
118 DEBUG("read_rbm_header: unable to decode rbm super block {}",
119 e.what());
120 return crimson::ct_error::enoent::make();
121 }
122 checksum_t crc = super_block.crc;
123 bufferlist meta_b_header;
124 super_block.crc = 0;
125 encode(super_block, meta_b_header);
126 assert(ceph::encoded_sizeof<rbm_metadata_header_t>(super_block) <
127 super_block.block_size);
128
129 // Do CRC verification only if data protection is not supported.
130 if (is_data_protection_enabled() == false) {
131 if (meta_b_header.crc32c(-1) != crc) {
132 DEBUG("bad crc on super block, expected {} != actual {} ",
133 meta_b_header.crc32c(-1), crc);
134 return crimson::ct_error::input_output_error::make();
135 }
136 } else {
137 ceph_assert_always(crc == (checksum_t)-1);
138 }
139 super_block.crc = crc;
140 super = super_block;
141 DEBUG("got {} ", super);
142 return read_ertr::future<rbm_metadata_header_t>(
143 read_ertr::ready_future_marker{},
144 super_block
145 );
146 });
147 });
148 }
149
150 RBMDevice::mount_ret RBMDevice::do_shard_mount()
151 {
152 return open(get_device_path(),
153 seastar::open_flags::rw | seastar::open_flags::dsync
154 ).safe_then([this] {
155 return stat_device(
156 ).handle_error(
157 mount_ertr::pass_further{},
158 crimson::ct_error::assert_all{
159 "Invalid error stat_device in RBMDevice::do_shard_mount"}
160 ).safe_then([this](auto st) {
161 assert(st.block_size > 0);
162 super.block_size = st.block_size;
163 return read_rbm_header(RBM_START_ADDRESS
164 ).safe_then([this](auto s) {
165 LOG_PREFIX(RBMDevice::do_shard_mount);
166 shard_info = s.shard_infos[seastar::this_shard_id()];
167 INFO("{} read {}", device_id_printer_t{get_device_id()}, shard_info);
168 s.validate();
169 return seastar::now();
170 });
171 });
172 }).handle_error(
173 mount_ertr::pass_further{},
174 crimson::ct_error::assert_all{
175 "Invalid error mount in RBMDevice::do_shard_mount"}
176 );
177 }
178
179 EphemeralRBMDeviceRef create_test_ephemeral(uint64_t journal_size, uint64_t data_size) {
180 return EphemeralRBMDeviceRef(
181 new EphemeralRBMDevice(journal_size + data_size +
182 random_block_device::RBMDevice::get_shard_reserved_size(),
183 EphemeralRBMDevice::TEST_BLOCK_SIZE));
184 }
185
186 open_ertr::future<> EphemeralRBMDevice::open(
187 const std::string &in_path,
188 seastar::open_flags mode) {
189 LOG_PREFIX(EphemeralRBMDevice::open);
190 if (buf) {
191 return open_ertr::now();
192 }
193
194 DEBUG(
195 "Initializing test memory device {}",
196 size);
197
198 void* addr = ::mmap(
199 nullptr,
200 size,
201 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
202 -1,
203 0);
204
205 buf = (char*)addr;
206
207 ::memset(buf, 0, size);
208 return open_ertr::now();
209 }
210
211 write_ertr::future<> EphemeralRBMDevice::write(
212 uint64_t offset,
213 bufferptr &&bptr,
214 uint16_t stream) {
215 LOG_PREFIX(EphemeralRBMDevice::write);
216 ceph_assert(buf);
217 DEBUG(
218 "EphemeralRBMDevice: write offset {} len {}",
219 offset,
220 bptr.length());
221
222 ::memcpy(buf + offset, bptr.c_str(), bptr.length());
223
224 return write_ertr::now();
225 }
226
227 read_ertr::future<> EphemeralRBMDevice::read(
228 uint64_t offset,
229 bufferptr &bptr) {
230 LOG_PREFIX(EphemeralRBMDevice::read);
231 ceph_assert(buf);
232 DEBUG(
233 "EphemeralRBMDevice: read offset {} len {}",
234 offset,
235 bptr.length());
236
237 bptr.copy_in(0, bptr.length(), buf + offset);
238 return read_ertr::now();
239 }
240
241 Device::close_ertr::future<> EphemeralRBMDevice::close() {
242 LOG_PREFIX(EphemeralRBMDevice::close);
243 DEBUG(" close ");
244 return close_ertr::now();
245 }
246
247 write_ertr::future<> EphemeralRBMDevice::writev(
248 uint64_t offset,
249 ceph::bufferlist bl,
250 uint16_t stream) {
251 LOG_PREFIX(EphemeralRBMDevice::writev);
252 ceph_assert(buf);
253 DEBUG(
254 "EphemeralRBMDevice: write offset {} len {}",
255 offset,
256 bl.length());
257
258 bl.begin().copy(bl.length(), buf + offset);
259 return write_ertr::now();
260 }
261
262 EphemeralRBMDevice::mount_ret EphemeralRBMDevice::mount() {
263 return do_shard_mount();
264 }
265
266 EphemeralRBMDevice::mkfs_ret EphemeralRBMDevice::mkfs(device_config_t config) {
267 return do_primary_mkfs(config, 1, DEFAULT_TEST_CBJOURNAL_SIZE);
268 }
269
270 }
271