]> git.proxmox.com Git - ceph.git/blob - ceph/src/crimson/os/seastore/random_block_manager/nvme_block_device.cc
update ceph source to reef 18.1.2
[ceph.git] / ceph / src / crimson / os / seastore / random_block_manager / nvme_block_device.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include <sys/mman.h>
5 #include <string.h>
6
7 #include <fcntl.h>
8
9 #include "crimson/common/log.h"
10 #include "crimson/common/errorator-loop.h"
11
12 #include "include/buffer.h"
13 #include "rbm_device.h"
14 #include "nvme_block_device.h"
15 #include "block_rb_manager.h"
16
17 namespace {
18 seastar::logger& logger() {
19 return crimson::get_logger(ceph_subsys_seastore_tm);
20 }
21 }
22
23 namespace crimson::os::seastore::random_block_device::nvme {
24
25 open_ertr::future<> NVMeBlockDevice::open(
26 const std::string &in_path,
27 seastar::open_flags mode) {
28 return seastar::do_with(in_path, [this, mode](auto& in_path) {
29 return seastar::file_stat(in_path).then([this, mode, in_path](auto stat) {
30 return seastar::open_file_dma(in_path, mode).then([=, this](auto file) {
31 device = file;
32 logger().debug("open");
33 // Get SSD's features from identify_controller and namespace command.
34 // Do identify_controller first, and then identify_namespace.
35 return identify_controller(device).safe_then([this, in_path, mode](
36 auto id_controller_data) {
37 support_multistream = id_controller_data.oacs.support_directives;
38 if (support_multistream) {
39 stream_id_count = WRITE_LIFE_MAX;
40 }
41 awupf = id_controller_data.awupf + 1;
42 return identify_namespace(device).safe_then([this, in_path, mode] (
43 auto id_namespace_data) {
44 atomic_write_unit = awupf * super.block_size;
45 data_protection_type = id_namespace_data.dps.protection_type;
46 data_protection_enabled = (data_protection_type > 0);
47 if (id_namespace_data.nsfeat.opterf == 1){
48 // NPWG and NPWA is 0'based value
49 write_granularity = super.block_size * (id_namespace_data.npwg + 1);
50 write_alignment = super.block_size * (id_namespace_data.npwa + 1);
51 }
52 return open_for_io(in_path, mode);
53 });
54 }).handle_error(crimson::ct_error::input_output_error::handle([this, in_path, mode]{
55 logger().error("open: id ctrlr failed. open without ioctl");
56 return open_for_io(in_path, mode);
57 }), crimson::ct_error::pass_further_all{});
58 });
59 });
60 });
61 }
62
63 open_ertr::future<> NVMeBlockDevice::open_for_io(
64 const std::string& in_path,
65 seastar::open_flags mode) {
66 io_device.resize(stream_id_count);
67 return seastar::do_for_each(io_device, [=, this](auto &target_device) {
68 return seastar::open_file_dma(in_path, mode).then([this](
69 auto file) {
70 assert(io_device.size() > stream_index_to_open);
71 io_device[stream_index_to_open] = file;
72 return io_device[stream_index_to_open].fcntl(
73 F_SET_FILE_RW_HINT,
74 (uintptr_t)&stream_index_to_open).then([this](auto ret) {
75 stream_index_to_open++;
76 return seastar::now();
77 });
78 });
79 });
80 }
81
82 NVMeBlockDevice::mount_ret NVMeBlockDevice::mount()
83 {
84 logger().debug(" mount ");
85 return do_mount();
86 }
87
88 write_ertr::future<> NVMeBlockDevice::write(
89 uint64_t offset,
90 bufferptr &&bptr,
91 uint16_t stream) {
92 logger().debug(
93 "block: write offset {} len {}",
94 offset,
95 bptr.length());
96 auto length = bptr.length();
97
98 assert((length % super.block_size) == 0);
99 uint16_t supported_stream = stream;
100 if (stream >= stream_id_count) {
101 supported_stream = WRITE_LIFE_NOT_SET;
102 }
103 return seastar::do_with(
104 std::move(bptr),
105 [this, offset, length, supported_stream] (auto& bptr) {
106 return io_device[supported_stream].dma_write(
107 offset, bptr.c_str(), length).handle_exception(
108 [](auto e) -> write_ertr::future<size_t> {
109 logger().error("write: dma_write got error{}", e);
110 return crimson::ct_error::input_output_error::make();
111 }).then([length](auto result) -> write_ertr::future<> {
112 if (result != length) {
113 logger().error("write: dma_write got error with not proper length");
114 return crimson::ct_error::input_output_error::make();
115 }
116 return write_ertr::now();
117 });
118 });
119 }
120
121 read_ertr::future<> NVMeBlockDevice::read(
122 uint64_t offset,
123 bufferptr &bptr) {
124 logger().debug(
125 "block: read offset {} len {}",
126 offset,
127 bptr.length());
128 auto length = bptr.length();
129
130 assert((length % super.block_size) == 0);
131
132 return device.dma_read(offset, bptr.c_str(), length).handle_exception(
133 [](auto e) -> read_ertr::future<size_t> {
134 logger().error("read: dma_read got error{}", e);
135 return crimson::ct_error::input_output_error::make();
136 }).then([length](auto result) -> read_ertr::future<> {
137 if (result != length) {
138 logger().error("read: dma_read got error with not proper length");
139 return crimson::ct_error::input_output_error::make();
140 }
141 return read_ertr::now();
142 });
143 }
144
145 write_ertr::future<> NVMeBlockDevice::writev(
146 uint64_t offset,
147 ceph::bufferlist bl,
148 uint16_t stream) {
149 logger().debug(
150 "block: write offset {} len {}",
151 offset,
152 bl.length());
153
154 uint16_t supported_stream = stream;
155 if (stream >= stream_id_count) {
156 supported_stream = WRITE_LIFE_NOT_SET;
157 }
158 bl.rebuild_aligned(super.block_size);
159
160 return seastar::do_with(
161 bl.prepare_iovs(),
162 std::move(bl),
163 [this, supported_stream, offset](auto& iovs, auto& bl)
164 {
165 return write_ertr::parallel_for_each(
166 iovs,
167 [this, supported_stream, offset](auto& p) mutable
168 {
169 auto off = offset + p.offset;
170 auto len = p.length;
171 auto& iov = p.iov;
172 return io_device[supported_stream].dma_write(off, std::move(iov)
173 ).handle_exception(
174 [this, off, len](auto e) -> write_ertr::future<size_t>
175 {
176 logger().error("{} poffset={}~{} dma_write got error -- {}",
177 device_id_printer_t{get_device_id()}, off, len, e);
178 return crimson::ct_error::input_output_error::make();
179 }).then([this, off, len](size_t written) -> write_ertr::future<> {
180 if (written != len) {
181 logger().error("{} poffset={}~{} dma_write len={} inconsistent",
182 device_id_printer_t{get_device_id()}, off, len, written);
183 return crimson::ct_error::input_output_error::make();
184 }
185 return write_ertr::now();
186 });
187 });
188 });
189 }
190
191 Device::close_ertr::future<> NVMeBlockDevice::close() {
192 logger().debug(" close ");
193 stream_index_to_open = WRITE_LIFE_NOT_SET;
194 return device.close().then([this]() {
195 return seastar::do_for_each(io_device, [](auto target_device) {
196 return target_device.close();
197 });
198 });
199 }
200
201 nvme_command_ertr::future<nvme_identify_controller_data_t>
202 NVMeBlockDevice::identify_controller(seastar::file f) {
203 return seastar::do_with(
204 nvme_admin_command_t(),
205 nvme_identify_controller_data_t(),
206 [this, f](auto &admin_command, auto &data) {
207 admin_command.common.opcode = nvme_admin_command_t::OPCODE_IDENTIFY;
208 admin_command.common.addr = (uint64_t)&data;
209 admin_command.common.data_len = sizeof(data);
210 admin_command.identify.cns = nvme_identify_command_t::CNS_CONTROLLER;
211
212 return pass_admin(admin_command, f).safe_then([&data](auto status) {
213 return seastar::make_ready_future<nvme_identify_controller_data_t>(
214 std::move(data));
215 });
216 });
217 }
218
219 discard_ertr::future<> NVMeBlockDevice::discard(uint64_t offset, uint64_t len) {
220 return device.discard(offset, len);
221 }
222
223 nvme_command_ertr::future<nvme_identify_namespace_data_t>
224 NVMeBlockDevice::identify_namespace(seastar::file f) {
225 return get_nsid(f).safe_then([this, f](auto nsid) {
226 return seastar::do_with(
227 nvme_admin_command_t(),
228 nvme_identify_namespace_data_t(),
229 [this, nsid, f](auto &admin_command, auto &data) {
230 admin_command.common.opcode = nvme_admin_command_t::OPCODE_IDENTIFY;
231 admin_command.common.addr = (uint64_t)&data;
232 admin_command.common.data_len = sizeof(data);
233 admin_command.common.nsid = nsid;
234 admin_command.identify.cns = nvme_identify_command_t::CNS_NAMESPACE;
235
236 return pass_admin(admin_command, f).safe_then([&data](auto status){
237 return seastar::make_ready_future<nvme_identify_namespace_data_t>(
238 std::move(data));
239 });
240 });
241 });
242 }
243
244 nvme_command_ertr::future<int> NVMeBlockDevice::get_nsid(seastar::file f) {
245 return f.ioctl(NVME_IOCTL_ID, nullptr).handle_exception(
246 [](auto e)->nvme_command_ertr::future<int> {
247 logger().error("pass_admin: ioctl failed");
248 return crimson::ct_error::input_output_error::make();
249 });
250 }
251
252 nvme_command_ertr::future<int> NVMeBlockDevice::pass_admin(
253 nvme_admin_command_t& admin_cmd, seastar::file f) {
254 return f.ioctl(NVME_IOCTL_ADMIN_CMD, &admin_cmd).handle_exception(
255 [](auto e)->nvme_command_ertr::future<int> {
256 logger().error("pass_admin: ioctl failed");
257 return crimson::ct_error::input_output_error::make();
258 });
259 }
260
261 nvme_command_ertr::future<int> NVMeBlockDevice::pass_through_io(
262 nvme_io_command_t& io_cmd) {
263 return device.ioctl(NVME_IOCTL_IO_CMD, &io_cmd);
264 }
265
266 }