1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
9 #include "crimson/common/log.h"
10 #include "crimson/common/errorator-loop.h"
12 #include "include/buffer.h"
13 #include "rbm_device.h"
14 #include "nvme_block_device.h"
15 #include "block_rb_manager.h"
// Returns the seastar logger obtained from crimson::get_logger() for the
// ceph_subsys_seastore_tm subsystem. The log calls below go through it.
18 seastar::logger
& logger() {
19 return crimson::get_logger(ceph_subsys_seastore_tm
);
23 namespace crimson::os::seastore::random_block_device::nvme
{
// Opens the NVMe block device at in_path with the given open flags.
//
// Visible flow:
//   file_stat(in_path) -> open_file_dma(in_path, mode)
//     -> identify_controller(device) -> identify_namespace(device)
//     -> open_for_io(in_path, mode)
// The identify results populate the multi-stream, atomic-write,
// data-protection and write granularity/alignment members of this object.
// If the identify chain fails with input_output_error, the failure is logged
// and open_for_io() is still called; every other error is passed further.
// NOTE(review): parts of this function are not visible in this view (for
// example where `device` is assigned) — confirm against the full file.
25 open_ertr::future
<> NVMeBlockDevice::open(
26 const std::string
&in_path
,
27 seastar::open_flags mode
) {
// in_path is handed to do_with and re-bound as the lambda's reference
// parameter; the inner lambdas capture their own copies.
28 return seastar::do_with(in_path
, [this, mode
](auto& in_path
) {
29 return seastar::file_stat(in_path
).then([this, mode
, in_path
](auto stat
) {
30 return seastar::open_file_dma(in_path
, mode
).then([=, this](auto file
) {
32 logger().debug("open");
33 // Get SSD's features from identify_controller and namespace command.
34 // Do identify_controller first, and then identify_namespace.
35 return identify_controller(device
).safe_then([this, in_path
, mode
](
36 auto id_controller_data
) {
// Multi-stream support is taken from the controller's oacs.support_directives
// field; when set, the stream count becomes WRITE_LIFE_MAX.
37 support_multistream
= id_controller_data
.oacs
.support_directives
;
38 if (support_multistream
) {
39 stream_id_count
= WRITE_LIFE_MAX
;
// awupf is stored as the reported value plus one.
// NOTE(review): presumably because the reported field is 0-based, like
// NPWG/NPWA below — confirm against the NVMe specification.
41 awupf
= id_controller_data
.awupf
+ 1;
42 return identify_namespace(device
).safe_then([this, in_path
, mode
] (
43 auto id_namespace_data
) {
// atomic_write_unit = awupf (already incremented above) * super.block_size.
44 atomic_write_unit
= awupf
* super
.block_size
;
// Data protection counts as enabled for any non-zero protection type.
45 data_protection_type
= id_namespace_data
.dps
.protection_type
;
46 data_protection_enabled
= (data_protection_type
> 0);
// Write granularity/alignment are derived from the namespace data only when
// nsfeat.opterf == 1.
47 if (id_namespace_data
.nsfeat
.opterf
== 1){
48 // NPWG and NPWA is 0'based value
49 write_granularity
= super
.block_size
* (id_namespace_data
.npwg
+ 1);
50 write_alignment
= super
.block_size
* (id_namespace_data
.npwa
+ 1);
52 return open_for_io(in_path
, mode
);
// Fallback: an input_output_error from the identify chain is logged and the
// device is still opened for I/O; all other errors are propagated unchanged.
54 }).handle_error(crimson::ct_error::input_output_error::handle([this, in_path
, mode
]{
55 logger().error("open: id ctrlr failed. open without ioctl");
56 return open_for_io(in_path
, mode
);
57 }), crimson::ct_error::pass_further_all
{});
// Opens the per-stream I/O file handles.
//
// io_device is resized to stream_id_count. For each slot, in_path is opened
// again with open_file_dma and the resulting handle is stored at
// io_device[stream_index_to_open]. An fcntl is then issued on that handle with
// the address of stream_index_to_open as its argument, and the index is
// incremented once the fcntl completes.
// NOTE(review): the lambda parameter list and the fcntl command are not
// visible in this view — confirm which fcntl command is issued.
63 open_ertr::future
<> NVMeBlockDevice::open_for_io(
64 const std::string
& in_path
,
65 seastar::open_flags mode
) {
66 io_device
.resize(stream_id_count
);
67 return seastar::do_for_each(io_device
, [=, this](auto &target_device
) {
68 return seastar::open_file_dma(in_path
, mode
).then([this](
// The slot index must be within the io_device size set by the resize above.
70 assert(io_device
.size() > stream_index_to_open
);
71 io_device
[stream_index_to_open
] = file
;
72 return io_device
[stream_index_to_open
].fcntl(
74 (uintptr_t)&stream_index_to_open
).then([this](auto ret
) {
// Advance to the next slot only after the fcntl on the current one completes.
75 stream_index_to_open
++;
76 return seastar::now();
// NVMeBlockDevice::mount: the visible part logs " mount " at debug level.
// NOTE(review): the rest of the body is not visible in this view.
82 NVMeBlockDevice::mount_ret
NVMeBlockDevice::mount()
84 logger().debug(" mount ");
// Writes the buffer `bptr` at `offset` through the file handle of the
// requested stream (io_device[supported_stream]).
//
// - bptr.length() must be a multiple of super.block_size (asserted).
// - If `stream` is not below stream_id_count, the write uses
//   WRITE_LIFE_NOT_SET as the stream index instead.
// - An exception from dma_write, or a written byte count different from the
//   requested length, is logged and reported as input_output_error.
// NOTE(review): the parameter list and the do_with arguments are not visible
// in this view.
88 write_ertr::future
<> NVMeBlockDevice::write(
93 "block: write offset {} len {}",
96 auto length
= bptr
.length();
98 assert((length
% super
.block_size
) == 0);
// Fall back to WRITE_LIFE_NOT_SET when the requested stream is out of range.
99 uint16_t supported_stream
= stream
;
100 if (stream
>= stream_id_count
) {
101 supported_stream
= WRITE_LIFE_NOT_SET
;
103 return seastar::do_with(
105 [this, offset
, length
, supported_stream
] (auto& bptr
) {
106 return io_device
[supported_stream
].dma_write(
107 offset
, bptr
.c_str(), length
).handle_exception(
108 [](auto e
) -> write_ertr::future
<size_t> {
109 logger().error("write: dma_write got error{}", e
);
110 return crimson::ct_error::input_output_error::make();
111 }).then([length
](auto result
) -> write_ertr::future
<> {
// A short write is treated as an I/O error.
112 if (result
!= length
) {
113 logger().error("write: dma_write got error with not proper length");
114 return crimson::ct_error::input_output_error::make();
116 return write_ertr::now();
// Reads bptr.length() bytes at `offset` into `bptr` using dma_read on
// `device` (not on the per-stream io_device handles).
//
// - bptr.length() must be a multiple of super.block_size (asserted).
// - An exception from dma_read, or a read byte count different from the
//   requested length, is logged and reported as input_output_error.
// NOTE(review): the parameter list is not visible in this view.
121 read_ertr::future
<> NVMeBlockDevice::read(
125 "block: read offset {} len {}",
128 auto length
= bptr
.length();
130 assert((length
% super
.block_size
) == 0);
132 return device
.dma_read(offset
, bptr
.c_str(), length
).handle_exception(
133 [](auto e
) -> read_ertr::future
<size_t> {
134 logger().error("read: dma_read got error{}", e
);
135 return crimson::ct_error::input_output_error::make();
136 }).then([length
](auto result
) -> read_ertr::future
<> {
// A short read is treated as an I/O error.
137 if (result
!= length
) {
138 logger().error("read: dma_read got error with not proper length");
139 return crimson::ct_error::input_output_error::make();
141 return read_ertr::now();
// Vectored write of `bl` starting at `offset` through the file handle of the
// requested stream.
//
// - If `stream` is not below stream_id_count, WRITE_LIFE_NOT_SET is used as
//   the stream index instead.
// - `bl` is rebuilt aligned to super.block_size before writing.
// - Each element `p` iterated by parallel_for_each is written with dma_write
//   at offset + p.offset. An exception, or a written byte count different
//   from `len`, is logged with the device id and reported as
//   input_output_error.
// NOTE(review): the parameter list, the construction of `iovs`/`iov`/`len`
// and the iterated range are not visible in this view.
145 write_ertr::future
<> NVMeBlockDevice::writev(
150 "block: write offset {} len {}",
// Fall back to WRITE_LIFE_NOT_SET when the requested stream is out of range.
154 uint16_t supported_stream
= stream
;
155 if (stream
>= stream_id_count
) {
156 supported_stream
= WRITE_LIFE_NOT_SET
;
158 bl
.rebuild_aligned(super
.block_size
);
160 return seastar::do_with(
163 [this, supported_stream
, offset
](auto& iovs
, auto& bl
)
165 return write_ertr::parallel_for_each(
167 [this, supported_stream
, offset
](auto& p
) mutable
// Device offset of this piece = base offset + the piece's own offset.
169 auto off
= offset
+ p
.offset
;
172 return io_device
[supported_stream
].dma_write(off
, std::move(iov
)
174 [this, off
, len
](auto e
) -> write_ertr::future
<size_t>
176 logger().error("{} poffset={}~{} dma_write got error -- {}",
177 device_id_printer_t
{get_device_id()}, off
, len
, e
);
178 return crimson::ct_error::input_output_error::make();
179 }).then([this, off
, len
](size_t written
) -> write_ertr::future
<> {
// A short write is treated as an I/O error.
180 if (written
!= len
) {
181 logger().error("{} poffset={}~{} dma_write len={} inconsistent",
182 device_id_printer_t
{get_device_id()}, off
, len
, written
);
183 return crimson::ct_error::input_output_error::make();
185 return write_ertr::now();
// Closes the device: logs at debug level, resets stream_index_to_open to
// WRITE_LIFE_NOT_SET, closes `device`, and then closes every handle stored in
// io_device one after another.
191 Device::close_ertr::future
<> NVMeBlockDevice::close() {
192 logger().debug(" close ");
193 stream_index_to_open
= WRITE_LIFE_NOT_SET
;
194 return device
.close().then([this]() {
// Each io_device element is passed to the lambda by value (auto target_device).
195 return seastar::do_for_each(io_device
, [](auto target_device
) {
196 return target_device
.close();
// Sends an identify command with CNS_CONTROLLER on file `f` and returns the
// controller data.
//
// The admin command and the nvme_identify_controller_data_t result buffer are
// both held by seastar::do_with. The command's addr/data_len fields point at
// that buffer, and the command is submitted through pass_admin().
// NOTE(review): the tail of the continuation is not visible in this view;
// presumably it returns `data` — confirm.
201 nvme_command_ertr::future
<nvme_identify_controller_data_t
>
202 NVMeBlockDevice::identify_controller(seastar::file f
) {
203 return seastar::do_with(
204 nvme_admin_command_t(),
205 nvme_identify_controller_data_t(),
206 [this, f
](auto &admin_command
, auto &data
) {
207 admin_command
.common
.opcode
= nvme_admin_command_t::OPCODE_IDENTIFY
;
// The command carries the address and size of the result buffer.
208 admin_command
.common
.addr
= (uint64_t)&data
;
209 admin_command
.common
.data_len
= sizeof(data
);
210 admin_command
.identify
.cns
= nvme_identify_command_t::CNS_CONTROLLER
;
212 return pass_admin(admin_command
, f
).safe_then([&data
](auto status
) {
213 return seastar::make_ready_future
<nvme_identify_controller_data_t
>(
// Discards `len` units starting at `offset` by forwarding directly to
// device.discard(offset, len); no additional error handling is visible here.
219 discard_ertr::future
<> NVMeBlockDevice::discard(uint64_t offset
, uint64_t len
) {
220 return device
.discard(offset
, len
);
// Sends an identify command with CNS_NAMESPACE on file `f` and returns the
// namespace data.
//
// The namespace id is first obtained with get_nsid(f) and placed in the
// command's nsid field. The admin command and the
// nvme_identify_namespace_data_t result buffer are both held by
// seastar::do_with, and the command is submitted through pass_admin().
// NOTE(review): the tail of the continuation is not visible in this view;
// presumably it returns `data` — confirm.
223 nvme_command_ertr::future
<nvme_identify_namespace_data_t
>
224 NVMeBlockDevice::identify_namespace(seastar::file f
) {
225 return get_nsid(f
).safe_then([this, f
](auto nsid
) {
226 return seastar::do_with(
227 nvme_admin_command_t(),
228 nvme_identify_namespace_data_t(),
229 [this, nsid
, f
](auto &admin_command
, auto &data
) {
230 admin_command
.common
.opcode
= nvme_admin_command_t::OPCODE_IDENTIFY
;
// The command carries the address and size of the result buffer.
231 admin_command
.common
.addr
= (uint64_t)&data
;
232 admin_command
.common
.data_len
= sizeof(data
);
233 admin_command
.common
.nsid
= nsid
;
234 admin_command
.identify
.cns
= nvme_identify_command_t::CNS_NAMESPACE
;
236 return pass_admin(admin_command
, f
).safe_then([&data
](auto status
){
237 return seastar::make_ready_future
<nvme_identify_namespace_data_t
>(
244 nvme_command_ertr::future
<int> NVMeBlockDevice::get_nsid(seastar::file f
) {
245 return f
.ioctl(NVME_IOCTL_ID
, nullptr).handle_exception(
246 [](auto e
)->nvme_command_ertr::future
<int> {
247 logger().error("pass_admin: ioctl failed");
248 return crimson::ct_error::input_output_error::make();
// Submits an NVMe admin command: issues the NVME_IOCTL_ADMIN_CMD ioctl on `f`
// with the address of admin_cmd and yields the ioctl's integer result.
// An exception raised by the ioctl is logged and converted to
// crimson::ct_error::input_output_error.
252 nvme_command_ertr::future
<int> NVMeBlockDevice::pass_admin(
253 nvme_admin_command_t
& admin_cmd
, seastar::file f
) {
254 return f
.ioctl(NVME_IOCTL_ADMIN_CMD
, &admin_cmd
).handle_exception(
255 [](auto e
)->nvme_command_ertr::future
<int> {
256 logger().error("pass_admin: ioctl failed");
257 return crimson::ct_error::input_output_error::make();
// Submits an NVMe I/O command: issues the NVME_IOCTL_IO_CMD ioctl on `device`
// with the address of io_cmd and returns the ioctl's result.
// NOTE(review): unlike pass_admin(), no handle_exception step is visible
// here — confirm whether ioctl exceptions are meant to propagate as-is.
261 nvme_command_ertr::future
<int> NVMeBlockDevice::pass_through_io(
262 nvme_io_command_t
& io_cmd
) {
263 return device
.ioctl(NVME_IOCTL_IO_CMD
, &io_cmd
);