git.proxmox.com Git - ceph.git/blob - ceph/src/crimson/os/seastore/random_block_manager/nvmedevice.cc
import quincy beta 17.1.0
[ceph.git] / ceph / src / crimson / os / seastore / random_block_manager / nvmedevice.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #include <sys/mman.h>
5 #include <string.h>
6
7 #include <fcntl.h>
8
9 #include "crimson/common/log.h"
10
11 #include "include/buffer.h"
12 #include "nvmedevice.h"
13
14 namespace {
15 seastar::logger& logger() {
16 return crimson::get_logger(ceph_subsys_seastore_tm);
17 }
18 }
19
20 namespace crimson::os::seastore::nvme_device {
21
22 open_ertr::future<> PosixNVMeDevice::open(
23 const std::string &in_path,
24 seastar::open_flags mode) {
25 return seastar::do_with(in_path, [this, mode](auto& in_path) {
26 return seastar::file_stat(in_path).then([this, mode, in_path](auto stat) {
27 size = stat.size;
28 return seastar::open_file_dma(in_path, mode).then([=](auto file) {
29 device = file;
30 logger().debug("open");
31 // Get SSD's features from identify_controller and namespace command.
32 // Do identify_controller first, and then identify_namespace.
33 return identify_controller().safe_then([this, in_path, mode](
34 auto id_controller_data) {
35 support_multistream = id_controller_data.oacs.support_directives;
36 if (support_multistream) {
37 stream_id_count = WRITE_LIFE_MAX;
38 }
39 awupf = id_controller_data.awupf + 1;
40 return identify_namespace().safe_then([this, in_path, mode] (
41 auto id_namespace_data) {
42 // LBA format provides LBA size which is power of 2. LBA is the
43 // minimum size of read and write.
44 block_size = (1 << id_namespace_data.lbaf0.lbads);
45 atomic_write_unit = awupf * block_size;
46 data_protection_type = id_namespace_data.dps.protection_type;
47 data_protection_enabled = (data_protection_type > 0);
48 if (id_namespace_data.nsfeat.opterf == 1){
49 // NPWG and NPWA is 0'based value
50 write_granularity = block_size * (id_namespace_data.npwg + 1);
51 write_alignment = block_size * (id_namespace_data.npwa + 1);
52 }
53 return open_for_io(in_path, mode);
54 });
55 }).handle_error(crimson::ct_error::input_output_error::handle([this, in_path, mode]{
56 logger().error("open: id ctrlr failed. open without ioctl");
57 return open_for_io(in_path, mode);
58 }), crimson::ct_error::pass_further_all{});
59 });
60 });
61 });
62 }
63
64 open_ertr::future<> PosixNVMeDevice::open_for_io(
65 const std::string& in_path,
66 seastar::open_flags mode) {
67 io_device.resize(stream_id_count);
68 return seastar::do_for_each(io_device, [=](auto &target_device) {
69 return seastar::open_file_dma(in_path, mode).then([this](
70 auto file) {
71 io_device[stream_index_to_open] = file;
72 return io_device[stream_index_to_open].fcntl(
73 F_SET_FILE_RW_HINT,
74 (uintptr_t)&stream_index_to_open).then([this](auto ret) {
75 stream_index_to_open++;
76 return seastar::now();
77 });
78 });
79 });
80 }
81
82 write_ertr::future<> PosixNVMeDevice::write(
83 uint64_t offset,
84 bufferptr &bptr,
85 uint16_t stream) {
86 logger().debug(
87 "block: write offset {} len {}",
88 offset,
89 bptr.length());
90 auto length = bptr.length();
91
92 assert((length % block_size) == 0);
93 uint16_t supported_stream = stream;
94 if (stream >= stream_id_count) {
95 supported_stream = WRITE_LIFE_NOT_SET;
96 }
97 return io_device[supported_stream].dma_write(
98 offset, bptr.c_str(), length).handle_exception(
99 [](auto e) -> write_ertr::future<size_t> {
100 logger().error("write: dma_write got error{}", e);
101 return crimson::ct_error::input_output_error::make();
102 }).then([length](auto result) -> write_ertr::future<> {
103 if (result != length) {
104 logger().error("write: dma_write got error with not proper length");
105 return crimson::ct_error::input_output_error::make();
106 }
107 return write_ertr::now();
108 });
109 }
110
111 read_ertr::future<> PosixNVMeDevice::read(
112 uint64_t offset,
113 bufferptr &bptr) {
114 logger().debug(
115 "block: read offset {} len {}",
116 offset,
117 bptr.length());
118 auto length = bptr.length();
119
120 assert((length % block_size) == 0);
121
122 return device.dma_read(offset, bptr.c_str(), length).handle_exception(
123 [](auto e) -> read_ertr::future<size_t> {
124 logger().error("read: dma_read got error{}", e);
125 return crimson::ct_error::input_output_error::make();
126 }).then([length](auto result) -> read_ertr::future<> {
127 if (result != length) {
128 logger().error("read: dma_read got error with not proper length");
129 return crimson::ct_error::input_output_error::make();
130 }
131 return read_ertr::now();
132 });
133 }
134
135 seastar::future<> PosixNVMeDevice::close() {
136 logger().debug(" close ");
137 return device.close().then([this]() {
138 return seastar::do_for_each(io_device, [](auto target_device) {
139 return target_device.close();
140 });
141 });
142 }
143
144 nvme_command_ertr::future<nvme_identify_controller_data_t>
145 PosixNVMeDevice::identify_controller() {
146 return seastar::do_with(
147 nvme_admin_command_t(),
148 nvme_identify_controller_data_t(),
149 [this](auto &admin_command, auto &data) {
150 admin_command.common.opcode = nvme_admin_command_t::OPCODE_IDENTIFY;
151 admin_command.common.addr = (uint64_t)&data;
152 admin_command.common.data_len = sizeof(data);
153 admin_command.identify.cns = nvme_identify_command_t::CNS_CONTROLLER;
154
155 return pass_admin(admin_command).safe_then([&data](auto status) {
156 return seastar::make_ready_future<nvme_identify_controller_data_t>(
157 std::move(data));
158 });
159 });
160 }
161
162 discard_ertr::future<> PosixNVMeDevice::discard(uint64_t offset, uint64_t len) {
163 return device.discard(offset, len);
164 }
165
166 nvme_command_ertr::future<nvme_identify_namespace_data_t>
167 PosixNVMeDevice::identify_namespace() {
168 return get_nsid().safe_then([this](auto nsid) {
169 return seastar::do_with(
170 nvme_admin_command_t(),
171 nvme_identify_namespace_data_t(),
172 [this, nsid](auto &admin_command, auto &data) {
173 admin_command.common.opcode = nvme_admin_command_t::OPCODE_IDENTIFY;
174 admin_command.common.addr = (uint64_t)&data;
175 admin_command.common.data_len = sizeof(data);
176 admin_command.common.nsid = nsid;
177 admin_command.identify.cns = nvme_identify_command_t::CNS_NAMESPACE;
178
179 return pass_admin(admin_command).safe_then([&data](auto status){
180 return seastar::make_ready_future<nvme_identify_namespace_data_t>(
181 std::move(data));
182 });
183 });
184 });
185 }
186
187 nvme_command_ertr::future<int> PosixNVMeDevice::get_nsid() {
188 return device.ioctl(NVME_IOCTL_ID, nullptr);
189 }
190
191 nvme_command_ertr::future<int> PosixNVMeDevice::pass_admin(
192 nvme_admin_command_t& admin_cmd) {
193 return device.ioctl(NVME_IOCTL_ADMIN_CMD, &admin_cmd).handle_exception(
194 [](auto e)->nvme_command_ertr::future<int> {
195 logger().error("pass_admin: ioctl failed");
196 return crimson::ct_error::input_output_error::make();
197 });
198 }
199
200 nvme_command_ertr::future<int> PosixNVMeDevice::pass_through_io(
201 nvme_io_command_t& io_cmd) {
202 return device.ioctl(NVME_IOCTL_IO_CMD, &io_cmd);
203 }
204
205 open_ertr::future<> TestMemory::open(
206 const std::string &in_path,
207 seastar::open_flags mode) {
208 if (buf) {
209 return open_ertr::now();
210 }
211
212 logger().debug(
213 "Initializing test memory device {}",
214 size);
215
216 void* addr = ::mmap(
217 nullptr,
218 size,
219 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
220 -1,
221 0);
222
223 buf = (char*)addr;
224
225 ::memset(buf, 0, size);
226 return open_ertr::now();
227 }
228
229 write_ertr::future<> TestMemory::write(
230 uint64_t offset,
231 bufferptr &bptr,
232 uint16_t stream) {
233 ceph_assert(buf);
234 logger().debug(
235 "TestMemory: write offset {} len {}",
236 offset,
237 bptr.length());
238
239 ::memcpy(buf + offset, bptr.c_str(), bptr.length());
240
241 return write_ertr::now();
242 }
243
244 read_ertr::future<> TestMemory::read(
245 uint64_t offset,
246 bufferptr &bptr) {
247 ceph_assert(buf);
248 logger().debug(
249 "TestMemory: read offset {} len {}",
250 offset,
251 bptr.length());
252
253 bptr.copy_in(0, bptr.length(), buf + offset);
254 return read_ertr::now();
255 }
256
257 seastar::future<> TestMemory::close() {
258 logger().debug(" close ");
259 return seastar::now();
260 }
261 }