]> git.proxmox.com Git - ceph.git/blob - ceph/src/tools/rbd_wnbd/wnbd_handler.cc
ecfa47240f2db44186e0d24538f704b5095eaa35
[ceph.git] / ceph / src / tools / rbd_wnbd / wnbd_handler.cc
1 /*
2 * Ceph - scalable distributed file system
3 *
4 * Copyright (C) 2020 SUSE LINUX GmbH
5 *
6 * This is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License version 2.1, as published by the Free Software
9 * Foundation. See file COPYING.
10 *
11 */
12
13 #define dout_context g_ceph_context
14 #define dout_subsys ceph_subsys_rbd
15
16 #include "wnbd_handler.h"
17
18 #define _NTSCSI_USER_MODE_
19 #include <rpc.h>
20 #include <ddk/scsi.h>
21
22 #include <boost/thread/tss.hpp>
23
24 #include "common/debug.h"
25 #include "common/errno.h"
26 #include "common/safe_io.h"
27 #include "common/SubProcess.h"
28 #include "common/Formatter.h"
29
30 #include "global/global_context.h"
31
32 WnbdHandler::~WnbdHandler()
33 {
34 if (started && wnbd_disk) {
35 dout(10) << __func__ << ": terminating" << dendl;
36
37 shutdown();
38 reply_tpool->join();
39
40 WnbdClose(wnbd_disk);
41
42 started = false;
43
44 delete reply_tpool;
45 delete admin_hook;
46 }
47 }
48
49 int WnbdHandler::wait()
50 {
51 int err = 0;
52 if (started && wnbd_disk) {
53 dout(10) << __func__ << ": waiting" << dendl;
54
55 err = WnbdWaitDispatcher(wnbd_disk);
56 if (err) {
57 derr << __func__ << " failed waiting for dispatcher to stop: "
58 << err << dendl;
59 }
60 }
61
62 return err;
63 }
64
65 int WnbdAdminHook::call (std::string_view command, const cmdmap_t& cmdmap,
66 Formatter *f,
67 std::ostream& errss,
68 bufferlist& out) {
69 if (command == "wnbd stats") {
70 return m_handler->dump_stats(f);
71 }
72 return -ENOSYS;
73 }
74
75 int WnbdHandler::dump_stats(Formatter *f)
76 {
77 if (!f) {
78 return -EINVAL;
79 }
80
81 WNBD_USR_STATS stats = { 0 };
82 DWORD err = WnbdGetUserspaceStats(wnbd_disk, &stats);
83 if (err) {
84 derr << "Failed to retrieve WNBD userspace stats. Error: " << err << dendl;
85 return -EINVAL;
86 }
87
88 f->open_object_section("stats");
89 f->dump_int("TotalReceivedRequests", stats.TotalReceivedRequests);
90 f->dump_int("TotalSubmittedRequests", stats.TotalSubmittedRequests);
91 f->dump_int("TotalReceivedReplies", stats.TotalReceivedReplies);
92 f->dump_int("UnsubmittedRequests", stats.UnsubmittedRequests);
93 f->dump_int("PendingSubmittedRequests", stats.PendingSubmittedRequests);
94 f->dump_int("PendingReplies", stats.PendingReplies);
95 f->dump_int("ReadErrors", stats.ReadErrors);
96 f->dump_int("WriteErrors", stats.WriteErrors);
97 f->dump_int("FlushErrors", stats.FlushErrors);
98 f->dump_int("UnmapErrors", stats.UnmapErrors);
99 f->dump_int("InvalidRequests", stats.InvalidRequests);
100 f->dump_int("TotalRWRequests", stats.TotalRWRequests);
101 f->dump_int("TotalReadBlocks", stats.TotalReadBlocks);
102 f->dump_int("TotalWrittenBlocks", stats.TotalWrittenBlocks);
103
104 f->close_section();
105 return 0;
106 }
107
108 void WnbdHandler::shutdown()
109 {
110 std::unique_lock l{shutdown_lock};
111 if (!terminated && wnbd_disk) {
112 // We're requesting the disk to be removed but continue serving IO
113 // requests until the driver sends us the "Disconnect" event.
114 // TODO: expose PWNBD_REMOVE_OPTIONS, we're using the defaults ATM.
115 WnbdRemove(wnbd_disk, NULL);
116 wait();
117 terminated = true;
118 }
119 }
120
121 void WnbdHandler::aio_callback(librbd::completion_t cb, void *arg)
122 {
123 librbd::RBD::AioCompletion *aio_completion =
124 reinterpret_cast<librbd::RBD::AioCompletion*>(cb);
125
126 WnbdHandler::IOContext* ctx = static_cast<WnbdHandler::IOContext*>(arg);
127 int ret = aio_completion->get_return_value();
128
129 dout(20) << __func__ << ": " << *ctx << dendl;
130
131 if (ret == -EINVAL) {
132 // if shrinking an image, a pagecache writeback might reference
133 // extents outside of the range of the new image extents
134 dout(0) << __func__ << ": masking IO out-of-bounds error" << *ctx << dendl;
135 ctx->data.clear();
136 ret = 0;
137 }
138
139 if (ret < 0) {
140 ctx->err_code = -ret;
141 // TODO: check the actual error.
142 ctx->set_sense(SCSI_SENSE_MEDIUM_ERROR,
143 SCSI_ADSENSE_UNRECOVERED_ERROR);
144 } else if ((ctx->req_type == WnbdReqTypeRead) &&
145 ret < static_cast<int>(ctx->req_size)) {
146 int pad_byte_count = static_cast<int> (ctx->req_size) - ret;
147 ctx->data.append_zero(pad_byte_count);
148 dout(20) << __func__ << ": " << *ctx << ": Pad byte count: "
149 << pad_byte_count << dendl;
150 ctx->err_code = 0;
151 } else {
152 ctx->err_code = 0;
153 }
154
155 boost::asio::post(
156 *ctx->handler->reply_tpool,
157 [&, ctx]()
158 {
159 ctx->handler->send_io_response(ctx);
160 });
161
162 aio_completion->release();
163 }
164
165 void WnbdHandler::send_io_response(WnbdHandler::IOContext *ctx) {
166 std::unique_ptr<WnbdHandler::IOContext> pctx{ctx};
167 ceph_assert(WNBD_DEFAULT_MAX_TRANSFER_LENGTH >= pctx->data.length());
168
169 WNBD_IO_RESPONSE wnbd_rsp = {0};
170 wnbd_rsp.RequestHandle = pctx->req_handle;
171 wnbd_rsp.RequestType = pctx->req_type;
172 wnbd_rsp.Status = pctx->wnbd_status;
173 int err = 0;
174
175 // Use TLS to store an overlapped structure so that we avoid
176 // recreating one each time we send a reply.
177 static boost::thread_specific_ptr<OVERLAPPED> overlapped_tls(
178 // Cleanup routine
179 [](LPOVERLAPPED p_overlapped)
180 {
181 if (p_overlapped->hEvent) {
182 CloseHandle(p_overlapped->hEvent);
183 }
184 delete p_overlapped;
185 });
186
187 LPOVERLAPPED overlapped = overlapped_tls.get();
188 if (!overlapped)
189 {
190 overlapped = new OVERLAPPED{0};
191 HANDLE overlapped_evt = CreateEventA(0, TRUE, TRUE, NULL);
192 if (!overlapped_evt) {
193 err = GetLastError();
194 derr << "Could not create event. Error: " << err << dendl;
195 return;
196 }
197
198 overlapped->hEvent = overlapped_evt;
199 overlapped_tls.reset(overlapped);
200 }
201
202 if (!ResetEvent(overlapped->hEvent)) {
203 err = GetLastError();
204 derr << "Could not reset event. Error: " << err << dendl;
205 return;
206 }
207
208 err = WnbdSendResponseEx(
209 pctx->handler->wnbd_disk,
210 &wnbd_rsp,
211 pctx->data.c_str(),
212 pctx->data.length(),
213 overlapped);
214 if (err == ERROR_IO_PENDING) {
215 DWORD returned_bytes = 0;
216 err = 0;
217 // We've got ERROR_IO_PENDING, which means that the operation is in
218 // progress. We'll use GetOverlappedResult to wait for it to complete
219 // and then retrieve the result.
220 if (!GetOverlappedResult(pctx->handler->wnbd_disk, overlapped,
221 &returned_bytes, TRUE)) {
222 err = GetLastError();
223 derr << "Could not send response. Request id: " << wnbd_rsp.RequestHandle
224 << ". Error: " << err << dendl;
225 }
226 }
227 }
228
229 void WnbdHandler::IOContext::set_sense(uint8_t sense_key, uint8_t asc, uint64_t info)
230 {
231 WnbdSetSenseEx(&wnbd_status, sense_key, asc, info);
232 }
233
234 void WnbdHandler::IOContext::set_sense(uint8_t sense_key, uint8_t asc)
235 {
236 WnbdSetSense(&wnbd_status, sense_key, asc);
237 }
238
239 void WnbdHandler::Read(
240 PWNBD_DISK Disk,
241 UINT64 RequestHandle,
242 PVOID Buffer,
243 UINT64 BlockAddress,
244 UINT32 BlockCount,
245 BOOLEAN ForceUnitAccess)
246 {
247 WnbdHandler* handler = nullptr;
248 ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler));
249
250 WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext();
251 ctx->handler = handler;
252 ctx->req_handle = RequestHandle;
253 ctx->req_type = WnbdReqTypeRead;
254 ctx->req_size = BlockCount * handler->block_size;
255 ctx->req_from = BlockAddress * handler->block_size;
256 ceph_assert(ctx->req_size <= WNBD_DEFAULT_MAX_TRANSFER_LENGTH);
257
258 int op_flags = 0;
259 if (ForceUnitAccess) {
260 op_flags |= LIBRADOS_OP_FLAG_FADVISE_FUA;
261 }
262
263 dout(20) << *ctx << ": start" << dendl;
264
265 librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback);
266 handler->image.aio_read2(ctx->req_from, ctx->req_size, ctx->data, c, op_flags);
267
268 dout(20) << *ctx << ": submitted" << dendl;
269 }
270
271 void WnbdHandler::Write(
272 PWNBD_DISK Disk,
273 UINT64 RequestHandle,
274 PVOID Buffer,
275 UINT64 BlockAddress,
276 UINT32 BlockCount,
277 BOOLEAN ForceUnitAccess)
278 {
279 WnbdHandler* handler = nullptr;
280 ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler));
281
282 WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext();
283 ctx->handler = handler;
284 ctx->req_handle = RequestHandle;
285 ctx->req_type = WnbdReqTypeWrite;
286 ctx->req_size = BlockCount * handler->block_size;
287 ctx->req_from = BlockAddress * handler->block_size;
288
289 bufferptr ptr((char*)Buffer, ctx->req_size);
290 ctx->data.push_back(ptr);
291
292 int op_flags = 0;
293 if (ForceUnitAccess) {
294 op_flags |= LIBRADOS_OP_FLAG_FADVISE_FUA;
295 }
296
297 dout(20) << *ctx << ": start" << dendl;
298
299 librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback);
300 handler->image.aio_write2(ctx->req_from, ctx->req_size, ctx->data, c, op_flags);
301
302 dout(20) << *ctx << ": submitted" << dendl;
303 }
304
305 void WnbdHandler::Flush(
306 PWNBD_DISK Disk,
307 UINT64 RequestHandle,
308 UINT64 BlockAddress,
309 UINT32 BlockCount)
310 {
311 WnbdHandler* handler = nullptr;
312 ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler));
313
314 WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext();
315 ctx->handler = handler;
316 ctx->req_handle = RequestHandle;
317 ctx->req_type = WnbdReqTypeFlush;
318 ctx->req_size = BlockCount * handler->block_size;
319 ctx->req_from = BlockAddress * handler->block_size;
320
321 dout(20) << *ctx << ": start" << dendl;
322
323 librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback);
324 handler->image.aio_flush(c);
325
326 dout(20) << *ctx << ": submitted" << dendl;
327 }
328
329 void WnbdHandler::Unmap(
330 PWNBD_DISK Disk,
331 UINT64 RequestHandle,
332 PWNBD_UNMAP_DESCRIPTOR Descriptors,
333 UINT32 Count)
334 {
335 WnbdHandler* handler = nullptr;
336 ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler));
337 ceph_assert(1 == Count);
338
339 WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext();
340 ctx->handler = handler;
341 ctx->req_handle = RequestHandle;
342 ctx->req_type = WnbdReqTypeUnmap;
343 ctx->req_size = Descriptors[0].BlockCount * handler->block_size;
344 ctx->req_from = Descriptors[0].BlockAddress * handler->block_size;
345
346 dout(20) << *ctx << ": start" << dendl;
347
348 librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback);
349 handler->image.aio_discard(ctx->req_from, ctx->req_size, c);
350
351 dout(20) << *ctx << ": submitted" << dendl;
352 }
353
354 void WnbdHandler::LogMessage(
355 WnbdLogLevel LogLevel,
356 const char* Message,
357 const char* FileName,
358 UINT32 Line,
359 const char* FunctionName)
360 {
361 // We're already passing the log level to WNBD, so we'll use the highest
362 // log level here.
363 dout(0) << "libwnbd.dll!" << FunctionName << " "
364 << WnbdLogLevelToStr(LogLevel) << " " << Message << dendl;
365 }
366
367
368 int WnbdHandler::start()
369 {
370 int err = 0;
371 WNBD_PROPERTIES wnbd_props = {0};
372
373 instance_name.copy(wnbd_props.InstanceName, sizeof(wnbd_props.InstanceName));
374 ceph_assert(strlen(RBD_WNBD_OWNER_NAME) < WNBD_MAX_OWNER_LENGTH);
375 strncpy(wnbd_props.Owner, RBD_WNBD_OWNER_NAME, WNBD_MAX_OWNER_LENGTH);
376
377 wnbd_props.BlockCount = block_count;
378 wnbd_props.BlockSize = block_size;
379 wnbd_props.MaxUnmapDescCount = 1;
380
381 wnbd_props.Flags.ReadOnly = readonly;
382 wnbd_props.Flags.UnmapSupported = 1;
383 if (rbd_cache_enabled) {
384 wnbd_props.Flags.FUASupported = 1;
385 wnbd_props.Flags.FlushSupported = 1;
386 }
387
388 err = WnbdCreate(&wnbd_props, &RbdWnbdInterface, this, &wnbd_disk);
389 if (err)
390 goto exit;
391
392 started = true;
393
394 err = WnbdStartDispatcher(wnbd_disk, io_req_workers);
395 if (err) {
396 derr << "Could not start WNBD dispatcher. Error: " << err << dendl;
397 }
398
399 exit:
400 return err;
401 }
402
403 std::ostream &operator<<(std::ostream &os, const WnbdHandler::IOContext &ctx) {
404
405 os << "[" << std::hex << ctx.req_handle;
406
407 switch (ctx.req_type)
408 {
409 case WnbdReqTypeRead:
410 os << " READ ";
411 break;
412 case WnbdReqTypeWrite:
413 os << " WRITE ";
414 break;
415 case WnbdReqTypeFlush:
416 os << " FLUSH ";
417 break;
418 case WnbdReqTypeUnmap:
419 os << " TRIM ";
420 break;
421 default:
422 os << " UNKNOWN(" << ctx.req_type << ") ";
423 break;
424 }
425
426 os << ctx.req_from << "~" << ctx.req_size << " "
427 << std::dec << ntohl(ctx.err_code) << "]";
428
429 return os;
430 }