2 * Ceph - scalable distributed file system
4 * Copyright (C) 2020 SUSE LINUX GmbH
6 * This is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License version 2.1, as published by the Free Software
9 * Foundation. See file COPYING.
13 #define dout_context g_ceph_context
14 #define dout_subsys ceph_subsys_rbd
16 #include "wnbd_handler.h"
18 #define _NTSCSI_USER_MODE_
22 #include <boost/thread/tss.hpp>
24 #include "common/debug.h"
25 #include "common/errno.h"
26 #include "common/safe_io.h"
27 #include "common/SubProcess.h"
28 #include "common/Formatter.h"
30 #include "global/global_context.h"
// Destructor. If the handler was started and holds a WNBD disk handle, it
// logs a "terminating" message at debug level 10.
// NOTE(review): whatever teardown follows the log statement is not visible
// in this view -- confirm against the complete file.
32 WnbdHandler::~WnbdHandler()
34 if (started
&& wnbd_disk
) {
35 dout(10) << __func__
<< ": terminating" << dendl
;
// Waits on the WNBD dispatcher of this disk.
// Only acts when the handler was started and a disk handle is set: logs a
// "waiting" message at debug level 10, then stores the result of
// WnbdWaitDispatcher() in err. The derr statement reports a failure to wait
// for the dispatcher to stop.
// Returns an int; NOTE(review): presumably err -- the declaration of err and
// the return statement are not visible in this view.
49 int WnbdHandler::wait()
52 if (started
&& wnbd_disk
) {
53 dout(10) << __func__
<< ": waiting" << dendl
;
55 err
= WnbdWaitDispatcher(wnbd_disk
);
57 derr
<< __func__
<< " failed waiting for dispatcher to stop: "
// Admin hook entry point.
//   command - name of the admin command being invoked
//   cmdmap  - command arguments (not used by the visible code)
// The "wnbd stats" command is forwarded to the handler's dump_stats() and
// its result is returned.
// NOTE(review): `f` is presumably a Formatter parameter; the rest of the
// parameter list and the handling of other commands are not visible here.
65 int WnbdAdminHook::call (std::string_view command
, const cmdmap_t
& cmdmap
,
69 if (command
== "wnbd stats") {
70 return m_handler
->dump_stats(f
);
// Dumps the WNBD userspace statistics of this disk through the formatter.
//   f - formatter receiving a "stats" object section with one integer field
//       per WNBD_USR_STATS counter listed below.
// The counters come from WnbdGetUserspaceStats(); a failure of that call is
// reported through derr together with the returned error code.
// Returns an int; NOTE(review): the return statements are not visible in
// this view.
75 int WnbdHandler::dump_stats(Formatter
*f
)
// Start from a zeroed structure and let WNBD fill it in.
81 WNBD_USR_STATS stats
= { 0 };
82 DWORD err
= WnbdGetUserspaceStats(wnbd_disk
, &stats
);
84 derr
<< "Failed to retrieve WNBD userspace stats. Error: " << err
<< dendl
;
// Each field name below matches the WNBD_USR_STATS member it reports.
88 f
->open_object_section("stats");
89 f
->dump_int("TotalReceivedRequests", stats
.TotalReceivedRequests
);
90 f
->dump_int("TotalSubmittedRequests", stats
.TotalSubmittedRequests
);
91 f
->dump_int("TotalReceivedReplies", stats
.TotalReceivedReplies
);
92 f
->dump_int("UnsubmittedRequests", stats
.UnsubmittedRequests
);
93 f
->dump_int("PendingSubmittedRequests", stats
.PendingSubmittedRequests
);
94 f
->dump_int("PendingReplies", stats
.PendingReplies
);
95 f
->dump_int("ReadErrors", stats
.ReadErrors
);
96 f
->dump_int("WriteErrors", stats
.WriteErrors
);
97 f
->dump_int("FlushErrors", stats
.FlushErrors
);
98 f
->dump_int("UnmapErrors", stats
.UnmapErrors
);
99 f
->dump_int("InvalidRequests", stats
.InvalidRequests
);
100 f
->dump_int("TotalRWRequests", stats
.TotalRWRequests
);
101 f
->dump_int("TotalReadBlocks", stats
.TotalReadBlocks
);
102 f
->dump_int("TotalWrittenBlocks", stats
.TotalWrittenBlocks
);
// Requests removal of the WNBD disk.
// Holds shutdown_lock for the duration of the call and only acts when the
// handler has not been marked terminated and a disk handle is set. The disk
// is removed with WnbdRemove() using default options (NULL).
// NOTE(review): statements following WnbdRemove() inside the guarded block
// are not visible in this view.
108 void WnbdHandler::shutdown()
110 std::unique_lock l
{shutdown_lock
};
111 if (!terminated
&& wnbd_disk
) {
112 // We're requesting the disk to be removed but continue serving IO
113 // requests until the driver sends us the "Disconnect" event.
114 // TODO: expose PWNBD_REMOVE_OPTIONS, we're using the defaults ATM.
115 WnbdRemove(wnbd_disk
, NULL
);
// librbd AIO completion callback.
//   cb  - the completion, cast to librbd::RBD::AioCompletion
//   arg - the WnbdHandler::IOContext passed in when the completion was
//         created
// Reads the completion's return value and updates the context accordingly:
//   * -EINVAL is special-cased and logged as a masked out-of-bounds error;
//   * on the error path the negated return value is stored in err_code and
//     a MEDIUM ERROR / UNRECOVERED ERROR sense is set;
//   * a read that returned fewer bytes than requested is zero-padded up to
//     req_size.
// The reply is then sent with send_io_response() and the completion is
// released.
// NOTE(review): several conditions and statements of this function are not
// visible in this view (e.g. what the -EINVAL branch does after logging, the
// condition guarding the error path, and how reply_tpool is used to dispatch
// the reply) -- confirm against the complete file.
121 void WnbdHandler::aio_callback(librbd::completion_t cb
, void *arg
)
123 librbd::RBD::AioCompletion
*aio_completion
=
124 reinterpret_cast<librbd::RBD::AioCompletion
*>(cb
);
126 WnbdHandler::IOContext
* ctx
= static_cast<WnbdHandler::IOContext
*>(arg
);
127 int ret
= aio_completion
->get_return_value();
129 dout(20) << __func__
<< ": " << *ctx
<< dendl
;
131 if (ret
== -EINVAL
) {
132 // if shrinking an image, a pagecache writeback might reference
133 // extents outside of the range of the new image extents
134 dout(0) << __func__
<< ": masking IO out-of-bounds error" << *ctx
<< dendl
;
// err_code holds the positive form of the (negative) return value.
140 ctx
->err_code
= -ret
;
141 // TODO: check the actual error.
142 ctx
->set_sense(SCSI_SENSE_MEDIUM_ERROR
,
143 SCSI_ADSENSE_UNRECOVERED_ERROR
);
// Short read: pad the buffer with zeroes up to the requested size.
144 } else if ((ctx
->req_type
== WnbdReqTypeRead
) &&
145 ret
< static_cast<int>(ctx
->req_size
)) {
146 int pad_byte_count
= static_cast<int> (ctx
->req_size
) - ret
;
147 ctx
->data
.append_zero(pad_byte_count
);
148 dout(20) << __func__
<< ": " << *ctx
<< ": Pad byte count: "
149 << pad_byte_count
<< dendl
;
156 *ctx
->handler
->reply_tpool
,
159 ctx
->handler
->send_io_response(ctx
);
162 aio_completion
->release();
// Sends the WNBD reply for a completed request.
//   ctx - request context; ownership is taken here (it is wrapped in a
//         std::unique_ptr), so the caller must not use it afterwards.
// The response carries the request handle, request type and status stored in
// the context. The send is overlapped: a per-thread OVERLAPPED structure and
// its event are cached in thread-local storage, and when the send reports
// ERROR_IO_PENDING the function blocks in GetOverlappedResult() until the
// operation finishes. Failures are logged through derr.
// NOTE(review): the declaration of err, the remaining WnbdSendResponseEx()
// arguments and the early-exit paths are not visible in this view.
165 void WnbdHandler::send_io_response(WnbdHandler::IOContext
*ctx
) {
166 std::unique_ptr
<WnbdHandler::IOContext
> pctx
{ctx
};
// The reply payload must fit in a single WNBD transfer.
167 ceph_assert(WNBD_DEFAULT_MAX_TRANSFER_LENGTH
>= pctx
->data
.length());
169 WNBD_IO_RESPONSE wnbd_rsp
= {0};
170 wnbd_rsp
.RequestHandle
= pctx
->req_handle
;
171 wnbd_rsp
.RequestType
= pctx
->req_type
;
172 wnbd_rsp
.Status
= pctx
->wnbd_status
;
175 // Use TLS to store an overlapped structure so that we avoid
176 // recreating one each time we send a reply.
177 static boost::thread_specific_ptr
<OVERLAPPED
> overlapped_tls(
// TLS cleanup function: closes the event handle held by the structure.
179 [](LPOVERLAPPED p_overlapped
)
181 if (p_overlapped
->hEvent
) {
182 CloseHandle(p_overlapped
->hEvent
);
187 LPOVERLAPPED overlapped
= overlapped_tls
.get();
190 overlapped
= new OVERLAPPED
{0};
// Manual-reset event, created in the signaled state.
191 HANDLE overlapped_evt
= CreateEventA(0, TRUE
, TRUE
, NULL
);
192 if (!overlapped_evt
) {
193 err
= GetLastError();
194 derr
<< "Could not create event. Error: " << err
<< dendl
;
// Cache the structure for later replies sent from this thread.
198 overlapped
->hEvent
= overlapped_evt
;
199 overlapped_tls
.reset(overlapped
);
// Clear the event before issuing the overlapped send.
202 if (!ResetEvent(overlapped
->hEvent
)) {
203 err
= GetLastError();
204 derr
<< "Could not reset event. Error: " << err
<< dendl
;
208 err
= WnbdSendResponseEx(
209 pctx
->handler
->wnbd_disk
,
214 if (err
== ERROR_IO_PENDING
) {
215 DWORD returned_bytes
= 0;
217 // We've got ERROR_IO_PENDING, which means that the operation is in
218 // progress. We'll use GetOverlappedResult to wait for it to complete
219 // and then retrieve the result.
220 if (!GetOverlappedResult(pctx
->handler
->wnbd_disk
, overlapped
,
221 &returned_bytes
, TRUE
)) {
222 err
= GetLastError();
223 derr
<< "Could not send response. Request id: " << wnbd_rsp
.RequestHandle
224 << ". Error: " << err
<< dendl
;
229 void WnbdHandler::IOContext::set_sense(uint8_t sense_key
, uint8_t asc
, uint64_t info
)
231 WnbdSetSenseEx(&wnbd_status
, sense_key
, asc
, info
);
234 void WnbdHandler::IOContext::set_sense(uint8_t sense_key
, uint8_t asc
)
236 WnbdSetSense(&wnbd_status
, sense_key
, asc
);
// WNBD read request callback.
// Looks up the owning WnbdHandler through the disk's user context (asserting
// that WnbdGetUserContext() returns zero), records the request in a new
// IOContext and submits an asynchronous librbd read whose completion is
// delivered to aio_callback.
//   RequestHandle   - WNBD request identifier, stored in the context
//   ForceUnitAccess - when set, LIBRADOS_OP_FLAG_FADVISE_FUA is added to the
//                     op flags of the read
// The byte offset and size are BlockAddress and BlockCount multiplied by the
// handler's block size.
// NOTE(review): Disk, BlockAddress, BlockCount and op_flags are declared on
// lines not visible in this view.
239 void WnbdHandler::Read(
241 UINT64 RequestHandle
,
245 BOOLEAN ForceUnitAccess
)
247 WnbdHandler
* handler
= nullptr;
248 ceph_assert(!WnbdGetUserContext(Disk
, (PVOID
*)&handler
));
250 WnbdHandler::IOContext
* ctx
= new WnbdHandler::IOContext();
251 ctx
->handler
= handler
;
252 ctx
->req_handle
= RequestHandle
;
253 ctx
->req_type
= WnbdReqTypeRead
;
254 ctx
->req_size
= BlockCount
* handler
->block_size
;
255 ctx
->req_from
= BlockAddress
* handler
->block_size
;
// The request must fit in a single WNBD transfer.
256 ceph_assert(ctx
->req_size
<= WNBD_DEFAULT_MAX_TRANSFER_LENGTH
);
259 if (ForceUnitAccess
) {
260 op_flags
|= LIBRADOS_OP_FLAG_FADVISE_FUA
;
263 dout(20) << *ctx
<< ": start" << dendl
;
// The context travels with the completion and comes back as the callback's
// `arg`; the data read lands in ctx->data.
265 librbd::RBD::AioCompletion
*c
= new librbd::RBD::AioCompletion(ctx
, aio_callback
);
266 handler
->image
.aio_read2(ctx
->req_from
, ctx
->req_size
, ctx
->data
, c
, op_flags
);
268 dout(20) << *ctx
<< ": submitted" << dendl
;
// WNBD write request callback.
// Looks up the owning WnbdHandler through the disk's user context (asserting
// that WnbdGetUserContext() returns zero), records the request in a new
// IOContext, appends the payload to the context's buffer list and submits an
// asynchronous librbd write whose completion is delivered to aio_callback.
//   RequestHandle   - WNBD request identifier, stored in the context
//   ForceUnitAccess - when set, LIBRADOS_OP_FLAG_FADVISE_FUA is added to the
//                     op flags of the write
// The byte offset and size are BlockAddress and BlockCount multiplied by the
// handler's block size.
// NOTE(review): Disk, Buffer, BlockAddress, BlockCount and op_flags are
// declared on lines not visible in this view.
271 void WnbdHandler::Write(
273 UINT64 RequestHandle
,
277 BOOLEAN ForceUnitAccess
)
279 WnbdHandler
* handler
= nullptr;
280 ceph_assert(!WnbdGetUserContext(Disk
, (PVOID
*)&handler
));
282 WnbdHandler::IOContext
* ctx
= new WnbdHandler::IOContext();
283 ctx
->handler
= handler
;
284 ctx
->req_handle
= RequestHandle
;
285 ctx
->req_type
= WnbdReqTypeWrite
;
286 ctx
->req_size
= BlockCount
* handler
->block_size
;
287 ctx
->req_from
= BlockAddress
* handler
->block_size
;
// Build a bufferptr over req_size bytes of Buffer and queue it as the
// data to be written.
289 bufferptr
ptr((char*)Buffer
, ctx
->req_size
);
290 ctx
->data
.push_back(ptr
);
293 if (ForceUnitAccess
) {
294 op_flags
|= LIBRADOS_OP_FLAG_FADVISE_FUA
;
297 dout(20) << *ctx
<< ": start" << dendl
;
299 librbd::RBD::AioCompletion
*c
= new librbd::RBD::AioCompletion(ctx
, aio_callback
);
300 handler
->image
.aio_write2(ctx
->req_from
, ctx
->req_size
, ctx
->data
, c
, op_flags
);
302 dout(20) << *ctx
<< ": submitted" << dendl
;
// WNBD flush request callback.
// Looks up the owning WnbdHandler through the disk's user context (asserting
// that WnbdGetUserContext() returns zero), records the request in a new
// IOContext and submits an asynchronous librbd flush whose completion is
// delivered to aio_callback.
//   RequestHandle - WNBD request identifier, stored in the context
// The block range is converted to bytes and stored in the context, but
// aio_flush() is called with the completion only.
// NOTE(review): Disk, BlockAddress and BlockCount are declared on lines not
// visible in this view.
305 void WnbdHandler::Flush(
307 UINT64 RequestHandle
,
311 WnbdHandler
* handler
= nullptr;
312 ceph_assert(!WnbdGetUserContext(Disk
, (PVOID
*)&handler
));
314 WnbdHandler::IOContext
* ctx
= new WnbdHandler::IOContext();
315 ctx
->handler
= handler
;
316 ctx
->req_handle
= RequestHandle
;
317 ctx
->req_type
= WnbdReqTypeFlush
;
318 ctx
->req_size
= BlockCount
* handler
->block_size
;
319 ctx
->req_from
= BlockAddress
* handler
->block_size
;
321 dout(20) << *ctx
<< ": start" << dendl
;
323 librbd::RBD::AioCompletion
*c
= new librbd::RBD::AioCompletion(ctx
, aio_callback
);
324 handler
->image
.aio_flush(c
);
326 dout(20) << *ctx
<< ": submitted" << dendl
;
// WNBD unmap (discard) request callback.
// Looks up the owning WnbdHandler through the disk's user context (asserting
// that WnbdGetUserContext() returns zero), records the request in a new
// IOContext and submits an asynchronous librbd discard whose completion is
// delivered to aio_callback.
//   RequestHandle - WNBD request identifier, stored in the context
//   Descriptors   - unmap descriptors; only Descriptors[0] is used
// Exactly one descriptor is accepted (Count is asserted to be 1). Its block
// address and block count are multiplied by the handler's block size to get
// the byte range to discard.
// NOTE(review): Disk and Count are declared on lines not visible in this
// view.
329 void WnbdHandler::Unmap(
331 UINT64 RequestHandle
,
332 PWNBD_UNMAP_DESCRIPTOR Descriptors
,
335 WnbdHandler
* handler
= nullptr;
336 ceph_assert(!WnbdGetUserContext(Disk
, (PVOID
*)&handler
));
337 ceph_assert(1 == Count
);
339 WnbdHandler::IOContext
* ctx
= new WnbdHandler::IOContext();
340 ctx
->handler
= handler
;
341 ctx
->req_handle
= RequestHandle
;
342 ctx
->req_type
= WnbdReqTypeUnmap
;
343 ctx
->req_size
= Descriptors
[0].BlockCount
* handler
->block_size
;
344 ctx
->req_from
= Descriptors
[0].BlockAddress
* handler
->block_size
;
346 dout(20) << *ctx
<< ": start" << dendl
;
348 librbd::RBD::AioCompletion
*c
= new librbd::RBD::AioCompletion(ctx
, aio_callback
);
349 handler
->image
.aio_discard(ctx
->req_from
, ctx
->req_size
, c
);
351 dout(20) << *ctx
<< ": submitted" << dendl
;
// Log callback for messages coming from libwnbd.
//   LogLevel     - WNBD log level, rendered with WnbdLogLevelToStr()
//   FileName     - source file name (not used by the visible code)
//   FunctionName - name of the libwnbd function emitting the message
// Every message is written at dout level 0 in the form
// "libwnbd.dll!<function> <level> <message>".
// NOTE(review): Message is declared on a parameter line not visible in this
// view.
354 void WnbdHandler::LogMessage(
355 WnbdLogLevel LogLevel
,
357 const char* FileName
,
359 const char* FunctionName
)
361 // We're already passing the log level to WNBD, so we'll use the highest
363 dout(0) << "libwnbd.dll!" << FunctionName
<< " "
364 << WnbdLogLevelToStr(LogLevel
) << " " << Message
<< dendl
;
// Creates the WNBD disk for this handler and starts its IO dispatcher.
// Fills a zero-initialized WNBD_PROPERTIES (instance name, owner, geometry,
// capability flags), passes it together with RbdWnbdInterface and `this` to
// WnbdCreate(), then calls WnbdStartDispatcher() with io_req_workers.
// A dispatcher start failure is logged through derr.
// Returns an int; NOTE(review): the declaration of err, the error checks and
// the return statements are not visible in this view.
368 int WnbdHandler::start()
371 WNBD_PROPERTIES wnbd_props
= {0};
// NOTE(review): if instance_name is a std::string, copy() does not write a
// terminating NUL; the name is only terminated (by the zero-initialization
// above) when it is shorter than InstanceName -- confirm the length is
// bounded elsewhere.
373 instance_name
.copy(wnbd_props
.InstanceName
, sizeof(wnbd_props
.InstanceName
));
// The owner name must fit in the Owner field, terminator included.
374 ceph_assert(strlen(RBD_WNBD_OWNER_NAME
) < WNBD_MAX_OWNER_LENGTH
);
375 strncpy(wnbd_props
.Owner
, RBD_WNBD_OWNER_NAME
, WNBD_MAX_OWNER_LENGTH
);
377 wnbd_props
.BlockCount
= block_count
;
378 wnbd_props
.BlockSize
= block_size
;
// One unmap descriptor per request; Unmap() asserts Count == 1.
379 wnbd_props
.MaxUnmapDescCount
= 1;
381 wnbd_props
.Flags
.ReadOnly
= readonly
;
382 wnbd_props
.Flags
.UnmapSupported
= 1;
// FUA and flush support are only advertised when the rbd cache is enabled.
383 if (rbd_cache_enabled
) {
384 wnbd_props
.Flags
.FUASupported
= 1;
385 wnbd_props
.Flags
.FlushSupported
= 1;
388 err
= WnbdCreate(&wnbd_props
, &RbdWnbdInterface
, this, &wnbd_disk
);
394 err
= WnbdStartDispatcher(wnbd_disk
, io_req_workers
);
396 derr
<< "Could not start WNBD dispatcher. Error: " << err
<< dendl
;
// Stream insertion for WnbdHandler::IOContext, used by the log statements.
// Writes "[" followed by the request handle in hexadecimal, a token for the
// request type (read, write, flush, unmap, or " UNKNOWN(<type>) " for any
// other value), then "<req_from>~<req_size> " and finally the error code in
// decimal followed by "]".
// NOTE(review): std::dec is only applied right before the error code, so
// req_from and req_size appear to be printed in hexadecimal as well. The
// error code is passed through ntohl() before printing, which swaps its
// bytes on little-endian hosts -- confirm this is intended. The bodies of
// the known case labels and the return statement are not visible in this
// view.
403 std::ostream
&operator<<(std::ostream
&os
, const WnbdHandler::IOContext
&ctx
) {
405 os
<< "[" << std::hex
<< ctx
.req_handle
;
407 switch (ctx
.req_type
)
409 case WnbdReqTypeRead
:
412 case WnbdReqTypeWrite
:
415 case WnbdReqTypeFlush
:
418 case WnbdReqTypeUnmap
:
422 os
<< " UNKNOWN(" << ctx
.req_type
<< ") ";
426 os
<< ctx
.req_from
<< "~" << ctx
.req_size
<< " "
427 << std::dec
<< ntohl(ctx
.err_code
) << "]";