]> git.proxmox.com Git - ceph.git/blame - ceph/src/tools/rbd_wnbd/wnbd_handler.cc
import ceph quincy 17.2.6
[ceph.git] / ceph / src / tools / rbd_wnbd / wnbd_handler.cc
CommitLineData
f67539c2
TL
1/*
2 * Ceph - scalable distributed file system
3 *
4 * Copyright (C) 2020 SUSE LINUX GmbH
5 *
6 * This is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License version 2.1, as published by the Free Software
9 * Foundation. See file COPYING.
10 *
11 */
12
13#define dout_context g_ceph_context
14#define dout_subsys ceph_subsys_rbd
15
16#include "wnbd_handler.h"
17
18#define _NTSCSI_USER_MODE_
19#include <rpc.h>
20#include <ddk/scsi.h>
21
22#include <boost/thread/tss.hpp>
23
24#include "common/debug.h"
25#include "common/errno.h"
26#include "common/safe_io.h"
27#include "common/SubProcess.h"
28#include "common/Formatter.h"
29
30#include "global/global_context.h"
31
32WnbdHandler::~WnbdHandler()
33{
34 if (started && wnbd_disk) {
35 dout(10) << __func__ << ": terminating" << dendl;
36
37 shutdown();
38 reply_tpool->join();
39
40 WnbdClose(wnbd_disk);
41
42 started = false;
43
44 delete reply_tpool;
45 delete admin_hook;
46 }
47}
48
49int WnbdHandler::wait()
50{
51 int err = 0;
52 if (started && wnbd_disk) {
53 dout(10) << __func__ << ": waiting" << dendl;
54
55 err = WnbdWaitDispatcher(wnbd_disk);
56 if (err) {
57 derr << __func__ << " failed waiting for dispatcher to stop: "
58 << err << dendl;
59 }
60 }
61
62 return err;
63}
64
39ae355f
TL
65int WnbdAdminHook::call (
66 std::string_view command, const cmdmap_t& cmdmap,
67 const bufferlist&,
68 Formatter *f,
69 std::ostream& errss,
70 bufferlist& out)
71{
72 if (command == "wnbd stats") {
73 return m_handler->dump_stats(f);
f67539c2 74 }
39ae355f
TL
75 return -ENOSYS;
76}
f67539c2
TL
77
78int WnbdHandler::dump_stats(Formatter *f)
79{
80 if (!f) {
81 return -EINVAL;
82 }
83
84 WNBD_USR_STATS stats = { 0 };
85 DWORD err = WnbdGetUserspaceStats(wnbd_disk, &stats);
86 if (err) {
87 derr << "Failed to retrieve WNBD userspace stats. Error: " << err << dendl;
88 return -EINVAL;
89 }
90
91 f->open_object_section("stats");
92 f->dump_int("TotalReceivedRequests", stats.TotalReceivedRequests);
93 f->dump_int("TotalSubmittedRequests", stats.TotalSubmittedRequests);
94 f->dump_int("TotalReceivedReplies", stats.TotalReceivedReplies);
95 f->dump_int("UnsubmittedRequests", stats.UnsubmittedRequests);
96 f->dump_int("PendingSubmittedRequests", stats.PendingSubmittedRequests);
97 f->dump_int("PendingReplies", stats.PendingReplies);
98 f->dump_int("ReadErrors", stats.ReadErrors);
99 f->dump_int("WriteErrors", stats.WriteErrors);
100 f->dump_int("FlushErrors", stats.FlushErrors);
101 f->dump_int("UnmapErrors", stats.UnmapErrors);
102 f->dump_int("InvalidRequests", stats.InvalidRequests);
103 f->dump_int("TotalRWRequests", stats.TotalRWRequests);
104 f->dump_int("TotalReadBlocks", stats.TotalReadBlocks);
105 f->dump_int("TotalWrittenBlocks", stats.TotalWrittenBlocks);
106
107 f->close_section();
108 return 0;
109}
110
111void WnbdHandler::shutdown()
112{
113 std::unique_lock l{shutdown_lock};
114 if (!terminated && wnbd_disk) {
115 // We're requesting the disk to be removed but continue serving IO
116 // requests until the driver sends us the "Disconnect" event.
117 // TODO: expose PWNBD_REMOVE_OPTIONS, we're using the defaults ATM.
118 WnbdRemove(wnbd_disk, NULL);
119 wait();
120 terminated = true;
121 }
122}
123
124void WnbdHandler::aio_callback(librbd::completion_t cb, void *arg)
125{
126 librbd::RBD::AioCompletion *aio_completion =
127 reinterpret_cast<librbd::RBD::AioCompletion*>(cb);
128
129 WnbdHandler::IOContext* ctx = static_cast<WnbdHandler::IOContext*>(arg);
130 int ret = aio_completion->get_return_value();
131
132 dout(20) << __func__ << ": " << *ctx << dendl;
133
134 if (ret == -EINVAL) {
135 // if shrinking an image, a pagecache writeback might reference
136 // extents outside of the range of the new image extents
137 dout(0) << __func__ << ": masking IO out-of-bounds error" << *ctx << dendl;
138 ctx->data.clear();
139 ret = 0;
140 }
141
142 if (ret < 0) {
143 ctx->err_code = -ret;
144 // TODO: check the actual error.
145 ctx->set_sense(SCSI_SENSE_MEDIUM_ERROR,
146 SCSI_ADSENSE_UNRECOVERED_ERROR);
147 } else if ((ctx->req_type == WnbdReqTypeRead) &&
148 ret < static_cast<int>(ctx->req_size)) {
149 int pad_byte_count = static_cast<int> (ctx->req_size) - ret;
150 ctx->data.append_zero(pad_byte_count);
151 dout(20) << __func__ << ": " << *ctx << ": Pad byte count: "
152 << pad_byte_count << dendl;
153 ctx->err_code = 0;
154 } else {
155 ctx->err_code = 0;
156 }
157
158 boost::asio::post(
159 *ctx->handler->reply_tpool,
160 [&, ctx]()
161 {
162 ctx->handler->send_io_response(ctx);
163 });
164
165 aio_completion->release();
166}
167
168void WnbdHandler::send_io_response(WnbdHandler::IOContext *ctx) {
169 std::unique_ptr<WnbdHandler::IOContext> pctx{ctx};
170 ceph_assert(WNBD_DEFAULT_MAX_TRANSFER_LENGTH >= pctx->data.length());
171
172 WNBD_IO_RESPONSE wnbd_rsp = {0};
173 wnbd_rsp.RequestHandle = pctx->req_handle;
174 wnbd_rsp.RequestType = pctx->req_type;
175 wnbd_rsp.Status = pctx->wnbd_status;
176 int err = 0;
177
178 // Use TLS to store an overlapped structure so that we avoid
179 // recreating one each time we send a reply.
180 static boost::thread_specific_ptr<OVERLAPPED> overlapped_tls(
181 // Cleanup routine
182 [](LPOVERLAPPED p_overlapped)
183 {
184 if (p_overlapped->hEvent) {
185 CloseHandle(p_overlapped->hEvent);
186 }
187 delete p_overlapped;
188 });
189
190 LPOVERLAPPED overlapped = overlapped_tls.get();
191 if (!overlapped)
192 {
193 overlapped = new OVERLAPPED{0};
194 HANDLE overlapped_evt = CreateEventA(0, TRUE, TRUE, NULL);
195 if (!overlapped_evt) {
196 err = GetLastError();
197 derr << "Could not create event. Error: " << err << dendl;
198 return;
199 }
200
201 overlapped->hEvent = overlapped_evt;
202 overlapped_tls.reset(overlapped);
203 }
204
205 if (!ResetEvent(overlapped->hEvent)) {
206 err = GetLastError();
207 derr << "Could not reset event. Error: " << err << dendl;
208 return;
209 }
210
211 err = WnbdSendResponseEx(
212 pctx->handler->wnbd_disk,
213 &wnbd_rsp,
214 pctx->data.c_str(),
215 pctx->data.length(),
216 overlapped);
217 if (err == ERROR_IO_PENDING) {
218 DWORD returned_bytes = 0;
219 err = 0;
220 // We've got ERROR_IO_PENDING, which means that the operation is in
221 // progress. We'll use GetOverlappedResult to wait for it to complete
222 // and then retrieve the result.
223 if (!GetOverlappedResult(pctx->handler->wnbd_disk, overlapped,
224 &returned_bytes, TRUE)) {
225 err = GetLastError();
226 derr << "Could not send response. Request id: " << wnbd_rsp.RequestHandle
227 << ". Error: " << err << dendl;
228 }
229 }
230}
231
232void WnbdHandler::IOContext::set_sense(uint8_t sense_key, uint8_t asc, uint64_t info)
233{
234 WnbdSetSenseEx(&wnbd_status, sense_key, asc, info);
235}
236
237void WnbdHandler::IOContext::set_sense(uint8_t sense_key, uint8_t asc)
238{
239 WnbdSetSense(&wnbd_status, sense_key, asc);
240}
241
242void WnbdHandler::Read(
243 PWNBD_DISK Disk,
244 UINT64 RequestHandle,
245 PVOID Buffer,
246 UINT64 BlockAddress,
247 UINT32 BlockCount,
248 BOOLEAN ForceUnitAccess)
249{
250 WnbdHandler* handler = nullptr;
251 ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler));
252
253 WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext();
254 ctx->handler = handler;
255 ctx->req_handle = RequestHandle;
256 ctx->req_type = WnbdReqTypeRead;
257 ctx->req_size = BlockCount * handler->block_size;
258 ctx->req_from = BlockAddress * handler->block_size;
259 ceph_assert(ctx->req_size <= WNBD_DEFAULT_MAX_TRANSFER_LENGTH);
260
261 int op_flags = 0;
262 if (ForceUnitAccess) {
263 op_flags |= LIBRADOS_OP_FLAG_FADVISE_FUA;
264 }
265
266 dout(20) << *ctx << ": start" << dendl;
267
268 librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback);
269 handler->image.aio_read2(ctx->req_from, ctx->req_size, ctx->data, c, op_flags);
270
271 dout(20) << *ctx << ": submitted" << dendl;
272}
273
274void WnbdHandler::Write(
275 PWNBD_DISK Disk,
276 UINT64 RequestHandle,
277 PVOID Buffer,
278 UINT64 BlockAddress,
279 UINT32 BlockCount,
280 BOOLEAN ForceUnitAccess)
281{
282 WnbdHandler* handler = nullptr;
283 ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler));
284
285 WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext();
286 ctx->handler = handler;
287 ctx->req_handle = RequestHandle;
288 ctx->req_type = WnbdReqTypeWrite;
289 ctx->req_size = BlockCount * handler->block_size;
290 ctx->req_from = BlockAddress * handler->block_size;
291
292 bufferptr ptr((char*)Buffer, ctx->req_size);
293 ctx->data.push_back(ptr);
294
295 int op_flags = 0;
296 if (ForceUnitAccess) {
297 op_flags |= LIBRADOS_OP_FLAG_FADVISE_FUA;
298 }
299
300 dout(20) << *ctx << ": start" << dendl;
301
302 librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback);
303 handler->image.aio_write2(ctx->req_from, ctx->req_size, ctx->data, c, op_flags);
304
305 dout(20) << *ctx << ": submitted" << dendl;
306}
307
308void WnbdHandler::Flush(
309 PWNBD_DISK Disk,
310 UINT64 RequestHandle,
311 UINT64 BlockAddress,
312 UINT32 BlockCount)
313{
314 WnbdHandler* handler = nullptr;
315 ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler));
316
317 WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext();
318 ctx->handler = handler;
319 ctx->req_handle = RequestHandle;
320 ctx->req_type = WnbdReqTypeFlush;
321 ctx->req_size = BlockCount * handler->block_size;
322 ctx->req_from = BlockAddress * handler->block_size;
323
324 dout(20) << *ctx << ": start" << dendl;
325
326 librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback);
327 handler->image.aio_flush(c);
328
329 dout(20) << *ctx << ": submitted" << dendl;
330}
331
332void WnbdHandler::Unmap(
333 PWNBD_DISK Disk,
334 UINT64 RequestHandle,
335 PWNBD_UNMAP_DESCRIPTOR Descriptors,
336 UINT32 Count)
337{
338 WnbdHandler* handler = nullptr;
339 ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler));
340 ceph_assert(1 == Count);
341
342 WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext();
343 ctx->handler = handler;
344 ctx->req_handle = RequestHandle;
345 ctx->req_type = WnbdReqTypeUnmap;
346 ctx->req_size = Descriptors[0].BlockCount * handler->block_size;
347 ctx->req_from = Descriptors[0].BlockAddress * handler->block_size;
348
349 dout(20) << *ctx << ": start" << dendl;
350
351 librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback);
352 handler->image.aio_discard(ctx->req_from, ctx->req_size, c);
353
354 dout(20) << *ctx << ": submitted" << dendl;
355}
356
357void WnbdHandler::LogMessage(
358 WnbdLogLevel LogLevel,
359 const char* Message,
360 const char* FileName,
361 UINT32 Line,
362 const char* FunctionName)
363{
364 // We're already passing the log level to WNBD, so we'll use the highest
365 // log level here.
366 dout(0) << "libwnbd.dll!" << FunctionName << " "
367 << WnbdLogLevelToStr(LogLevel) << " " << Message << dendl;
368}
369
370
371int WnbdHandler::start()
372{
373 int err = 0;
374 WNBD_PROPERTIES wnbd_props = {0};
375
376 instance_name.copy(wnbd_props.InstanceName, sizeof(wnbd_props.InstanceName));
377 ceph_assert(strlen(RBD_WNBD_OWNER_NAME) < WNBD_MAX_OWNER_LENGTH);
378 strncpy(wnbd_props.Owner, RBD_WNBD_OWNER_NAME, WNBD_MAX_OWNER_LENGTH);
379
380 wnbd_props.BlockCount = block_count;
381 wnbd_props.BlockSize = block_size;
382 wnbd_props.MaxUnmapDescCount = 1;
383
384 wnbd_props.Flags.ReadOnly = readonly;
385 wnbd_props.Flags.UnmapSupported = 1;
386 if (rbd_cache_enabled) {
387 wnbd_props.Flags.FUASupported = 1;
388 wnbd_props.Flags.FlushSupported = 1;
389 }
390
391 err = WnbdCreate(&wnbd_props, &RbdWnbdInterface, this, &wnbd_disk);
392 if (err)
393 goto exit;
394
395 started = true;
396
397 err = WnbdStartDispatcher(wnbd_disk, io_req_workers);
398 if (err) {
399 derr << "Could not start WNBD dispatcher. Error: " << err << dendl;
400 }
401
402exit:
403 return err;
404}
405
406std::ostream &operator<<(std::ostream &os, const WnbdHandler::IOContext &ctx) {
407
408 os << "[" << std::hex << ctx.req_handle;
409
410 switch (ctx.req_type)
411 {
412 case WnbdReqTypeRead:
413 os << " READ ";
414 break;
415 case WnbdReqTypeWrite:
416 os << " WRITE ";
417 break;
418 case WnbdReqTypeFlush:
419 os << " FLUSH ";
420 break;
421 case WnbdReqTypeUnmap:
422 os << " TRIM ";
423 break;
424 default:
425 os << " UNKNOWN(" << ctx.req_type << ") ";
426 break;
427 }
428
429 os << ctx.req_from << "~" << ctx.req_size << " "
430 << std::dec << ntohl(ctx.err_code) << "]";
431
432 return os;
433}