]>
Commit | Line | Data |
---|---|---|
f67539c2 TL |
1 | /* |
2 | * Ceph - scalable distributed file system | |
3 | * | |
4 | * Copyright (C) 2020 SUSE LINUX GmbH | |
5 | * | |
6 | * This is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU Lesser General Public | |
8 | * License version 2.1, as published by the Free Software | |
9 | * Foundation. See file COPYING. | |
10 | * | |
11 | */ | |
12 | ||
13 | #define dout_context g_ceph_context | |
14 | #define dout_subsys ceph_subsys_rbd | |
15 | ||
16 | #include "wnbd_handler.h" | |
17 | ||
18 | #define _NTSCSI_USER_MODE_ | |
19 | #include <rpc.h> | |
20 | #include <ddk/scsi.h> | |
21 | ||
22 | #include <boost/thread/tss.hpp> | |
23 | ||
24 | #include "common/debug.h" | |
25 | #include "common/errno.h" | |
26 | #include "common/safe_io.h" | |
27 | #include "common/SubProcess.h" | |
28 | #include "common/Formatter.h" | |
29 | ||
30 | #include "global/global_context.h" | |
31 | ||
32 | WnbdHandler::~WnbdHandler() | |
33 | { | |
34 | if (started && wnbd_disk) { | |
35 | dout(10) << __func__ << ": terminating" << dendl; | |
36 | ||
37 | shutdown(); | |
38 | reply_tpool->join(); | |
39 | ||
40 | WnbdClose(wnbd_disk); | |
41 | ||
42 | started = false; | |
43 | ||
44 | delete reply_tpool; | |
45 | delete admin_hook; | |
46 | } | |
47 | } | |
48 | ||
49 | int WnbdHandler::wait() | |
50 | { | |
51 | int err = 0; | |
52 | if (started && wnbd_disk) { | |
53 | dout(10) << __func__ << ": waiting" << dendl; | |
54 | ||
55 | err = WnbdWaitDispatcher(wnbd_disk); | |
56 | if (err) { | |
57 | derr << __func__ << " failed waiting for dispatcher to stop: " | |
58 | << err << dendl; | |
59 | } | |
60 | } | |
61 | ||
62 | return err; | |
63 | } | |
64 | ||
39ae355f TL |
65 | int WnbdAdminHook::call ( |
66 | std::string_view command, const cmdmap_t& cmdmap, | |
67 | const bufferlist&, | |
68 | Formatter *f, | |
69 | std::ostream& errss, | |
70 | bufferlist& out) | |
71 | { | |
72 | if (command == "wnbd stats") { | |
73 | return m_handler->dump_stats(f); | |
f67539c2 | 74 | } |
39ae355f TL |
75 | return -ENOSYS; |
76 | } | |
f67539c2 TL |
77 | |
78 | int WnbdHandler::dump_stats(Formatter *f) | |
79 | { | |
80 | if (!f) { | |
81 | return -EINVAL; | |
82 | } | |
83 | ||
84 | WNBD_USR_STATS stats = { 0 }; | |
85 | DWORD err = WnbdGetUserspaceStats(wnbd_disk, &stats); | |
86 | if (err) { | |
87 | derr << "Failed to retrieve WNBD userspace stats. Error: " << err << dendl; | |
88 | return -EINVAL; | |
89 | } | |
90 | ||
91 | f->open_object_section("stats"); | |
92 | f->dump_int("TotalReceivedRequests", stats.TotalReceivedRequests); | |
93 | f->dump_int("TotalSubmittedRequests", stats.TotalSubmittedRequests); | |
94 | f->dump_int("TotalReceivedReplies", stats.TotalReceivedReplies); | |
95 | f->dump_int("UnsubmittedRequests", stats.UnsubmittedRequests); | |
96 | f->dump_int("PendingSubmittedRequests", stats.PendingSubmittedRequests); | |
97 | f->dump_int("PendingReplies", stats.PendingReplies); | |
98 | f->dump_int("ReadErrors", stats.ReadErrors); | |
99 | f->dump_int("WriteErrors", stats.WriteErrors); | |
100 | f->dump_int("FlushErrors", stats.FlushErrors); | |
101 | f->dump_int("UnmapErrors", stats.UnmapErrors); | |
102 | f->dump_int("InvalidRequests", stats.InvalidRequests); | |
103 | f->dump_int("TotalRWRequests", stats.TotalRWRequests); | |
104 | f->dump_int("TotalReadBlocks", stats.TotalReadBlocks); | |
105 | f->dump_int("TotalWrittenBlocks", stats.TotalWrittenBlocks); | |
106 | ||
107 | f->close_section(); | |
108 | return 0; | |
109 | } | |
110 | ||
111 | void WnbdHandler::shutdown() | |
112 | { | |
113 | std::unique_lock l{shutdown_lock}; | |
114 | if (!terminated && wnbd_disk) { | |
115 | // We're requesting the disk to be removed but continue serving IO | |
116 | // requests until the driver sends us the "Disconnect" event. | |
117 | // TODO: expose PWNBD_REMOVE_OPTIONS, we're using the defaults ATM. | |
118 | WnbdRemove(wnbd_disk, NULL); | |
119 | wait(); | |
120 | terminated = true; | |
121 | } | |
122 | } | |
123 | ||
124 | void WnbdHandler::aio_callback(librbd::completion_t cb, void *arg) | |
125 | { | |
126 | librbd::RBD::AioCompletion *aio_completion = | |
127 | reinterpret_cast<librbd::RBD::AioCompletion*>(cb); | |
128 | ||
129 | WnbdHandler::IOContext* ctx = static_cast<WnbdHandler::IOContext*>(arg); | |
130 | int ret = aio_completion->get_return_value(); | |
131 | ||
132 | dout(20) << __func__ << ": " << *ctx << dendl; | |
133 | ||
134 | if (ret == -EINVAL) { | |
135 | // if shrinking an image, a pagecache writeback might reference | |
136 | // extents outside of the range of the new image extents | |
137 | dout(0) << __func__ << ": masking IO out-of-bounds error" << *ctx << dendl; | |
138 | ctx->data.clear(); | |
139 | ret = 0; | |
140 | } | |
141 | ||
142 | if (ret < 0) { | |
143 | ctx->err_code = -ret; | |
144 | // TODO: check the actual error. | |
145 | ctx->set_sense(SCSI_SENSE_MEDIUM_ERROR, | |
146 | SCSI_ADSENSE_UNRECOVERED_ERROR); | |
147 | } else if ((ctx->req_type == WnbdReqTypeRead) && | |
148 | ret < static_cast<int>(ctx->req_size)) { | |
149 | int pad_byte_count = static_cast<int> (ctx->req_size) - ret; | |
150 | ctx->data.append_zero(pad_byte_count); | |
151 | dout(20) << __func__ << ": " << *ctx << ": Pad byte count: " | |
152 | << pad_byte_count << dendl; | |
153 | ctx->err_code = 0; | |
154 | } else { | |
155 | ctx->err_code = 0; | |
156 | } | |
157 | ||
158 | boost::asio::post( | |
159 | *ctx->handler->reply_tpool, | |
160 | [&, ctx]() | |
161 | { | |
162 | ctx->handler->send_io_response(ctx); | |
163 | }); | |
164 | ||
165 | aio_completion->release(); | |
166 | } | |
167 | ||
168 | void WnbdHandler::send_io_response(WnbdHandler::IOContext *ctx) { | |
169 | std::unique_ptr<WnbdHandler::IOContext> pctx{ctx}; | |
170 | ceph_assert(WNBD_DEFAULT_MAX_TRANSFER_LENGTH >= pctx->data.length()); | |
171 | ||
172 | WNBD_IO_RESPONSE wnbd_rsp = {0}; | |
173 | wnbd_rsp.RequestHandle = pctx->req_handle; | |
174 | wnbd_rsp.RequestType = pctx->req_type; | |
175 | wnbd_rsp.Status = pctx->wnbd_status; | |
176 | int err = 0; | |
177 | ||
178 | // Use TLS to store an overlapped structure so that we avoid | |
179 | // recreating one each time we send a reply. | |
180 | static boost::thread_specific_ptr<OVERLAPPED> overlapped_tls( | |
181 | // Cleanup routine | |
182 | [](LPOVERLAPPED p_overlapped) | |
183 | { | |
184 | if (p_overlapped->hEvent) { | |
185 | CloseHandle(p_overlapped->hEvent); | |
186 | } | |
187 | delete p_overlapped; | |
188 | }); | |
189 | ||
190 | LPOVERLAPPED overlapped = overlapped_tls.get(); | |
191 | if (!overlapped) | |
192 | { | |
193 | overlapped = new OVERLAPPED{0}; | |
194 | HANDLE overlapped_evt = CreateEventA(0, TRUE, TRUE, NULL); | |
195 | if (!overlapped_evt) { | |
196 | err = GetLastError(); | |
197 | derr << "Could not create event. Error: " << err << dendl; | |
198 | return; | |
199 | } | |
200 | ||
201 | overlapped->hEvent = overlapped_evt; | |
202 | overlapped_tls.reset(overlapped); | |
203 | } | |
204 | ||
205 | if (!ResetEvent(overlapped->hEvent)) { | |
206 | err = GetLastError(); | |
207 | derr << "Could not reset event. Error: " << err << dendl; | |
208 | return; | |
209 | } | |
210 | ||
211 | err = WnbdSendResponseEx( | |
212 | pctx->handler->wnbd_disk, | |
213 | &wnbd_rsp, | |
214 | pctx->data.c_str(), | |
215 | pctx->data.length(), | |
216 | overlapped); | |
217 | if (err == ERROR_IO_PENDING) { | |
218 | DWORD returned_bytes = 0; | |
219 | err = 0; | |
220 | // We've got ERROR_IO_PENDING, which means that the operation is in | |
221 | // progress. We'll use GetOverlappedResult to wait for it to complete | |
222 | // and then retrieve the result. | |
223 | if (!GetOverlappedResult(pctx->handler->wnbd_disk, overlapped, | |
224 | &returned_bytes, TRUE)) { | |
225 | err = GetLastError(); | |
226 | derr << "Could not send response. Request id: " << wnbd_rsp.RequestHandle | |
227 | << ". Error: " << err << dendl; | |
228 | } | |
229 | } | |
230 | } | |
231 | ||
232 | void WnbdHandler::IOContext::set_sense(uint8_t sense_key, uint8_t asc, uint64_t info) | |
233 | { | |
234 | WnbdSetSenseEx(&wnbd_status, sense_key, asc, info); | |
235 | } | |
236 | ||
237 | void WnbdHandler::IOContext::set_sense(uint8_t sense_key, uint8_t asc) | |
238 | { | |
239 | WnbdSetSense(&wnbd_status, sense_key, asc); | |
240 | } | |
241 | ||
242 | void WnbdHandler::Read( | |
243 | PWNBD_DISK Disk, | |
244 | UINT64 RequestHandle, | |
245 | PVOID Buffer, | |
246 | UINT64 BlockAddress, | |
247 | UINT32 BlockCount, | |
248 | BOOLEAN ForceUnitAccess) | |
249 | { | |
250 | WnbdHandler* handler = nullptr; | |
251 | ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler)); | |
252 | ||
253 | WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext(); | |
254 | ctx->handler = handler; | |
255 | ctx->req_handle = RequestHandle; | |
256 | ctx->req_type = WnbdReqTypeRead; | |
257 | ctx->req_size = BlockCount * handler->block_size; | |
258 | ctx->req_from = BlockAddress * handler->block_size; | |
259 | ceph_assert(ctx->req_size <= WNBD_DEFAULT_MAX_TRANSFER_LENGTH); | |
260 | ||
261 | int op_flags = 0; | |
262 | if (ForceUnitAccess) { | |
263 | op_flags |= LIBRADOS_OP_FLAG_FADVISE_FUA; | |
264 | } | |
265 | ||
266 | dout(20) << *ctx << ": start" << dendl; | |
267 | ||
268 | librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback); | |
269 | handler->image.aio_read2(ctx->req_from, ctx->req_size, ctx->data, c, op_flags); | |
270 | ||
271 | dout(20) << *ctx << ": submitted" << dendl; | |
272 | } | |
273 | ||
274 | void WnbdHandler::Write( | |
275 | PWNBD_DISK Disk, | |
276 | UINT64 RequestHandle, | |
277 | PVOID Buffer, | |
278 | UINT64 BlockAddress, | |
279 | UINT32 BlockCount, | |
280 | BOOLEAN ForceUnitAccess) | |
281 | { | |
282 | WnbdHandler* handler = nullptr; | |
283 | ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler)); | |
284 | ||
285 | WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext(); | |
286 | ctx->handler = handler; | |
287 | ctx->req_handle = RequestHandle; | |
288 | ctx->req_type = WnbdReqTypeWrite; | |
289 | ctx->req_size = BlockCount * handler->block_size; | |
290 | ctx->req_from = BlockAddress * handler->block_size; | |
291 | ||
292 | bufferptr ptr((char*)Buffer, ctx->req_size); | |
293 | ctx->data.push_back(ptr); | |
294 | ||
295 | int op_flags = 0; | |
296 | if (ForceUnitAccess) { | |
297 | op_flags |= LIBRADOS_OP_FLAG_FADVISE_FUA; | |
298 | } | |
299 | ||
300 | dout(20) << *ctx << ": start" << dendl; | |
301 | ||
302 | librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback); | |
303 | handler->image.aio_write2(ctx->req_from, ctx->req_size, ctx->data, c, op_flags); | |
304 | ||
305 | dout(20) << *ctx << ": submitted" << dendl; | |
306 | } | |
307 | ||
308 | void WnbdHandler::Flush( | |
309 | PWNBD_DISK Disk, | |
310 | UINT64 RequestHandle, | |
311 | UINT64 BlockAddress, | |
312 | UINT32 BlockCount) | |
313 | { | |
314 | WnbdHandler* handler = nullptr; | |
315 | ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler)); | |
316 | ||
317 | WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext(); | |
318 | ctx->handler = handler; | |
319 | ctx->req_handle = RequestHandle; | |
320 | ctx->req_type = WnbdReqTypeFlush; | |
321 | ctx->req_size = BlockCount * handler->block_size; | |
322 | ctx->req_from = BlockAddress * handler->block_size; | |
323 | ||
324 | dout(20) << *ctx << ": start" << dendl; | |
325 | ||
326 | librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback); | |
327 | handler->image.aio_flush(c); | |
328 | ||
329 | dout(20) << *ctx << ": submitted" << dendl; | |
330 | } | |
331 | ||
332 | void WnbdHandler::Unmap( | |
333 | PWNBD_DISK Disk, | |
334 | UINT64 RequestHandle, | |
335 | PWNBD_UNMAP_DESCRIPTOR Descriptors, | |
336 | UINT32 Count) | |
337 | { | |
338 | WnbdHandler* handler = nullptr; | |
339 | ceph_assert(!WnbdGetUserContext(Disk, (PVOID*)&handler)); | |
340 | ceph_assert(1 == Count); | |
341 | ||
342 | WnbdHandler::IOContext* ctx = new WnbdHandler::IOContext(); | |
343 | ctx->handler = handler; | |
344 | ctx->req_handle = RequestHandle; | |
345 | ctx->req_type = WnbdReqTypeUnmap; | |
346 | ctx->req_size = Descriptors[0].BlockCount * handler->block_size; | |
347 | ctx->req_from = Descriptors[0].BlockAddress * handler->block_size; | |
348 | ||
349 | dout(20) << *ctx << ": start" << dendl; | |
350 | ||
351 | librbd::RBD::AioCompletion *c = new librbd::RBD::AioCompletion(ctx, aio_callback); | |
352 | handler->image.aio_discard(ctx->req_from, ctx->req_size, c); | |
353 | ||
354 | dout(20) << *ctx << ": submitted" << dendl; | |
355 | } | |
356 | ||
357 | void WnbdHandler::LogMessage( | |
358 | WnbdLogLevel LogLevel, | |
359 | const char* Message, | |
360 | const char* FileName, | |
361 | UINT32 Line, | |
362 | const char* FunctionName) | |
363 | { | |
364 | // We're already passing the log level to WNBD, so we'll use the highest | |
365 | // log level here. | |
366 | dout(0) << "libwnbd.dll!" << FunctionName << " " | |
367 | << WnbdLogLevelToStr(LogLevel) << " " << Message << dendl; | |
368 | } | |
369 | ||
370 | ||
371 | int WnbdHandler::start() | |
372 | { | |
373 | int err = 0; | |
374 | WNBD_PROPERTIES wnbd_props = {0}; | |
375 | ||
376 | instance_name.copy(wnbd_props.InstanceName, sizeof(wnbd_props.InstanceName)); | |
377 | ceph_assert(strlen(RBD_WNBD_OWNER_NAME) < WNBD_MAX_OWNER_LENGTH); | |
378 | strncpy(wnbd_props.Owner, RBD_WNBD_OWNER_NAME, WNBD_MAX_OWNER_LENGTH); | |
379 | ||
380 | wnbd_props.BlockCount = block_count; | |
381 | wnbd_props.BlockSize = block_size; | |
382 | wnbd_props.MaxUnmapDescCount = 1; | |
383 | ||
384 | wnbd_props.Flags.ReadOnly = readonly; | |
385 | wnbd_props.Flags.UnmapSupported = 1; | |
386 | if (rbd_cache_enabled) { | |
387 | wnbd_props.Flags.FUASupported = 1; | |
388 | wnbd_props.Flags.FlushSupported = 1; | |
389 | } | |
390 | ||
391 | err = WnbdCreate(&wnbd_props, &RbdWnbdInterface, this, &wnbd_disk); | |
392 | if (err) | |
393 | goto exit; | |
394 | ||
395 | started = true; | |
396 | ||
397 | err = WnbdStartDispatcher(wnbd_disk, io_req_workers); | |
398 | if (err) { | |
399 | derr << "Could not start WNBD dispatcher. Error: " << err << dendl; | |
400 | } | |
401 | ||
402 | exit: | |
403 | return err; | |
404 | } | |
405 | ||
406 | std::ostream &operator<<(std::ostream &os, const WnbdHandler::IOContext &ctx) { | |
407 | ||
408 | os << "[" << std::hex << ctx.req_handle; | |
409 | ||
410 | switch (ctx.req_type) | |
411 | { | |
412 | case WnbdReqTypeRead: | |
413 | os << " READ "; | |
414 | break; | |
415 | case WnbdReqTypeWrite: | |
416 | os << " WRITE "; | |
417 | break; | |
418 | case WnbdReqTypeFlush: | |
419 | os << " FLUSH "; | |
420 | break; | |
421 | case WnbdReqTypeUnmap: | |
422 | os << " TRIM "; | |
423 | break; | |
424 | default: | |
425 | os << " UNKNOWN(" << ctx.req_type << ") "; | |
426 | break; | |
427 | } | |
428 | ||
429 | os << ctx.req_from << "~" << ctx.req_size << " " | |
430 | << std::dec << ntohl(ctx.err_code) << "]"; | |
431 | ||
432 | return os; | |
433 | } |