/*-
 *   BSD LICENSE
 *
 *   Copyright (c) Intel Corporation.
 *   All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#define FUSE_USE_VERSION 31

#include <fuse3/cuse_lowlevel.h>

#include <linux/nvme_ioctl.h>
#include <linux/fs.h>

#include "nvme_internal.h"
#include "nvme_io_msg.h"
#include "nvme_cuse.h"

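/*
 * This module exposes each registered SPDK NVMe controller and its active
 * namespaces as CUSE character devices (by default /dev/spdk/nvmeX and
 * /dev/spdk/nvmeXnY). Each device node runs a dedicated fuse session
 * thread; ioctls that must reach the controller are forwarded to the
 * controller's I/O-message thread with nvme_io_msg_send() and answered
 * asynchronously from the command completion callbacks.
 */
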
struct cuse_device {
	bool is_started;

	char dev_name[128];
	uint32_t index;
	int claim_fd;
	char lock_name[64];

	struct spdk_nvme_ctrlr *ctrlr;	/**< NVMe controller */
	uint32_t nsid;			/**< NVMe name space id, or 0 */

	pthread_t tid;
	struct fuse_session *session;

	struct cuse_device *ctrlr_device;
	struct cuse_device *ns_devices;	/**< Array of cuse ns devices */

	TAILQ_ENTRY(cuse_device) tailq;
};

static pthread_mutex_t g_cuse_mtx = PTHREAD_MUTEX_INITIALIZER;
static TAILQ_HEAD(, cuse_device) g_ctrlr_ctx_head = TAILQ_HEAD_INITIALIZER(g_ctrlr_ctx_head);
static struct spdk_bit_array *g_ctrlr_started;

struct cuse_io_ctx {
	struct spdk_nvme_cmd nvme_cmd;
	enum spdk_nvme_data_transfer data_transfer;

	uint64_t lba;
	uint32_t lba_count;

	void *data;
	int data_len;

	fuse_req_t req;
};

static void
cuse_io_ctx_free(struct cuse_io_ctx *ctx)
{
	spdk_free(ctx->data);
	free(ctx);
}

#define FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, val)		\
	if (out_bufsz == 0) {						\
		struct iovec out_iov;					\
		out_iov.iov_base = (void *)arg;				\
		out_iov.iov_len = sizeof(val);				\
		fuse_reply_ioctl_retry(req, NULL, 0, &out_iov, 1);	\
		return;							\
	}

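/*
 * With CUSE_UNRESTRICTED_IOCTL the kernel does not know the ioctl buffer
 * sizes up front, so each handler first answers with
 * fuse_reply_ioctl_retry(), describing the input/output iovecs it needs.
 * The kernel then re-sends the ioctl with in_buf/out_bufsz populated and
 * the handler can complete it for real. FUSE_REPLY_CHECK_BUFFER() above
 * implements this two-step exchange for the simple fixed-size replies.
 */
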
static void
cuse_nvme_admin_cmd_cb(void *arg, const struct spdk_nvme_cpl *cpl)
{
	struct cuse_io_ctx *ctx = arg;
	struct iovec out_iov[2];
	struct spdk_nvme_cpl _cpl;

	if (ctx->data_transfer == SPDK_NVME_DATA_HOST_TO_CONTROLLER) {
		fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, NULL, 0);
	} else {
		memcpy(&_cpl, cpl, sizeof(struct spdk_nvme_cpl));

		out_iov[0].iov_base = &_cpl.cdw0;
		out_iov[0].iov_len = sizeof(_cpl.cdw0);

		if (ctx->data_len > 0) {
			out_iov[1].iov_base = ctx->data;
			out_iov[1].iov_len = ctx->data_len;
			fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, out_iov, 2);
		} else {
			fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, out_iov, 1);
		}
	}

	cuse_io_ctx_free(ctx);
}

static void
cuse_nvme_admin_cmd_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	struct cuse_io_ctx *ctx = arg;

	rc = spdk_nvme_ctrlr_cmd_admin_raw(ctrlr, &ctx->nvme_cmd, ctx->data, ctx->data_len,
					   cuse_nvme_admin_cmd_cb, (void *)ctx);
	if (rc < 0) {
		fuse_reply_err(ctx->req, EINVAL);
		cuse_io_ctx_free(ctx);
	}
}

static void
cuse_nvme_admin_cmd_send(fuse_req_t req, struct nvme_admin_cmd *admin_cmd,
			 const void *data)
{
	struct cuse_io_ctx *ctx;
	struct cuse_device *cuse_device = fuse_req_userdata(req);
	int rv;

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for cuse_io_ctx\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;
	ctx->data_transfer = spdk_nvme_opc_get_data_transfer(admin_cmd->opcode);

	memset(&ctx->nvme_cmd, 0, sizeof(ctx->nvme_cmd));
	ctx->nvme_cmd.opc = admin_cmd->opcode;
	ctx->nvme_cmd.nsid = admin_cmd->nsid;
	ctx->nvme_cmd.cdw10 = admin_cmd->cdw10;
	ctx->nvme_cmd.cdw11 = admin_cmd->cdw11;
	ctx->nvme_cmd.cdw12 = admin_cmd->cdw12;
	ctx->nvme_cmd.cdw13 = admin_cmd->cdw13;
	ctx->nvme_cmd.cdw14 = admin_cmd->cdw14;
	ctx->nvme_cmd.cdw15 = admin_cmd->cdw15;

	ctx->data_len = admin_cmd->data_len;

	if (ctx->data_len > 0) {
		ctx->data = spdk_malloc(ctx->data_len, 0, NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
		if (!ctx->data) {
			SPDK_ERRLOG("Cannot allocate memory for data\n");
			fuse_reply_err(req, ENOMEM);
			free(ctx);
			return;
		}
		if (data != NULL) {
			memcpy(ctx->data, data, ctx->data_len);
		}
	}

	rv = nvme_io_msg_send(cuse_device->ctrlr, 0, cuse_nvme_admin_cmd_execute, ctx);
	if (rv) {
		SPDK_ERRLOG("Cannot send io msg to the controller\n");
		fuse_reply_err(req, -rv);
		cuse_io_ctx_free(ctx);
		return;
	}
}

static void
cuse_nvme_admin_cmd(fuse_req_t req, int cmd, void *arg,
		    struct fuse_file_info *fi, unsigned flags,
		    const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	struct nvme_admin_cmd *admin_cmd;
	struct iovec in_iov[2], out_iov[2];

	in_iov[0].iov_base = (void *)arg;
	in_iov[0].iov_len = sizeof(*admin_cmd);
	if (in_bufsz == 0) {
		fuse_reply_ioctl_retry(req, in_iov, 1, NULL, 0);
		return;
	}

	admin_cmd = (struct nvme_admin_cmd *)in_buf;

	switch (spdk_nvme_opc_get_data_transfer(admin_cmd->opcode)) {
	case SPDK_NVME_DATA_NONE:
		SPDK_ERRLOG("SPDK_NVME_DATA_NONE not implemented\n");
		fuse_reply_err(req, EINVAL);
		return;
	case SPDK_NVME_DATA_HOST_TO_CONTROLLER:
		if (admin_cmd->addr != 0) {
			in_iov[1].iov_base = (void *)admin_cmd->addr;
			in_iov[1].iov_len = admin_cmd->data_len;
			if (in_bufsz == sizeof(*admin_cmd)) {
				fuse_reply_ioctl_retry(req, in_iov, 2, NULL, 0);
				return;
			}
			cuse_nvme_admin_cmd_send(req, admin_cmd, in_buf + sizeof(*admin_cmd));
		} else {
			cuse_nvme_admin_cmd_send(req, admin_cmd, NULL);
		}
		return;
	case SPDK_NVME_DATA_CONTROLLER_TO_HOST:
		if (out_bufsz == 0) {
			out_iov[0].iov_base = &((struct nvme_admin_cmd *)arg)->result;
			out_iov[0].iov_len = sizeof(uint32_t);
			if (admin_cmd->data_len > 0) {
				out_iov[1].iov_base = (void *)admin_cmd->addr;
				out_iov[1].iov_len = admin_cmd->data_len;
				fuse_reply_ioctl_retry(req, in_iov, 1, out_iov, 2);
			} else {
				fuse_reply_ioctl_retry(req, in_iov, 1, out_iov, 1);
			}
			return;
		}

		cuse_nvme_admin_cmd_send(req, admin_cmd, NULL);

		return;
	case SPDK_NVME_DATA_BIDIRECTIONAL:
		fuse_reply_err(req, EINVAL);
		return;
	}
}

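/*
 * Client-side sketch of the NVME_IOCTL_ADMIN_CMD flow handled above
 * (illustrative only; the device path and identify-controller constants
 * are assumptions, not part of this file):
 *
 *	struct nvme_admin_cmd cmd = {0};
 *	uint8_t data[4096];
 *
 *	int fd = open("/dev/spdk/nvme0", O_RDWR);	// assumed device name
 *	cmd.opcode = 0x06;				// Identify
 *	cmd.cdw10 = 1;					// CNS 1: identify controller
 *	cmd.addr = (uintptr_t)data;
 *	cmd.data_len = sizeof(data);
 *	ioctl(fd, NVME_IOCTL_ADMIN_CMD, &cmd);
 *
 * The first pass through cuse_nvme_admin_cmd() sees only the struct and
 * replies with a retry; the second pass carries the data buffer as well.
 */
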
static void
cuse_nvme_reset_execute(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	fuse_req_t req = arg;

	rc = spdk_nvme_ctrlr_reset(ctrlr);
	if (rc) {
		fuse_reply_err(req, rc);
		return;
	}

	fuse_reply_ioctl_iov(req, 0, NULL, 0);
}

static void
cuse_nvme_reset(fuse_req_t req, int cmd, void *arg,
		struct fuse_file_info *fi, unsigned flags,
		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int rv;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	if (cuse_device->nsid) {
		SPDK_ERRLOG("Namespace reset not supported\n");
		fuse_reply_err(req, EINVAL);
		return;
	}

	rv = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_reset_execute, (void *)req);
	if (rv) {
		SPDK_ERRLOG("Cannot send reset\n");
		fuse_reply_err(req, EINVAL);
	}
}

/*****************************************************************************
 * Namespace IO requests
 */

static void
cuse_nvme_submit_io_write_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref;

	fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, NULL, 0);

	cuse_io_ctx_free(ctx);
}

static void
cuse_nvme_submit_io_write_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	struct cuse_io_ctx *ctx = arg;
	struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);

	rc = spdk_nvme_ns_cmd_write(ns, ctrlr->external_io_msgs_qpair, ctx->data,
				    ctx->lba, /* LBA start */
				    ctx->lba_count, /* number of LBAs */
				    cuse_nvme_submit_io_write_done, ctx, 0);

	if (rc != 0) {
		SPDK_ERRLOG("write failed: rc = %d\n", rc);
		/* spdk_nvme_ns_cmd_write() returns a negated errno */
		fuse_reply_err(ctx->req, -rc);
		cuse_io_ctx_free(ctx);
		return;
	}
}

static void
cuse_nvme_submit_io_write(fuse_req_t req, int cmd, void *arg,
			  struct fuse_file_info *fi, unsigned flags,
			  const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	const struct nvme_user_io *user_io = in_buf;
	struct cuse_io_ctx *ctx;
	struct spdk_nvme_ns *ns;
	uint32_t block_size;
	int rc;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for context\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	block_size = spdk_nvme_ns_get_sector_size(ns);

	ctx->lba = user_io->slba;
	ctx->lba_count = user_io->nblocks + 1; /* nblocks is a 0's based value */
	ctx->data_len = ctx->lba_count * block_size;

	ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
				 SPDK_MALLOC_DMA);
	if (ctx->data == NULL) {
		SPDK_ERRLOG("Write buffer allocation failed\n");
		fuse_reply_err(ctx->req, ENOMEM);
		free(ctx);
		return;
	}

	/* in_buf was sized by cuse_nvme_submit_io() assuming 512-byte sectors,
	 * so data_len and the incoming buffer only match for such namespaces. */
	memcpy(ctx->data, in_buf + sizeof(*user_io), ctx->data_len);

	rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_write_cb,
			      ctx);
	if (rc < 0) {
		SPDK_ERRLOG("Cannot send write io\n");
		fuse_reply_err(ctx->req, -rc); /* nvme_io_msg_send() returns a negated errno */
		cuse_io_ctx_free(ctx);
	}
}

static void
cuse_nvme_submit_io_read_done(void *ref, const struct spdk_nvme_cpl *cpl)
{
	struct cuse_io_ctx *ctx = (struct cuse_io_ctx *)ref;
	struct iovec iov;

	iov.iov_base = ctx->data;
	iov.iov_len = ctx->data_len;

	fuse_reply_ioctl_iov(ctx->req, cpl->status.sc, &iov, 1);

	cuse_io_ctx_free(ctx);
}

static void
cuse_nvme_submit_io_read_cb(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, void *arg)
{
	int rc;
	struct cuse_io_ctx *ctx = arg;
	struct spdk_nvme_ns *ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);

	rc = spdk_nvme_ns_cmd_read(ns, ctrlr->external_io_msgs_qpair, ctx->data,
				   ctx->lba, /* LBA start */
				   ctx->lba_count, /* number of LBAs */
				   cuse_nvme_submit_io_read_done, ctx, 0);

	if (rc != 0) {
		SPDK_ERRLOG("read failed: rc = %d\n", rc);
		/* spdk_nvme_ns_cmd_read() returns a negated errno */
		fuse_reply_err(ctx->req, -rc);
		cuse_io_ctx_free(ctx);
		return;
	}
}

static void
cuse_nvme_submit_io_read(fuse_req_t req, int cmd, void *arg,
			 struct fuse_file_info *fi, unsigned flags,
			 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int rc;
	struct cuse_io_ctx *ctx;
	const struct nvme_user_io *user_io = in_buf;
	struct cuse_device *cuse_device = fuse_req_userdata(req);
	struct spdk_nvme_ns *ns;
	uint32_t block_size;

	ctx = (struct cuse_io_ctx *)calloc(1, sizeof(struct cuse_io_ctx));
	if (!ctx) {
		SPDK_ERRLOG("Cannot allocate memory for context\n");
		fuse_reply_err(req, ENOMEM);
		return;
	}

	ctx->req = req;
	ctx->lba = user_io->slba;
	/* nblocks is a 0's based value; without the +1 (matching the write
	 * path) the read dropped the last block. */
	ctx->lba_count = user_io->nblocks + 1;

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	block_size = spdk_nvme_ns_get_sector_size(ns);

	ctx->data_len = ctx->lba_count * block_size;
	ctx->data = spdk_zmalloc(ctx->data_len, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
				 SPDK_MALLOC_DMA);
	if (ctx->data == NULL) {
		SPDK_ERRLOG("Read buffer allocation failed\n");
		fuse_reply_err(ctx->req, ENOMEM);
		free(ctx);
		return;
	}

	rc = nvme_io_msg_send(cuse_device->ctrlr, cuse_device->nsid, cuse_nvme_submit_io_read_cb, ctx);
	if (rc < 0) {
		SPDK_ERRLOG("Cannot send read io\n");
		fuse_reply_err(ctx->req, -rc); /* nvme_io_msg_send() returns a negated errno */
		cuse_io_ctx_free(ctx);
	}
}

static void
cuse_nvme_submit_io(fuse_req_t req, int cmd, void *arg,
		    struct fuse_file_info *fi, unsigned flags,
		    const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	const struct nvme_user_io *user_io;
	struct iovec in_iov[2], out_iov;

	in_iov[0].iov_base = (void *)arg;
	in_iov[0].iov_len = sizeof(*user_io);
	if (in_bufsz == 0) {
		fuse_reply_ioctl_retry(req, in_iov, 1, NULL, 0);
		return;
	}

	user_io = in_buf;

	switch (user_io->opcode) {
	case SPDK_NVME_OPC_READ:
		/* The retry buffer is sized with a hardcoded 512-byte sector
		 * size; namespaces with larger sectors are not handled
		 * correctly by this sizing. */
		out_iov.iov_base = (void *)user_io->addr;
		out_iov.iov_len = (user_io->nblocks + 1) * 512;
		if (out_bufsz == 0) {
			fuse_reply_ioctl_retry(req, in_iov, 1, &out_iov, 1);
			return;
		}

		cuse_nvme_submit_io_read(req, cmd, arg, fi, flags, in_buf,
					 in_bufsz, out_bufsz);
		break;
	case SPDK_NVME_OPC_WRITE:
		in_iov[1].iov_base = (void *)user_io->addr;
		in_iov[1].iov_len = (user_io->nblocks + 1) * 512;
		if (in_bufsz == sizeof(*user_io)) {
			fuse_reply_ioctl_retry(req, in_iov, 2, NULL, 0);
			return;
		}

		cuse_nvme_submit_io_write(req, cmd, arg, fi, flags, in_buf,
					  in_bufsz, out_bufsz);
		break;
	default:
		SPDK_ERRLOG("SUBMIT_IO: opc:%d not valid\n", user_io->opcode);
		fuse_reply_err(req, EINVAL);
		return;
	}
}

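/*
 * Client-side sketch of the NVME_IOCTL_SUBMIT_IO flow (illustrative only;
 * assumes a 512-byte-sector namespace, matching the hardcoded retry sizing
 * above, and an assumed device path):
 *
 *	struct nvme_user_io io = {0};
 *	uint8_t buf[512];
 *
 *	int fd = open("/dev/spdk/nvme0n1", O_RDWR);	// assumed device name
 *	io.opcode = 0x02;				// NVMe read
 *	io.slba = 0;
 *	io.nblocks = 0;					// 0's based: one block
 *	io.addr = (uintptr_t)buf;
 *	ioctl(fd, NVME_IOCTL_SUBMIT_IO, &io);
 */
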
/*****************************************************************************
 * Other namespace IOCTLs
 */
static void
cuse_blkgetsize64(fuse_req_t req, int cmd, void *arg,
		  struct fuse_file_info *fi, unsigned flags,
		  const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	uint64_t size;
	struct spdk_nvme_ns *ns;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size);

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	/* BLKGETSIZE64 reports the device size in bytes, not in sectors */
	size = spdk_nvme_ns_get_num_sectors(ns) * spdk_nvme_ns_get_sector_size(ns);
	fuse_reply_ioctl(req, 0, &size, sizeof(size));
}

static void
cuse_blkpbszget(fuse_req_t req, int cmd, void *arg,
		struct fuse_file_info *fi, unsigned flags,
		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	int pbsz;
	struct spdk_nvme_ns *ns;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, pbsz);

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);
	pbsz = spdk_nvme_ns_get_sector_size(ns);
	fuse_reply_ioctl(req, 0, &pbsz, sizeof(pbsz));
}

static void
cuse_blkgetsize(fuse_req_t req, int cmd, void *arg,
		struct fuse_file_info *fi, unsigned flags,
		const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	long size;
	struct spdk_nvme_ns *ns;
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	FUSE_REPLY_CHECK_BUFFER(req, arg, out_bufsz, size);

	ns = spdk_nvme_ctrlr_get_ns(cuse_device->ctrlr, cuse_device->nsid);

	/* return size in 512-byte blocks, i.e. total bytes / 512
	 * (the previous formula divided by the sector size instead) */
	size = spdk_nvme_ns_get_num_sectors(ns) * spdk_nvme_ns_get_sector_size(ns) / 512;
	fuse_reply_ioctl(req, 0, &size, sizeof(size));
}

static void
cuse_getid(fuse_req_t req, int cmd, void *arg,
	   struct fuse_file_info *fi, unsigned flags,
	   const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	struct cuse_device *cuse_device = fuse_req_userdata(req);

	fuse_reply_ioctl(req, cuse_device->nsid, NULL, 0);
}

static void
cuse_ctrlr_ioctl(fuse_req_t req, int cmd, void *arg,
		 struct fuse_file_info *fi, unsigned flags,
		 const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	if (flags & FUSE_IOCTL_COMPAT) {
		fuse_reply_err(req, ENOSYS);
		return;
	}

	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		cuse_nvme_admin_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_RESET:
		cuse_nvme_reset(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	default:
		SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
		fuse_reply_err(req, EINVAL);
	}
}

static void
cuse_ns_ioctl(fuse_req_t req, int cmd, void *arg,
	      struct fuse_file_info *fi, unsigned flags,
	      const void *in_buf, size_t in_bufsz, size_t out_bufsz)
{
	if (flags & FUSE_IOCTL_COMPAT) {
		fuse_reply_err(req, ENOSYS);
		return;
	}

	switch (cmd) {
	case NVME_IOCTL_ADMIN_CMD:
		cuse_nvme_admin_cmd(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_SUBMIT_IO:
		cuse_nvme_submit_io(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case NVME_IOCTL_ID:
		cuse_getid(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKPBSZGET:
		cuse_blkpbszget(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKGETSIZE:
		/* Returns the device size as a number of 512-byte blocks (returns pointer to long) */
		cuse_blkgetsize(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	case BLKGETSIZE64:
		/* Returns the device size in bytes (returns pointer to uint64_t) */
		cuse_blkgetsize64(req, cmd, arg, fi, flags, in_buf, in_bufsz, out_bufsz);
		break;

	default:
		SPDK_ERRLOG("Unsupported IOCTL 0x%X.\n", cmd);
		fuse_reply_err(req, EINVAL);
	}
}

/*****************************************************************************
 * CUSE threads initialization.
 */

static void
cuse_open(fuse_req_t req, struct fuse_file_info *fi)
{
	fuse_reply_open(req, fi);
}

static const struct cuse_lowlevel_ops cuse_ctrlr_clop = {
	.open = cuse_open,
	.ioctl = cuse_ctrlr_ioctl,
};

static const struct cuse_lowlevel_ops cuse_ns_clop = {
	.open = cuse_open,
	.ioctl = cuse_ns_ioctl,
};

static void *
cuse_thread(void *arg)
{
	struct cuse_device *cuse_device = arg;
	char *cuse_argv[] = { "cuse", "-f" };
	int cuse_argc = SPDK_COUNTOF(cuse_argv);
	char devname_arg[128 + 8];
	const char *dev_info_argv[] = { devname_arg };
	struct cuse_info ci;
	int multithreaded;
	int rc;
	struct fuse_buf buf = { .mem = NULL };
	struct pollfd fds;
	int timeout_msecs = 500;

	spdk_unaffinitize_thread();

	snprintf(devname_arg, sizeof(devname_arg), "DEVNAME=%s", cuse_device->dev_name);

	memset(&ci, 0, sizeof(ci));
	ci.dev_info_argc = 1;
	ci.dev_info_argv = dev_info_argv;
	ci.flags = CUSE_UNRESTRICTED_IOCTL;

	if (cuse_device->nsid) {
		cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ns_clop,
				       &multithreaded, cuse_device);
	} else {
		cuse_device->session = cuse_lowlevel_setup(cuse_argc, cuse_argv, &ci, &cuse_ctrlr_clop,
				       &multithreaded, cuse_device);
	}
	if (!cuse_device->session) {
		SPDK_ERRLOG("Cannot create cuse session\n");
		goto err;
	}

	SPDK_NOTICELOG("fuse session for device %s created\n", cuse_device->dev_name);

	/* Receive and process fuse requests */
	fds.fd = fuse_session_fd(cuse_device->session);
	fds.events = POLLIN;
	while (!fuse_session_exited(cuse_device->session)) {
		rc = poll(&fds, 1, timeout_msecs);
		if (rc <= 0) {
			continue;
		}
		rc = fuse_session_receive_buf(cuse_device->session, &buf);
		if (rc > 0) {
			fuse_session_process_buf(cuse_device->session, &buf);
		}
	}
	free(buf.mem);
	fuse_session_reset(cuse_device->session);
	cuse_lowlevel_teardown(cuse_device->session);
err:
	pthread_exit(NULL);
}

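/*
 * The 500 ms poll timeout above is what lets a device thread notice that
 * another thread called fuse_session_exit() (via cuse_nvme_ns_stop() or
 * cuse_nvme_ctrlr_stop()): fuse_session_receive_buf() would otherwise
 * block indefinitely when no ioctl traffic arrives, so the loop wakes up
 * periodically to re-check fuse_session_exited() and can then be joined.
 */
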
/*****************************************************************************
 * CUSE devices management
 */

static int
cuse_nvme_ns_start(struct cuse_device *ctrlr_device, uint32_t nsid)
{
	struct cuse_device *ns_device;
	int rv;

	ns_device = &ctrlr_device->ns_devices[nsid - 1];
	if (ns_device->is_started) {
		return 0;
	}

	ns_device->ctrlr = ctrlr_device->ctrlr;
	ns_device->ctrlr_device = ctrlr_device;
	ns_device->nsid = nsid;
	rv = snprintf(ns_device->dev_name, sizeof(ns_device->dev_name), "%sn%d",
		      ctrlr_device->dev_name, ns_device->nsid);
	/* snprintf() only returns a negative value on output errors;
	 * truncation shows up as a return value >= the buffer size. Also
	 * note that ns_device points into the ns_devices array and must
	 * not be passed to free(). */
	if (rv < 0 || rv >= (int)sizeof(ns_device->dev_name)) {
		SPDK_ERRLOG("Device name too long.\n");
		return -ENAMETOOLONG;
	}

	rv = pthread_create(&ns_device->tid, NULL, cuse_thread, ns_device);
	if (rv != 0) {
		SPDK_ERRLOG("pthread_create failed\n");
		return -rv;
	}

	ns_device->is_started = true;

	return 0;
}

static void
cuse_nvme_ns_stop(struct cuse_device *ctrlr_device, uint32_t nsid)
{
	struct cuse_device *ns_device;

	ns_device = &ctrlr_device->ns_devices[nsid - 1];
	if (!ns_device->is_started) {
		return;
	}

	fuse_session_exit(ns_device->session);
	pthread_join(ns_device->tid, NULL);
	ns_device->is_started = false;
}

static int
nvme_cuse_claim(struct cuse_device *ctrlr_device, uint32_t index)
{
	int dev_fd;
	int pid;
	void *dev_map;
	struct flock cusedev_lock = {
		.l_type = F_WRLCK,
		.l_whence = SEEK_SET,
		.l_start = 0,
		.l_len = 0,
	};

	snprintf(ctrlr_device->lock_name, sizeof(ctrlr_device->lock_name),
		 "/tmp/spdk_nvme_cuse_lock_%" PRIu32, index);

	dev_fd = open(ctrlr_device->lock_name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
	if (dev_fd == -1) {
		SPDK_ERRLOG("could not open %s\n", ctrlr_device->lock_name);
		return -errno;
	}

	if (ftruncate(dev_fd, sizeof(int)) != 0) {
		SPDK_ERRLOG("could not truncate %s\n", ctrlr_device->lock_name);
		close(dev_fd);
		return -errno;
	}

	dev_map = mmap(NULL, sizeof(int), PROT_READ | PROT_WRITE,
		       MAP_SHARED, dev_fd, 0);
	if (dev_map == MAP_FAILED) {
		SPDK_ERRLOG("could not mmap dev %s (%d)\n", ctrlr_device->lock_name, errno);
		close(dev_fd);
		return -errno;
	}

	if (fcntl(dev_fd, F_SETLK, &cusedev_lock) != 0) {
		pid = *(int *)dev_map;
		SPDK_ERRLOG("Cannot create lock on device %s, probably"
			    " process %d has claimed it\n", ctrlr_device->lock_name, pid);
		munmap(dev_map, sizeof(int));
		close(dev_fd);
		/* F_SETLK returns unspecified errnos, normalize them */
		return -EACCES;
	}

	*(int *)dev_map = (int)getpid();
	munmap(dev_map, sizeof(int));
	ctrlr_device->claim_fd = dev_fd;
	ctrlr_device->index = index;
	/* Keep dev_fd open to maintain the lock. */
	return 0;
}

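/*
 * The claim protocol above serializes device indexes across processes:
 * each index is backed by a /tmp/spdk_nvme_cuse_lock_<index> file that is
 * write-locked with fcntl(F_SETLK) and holds the owner's pid. POSIX record
 * locks are released when the owning process dies, so a crashed owner
 * frees its index automatically; the pid stored in the file only serves
 * as a diagnostic hint for the error message and may be stale.
 */
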
static void
nvme_cuse_unclaim(struct cuse_device *ctrlr_device)
{
	close(ctrlr_device->claim_fd);
	ctrlr_device->claim_fd = -1;
	unlink(ctrlr_device->lock_name);
}

static void
cuse_nvme_ctrlr_stop(struct cuse_device *ctrlr_device)
{
	uint32_t i;
	uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr_device->ctrlr);

	for (i = 1; i <= num_ns; i++) {
		cuse_nvme_ns_stop(ctrlr_device, i);
	}

	fuse_session_exit(ctrlr_device->session);
	pthread_join(ctrlr_device->tid, NULL);
	TAILQ_REMOVE(&g_ctrlr_ctx_head, ctrlr_device, tailq);
	spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
	if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
		spdk_bit_array_free(&g_ctrlr_started);
	}
	nvme_cuse_unclaim(ctrlr_device);
	free(ctrlr_device->ns_devices);
	free(ctrlr_device);
}

static int
cuse_nvme_ctrlr_update_namespaces(struct cuse_device *ctrlr_device)
{
	uint32_t nsid;
	uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr_device->ctrlr);

	for (nsid = 1; nsid <= num_ns; nsid++) {
		if (!spdk_nvme_ctrlr_is_active_ns(ctrlr_device->ctrlr, nsid)) {
			cuse_nvme_ns_stop(ctrlr_device, nsid);
			continue;
		}

		if (cuse_nvme_ns_start(ctrlr_device, nsid) < 0) {
			SPDK_ERRLOG("Cannot start CUSE namespace device.\n");
			return -1;
		}
	}

	return 0;
}

static int
nvme_cuse_start(struct spdk_nvme_ctrlr *ctrlr)
{
	int rv = 0;
	struct cuse_device *ctrlr_device;
	uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);

	SPDK_NOTICELOG("Creating cuse device for controller\n");

	if (g_ctrlr_started == NULL) {
		g_ctrlr_started = spdk_bit_array_create(128);
		if (g_ctrlr_started == NULL) {
			SPDK_ERRLOG("Cannot create bit array\n");
			return -ENOMEM;
		}
	}

	ctrlr_device = (struct cuse_device *)calloc(1, sizeof(struct cuse_device));
	if (!ctrlr_device) {
		SPDK_ERRLOG("Cannot allocate memory for ctrlr_device\n");
		rv = -ENOMEM;
		goto err2;
	}

	ctrlr_device->ctrlr = ctrlr;

	/* Check if device already exists, if not increment index until success */
	ctrlr_device->index = 0;
	while (1) {
		ctrlr_device->index = spdk_bit_array_find_first_clear(g_ctrlr_started, ctrlr_device->index);
		if (ctrlr_device->index == UINT32_MAX) {
			SPDK_ERRLOG("Too many registered controllers\n");
			/* rv previously stayed 0 here, reporting success on failure */
			rv = -ENOSPC;
			goto err2;
		}

		if (nvme_cuse_claim(ctrlr_device, ctrlr_device->index) == 0) {
			break;
		}
		ctrlr_device->index++;
	}
	spdk_bit_array_set(g_ctrlr_started, ctrlr_device->index);
	snprintf(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name), "spdk/nvme%d",
		 ctrlr_device->index);

	/* Allocate the namespace array before starting the device thread so a
	 * failure here can still unwind through the error labels. */
	ctrlr_device->ns_devices = (struct cuse_device *)calloc(num_ns, sizeof(struct cuse_device));
	if (ctrlr_device->ns_devices == NULL) {
		SPDK_ERRLOG("Cannot allocate memory for ns_devices\n");
		rv = -ENOMEM;
		goto err3;
	}

	rv = pthread_create(&ctrlr_device->tid, NULL, cuse_thread, ctrlr_device);
	if (rv != 0) {
		SPDK_ERRLOG("pthread_create failed\n");
		rv = -rv;
		free(ctrlr_device->ns_devices);
		goto err3;
	}
	TAILQ_INSERT_TAIL(&g_ctrlr_ctx_head, ctrlr_device, tailq);

	/* Start all active namespaces */
	if (cuse_nvme_ctrlr_update_namespaces(ctrlr_device) < 0) {
		SPDK_ERRLOG("Cannot start CUSE namespace devices\n");
		/* cuse_nvme_ctrlr_stop() tears down the thread, releases the
		 * claim and frees ctrlr_device, so do not fall through to the
		 * error labels (doing so double-freed ctrlr_device). */
		cuse_nvme_ctrlr_stop(ctrlr_device);
		return -1;
	}

	return 0;

err3:
	spdk_bit_array_clear(g_ctrlr_started, ctrlr_device->index);
	nvme_cuse_unclaim(ctrlr_device);
err2:
	free(ctrlr_device);
	if (spdk_bit_array_count_set(g_ctrlr_started) == 0) {
		spdk_bit_array_free(&g_ctrlr_started);
	}
	return rv;
}

static struct cuse_device *
nvme_cuse_get_cuse_ctrlr_device(struct spdk_nvme_ctrlr *ctrlr)
{
	struct cuse_device *ctrlr_device = NULL;

	TAILQ_FOREACH(ctrlr_device, &g_ctrlr_ctx_head, tailq) {
		if (ctrlr_device->ctrlr == ctrlr) {
			break;
		}
	}

	return ctrlr_device;
}

static struct cuse_device *
nvme_cuse_get_cuse_ns_device(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid)
{
	struct cuse_device *ctrlr_device = NULL;
	uint32_t num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr);

	if (nsid < 1 || nsid > num_ns) {
		return NULL;
	}

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		return NULL;
	}

	if (!ctrlr_device->ns_devices[nsid - 1].is_started) {
		return NULL;
	}

	return &ctrlr_device->ns_devices[nsid - 1];
}

static void
nvme_cuse_stop(struct spdk_nvme_ctrlr *ctrlr)
{
	struct cuse_device *ctrlr_device;

	pthread_mutex_lock(&g_cuse_mtx);

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		SPDK_ERRLOG("Cannot find associated CUSE device\n");
		pthread_mutex_unlock(&g_cuse_mtx);
		return;
	}

	cuse_nvme_ctrlr_stop(ctrlr_device);

	pthread_mutex_unlock(&g_cuse_mtx);
}

static void
nvme_cuse_update(struct spdk_nvme_ctrlr *ctrlr)
{
	struct cuse_device *ctrlr_device;

	pthread_mutex_lock(&g_cuse_mtx);

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		pthread_mutex_unlock(&g_cuse_mtx);
		return;
	}

	cuse_nvme_ctrlr_update_namespaces(ctrlr_device);

	pthread_mutex_unlock(&g_cuse_mtx);
}

static struct nvme_io_msg_producer cuse_nvme_io_msg_producer = {
	.name = "cuse",
	.stop = nvme_cuse_stop,
	.update = nvme_cuse_update,
};

int
spdk_nvme_cuse_register(struct spdk_nvme_ctrlr *ctrlr)
{
	int rc;

	rc = nvme_io_msg_ctrlr_register(ctrlr, &cuse_nvme_io_msg_producer);
	if (rc) {
		return rc;
	}

	pthread_mutex_lock(&g_cuse_mtx);

	rc = nvme_cuse_start(ctrlr);
	if (rc) {
		nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer);
	}

	pthread_mutex_unlock(&g_cuse_mtx);

	return rc;
}

int
spdk_nvme_cuse_unregister(struct spdk_nvme_ctrlr *ctrlr)
{
	struct cuse_device *ctrlr_device;

	pthread_mutex_lock(&g_cuse_mtx);

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		SPDK_ERRLOG("Cannot find associated CUSE device\n");
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENODEV;
	}

	cuse_nvme_ctrlr_stop(ctrlr_device);

	pthread_mutex_unlock(&g_cuse_mtx);

	nvme_io_msg_ctrlr_unregister(ctrlr, &cuse_nvme_io_msg_producer);

	return 0;
}

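/*
 * Typical lifetime of the public API (sketch only; assumes an already
 * attached controller handle, e.g. from spdk_nvme_connect()):
 *
 *	char name[128];
 *	size_t len = sizeof(name);
 *
 *	if (spdk_nvme_cuse_register(ctrlr) == 0) {
 *		if (spdk_nvme_cuse_get_ctrlr_name(ctrlr, name, &len) == 0) {
 *			printf("CUSE node: /dev/%s\n", name);
 *		}
 *		// ... serve ioctls until shutdown ...
 *		spdk_nvme_cuse_unregister(ctrlr);
 *	}
 */
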
void
spdk_nvme_cuse_update_namespaces(struct spdk_nvme_ctrlr *ctrlr)
{
	nvme_cuse_update(ctrlr);
}

int
spdk_nvme_cuse_get_ctrlr_name(struct spdk_nvme_ctrlr *ctrlr, char *name, size_t *size)
{
	struct cuse_device *ctrlr_device;
	size_t req_len;

	pthread_mutex_lock(&g_cuse_mtx);

	ctrlr_device = nvme_cuse_get_cuse_ctrlr_device(ctrlr);
	if (!ctrlr_device) {
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENODEV;
	}

	req_len = strnlen(ctrlr_device->dev_name, sizeof(ctrlr_device->dev_name));
	/* the caller's buffer must also hold the terminating NUL */
	if (*size < req_len + 1) {
		*size = req_len + 1;
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENOSPC;
	}
	snprintf(name, req_len + 1, "%s", ctrlr_device->dev_name);

	pthread_mutex_unlock(&g_cuse_mtx);

	return 0;
}

int
spdk_nvme_cuse_get_ns_name(struct spdk_nvme_ctrlr *ctrlr, uint32_t nsid, char *name, size_t *size)
{
	struct cuse_device *ns_device;
	size_t req_len;

	pthread_mutex_lock(&g_cuse_mtx);

	ns_device = nvme_cuse_get_cuse_ns_device(ctrlr, nsid);
	if (!ns_device) {
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENODEV;
	}

	req_len = strnlen(ns_device->dev_name, sizeof(ns_device->dev_name));
	/* the caller's buffer must also hold the terminating NUL */
	if (*size < req_len + 1) {
		*size = req_len + 1;
		pthread_mutex_unlock(&g_cuse_mtx);
		return -ENOSPC;
	}
	snprintf(name, req_len + 1, "%s", ns_device->dev_name);

	pthread_mutex_unlock(&g_cuse_mtx);

	return 0;
}