// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe Over Fabrics Target File I/O commands implementation.
 * Copyright (c) 2017-2018 Western Digital Corporation or its
 * affiliates.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/uio.h>
#include <linux/falloc.h>
#include <linux/file.h>
#include "nvmet.h"

#define NVMET_MAX_MPOOL_BVEC		16
#define NVMET_MIN_MPOOL_OBJ		16
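
/*
 * Each mempool-backed bvec array holds NVMET_MAX_MPOOL_BVEC entries and
 * the pool keeps at least NVMET_MIN_MPOOL_OBJ of them in reserve. This
 * bounds the memory pinned per namespace while still letting the mempool
 * fallback in the read/write path make forward progress under memory
 * pressure.
 */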
void nvmet_file_ns_disable(struct nvmet_ns *ns)
{
	if (ns->file) {
		mempool_destroy(ns->bvec_pool);
		ns->bvec_pool = NULL;
		kmem_cache_destroy(ns->bvec_cache);
		ns->bvec_cache = NULL;
		fput(ns->file);
		ns->file = NULL;
	}
}
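
/*
 * Open the backing file with O_DIRECT, record its block size, and set up
 * the slab cache plus mempool that back bvec allocations when
 * kmalloc_array() fails in the I/O path. Every failure after the open
 * unwinds through nvmet_file_ns_disable().
 */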
int nvmet_file_ns_enable(struct nvmet_ns *ns)
{
	int ret;
	struct kstat stat;

	ns->file = filp_open(ns->device_path,
			O_RDWR | O_LARGEFILE | O_DIRECT, 0);
	if (IS_ERR(ns->file)) {
		pr_err("failed to open file %s: (%ld)\n",
				ns->device_path, PTR_ERR(ns->file));
		return PTR_ERR(ns->file);
	}

	ret = vfs_getattr(&ns->file->f_path,
			&stat, STATX_SIZE, AT_STATX_FORCE_SYNC);
	if (ret)
		goto err;

	ns->size = stat.size;
	ns->blksize_shift = file_inode(ns->file)->i_blkbits;

	ns->bvec_cache = kmem_cache_create("nvmet-bvec",
			NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
			0, SLAB_HWCACHE_ALIGN, NULL);
	if (!ns->bvec_cache) {
		ret = -ENOMEM;
		goto err;
	}

	ns->bvec_pool = mempool_create(NVMET_MIN_MPOOL_OBJ, mempool_alloc_slab,
			mempool_free_slab, ns->bvec_cache);
	if (!ns->bvec_pool) {
		ret = -ENOMEM;
		goto err;
	}

	return ret;
err:
	ns->size = 0;
	ns->blksize_shift = 0;
	nvmet_file_ns_disable(ns);
	return ret;
}
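
/*
 * Translate one scatterlist page into a bio_vec entry. The length is
 * derived from the parent scatterlist's offset, so each entry covers at
 * most one page of payload.
 */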
static void nvmet_file_init_bvec(struct bio_vec *bv, struct sg_page_iter *iter)
{
	bv->bv_page = sg_page_iter_page(iter);
	bv->bv_offset = iter->sg->offset;
	bv->bv_len = PAGE_SIZE - iter->sg->offset;
}
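
/*
 * Wrap the assembled bvec array in an iov_iter and hand it to the
 * backing file's ->read_iter()/->write_iter(). FUA writes are mapped to
 * IOCB_DSYNC, and IOCB_DIRECT keeps the page cache out of the data path.
 * If the call returns anything but -EIOCBQUEUED it completed
 * synchronously, so the completion callback is invoked by hand.
 */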
static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos,
		unsigned long nr_segs, size_t count)
{
	struct kiocb *iocb = &req->f.iocb;
	ssize_t (*call_iter)(struct kiocb *iocb, struct iov_iter *iter);
	struct iov_iter iter;
	int ki_flags = 0;
	int rw;
	ssize_t ret;

	if (req->cmd->rw.opcode == nvme_cmd_write) {
		if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
			ki_flags = IOCB_DSYNC;
		call_iter = req->ns->file->f_op->write_iter;
		rw = WRITE;
	} else {
		call_iter = req->ns->file->f_op->read_iter;
		rw = READ;
	}

	iov_iter_bvec(&iter, ITER_BVEC | rw, req->f.bvec, nr_segs, count);

	iocb->ki_pos = pos;
	iocb->ki_filp = req->ns->file;
	iocb->ki_flags = IOCB_DIRECT | ki_flags;

	ret = call_iter(iocb, &iter);

	if (ret != -EIOCBQUEUED && iocb->ki_complete)
		iocb->ki_complete(iocb, ret, 0);

	return ret;
}
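
/*
 * Completion callback: return the bvec array to whichever allocator
 * produced it (unless it is the inline array embedded in the request),
 * then complete the request. A transfer that does not match the expected
 * data length is reported as an internal, do-not-retry error.
 */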
static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2)
{
	struct nvmet_req *req = container_of(iocb, struct nvmet_req, f.iocb);

	if (req->f.bvec != req->inline_bvec) {
		if (likely(req->f.mpool_alloc == false))
			kfree(req->f.bvec);
		else
			mempool_free(req->f.bvec, req->ns->bvec_pool);
	}

	nvmet_req_complete(req, ret != req->data_len ?
			NVME_SC_INTERNAL | NVME_SC_DNR : 0);
}
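
/*
 * Read/write fast path. The bvec array comes from, in order of
 * preference, the inline array embedded in the request, kmalloc_array(),
 * or the per-namespace mempool. A mempool object only holds
 * NVMET_MAX_MPOOL_BVEC entries, so larger requests that hit the fallback
 * are submitted synchronously in mempool-sized chunks; everything else
 * goes out as a single async iocb completed by nvmet_file_io_done().
 */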
static void nvmet_file_execute_rw(struct nvmet_req *req)
{
	ssize_t nr_bvec = DIV_ROUND_UP(req->data_len, PAGE_SIZE);
	struct sg_page_iter sg_pg_iter;
	unsigned long bv_cnt = 0;
	bool is_sync = false;
	size_t len = 0, total_len = 0;
	ssize_t ret = 0;
	loff_t pos;

	if (!req->sg_cnt || !nr_bvec) {
		nvmet_req_complete(req, 0);
		return;
	}

	if (nr_bvec > NVMET_MAX_INLINE_BIOVEC)
		req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec),
				GFP_KERNEL);
	else
		req->f.bvec = req->inline_bvec;

	req->f.mpool_alloc = false;
	if (unlikely(!req->f.bvec)) {
		/* fallback under memory pressure */
		req->f.bvec = mempool_alloc(req->ns->bvec_pool, GFP_KERNEL);
		req->f.mpool_alloc = true;
		if (nr_bvec > NVMET_MAX_MPOOL_BVEC)
			is_sync = true;
	}

	pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift;

	memset(&req->f.iocb, 0, sizeof(struct kiocb));
	for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) {
		nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter);
		len += req->f.bvec[bv_cnt].bv_len;
		total_len += req->f.bvec[bv_cnt].bv_len;
		bv_cnt++;

		WARN_ON_ONCE((nr_bvec - 1) < 0);

		if (unlikely(is_sync) &&
		    (nr_bvec - 1 == 0 || bv_cnt == NVMET_MAX_MPOOL_BVEC)) {
			ret = nvmet_file_submit_bvec(req, pos, bv_cnt, len);
			if (ret < 0)
				goto out;
			pos += len;
			bv_cnt = 0;
			len = 0;
		}
		nr_bvec--;
	}

	if (WARN_ON_ONCE(total_len != req->data_len))
		ret = -EIO;
out:
	if (unlikely(is_sync || ret)) {
		nvmet_file_io_done(&req->f.iocb, ret < 0 ? ret : total_len, 0);
		return;
	}
	req->f.iocb.ki_complete = nvmet_file_io_done;
	nvmet_file_submit_bvec(req, pos, bv_cnt, total_len);
}
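
/*
 * Flush is serviced by vfs_fsync() with the datasync flag set. It runs
 * from a workqueue so that the potentially long-blocking sync happens
 * outside the submission context.
 */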
static void nvmet_file_flush_work(struct work_struct *w)
{
	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
	int ret;

	ret = vfs_fsync(req->ns->file, 1);

	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
}

static void nvmet_file_execute_flush(struct nvmet_req *req)
{
	INIT_WORK(&req->f.work, nvmet_file_flush_work);
	schedule_work(&req->f.work);
}
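
/*
 * DSM deallocate: copy each range descriptor out of the SGL and punch a
 * hole over the corresponding byte range. FALLOC_FL_KEEP_SIZE leaves the
 * file size, and therefore the namespace capacity, unchanged.
 */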
static void nvmet_file_execute_discard(struct nvmet_req *req)
{
	int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
	struct nvme_dsm_range range;
	loff_t offset;
	loff_t len;
	int i, ret = 0;

	for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) {
		if (nvmet_copy_from_sgl(req, i * sizeof(range), &range,
					sizeof(range)))
			break;
		offset = le64_to_cpu(range.slba) << req->ns->blksize_shift;
		len = le32_to_cpu(range.nlb) << req->ns->blksize_shift;
		ret = vfs_fallocate(req->ns->file, mode, offset, len);
		if (ret)
			break;
	}

	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
}
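
/*
 * Dispatch on the DSM attributes. Only the deallocate attribute does any
 * work; the integrated-read and integrated-write hints are acknowledged
 * as successful no-ops.
 */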
static void nvmet_file_dsm_work(struct work_struct *w)
{
	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);

	switch (le32_to_cpu(req->cmd->dsm.attributes)) {
	case NVME_DSMGMT_AD:
		nvmet_file_execute_discard(req);
		return;
	case NVME_DSMGMT_IDR:
	case NVME_DSMGMT_IDW:
	default:
		/* Not supported yet */
		nvmet_req_complete(req, 0);
		return;
	}
}

static void nvmet_file_execute_dsm(struct nvmet_req *req)
{
	INIT_WORK(&req->f.work, nvmet_file_dsm_work);
	schedule_work(&req->f.work);
}
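
/*
 * Write Zeroes is mapped onto FALLOC_FL_ZERO_RANGE. The NVMe length
 * field is 0's based, hence the +1 before shifting by the block size.
 */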
static void nvmet_file_write_zeroes_work(struct work_struct *w)
{
	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
	int mode = FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE;
	loff_t offset;
	loff_t len;
	int ret;

	offset = le64_to_cpu(write_zeroes->slba) << req->ns->blksize_shift;
	len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) <<
			req->ns->blksize_shift);

	ret = vfs_fallocate(req->ns->file, mode, offset, len);
	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
}

static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
{
	INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
	schedule_work(&req->f.work);
}
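
/*
 * Parse entry point for I/O commands on file-backed namespaces: select
 * the execute handler and the expected transfer length for each
 * supported opcode. Unknown opcodes fail with Invalid Opcode | DNR.
 */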
u16 nvmet_file_parse_io_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;

	switch (cmd->common.opcode) {
	case nvme_cmd_read:
	case nvme_cmd_write:
		req->execute = nvmet_file_execute_rw;
		req->data_len = nvmet_rw_len(req);
		return 0;
	case nvme_cmd_flush:
		req->execute = nvmet_file_execute_flush;
		req->data_len = 0;
		return 0;
	case nvme_cmd_dsm:
		req->execute = nvmet_file_execute_dsm;
		req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) *
			sizeof(struct nvme_dsm_range);
		return 0;
	case nvme_cmd_write_zeroes:
		req->execute = nvmet_file_execute_write_zeroes;
		req->data_len = 0;
		return 0;
	default:
		pr_err("unhandled cmd for file ns %d on qid %d\n",
				cmd->common.opcode, req->sq->qid);
		return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
	}
}