// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics TCP target.
 * Copyright (c) 2018 Lightbits Labs. All rights reserved.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/nvme-tcp.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <linux/inet.h>
#include <linux/llist.h>
#include <crypto/hash.h>

#include "nvmet.h"

#define NVMET_TCP_DEF_INLINE_DATA_SIZE	(4 * PAGE_SIZE)

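/*
 * io_work processing budgets: up to 8 receive and 8 send operations per
 * pass, and at most 64 operations total before the work item requeues
 * itself to avoid monopolizing the workqueue.
 */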
#define NVMET_TCP_RECV_BUDGET		8
#define NVMET_TCP_SEND_BUDGET		8
#define NVMET_TCP_IO_WORK_BUDGET	64

enum nvmet_tcp_send_state {
	NVMET_TCP_SEND_DATA_PDU,
	NVMET_TCP_SEND_DATA,
	NVMET_TCP_SEND_R2T,
	NVMET_TCP_SEND_DDGST,
	NVMET_TCP_SEND_RESPONSE
};

enum nvmet_tcp_recv_state {
	NVMET_TCP_RECV_PDU,
	NVMET_TCP_RECV_DATA,
	NVMET_TCP_RECV_DDGST,
	NVMET_TCP_RECV_ERR,
};

enum {
	NVMET_TCP_F_INIT_FAILED = (1 << 0),
};

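/*
 * Per-command context: preallocated buffers for every PDU type the target
 * may emit for this command, receive- and send-side byte accounting, and
 * the position in the send state machine.
 */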
struct nvmet_tcp_cmd {
	struct nvmet_tcp_queue	*queue;
	struct nvmet_req	req;

	struct nvme_tcp_cmd_pdu	*cmd_pdu;
	struct nvme_tcp_rsp_pdu	*rsp_pdu;
	struct nvme_tcp_data_pdu *data_pdu;
	struct nvme_tcp_r2t_pdu	*r2t_pdu;

	u32			rbytes_done;
	u32			wbytes_done;

	u32			pdu_len;
	u32			pdu_recv;
	int			sg_idx;
	int			nr_mapped;
	struct msghdr		recv_msg;
	struct kvec		*iov;
	u32			flags;

	struct list_head	entry;
	struct llist_node	lentry;

	/* send state */
	u32			offset;
	struct scatterlist	*cur_sg;
	enum nvmet_tcp_send_state state;

	__le32			exp_ddgst;
	__le32			recv_ddgst;
};

enum nvmet_tcp_queue_state {
	NVMET_TCP_Q_CONNECTING,
	NVMET_TCP_Q_LIVE,
	NVMET_TCP_Q_DISCONNECTING,
};

struct nvmet_tcp_queue {
	struct socket		*sock;
	struct nvmet_tcp_port	*port;
	struct work_struct	io_work;
	int			cpu;
	struct nvmet_cq		nvme_cq;
	struct nvmet_sq		nvme_sq;

	/* send state */
	struct nvmet_tcp_cmd	*cmds;
	unsigned int		nr_cmds;
	struct list_head	free_list;
	struct llist_head	resp_list;
	struct list_head	resp_send_list;
	int			send_list_len;
	struct nvmet_tcp_cmd	*snd_cmd;

	/* recv state */
	int			offset;
	int			left;
	enum nvmet_tcp_recv_state rcv_state;
	struct nvmet_tcp_cmd	*cmd;
	union nvme_tcp_pdu	pdu;

	/* digest state */
	bool			hdr_digest;
	bool			data_digest;
	struct ahash_request	*snd_hash;
	struct ahash_request	*rcv_hash;

	spinlock_t		state_lock;
	enum nvmet_tcp_queue_state state;

	struct sockaddr_storage	sockaddr;
	struct sockaddr_storage	sockaddr_peer;
	struct work_struct	release_work;

	int			idx;
	struct list_head	queue_list;

	struct nvmet_tcp_cmd	connect;

	struct page_frag_cache	pf_cache;

	void (*data_ready)(struct sock *);
	void (*state_change)(struct sock *);
	void (*write_space)(struct sock *);
};

struct nvmet_tcp_port {
	struct socket		*sock;
	struct work_struct	accept_work;
	struct nvmet_port	*nport;
	struct sockaddr_storage	addr;
	int			last_cpu;
	void (*data_ready)(struct sock *);
};

static DEFINE_IDA(nvmet_tcp_queue_ida);
static LIST_HEAD(nvmet_tcp_queue_list);
static DEFINE_MUTEX(nvmet_tcp_queue_mutex);

static struct workqueue_struct *nvmet_tcp_wq;
static struct nvmet_fabrics_ops nvmet_tcp_ops;
static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);

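/*
 * The transfer tag (ttag) placed in R2T PDUs is the command's index in the
 * queue's cmds array, which lets an incoming H2CData PDU be matched back
 * to its command by direct indexing.
 */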
static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
		struct nvmet_tcp_cmd *cmd)
{
	return cmd - queue->cmds;
}

static inline bool nvmet_tcp_has_data_in(struct nvmet_tcp_cmd *cmd)
{
	return nvme_is_write(cmd->req.cmd) &&
		cmd->rbytes_done < cmd->req.transfer_len;
}

static inline bool nvmet_tcp_need_data_in(struct nvmet_tcp_cmd *cmd)
{
	return nvmet_tcp_has_data_in(cmd) && !cmd->req.rsp->status;
}

static inline bool nvmet_tcp_need_data_out(struct nvmet_tcp_cmd *cmd)
{
	return !nvme_is_write(cmd->req.cmd) &&
		cmd->req.transfer_len > 0 &&
		!cmd->req.rsp->status;
}

static inline bool nvmet_tcp_has_inline_data(struct nvmet_tcp_cmd *cmd)
{
	return nvme_is_write(cmd->req.cmd) && cmd->pdu_len &&
		!cmd->rbytes_done;
}

static inline struct nvmet_tcp_cmd *
nvmet_tcp_get_cmd(struct nvmet_tcp_queue *queue)
{
	struct nvmet_tcp_cmd *cmd;

	cmd = list_first_entry_or_null(&queue->free_list,
				struct nvmet_tcp_cmd, entry);
	if (!cmd)
		return NULL;
	list_del_init(&cmd->entry);

	cmd->rbytes_done = cmd->wbytes_done = 0;
	cmd->pdu_len = 0;
	cmd->pdu_recv = 0;
	cmd->iov = NULL;
	cmd->flags = 0;
	return cmd;
}

static inline void nvmet_tcp_put_cmd(struct nvmet_tcp_cmd *cmd)
{
	if (unlikely(cmd == &cmd->queue->connect))
		return;

	list_add_tail(&cmd->entry, &cmd->queue->free_list);
}

static inline u8 nvmet_tcp_hdgst_len(struct nvmet_tcp_queue *queue)
{
	return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
}

static inline u8 nvmet_tcp_ddgst_len(struct nvmet_tcp_queue *queue)
{
	return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
}

static inline void nvmet_tcp_hdgst(struct ahash_request *hash,
		void *pdu, size_t len)
{
	struct scatterlist sg;

	sg_init_one(&sg, pdu, len);
	ahash_request_set_crypt(hash, &sg, pdu + len, len);
	crypto_ahash_digest(hash);
}

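/*
 * Header digest verification: save the digest that arrived on the wire,
 * recompute it over the header bytes (the result lands in the same spot,
 * directly after the header), and compare the two.
 */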
static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue,
	void *pdu, size_t len)
{
	struct nvme_tcp_hdr *hdr = pdu;
	__le32 recv_digest;
	__le32 exp_digest;

	if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) {
		pr_err("queue %d: header digest enabled but no header digest\n",
			queue->idx);
		return -EPROTO;
	}

	recv_digest = *(__le32 *)(pdu + hdr->hlen);
	nvmet_tcp_hdgst(queue->rcv_hash, pdu, len);
	exp_digest = *(__le32 *)(pdu + hdr->hlen);
	if (recv_digest != exp_digest) {
		pr_err("queue %d: header digest error: recv %#x expected %#x\n",
			queue->idx, le32_to_cpu(recv_digest),
			le32_to_cpu(exp_digest));
		return -EPROTO;
	}

	return 0;
}

static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu)
{
	struct nvme_tcp_hdr *hdr = pdu;
	u8 digest_len = nvmet_tcp_hdgst_len(queue);
	u32 len;

	len = le32_to_cpu(hdr->plen) - hdr->hlen -
		(hdr->flags & NVME_TCP_F_HDGST ? digest_len : 0);

	if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) {
		pr_err("queue %d: data digest flag is cleared\n", queue->idx);
		return -EPROTO;
	}

	return 0;
}

static void nvmet_tcp_unmap_pdu_iovec(struct nvmet_tcp_cmd *cmd)
{
	struct scatterlist *sg;
	int i;

	sg = &cmd->req.sg[cmd->sg_idx];

	for (i = 0; i < cmd->nr_mapped; i++)
		kunmap(sg_page(&sg[i]));
}

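/*
 * Build a kvec over the command's scatterlist pages (kmap'ed) covering
 * pdu_len bytes starting at the current receive offset, and point the
 * command's recv_msg iterator at it.
 */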
static void nvmet_tcp_map_pdu_iovec(struct nvmet_tcp_cmd *cmd)
{
	struct kvec *iov = cmd->iov;
	struct scatterlist *sg;
	u32 length, offset, sg_offset;

	length = cmd->pdu_len;
	cmd->nr_mapped = DIV_ROUND_UP(length, PAGE_SIZE);
	offset = cmd->rbytes_done;
	cmd->sg_idx = offset / PAGE_SIZE;
	sg_offset = offset % PAGE_SIZE;
	sg = &cmd->req.sg[cmd->sg_idx];

	while (length) {
		u32 iov_len = min_t(u32, length, sg->length - sg_offset);

		iov->iov_base = kmap(sg_page(sg)) + sg->offset + sg_offset;
		iov->iov_len = iov_len;

		length -= iov_len;
		sg = sg_next(sg);
		iov++;
		/* only the first segment starts at a non-zero page offset */
		sg_offset = 0;
	}

	iov_iter_kvec(&cmd->recv_msg.msg_iter, READ, cmd->iov,
		cmd->nr_mapped, cmd->pdu_len);
}

static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
{
	queue->rcv_state = NVMET_TCP_RECV_ERR;
	if (queue->nvme_sq.ctrl)
		nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl);
	else
		kernel_sock_shutdown(queue->sock, SHUT_RDWR);
}

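/*
 * Parse the command's SGL descriptor. An offset data block descriptor
 * means in-capsule (inline) data, which is only legal for writes and must
 * fit within the port's inline_data_size; either way, allocate a
 * scatterlist large enough for the whole transfer.
 */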
static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
{
	struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
	u32 len = le32_to_cpu(sgl->length);

	if (!cmd->req.data_len)
		return 0;

	if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) |
			NVME_SGL_FMT_OFFSET)) {
		if (!nvme_is_write(cmd->req.cmd))
			return NVME_SC_INVALID_FIELD | NVME_SC_DNR;

		if (len > cmd->req.port->inline_data_size)
			return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR;
		cmd->pdu_len = len;
	}
	cmd->req.transfer_len += len;

	cmd->req.sg = sgl_alloc(len, GFP_KERNEL, &cmd->req.sg_cnt);
	if (!cmd->req.sg)
		return NVME_SC_INTERNAL;
	cmd->cur_sg = cmd->req.sg;

	if (nvmet_tcp_has_data_in(cmd)) {
		cmd->iov = kmalloc_array(cmd->req.sg_cnt,
				sizeof(*cmd->iov), GFP_KERNEL);
		if (!cmd->iov)
			goto err;
	}

	return 0;
err:
	sgl_free(cmd->req.sg);
	return NVME_SC_INTERNAL;
}

static void nvmet_tcp_ddgst(struct ahash_request *hash,
		struct nvmet_tcp_cmd *cmd)
{
	ahash_request_set_crypt(hash, cmd->req.sg,
		(void *)&cmd->exp_ddgst, cmd->req.transfer_len);
	crypto_ahash_digest(hash);
}

static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd)
{
	struct nvme_tcp_data_pdu *pdu = cmd->data_pdu;
	struct nvmet_tcp_queue *queue = cmd->queue;
	u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
	u8 ddgst = nvmet_tcp_ddgst_len(cmd->queue);

	cmd->offset = 0;
	cmd->state = NVMET_TCP_SEND_DATA_PDU;

	pdu->hdr.type = nvme_tcp_c2h_data;
	pdu->hdr.flags = NVME_TCP_F_DATA_LAST;
	pdu->hdr.hlen = sizeof(*pdu);
	pdu->hdr.pdo = pdu->hdr.hlen + hdgst;
	pdu->hdr.plen =
		cpu_to_le32(pdu->hdr.hlen + hdgst +
				cmd->req.transfer_len + ddgst);
	pdu->command_id = cmd->req.rsp->command_id;
	pdu->data_length = cpu_to_le32(cmd->req.transfer_len);
	pdu->data_offset = cpu_to_le32(cmd->wbytes_done);

	if (queue->data_digest) {
		pdu->hdr.flags |= NVME_TCP_F_DDGST;
		nvmet_tcp_ddgst(queue->snd_hash, cmd);
	}

	if (cmd->queue->hdr_digest) {
		pdu->hdr.flags |= NVME_TCP_F_HDGST;
		nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
	}
}

static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd *cmd)
{
	struct nvme_tcp_r2t_pdu *pdu = cmd->r2t_pdu;
	struct nvmet_tcp_queue *queue = cmd->queue;
	u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);

	cmd->offset = 0;
	cmd->state = NVMET_TCP_SEND_R2T;

	pdu->hdr.type = nvme_tcp_r2t;
	pdu->hdr.flags = 0;
	pdu->hdr.hlen = sizeof(*pdu);
	pdu->hdr.pdo = 0;
	pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);

	pdu->command_id = cmd->req.cmd->common.command_id;
	pdu->ttag = nvmet_tcp_cmd_tag(cmd->queue, cmd);
	pdu->r2t_length = cpu_to_le32(cmd->req.transfer_len - cmd->rbytes_done);
	pdu->r2t_offset = cpu_to_le32(cmd->rbytes_done);
	if (cmd->queue->hdr_digest) {
		pdu->hdr.flags |= NVME_TCP_F_HDGST;
		nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
	}
}

static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd)
{
	struct nvme_tcp_rsp_pdu *pdu = cmd->rsp_pdu;
	struct nvmet_tcp_queue *queue = cmd->queue;
	u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);

	cmd->offset = 0;
	cmd->state = NVMET_TCP_SEND_RESPONSE;

	pdu->hdr.type = nvme_tcp_rsp;
	pdu->hdr.flags = 0;
	pdu->hdr.hlen = sizeof(*pdu);
	pdu->hdr.pdo = 0;
	pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);
	if (cmd->queue->hdr_digest) {
		pdu->hdr.flags |= NVME_TCP_F_HDGST;
		nvmet_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
	}
}

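/*
 * Completions can arrive from any context, so they are first posted to the
 * lockless resp_list; the queue's worker then splices them onto
 * resp_send_list, which only io_work manipulates.
 */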
static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue *queue)
{
	struct llist_node *node;

	node = llist_del_all(&queue->resp_list);
	if (!node)
		return;

	while (node) {
		struct nvmet_tcp_cmd *cmd = llist_entry(node,
					struct nvmet_tcp_cmd, lentry);

		list_add(&cmd->entry, &queue->resp_send_list);
		node = node->next;
		queue->send_list_len++;
	}
}

static struct nvmet_tcp_cmd *nvmet_tcp_fetch_cmd(struct nvmet_tcp_queue *queue)
{
	queue->snd_cmd = list_first_entry_or_null(&queue->resp_send_list,
				struct nvmet_tcp_cmd, entry);
	if (!queue->snd_cmd) {
		nvmet_tcp_process_resp_list(queue);
		queue->snd_cmd =
			list_first_entry_or_null(&queue->resp_send_list,
					struct nvmet_tcp_cmd, entry);
		if (unlikely(!queue->snd_cmd))
			return NULL;
	}

	list_del_init(&queue->snd_cmd->entry);
	queue->send_list_len--;

	if (nvmet_tcp_need_data_out(queue->snd_cmd))
		nvmet_setup_c2h_data_pdu(queue->snd_cmd);
	else if (nvmet_tcp_need_data_in(queue->snd_cmd))
		nvmet_setup_r2t_pdu(queue->snd_cmd);
	else
		nvmet_setup_response_pdu(queue->snd_cmd);

	return queue->snd_cmd;
}

static void nvmet_tcp_queue_response(struct nvmet_req *req)
{
	struct nvmet_tcp_cmd *cmd =
		container_of(req, struct nvmet_tcp_cmd, req);
	struct nvmet_tcp_queue *queue = cmd->queue;

	llist_add(&cmd->lentry, &queue->resp_list);
	queue_work_on(cmd->queue->cpu, nvmet_tcp_wq, &cmd->queue->io_work);
}

static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
{
	u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
	int left = sizeof(*cmd->data_pdu) - cmd->offset + hdgst;
	int ret;

	ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu),
			offset_in_page(cmd->data_pdu) + cmd->offset,
			left, MSG_DONTWAIT | MSG_MORE);
	if (ret <= 0)
		return ret;

	cmd->offset += ret;
	left -= ret;

	if (left)
		return -EAGAIN;

	cmd->state = NVMET_TCP_SEND_DATA;
	cmd->offset = 0;
	return 1;
}

static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd)
{
	struct nvmet_tcp_queue *queue = cmd->queue;
	int ret;

	while (cmd->cur_sg) {
		struct page *page = sg_page(cmd->cur_sg);
		u32 left = cmd->cur_sg->length - cmd->offset;

		ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
					left, MSG_DONTWAIT | MSG_MORE);
		if (ret <= 0)
			return ret;

		cmd->offset += ret;
		cmd->wbytes_done += ret;

		/* Done with sg? */
		if (cmd->offset == cmd->cur_sg->length) {
			cmd->cur_sg = sg_next(cmd->cur_sg);
			cmd->offset = 0;
		}
	}

	if (queue->data_digest) {
		cmd->state = NVMET_TCP_SEND_DDGST;
		cmd->offset = 0;
	} else {
		nvmet_setup_response_pdu(cmd);
	}
	return 1;
}

static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
		bool last_in_batch)
{
	u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
	int left = sizeof(*cmd->rsp_pdu) - cmd->offset + hdgst;
	int flags = MSG_DONTWAIT;
	int ret;

	if (!last_in_batch && cmd->queue->send_list_len)
		flags |= MSG_MORE;
	else
		flags |= MSG_EOR;

	ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->rsp_pdu),
		offset_in_page(cmd->rsp_pdu) + cmd->offset, left, flags);
	if (ret <= 0)
		return ret;
	cmd->offset += ret;
	left -= ret;

	if (left)
		return -EAGAIN;

	kfree(cmd->iov);
	sgl_free(cmd->req.sg);
	cmd->queue->snd_cmd = NULL;
	nvmet_tcp_put_cmd(cmd);
	return 1;
}

static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
{
	u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue);
	int left = sizeof(*cmd->r2t_pdu) - cmd->offset + hdgst;
	int flags = MSG_DONTWAIT;
	int ret;

	if (!last_in_batch && cmd->queue->send_list_len)
		flags |= MSG_MORE;
	else
		flags |= MSG_EOR;

	ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->r2t_pdu),
		offset_in_page(cmd->r2t_pdu) + cmd->offset, left, flags);
	if (ret <= 0)
		return ret;
	cmd->offset += ret;
	left -= ret;

	if (left)
		return -EAGAIN;

	cmd->queue->snd_cmd = NULL;
	return 1;
}

static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd)
{
	struct nvmet_tcp_queue *queue = cmd->queue;
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
	struct kvec iov = {
		.iov_base = (void *)&cmd->exp_ddgst + cmd->offset,
		.iov_len = NVME_TCP_DIGEST_LENGTH - cmd->offset
	};
	int ret;

	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
	if (unlikely(ret <= 0))
		return ret;

	cmd->offset += ret;
	if (cmd->offset != NVME_TCP_DIGEST_LENGTH)
		return -EAGAIN;

	nvmet_setup_response_pdu(cmd);
	return 1;
}

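/*
 * Advance the in-flight command through its send states. Each state sends
 * as much as the socket will take; partial progress surfaces as -EAGAIN
 * and resumes on the next invocation.
 */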
static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue,
		bool last_in_batch)
{
	struct nvmet_tcp_cmd *cmd = queue->snd_cmd;
	int ret = 0;

	if (!cmd || queue->state == NVMET_TCP_Q_DISCONNECTING) {
		cmd = nvmet_tcp_fetch_cmd(queue);
		if (unlikely(!cmd))
			return 0;
	}

	if (cmd->state == NVMET_TCP_SEND_DATA_PDU) {
		ret = nvmet_try_send_data_pdu(cmd);
		if (ret <= 0)
			goto done_send;
	}

	if (cmd->state == NVMET_TCP_SEND_DATA) {
		ret = nvmet_try_send_data(cmd);
		if (ret <= 0)
			goto done_send;
	}

	if (cmd->state == NVMET_TCP_SEND_DDGST) {
		ret = nvmet_try_send_ddgst(cmd);
		if (ret <= 0)
			goto done_send;
	}

	if (cmd->state == NVMET_TCP_SEND_R2T) {
		ret = nvmet_try_send_r2t(cmd, last_in_batch);
		if (ret <= 0)
			goto done_send;
	}

	if (cmd->state == NVMET_TCP_SEND_RESPONSE)
		ret = nvmet_try_send_response(cmd, last_in_batch);

done_send:
	if (ret < 0) {
		if (ret == -EAGAIN)
			return 0;
		return ret;
	}

	return 1;
}

static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue,
		int budget, int *sends)
{
	int i, ret = 0;

	for (i = 0; i < budget; i++) {
		ret = nvmet_tcp_try_send_one(queue, i == budget - 1);
		if (ret <= 0)
			break;
		(*sends)++;
	}

	return ret;
}

static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue)
{
	queue->offset = 0;
	queue->left = sizeof(struct nvme_tcp_hdr);
	queue->cmd = NULL;
	queue->rcv_state = NVMET_TCP_RECV_PDU;
}

static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue)
{
	struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);

	ahash_request_free(queue->rcv_hash);
	ahash_request_free(queue->snd_hash);
	crypto_free_ahash(tfm);
}

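/*
 * Header and data digests are both CRC32C per the NVMe/TCP spec; a single
 * crc32c transform backs two ahash requests, one per direction.
 */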
static int nvmet_tcp_alloc_crypto(struct nvmet_tcp_queue *queue)
{
	struct crypto_ahash *tfm;

	tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!queue->snd_hash)
		goto free_tfm;
	ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);

	queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
	if (!queue->rcv_hash)
		goto free_snd_hash;
	ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);

	return 0;
free_snd_hash:
	ahash_request_free(queue->snd_hash);
free_tfm:
	crypto_free_ahash(tfm);
	return -ENOMEM;
}

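/*
 * Handle the connection's ICReq PDU: validate its length, format version,
 * alignment and maxr2t, negotiate digests, send back an ICResp and move
 * the queue to the LIVE state.
 */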
static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
{
	struct nvme_tcp_icreq_pdu *icreq = &queue->pdu.icreq;
	struct nvme_tcp_icresp_pdu *icresp = &queue->pdu.icresp;
	struct msghdr msg = {};
	struct kvec iov;
	int ret;

	if (le32_to_cpu(icreq->hdr.plen) != sizeof(struct nvme_tcp_icreq_pdu)) {
		pr_err("bad nvme-tcp pdu length (%d)\n",
			le32_to_cpu(icreq->hdr.plen));
		nvmet_tcp_fatal_error(queue);
		return -EPROTO;
	}

	if (icreq->pfv != NVME_TCP_PFV_1_0) {
		pr_err("queue %d: bad pfv %d\n", queue->idx, icreq->pfv);
		return -EPROTO;
	}

	if (icreq->hpda != 0) {
		pr_err("queue %d: unsupported hpda %d\n", queue->idx,
			icreq->hpda);
		return -EPROTO;
	}

	if (icreq->maxr2t != 0) {
		pr_err("queue %d: unsupported maxr2t %d\n", queue->idx,
			le32_to_cpu(icreq->maxr2t) + 1);
		return -EPROTO;
	}

	queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE);
	queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE);
	if (queue->hdr_digest || queue->data_digest) {
		ret = nvmet_tcp_alloc_crypto(queue);
		if (ret)
			return ret;
	}

	memset(icresp, 0, sizeof(*icresp));
	icresp->hdr.type = nvme_tcp_icresp;
	icresp->hdr.hlen = sizeof(*icresp);
	icresp->hdr.pdo = 0;
	icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen);
	icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
	icresp->maxdata = cpu_to_le32(0xffff); /* FIXME: support r2t */
	icresp->cpda = 0;
	if (queue->hdr_digest)
		icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
	if (queue->data_digest)
		icresp->digest |= NVME_TCP_DATA_DIGEST_ENABLE;

	iov.iov_base = icresp;
	iov.iov_len = sizeof(*icresp);
	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
	if (ret < 0)
		goto free_crypto;

	queue->state = NVMET_TCP_Q_LIVE;
	nvmet_prepare_receive_pdu(queue);
	return 0;
free_crypto:
	if (queue->hdr_digest || queue->data_digest)
		nvmet_tcp_free_crypto(queue);
	return ret;
}

static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue,
		struct nvmet_tcp_cmd *cmd, struct nvmet_req *req)
{
	int ret;

	/* recover the expected data transfer length */
	req->data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length);

	if (!nvme_is_write(cmd->req.cmd) ||
	    req->data_len > cmd->req.port->inline_data_size) {
		nvmet_prepare_receive_pdu(queue);
		return;
	}

	ret = nvmet_tcp_map_data(cmd);
	if (unlikely(ret)) {
		pr_err("queue %d: failed to map data\n", queue->idx);
		nvmet_tcp_fatal_error(queue);
		return;
	}

	queue->rcv_state = NVMET_TCP_RECV_DATA;
	nvmet_tcp_map_pdu_iovec(cmd);
	cmd->flags |= NVMET_TCP_F_INIT_FAILED;
}

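/*
 * An H2CData PDU must continue exactly where the previous data for its
 * command left off; the command is looked up by ttag and any
 * discontiguous offset is rejected.
 */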
static int nvmet_tcp_handle_h2c_data_pdu(struct nvmet_tcp_queue *queue)
{
	struct nvme_tcp_data_pdu *data = &queue->pdu.data;
	struct nvmet_tcp_cmd *cmd;

	cmd = &queue->cmds[data->ttag];

	if (le32_to_cpu(data->data_offset) != cmd->rbytes_done) {
		pr_err("ttag %u unexpected data offset %u (expected %u)\n",
			data->ttag, le32_to_cpu(data->data_offset),
			cmd->rbytes_done);
		/* FIXME: use path and transport errors */
		nvmet_req_complete(&cmd->req,
			NVME_SC_INVALID_FIELD | NVME_SC_DNR);
		return -EPROTO;
	}

	cmd->pdu_len = le32_to_cpu(data->data_length);
	cmd->pdu_recv = 0;
	nvmet_tcp_map_pdu_iovec(cmd);
	queue->cmd = cmd;
	queue->rcv_state = NVMET_TCP_RECV_DATA;

	return 0;
}

static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue)
{
	struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr;
	struct nvme_command *nvme_cmd = &queue->pdu.cmd.cmd;
	struct nvmet_req *req;
	int ret;

	if (unlikely(queue->state == NVMET_TCP_Q_CONNECTING)) {
		if (hdr->type != nvme_tcp_icreq) {
			pr_err("unexpected pdu type (%d) before icreq\n",
				hdr->type);
			nvmet_tcp_fatal_error(queue);
			return -EPROTO;
		}
		return nvmet_tcp_handle_icreq(queue);
	}

	if (hdr->type == nvme_tcp_h2c_data) {
		ret = nvmet_tcp_handle_h2c_data_pdu(queue);
		if (unlikely(ret))
			return ret;
		return 0;
	}

	queue->cmd = nvmet_tcp_get_cmd(queue);
	if (unlikely(!queue->cmd)) {
		/* This should never happen */
		pr_err("queue %d: out of commands (%d) send_list_len: %d, opcode: %d",
			queue->idx, queue->nr_cmds, queue->send_list_len,
			nvme_cmd->common.opcode);
		nvmet_tcp_fatal_error(queue);
		return -ENOMEM;
	}

	req = &queue->cmd->req;
	memcpy(req->cmd, nvme_cmd, sizeof(*nvme_cmd));

	if (unlikely(!nvmet_req_init(req, &queue->nvme_cq,
			&queue->nvme_sq, &nvmet_tcp_ops))) {
		pr_err("failed cmd %p id %d opcode %d, data_len: %d\n",
			req->cmd, req->cmd->common.command_id,
			req->cmd->common.opcode,
			le32_to_cpu(req->cmd->common.dptr.sgl.length));

		nvmet_tcp_handle_req_failure(queue, queue->cmd, req);
		return -EAGAIN;
	}

	ret = nvmet_tcp_map_data(queue->cmd);
	if (unlikely(ret)) {
		pr_err("queue %d: failed to map data\n", queue->idx);
		if (nvmet_tcp_has_inline_data(queue->cmd))
			nvmet_tcp_fatal_error(queue);
		else
			nvmet_req_complete(req, ret);
		ret = -EAGAIN;
		goto out;
	}

	if (nvmet_tcp_need_data_in(queue->cmd)) {
		if (nvmet_tcp_has_inline_data(queue->cmd)) {
			queue->rcv_state = NVMET_TCP_RECV_DATA;
			nvmet_tcp_map_pdu_iovec(queue->cmd);
			return 0;
		}
		/* send back R2T */
		nvmet_tcp_queue_response(&queue->cmd->req);
		goto out;
	}

	nvmet_req_execute(&queue->cmd->req);
out:
	nvmet_prepare_receive_pdu(queue);
	return ret;
}

static const u8 nvme_tcp_pdu_sizes[] = {
	[nvme_tcp_icreq]	= sizeof(struct nvme_tcp_icreq_pdu),
	[nvme_tcp_cmd]		= sizeof(struct nvme_tcp_cmd_pdu),
	[nvme_tcp_h2c_data]	= sizeof(struct nvme_tcp_data_pdu),
};

static inline u8 nvmet_tcp_pdu_size(u8 type)
{
	size_t idx = type;

	return (idx < ARRAY_SIZE(nvme_tcp_pdu_sizes) &&
		nvme_tcp_pdu_sizes[idx]) ?
			nvme_tcp_pdu_sizes[idx] : 0;
}

static inline bool nvmet_tcp_pdu_valid(u8 type)
{
	switch (type) {
	case nvme_tcp_icreq:
	case nvme_tcp_cmd:
	case nvme_tcp_h2c_data:
		return true;
	}

	return false;
}

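/*
 * PDU reception is two-staged: first the common header, which reveals the
 * full header length, then the rest of the header (plus header digest, if
 * enabled) before the PDU is dispatched.
 */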
static int nvmet_tcp_try_recv_pdu(struct nvmet_tcp_queue *queue)
{
	struct nvme_tcp_hdr *hdr = &queue->pdu.cmd.hdr;
	int len;
	struct kvec iov;
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };

recv:
	iov.iov_base = (void *)&queue->pdu + queue->offset;
	iov.iov_len = queue->left;
	len = kernel_recvmsg(queue->sock, &msg, &iov, 1,
			iov.iov_len, msg.msg_flags);
	if (unlikely(len < 0))
		return len;

	queue->offset += len;
	queue->left -= len;
	if (queue->left)
		return -EAGAIN;

	if (queue->offset == sizeof(struct nvme_tcp_hdr)) {
		u8 hdgst = nvmet_tcp_hdgst_len(queue);

		if (unlikely(!nvmet_tcp_pdu_valid(hdr->type))) {
			pr_err("unexpected pdu type %d\n", hdr->type);
			nvmet_tcp_fatal_error(queue);
			return -EIO;
		}

		if (unlikely(hdr->hlen != nvmet_tcp_pdu_size(hdr->type))) {
			pr_err("pdu %d bad hlen %d\n", hdr->type, hdr->hlen);
			return -EIO;
		}

		queue->left = hdr->hlen - queue->offset + hdgst;
		goto recv;
	}

	if (queue->hdr_digest &&
	    nvmet_tcp_verify_hdgst(queue, &queue->pdu, hdr->hlen)) {
		nvmet_tcp_fatal_error(queue); /* fatal */
		return -EPROTO;
	}

	if (queue->data_digest &&
	    nvmet_tcp_check_ddgst(queue, &queue->pdu)) {
		nvmet_tcp_fatal_error(queue); /* fatal */
		return -EPROTO;
	}

	return nvmet_tcp_done_recv_pdu(queue);
}

static void nvmet_tcp_prep_recv_ddgst(struct nvmet_tcp_cmd *cmd)
{
	struct nvmet_tcp_queue *queue = cmd->queue;

	nvmet_tcp_ddgst(queue->rcv_hash, cmd);
	queue->offset = 0;
	queue->left = NVME_TCP_DIGEST_LENGTH;
	queue->rcv_state = NVMET_TCP_RECV_DDGST;
}

static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
{
	struct nvmet_tcp_cmd *cmd = queue->cmd;
	int ret;

	while (msg_data_left(&cmd->recv_msg)) {
		ret = sock_recvmsg(cmd->queue->sock, &cmd->recv_msg,
			cmd->recv_msg.msg_flags);
		if (ret <= 0)
			return ret;

		cmd->pdu_recv += ret;
		cmd->rbytes_done += ret;
	}

	nvmet_tcp_unmap_pdu_iovec(cmd);

	if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
	    cmd->rbytes_done == cmd->req.transfer_len) {
		if (queue->data_digest) {
			nvmet_tcp_prep_recv_ddgst(cmd);
			return 0;
		}
		nvmet_req_execute(&cmd->req);
	}

	nvmet_prepare_receive_pdu(queue);
	return 0;
}

static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue)
{
	struct nvmet_tcp_cmd *cmd = queue->cmd;
	int ret;
	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
	struct kvec iov = {
		.iov_base = (void *)&cmd->recv_ddgst + queue->offset,
		.iov_len = queue->left
	};

	ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
			iov.iov_len, msg.msg_flags);
	if (unlikely(ret < 0))
		return ret;

	queue->offset += ret;
	queue->left -= ret;
	if (queue->left)
		return -EAGAIN;

	if (queue->data_digest && cmd->exp_ddgst != cmd->recv_ddgst) {
		pr_err("queue %d: cmd %d pdu (%d) data digest error: recv %#x expected %#x\n",
			queue->idx, cmd->req.cmd->common.command_id,
			queue->pdu.cmd.hdr.type, le32_to_cpu(cmd->recv_ddgst),
			le32_to_cpu(cmd->exp_ddgst));
		nvmet_tcp_finish_cmd(cmd);
		nvmet_tcp_fatal_error(queue);
		ret = -EPROTO;
		goto out;
	}

	if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
	    cmd->rbytes_done == cmd->req.transfer_len)
		nvmet_req_execute(&cmd->req);
	ret = 0;
out:
	nvmet_prepare_receive_pdu(queue);
	return ret;
}

static int nvmet_tcp_try_recv_one(struct nvmet_tcp_queue *queue)
{
	int result = 0;

	if (unlikely(queue->rcv_state == NVMET_TCP_RECV_ERR))
		return 0;

	if (queue->rcv_state == NVMET_TCP_RECV_PDU) {
		result = nvmet_tcp_try_recv_pdu(queue);
		if (result != 0)
			goto done_recv;
	}

	if (queue->rcv_state == NVMET_TCP_RECV_DATA) {
		result = nvmet_tcp_try_recv_data(queue);
		if (result != 0)
			goto done_recv;
	}

	if (queue->rcv_state == NVMET_TCP_RECV_DDGST) {
		result = nvmet_tcp_try_recv_ddgst(queue);
		if (result != 0)
			goto done_recv;
	}

done_recv:
	if (result < 0) {
		if (result == -EAGAIN)
			return 0;
		return result;
	}
	return 1;
}

static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
		int budget, int *recvs)
{
	int i, ret = 0;

	for (i = 0; i < budget; i++) {
		ret = nvmet_tcp_try_recv_one(queue);
		if (ret <= 0)
			break;
		(*recvs)++;
	}

	return ret;
}

static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue)
{
	spin_lock(&queue->state_lock);
	if (queue->state != NVMET_TCP_Q_DISCONNECTING) {
		queue->state = NVMET_TCP_Q_DISCONNECTING;
		schedule_work(&queue->release_work);
	}
	spin_unlock(&queue->state_lock);
}

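/*
 * Main per-queue worker: alternate bounded receive and send passes until
 * both sides stall or the overall budget is spent, then requeue if work
 * remains.
 */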
static void nvmet_tcp_io_work(struct work_struct *w)
{
	struct nvmet_tcp_queue *queue =
		container_of(w, struct nvmet_tcp_queue, io_work);
	bool pending;
	int ret, ops = 0;

	do {
		pending = false;

		ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops);
		if (ret > 0) {
			pending = true;
		} else if (ret < 0) {
			if (ret == -EPIPE || ret == -ECONNRESET)
				kernel_sock_shutdown(queue->sock, SHUT_RDWR);
			else
				nvmet_tcp_fatal_error(queue);
			return;
		}

		ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops);
		if (ret > 0) {
			/* transmitted message/data */
			pending = true;
		} else if (ret < 0) {
			if (ret == -EPIPE || ret == -ECONNRESET)
				kernel_sock_shutdown(queue->sock, SHUT_RDWR);
			else
				nvmet_tcp_fatal_error(queue);
			return;
		}

	} while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);

	/*
	 * We exhausted our budget, requeue ourselves
	 */
	if (pending)
		queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
}

static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue,
		struct nvmet_tcp_cmd *c)
{
	u8 hdgst = nvmet_tcp_hdgst_len(queue);

	c->queue = queue;
	c->req.port = queue->port->nport;

	c->cmd_pdu = page_frag_alloc(&queue->pf_cache,
			sizeof(*c->cmd_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
	if (!c->cmd_pdu)
		return -ENOMEM;
	c->req.cmd = &c->cmd_pdu->cmd;

	c->rsp_pdu = page_frag_alloc(&queue->pf_cache,
			sizeof(*c->rsp_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
	if (!c->rsp_pdu)
		goto out_free_cmd;
	c->req.rsp = &c->rsp_pdu->cqe;

	c->data_pdu = page_frag_alloc(&queue->pf_cache,
			sizeof(*c->data_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
	if (!c->data_pdu)
		goto out_free_rsp;

	c->r2t_pdu = page_frag_alloc(&queue->pf_cache,
			sizeof(*c->r2t_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
	if (!c->r2t_pdu)
		goto out_free_data;

	c->recv_msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL;

	list_add_tail(&c->entry, &queue->free_list);

	return 0;
out_free_data:
	page_frag_free(c->data_pdu);
out_free_rsp:
	page_frag_free(c->rsp_pdu);
out_free_cmd:
	page_frag_free(c->cmd_pdu);
	return -ENOMEM;
}

static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c)
{
	page_frag_free(c->r2t_pdu);
	page_frag_free(c->data_pdu);
	page_frag_free(c->rsp_pdu);
	page_frag_free(c->cmd_pdu);
}

static int nvmet_tcp_alloc_cmds(struct nvmet_tcp_queue *queue)
{
	struct nvmet_tcp_cmd *cmds;
	int i, ret = -EINVAL, nr_cmds = queue->nr_cmds;

	cmds = kcalloc(nr_cmds, sizeof(struct nvmet_tcp_cmd), GFP_KERNEL);
	if (!cmds)
		goto out;

	for (i = 0; i < nr_cmds; i++) {
		ret = nvmet_tcp_alloc_cmd(queue, cmds + i);
		if (ret)
			goto out_free;
	}

	queue->cmds = cmds;

	return 0;
out_free:
	while (--i >= 0)
		nvmet_tcp_free_cmd(cmds + i);
	kfree(cmds);
out:
	return ret;
}

static void nvmet_tcp_free_cmds(struct nvmet_tcp_queue *queue)
{
	struct nvmet_tcp_cmd *cmds = queue->cmds;
	int i;

	for (i = 0; i < queue->nr_cmds; i++)
		nvmet_tcp_free_cmd(cmds + i);

	nvmet_tcp_free_cmd(&queue->connect);
	kfree(cmds);
}

static void nvmet_tcp_restore_socket_callbacks(struct nvmet_tcp_queue *queue)
{
	struct socket *sock = queue->sock;

	write_lock_bh(&sock->sk->sk_callback_lock);
	sock->sk->sk_data_ready = queue->data_ready;
	sock->sk->sk_state_change = queue->state_change;
	sock->sk->sk_write_space = queue->write_space;
	sock->sk->sk_user_data = NULL;
	write_unlock_bh(&sock->sk->sk_callback_lock);
}

static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
{
	nvmet_req_uninit(&cmd->req);
	nvmet_tcp_unmap_pdu_iovec(cmd);
	sgl_free(cmd->req.sg);
}

static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
{
	struct nvmet_tcp_cmd *cmd = queue->cmds;
	int i;

	for (i = 0; i < queue->nr_cmds; i++, cmd++) {
		if (nvmet_tcp_need_data_in(cmd))
			nvmet_tcp_finish_cmd(cmd);
	}

	if (!queue->nr_cmds && nvmet_tcp_need_data_in(&queue->connect)) {
		/* failed in connect */
		nvmet_tcp_finish_cmd(&queue->connect);
	}
}

static void nvmet_tcp_release_queue_work(struct work_struct *w)
{
	struct nvmet_tcp_queue *queue =
		container_of(w, struct nvmet_tcp_queue, release_work);

	mutex_lock(&nvmet_tcp_queue_mutex);
	list_del_init(&queue->queue_list);
	mutex_unlock(&nvmet_tcp_queue_mutex);

	nvmet_tcp_restore_socket_callbacks(queue);
	flush_work(&queue->io_work);

	nvmet_tcp_uninit_data_in_cmds(queue);
	nvmet_sq_destroy(&queue->nvme_sq);
	cancel_work_sync(&queue->io_work);
	sock_release(queue->sock);
	nvmet_tcp_free_cmds(queue);
	if (queue->hdr_digest || queue->data_digest)
		nvmet_tcp_free_crypto(queue);
	ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);

	kfree(queue);
}

static void nvmet_tcp_data_ready(struct sock *sk)
{
	struct nvmet_tcp_queue *queue;

	read_lock_bh(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (likely(queue))
		queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
	read_unlock_bh(&sk->sk_callback_lock);
}

static void nvmet_tcp_write_space(struct sock *sk)
{
	struct nvmet_tcp_queue *queue;

	read_lock_bh(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (unlikely(!queue))
		goto out;

	if (unlikely(queue->state == NVMET_TCP_Q_CONNECTING)) {
		queue->write_space(sk);
		goto out;
	}

	if (sk_stream_is_writeable(sk)) {
		clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
		queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);
	}
out:
	read_unlock_bh(&sk->sk_callback_lock);
}

static void nvmet_tcp_state_change(struct sock *sk)
{
	struct nvmet_tcp_queue *queue;

	write_lock_bh(&sk->sk_callback_lock);
	queue = sk->sk_user_data;
	if (!queue)
		goto done;

	switch (sk->sk_state) {
	case TCP_FIN_WAIT1:
	case TCP_CLOSE_WAIT:
	case TCP_CLOSE:
		sk->sk_user_data = NULL;
		nvmet_tcp_schedule_release_queue(queue);
		break;
	default:
		pr_warn("queue %d unhandled state %d\n",
			queue->idx, sk->sk_state);
	}
done:
	write_unlock_bh(&sk->sk_callback_lock);
}

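/*
 * Take over an accepted socket: record local and peer addresses, arm an
 * abortive close via SO_LINGER, and install our sk callbacks while saving
 * the originals for restoration at teardown.
 */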
static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
{
	struct socket *sock = queue->sock;
	struct linger sol = { .l_onoff = 1, .l_linger = 0 };
	int ret;

	ret = kernel_getsockname(sock,
		(struct sockaddr *)&queue->sockaddr);
	if (ret < 0)
		return ret;

	ret = kernel_getpeername(sock,
		(struct sockaddr *)&queue->sockaddr_peer);
	if (ret < 0)
		return ret;

	/*
	 * Cleanup whatever is sitting in the TCP transmit queue on socket
	 * close. This is done to prevent stale data from being sent should
	 * the network connection be restored before TCP times out.
	 */
	ret = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
			(char *)&sol, sizeof(sol));
	if (ret)
		return ret;

	write_lock_bh(&sock->sk->sk_callback_lock);
	sock->sk->sk_user_data = queue;
	queue->data_ready = sock->sk->sk_data_ready;
	sock->sk->sk_data_ready = nvmet_tcp_data_ready;
	queue->state_change = sock->sk->sk_state_change;
	sock->sk->sk_state_change = nvmet_tcp_state_change;
	queue->write_space = sock->sk->sk_write_space;
	sock->sk->sk_write_space = nvmet_tcp_write_space;
	write_unlock_bh(&sock->sk->sk_callback_lock);

	return 0;
}

static int nvmet_tcp_alloc_queue(struct nvmet_tcp_port *port,
		struct socket *newsock)
{
	struct nvmet_tcp_queue *queue;
	int ret;

	queue = kzalloc(sizeof(*queue), GFP_KERNEL);
	if (!queue)
		return -ENOMEM;

	INIT_WORK(&queue->release_work, nvmet_tcp_release_queue_work);
	INIT_WORK(&queue->io_work, nvmet_tcp_io_work);
	queue->sock = newsock;
	queue->port = port;
	queue->nr_cmds = 0;
	spin_lock_init(&queue->state_lock);
	queue->state = NVMET_TCP_Q_CONNECTING;
	INIT_LIST_HEAD(&queue->free_list);
	init_llist_head(&queue->resp_list);
	INIT_LIST_HEAD(&queue->resp_send_list);

	queue->idx = ida_simple_get(&nvmet_tcp_queue_ida, 0, 0, GFP_KERNEL);
	if (queue->idx < 0) {
		ret = queue->idx;
		goto out_free_queue;
	}

	ret = nvmet_tcp_alloc_cmd(queue, &queue->connect);
	if (ret)
		goto out_ida_remove;

	ret = nvmet_sq_init(&queue->nvme_sq);
	if (ret)
		goto out_free_connect;

	port->last_cpu = cpumask_next_wrap(port->last_cpu,
				cpu_online_mask, -1, false);
	queue->cpu = port->last_cpu;
	nvmet_prepare_receive_pdu(queue);

	mutex_lock(&nvmet_tcp_queue_mutex);
	list_add_tail(&queue->queue_list, &nvmet_tcp_queue_list);
	mutex_unlock(&nvmet_tcp_queue_mutex);

	ret = nvmet_tcp_set_queue_sock(queue);
	if (ret)
		goto out_destroy_sq;

	queue_work_on(queue->cpu, nvmet_tcp_wq, &queue->io_work);

	return 0;
out_destroy_sq:
	mutex_lock(&nvmet_tcp_queue_mutex);
	list_del_init(&queue->queue_list);
	mutex_unlock(&nvmet_tcp_queue_mutex);
	nvmet_sq_destroy(&queue->nvme_sq);
out_free_connect:
	nvmet_tcp_free_cmd(&queue->connect);
out_ida_remove:
	ida_simple_remove(&nvmet_tcp_queue_ida, queue->idx);
out_free_queue:
	kfree(queue);
	return ret;
}

static void nvmet_tcp_accept_work(struct work_struct *w)
{
	struct nvmet_tcp_port *port =
		container_of(w, struct nvmet_tcp_port, accept_work);
	struct socket *newsock;
	int ret;

	while (true) {
		ret = kernel_accept(port->sock, &newsock, O_NONBLOCK);
		if (ret < 0) {
			if (ret != -EAGAIN)
				pr_warn("failed to accept err=%d\n", ret);
			return;
		}
		ret = nvmet_tcp_alloc_queue(port, newsock);
		if (ret) {
			pr_err("failed to allocate queue\n");
			sock_release(newsock);
		}
	}
}

static void nvmet_tcp_listen_data_ready(struct sock *sk)
{
	struct nvmet_tcp_port *port;

	read_lock_bh(&sk->sk_callback_lock);
	port = sk->sk_user_data;
	if (!port)
		goto out;

	if (sk->sk_state == TCP_LISTEN)
		schedule_work(&port->accept_work);
out:
	read_unlock_bh(&sk->sk_callback_lock);
}

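/*
 * Bring up a listener for the configured discovery address: resolve it,
 * create and tune the listening socket (TCP_NODELAY, SO_REUSEADDR), and
 * hook data_ready so new connections schedule accept_work.
 */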
static int nvmet_tcp_add_port(struct nvmet_port *nport)
{
	struct nvmet_tcp_port *port;
	__kernel_sa_family_t af;
	int opt, ret;

	port = kzalloc(sizeof(*port), GFP_KERNEL);
	if (!port)
		return -ENOMEM;

	switch (nport->disc_addr.adrfam) {
	case NVMF_ADDR_FAMILY_IP4:
		af = AF_INET;
		break;
	case NVMF_ADDR_FAMILY_IP6:
		af = AF_INET6;
		break;
	default:
		pr_err("address family %d not supported\n",
				nport->disc_addr.adrfam);
		ret = -EINVAL;
		goto err_port;
	}

	ret = inet_pton_with_scope(&init_net, af, nport->disc_addr.traddr,
			nport->disc_addr.trsvcid, &port->addr);
	if (ret) {
		pr_err("malformed ip/port passed: %s:%s\n",
			nport->disc_addr.traddr, nport->disc_addr.trsvcid);
		goto err_port;
	}

	port->nport = nport;
	port->last_cpu = -1;
	INIT_WORK(&port->accept_work, nvmet_tcp_accept_work);
	if (port->nport->inline_data_size < 0)
		port->nport->inline_data_size = NVMET_TCP_DEF_INLINE_DATA_SIZE;

	ret = sock_create(port->addr.ss_family, SOCK_STREAM,
				IPPROTO_TCP, &port->sock);
	if (ret) {
		pr_err("failed to create a socket\n");
		goto err_port;
	}

	port->sock->sk->sk_user_data = port;
	port->data_ready = port->sock->sk->sk_data_ready;
	port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready;

	opt = 1;
	ret = kernel_setsockopt(port->sock, IPPROTO_TCP,
			TCP_NODELAY, (char *)&opt, sizeof(opt));
	if (ret) {
		pr_err("failed to set TCP_NODELAY sock opt %d\n", ret);
		goto err_sock;
	}

	ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_REUSEADDR,
			(char *)&opt, sizeof(opt));
	if (ret) {
		pr_err("failed to set SO_REUSEADDR sock opt %d\n", ret);
		goto err_sock;
	}

	ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr,
			sizeof(port->addr));
	if (ret) {
		pr_err("failed to bind port socket %d\n", ret);
		goto err_sock;
	}

	ret = kernel_listen(port->sock, 128);
	if (ret) {
		pr_err("failed to listen %d on port sock\n", ret);
		goto err_sock;
	}

	nport->priv = port;
	pr_info("enabling port %d (%pISpc)\n",
		le16_to_cpu(nport->disc_addr.portid), &port->addr);

	return 0;

err_sock:
	sock_release(port->sock);
err_port:
	kfree(port);
	return ret;
}

static void nvmet_tcp_remove_port(struct nvmet_port *nport)
{
	struct nvmet_tcp_port *port = nport->priv;

	write_lock_bh(&port->sock->sk->sk_callback_lock);
	port->sock->sk->sk_data_ready = port->data_ready;
	port->sock->sk->sk_user_data = NULL;
	write_unlock_bh(&port->sock->sk->sk_callback_lock);
	cancel_work_sync(&port->accept_work);

	sock_release(port->sock);
	kfree(port);
}

static void nvmet_tcp_delete_ctrl(struct nvmet_ctrl *ctrl)
{
	struct nvmet_tcp_queue *queue;

	mutex_lock(&nvmet_tcp_queue_mutex);
	list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list)
		if (queue->nvme_sq.ctrl == ctrl)
			kernel_sock_shutdown(queue->sock, SHUT_RDWR);
	mutex_unlock(&nvmet_tcp_queue_mutex);
}

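/*
 * Called once the queue size has been negotiated by the connect command;
 * the per-queue command pool is sized at twice the SQ depth.
 */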
static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq)
{
	struct nvmet_tcp_queue *queue =
		container_of(sq, struct nvmet_tcp_queue, nvme_sq);

	if (sq->qid == 0) {
		/* Let inflight controller teardown complete */
		flush_scheduled_work();
	}

	queue->nr_cmds = sq->size * 2;
	if (nvmet_tcp_alloc_cmds(queue))
		return NVME_SC_INTERNAL;
	return 0;
}

static void nvmet_tcp_disc_port_addr(struct nvmet_req *req,
		struct nvmet_port *nport, char *traddr)
{
	struct nvmet_tcp_port *port = nport->priv;

	if (inet_addr_is_any((struct sockaddr *)&port->addr)) {
		struct nvmet_tcp_cmd *cmd =
			container_of(req, struct nvmet_tcp_cmd, req);
		struct nvmet_tcp_queue *queue = cmd->queue;

		sprintf(traddr, "%pISc", (struct sockaddr *)&queue->sockaddr);
	} else {
		memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE);
	}
}

static struct nvmet_fabrics_ops nvmet_tcp_ops = {
	.owner			= THIS_MODULE,
	.type			= NVMF_TRTYPE_TCP,
	.msdbd			= 1,
	.has_keyed_sgls		= 0,
	.add_port		= nvmet_tcp_add_port,
	.remove_port		= nvmet_tcp_remove_port,
	.queue_response		= nvmet_tcp_queue_response,
	.delete_ctrl		= nvmet_tcp_delete_ctrl,
	.install_queue		= nvmet_tcp_install_queue,
	.disc_traddr		= nvmet_tcp_disc_port_addr,
};

static int __init nvmet_tcp_init(void)
{
	int ret;

	nvmet_tcp_wq = alloc_workqueue("nvmet_tcp_wq", WQ_HIGHPRI, 0);
	if (!nvmet_tcp_wq)
		return -ENOMEM;

	ret = nvmet_register_transport(&nvmet_tcp_ops);
	if (ret)
		goto err;

	return 0;
err:
	destroy_workqueue(nvmet_tcp_wq);
	return ret;
}

static void __exit nvmet_tcp_exit(void)
{
	struct nvmet_tcp_queue *queue;

	nvmet_unregister_transport(&nvmet_tcp_ops);

	flush_scheduled_work();
	mutex_lock(&nvmet_tcp_queue_mutex);
	list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list)
		kernel_sock_shutdown(queue->sock, SHUT_RDWR);
	mutex_unlock(&nvmet_tcp_queue_mutex);
	flush_scheduled_work();

	destroy_workqueue(nvmet_tcp_wq);
}

module_init(nvmet_tcp_init);
module_exit(nvmet_tcp_exit);

MODULE_LICENSE("GPL v2");
MODULE_ALIAS("nvmet-transport-3"); /* 3 == NVMF_TRTYPE_TCP */