/*
 * Copyright (c) 2016 Avago Technologies.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful.
 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
 * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
 * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO
 * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID.
 * See the GNU General Public License for more details, a copy of which
 * can be found in the file COPYING included with this package
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/parser.h>
#include <uapi/scsi/fc/fc_fs.h>
#include <uapi/scsi/fc/fc_els.h>
#include <linux/delay.h>

#include "nvme.h"
#include "fabrics.h"
#include <linux/nvme-fc-driver.h>
#include <linux/nvme-fc.h>
/* *************************** Data Structures/Defines ****************** */

/*
 * We handle AEN commands ourselves and don't even let the
 * block layer know about them.
 */
#define NVME_FC_NR_AEN_COMMANDS	1
#define NVME_FC_AQ_BLKMQ_DEPTH	\
	(NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
#define AEN_CMDID_BASE		(NVME_FC_AQ_BLKMQ_DEPTH + 1)
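
/*
 * Worked example (illustrative; assumes the core's NVME_AQ_DEPTH is 32):
 * the admin blk-mq queue would then be sized to 32 - 1 = 31 tags, and the
 * single AEN command gets command id 32 (NVME_FC_AQ_BLKMQ_DEPTH + 1),
 * safely outside any tag value blk-mq will generate.
 */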
enum nvme_fc_queue_flags {
	NVME_FC_Q_CONNECTED = (1 << 0),
};

#define NVMEFC_QUEUE_DELAY	3		/* ms units */

struct nvme_fc_queue {
	struct nvme_fc_ctrl	*ctrl;
	struct device		*dev;
	struct blk_mq_hw_ctx	*hctx;
	void			*lldd_handle;
	int			queue_size;
	size_t			cmnd_capsule_len;
	u32			qnum;
	u32			rqcnt;

	u64			connection_id;
	atomic_t		csn;

	unsigned long		flags;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

enum nvme_fcop_flags {
	FCOP_FLAGS_TERMIO	= (1 << 0),
	FCOP_FLAGS_RELEASED	= (1 << 1),
	FCOP_FLAGS_COMPLETE	= (1 << 2),
	FCOP_FLAGS_AEN		= (1 << 3),
};

struct nvmefc_ls_req_op {
	struct nvmefc_ls_req	ls_req;

	struct nvme_fc_rport	*rport;
	struct nvme_fc_queue	*queue;
	u32			flags;

	int			ls_error;
	struct completion	ls_done;
	struct list_head	lsreq_list;	/* rport->ls_req_list */
	bool			req_queued;
};

enum nvme_fcpop_state {
	FCPOP_STATE_UNINIT	= 0,
	FCPOP_STATE_IDLE	= 1,
	FCPOP_STATE_ACTIVE	= 2,
	FCPOP_STATE_ABORTED	= 3,
	FCPOP_STATE_COMPLETE	= 4,
};

struct nvme_fc_fcp_op {
	struct nvme_request	nreq;		/*
						 * nvme/host/core.c
						 * requires this to be
						 * the 1st element in the
						 * private structure
						 * associated with the
						 * request.
						 */
	struct nvmefc_fcp_req	fcp_req;

	struct nvme_fc_ctrl	*ctrl;
	struct nvme_fc_queue	*queue;
	struct request		*rq;

	atomic_t		state;
	u32			flags;
	u32			rqno;
	u32			nents;

	struct nvme_fc_cmd_iu	cmd_iu;
	struct nvme_fc_ersp_iu	rsp_iu;
};

struct nvme_fc_lport {
	struct nvme_fc_local_port	localport;

	struct ida			endp_cnt;
	struct list_head		port_list;	/* nvme_fc_lport_list */
	struct list_head		endp_list;
	struct device			*dev;	/* physical device for dma */
	struct nvme_fc_port_template	*ops;
	struct kref			ref;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

struct nvme_fc_rport {
	struct nvme_fc_remote_port	remoteport;

	struct list_head		endp_list; /* for lport->endp_list */
	struct list_head		ctrl_list;
	struct list_head		ls_req_list;
	struct device			*dev;	/* physical device for dma */
	struct nvme_fc_lport		*lport;
	spinlock_t			lock;
	struct kref			ref;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

enum nvme_fcctrl_flags {
	FCCTRL_TERMIO		= (1 << 0),
};

struct nvme_fc_ctrl {
	spinlock_t		lock;
	struct nvme_fc_queue	*queues;

	struct device		*dev;
	struct nvme_fc_lport	*lport;
	struct nvme_fc_rport	*rport;
	u32			cnum;

	u64			association_id;

	struct list_head	ctrl_list;	/* rport->ctrl_list */

	struct blk_mq_tag_set	admin_tag_set;
	struct blk_mq_tag_set	tag_set;

	struct work_struct	delete_work;
	struct delayed_work	connect_work;

	struct kref		ref;
	u32			flags;
	u32			iocnt;
	wait_queue_head_t	ioabort_wait;

	struct nvme_fc_fcp_op	aen_ops[NVME_FC_NR_AEN_COMMANDS];

	struct nvme_ctrl	ctrl;
};
static inline struct nvme_fc_ctrl *
to_fc_ctrl(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct nvme_fc_ctrl, ctrl);
}

static inline struct nvme_fc_lport *
localport_to_lport(struct nvme_fc_local_port *portptr)
{
	return container_of(portptr, struct nvme_fc_lport, localport);
}

static inline struct nvme_fc_rport *
remoteport_to_rport(struct nvme_fc_remote_port *portptr)
{
	return container_of(portptr, struct nvme_fc_rport, remoteport);
}

static inline struct nvmefc_ls_req_op *
ls_req_to_lsop(struct nvmefc_ls_req *lsreq)
{
	return container_of(lsreq, struct nvmefc_ls_req_op, ls_req);
}

static inline struct nvme_fc_fcp_op *
fcp_req_to_fcp_op(struct nvmefc_fcp_req *fcpreq)
{
	return container_of(fcpreq, struct nvme_fc_fcp_op, fcp_req);
}


/* *************************** Globals **************************** */


static DEFINE_SPINLOCK(nvme_fc_lock);

static LIST_HEAD(nvme_fc_lport_list);
static DEFINE_IDA(nvme_fc_local_port_cnt);
static DEFINE_IDA(nvme_fc_ctrl_cnt);


/* *********************** FC-NVME Port Management ************************ */

static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *);
static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
			struct nvme_fc_queue *, unsigned int);
static void
nvme_fc_free_lport(struct kref *ref)
{
	struct nvme_fc_lport *lport =
		container_of(ref, struct nvme_fc_lport, ref);
	unsigned long flags;

	WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED);
	WARN_ON(!list_empty(&lport->endp_list));

	/* remove from transport list */
	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_del(&lport->port_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	/* let the LLDD know we've finished tearing it down */
	lport->ops->localport_delete(&lport->localport);

	ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
	ida_destroy(&lport->endp_cnt);

	put_device(lport->dev);

	kfree(lport);
}

static void
nvme_fc_lport_put(struct nvme_fc_lport *lport)
{
	kref_put(&lport->ref, nvme_fc_free_lport);
}

static int
nvme_fc_lport_get(struct nvme_fc_lport *lport)
{
	return kref_get_unless_zero(&lport->ref);
}


static struct nvme_fc_lport *
nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo)
{
	struct nvme_fc_lport *lport;
	unsigned long flags;

	spin_lock_irqsave(&nvme_fc_lock, flags);

	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
		if (lport->localport.node_name != pinfo->node_name ||
		    lport->localport.port_name != pinfo->port_name)
			continue;

		if (lport->localport.port_state != FC_OBJSTATE_DELETED) {
			lport = ERR_PTR(-EEXIST);
			goto out_done;
		}

		if (!nvme_fc_lport_get(lport)) {
			/*
			 * fails if ref cnt already 0. If so,
			 * act as if lport already deleted
			 */
			lport = NULL;
			goto out_done;
		}

		/* resume the lport */

		lport->localport.port_role = pinfo->port_role;
		lport->localport.port_id = pinfo->port_id;
		lport->localport.port_state = FC_OBJSTATE_ONLINE;

		spin_unlock_irqrestore(&nvme_fc_lock, flags);

		return lport;
	}

	lport = NULL;

out_done:
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	return lport;
}
/**
 * nvme_fc_register_localport - transport entry point called by an
 *                              LLDD to register the existence of a NVME
 *                              host FC port.
 * @pinfo:     pointer to information about the port to be registered
 * @template:  LLDD entrypoints and operational parameters for the port
 * @dev:       physical hardware device node port corresponds to. Will be
 *             used for DMA mappings
 * @portptr:   pointer to a local port pointer. Upon success, the routine
 *             will allocate a nvme_fc_local_port structure and place its
 *             address in the local port pointer. Upon failure, local port
 *             pointer will be set to 0.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
			struct nvme_fc_port_template *template,
			struct device *dev,
			struct nvme_fc_local_port **portptr)
{
	struct nvme_fc_lport *newrec;
	unsigned long flags;
	int ret, idx;

	if (!template->localport_delete || !template->remoteport_delete ||
	    !template->ls_req || !template->fcp_io ||
	    !template->ls_abort || !template->fcp_abort ||
	    !template->max_hw_queues || !template->max_sgl_segments ||
	    !template->max_dif_sgl_segments || !template->dma_boundary) {
		ret = -EINVAL;
		goto out_reghost_failed;
	}

	/*
	 * look to see if there is already a localport that had been
	 * deregistered and in the process of waiting for all the
	 * references to fully be removed.  If the references haven't
	 * expired, we can simply re-enable the localport. Remoteports
	 * and controller reconnections should resume naturally.
	 */
	newrec = nvme_fc_attach_to_unreg_lport(pinfo);

	/* found an lport, but something about its state is bad */
	if (IS_ERR(newrec)) {
		ret = PTR_ERR(newrec);
		goto out_reghost_failed;

	/* found existing lport, which was resumed */
	} else if (newrec) {
		*portptr = &newrec->localport;
		return 0;
	}

	/* nothing found - allocate a new localport struct */

	newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz),
			 GFP_KERNEL);
	if (!newrec) {
		ret = -ENOMEM;
		goto out_reghost_failed;
	}

	idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL);
	if (idx < 0) {
		ret = -ENOSPC;
		goto out_fail_kfree;
	}

	if (!get_device(dev) && dev) {
		ret = -ENODEV;
		goto out_ida_put;
	}

	INIT_LIST_HEAD(&newrec->port_list);
	INIT_LIST_HEAD(&newrec->endp_list);
	kref_init(&newrec->ref);
	newrec->ops = template;
	newrec->dev = dev;
	ida_init(&newrec->endp_cnt);
	newrec->localport.private = &newrec[1];
	newrec->localport.node_name = pinfo->node_name;
	newrec->localport.port_name = pinfo->port_name;
	newrec->localport.port_role = pinfo->port_role;
	newrec->localport.port_id = pinfo->port_id;
	newrec->localport.port_state = FC_OBJSTATE_ONLINE;
	newrec->localport.port_num = idx;

	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_add_tail(&newrec->port_list, &nvme_fc_lport_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	if (dev)
		dma_set_seg_boundary(dev, template->dma_boundary);

	*portptr = &newrec->localport;
	return 0;

out_ida_put:
	ida_simple_remove(&nvme_fc_local_port_cnt, idx);
out_fail_kfree:
	kfree(newrec);
out_reghost_failed:
	*portptr = NULL;

	return ret;
}
EXPORT_SYMBOL_GPL(nvme_fc_register_localport);
/**
 * nvme_fc_unregister_localport - transport entry point called by an
 *                                LLDD to deregister/remove a previously
 *                                registered NVME host FC port.
 * @portptr: pointer to the (registered) local port that is to be
 *           deregistered.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr)
{
	struct nvme_fc_lport *lport = localport_to_lport(portptr);
	unsigned long flags;

	if (!portptr)
		return -EINVAL;

	spin_lock_irqsave(&nvme_fc_lock, flags);

	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
		spin_unlock_irqrestore(&nvme_fc_lock, flags);
		return -EINVAL;
	}
	portptr->port_state = FC_OBJSTATE_DELETED;

	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	nvme_fc_lport_put(lport);

	return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport);

/**
 * nvme_fc_register_remoteport - transport entry point called by an
 *                               LLDD to register the existence of a NVME
 *                               subsystem FC port on its fabric.
 * @localport: pointer to the (registered) local port that the remote
 *             subsystem port is connected to.
 * @pinfo:     pointer to information about the port to be registered
 * @portptr:   pointer to a remote port pointer. Upon success, the routine
 *             will allocate a nvme_fc_remote_port structure and place its
 *             address in the remote port pointer. Upon failure, remote port
 *             pointer will be set to 0.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
				struct nvme_fc_port_info *pinfo,
				struct nvme_fc_remote_port **portptr)
{
	struct nvme_fc_lport *lport = localport_to_lport(localport);
	struct nvme_fc_rport *newrec;
	unsigned long flags;
	int ret, idx;

	newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz),
			 GFP_KERNEL);
	if (!newrec) {
		ret = -ENOMEM;
		goto out_reghost_failed;
	}

	if (!nvme_fc_lport_get(lport)) {
		ret = -ESHUTDOWN;
		goto out_kfree_rport;
	}

	idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL);
	if (idx < 0) {
		ret = -ENOSPC;
		goto out_lport_put;
	}

	INIT_LIST_HEAD(&newrec->endp_list);
	INIT_LIST_HEAD(&newrec->ctrl_list);
	INIT_LIST_HEAD(&newrec->ls_req_list);
	kref_init(&newrec->ref);
	spin_lock_init(&newrec->lock);
	newrec->remoteport.localport = &lport->localport;
	newrec->dev = lport->dev;
	newrec->lport = lport;
	newrec->remoteport.private = &newrec[1];
	newrec->remoteport.port_role = pinfo->port_role;
	newrec->remoteport.node_name = pinfo->node_name;
	newrec->remoteport.port_name = pinfo->port_name;
	newrec->remoteport.port_id = pinfo->port_id;
	newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
	newrec->remoteport.port_num = idx;

	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_add_tail(&newrec->endp_list, &lport->endp_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	*portptr = &newrec->remoteport;
	return 0;

out_lport_put:
	nvme_fc_lport_put(lport);
out_kfree_rport:
	kfree(newrec);
out_reghost_failed:
	*portptr = NULL;
	return ret;
}
EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport);
static void
nvme_fc_free_rport(struct kref *ref)
{
	struct nvme_fc_rport *rport =
		container_of(ref, struct nvme_fc_rport, ref);
	struct nvme_fc_lport *lport =
		localport_to_lport(rport->remoteport.localport);
	unsigned long flags;

	WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
	WARN_ON(!list_empty(&rport->ctrl_list));

	/* remove from lport list */
	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_del(&rport->endp_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	/* let the LLDD know we've finished tearing it down */
	lport->ops->remoteport_delete(&rport->remoteport);

	ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);

	kfree(rport);

	nvme_fc_lport_put(lport);
}

static void
nvme_fc_rport_put(struct nvme_fc_rport *rport)
{
	kref_put(&rport->ref, nvme_fc_free_rport);
}

static int
nvme_fc_rport_get(struct nvme_fc_rport *rport)
{
	return kref_get_unless_zero(&rport->ref);
}

static int
nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
{
	struct nvmefc_ls_req_op *lsop;
	unsigned long flags;

restart:
	spin_lock_irqsave(&rport->lock, flags);

	list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) {
		if (!(lsop->flags & FCOP_FLAGS_TERMIO)) {
			lsop->flags |= FCOP_FLAGS_TERMIO;
			spin_unlock_irqrestore(&rport->lock, flags);
			rport->lport->ops->ls_abort(&rport->lport->localport,
						&rport->remoteport,
						&lsop->ls_req);
			goto restart;
		}
	}
	spin_unlock_irqrestore(&rport->lock, flags);

	return 0;
}

/**
 * nvme_fc_unregister_remoteport - transport entry point called by an
 *                                 LLDD to deregister/remove a previously
 *                                 registered NVME subsystem FC port.
 * @portptr: pointer to the (registered) remote port that is to be
 *           deregistered.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
{
	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
	struct nvme_fc_ctrl *ctrl;
	unsigned long flags;

	if (!portptr)
		return -EINVAL;

	spin_lock_irqsave(&rport->lock, flags);

	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
		spin_unlock_irqrestore(&rport->lock, flags);
		return -EINVAL;
	}
	portptr->port_state = FC_OBJSTATE_DELETED;

	/* tear down all associations to the remote port */
	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
		__nvme_fc_del_ctrl(ctrl);

	spin_unlock_irqrestore(&rport->lock, flags);

	nvme_fc_abort_lsops(rport);

	nvme_fc_rport_put(rport);
	return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport);
/* *********************** FC-NVME DMA Handling **************************** */

/*
 * The fcloop device passes in a NULL device pointer. Real LLD's will
 * pass in a valid device pointer. If NULL is passed to the dma mapping
 * routines, depending on the platform, it may or may not succeed, and
 * may crash.
 *
 * As such:
 * Wrapper all the dma routines and check the dev pointer.
 *
 * For simple mappings (those that return just a dma address), we'll
 * noop them, returning a dma address of 0.
 *
 * On more complex mappings (dma_map_sg), a pseudo routine fills
 * in the scatter list, setting all dma addresses to 0.
 */

static inline dma_addr_t
fc_dma_map_single(struct device *dev, void *ptr, size_t size,
		enum dma_data_direction dir)
{
	return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)0L;
}
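
/*
 * Illustrative only: with fcloop's NULL dev the wrapper degenerates to a
 * noop mapping, e.g.
 *
 *	dma_addr_t a = fc_dma_map_single(NULL, buf, len, DMA_TO_DEVICE);
 *	// a == 0, and fc_dma_mapping_error(NULL, a) reports no error
 *
 * while a real LLDD device pointer routes straight to dma_map_single().
 */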
static inline int
fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	return dev ? dma_mapping_error(dev, dma_addr) : 0;
}

static inline void
fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
	enum dma_data_direction dir)
{
	if (dev)
		dma_unmap_single(dev, addr, size, dir);
}

static inline void
fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
		enum dma_data_direction dir)
{
	if (dev)
		dma_sync_single_for_cpu(dev, addr, size, dir);
}

static inline void
fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size,
		enum dma_data_direction dir)
{
	if (dev)
		dma_sync_single_for_device(dev, addr, size, dir);
}

/* pseudo dma_map_sg call */
static int
fc_map_sg(struct scatterlist *sg, int nents)
{
	struct scatterlist *s;
	int i;

	WARN_ON(nents == 0 || sg[0].length == 0);

	for_each_sg(sg, s, nents, i) {
		s->dma_address = 0L;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		s->dma_length = s->length;
#endif
	}
	return nents;
}

static inline int
fc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir)
{
	return dev ? dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents);
}

static inline void
fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir)
{
	if (dev)
		dma_unmap_sg(dev, sg, nents, dir);
}
/* *********************** FC-NVME LS Handling **************************** */

static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);


static void
__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
{
	struct nvme_fc_rport *rport = lsop->rport;
	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
	unsigned long flags;

	spin_lock_irqsave(&rport->lock, flags);

	if (!lsop->req_queued) {
		spin_unlock_irqrestore(&rport->lock, flags);
		return;
	}

	list_del(&lsop->lsreq_list);

	lsop->req_queued = false;

	spin_unlock_irqrestore(&rport->lock, flags);

	fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
				  (lsreq->rqstlen + lsreq->rsplen),
				  DMA_BIDIRECTIONAL);

	nvme_fc_rport_put(rport);
}

static int
__nvme_fc_send_ls_req(struct nvme_fc_rport *rport,
		struct nvmefc_ls_req_op *lsop,
		void (*done)(struct nvmefc_ls_req *req, int status))
{
	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
	unsigned long flags;
	int ret = 0;

	if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
		return -ECONNREFUSED;

	if (!nvme_fc_rport_get(rport))
		return -ESHUTDOWN;

	lsreq->done = done;
	lsop->rport = rport;
	lsop->req_queued = false;
	INIT_LIST_HEAD(&lsop->lsreq_list);
	init_completion(&lsop->ls_done);

	lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr,
				  lsreq->rqstlen + lsreq->rsplen,
				  DMA_BIDIRECTIONAL);
	if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) {
		ret = -EFAULT;
		goto out_putrport;
	}
	lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen;

	spin_lock_irqsave(&rport->lock, flags);

	list_add_tail(&lsop->lsreq_list, &rport->ls_req_list);

	lsop->req_queued = true;

	spin_unlock_irqrestore(&rport->lock, flags);

	ret = rport->lport->ops->ls_req(&rport->lport->localport,
					&rport->remoteport, lsreq);
	if (ret)
		goto out_unlink;

	return 0;

out_unlink:
	lsop->ls_error = ret;
	spin_lock_irqsave(&rport->lock, flags);
	lsop->req_queued = false;
	list_del(&lsop->lsreq_list);
	spin_unlock_irqrestore(&rport->lock, flags);
	fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
				  (lsreq->rqstlen + lsreq->rsplen),
				  DMA_BIDIRECTIONAL);
out_putrport:
	nvme_fc_rport_put(rport);

	return ret;
}

static void
nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status)
{
	struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);

	lsop->ls_error = status;
	complete(&lsop->ls_done);
}

static int
nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop)
{
	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
	struct fcnvme_ls_rjt *rjt = lsreq->rspaddr;
	int ret;

	ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done);

	if (!ret) {
		/*
		 * No timeout/not interruptible as we need the struct
		 * to exist until the lldd calls us back. Thus mandate
		 * wait until driver calls back. lldd responsible for
		 * the timeout action
		 */
		wait_for_completion(&lsop->ls_done);

		__nvme_fc_finish_ls_req(lsop);

		ret = lsop->ls_error;
	}

	if (ret)
		return ret;

	/* ACC or RJT payload ? */
	if (rjt->w0.ls_cmd == FCNVME_LS_RJT)
		return -ENXIO;

	return 0;
}

static int
nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport,
		struct nvmefc_ls_req_op *lsop,
		void (*done)(struct nvmefc_ls_req *req, int status))
{
	/* don't wait for completion */

	return __nvme_fc_send_ls_req(rport, lsop, done);
}
/* Validation Error indexes into the string table below */
enum {
	VERR_NO_ERROR		= 0,
	VERR_LSACC		= 1,
	VERR_LSDESC_RQST	= 2,
	VERR_LSDESC_RQST_LEN	= 3,
	VERR_ASSOC_ID		= 4,
	VERR_ASSOC_ID_LEN	= 5,
	VERR_CONN_ID		= 6,
	VERR_CONN_ID_LEN	= 7,
	VERR_CR_ASSOC		= 8,
	VERR_CR_ASSOC_ACC_LEN	= 9,
	VERR_CR_CONN		= 10,
	VERR_CR_CONN_ACC_LEN	= 11,
	VERR_DISCONN		= 12,
	VERR_DISCONN_ACC_LEN	= 13,
};

static char *validation_errors[] = {
	"OK",
	"Not LS_ACC",
	"Not LSDESC_RQST",
	"Bad LSDESC_RQST Length",
	"Not Association ID",
	"Bad Association ID Length",
	"Not Connection ID",
	"Bad Connection ID Length",
	"Not CR_ASSOC Rqst",
	"Bad CR_ASSOC ACC Length",
	"Not CR_CONN Rqst",
	"Bad CR_CONN ACC Length",
	"Not Disconnect Rqst",
	"Bad Disconnect ACC Length",
};
static int
nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
	struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio)
{
	struct nvmefc_ls_req_op *lsop;
	struct nvmefc_ls_req *lsreq;
	struct fcnvme_ls_cr_assoc_rqst *assoc_rqst;
	struct fcnvme_ls_cr_assoc_acc *assoc_acc;
	int ret, fcret = 0;

	lsop = kzalloc((sizeof(*lsop) +
			 ctrl->lport->ops->lsrqst_priv_sz +
			 sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL);
	if (!lsop) {
		ret = -ENOMEM;
		goto out_no_memory;
	}
	lsreq = &lsop->ls_req;

	lsreq->private = (void *)&lsop[1];
	assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)
			(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
	assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1];

	assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION;
	assoc_rqst->desc_list_len =
			cpu_to_be32(sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));

	assoc_rqst->assoc_cmd.desc_tag =
			cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD);
	assoc_rqst->assoc_cmd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));

	assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
	assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize);
	/* Linux supports only Dynamic controllers */
	assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
	uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
	strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn,
		min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE));
	strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn,
		min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE));

	lsop->queue = queue;
	lsreq->rqstaddr = assoc_rqst;
	lsreq->rqstlen = sizeof(*assoc_rqst);
	lsreq->rspaddr = assoc_acc;
	lsreq->rsplen = sizeof(*assoc_acc);
	lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;

	ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
	if (ret)
		goto out_free_buffer;

	/* process connect LS completion */

	/* validate the ACC response */
	if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
		fcret = VERR_LSACC;
	else if (assoc_acc->hdr.desc_list_len !=
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_ls_cr_assoc_acc)))
		fcret = VERR_CR_ASSOC_ACC_LEN;
	else if (assoc_acc->hdr.rqst.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_RQST))
		fcret = VERR_LSDESC_RQST;
	else if (assoc_acc->hdr.rqst.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
		fcret = VERR_LSDESC_RQST_LEN;
	else if (assoc_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_ASSOCIATION)
		fcret = VERR_CR_ASSOC;
	else if (assoc_acc->associd.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
		fcret = VERR_ASSOC_ID;
	else if (assoc_acc->associd.desc_len !=
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_assoc_id)))
		fcret = VERR_ASSOC_ID_LEN;
	else if (assoc_acc->connectid.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_CONN_ID))
		fcret = VERR_CONN_ID;
	else if (assoc_acc->connectid.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id)))
		fcret = VERR_CONN_ID_LEN;

	if (fcret) {
		ret = -EBADF;
		dev_err(ctrl->dev,
			"q %d connect failed: %s\n",
			queue->qnum, validation_errors[fcret]);
	} else {
		ctrl->association_id =
			be64_to_cpu(assoc_acc->associd.association_id);
		queue->connection_id =
			be64_to_cpu(assoc_acc->connectid.connection_id);
		set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
	}

out_free_buffer:
	kfree(lsop);
out_no_memory:
	if (ret)
		dev_err(ctrl->dev,
			"queue %d connect admin queue failed (%d).\n",
			queue->qnum, ret);
	return ret;
}
static int
nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
			u16 qsize, u16 ersp_ratio)
{
	struct nvmefc_ls_req_op *lsop;
	struct nvmefc_ls_req *lsreq;
	struct fcnvme_ls_cr_conn_rqst *conn_rqst;
	struct fcnvme_ls_cr_conn_acc *conn_acc;
	int ret, fcret = 0;

	lsop = kzalloc((sizeof(*lsop) +
			 ctrl->lport->ops->lsrqst_priv_sz +
			 sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL);
	if (!lsop) {
		ret = -ENOMEM;
		goto out_no_memory;
	}
	lsreq = &lsop->ls_req;

	lsreq->private = (void *)&lsop[1];
	conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)
			(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
	conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1];

	conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION;
	conn_rqst->desc_list_len = cpu_to_be32(
				sizeof(struct fcnvme_lsdesc_assoc_id) +
				sizeof(struct fcnvme_lsdesc_cr_conn_cmd));

	conn_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
	conn_rqst->associd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_assoc_id));
	conn_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);
	conn_rqst->connect_cmd.desc_tag =
			cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD);
	conn_rqst->connect_cmd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
	conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
	conn_rqst->connect_cmd.qid  = cpu_to_be16(queue->qnum);
	conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize);

	lsop->queue = queue;
	lsreq->rqstaddr = conn_rqst;
	lsreq->rqstlen = sizeof(*conn_rqst);
	lsreq->rspaddr = conn_acc;
	lsreq->rsplen = sizeof(*conn_acc);
	lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;

	ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
	if (ret)
		goto out_free_buffer;

	/* process connect LS completion */

	/* validate the ACC response */
	if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
		fcret = VERR_LSACC;
	else if (conn_acc->hdr.desc_list_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)))
		fcret = VERR_CR_CONN_ACC_LEN;
	else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST))
		fcret = VERR_LSDESC_RQST;
	else if (conn_acc->hdr.rqst.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
		fcret = VERR_LSDESC_RQST_LEN;
	else if (conn_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_CONNECTION)
		fcret = VERR_CR_CONN;
	else if (conn_acc->connectid.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_CONN_ID))
		fcret = VERR_CONN_ID;
	else if (conn_acc->connectid.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id)))
		fcret = VERR_CONN_ID_LEN;

	if (fcret) {
		ret = -EBADF;
		dev_err(ctrl->dev,
			"q %d connect failed: %s\n",
			queue->qnum, validation_errors[fcret]);
	} else {
		queue->connection_id =
			be64_to_cpu(conn_acc->connectid.connection_id);
		set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
	}

out_free_buffer:
	kfree(lsop);
out_no_memory:
	if (ret)
		dev_err(ctrl->dev,
			"queue %d connect command failed (%d).\n",
			queue->qnum, ret);
	return ret;
}
static void
nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
{
	struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);

	__nvme_fc_finish_ls_req(lsop);

	/* fc-nvme initiator doesn't care about success or failure of cmd */

	kfree(lsop);
}

/*
 * This routine sends a FC-NVME LS to disconnect (aka terminate)
 * the FC-NVME Association.  Terminating the association also
 * terminates the FC-NVME connections (per queue, both admin and io
 * queues) that are part of the association. E.g. things are torn
 * down, and the related FC-NVME Association ID and Connection IDs
 * become invalid.
 *
 * The behavior of the fc-nvme initiator is such that its
 * understanding of the association and connections will implicitly
 * be torn down. The action is implicit as it may be due to a loss of
 * connectivity with the fc-nvme target, so you may never get a
 * response even if you tried.  As such, the action of this routine
 * is to asynchronously send the LS, ignore any results of the LS, and
 * continue on with terminating the association. If the fc-nvme target
 * is present and receives the LS, it too can tear down.
 */
static void
nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
{
	struct fcnvme_ls_disconnect_rqst *discon_rqst;
	struct fcnvme_ls_disconnect_acc *discon_acc;
	struct nvmefc_ls_req_op *lsop;
	struct nvmefc_ls_req *lsreq;
	int ret;

	lsop = kzalloc((sizeof(*lsop) +
			 ctrl->lport->ops->lsrqst_priv_sz +
			 sizeof(*discon_rqst) + sizeof(*discon_acc)),
			GFP_KERNEL);
	if (!lsop)
		/* couldn't send it... too bad */
		return;

	lsreq = &lsop->ls_req;

	lsreq->private = (void *)&lsop[1];
	discon_rqst = (struct fcnvme_ls_disconnect_rqst *)
			(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
	discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1];

	discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT;
	discon_rqst->desc_list_len = cpu_to_be32(
				sizeof(struct fcnvme_lsdesc_assoc_id) +
				sizeof(struct fcnvme_lsdesc_disconn_cmd));

	discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
	discon_rqst->associd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_assoc_id));

	discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);

	discon_rqst->discon_cmd.desc_tag = cpu_to_be32(
						FCNVME_LSDESC_DISCONN_CMD);
	discon_rqst->discon_cmd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_disconn_cmd));
	discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION;
	discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id);

	lsreq->rqstaddr = discon_rqst;
	lsreq->rqstlen = sizeof(*discon_rqst);
	lsreq->rspaddr = discon_acc;
	lsreq->rsplen = sizeof(*discon_acc);
	lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;

	ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
				nvme_fc_disconnect_assoc_done);
	if (ret)
		kfree(lsop);

	/* only meaningful part to terminating the association */
	ctrl->association_id = 0;
}
1204 static void __nvme_fc_final_op_cleanup(struct request
*rq
);
1205 static void nvme_fc_error_recovery(struct nvme_fc_ctrl
*ctrl
, char *errmsg
);
1208 nvme_fc_reinit_request(void *data
, struct request
*rq
)
1210 struct nvme_fc_fcp_op
*op
= blk_mq_rq_to_pdu(rq
);
1211 struct nvme_fc_cmd_iu
*cmdiu
= &op
->cmd_iu
;
1213 memset(cmdiu
, 0, sizeof(*cmdiu
));
1214 cmdiu
->scsi_id
= NVME_CMD_SCSI_ID
;
1215 cmdiu
->fc_id
= NVME_CMD_FC_ID
;
1216 cmdiu
->iu_len
= cpu_to_be16(sizeof(*cmdiu
) / sizeof(u32
));
1217 memset(&op
->rsp_iu
, 0, sizeof(op
->rsp_iu
));
1223 __nvme_fc_exit_request(struct nvme_fc_ctrl
*ctrl
,
1224 struct nvme_fc_fcp_op
*op
)
1226 fc_dma_unmap_single(ctrl
->lport
->dev
, op
->fcp_req
.rspdma
,
1227 sizeof(op
->rsp_iu
), DMA_FROM_DEVICE
);
1228 fc_dma_unmap_single(ctrl
->lport
->dev
, op
->fcp_req
.cmddma
,
1229 sizeof(op
->cmd_iu
), DMA_TO_DEVICE
);
1231 atomic_set(&op
->state
, FCPOP_STATE_UNINIT
);
1235 nvme_fc_exit_request(struct blk_mq_tag_set
*set
, struct request
*rq
,
1236 unsigned int hctx_idx
)
1238 struct nvme_fc_fcp_op
*op
= blk_mq_rq_to_pdu(rq
);
1240 return __nvme_fc_exit_request(set
->driver_data
, op
);
1244 __nvme_fc_abort_op(struct nvme_fc_ctrl
*ctrl
, struct nvme_fc_fcp_op
*op
)
1248 state
= atomic_xchg(&op
->state
, FCPOP_STATE_ABORTED
);
1249 if (state
!= FCPOP_STATE_ACTIVE
) {
1250 atomic_set(&op
->state
, state
);
1254 ctrl
->lport
->ops
->fcp_abort(&ctrl
->lport
->localport
,
1255 &ctrl
->rport
->remoteport
,
1256 op
->queue
->lldd_handle
,
static void
nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
{
	struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
	unsigned long flags;
	int i, ret;

	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
		if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE)
			continue;

		spin_lock_irqsave(&ctrl->lock, flags);
		if (ctrl->flags & FCCTRL_TERMIO) {
			ctrl->iocnt++;
			aen_op->flags |= FCOP_FLAGS_TERMIO;
		}
		spin_unlock_irqrestore(&ctrl->lock, flags);

		ret = __nvme_fc_abort_op(ctrl, aen_op);
		if (ret) {
			/*
			 * if __nvme_fc_abort_op failed the io wasn't
			 * active. Thus this call path is running in
			 * parallel to the io complete. Treat as non-error.
			 */

			/* back out the flags/counters */
			spin_lock_irqsave(&ctrl->lock, flags);
			if (ctrl->flags & FCCTRL_TERMIO)
				ctrl->iocnt--;
			aen_op->flags &= ~FCOP_FLAGS_TERMIO;
			spin_unlock_irqrestore(&ctrl->lock, flags);
			return;
		}
	}
}

static inline bool
__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
		struct nvme_fc_fcp_op *op)
{
	unsigned long flags;
	bool complete_rq = false;

	spin_lock_irqsave(&ctrl->lock, flags);
	if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
		if (ctrl->flags & FCCTRL_TERMIO) {
			if (!--ctrl->iocnt)
				wake_up(&ctrl->ioabort_wait);
		}
	}
	if (op->flags & FCOP_FLAGS_RELEASED)
		complete_rq = true;
	else
		op->flags |= FCOP_FLAGS_COMPLETE;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	return complete_rq;
}
static void
nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
{
	struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req);
	struct request *rq = op->rq;
	struct nvmefc_fcp_req *freq = &op->fcp_req;
	struct nvme_fc_ctrl *ctrl = op->ctrl;
	struct nvme_fc_queue *queue = op->queue;
	struct nvme_completion *cqe = &op->rsp_iu.cqe;
	struct nvme_command *sqe = &op->cmd_iu.sqe;
	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
	union nvme_result result;
	bool complete_rq, terminate_assoc = true;

	/*
	 * WARNING:
	 * The current linux implementation of a nvme controller
	 * allocates a single tag set for all io queues and sizes
	 * the io queues to fully hold all possible tags. Thus, the
	 * implementation does not reference or care about the sqhd
	 * value as it never needs to use the sqhd/sqtail pointers
	 * for submission pacing.
	 *
	 * This affects the FC-NVME implementation in two ways:
	 * 1) As the value doesn't matter, we don't need to waste
	 *    cycles extracting it from ERSPs and stamping it in the
	 *    cases where the transport fabricates CQEs on successful
	 *    completions.
	 * 2) The FC-NVME implementation requires that delivery of
	 *    ERSP completions are to go back to the nvme layer in order
	 *    relative to the rsn, such that the sqhd value will always
	 *    be "in order" for the nvme layer. As the nvme layer in
	 *    linux doesn't care about sqhd, there's no need to return
	 *    them in order.
	 *
	 * Additionally:
	 * As the core nvme layer in linux currently does not look at
	 * every field in the cqe - in cases where the FC transport must
	 * fabricate a CQE, the following fields will not be set as they
	 * are not referenced:
	 *      cqe.sqid,  cqe.sqhd,  cqe.command_id
	 *
	 * Failure or error of an individual i/o, in a transport
	 * detected fashion unrelated to the nvme completion status,
	 * potentially cause the initiator and target sides to get out
	 * of sync on SQ head/tail (aka outstanding io count allowed).
	 * Per FC-NVME spec, failure of an individual command requires
	 * the connection to be terminated, which in turn requires the
	 * association to be terminated.
	 */

	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
				sizeof(op->rsp_iu), DMA_FROM_DEVICE);

	if (atomic_read(&op->state) == FCPOP_STATE_ABORTED)
		status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1);
	else if (freq->status)
		status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);

	/*
	 * For the linux implementation, if we have an unsuccessful
	 * status, the blk-mq layer can typically be called with the
	 * non-zero status and the content of the cqe isn't important.
	 */
	if (status)
		goto done;

	/*
	 * command completed successfully relative to the wire
	 * protocol. However, validate anything received and
	 * extract the status and result from the cqe (create it
	 * where necessary).
	 */

	switch (freq->rcv_rsplen) {

	case 0:
	case NVME_FC_SIZEOF_ZEROS_RSP:
		/*
		 * No response payload or 12 bytes of payload (which
		 * should all be zeros) are considered successful and
		 * no payload in the CQE by the transport.
		 */
		if (freq->transferred_length !=
			be32_to_cpu(op->cmd_iu.data_len)) {
			status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
			goto done;
		}
		result.u64 = 0;
		break;

	case sizeof(struct nvme_fc_ersp_iu):
		/*
		 * The ERSP IU contains a full completion with CQE.
		 * Validate ERSP IU and look at cqe.
		 */
		if (unlikely(be16_to_cpu(op->rsp_iu.iu_len) !=
					(freq->rcv_rsplen / 4) ||
			     be32_to_cpu(op->rsp_iu.xfrd_len) !=
					freq->transferred_length ||
			     op->rsp_iu.status_code ||
			     sqe->common.command_id != cqe->command_id)) {
			status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
			goto done;
		}
		result = cqe->result;
		status = cqe->status;
		break;

	default:
		status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1);
		goto done;
	}

	terminate_assoc = false;

done:
	if (op->flags & FCOP_FLAGS_AEN) {
		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
		complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
		atomic_set(&op->state, FCPOP_STATE_IDLE);
		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
		nvme_fc_ctrl_put(ctrl);
		goto check_error;
	}

	complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op);
	if (!complete_rq) {
		if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) {
			status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
			if (blk_queue_dying(rq->q))
				status |= cpu_to_le16(NVME_SC_DNR << 1);
		}
		nvme_end_request(rq, status, result);
	} else
		__nvme_fc_final_op_cleanup(rq);

check_error:
	if (terminate_assoc)
		nvme_fc_error_recovery(ctrl, "transport detected io error");
}
static int
__nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
		struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op,
		struct request *rq, u32 rqno)
{
	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
	int ret = 0;

	memset(op, 0, sizeof(*op));
	op->fcp_req.cmdaddr = &op->cmd_iu;
	op->fcp_req.cmdlen = sizeof(op->cmd_iu);
	op->fcp_req.rspaddr = &op->rsp_iu;
	op->fcp_req.rsplen = sizeof(op->rsp_iu);
	op->fcp_req.done = nvme_fc_fcpio_done;
	op->fcp_req.first_sgl = (struct scatterlist *)&op[1];
	op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE];
	op->ctrl = ctrl;
	op->queue = queue;
	op->rq = rq;
	op->rqno = rqno;

	cmdiu->scsi_id = NVME_CMD_SCSI_ID;
	cmdiu->fc_id = NVME_CMD_FC_ID;
	cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));

	op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev,
				&op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE);
	if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) {
		dev_err(ctrl->dev,
			"FCP Op failed - cmdiu dma mapping failed.\n");
		ret = -EFAULT;
		goto out_on_error;
	}

	op->fcp_req.rspdma = fc_dma_map_single(ctrl->lport->dev,
				&op->rsp_iu, sizeof(op->rsp_iu),
				DMA_FROM_DEVICE);
	if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) {
		dev_err(ctrl->dev,
			"FCP Op failed - rspiu dma mapping failed.\n");
		ret = -EFAULT;
	}

	atomic_set(&op->state, FCPOP_STATE_IDLE);
out_on_error:
	return ret;
}

static int
nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
		unsigned int hctx_idx, unsigned int numa_node)
{
	struct nvme_fc_ctrl *ctrl = set->driver_data;
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
	struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];

	return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++);
}

static int
nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
{
	struct nvme_fc_fcp_op *aen_op;
	struct nvme_fc_cmd_iu *cmdiu;
	struct nvme_command *sqe;
	void *private;
	int i, ret;

	aen_op = ctrl->aen_ops;
	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
		private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
						GFP_KERNEL);
		if (!private)
			return -ENOMEM;

		cmdiu = &aen_op->cmd_iu;
		sqe = &cmdiu->sqe;
		ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0],
				aen_op, (struct request *)NULL,
				(AEN_CMDID_BASE + i));
		if (ret) {
			kfree(private);
			return ret;
		}

		aen_op->flags = FCOP_FLAGS_AEN;
		aen_op->fcp_req.first_sgl = NULL; /* no sg list */
		aen_op->fcp_req.private = private;

		memset(sqe, 0, sizeof(*sqe));
		sqe->common.opcode = nvme_admin_async_event;
		/* Note: core layer may overwrite the sqe.command_id value */
		sqe->common.command_id = AEN_CMDID_BASE + i;
	}
	return 0;
}

static void
nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl)
{
	struct nvme_fc_fcp_op *aen_op;
	int i;

	aen_op = ctrl->aen_ops;
	for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) {
		if (!aen_op->fcp_req.private)
			continue;

		__nvme_fc_exit_request(ctrl, aen_op);

		kfree(aen_op->fcp_req.private);
		aen_op->fcp_req.private = NULL;
	}
}
static inline void
__nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl,
		unsigned int qidx)
{
	struct nvme_fc_queue *queue = &ctrl->queues[qidx];

	hctx->driver_data = queue;
	queue->hctx = hctx;
}

static int
nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_fc_ctrl *ctrl = data;

	__nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1);

	return 0;
}

static int
nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_fc_ctrl *ctrl = data;

	__nvme_fc_init_hctx(hctx, ctrl, hctx_idx);

	return 0;
}

static void
nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size)
{
	struct nvme_fc_queue *queue;

	queue = &ctrl->queues[idx];
	memset(queue, 0, sizeof(*queue));
	queue->ctrl = ctrl;
	queue->qnum = idx;
	atomic_set(&queue->csn, 1);
	queue->dev = ctrl->dev;

	if (idx > 0)
		queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16;
	else
		queue->cmnd_capsule_len = sizeof(struct nvme_command);

	queue->queue_size = queue_size;

	/*
	 * Considered whether we should allocate buffers for all SQEs
	 * and CQEs and dma map them - mapping their respective entries
	 * into the request structures (kernel vm addr and dma address)
	 * thus the driver could use the buffers/mappings directly.
	 * It only makes sense if the LLDD would use them for its
	 * messaging api. It's very unlikely most adapter api's would use
	 * a native NVME sqe/cqe. More reasonable if FC-NVME IU payload
	 * structures were used instead.
	 */
}

/*
 * This routine terminates a queue at the transport level.
 * The transport has already ensured that all outstanding ios on
 * the queue have been terminated.
 * The transport will send a Disconnect LS request to terminate
 * the queue's connection. Termination of the admin queue will also
 * terminate the association at the target.
 */
static void
nvme_fc_free_queue(struct nvme_fc_queue *queue)
{
	if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags))
		return;

	/*
	 * Current implementation never disconnects a single queue.
	 * It always terminates a whole association. So there is never
	 * a disconnect(queue) LS sent to the target.
	 */

	queue->connection_id = 0;
	clear_bit(NVME_FC_Q_CONNECTED, &queue->flags);
}

static void
__nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl,
	struct nvme_fc_queue *queue, unsigned int qidx)
{
	if (ctrl->lport->ops->delete_queue)
		ctrl->lport->ops->delete_queue(&ctrl->lport->localport, qidx,
				queue->lldd_handle);
	queue->lldd_handle = NULL;
}

static void
nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->ctrl.queue_count; i++)
		nvme_fc_free_queue(&ctrl->queues[i]);
}

static int
__nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl,
	struct nvme_fc_queue *queue, unsigned int qidx, u16 qsize)
{
	int ret = 0;

	queue->lldd_handle = NULL;
	if (ctrl->lport->ops->create_queue)
		ret = ctrl->lport->ops->create_queue(&ctrl->lport->localport,
				qidx, qsize, &queue->lldd_handle);

	return ret;
}

static void
nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl)
{
	struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1];
	int i;

	for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--)
		__nvme_fc_delete_hw_queue(ctrl, queue, i);
}

static int
nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
{
	struct nvme_fc_queue *queue = &ctrl->queues[1];
	int i, ret;

	for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) {
		ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize);
		if (ret)
			goto delete_queues;
	}

	return 0;

delete_queues:
	for (; i >= 0; i--)
		__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
	return ret;
}

static int
nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
{
	int i, ret = 0;

	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
		ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize,
					(qsize / 5));
		if (ret)
			break;
		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
		if (ret)
			break;
	}

	return ret;
}

static void
nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->ctrl.queue_count; i++)
		nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize);
}
static void
nvme_fc_ctrl_free(struct kref *ref)
{
	struct nvme_fc_ctrl *ctrl =
		container_of(ref, struct nvme_fc_ctrl, ref);
	unsigned long flags;

	if (ctrl->ctrl.tagset) {
		blk_cleanup_queue(ctrl->ctrl.connect_q);
		blk_mq_free_tag_set(&ctrl->tag_set);
	}

	/* remove from rport list */
	spin_lock_irqsave(&ctrl->rport->lock, flags);
	list_del(&ctrl->ctrl_list);
	spin_unlock_irqrestore(&ctrl->rport->lock, flags);

	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
	blk_cleanup_queue(ctrl->ctrl.admin_q);
	blk_mq_free_tag_set(&ctrl->admin_tag_set);

	kfree(ctrl->queues);

	put_device(ctrl->dev);
	nvme_fc_rport_put(ctrl->rport);

	ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
	if (ctrl->ctrl.opts)
		nvmf_free_options(ctrl->ctrl.opts);
	kfree(ctrl);
}

static void
nvme_fc_ctrl_put(struct nvme_fc_ctrl *ctrl)
{
	kref_put(&ctrl->ref, nvme_fc_ctrl_free);
}

static int
nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl)
{
	return kref_get_unless_zero(&ctrl->ref);
}

/*
 * All accesses from nvme core layer done - can now free the
 * controller. Called after last nvme_put_ctrl() call
 */
static void
nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
{
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);

	WARN_ON(nctrl != &ctrl->ctrl);

	nvme_fc_ctrl_put(ctrl);
}

static void
nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg)
{
	/* only proceed if in LIVE state - e.g. on first error */
	if (ctrl->ctrl.state != NVME_CTRL_LIVE)
		return;

	dev_warn(ctrl->ctrl.device,
		"NVME-FC{%d}: transport association error detected: %s\n",
		ctrl->cnum, errmsg);
	dev_warn(ctrl->ctrl.device,
		"NVME-FC{%d}: resetting controller\n", ctrl->cnum);

	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) {
		dev_err(ctrl->ctrl.device,
			"NVME-FC{%d}: error_recovery: Couldn't change state "
			"to RECONNECTING\n", ctrl->cnum);
		return;
	}

	nvme_reset_ctrl(&ctrl->ctrl);
}
static enum blk_eh_timer_return
nvme_fc_timeout(struct request *rq, bool reserved)
{
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
	struct nvme_fc_ctrl *ctrl = op->ctrl;
	int ret;

	if (reserved)
		return BLK_EH_RESET_TIMER;

	ret = __nvme_fc_abort_op(ctrl, op);
	if (ret)
		/* io wasn't active to abort consider it done */
		return BLK_EH_HANDLED;

	/*
	 * we can't individually ABTS an io without affecting the queue,
	 * thus killing the queue, and thus the association.
	 * So resolve by performing a controller reset, which will stop
	 * the host/io stack, terminate the association on the link,
	 * and recreate an association on the link.
	 */
	nvme_fc_error_recovery(ctrl, "io timeout error");

	return BLK_EH_HANDLED;
}

static int
nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
		struct nvme_fc_fcp_op *op)
{
	struct nvmefc_fcp_req *freq = &op->fcp_req;
	enum dma_data_direction dir;
	int ret;

	freq->sg_cnt = 0;

	if (!blk_rq_payload_bytes(rq))
		return 0;

	freq->sg_table.sgl = freq->first_sgl;
	ret = sg_alloc_table_chained(&freq->sg_table,
			blk_rq_nr_phys_segments(rq), freq->sg_table.sgl);
	if (ret)
		return -ENOMEM;

	op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl);
	WARN_ON(op->nents > blk_rq_nr_phys_segments(rq));
	dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
	freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
				op->nents, dir);
	if (unlikely(freq->sg_cnt <= 0)) {
		sg_free_table_chained(&freq->sg_table, true);
		freq->sg_cnt = 0;
		return -EFAULT;
	}

	/*
	 * TODO: blk_integrity_rq(rq)  for DIF handling
	 */
	return 0;
}

static void
nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
		struct nvme_fc_fcp_op *op)
{
	struct nvmefc_fcp_req *freq = &op->fcp_req;

	if (!freq->sg_cnt)
		return;

	fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents,
				((rq_data_dir(rq) == WRITE) ?
					DMA_TO_DEVICE : DMA_FROM_DEVICE));

	nvme_cleanup_cmd(rq);

	sg_free_table_chained(&freq->sg_table, true);

	freq->sg_cnt = 0;
}
/*
 * In FC, the queue is a logical thing. At transport connect, the target
 * creates its "queue" and returns a handle that is to be given to the
 * target whenever it posts something to the corresponding SQ.  When an
 * SQE is sent on a SQ, FC effectively considers the SQE, or rather the
 * command contained within the SQE, an io, and assigns a FC exchange
 * to it. The SQE and the associated SQ handle are sent in the initial
 * CMD IU sent on the exchange. All transfers relative to the io occur
 * as part of the exchange.  The CQE is the last thing for the io,
 * which is transferred (explicitly or implicitly) with the RSP IU
 * sent on the exchange. After the CQE is received, the FC exchange is
 * terminated and the Exchange may be used on a different io.
 *
 * The transport to LLDD api has the transport making a request for a
 * new fcp io request to the LLDD. The LLDD then allocates a FC exchange
 * resource and transfers the command. The LLDD will then process all
 * steps to complete the io. Upon completion, the transport done routine
 * is called to allow the nvme layer to process the completion.
 *
 * So - while the operation is outstanding to the LLDD, there is a link
 * level FC exchange resource that is also outstanding. This must be
 * considered in all cleanup operations.
 */
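
/*
 * A condensed view of the resulting io path (sketch; the names below are
 * this file's own functions and the LLDD callbacks described above):
 *
 *	nvme_fc_queue_rq()                blk-mq dispatch
 *	  -> nvme_fc_start_fcp_op()       format CMD IU, map data
 *	       -> lport->ops->fcp_io()    LLDD allocates the FC exchange
 *	            ... wire transfers ...
 *	  <- nvme_fc_fcpio_done()         LLDD completion callback
 *	       -> nvme_end_request()      CQE (real or fabricated) to core
 *
 * Any cleanup path must account for the FC exchange the LLDD holds for
 * as long as the op is outstanding.
 */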
static blk_status_t
nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
	struct nvme_fc_fcp_op *op, u32 data_len,
	enum nvmefc_fcp_datadir	io_dir)
{
	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
	struct nvme_command *sqe = &cmdiu->sqe;
	u32 csn;
	int ret;

	/*
	 * before attempting to send the io, check to see if we believe
	 * the target device is present
	 */
	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
		goto busy;

	if (!nvme_fc_ctrl_get(ctrl))
		return BLK_STS_IOERR;

	/* format the FC-NVME CMD IU and fcp_req */
	cmdiu->connection_id = cpu_to_be64(queue->connection_id);
	csn = atomic_inc_return(&queue->csn);
	cmdiu->csn = cpu_to_be32(csn);
	cmdiu->data_len = cpu_to_be32(data_len);
	switch (io_dir) {
	case NVMEFC_FCP_WRITE:
		cmdiu->flags = FCNVME_CMD_FLAGS_WRITE;
		break;
	case NVMEFC_FCP_READ:
		cmdiu->flags = FCNVME_CMD_FLAGS_READ;
		break;
	case NVMEFC_FCP_NODATA:
		cmdiu->flags = 0;
		break;
	}
	op->fcp_req.payload_length = data_len;
	op->fcp_req.io_dir = io_dir;
	op->fcp_req.transferred_length = 0;
	op->fcp_req.rcv_rsplen = 0;
	op->fcp_req.status = NVME_SC_SUCCESS;
	op->fcp_req.sqid = cpu_to_le16(queue->qnum);

	/*
	 * validate per fabric rules, set fields mandated by fabric spec
	 * as well as those by FC-NVME spec.
	 */
	WARN_ON_ONCE(sqe->common.metadata);
	WARN_ON_ONCE(sqe->common.dptr.prp1);
	WARN_ON_ONCE(sqe->common.dptr.prp2);
	sqe->common.flags |= NVME_CMD_SGL_METABUF;

	/*
	 * format SQE DPTR field per FC-NVME rules
	 *    type=data block descr; subtype=offset;
	 *    offset is currently 0.
	 */
	sqe->rw.dptr.sgl.type = NVME_SGL_FMT_OFFSET;
	sqe->rw.dptr.sgl.length = cpu_to_le32(data_len);
	sqe->rw.dptr.sgl.addr = 0;

	if (!(op->flags & FCOP_FLAGS_AEN)) {
		ret = nvme_fc_map_data(ctrl, op->rq, op);
		if (ret < 0) {
			nvme_cleanup_cmd(op->rq);
			nvme_fc_ctrl_put(ctrl);
			if (ret == -ENOMEM || ret == -EAGAIN)
				return BLK_STS_RESOURCE;
			return BLK_STS_IOERR;
		}
	}

	fc_dma_sync_single_for_device(ctrl->lport->dev, op->fcp_req.cmddma,
				  sizeof(op->cmd_iu), DMA_TO_DEVICE);

	atomic_set(&op->state, FCPOP_STATE_ACTIVE);

	if (!(op->flags & FCOP_FLAGS_AEN))
		blk_mq_start_request(op->rq);

	ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport,
					&ctrl->rport->remoteport,
					queue->lldd_handle, &op->fcp_req);

	if (ret) {
		if (!(op->flags & FCOP_FLAGS_AEN))
			nvme_fc_unmap_data(ctrl, op->rq, op);

		nvme_fc_ctrl_put(ctrl);

		if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE &&
				ret != -EBUSY)
			return BLK_STS_IOERR;

		goto busy;
	}

	return BLK_STS_OK;

busy:
	if (!(op->flags & FCOP_FLAGS_AEN) && queue->hctx)
		blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY);

	return BLK_STS_RESOURCE;
}

static blk_status_t
nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
			const struct blk_mq_queue_data *bd)
{
	struct nvme_ns *ns = hctx->queue->queuedata;
	struct nvme_fc_queue *queue = hctx->driver_data;
	struct nvme_fc_ctrl *ctrl = queue->ctrl;
	struct request *rq = bd->rq;
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
	struct nvme_command *sqe = &cmdiu->sqe;
	enum nvmefc_fcp_datadir	io_dir;
	u32 data_len;
	blk_status_t ret;

	ret = nvme_setup_cmd(ns, rq, sqe);
	if (ret)
		return ret;

	data_len = blk_rq_payload_bytes(rq);
	if (data_len)
		io_dir = ((rq_data_dir(rq) == WRITE) ?
					NVMEFC_FCP_WRITE : NVMEFC_FCP_READ);
	else
		io_dir = NVMEFC_FCP_NODATA;

	return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir);
}
static struct blk_mq_tags *
nvme_fc_tagset(struct nvme_fc_queue *queue)
{
	if (queue->qnum == 0)
		return queue->ctrl->admin_tag_set.tags[queue->qnum];

	return queue->ctrl->tag_set.tags[queue->qnum - 1];
}

static int
nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag)
{
	struct nvme_fc_queue *queue = hctx->driver_data;
	struct nvme_fc_ctrl *ctrl = queue->ctrl;
	struct request *req;
	struct nvme_fc_fcp_op *op;

	req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag);
	if (!req)
		return 0;

	op = blk_mq_rq_to_pdu(req);

	if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) &&
		 (ctrl->lport->ops->poll_queue))
		ctrl->lport->ops->poll_queue(&ctrl->lport->localport,
						 queue->lldd_handle);

	return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE));
}

static void
nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
{
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
	struct nvme_fc_fcp_op *aen_op;
	unsigned long flags;
	bool terminating = false;
	blk_status_t ret;

	/* aen_ops holds NVME_FC_NR_AEN_COMMANDS entries, so the index
	 * must be strictly less than that count.
	 */
	if (aer_idx >= NVME_FC_NR_AEN_COMMANDS)
		return;

	spin_lock_irqsave(&ctrl->lock, flags);
	if (ctrl->flags & FCCTRL_TERMIO)
		terminating = true;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	if (terminating)
		return;

	aen_op = &ctrl->aen_ops[aer_idx];

	ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0,
					NVMEFC_FCP_NODATA);
	if (ret)
		dev_err(ctrl->ctrl.device,
			"failed async event work [%d]\n", aer_idx);
}
2141 __nvme_fc_final_op_cleanup(struct request
*rq
)
2143 struct nvme_fc_fcp_op
*op
= blk_mq_rq_to_pdu(rq
);
2144 struct nvme_fc_ctrl
*ctrl
= op
->ctrl
;
2146 atomic_set(&op
->state
, FCPOP_STATE_IDLE
);
2147 op
->flags
&= ~(FCOP_FLAGS_TERMIO
| FCOP_FLAGS_RELEASED
|
2148 FCOP_FLAGS_COMPLETE
);
2150 nvme_fc_unmap_data(ctrl
, rq
, op
);
2151 nvme_complete_rq(rq
);
2152 nvme_fc_ctrl_put(ctrl
);
2157 nvme_fc_complete_rq(struct request
*rq
)
2159 struct nvme_fc_fcp_op
*op
= blk_mq_rq_to_pdu(rq
);
2160 struct nvme_fc_ctrl
*ctrl
= op
->ctrl
;
2161 unsigned long flags
;
2162 bool completed
= false;
2165 * the core layer, on controller resets after calling
2166 * nvme_shutdown_ctrl(), calls complete_rq without our
2167 * calling blk_mq_complete_request(), thus there may still
2168 * be live i/o outstanding with the LLDD. Means transport has
2169 * to track complete calls vs fcpio_done calls to know what
2170 * path to take on completes and dones.
2172 spin_lock_irqsave(&ctrl
->lock
, flags
);
2173 if (op
->flags
& FCOP_FLAGS_COMPLETE
)
2176 op
->flags
|= FCOP_FLAGS_RELEASED
;
2177 spin_unlock_irqrestore(&ctrl
->lock
, flags
);
2180 __nvme_fc_final_op_cleanup(rq
);
/*
 * This routine is used by the transport when it needs to find active
 * io on a queue that is to be terminated. The transport uses
 * blk_mq_tagset_busy_iter() to find the busy requests, which then invoke
 * this routine to kill them on a 1 by 1 basis.
 *
 * As FC allocates an FC exchange for each io, the transport must contact
 * the LLDD to terminate the exchange, thus releasing the FC exchange.
 * After terminating the exchange the LLDD will call the transport's
 * normal io done path for the request, but it will have an aborted
 * status. The done path will return the io request back to the block
 * layer with an error status.
 */
static void
nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
{
	struct nvme_ctrl *nctrl = data;
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
	unsigned long flags;
	int status;

	if (!blk_mq_request_started(req))
		return;

	spin_lock_irqsave(&ctrl->lock, flags);
	if (ctrl->flags & FCCTRL_TERMIO) {
		ctrl->iocnt++;
		op->flags |= FCOP_FLAGS_TERMIO;
	}
	spin_unlock_irqrestore(&ctrl->lock, flags);

	status = __nvme_fc_abort_op(ctrl, op);
	if (status) {
		/*
		 * if __nvme_fc_abort_op failed the io wasn't
		 * active. Thus this call path is running in
		 * parallel to the io complete. Treat as non-error.
		 */

		/* back out the flags/counters */
		spin_lock_irqsave(&ctrl->lock, flags);
		if (ctrl->flags & FCCTRL_TERMIO)
			ctrl->iocnt--;
		op->flags &= ~FCOP_FLAGS_TERMIO;
		spin_unlock_irqrestore(&ctrl->lock, flags);
	}
}
static const struct blk_mq_ops nvme_fc_mq_ops = {
	.queue_rq	= nvme_fc_queue_rq,
	.complete	= nvme_fc_complete_rq,
	.init_request	= nvme_fc_init_request,
	.exit_request	= nvme_fc_exit_request,
	.init_hctx	= nvme_fc_init_hctx,
	.poll		= nvme_fc_poll,
	.timeout	= nvme_fc_timeout,
};
static int
nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
{
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
	unsigned int nr_io_queues;
	int ret;

	nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
				ctrl->lport->ops->max_hw_queues);
	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
	if (ret) {
		dev_info(ctrl->ctrl.device,
			"set_queue_count failed: %d\n", ret);
		return ret;
	}

	ctrl->ctrl.queue_count = nr_io_queues + 1;
	if (!nr_io_queues)
		return 0;

	nvme_fc_init_io_queues(ctrl);

	memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
	ctrl->tag_set.ops = &nvme_fc_mq_ops;
	ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
	ctrl->tag_set.reserved_tags = 1; /* fabric connect */
	ctrl->tag_set.numa_node = NUMA_NO_NODE;
	ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
	ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
					(SG_CHUNK_SIZE *
						sizeof(struct scatterlist)) +
					ctrl->lport->ops->fcprqst_priv_sz;
	ctrl->tag_set.driver_data = ctrl;
	ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1;
	ctrl->tag_set.timeout = NVME_IO_TIMEOUT;

	ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
	if (ret)
		return ret;

	ctrl->ctrl.tagset = &ctrl->tag_set;

	ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
	if (IS_ERR(ctrl->ctrl.connect_q)) {
		ret = PTR_ERR(ctrl->ctrl.connect_q);
		goto out_free_tag_set;
	}

	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
	if (ret)
		goto out_cleanup_blk_queue;

	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
	if (ret)
		goto out_delete_hw_queues;

	return 0;

out_delete_hw_queues:
	nvme_fc_delete_hw_io_queues(ctrl);
out_cleanup_blk_queue:
	blk_cleanup_queue(ctrl->ctrl.connect_q);
out_free_tag_set:
	blk_mq_free_tag_set(&ctrl->tag_set);
	nvme_fc_free_io_queues(ctrl);

	/* force put free routine to ignore io queues */
	ctrl->ctrl.tagset = NULL;

	return ret;
}
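/*
 * Per-request pdu layout implied by cmd_size above (a sketch; the
 * SG_CHUNK_SIZE inline sg list is assumed from the allocation sizing,
 * and the private area size is whatever the LLDD reported):
 *
 *	struct nvme_fc_fcp_op			transport command context
 *	struct scatterlist[SG_CHUNK_SIZE]	inline sg list for data
 *	fcprqst_priv_sz bytes			LLDD private area
 *
 * blk-mq allocates this once per tag, so the fast path needs no per-io
 * allocation.
 */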
static int
nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
{
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
	unsigned int nr_io_queues;
	int ret;

	nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()),
				ctrl->lport->ops->max_hw_queues);
	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
	if (ret) {
		dev_info(ctrl->ctrl.device,
			"set_queue_count failed: %d\n", ret);
		return ret;
	}

	ctrl->ctrl.queue_count = nr_io_queues + 1;
	/* check for io queues existing */
	if (ctrl->ctrl.queue_count == 1)
		return 0;

	nvme_fc_init_io_queues(ctrl);

	ret = blk_mq_reinit_tagset(&ctrl->tag_set, nvme_fc_reinit_request);
	if (ret)
		goto out_free_io_queues;

	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
	if (ret)
		goto out_free_io_queues;

	ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size);
	if (ret)
		goto out_delete_hw_queues;

	blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);

	return 0;

out_delete_hw_queues:
	nvme_fc_delete_hw_io_queues(ctrl);
out_free_io_queues:
	nvme_fc_free_io_queues(ctrl);
	return ret;
}
/*
 * This routine restarts the controller on the host side, and
 * on the link side, recreates the controller association.
 */
static int
nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
{
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
	int ret;
	bool changed;
	u32 segs;

	++ctrl->ctrl.nr_reconnects;

	/*
	 * Create the admin queue
	 */

	nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH);

	ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
				NVME_FC_AQ_BLKMQ_DEPTH);
	if (ret)
		goto out_free_queue;

	ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0],
				NVME_FC_AQ_BLKMQ_DEPTH,
				(NVME_FC_AQ_BLKMQ_DEPTH / 4));
	if (ret)
		goto out_delete_hw_queue;

	if (ctrl->ctrl.state != NVME_CTRL_NEW)
		blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);

	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
	if (ret)
		goto out_disconnect_admin_queue;

	/*
	 * Check controller capabilities
	 *
	 * todo:- add code to check if ctrl attributes changed from
	 * prior connection values
	 */

	ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
	if (ret) {
		dev_err(ctrl->ctrl.device,
			"prop_get NVME_REG_CAP failed\n");
		goto out_disconnect_admin_queue;
	}

	ctrl->ctrl.sqsize =
		min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1,
			ctrl->ctrl.sqsize);

	ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
	if (ret)
		goto out_disconnect_admin_queue;

	segs = min_t(u32, NVME_FC_MAX_SEGMENTS,
			ctrl->lport->ops->max_sgl_segments);
	ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9);

	ret = nvme_init_identify(&ctrl->ctrl);
	if (ret)
		goto out_disconnect_admin_queue;

	/* sanity checks */

	/* FC-NVME does not have other data in the capsule */
	if (ctrl->ctrl.icdoff) {
		dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n",
				ctrl->ctrl.icdoff);
		ret = -EINVAL;
		goto out_disconnect_admin_queue;
	}

	/* FC-NVME supports normal SGL Data Block Descriptors */

	if (opts->queue_size > ctrl->ctrl.maxcmd) {
		/* warn if maxcmd is lower than queue_size */
		dev_warn(ctrl->ctrl.device,
			"queue_size %zu > ctrl maxcmd %u, reducing "
			"to ctrl maxcmd\n",
			opts->queue_size, ctrl->ctrl.maxcmd);
		opts->queue_size = ctrl->ctrl.maxcmd;
	}

	ret = nvme_fc_init_aen_ops(ctrl);
	if (ret)
		goto out_term_aen_ops;

	/*
	 * Create the io queues
	 */

	if (ctrl->ctrl.queue_count > 1) {
		if (ctrl->ctrl.state == NVME_CTRL_NEW)
			ret = nvme_fc_create_io_queues(ctrl);
		else
			ret = nvme_fc_reinit_io_queues(ctrl);
		if (ret)
			goto out_term_aen_ops;
	}

	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
	WARN_ON_ONCE(!changed);

	ctrl->ctrl.nr_reconnects = 0;

	nvme_start_ctrl(&ctrl->ctrl);

	return 0;	/* Success */

out_term_aen_ops:
	nvme_fc_term_aen_ops(ctrl);
out_disconnect_admin_queue:
	/* send a Disconnect(association) LS to fc-nvme target */
	nvme_fc_xmt_disconnect_assoc(ctrl);
out_delete_hw_queue:
	__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
out_free_queue:
	nvme_fc_free_queue(&ctrl->queues[0]);

	return ret;
}
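/*
 * Association bring-up, in the order implemented above:
 *	1. create and connect the admin queue (FC level, then fabrics)
 *	2. read CAP, enable the controller, identify
 *	3. sanity-check icdoff and clamp queue_size to maxcmd
 *	4. set up AEN ops, then create or re-init the io queues
 *	5. mark the controller LIVE and restart it
 * The error labels unwind in the reverse order of setup.
 */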
/*
 * This routine stops operation of the controller on the host side.
 * On the host os stack side: Admin and IO queues are stopped,
 *   outstanding ios on them terminated via FC ABTS.
 * On the link side: the association is terminated.
 */
static void
nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
{
	unsigned long flags;

	spin_lock_irqsave(&ctrl->lock, flags);
	ctrl->flags |= FCCTRL_TERMIO;
	ctrl->iocnt = 0;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	/*
	 * If io queues are present, stop them and terminate all outstanding
	 * ios on them. As FC allocates an FC exchange for each io, the
	 * transport must contact the LLDD to terminate the exchange,
	 * thus releasing the FC exchange. We use blk_mq_tagset_busy_iter()
	 * to tell us what io's are busy and invoke a transport routine
	 * to kill them with the LLDD. After terminating the exchange
	 * the LLDD will call the transport's normal io done path, but it
	 * will have an aborted status. The done path will return the
	 * io requests back to the block layer as part of normal completions
	 * (but with error status).
	 */
	if (ctrl->ctrl.queue_count > 1) {
		nvme_stop_queues(&ctrl->ctrl);
		blk_mq_tagset_busy_iter(&ctrl->tag_set,
				nvme_fc_terminate_exchange, &ctrl->ctrl);
	}

	/*
	 * Other transports, which don't have link-level contexts bound
	 * to sqe's, would try to gracefully shutdown the controller by
	 * writing the registers for shutdown and polling (call
	 * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
	 * just aborted and we will wait on those contexts, and given
	 * there was no indication of how live the controller is on the
	 * link, don't send more io to create more contexts for the
	 * shutdown. Let the controller fail via keepalive failure if
	 * it's still present.
	 */

	/*
	 * clean up the admin queue. Same thing as above.
	 * use blk_mq_tagset_busy_iter() and the transport routine to
	 * terminate the exchanges.
	 */
	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
				nvme_fc_terminate_exchange, &ctrl->ctrl);

	/* kill the aens as they are a separate path */
	nvme_fc_abort_aen_ops(ctrl);

	/* wait for all io that had to be aborted */
	spin_lock_irqsave(&ctrl->lock, flags);
	wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
	ctrl->flags &= ~FCCTRL_TERMIO;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	nvme_fc_term_aen_ops(ctrl);

	/*
	 * send a Disconnect(association) LS to fc-nvme target
	 * Note: could have been sent at top of process, but
	 * cleaner on link traffic if after the aborts complete.
	 * Note: if association doesn't exist, association_id will be 0
	 */
	if (ctrl->association_id)
		nvme_fc_xmt_disconnect_assoc(ctrl);

	if (ctrl->ctrl.tagset) {
		nvme_fc_delete_hw_io_queues(ctrl);
		nvme_fc_free_io_queues(ctrl);
	}

	__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
	nvme_fc_free_queue(&ctrl->queues[0]);
}
static void
nvme_fc_delete_ctrl_work(struct work_struct *work)
{
	struct nvme_fc_ctrl *ctrl =
		container_of(work, struct nvme_fc_ctrl, delete_work);

	cancel_work_sync(&ctrl->ctrl.reset_work);
	cancel_delayed_work_sync(&ctrl->connect_work);
	nvme_stop_ctrl(&ctrl->ctrl);
	nvme_remove_namespaces(&ctrl->ctrl);
	/*
	 * kill the association on the link side. this will block
	 * waiting for io to terminate
	 */
	nvme_fc_delete_association(ctrl);

	/*
	 * tear down the controller
	 * After the last reference on the nvme ctrl is removed,
	 * the transport nvme_fc_nvme_ctrl_freed() callback will be
	 * invoked. From there, the transport will tear down its
	 * logical queues and association.
	 */
	nvme_uninit_ctrl(&ctrl->ctrl);

	nvme_put_ctrl(&ctrl->ctrl);
}
static bool
__nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl)
{
	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
		return true;

	if (!queue_work(nvme_wq, &ctrl->delete_work))
		return true;

	return false;
}

static int
__nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl)
{
	return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0;
}
/*
 * Request from nvme core layer to delete the controller
 */
static int
nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl)
{
	struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
	int ret;

	if (!kref_get_unless_zero(&ctrl->ctrl.kref))
		return -EBUSY;

	ret = __nvme_fc_del_ctrl(ctrl);

	if (!ret)
		flush_workqueue(nvme_wq);

	nvme_put_ctrl(&ctrl->ctrl);

	return ret;
}
static void
nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
{
	/* If we are resetting/deleting then do nothing */
	if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) {
		WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW ||
			ctrl->ctrl.state == NVME_CTRL_LIVE);
		return;
	}

	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
		ctrl->cnum, status);

	if (nvmf_should_reconnect(&ctrl->ctrl)) {
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: Reconnect attempt in %d seconds.\n",
			ctrl->cnum, ctrl->ctrl.opts->reconnect_delay);
		queue_delayed_work(nvme_wq, &ctrl->connect_work,
				ctrl->ctrl.opts->reconnect_delay * HZ);
	} else {
		dev_warn(ctrl->ctrl.device,
				"NVME-FC{%d}: Max reconnect attempts (%d) "
				"reached. Removing controller\n",
				ctrl->cnum, ctrl->ctrl.nr_reconnects);
		WARN_ON(__nvme_fc_schedule_delete_work(ctrl));
	}
}
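/*
 * For example (a sketch; the option values come from the shared fabrics
 * layer, not this file): with reconnect_delay=2 and ctrl_loss_tmo=60,
 * nvmf_should_reconnect() would permit roughly 30 attempts, one every
 * 2 seconds, before the controller is removed. See the fabrics option
 * handling for the authoritative policy.
 */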
static void
nvme_fc_reset_ctrl_work(struct work_struct *work)
{
	struct nvme_fc_ctrl *ctrl =
		container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
	int ret;

	nvme_stop_ctrl(&ctrl->ctrl);
	/* will block waiting for io to terminate */
	nvme_fc_delete_association(ctrl);

	ret = nvme_fc_create_association(ctrl);
	if (ret)
		nvme_fc_reconnect_or_delete(ctrl, ret);
	else
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: controller reset complete\n",
			ctrl->cnum);
}
static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
	.name			= "fc",
	.module			= THIS_MODULE,
	.flags			= NVME_F_FABRICS,
	.reg_read32		= nvmf_reg_read32,
	.reg_read64		= nvmf_reg_read64,
	.reg_write32		= nvmf_reg_write32,
	.free_ctrl		= nvme_fc_nvme_ctrl_freed,
	.submit_async_event	= nvme_fc_submit_async_event,
	.delete_ctrl		= nvme_fc_del_nvme_ctrl,
	.get_address		= nvmf_get_address,
};
static void
nvme_fc_connect_ctrl_work(struct work_struct *work)
{
	int ret;

	struct nvme_fc_ctrl *ctrl =
			container_of(to_delayed_work(work),
				struct nvme_fc_ctrl, connect_work);

	ret = nvme_fc_create_association(ctrl);
	if (ret)
		nvme_fc_reconnect_or_delete(ctrl, ret);
	else
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: controller reconnect complete\n",
			ctrl->cnum);
}
static const struct blk_mq_ops nvme_fc_admin_mq_ops = {
	.queue_rq	= nvme_fc_queue_rq,
	.complete	= nvme_fc_complete_rq,
	.init_request	= nvme_fc_init_request,
	.exit_request	= nvme_fc_exit_request,
	.init_hctx	= nvme_fc_init_admin_hctx,
	.timeout	= nvme_fc_timeout,
};
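/*
 * Note the differences from nvme_fc_mq_ops above: the admin set uses
 * nvme_fc_init_admin_hctx and carries no .poll entry, as the admin
 * queue is never polled.
 */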
static struct nvme_ctrl *
nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	struct nvme_fc_lport *lport, struct nvme_fc_rport *rport)
{
	struct nvme_fc_ctrl *ctrl;
	unsigned long flags;
	int ret, idx;

	if (!(rport->remoteport.port_role &
	    (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
		ret = -EBADR;
		goto out_fail;
	}

	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
	if (!ctrl) {
		ret = -ENOMEM;
		goto out_fail;
	}

	idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL);
	if (idx < 0) {
		ret = -ENOSPC;
		goto out_free_ctrl;
	}

	ctrl->ctrl.opts = opts;
	INIT_LIST_HEAD(&ctrl->ctrl_list);
	ctrl->lport = lport;
	ctrl->rport = rport;
	ctrl->dev = lport->dev;
	ctrl->cnum = idx;

	get_device(ctrl->dev);
	kref_init(&ctrl->ref);

	INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work);
	INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work);
	INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work);
	spin_lock_init(&ctrl->lock);

	/* io queue count */
	ctrl->ctrl.queue_count = min_t(unsigned int,
				opts->nr_io_queues,
				lport->ops->max_hw_queues);
	ctrl->ctrl.queue_count++;	/* +1 for admin queue */

	ctrl->ctrl.sqsize = opts->queue_size - 1;
	ctrl->ctrl.kato = opts->kato;

	ret = -ENOMEM;
	ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
				sizeof(struct nvme_fc_queue), GFP_KERNEL);
	if (!ctrl->queues)
		goto out_free_ida;

	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
	ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH;
	ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
	ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
	ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) +
					(SG_CHUNK_SIZE *
						sizeof(struct scatterlist)) +
					ctrl->lport->ops->fcprqst_priv_sz;
	ctrl->admin_tag_set.driver_data = ctrl;
	ctrl->admin_tag_set.nr_hw_queues = 1;
	ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;

	ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
	if (ret)
		goto out_free_queues;
	ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set;

	ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
	if (IS_ERR(ctrl->ctrl.admin_q)) {
		ret = PTR_ERR(ctrl->ctrl.admin_q);
		goto out_free_admin_tag_set;
	}

	/*
	 * Would have been nice to init io queues tag set as well.
	 * However, we require interaction from the controller
	 * for max io queue count before we can do so.
	 * Defer this to the connect path.
	 */

	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0);
	if (ret)
		goto out_cleanup_admin_q;

	/* at this point, teardown path changes to ref counting on nvme ctrl */

	spin_lock_irqsave(&rport->lock, flags);
	list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
	spin_unlock_irqrestore(&rport->lock, flags);

	ret = nvme_fc_create_association(ctrl);
	if (ret) {
		ctrl->ctrl.opts = NULL;
		/* initiate nvme ctrl ref counting teardown */
		nvme_uninit_ctrl(&ctrl->ctrl);
		nvme_put_ctrl(&ctrl->ctrl);

		/* Remove core ctrl ref. */
		nvme_put_ctrl(&ctrl->ctrl);

		/* as we're past the point where we transition to the ref
		 * counting teardown path, if we return a bad pointer here,
		 * the calling routine, thinking it's prior to the
		 * transition, will do an rport put. Since the teardown
		 * path also does a rport put, we do an extra get here so
		 * proper order/teardown happens.
		 */
		nvme_fc_rport_get(rport);

		if (ret > 0)
			ret = -EIO;
		return ERR_PTR(ret);
	}

	kref_get(&ctrl->ctrl.kref);

	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
		ctrl->cnum, ctrl->ctrl.opts->subsysnqn);

	return &ctrl->ctrl;

out_cleanup_admin_q:
	blk_cleanup_queue(ctrl->ctrl.admin_q);
out_free_admin_tag_set:
	blk_mq_free_tag_set(&ctrl->admin_tag_set);
out_free_queues:
	kfree(ctrl->queues);
out_free_ida:
	put_device(ctrl->dev);
	ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum);
out_free_ctrl:
	kfree(ctrl);
out_fail:
	/* exit via here doesn't follow ctlr ref points */
	return ERR_PTR(ret);
}
struct nvmet_fc_traddr {
	u64	nn;
	u64	pn;
};
static int
__nvme_fc_parse_u64(substring_t *sstr, u64 *val)
{
	u64 token64;

	if (match_u64(sstr, &token64))
		return -EINVAL;
	*val = token64;

	return 0;
}
/*
 * This routine validates and extracts the WWN's from the TRADDR string.
 * As kernel parsers need the 0x to determine number base, universally
 * build the string to parse with a 0x prefix before parsing name strings.
 */
static int
nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen)
{
	char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1];
	substring_t wwn = { name, &name[sizeof(name)-1] };
	int nnoffset, pnoffset;

	/* validate the string is one of the 2 allowed formats */
	if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH &&
			!strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) &&
			!strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET],
				"pn-0x", NVME_FC_TRADDR_OXNNLEN)) {
		nnoffset = NVME_FC_TRADDR_OXNNLEN;
		pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET +
						NVME_FC_TRADDR_OXNNLEN;
	} else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH &&
			!strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) &&
			!strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET],
				"pn-", NVME_FC_TRADDR_NNLEN))) {
		nnoffset = NVME_FC_TRADDR_NNLEN;
		pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN;
	} else
		goto out_einval;

	name[0] = '0';
	name[1] = 'x';
	name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0;

	memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN);
	if (__nvme_fc_parse_u64(&wwn, &traddr->nn))
		goto out_einval;

	memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN);
	if (__nvme_fc_parse_u64(&wwn, &traddr->pn))
		goto out_einval;

	return 0;

out_einval:
	pr_warn("%s: bad traddr string\n", __func__);
	return -EINVAL;
}
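/*
 * Accepted traddr spellings, per the checks above (the WWN digits are
 * purely illustrative):
 *
 *	nn-0x20000090fac7893f:pn-0x10000090fac7893f	(0x-prefixed)
 *	nn-20000090fac7893f:pn-10000090fac7893f		(bare hex)
 *
 * Either way, each 16-digit name string is re-built with a 0x prefix
 * before being handed to __nvme_fc_parse_u64().
 */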
static struct nvme_ctrl *
nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts)
{
	struct nvme_fc_lport *lport;
	struct nvme_fc_rport *rport;
	struct nvme_ctrl *ctrl;
	struct nvmet_fc_traddr laddr = { 0L, 0L };
	struct nvmet_fc_traddr raddr = { 0L, 0L };
	unsigned long flags;
	int ret;

	ret = nvme_fc_parse_traddr(&raddr, opts->traddr, NVMF_TRADDR_SIZE);
	if (ret || !raddr.nn || !raddr.pn)
		return ERR_PTR(-EINVAL);

	ret = nvme_fc_parse_traddr(&laddr, opts->host_traddr, NVMF_TRADDR_SIZE);
	if (ret || !laddr.nn || !laddr.pn)
		return ERR_PTR(-EINVAL);

	/* find the host and remote ports to connect together */
	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
		if (lport->localport.node_name != laddr.nn ||
		    lport->localport.port_name != laddr.pn)
			continue;

		list_for_each_entry(rport, &lport->endp_list, endp_list) {
			if (rport->remoteport.node_name != raddr.nn ||
			    rport->remoteport.port_name != raddr.pn)
				continue;

			/* if fail to get reference fall through. Will error */
			if (!nvme_fc_rport_get(rport))
				break;

			spin_unlock_irqrestore(&nvme_fc_lock, flags);

			ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport);
			if (IS_ERR(ctrl))
				nvme_fc_rport_put(rport);
			return ctrl;
		}
	}
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	return ERR_PTR(-ENOENT);
}
static struct nvmf_transport_ops nvme_fc_transport = {
	.name		= "fc",
	.required_opts	= NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR,
	.allowed_opts	= NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO,
	.create_ctrl	= nvme_fc_create_ctrl,
};
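/*
 * Illustrative use from userspace (assumes nvme-cli; exact flags may
 * differ by version):
 *
 *	nvme connect --transport=fc \
 *		--host-traddr=nn-0x...:pn-0x... \
 *		--traddr=nn-0x...:pn-0x... \
 *		--nqn=<subsystem NQN>
 *
 * traddr and host-traddr correspond to the required_opts above;
 * reconnect_delay and ctrl_loss_tmo are the allowed extras.
 */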
static int __init nvme_fc_init_module(void)
{
	return nvmf_register_transport(&nvme_fc_transport);
}

static void __exit nvme_fc_exit_module(void)
{
	/* sanity check - all lports should be removed */
	if (!list_empty(&nvme_fc_lport_list))
		pr_warn("%s: localport list not empty\n", __func__);

	nvmf_unregister_transport(&nvme_fc_transport);

	ida_destroy(&nvme_fc_local_port_cnt);
	ida_destroy(&nvme_fc_ctrl_cnt);
}

module_init(nvme_fc_init_module);
module_exit(nvme_fc_exit_module);

MODULE_LICENSE("GPL v2");