2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <linux/lockdep.h>
44 #include <linux/inet.h>
45 #include <rdma/ib_cache.h>
47 #include <linux/atomic.h>
49 #include <scsi/scsi.h>
50 #include <scsi/scsi_device.h>
51 #include <scsi/scsi_dbg.h>
52 #include <scsi/scsi_tcq.h>
54 #include <scsi/scsi_transport_srp.h>
58 #define DRV_NAME "ib_srp"
59 #define PFX DRV_NAME ": "
60 #define DRV_VERSION "2.0"
61 #define DRV_RELDATE "July 26, 2015"
63 MODULE_AUTHOR("Roland Dreier");
64 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
65 MODULE_LICENSE("Dual BSD/GPL");
66 MODULE_INFO(release_date
, DRV_RELDATE
);
68 #if !defined(CONFIG_DYNAMIC_DEBUG)
69 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
70 #define DYNAMIC_DEBUG_BRANCH(descriptor) false
73 static unsigned int srp_sg_tablesize
;
74 static unsigned int cmd_sg_entries
;
75 static unsigned int indirect_sg_entries
;
76 static bool allow_ext_sg
;
77 static bool prefer_fr
= true;
78 static bool register_always
= true;
79 static bool never_register
;
80 static int topspin_workarounds
= 1;
82 module_param(srp_sg_tablesize
, uint
, 0444);
83 MODULE_PARM_DESC(srp_sg_tablesize
, "Deprecated name for cmd_sg_entries");
85 module_param(cmd_sg_entries
, uint
, 0444);
86 MODULE_PARM_DESC(cmd_sg_entries
,
87 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
89 module_param(indirect_sg_entries
, uint
, 0444);
90 MODULE_PARM_DESC(indirect_sg_entries
,
91 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS
) ")");
93 module_param(allow_ext_sg
, bool, 0444);
94 MODULE_PARM_DESC(allow_ext_sg
,
95 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
97 module_param(topspin_workarounds
, int, 0444);
98 MODULE_PARM_DESC(topspin_workarounds
,
99 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
101 module_param(prefer_fr
, bool, 0444);
102 MODULE_PARM_DESC(prefer_fr
,
103 "Whether to use fast registration if both FMR and fast registration are supported");
105 module_param(register_always
, bool, 0444);
106 MODULE_PARM_DESC(register_always
,
107 "Use memory registration even for contiguous memory regions");
109 module_param(never_register
, bool, 0444);
110 MODULE_PARM_DESC(never_register
, "Never register memory");
112 static const struct kernel_param_ops srp_tmo_ops
;
114 static int srp_reconnect_delay
= 10;
115 module_param_cb(reconnect_delay
, &srp_tmo_ops
, &srp_reconnect_delay
,
117 MODULE_PARM_DESC(reconnect_delay
, "Time between successive reconnect attempts");
119 static int srp_fast_io_fail_tmo
= 15;
120 module_param_cb(fast_io_fail_tmo
, &srp_tmo_ops
, &srp_fast_io_fail_tmo
,
122 MODULE_PARM_DESC(fast_io_fail_tmo
,
123 "Number of seconds between the observation of a transport"
124 " layer error and failing all I/O. \"off\" means that this"
125 " functionality is disabled.");
127 static int srp_dev_loss_tmo
= 600;
128 module_param_cb(dev_loss_tmo
, &srp_tmo_ops
, &srp_dev_loss_tmo
,
130 MODULE_PARM_DESC(dev_loss_tmo
,
131 "Maximum number of seconds that the SRP transport should"
132 " insulate transport layer errors. After this time has been"
133 " exceeded the SCSI host is removed. Should be"
134 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT
)
135 " if fast_io_fail_tmo has not been set. \"off\" means that"
136 " this functionality is disabled.");
138 static unsigned ch_count
;
139 module_param(ch_count
, uint
, 0444);
140 MODULE_PARM_DESC(ch_count
,
141 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
143 static void srp_add_one(struct ib_device
*device
);
144 static void srp_remove_one(struct ib_device
*device
, void *client_data
);
145 static void srp_recv_done(struct ib_cq
*cq
, struct ib_wc
*wc
);
146 static void srp_handle_qp_err(struct ib_cq
*cq
, struct ib_wc
*wc
,
148 static int srp_ib_cm_handler(struct ib_cm_id
*cm_id
, struct ib_cm_event
*event
);
149 static int srp_rdma_cm_handler(struct rdma_cm_id
*cm_id
,
150 struct rdma_cm_event
*event
);
152 static struct scsi_transport_template
*ib_srp_transport_template
;
153 static struct workqueue_struct
*srp_remove_wq
;
155 static struct ib_client srp_client
= {
158 .remove
= srp_remove_one
161 static struct ib_sa_client srp_sa_client
;
163 static int srp_tmo_get(char *buffer
, const struct kernel_param
*kp
)
165 int tmo
= *(int *)kp
->arg
;
168 return sprintf(buffer
, "%d", tmo
);
170 return sprintf(buffer
, "off");
173 static int srp_tmo_set(const char *val
, const struct kernel_param
*kp
)
177 res
= srp_parse_tmo(&tmo
, val
);
181 if (kp
->arg
== &srp_reconnect_delay
)
182 res
= srp_tmo_valid(tmo
, srp_fast_io_fail_tmo
,
184 else if (kp
->arg
== &srp_fast_io_fail_tmo
)
185 res
= srp_tmo_valid(srp_reconnect_delay
, tmo
, srp_dev_loss_tmo
);
187 res
= srp_tmo_valid(srp_reconnect_delay
, srp_fast_io_fail_tmo
,
191 *(int *)kp
->arg
= tmo
;
197 static const struct kernel_param_ops srp_tmo_ops
= {
202 static inline struct srp_target_port
*host_to_target(struct Scsi_Host
*host
)
204 return (struct srp_target_port
*) host
->hostdata
;
207 static const char *srp_target_info(struct Scsi_Host
*host
)
209 return host_to_target(host
)->target_name
;
212 static int srp_target_is_topspin(struct srp_target_port
*target
)
214 static const u8 topspin_oui
[3] = { 0x00, 0x05, 0xad };
215 static const u8 cisco_oui
[3] = { 0x00, 0x1b, 0x0d };
217 return topspin_workarounds
&&
218 (!memcmp(&target
->ioc_guid
, topspin_oui
, sizeof topspin_oui
) ||
219 !memcmp(&target
->ioc_guid
, cisco_oui
, sizeof cisco_oui
));
222 static struct srp_iu
*srp_alloc_iu(struct srp_host
*host
, size_t size
,
224 enum dma_data_direction direction
)
228 iu
= kmalloc(sizeof *iu
, gfp_mask
);
232 iu
->buf
= kzalloc(size
, gfp_mask
);
236 iu
->dma
= ib_dma_map_single(host
->srp_dev
->dev
, iu
->buf
, size
,
238 if (ib_dma_mapping_error(host
->srp_dev
->dev
, iu
->dma
))
242 iu
->direction
= direction
;
254 static void srp_free_iu(struct srp_host
*host
, struct srp_iu
*iu
)
259 ib_dma_unmap_single(host
->srp_dev
->dev
, iu
->dma
, iu
->size
,
265 static void srp_qp_event(struct ib_event
*event
, void *context
)
267 pr_debug("QP event %s (%d)\n",
268 ib_event_msg(event
->event
), event
->event
);
271 static int srp_init_ib_qp(struct srp_target_port
*target
,
274 struct ib_qp_attr
*attr
;
277 attr
= kmalloc(sizeof *attr
, GFP_KERNEL
);
281 ret
= ib_find_cached_pkey(target
->srp_host
->srp_dev
->dev
,
282 target
->srp_host
->port
,
283 be16_to_cpu(target
->ib_cm
.pkey
),
288 attr
->qp_state
= IB_QPS_INIT
;
289 attr
->qp_access_flags
= (IB_ACCESS_REMOTE_READ
|
290 IB_ACCESS_REMOTE_WRITE
);
291 attr
->port_num
= target
->srp_host
->port
;
293 ret
= ib_modify_qp(qp
, attr
,
304 static int srp_new_ib_cm_id(struct srp_rdma_ch
*ch
)
306 struct srp_target_port
*target
= ch
->target
;
307 struct ib_cm_id
*new_cm_id
;
309 new_cm_id
= ib_create_cm_id(target
->srp_host
->srp_dev
->dev
,
310 srp_ib_cm_handler
, ch
);
311 if (IS_ERR(new_cm_id
))
312 return PTR_ERR(new_cm_id
);
315 ib_destroy_cm_id(ch
->ib_cm
.cm_id
);
316 ch
->ib_cm
.cm_id
= new_cm_id
;
317 if (rdma_cap_opa_ah(target
->srp_host
->srp_dev
->dev
,
318 target
->srp_host
->port
))
319 ch
->ib_cm
.path
.rec_type
= SA_PATH_REC_TYPE_OPA
;
321 ch
->ib_cm
.path
.rec_type
= SA_PATH_REC_TYPE_IB
;
322 ch
->ib_cm
.path
.sgid
= target
->sgid
;
323 ch
->ib_cm
.path
.dgid
= target
->ib_cm
.orig_dgid
;
324 ch
->ib_cm
.path
.pkey
= target
->ib_cm
.pkey
;
325 ch
->ib_cm
.path
.service_id
= target
->ib_cm
.service_id
;
330 static int srp_new_rdma_cm_id(struct srp_rdma_ch
*ch
)
332 struct srp_target_port
*target
= ch
->target
;
333 struct rdma_cm_id
*new_cm_id
;
336 new_cm_id
= rdma_create_id(target
->net
, srp_rdma_cm_handler
, ch
,
337 RDMA_PS_TCP
, IB_QPT_RC
);
338 if (IS_ERR(new_cm_id
)) {
339 ret
= PTR_ERR(new_cm_id
);
344 init_completion(&ch
->done
);
345 ret
= rdma_resolve_addr(new_cm_id
, target
->rdma_cm
.src_specified
?
346 (struct sockaddr
*)&target
->rdma_cm
.src
: NULL
,
347 (struct sockaddr
*)&target
->rdma_cm
.dst
,
348 SRP_PATH_REC_TIMEOUT_MS
);
350 pr_err("No route available from %pIS to %pIS (%d)\n",
351 &target
->rdma_cm
.src
, &target
->rdma_cm
.dst
, ret
);
354 ret
= wait_for_completion_interruptible(&ch
->done
);
360 pr_err("Resolving address %pIS failed (%d)\n",
361 &target
->rdma_cm
.dst
, ret
);
365 swap(ch
->rdma_cm
.cm_id
, new_cm_id
);
369 rdma_destroy_id(new_cm_id
);
374 static int srp_new_cm_id(struct srp_rdma_ch
*ch
)
376 struct srp_target_port
*target
= ch
->target
;
378 return target
->using_rdma_cm
? srp_new_rdma_cm_id(ch
) :
379 srp_new_ib_cm_id(ch
);
382 static struct ib_fmr_pool
*srp_alloc_fmr_pool(struct srp_target_port
*target
)
384 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
385 struct ib_fmr_pool_param fmr_param
;
387 memset(&fmr_param
, 0, sizeof(fmr_param
));
388 fmr_param
.pool_size
= target
->mr_pool_size
;
389 fmr_param
.dirty_watermark
= fmr_param
.pool_size
/ 4;
391 fmr_param
.max_pages_per_fmr
= dev
->max_pages_per_mr
;
392 fmr_param
.page_shift
= ilog2(dev
->mr_page_size
);
393 fmr_param
.access
= (IB_ACCESS_LOCAL_WRITE
|
394 IB_ACCESS_REMOTE_WRITE
|
395 IB_ACCESS_REMOTE_READ
);
397 return ib_create_fmr_pool(dev
->pd
, &fmr_param
);
401 * srp_destroy_fr_pool() - free the resources owned by a pool
402 * @pool: Fast registration pool to be destroyed.
404 static void srp_destroy_fr_pool(struct srp_fr_pool
*pool
)
407 struct srp_fr_desc
*d
;
412 for (i
= 0, d
= &pool
->desc
[0]; i
< pool
->size
; i
++, d
++) {
420 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
421 * @device: IB device to allocate fast registration descriptors for.
422 * @pd: Protection domain associated with the FR descriptors.
423 * @pool_size: Number of descriptors to allocate.
424 * @max_page_list_len: Maximum fast registration work request page list length.
426 static struct srp_fr_pool
*srp_create_fr_pool(struct ib_device
*device
,
427 struct ib_pd
*pd
, int pool_size
,
428 int max_page_list_len
)
430 struct srp_fr_pool
*pool
;
431 struct srp_fr_desc
*d
;
433 int i
, ret
= -EINVAL
;
434 enum ib_mr_type mr_type
;
439 pool
= kzalloc(sizeof(struct srp_fr_pool
) +
440 pool_size
* sizeof(struct srp_fr_desc
), GFP_KERNEL
);
443 pool
->size
= pool_size
;
444 pool
->max_page_list_len
= max_page_list_len
;
445 spin_lock_init(&pool
->lock
);
446 INIT_LIST_HEAD(&pool
->free_list
);
448 if (device
->attrs
.device_cap_flags
& IB_DEVICE_SG_GAPS_REG
)
449 mr_type
= IB_MR_TYPE_SG_GAPS
;
451 mr_type
= IB_MR_TYPE_MEM_REG
;
453 for (i
= 0, d
= &pool
->desc
[0]; i
< pool
->size
; i
++, d
++) {
454 mr
= ib_alloc_mr(pd
, mr_type
, max_page_list_len
);
458 pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
459 dev_name(&device
->dev
));
463 list_add_tail(&d
->entry
, &pool
->free_list
);
470 srp_destroy_fr_pool(pool
);
478 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
479 * @pool: Pool to obtain descriptor from.
481 static struct srp_fr_desc
*srp_fr_pool_get(struct srp_fr_pool
*pool
)
483 struct srp_fr_desc
*d
= NULL
;
486 spin_lock_irqsave(&pool
->lock
, flags
);
487 if (!list_empty(&pool
->free_list
)) {
488 d
= list_first_entry(&pool
->free_list
, typeof(*d
), entry
);
491 spin_unlock_irqrestore(&pool
->lock
, flags
);
497 * srp_fr_pool_put() - put an FR descriptor back in the free list
498 * @pool: Pool the descriptor was allocated from.
499 * @desc: Pointer to an array of fast registration descriptor pointers.
500 * @n: Number of descriptors to put back.
502 * Note: The caller must already have queued an invalidation request for
503 * desc->mr->rkey before calling this function.
505 static void srp_fr_pool_put(struct srp_fr_pool
*pool
, struct srp_fr_desc
**desc
,
511 spin_lock_irqsave(&pool
->lock
, flags
);
512 for (i
= 0; i
< n
; i
++)
513 list_add(&desc
[i
]->entry
, &pool
->free_list
);
514 spin_unlock_irqrestore(&pool
->lock
, flags
);
517 static struct srp_fr_pool
*srp_alloc_fr_pool(struct srp_target_port
*target
)
519 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
521 return srp_create_fr_pool(dev
->dev
, dev
->pd
, target
->mr_pool_size
,
522 dev
->max_pages_per_mr
);
526 * srp_destroy_qp() - destroy an RDMA queue pair
527 * @ch: SRP RDMA channel.
529 * Drain the qp before destroying it. This avoids that the receive
530 * completion handler can access the queue pair while it is
533 static void srp_destroy_qp(struct srp_rdma_ch
*ch
)
535 spin_lock_irq(&ch
->lock
);
536 ib_process_cq_direct(ch
->send_cq
, -1);
537 spin_unlock_irq(&ch
->lock
);
540 ib_destroy_qp(ch
->qp
);
543 static int srp_create_ch_ib(struct srp_rdma_ch
*ch
)
545 struct srp_target_port
*target
= ch
->target
;
546 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
547 struct ib_qp_init_attr
*init_attr
;
548 struct ib_cq
*recv_cq
, *send_cq
;
550 struct ib_fmr_pool
*fmr_pool
= NULL
;
551 struct srp_fr_pool
*fr_pool
= NULL
;
552 const int m
= 1 + dev
->use_fast_reg
* target
->mr_per_cmd
* 2;
555 init_attr
= kzalloc(sizeof *init_attr
, GFP_KERNEL
);
559 /* queue_size + 1 for ib_drain_rq() */
560 recv_cq
= ib_alloc_cq(dev
->dev
, ch
, target
->queue_size
+ 1,
561 ch
->comp_vector
, IB_POLL_SOFTIRQ
);
562 if (IS_ERR(recv_cq
)) {
563 ret
= PTR_ERR(recv_cq
);
567 send_cq
= ib_alloc_cq(dev
->dev
, ch
, m
* target
->queue_size
,
568 ch
->comp_vector
, IB_POLL_DIRECT
);
569 if (IS_ERR(send_cq
)) {
570 ret
= PTR_ERR(send_cq
);
574 init_attr
->event_handler
= srp_qp_event
;
575 init_attr
->cap
.max_send_wr
= m
* target
->queue_size
;
576 init_attr
->cap
.max_recv_wr
= target
->queue_size
+ 1;
577 init_attr
->cap
.max_recv_sge
= 1;
578 init_attr
->cap
.max_send_sge
= 1;
579 init_attr
->sq_sig_type
= IB_SIGNAL_REQ_WR
;
580 init_attr
->qp_type
= IB_QPT_RC
;
581 init_attr
->send_cq
= send_cq
;
582 init_attr
->recv_cq
= recv_cq
;
584 if (target
->using_rdma_cm
) {
585 ret
= rdma_create_qp(ch
->rdma_cm
.cm_id
, dev
->pd
, init_attr
);
586 qp
= ch
->rdma_cm
.cm_id
->qp
;
588 qp
= ib_create_qp(dev
->pd
, init_attr
);
590 ret
= srp_init_ib_qp(target
, qp
);
598 pr_err("QP creation failed for dev %s: %d\n",
599 dev_name(&dev
->dev
->dev
), ret
);
603 if (dev
->use_fast_reg
) {
604 fr_pool
= srp_alloc_fr_pool(target
);
605 if (IS_ERR(fr_pool
)) {
606 ret
= PTR_ERR(fr_pool
);
607 shost_printk(KERN_WARNING
, target
->scsi_host
, PFX
608 "FR pool allocation failed (%d)\n", ret
);
611 } else if (dev
->use_fmr
) {
612 fmr_pool
= srp_alloc_fmr_pool(target
);
613 if (IS_ERR(fmr_pool
)) {
614 ret
= PTR_ERR(fmr_pool
);
615 shost_printk(KERN_WARNING
, target
->scsi_host
, PFX
616 "FMR pool allocation failed (%d)\n", ret
);
624 ib_free_cq(ch
->recv_cq
);
626 ib_free_cq(ch
->send_cq
);
629 ch
->recv_cq
= recv_cq
;
630 ch
->send_cq
= send_cq
;
632 if (dev
->use_fast_reg
) {
634 srp_destroy_fr_pool(ch
->fr_pool
);
635 ch
->fr_pool
= fr_pool
;
636 } else if (dev
->use_fmr
) {
638 ib_destroy_fmr_pool(ch
->fmr_pool
);
639 ch
->fmr_pool
= fmr_pool
;
646 if (target
->using_rdma_cm
)
647 rdma_destroy_qp(ch
->rdma_cm
.cm_id
);
663 * Note: this function may be called without srp_alloc_iu_bufs() having been
664 * invoked. Hence the ch->[rt]x_ring checks.
666 static void srp_free_ch_ib(struct srp_target_port
*target
,
667 struct srp_rdma_ch
*ch
)
669 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
675 if (target
->using_rdma_cm
) {
676 if (ch
->rdma_cm
.cm_id
) {
677 rdma_destroy_id(ch
->rdma_cm
.cm_id
);
678 ch
->rdma_cm
.cm_id
= NULL
;
681 if (ch
->ib_cm
.cm_id
) {
682 ib_destroy_cm_id(ch
->ib_cm
.cm_id
);
683 ch
->ib_cm
.cm_id
= NULL
;
687 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
691 if (dev
->use_fast_reg
) {
693 srp_destroy_fr_pool(ch
->fr_pool
);
694 } else if (dev
->use_fmr
) {
696 ib_destroy_fmr_pool(ch
->fmr_pool
);
700 ib_free_cq(ch
->send_cq
);
701 ib_free_cq(ch
->recv_cq
);
704 * Avoid that the SCSI error handler tries to use this channel after
705 * it has been freed. The SCSI error handler can namely continue
706 * trying to perform recovery actions after scsi_remove_host()
712 ch
->send_cq
= ch
->recv_cq
= NULL
;
715 for (i
= 0; i
< target
->queue_size
; ++i
)
716 srp_free_iu(target
->srp_host
, ch
->rx_ring
[i
]);
721 for (i
= 0; i
< target
->queue_size
; ++i
)
722 srp_free_iu(target
->srp_host
, ch
->tx_ring
[i
]);
728 static void srp_path_rec_completion(int status
,
729 struct sa_path_rec
*pathrec
,
732 struct srp_rdma_ch
*ch
= ch_ptr
;
733 struct srp_target_port
*target
= ch
->target
;
737 shost_printk(KERN_ERR
, target
->scsi_host
,
738 PFX
"Got failed path rec status %d\n", status
);
740 ch
->ib_cm
.path
= *pathrec
;
744 static int srp_ib_lookup_path(struct srp_rdma_ch
*ch
)
746 struct srp_target_port
*target
= ch
->target
;
749 ch
->ib_cm
.path
.numb_path
= 1;
751 init_completion(&ch
->done
);
753 ch
->ib_cm
.path_query_id
= ib_sa_path_rec_get(&srp_sa_client
,
754 target
->srp_host
->srp_dev
->dev
,
755 target
->srp_host
->port
,
757 IB_SA_PATH_REC_SERVICE_ID
|
758 IB_SA_PATH_REC_DGID
|
759 IB_SA_PATH_REC_SGID
|
760 IB_SA_PATH_REC_NUMB_PATH
|
762 SRP_PATH_REC_TIMEOUT_MS
,
764 srp_path_rec_completion
,
765 ch
, &ch
->ib_cm
.path_query
);
766 if (ch
->ib_cm
.path_query_id
< 0)
767 return ch
->ib_cm
.path_query_id
;
769 ret
= wait_for_completion_interruptible(&ch
->done
);
774 shost_printk(KERN_WARNING
, target
->scsi_host
,
775 PFX
"Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
776 ch
->ib_cm
.path
.sgid
.raw
, ch
->ib_cm
.path
.dgid
.raw
,
777 be16_to_cpu(target
->ib_cm
.pkey
),
778 be64_to_cpu(target
->ib_cm
.service_id
));
783 static int srp_rdma_lookup_path(struct srp_rdma_ch
*ch
)
785 struct srp_target_port
*target
= ch
->target
;
788 init_completion(&ch
->done
);
790 ret
= rdma_resolve_route(ch
->rdma_cm
.cm_id
, SRP_PATH_REC_TIMEOUT_MS
);
794 wait_for_completion_interruptible(&ch
->done
);
797 shost_printk(KERN_WARNING
, target
->scsi_host
,
798 PFX
"Path resolution failed\n");
803 static int srp_lookup_path(struct srp_rdma_ch
*ch
)
805 struct srp_target_port
*target
= ch
->target
;
807 return target
->using_rdma_cm
? srp_rdma_lookup_path(ch
) :
808 srp_ib_lookup_path(ch
);
811 static u8
srp_get_subnet_timeout(struct srp_host
*host
)
813 struct ib_port_attr attr
;
815 u8 subnet_timeout
= 18;
817 ret
= ib_query_port(host
->srp_dev
->dev
, host
->port
, &attr
);
819 subnet_timeout
= attr
.subnet_timeout
;
821 if (unlikely(subnet_timeout
< 15))
822 pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n",
823 dev_name(&host
->srp_dev
->dev
->dev
), subnet_timeout
);
825 return subnet_timeout
;
828 static int srp_send_req(struct srp_rdma_ch
*ch
, bool multich
)
830 struct srp_target_port
*target
= ch
->target
;
832 struct rdma_conn_param rdma_param
;
833 struct srp_login_req_rdma rdma_req
;
834 struct ib_cm_req_param ib_param
;
835 struct srp_login_req ib_req
;
840 req
= kzalloc(sizeof *req
, GFP_KERNEL
);
844 req
->ib_param
.flow_control
= 1;
845 req
->ib_param
.retry_count
= target
->tl_retry_count
;
848 * Pick some arbitrary defaults here; we could make these
849 * module parameters if anyone cared about setting them.
851 req
->ib_param
.responder_resources
= 4;
852 req
->ib_param
.rnr_retry_count
= 7;
853 req
->ib_param
.max_cm_retries
= 15;
855 req
->ib_req
.opcode
= SRP_LOGIN_REQ
;
857 req
->ib_req
.req_it_iu_len
= cpu_to_be32(target
->max_iu_len
);
858 req
->ib_req
.req_buf_fmt
= cpu_to_be16(SRP_BUF_FORMAT_DIRECT
|
859 SRP_BUF_FORMAT_INDIRECT
);
860 req
->ib_req
.req_flags
= (multich
? SRP_MULTICHAN_MULTI
:
861 SRP_MULTICHAN_SINGLE
);
863 if (target
->using_rdma_cm
) {
864 req
->rdma_param
.flow_control
= req
->ib_param
.flow_control
;
865 req
->rdma_param
.responder_resources
=
866 req
->ib_param
.responder_resources
;
867 req
->rdma_param
.initiator_depth
= req
->ib_param
.initiator_depth
;
868 req
->rdma_param
.retry_count
= req
->ib_param
.retry_count
;
869 req
->rdma_param
.rnr_retry_count
= req
->ib_param
.rnr_retry_count
;
870 req
->rdma_param
.private_data
= &req
->rdma_req
;
871 req
->rdma_param
.private_data_len
= sizeof(req
->rdma_req
);
873 req
->rdma_req
.opcode
= req
->ib_req
.opcode
;
874 req
->rdma_req
.tag
= req
->ib_req
.tag
;
875 req
->rdma_req
.req_it_iu_len
= req
->ib_req
.req_it_iu_len
;
876 req
->rdma_req
.req_buf_fmt
= req
->ib_req
.req_buf_fmt
;
877 req
->rdma_req
.req_flags
= req
->ib_req
.req_flags
;
879 ipi
= req
->rdma_req
.initiator_port_id
;
880 tpi
= req
->rdma_req
.target_port_id
;
884 subnet_timeout
= srp_get_subnet_timeout(target
->srp_host
);
886 req
->ib_param
.primary_path
= &ch
->ib_cm
.path
;
887 req
->ib_param
.alternate_path
= NULL
;
888 req
->ib_param
.service_id
= target
->ib_cm
.service_id
;
889 get_random_bytes(&req
->ib_param
.starting_psn
, 4);
890 req
->ib_param
.starting_psn
&= 0xffffff;
891 req
->ib_param
.qp_num
= ch
->qp
->qp_num
;
892 req
->ib_param
.qp_type
= ch
->qp
->qp_type
;
893 req
->ib_param
.local_cm_response_timeout
= subnet_timeout
+ 2;
894 req
->ib_param
.remote_cm_response_timeout
= subnet_timeout
+ 2;
895 req
->ib_param
.private_data
= &req
->ib_req
;
896 req
->ib_param
.private_data_len
= sizeof(req
->ib_req
);
898 ipi
= req
->ib_req
.initiator_port_id
;
899 tpi
= req
->ib_req
.target_port_id
;
903 * In the published SRP specification (draft rev. 16a), the
904 * port identifier format is 8 bytes of ID extension followed
905 * by 8 bytes of GUID. Older drafts put the two halves in the
906 * opposite order, so that the GUID comes first.
908 * Targets conforming to these obsolete drafts can be
909 * recognized by the I/O Class they report.
911 if (target
->io_class
== SRP_REV10_IB_IO_CLASS
) {
912 memcpy(ipi
, &target
->sgid
.global
.interface_id
, 8);
913 memcpy(ipi
+ 8, &target
->initiator_ext
, 8);
914 memcpy(tpi
, &target
->ioc_guid
, 8);
915 memcpy(tpi
+ 8, &target
->id_ext
, 8);
917 memcpy(ipi
, &target
->initiator_ext
, 8);
918 memcpy(ipi
+ 8, &target
->sgid
.global
.interface_id
, 8);
919 memcpy(tpi
, &target
->id_ext
, 8);
920 memcpy(tpi
+ 8, &target
->ioc_guid
, 8);
924 * Topspin/Cisco SRP targets will reject our login unless we
925 * zero out the first 8 bytes of our initiator port ID and set
926 * the second 8 bytes to the local node GUID.
928 if (srp_target_is_topspin(target
)) {
929 shost_printk(KERN_DEBUG
, target
->scsi_host
,
930 PFX
"Topspin/Cisco initiator port ID workaround "
931 "activated for target GUID %016llx\n",
932 be64_to_cpu(target
->ioc_guid
));
934 memcpy(ipi
+ 8, &target
->srp_host
->srp_dev
->dev
->node_guid
, 8);
937 if (target
->using_rdma_cm
)
938 status
= rdma_connect(ch
->rdma_cm
.cm_id
, &req
->rdma_param
);
940 status
= ib_send_cm_req(ch
->ib_cm
.cm_id
, &req
->ib_param
);
947 static bool srp_queue_remove_work(struct srp_target_port
*target
)
949 bool changed
= false;
951 spin_lock_irq(&target
->lock
);
952 if (target
->state
!= SRP_TARGET_REMOVED
) {
953 target
->state
= SRP_TARGET_REMOVED
;
956 spin_unlock_irq(&target
->lock
);
959 queue_work(srp_remove_wq
, &target
->remove_work
);
964 static void srp_disconnect_target(struct srp_target_port
*target
)
966 struct srp_rdma_ch
*ch
;
969 /* XXX should send SRP_I_LOGOUT request */
971 for (i
= 0; i
< target
->ch_count
; i
++) {
973 ch
->connected
= false;
975 if (target
->using_rdma_cm
) {
976 if (ch
->rdma_cm
.cm_id
)
977 rdma_disconnect(ch
->rdma_cm
.cm_id
);
980 ret
= ib_send_cm_dreq(ch
->ib_cm
.cm_id
,
984 shost_printk(KERN_DEBUG
, target
->scsi_host
,
985 PFX
"Sending CM DREQ failed\n");
990 static void srp_free_req_data(struct srp_target_port
*target
,
991 struct srp_rdma_ch
*ch
)
993 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
994 struct ib_device
*ibdev
= dev
->dev
;
995 struct srp_request
*req
;
1001 for (i
= 0; i
< target
->req_ring_size
; ++i
) {
1002 req
= &ch
->req_ring
[i
];
1003 if (dev
->use_fast_reg
) {
1004 kfree(req
->fr_list
);
1006 kfree(req
->fmr_list
);
1007 kfree(req
->map_page
);
1009 if (req
->indirect_dma_addr
) {
1010 ib_dma_unmap_single(ibdev
, req
->indirect_dma_addr
,
1011 target
->indirect_size
,
1014 kfree(req
->indirect_desc
);
1017 kfree(ch
->req_ring
);
1018 ch
->req_ring
= NULL
;
1021 static int srp_alloc_req_data(struct srp_rdma_ch
*ch
)
1023 struct srp_target_port
*target
= ch
->target
;
1024 struct srp_device
*srp_dev
= target
->srp_host
->srp_dev
;
1025 struct ib_device
*ibdev
= srp_dev
->dev
;
1026 struct srp_request
*req
;
1028 dma_addr_t dma_addr
;
1029 int i
, ret
= -ENOMEM
;
1031 ch
->req_ring
= kcalloc(target
->req_ring_size
, sizeof(*ch
->req_ring
),
1036 for (i
= 0; i
< target
->req_ring_size
; ++i
) {
1037 req
= &ch
->req_ring
[i
];
1038 mr_list
= kmalloc_array(target
->mr_per_cmd
, sizeof(void *),
1042 if (srp_dev
->use_fast_reg
) {
1043 req
->fr_list
= mr_list
;
1045 req
->fmr_list
= mr_list
;
1046 req
->map_page
= kmalloc_array(srp_dev
->max_pages_per_mr
,
1052 req
->indirect_desc
= kmalloc(target
->indirect_size
, GFP_KERNEL
);
1053 if (!req
->indirect_desc
)
1056 dma_addr
= ib_dma_map_single(ibdev
, req
->indirect_desc
,
1057 target
->indirect_size
,
1059 if (ib_dma_mapping_error(ibdev
, dma_addr
))
1062 req
->indirect_dma_addr
= dma_addr
;
1071 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
1072 * @shost: SCSI host whose attributes to remove from sysfs.
1074 * Note: Any attributes defined in the host template and that did not exist
1075 * before invocation of this function will be ignored.
1077 static void srp_del_scsi_host_attr(struct Scsi_Host
*shost
)
1079 struct device_attribute
**attr
;
1081 for (attr
= shost
->hostt
->shost_attrs
; attr
&& *attr
; ++attr
)
1082 device_remove_file(&shost
->shost_dev
, *attr
);
1085 static void srp_remove_target(struct srp_target_port
*target
)
1087 struct srp_rdma_ch
*ch
;
1090 WARN_ON_ONCE(target
->state
!= SRP_TARGET_REMOVED
);
1092 srp_del_scsi_host_attr(target
->scsi_host
);
1093 srp_rport_get(target
->rport
);
1094 srp_remove_host(target
->scsi_host
);
1095 scsi_remove_host(target
->scsi_host
);
1096 srp_stop_rport_timers(target
->rport
);
1097 srp_disconnect_target(target
);
1098 kobj_ns_drop(KOBJ_NS_TYPE_NET
, target
->net
);
1099 for (i
= 0; i
< target
->ch_count
; i
++) {
1100 ch
= &target
->ch
[i
];
1101 srp_free_ch_ib(target
, ch
);
1103 cancel_work_sync(&target
->tl_err_work
);
1104 srp_rport_put(target
->rport
);
1105 for (i
= 0; i
< target
->ch_count
; i
++) {
1106 ch
= &target
->ch
[i
];
1107 srp_free_req_data(target
, ch
);
1112 spin_lock(&target
->srp_host
->target_lock
);
1113 list_del(&target
->list
);
1114 spin_unlock(&target
->srp_host
->target_lock
);
1116 scsi_host_put(target
->scsi_host
);
1119 static void srp_remove_work(struct work_struct
*work
)
1121 struct srp_target_port
*target
=
1122 container_of(work
, struct srp_target_port
, remove_work
);
1124 WARN_ON_ONCE(target
->state
!= SRP_TARGET_REMOVED
);
1126 srp_remove_target(target
);
1129 static void srp_rport_delete(struct srp_rport
*rport
)
1131 struct srp_target_port
*target
= rport
->lld_data
;
1133 srp_queue_remove_work(target
);
1137 * srp_connected_ch() - number of connected channels
1138 * @target: SRP target port.
1140 static int srp_connected_ch(struct srp_target_port
*target
)
1144 for (i
= 0; i
< target
->ch_count
; i
++)
1145 c
+= target
->ch
[i
].connected
;
1150 static int srp_connect_ch(struct srp_rdma_ch
*ch
, bool multich
)
1152 struct srp_target_port
*target
= ch
->target
;
1155 WARN_ON_ONCE(!multich
&& srp_connected_ch(target
) > 0);
1157 ret
= srp_lookup_path(ch
);
1162 init_completion(&ch
->done
);
1163 ret
= srp_send_req(ch
, multich
);
1166 ret
= wait_for_completion_interruptible(&ch
->done
);
1171 * The CM event handling code will set status to
1172 * SRP_PORT_REDIRECT if we get a port redirect REJ
1173 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1174 * redirect REJ back.
1179 ch
->connected
= true;
1182 case SRP_PORT_REDIRECT
:
1183 ret
= srp_lookup_path(ch
);
1188 case SRP_DLID_REDIRECT
:
1191 case SRP_STALE_CONN
:
1192 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
1193 "giving up on stale connection\n");
1203 return ret
<= 0 ? ret
: -ENODEV
;
/* Completion callback for a failed local-invalidate work request. */
static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "INV RKEY");
}
1211 static int srp_inv_rkey(struct srp_request
*req
, struct srp_rdma_ch
*ch
,
1214 struct ib_send_wr
*bad_wr
;
1215 struct ib_send_wr wr
= {
1216 .opcode
= IB_WR_LOCAL_INV
,
1220 .ex
.invalidate_rkey
= rkey
,
1223 wr
.wr_cqe
= &req
->reg_cqe
;
1224 req
->reg_cqe
.done
= srp_inv_rkey_err_done
;
1225 return ib_post_send(ch
->qp
, &wr
, &bad_wr
);
1228 static void srp_unmap_data(struct scsi_cmnd
*scmnd
,
1229 struct srp_rdma_ch
*ch
,
1230 struct srp_request
*req
)
1232 struct srp_target_port
*target
= ch
->target
;
1233 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1234 struct ib_device
*ibdev
= dev
->dev
;
1237 if (!scsi_sglist(scmnd
) ||
1238 (scmnd
->sc_data_direction
!= DMA_TO_DEVICE
&&
1239 scmnd
->sc_data_direction
!= DMA_FROM_DEVICE
))
1242 if (dev
->use_fast_reg
) {
1243 struct srp_fr_desc
**pfr
;
1245 for (i
= req
->nmdesc
, pfr
= req
->fr_list
; i
> 0; i
--, pfr
++) {
1246 res
= srp_inv_rkey(req
, ch
, (*pfr
)->mr
->rkey
);
1248 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
1249 "Queueing INV WR for rkey %#x failed (%d)\n",
1250 (*pfr
)->mr
->rkey
, res
);
1251 queue_work(system_long_wq
,
1252 &target
->tl_err_work
);
1256 srp_fr_pool_put(ch
->fr_pool
, req
->fr_list
,
1258 } else if (dev
->use_fmr
) {
1259 struct ib_pool_fmr
**pfmr
;
1261 for (i
= req
->nmdesc
, pfmr
= req
->fmr_list
; i
> 0; i
--, pfmr
++)
1262 ib_fmr_pool_unmap(*pfmr
);
1265 ib_dma_unmap_sg(ibdev
, scsi_sglist(scmnd
), scsi_sg_count(scmnd
),
1266 scmnd
->sc_data_direction
);
1270 * srp_claim_req - Take ownership of the scmnd associated with a request.
1271 * @ch: SRP RDMA channel.
1272 * @req: SRP request.
1273 * @sdev: If not NULL, only take ownership for this SCSI device.
1274 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1275 * ownership of @req->scmnd if it equals @scmnd.
1278 * Either NULL or a pointer to the SCSI command the caller became owner of.
1280 static struct scsi_cmnd
*srp_claim_req(struct srp_rdma_ch
*ch
,
1281 struct srp_request
*req
,
1282 struct scsi_device
*sdev
,
1283 struct scsi_cmnd
*scmnd
)
1285 unsigned long flags
;
1287 spin_lock_irqsave(&ch
->lock
, flags
);
1289 (!sdev
|| req
->scmnd
->device
== sdev
) &&
1290 (!scmnd
|| req
->scmnd
== scmnd
)) {
1296 spin_unlock_irqrestore(&ch
->lock
, flags
);
1302 * srp_free_req() - Unmap data and adjust ch->req_lim.
1303 * @ch: SRP RDMA channel.
1304 * @req: Request to be freed.
1305 * @scmnd: SCSI command associated with @req.
1306 * @req_lim_delta: Amount to be added to @target->req_lim.
1308 static void srp_free_req(struct srp_rdma_ch
*ch
, struct srp_request
*req
,
1309 struct scsi_cmnd
*scmnd
, s32 req_lim_delta
)
1311 unsigned long flags
;
1313 srp_unmap_data(scmnd
, ch
, req
);
1315 spin_lock_irqsave(&ch
->lock
, flags
);
1316 ch
->req_lim
+= req_lim_delta
;
1317 spin_unlock_irqrestore(&ch
->lock
, flags
);
1320 static void srp_finish_req(struct srp_rdma_ch
*ch
, struct srp_request
*req
,
1321 struct scsi_device
*sdev
, int result
)
1323 struct scsi_cmnd
*scmnd
= srp_claim_req(ch
, req
, sdev
, NULL
);
1326 srp_free_req(ch
, req
, scmnd
, 0);
1327 scmnd
->result
= result
;
1328 scmnd
->scsi_done(scmnd
);
1332 static void srp_terminate_io(struct srp_rport
*rport
)
1334 struct srp_target_port
*target
= rport
->lld_data
;
1335 struct srp_rdma_ch
*ch
;
1336 struct Scsi_Host
*shost
= target
->scsi_host
;
1337 struct scsi_device
*sdev
;
1341 * Invoking srp_terminate_io() while srp_queuecommand() is running
1342 * is not safe. Hence the warning statement below.
1344 shost_for_each_device(sdev
, shost
)
1345 WARN_ON_ONCE(sdev
->request_queue
->request_fn_active
);
1347 for (i
= 0; i
< target
->ch_count
; i
++) {
1348 ch
= &target
->ch
[i
];
1350 for (j
= 0; j
< target
->req_ring_size
; ++j
) {
1351 struct srp_request
*req
= &ch
->req_ring
[j
];
1353 srp_finish_req(ch
, req
, NULL
,
1354 DID_TRANSPORT_FAILFAST
<< 16);
1360 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1361 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1362 * srp_reset_device() or srp_reset_host() calls will occur while this function
1363 * is in progress. One way to realize that is not to call this function
1364 * directly but to call srp_reconnect_rport() instead since that last function
1365 * serializes calls of this function via rport->mutex and also blocks
1366 * srp_queuecommand() calls before invoking this function.
1368 static int srp_rport_reconnect(struct srp_rport
*rport
)
1370 struct srp_target_port
*target
= rport
->lld_data
;
1371 struct srp_rdma_ch
*ch
;
1373 bool multich
= false;
1375 srp_disconnect_target(target
);
1377 if (target
->state
== SRP_TARGET_SCANNING
)
1381 * Now get a new local CM ID so that we avoid confusing the target in
1382 * case things are really fouled up. Doing so also ensures that all CM
1383 * callbacks will have finished before a new QP is allocated.
1385 for (i
= 0; i
< target
->ch_count
; i
++) {
1386 ch
= &target
->ch
[i
];
1387 ret
+= srp_new_cm_id(ch
);
1389 for (i
= 0; i
< target
->ch_count
; i
++) {
1390 ch
= &target
->ch
[i
];
1391 for (j
= 0; j
< target
->req_ring_size
; ++j
) {
1392 struct srp_request
*req
= &ch
->req_ring
[j
];
1394 srp_finish_req(ch
, req
, NULL
, DID_RESET
<< 16);
1397 for (i
= 0; i
< target
->ch_count
; i
++) {
1398 ch
= &target
->ch
[i
];
1400 * Whether or not creating a new CM ID succeeded, create a new
1401 * QP. This guarantees that all completion callback function
1402 * invocations have finished before request resetting starts.
1404 ret
+= srp_create_ch_ib(ch
);
1406 INIT_LIST_HEAD(&ch
->free_tx
);
1407 for (j
= 0; j
< target
->queue_size
; ++j
)
1408 list_add(&ch
->tx_ring
[j
]->list
, &ch
->free_tx
);
1411 target
->qp_in_error
= false;
1413 for (i
= 0; i
< target
->ch_count
; i
++) {
1414 ch
= &target
->ch
[i
];
1417 ret
= srp_connect_ch(ch
, multich
);
1422 shost_printk(KERN_INFO
, target
->scsi_host
,
1423 PFX
"reconnect succeeded\n");
1428 static void srp_map_desc(struct srp_map_state
*state
, dma_addr_t dma_addr
,
1429 unsigned int dma_len
, u32 rkey
)
1431 struct srp_direct_buf
*desc
= state
->desc
;
1433 WARN_ON_ONCE(!dma_len
);
1435 desc
->va
= cpu_to_be64(dma_addr
);
1436 desc
->key
= cpu_to_be32(rkey
);
1437 desc
->len
= cpu_to_be32(dma_len
);
1439 state
->total_len
+= dma_len
;
1444 static int srp_map_finish_fmr(struct srp_map_state
*state
,
1445 struct srp_rdma_ch
*ch
)
1447 struct srp_target_port
*target
= ch
->target
;
1448 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1449 struct ib_pool_fmr
*fmr
;
1452 if (state
->fmr
.next
>= state
->fmr
.end
) {
1453 shost_printk(KERN_ERR
, ch
->target
->scsi_host
,
1454 PFX
"Out of MRs (mr_per_cmd = %d)\n",
1455 ch
->target
->mr_per_cmd
);
1459 WARN_ON_ONCE(!dev
->use_fmr
);
1461 if (state
->npages
== 0)
1464 if (state
->npages
== 1 && target
->global_rkey
) {
1465 srp_map_desc(state
, state
->base_dma_addr
, state
->dma_len
,
1466 target
->global_rkey
);
1470 fmr
= ib_fmr_pool_map_phys(ch
->fmr_pool
, state
->pages
,
1471 state
->npages
, io_addr
);
1473 return PTR_ERR(fmr
);
1475 *state
->fmr
.next
++ = fmr
;
1478 srp_map_desc(state
, state
->base_dma_addr
& ~dev
->mr_page_mask
,
1479 state
->dma_len
, fmr
->fmr
->rkey
);
/* Completion callback for a failed IB_WR_REG_MR work request. */
static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "FAST REG");
}
1494 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1495 * where to start in the first element. If sg_offset_p != NULL then
1496 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1497 * byte that has not yet been mapped.
1499 static int srp_map_finish_fr(struct srp_map_state
*state
,
1500 struct srp_request
*req
,
1501 struct srp_rdma_ch
*ch
, int sg_nents
,
1502 unsigned int *sg_offset_p
)
1504 struct srp_target_port
*target
= ch
->target
;
1505 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1506 struct ib_send_wr
*bad_wr
;
1507 struct ib_reg_wr wr
;
1508 struct srp_fr_desc
*desc
;
1512 if (state
->fr
.next
>= state
->fr
.end
) {
1513 shost_printk(KERN_ERR
, ch
->target
->scsi_host
,
1514 PFX
"Out of MRs (mr_per_cmd = %d)\n",
1515 ch
->target
->mr_per_cmd
);
1519 WARN_ON_ONCE(!dev
->use_fast_reg
);
1521 if (sg_nents
== 1 && target
->global_rkey
) {
1522 unsigned int sg_offset
= sg_offset_p
? *sg_offset_p
: 0;
1524 srp_map_desc(state
, sg_dma_address(state
->sg
) + sg_offset
,
1525 sg_dma_len(state
->sg
) - sg_offset
,
1526 target
->global_rkey
);
1532 desc
= srp_fr_pool_get(ch
->fr_pool
);
1536 rkey
= ib_inc_rkey(desc
->mr
->rkey
);
1537 ib_update_fast_reg_key(desc
->mr
, rkey
);
1539 n
= ib_map_mr_sg(desc
->mr
, state
->sg
, sg_nents
, sg_offset_p
,
1541 if (unlikely(n
< 0)) {
1542 srp_fr_pool_put(ch
->fr_pool
, &desc
, 1);
1543 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1544 dev_name(&req
->scmnd
->device
->sdev_gendev
), sg_nents
,
1545 sg_offset_p
? *sg_offset_p
: -1, n
);
1549 WARN_ON_ONCE(desc
->mr
->length
== 0);
1551 req
->reg_cqe
.done
= srp_reg_mr_err_done
;
1554 wr
.wr
.opcode
= IB_WR_REG_MR
;
1555 wr
.wr
.wr_cqe
= &req
->reg_cqe
;
1557 wr
.wr
.send_flags
= 0;
1559 wr
.key
= desc
->mr
->rkey
;
1560 wr
.access
= (IB_ACCESS_LOCAL_WRITE
|
1561 IB_ACCESS_REMOTE_READ
|
1562 IB_ACCESS_REMOTE_WRITE
);
1564 *state
->fr
.next
++ = desc
;
1567 srp_map_desc(state
, desc
->mr
->iova
,
1568 desc
->mr
->length
, desc
->mr
->rkey
);
1570 err
= ib_post_send(ch
->qp
, &wr
.wr
, &bad_wr
);
1571 if (unlikely(err
)) {
1572 WARN_ON_ONCE(err
== -ENOMEM
);
1579 static int srp_map_sg_entry(struct srp_map_state
*state
,
1580 struct srp_rdma_ch
*ch
,
1581 struct scatterlist
*sg
)
1583 struct srp_target_port
*target
= ch
->target
;
1584 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1585 struct ib_device
*ibdev
= dev
->dev
;
1586 dma_addr_t dma_addr
= ib_sg_dma_address(ibdev
, sg
);
1587 unsigned int dma_len
= ib_sg_dma_len(ibdev
, sg
);
1588 unsigned int len
= 0;
1591 WARN_ON_ONCE(!dma_len
);
1594 unsigned offset
= dma_addr
& ~dev
->mr_page_mask
;
1596 if (state
->npages
== dev
->max_pages_per_mr
||
1597 (state
->npages
> 0 && offset
!= 0)) {
1598 ret
= srp_map_finish_fmr(state
, ch
);
1603 len
= min_t(unsigned int, dma_len
, dev
->mr_page_size
- offset
);
1606 state
->base_dma_addr
= dma_addr
;
1607 state
->pages
[state
->npages
++] = dma_addr
& dev
->mr_page_mask
;
1608 state
->dma_len
+= len
;
1614 * If the end of the MR is not on a page boundary then we need to
1615 * close it out and start a new one -- we can only merge at page
1619 if ((dma_addr
& ~dev
->mr_page_mask
) != 0)
1620 ret
= srp_map_finish_fmr(state
, ch
);
1624 static int srp_map_sg_fmr(struct srp_map_state
*state
, struct srp_rdma_ch
*ch
,
1625 struct srp_request
*req
, struct scatterlist
*scat
,
1628 struct scatterlist
*sg
;
1631 state
->pages
= req
->map_page
;
1632 state
->fmr
.next
= req
->fmr_list
;
1633 state
->fmr
.end
= req
->fmr_list
+ ch
->target
->mr_per_cmd
;
1635 for_each_sg(scat
, sg
, count
, i
) {
1636 ret
= srp_map_sg_entry(state
, ch
, sg
);
1641 ret
= srp_map_finish_fmr(state
, ch
);
1648 static int srp_map_sg_fr(struct srp_map_state
*state
, struct srp_rdma_ch
*ch
,
1649 struct srp_request
*req
, struct scatterlist
*scat
,
1652 unsigned int sg_offset
= 0;
1654 state
->fr
.next
= req
->fr_list
;
1655 state
->fr
.end
= req
->fr_list
+ ch
->target
->mr_per_cmd
;
1664 n
= srp_map_finish_fr(state
, req
, ch
, count
, &sg_offset
);
1665 if (unlikely(n
< 0))
1669 for (i
= 0; i
< n
; i
++)
1670 state
->sg
= sg_next(state
->sg
);
1676 static int srp_map_sg_dma(struct srp_map_state
*state
, struct srp_rdma_ch
*ch
,
1677 struct srp_request
*req
, struct scatterlist
*scat
,
1680 struct srp_target_port
*target
= ch
->target
;
1681 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1682 struct scatterlist
*sg
;
1685 for_each_sg(scat
, sg
, count
, i
) {
1686 srp_map_desc(state
, ib_sg_dma_address(dev
->dev
, sg
),
1687 ib_sg_dma_len(dev
->dev
, sg
),
1688 target
->global_rkey
);
1695 * Register the indirect data buffer descriptor with the HCA.
1697 * Note: since the indirect data buffer descriptor has been allocated with
1698 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1701 static int srp_map_idb(struct srp_rdma_ch
*ch
, struct srp_request
*req
,
1702 void **next_mr
, void **end_mr
, u32 idb_len
,
1705 struct srp_target_port
*target
= ch
->target
;
1706 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1707 struct srp_map_state state
;
1708 struct srp_direct_buf idb_desc
;
1710 struct scatterlist idb_sg
[1];
1713 memset(&state
, 0, sizeof(state
));
1714 memset(&idb_desc
, 0, sizeof(idb_desc
));
1715 state
.gen
.next
= next_mr
;
1716 state
.gen
.end
= end_mr
;
1717 state
.desc
= &idb_desc
;
1718 state
.base_dma_addr
= req
->indirect_dma_addr
;
1719 state
.dma_len
= idb_len
;
1721 if (dev
->use_fast_reg
) {
1723 sg_init_one(idb_sg
, req
->indirect_desc
, idb_len
);
1724 idb_sg
->dma_address
= req
->indirect_dma_addr
; /* hack! */
1725 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1726 idb_sg
->dma_length
= idb_sg
->length
; /* hack^2 */
1728 ret
= srp_map_finish_fr(&state
, req
, ch
, 1, NULL
);
1731 WARN_ON_ONCE(ret
< 1);
1732 } else if (dev
->use_fmr
) {
1733 state
.pages
= idb_pages
;
1734 state
.pages
[0] = (req
->indirect_dma_addr
&
1737 ret
= srp_map_finish_fmr(&state
, ch
);
1744 *idb_rkey
= idb_desc
.key
;
1749 static void srp_check_mapping(struct srp_map_state
*state
,
1750 struct srp_rdma_ch
*ch
, struct srp_request
*req
,
1751 struct scatterlist
*scat
, int count
)
1753 struct srp_device
*dev
= ch
->target
->srp_host
->srp_dev
;
1754 struct srp_fr_desc
**pfr
;
1755 u64 desc_len
= 0, mr_len
= 0;
1758 for (i
= 0; i
< state
->ndesc
; i
++)
1759 desc_len
+= be32_to_cpu(req
->indirect_desc
[i
].len
);
1760 if (dev
->use_fast_reg
)
1761 for (i
= 0, pfr
= req
->fr_list
; i
< state
->nmdesc
; i
++, pfr
++)
1762 mr_len
+= (*pfr
)->mr
->length
;
1763 else if (dev
->use_fmr
)
1764 for (i
= 0; i
< state
->nmdesc
; i
++)
1765 mr_len
+= be32_to_cpu(req
->indirect_desc
[i
].len
);
1766 if (desc_len
!= scsi_bufflen(req
->scmnd
) ||
1767 mr_len
> scsi_bufflen(req
->scmnd
))
1768 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1769 scsi_bufflen(req
->scmnd
), desc_len
, mr_len
,
1770 state
->ndesc
, state
->nmdesc
);
1774 * srp_map_data() - map SCSI data buffer onto an SRP request
1775 * @scmnd: SCSI command to map
1776 * @ch: SRP RDMA channel
1779 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1782 static int srp_map_data(struct scsi_cmnd
*scmnd
, struct srp_rdma_ch
*ch
,
1783 struct srp_request
*req
)
1785 struct srp_target_port
*target
= ch
->target
;
1786 struct scatterlist
*scat
;
1787 struct srp_cmd
*cmd
= req
->cmd
->buf
;
1788 int len
, nents
, count
, ret
;
1789 struct srp_device
*dev
;
1790 struct ib_device
*ibdev
;
1791 struct srp_map_state state
;
1792 struct srp_indirect_buf
*indirect_hdr
;
1793 u32 idb_len
, table_len
;
1797 if (!scsi_sglist(scmnd
) || scmnd
->sc_data_direction
== DMA_NONE
)
1798 return sizeof (struct srp_cmd
);
1800 if (scmnd
->sc_data_direction
!= DMA_FROM_DEVICE
&&
1801 scmnd
->sc_data_direction
!= DMA_TO_DEVICE
) {
1802 shost_printk(KERN_WARNING
, target
->scsi_host
,
1803 PFX
"Unhandled data direction %d\n",
1804 scmnd
->sc_data_direction
);
1808 nents
= scsi_sg_count(scmnd
);
1809 scat
= scsi_sglist(scmnd
);
1811 dev
= target
->srp_host
->srp_dev
;
1814 count
= ib_dma_map_sg(ibdev
, scat
, nents
, scmnd
->sc_data_direction
);
1815 if (unlikely(count
== 0))
1818 fmt
= SRP_DATA_DESC_DIRECT
;
1819 len
= sizeof (struct srp_cmd
) + sizeof (struct srp_direct_buf
);
1821 if (count
== 1 && target
->global_rkey
) {
1823 * The midlayer only generated a single gather/scatter
1824 * entry, or DMA mapping coalesced everything to a
1825 * single entry. So a direct descriptor along with
1826 * the DMA MR suffices.
1828 struct srp_direct_buf
*buf
= (void *) cmd
->add_data
;
1830 buf
->va
= cpu_to_be64(ib_sg_dma_address(ibdev
, scat
));
1831 buf
->key
= cpu_to_be32(target
->global_rkey
);
1832 buf
->len
= cpu_to_be32(ib_sg_dma_len(ibdev
, scat
));
1839 * We have more than one scatter/gather entry, so build our indirect
1840 * descriptor table, trying to merge as many entries as we can.
1842 indirect_hdr
= (void *) cmd
->add_data
;
1844 ib_dma_sync_single_for_cpu(ibdev
, req
->indirect_dma_addr
,
1845 target
->indirect_size
, DMA_TO_DEVICE
);
1847 memset(&state
, 0, sizeof(state
));
1848 state
.desc
= req
->indirect_desc
;
1849 if (dev
->use_fast_reg
)
1850 ret
= srp_map_sg_fr(&state
, ch
, req
, scat
, count
);
1851 else if (dev
->use_fmr
)
1852 ret
= srp_map_sg_fmr(&state
, ch
, req
, scat
, count
);
1854 ret
= srp_map_sg_dma(&state
, ch
, req
, scat
, count
);
1855 req
->nmdesc
= state
.nmdesc
;
1860 DEFINE_DYNAMIC_DEBUG_METADATA(ddm
,
1861 "Memory mapping consistency check");
1862 if (DYNAMIC_DEBUG_BRANCH(ddm
))
1863 srp_check_mapping(&state
, ch
, req
, scat
, count
);
1866 /* We've mapped the request, now pull as much of the indirect
1867 * descriptor table as we can into the command buffer. If this
1868 * target is not using an external indirect table, we are
1869 * guaranteed to fit into the command, as the SCSI layer won't
1870 * give us more S/G entries than we allow.
1872 if (state
.ndesc
== 1) {
1874 * Memory registration collapsed the sg-list into one entry,
1875 * so use a direct descriptor.
1877 struct srp_direct_buf
*buf
= (void *) cmd
->add_data
;
1879 *buf
= req
->indirect_desc
[0];
1883 if (unlikely(target
->cmd_sg_cnt
< state
.ndesc
&&
1884 !target
->allow_ext_sg
)) {
1885 shost_printk(KERN_ERR
, target
->scsi_host
,
1886 "Could not fit S/G list into SRP_CMD\n");
1891 count
= min(state
.ndesc
, target
->cmd_sg_cnt
);
1892 table_len
= state
.ndesc
* sizeof (struct srp_direct_buf
);
1893 idb_len
= sizeof(struct srp_indirect_buf
) + table_len
;
1895 fmt
= SRP_DATA_DESC_INDIRECT
;
1896 len
= sizeof(struct srp_cmd
) + sizeof (struct srp_indirect_buf
);
1897 len
+= count
* sizeof (struct srp_direct_buf
);
1899 memcpy(indirect_hdr
->desc_list
, req
->indirect_desc
,
1900 count
* sizeof (struct srp_direct_buf
));
1902 if (!target
->global_rkey
) {
1903 ret
= srp_map_idb(ch
, req
, state
.gen
.next
, state
.gen
.end
,
1904 idb_len
, &idb_rkey
);
1909 idb_rkey
= cpu_to_be32(target
->global_rkey
);
1912 indirect_hdr
->table_desc
.va
= cpu_to_be64(req
->indirect_dma_addr
);
1913 indirect_hdr
->table_desc
.key
= idb_rkey
;
1914 indirect_hdr
->table_desc
.len
= cpu_to_be32(table_len
);
1915 indirect_hdr
->len
= cpu_to_be32(state
.total_len
);
1917 if (scmnd
->sc_data_direction
== DMA_TO_DEVICE
)
1918 cmd
->data_out_desc_cnt
= count
;
1920 cmd
->data_in_desc_cnt
= count
;
1922 ib_dma_sync_single_for_device(ibdev
, req
->indirect_dma_addr
, table_len
,
1926 if (scmnd
->sc_data_direction
== DMA_TO_DEVICE
)
1927 cmd
->buf_fmt
= fmt
<< 4;
1934 srp_unmap_data(scmnd
, ch
, req
);
1935 if (ret
== -ENOMEM
&& req
->nmdesc
>= target
->mr_pool_size
)
1941 * Return an IU and possible credit to the free pool
1943 static void srp_put_tx_iu(struct srp_rdma_ch
*ch
, struct srp_iu
*iu
,
1944 enum srp_iu_type iu_type
)
1946 unsigned long flags
;
1948 spin_lock_irqsave(&ch
->lock
, flags
);
1949 list_add(&iu
->list
, &ch
->free_tx
);
1950 if (iu_type
!= SRP_IU_RSP
)
1952 spin_unlock_irqrestore(&ch
->lock
, flags
);
1956 * Must be called with ch->lock held to protect req_lim and free_tx.
1957 * If IU is not sent, it must be returned using srp_put_tx_iu().
1960 * An upper limit for the number of allocated information units for each
1962 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1963 * more than Scsi_Host.can_queue requests.
1964 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1965 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1966 * one unanswered SRP request to an initiator.
1968 static struct srp_iu
*__srp_get_tx_iu(struct srp_rdma_ch
*ch
,
1969 enum srp_iu_type iu_type
)
1971 struct srp_target_port
*target
= ch
->target
;
1972 s32 rsv
= (iu_type
== SRP_IU_TSK_MGMT
) ? 0 : SRP_TSK_MGMT_SQ_SIZE
;
1975 lockdep_assert_held(&ch
->lock
);
1977 ib_process_cq_direct(ch
->send_cq
, -1);
1979 if (list_empty(&ch
->free_tx
))
1982 /* Initiator responses to target requests do not consume credits */
1983 if (iu_type
!= SRP_IU_RSP
) {
1984 if (ch
->req_lim
<= rsv
) {
1985 ++target
->zero_req_lim
;
1992 iu
= list_first_entry(&ch
->free_tx
, struct srp_iu
, list
);
1993 list_del(&iu
->list
);
1998 * Note: if this function is called from inside ib_drain_sq() then it will
1999 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
2000 * with status IB_WC_SUCCESS then that's a bug.
2002 static void srp_send_done(struct ib_cq
*cq
, struct ib_wc
*wc
)
2004 struct srp_iu
*iu
= container_of(wc
->wr_cqe
, struct srp_iu
, cqe
);
2005 struct srp_rdma_ch
*ch
= cq
->cq_context
;
2007 if (unlikely(wc
->status
!= IB_WC_SUCCESS
)) {
2008 srp_handle_qp_err(cq
, wc
, "SEND");
2012 lockdep_assert_held(&ch
->lock
);
2014 list_add(&iu
->list
, &ch
->free_tx
);
2017 static int srp_post_send(struct srp_rdma_ch
*ch
, struct srp_iu
*iu
, int len
)
2019 struct srp_target_port
*target
= ch
->target
;
2021 struct ib_send_wr wr
, *bad_wr
;
2023 list
.addr
= iu
->dma
;
2025 list
.lkey
= target
->lkey
;
2027 iu
->cqe
.done
= srp_send_done
;
2030 wr
.wr_cqe
= &iu
->cqe
;
2033 wr
.opcode
= IB_WR_SEND
;
2034 wr
.send_flags
= IB_SEND_SIGNALED
;
2036 return ib_post_send(ch
->qp
, &wr
, &bad_wr
);
2039 static int srp_post_recv(struct srp_rdma_ch
*ch
, struct srp_iu
*iu
)
2041 struct srp_target_port
*target
= ch
->target
;
2042 struct ib_recv_wr wr
, *bad_wr
;
2045 list
.addr
= iu
->dma
;
2046 list
.length
= iu
->size
;
2047 list
.lkey
= target
->lkey
;
2049 iu
->cqe
.done
= srp_recv_done
;
2052 wr
.wr_cqe
= &iu
->cqe
;
2056 return ib_post_recv(ch
->qp
, &wr
, &bad_wr
);
2059 static void srp_process_rsp(struct srp_rdma_ch
*ch
, struct srp_rsp
*rsp
)
2061 struct srp_target_port
*target
= ch
->target
;
2062 struct srp_request
*req
;
2063 struct scsi_cmnd
*scmnd
;
2064 unsigned long flags
;
2066 if (unlikely(rsp
->tag
& SRP_TAG_TSK_MGMT
)) {
2067 spin_lock_irqsave(&ch
->lock
, flags
);
2068 ch
->req_lim
+= be32_to_cpu(rsp
->req_lim_delta
);
2069 if (rsp
->tag
== ch
->tsk_mgmt_tag
) {
2070 ch
->tsk_mgmt_status
= -1;
2071 if (be32_to_cpu(rsp
->resp_data_len
) >= 4)
2072 ch
->tsk_mgmt_status
= rsp
->data
[3];
2073 complete(&ch
->tsk_mgmt_done
);
2075 shost_printk(KERN_ERR
, target
->scsi_host
,
2076 "Received tsk mgmt response too late for tag %#llx\n",
2079 spin_unlock_irqrestore(&ch
->lock
, flags
);
2081 scmnd
= scsi_host_find_tag(target
->scsi_host
, rsp
->tag
);
2082 if (scmnd
&& scmnd
->host_scribble
) {
2083 req
= (void *)scmnd
->host_scribble
;
2084 scmnd
= srp_claim_req(ch
, req
, NULL
, scmnd
);
2089 shost_printk(KERN_ERR
, target
->scsi_host
,
2090 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
2091 rsp
->tag
, ch
- target
->ch
, ch
->qp
->qp_num
);
2093 spin_lock_irqsave(&ch
->lock
, flags
);
2094 ch
->req_lim
+= be32_to_cpu(rsp
->req_lim_delta
);
2095 spin_unlock_irqrestore(&ch
->lock
, flags
);
2099 scmnd
->result
= rsp
->status
;
2101 if (rsp
->flags
& SRP_RSP_FLAG_SNSVALID
) {
2102 memcpy(scmnd
->sense_buffer
, rsp
->data
+
2103 be32_to_cpu(rsp
->resp_data_len
),
2104 min_t(int, be32_to_cpu(rsp
->sense_data_len
),
2105 SCSI_SENSE_BUFFERSIZE
));
2108 if (unlikely(rsp
->flags
& SRP_RSP_FLAG_DIUNDER
))
2109 scsi_set_resid(scmnd
, be32_to_cpu(rsp
->data_in_res_cnt
));
2110 else if (unlikely(rsp
->flags
& SRP_RSP_FLAG_DIOVER
))
2111 scsi_set_resid(scmnd
, -be32_to_cpu(rsp
->data_in_res_cnt
));
2112 else if (unlikely(rsp
->flags
& SRP_RSP_FLAG_DOUNDER
))
2113 scsi_set_resid(scmnd
, be32_to_cpu(rsp
->data_out_res_cnt
));
2114 else if (unlikely(rsp
->flags
& SRP_RSP_FLAG_DOOVER
))
2115 scsi_set_resid(scmnd
, -be32_to_cpu(rsp
->data_out_res_cnt
));
2117 srp_free_req(ch
, req
, scmnd
,
2118 be32_to_cpu(rsp
->req_lim_delta
));
2120 scmnd
->host_scribble
= NULL
;
2121 scmnd
->scsi_done(scmnd
);
2125 static int srp_response_common(struct srp_rdma_ch
*ch
, s32 req_delta
,
2128 struct srp_target_port
*target
= ch
->target
;
2129 struct ib_device
*dev
= target
->srp_host
->srp_dev
->dev
;
2130 unsigned long flags
;
2134 spin_lock_irqsave(&ch
->lock
, flags
);
2135 ch
->req_lim
+= req_delta
;
2136 iu
= __srp_get_tx_iu(ch
, SRP_IU_RSP
);
2137 spin_unlock_irqrestore(&ch
->lock
, flags
);
2140 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
2141 "no IU available to send response\n");
2145 ib_dma_sync_single_for_cpu(dev
, iu
->dma
, len
, DMA_TO_DEVICE
);
2146 memcpy(iu
->buf
, rsp
, len
);
2147 ib_dma_sync_single_for_device(dev
, iu
->dma
, len
, DMA_TO_DEVICE
);
2149 err
= srp_post_send(ch
, iu
, len
);
2151 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
2152 "unable to post response: %d\n", err
);
2153 srp_put_tx_iu(ch
, iu
, SRP_IU_RSP
);
2159 static void srp_process_cred_req(struct srp_rdma_ch
*ch
,
2160 struct srp_cred_req
*req
)
2162 struct srp_cred_rsp rsp
= {
2163 .opcode
= SRP_CRED_RSP
,
2166 s32 delta
= be32_to_cpu(req
->req_lim_delta
);
2168 if (srp_response_common(ch
, delta
, &rsp
, sizeof(rsp
)))
2169 shost_printk(KERN_ERR
, ch
->target
->scsi_host
, PFX
2170 "problems processing SRP_CRED_REQ\n");
2173 static void srp_process_aer_req(struct srp_rdma_ch
*ch
,
2174 struct srp_aer_req
*req
)
2176 struct srp_target_port
*target
= ch
->target
;
2177 struct srp_aer_rsp rsp
= {
2178 .opcode
= SRP_AER_RSP
,
2181 s32 delta
= be32_to_cpu(req
->req_lim_delta
);
2183 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
2184 "ignoring AER for LUN %llu\n", scsilun_to_int(&req
->lun
));
2186 if (srp_response_common(ch
, delta
, &rsp
, sizeof(rsp
)))
2187 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
2188 "problems processing SRP_AER_REQ\n");
2191 static void srp_recv_done(struct ib_cq
*cq
, struct ib_wc
*wc
)
2193 struct srp_iu
*iu
= container_of(wc
->wr_cqe
, struct srp_iu
, cqe
);
2194 struct srp_rdma_ch
*ch
= cq
->cq_context
;
2195 struct srp_target_port
*target
= ch
->target
;
2196 struct ib_device
*dev
= target
->srp_host
->srp_dev
->dev
;
2200 if (unlikely(wc
->status
!= IB_WC_SUCCESS
)) {
2201 srp_handle_qp_err(cq
, wc
, "RECV");
2205 ib_dma_sync_single_for_cpu(dev
, iu
->dma
, ch
->max_ti_iu_len
,
2208 opcode
= *(u8
*) iu
->buf
;
2211 shost_printk(KERN_ERR
, target
->scsi_host
,
2212 PFX
"recv completion, opcode 0x%02x\n", opcode
);
2213 print_hex_dump(KERN_ERR
, "", DUMP_PREFIX_OFFSET
, 8, 1,
2214 iu
->buf
, wc
->byte_len
, true);
2219 srp_process_rsp(ch
, iu
->buf
);
2223 srp_process_cred_req(ch
, iu
->buf
);
2227 srp_process_aer_req(ch
, iu
->buf
);
2231 /* XXX Handle target logout */
2232 shost_printk(KERN_WARNING
, target
->scsi_host
,
2233 PFX
"Got target logout request\n");
2237 shost_printk(KERN_WARNING
, target
->scsi_host
,
2238 PFX
"Unhandled SRP opcode 0x%02x\n", opcode
);
2242 ib_dma_sync_single_for_device(dev
, iu
->dma
, ch
->max_ti_iu_len
,
2245 res
= srp_post_recv(ch
, iu
);
2247 shost_printk(KERN_ERR
, target
->scsi_host
,
2248 PFX
"Recv failed with error code %d\n", res
);
2252 * srp_tl_err_work() - handle a transport layer error
2253 * @work: Work structure embedded in an SRP target port.
2255 * Note: This function may get invoked before the rport has been created,
2256 * hence the target->rport test.
2258 static void srp_tl_err_work(struct work_struct
*work
)
2260 struct srp_target_port
*target
;
2262 target
= container_of(work
, struct srp_target_port
, tl_err_work
);
2264 srp_start_tl_fail_timers(target
->rport
);
2267 static void srp_handle_qp_err(struct ib_cq
*cq
, struct ib_wc
*wc
,
2270 struct srp_rdma_ch
*ch
= cq
->cq_context
;
2271 struct srp_target_port
*target
= ch
->target
;
2273 if (ch
->connected
&& !target
->qp_in_error
) {
2274 shost_printk(KERN_ERR
, target
->scsi_host
,
2275 PFX
"failed %s status %s (%d) for CQE %p\n",
2276 opname
, ib_wc_status_msg(wc
->status
), wc
->status
,
2278 queue_work(system_long_wq
, &target
->tl_err_work
);
2280 target
->qp_in_error
= true;
2283 static int srp_queuecommand(struct Scsi_Host
*shost
, struct scsi_cmnd
*scmnd
)
2285 struct srp_target_port
*target
= host_to_target(shost
);
2286 struct srp_rport
*rport
= target
->rport
;
2287 struct srp_rdma_ch
*ch
;
2288 struct srp_request
*req
;
2290 struct srp_cmd
*cmd
;
2291 struct ib_device
*dev
;
2292 unsigned long flags
;
2296 const bool in_scsi_eh
= !in_interrupt() && current
== shost
->ehandler
;
2299 * The SCSI EH thread is the only context from which srp_queuecommand()
2300 * can get invoked for blocked devices (SDEV_BLOCK /
2301 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2302 * locking the rport mutex if invoked from inside the SCSI EH.
2305 mutex_lock(&rport
->mutex
);
2307 scmnd
->result
= srp_chkready(target
->rport
);
2308 if (unlikely(scmnd
->result
))
2311 WARN_ON_ONCE(scmnd
->request
->tag
< 0);
2312 tag
= blk_mq_unique_tag(scmnd
->request
);
2313 ch
= &target
->ch
[blk_mq_unique_tag_to_hwq(tag
)];
2314 idx
= blk_mq_unique_tag_to_tag(tag
);
2315 WARN_ONCE(idx
>= target
->req_ring_size
, "%s: tag %#x: idx %d >= %d\n",
2316 dev_name(&shost
->shost_gendev
), tag
, idx
,
2317 target
->req_ring_size
);
2319 spin_lock_irqsave(&ch
->lock
, flags
);
2320 iu
= __srp_get_tx_iu(ch
, SRP_IU_CMD
);
2321 spin_unlock_irqrestore(&ch
->lock
, flags
);
2326 req
= &ch
->req_ring
[idx
];
2327 dev
= target
->srp_host
->srp_dev
->dev
;
2328 ib_dma_sync_single_for_cpu(dev
, iu
->dma
, target
->max_iu_len
,
2331 scmnd
->host_scribble
= (void *) req
;
2334 memset(cmd
, 0, sizeof *cmd
);
2336 cmd
->opcode
= SRP_CMD
;
2337 int_to_scsilun(scmnd
->device
->lun
, &cmd
->lun
);
2339 memcpy(cmd
->cdb
, scmnd
->cmnd
, scmnd
->cmd_len
);
2344 len
= srp_map_data(scmnd
, ch
, req
);
2346 shost_printk(KERN_ERR
, target
->scsi_host
,
2347 PFX
"Failed to map data (%d)\n", len
);
2349 * If we ran out of memory descriptors (-ENOMEM) because an
2350 * application is queuing many requests with more than
2351 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2352 * to reduce queue depth temporarily.
2354 scmnd
->result
= len
== -ENOMEM
?
2355 DID_OK
<< 16 | QUEUE_FULL
<< 1 : DID_ERROR
<< 16;
2359 ib_dma_sync_single_for_device(dev
, iu
->dma
, target
->max_iu_len
,
2362 if (srp_post_send(ch
, iu
, len
)) {
2363 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
"Send failed\n");
2371 mutex_unlock(&rport
->mutex
);
2376 srp_unmap_data(scmnd
, ch
, req
);
2379 srp_put_tx_iu(ch
, iu
, SRP_IU_CMD
);
2382 * Avoid that the loops that iterate over the request ring can
2383 * encounter a dangling SCSI command pointer.
2388 if (scmnd
->result
) {
2389 scmnd
->scsi_done(scmnd
);
2392 ret
= SCSI_MLQUEUE_HOST_BUSY
;
2399 * Note: the resources allocated in this function are freed in
2402 static int srp_alloc_iu_bufs(struct srp_rdma_ch
*ch
)
2404 struct srp_target_port
*target
= ch
->target
;
2407 ch
->rx_ring
= kcalloc(target
->queue_size
, sizeof(*ch
->rx_ring
),
2411 ch
->tx_ring
= kcalloc(target
->queue_size
, sizeof(*ch
->tx_ring
),
2416 for (i
= 0; i
< target
->queue_size
; ++i
) {
2417 ch
->rx_ring
[i
] = srp_alloc_iu(target
->srp_host
,
2419 GFP_KERNEL
, DMA_FROM_DEVICE
);
2420 if (!ch
->rx_ring
[i
])
2424 for (i
= 0; i
< target
->queue_size
; ++i
) {
2425 ch
->tx_ring
[i
] = srp_alloc_iu(target
->srp_host
,
2427 GFP_KERNEL
, DMA_TO_DEVICE
);
2428 if (!ch
->tx_ring
[i
])
2431 list_add(&ch
->tx_ring
[i
]->list
, &ch
->free_tx
);
2437 for (i
= 0; i
< target
->queue_size
; ++i
) {
2438 srp_free_iu(target
->srp_host
, ch
->rx_ring
[i
]);
2439 srp_free_iu(target
->srp_host
, ch
->tx_ring
[i
]);
2452 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr
*qp_attr
, int attr_mask
)
2454 uint64_t T_tr_ns
, max_compl_time_ms
;
2455 uint32_t rq_tmo_jiffies
;
2458 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2459 * table 91), both the QP timeout and the retry count have to be set
2460 * for RC QP's during the RTR to RTS transition.
2462 WARN_ON_ONCE((attr_mask
& (IB_QP_TIMEOUT
| IB_QP_RETRY_CNT
)) !=
2463 (IB_QP_TIMEOUT
| IB_QP_RETRY_CNT
));
2466 * Set target->rq_tmo_jiffies to one second more than the largest time
2467 * it can take before an error completion is generated. See also
2468 * C9-140..142 in the IBTA spec for more information about how to
2469 * convert the QP Local ACK Timeout value to nanoseconds.
2471 T_tr_ns
= 4096 * (1ULL << qp_attr
->timeout
);
2472 max_compl_time_ms
= qp_attr
->retry_cnt
* 4 * T_tr_ns
;
2473 do_div(max_compl_time_ms
, NSEC_PER_MSEC
);
2474 rq_tmo_jiffies
= msecs_to_jiffies(max_compl_time_ms
+ 1000);
2476 return rq_tmo_jiffies
;
2479 static void srp_cm_rep_handler(struct ib_cm_id
*cm_id
,
2480 const struct srp_login_rsp
*lrsp
,
2481 struct srp_rdma_ch
*ch
)
2483 struct srp_target_port
*target
= ch
->target
;
2484 struct ib_qp_attr
*qp_attr
= NULL
;
2489 if (lrsp
->opcode
== SRP_LOGIN_RSP
) {
2490 ch
->max_ti_iu_len
= be32_to_cpu(lrsp
->max_ti_iu_len
);
2491 ch
->req_lim
= be32_to_cpu(lrsp
->req_lim_delta
);
2494 * Reserve credits for task management so we don't
2495 * bounce requests back to the SCSI mid-layer.
2497 target
->scsi_host
->can_queue
2498 = min(ch
->req_lim
- SRP_TSK_MGMT_SQ_SIZE
,
2499 target
->scsi_host
->can_queue
);
2500 target
->scsi_host
->cmd_per_lun
2501 = min_t(int, target
->scsi_host
->can_queue
,
2502 target
->scsi_host
->cmd_per_lun
);
2504 shost_printk(KERN_WARNING
, target
->scsi_host
,
2505 PFX
"Unhandled RSP opcode %#x\n", lrsp
->opcode
);
2511 ret
= srp_alloc_iu_bufs(ch
);
2516 for (i
= 0; i
< target
->queue_size
; i
++) {
2517 struct srp_iu
*iu
= ch
->rx_ring
[i
];
2519 ret
= srp_post_recv(ch
, iu
);
2524 if (!target
->using_rdma_cm
) {
2526 qp_attr
= kmalloc(sizeof(*qp_attr
), GFP_KERNEL
);
2530 qp_attr
->qp_state
= IB_QPS_RTR
;
2531 ret
= ib_cm_init_qp_attr(cm_id
, qp_attr
, &attr_mask
);
2535 ret
= ib_modify_qp(ch
->qp
, qp_attr
, attr_mask
);
2539 qp_attr
->qp_state
= IB_QPS_RTS
;
2540 ret
= ib_cm_init_qp_attr(cm_id
, qp_attr
, &attr_mask
);
2544 target
->rq_tmo_jiffies
= srp_compute_rq_tmo(qp_attr
, attr_mask
);
2546 ret
= ib_modify_qp(ch
->qp
, qp_attr
, attr_mask
);
2550 ret
= ib_send_cm_rtu(cm_id
, NULL
, 0);
2560 static void srp_ib_cm_rej_handler(struct ib_cm_id
*cm_id
,
2561 struct ib_cm_event
*event
,
2562 struct srp_rdma_ch
*ch
)
2564 struct srp_target_port
*target
= ch
->target
;
2565 struct Scsi_Host
*shost
= target
->scsi_host
;
2566 struct ib_class_port_info
*cpi
;
2570 switch (event
->param
.rej_rcvd
.reason
) {
2571 case IB_CM_REJ_PORT_CM_REDIRECT
:
2572 cpi
= event
->param
.rej_rcvd
.ari
;
2573 dlid
= be16_to_cpu(cpi
->redirect_lid
);
2574 sa_path_set_dlid(&ch
->ib_cm
.path
, dlid
);
2575 ch
->ib_cm
.path
.pkey
= cpi
->redirect_pkey
;
2576 cm_id
->remote_cm_qpn
= be32_to_cpu(cpi
->redirect_qp
) & 0x00ffffff;
2577 memcpy(ch
->ib_cm
.path
.dgid
.raw
, cpi
->redirect_gid
, 16);
2579 ch
->status
= dlid
? SRP_DLID_REDIRECT
: SRP_PORT_REDIRECT
;
2582 case IB_CM_REJ_PORT_REDIRECT
:
2583 if (srp_target_is_topspin(target
)) {
2584 union ib_gid
*dgid
= &ch
->ib_cm
.path
.dgid
;
2587 * Topspin/Cisco SRP gateways incorrectly send
2588 * reject reason code 25 when they mean 24
2591 memcpy(dgid
->raw
, event
->param
.rej_rcvd
.ari
, 16);
2593 shost_printk(KERN_DEBUG
, shost
,
2594 PFX
"Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2595 be64_to_cpu(dgid
->global
.subnet_prefix
),
2596 be64_to_cpu(dgid
->global
.interface_id
));
2598 ch
->status
= SRP_PORT_REDIRECT
;
2600 shost_printk(KERN_WARNING
, shost
,
2601 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2602 ch
->status
= -ECONNRESET
;
2606 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID
:
2607 shost_printk(KERN_WARNING
, shost
,
2608 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2609 ch
->status
= -ECONNRESET
;
2612 case IB_CM_REJ_CONSUMER_DEFINED
:
2613 opcode
= *(u8
*) event
->private_data
;
2614 if (opcode
== SRP_LOGIN_REJ
) {
2615 struct srp_login_rej
*rej
= event
->private_data
;
2616 u32 reason
= be32_to_cpu(rej
->reason
);
2618 if (reason
== SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE
)
2619 shost_printk(KERN_WARNING
, shost
,
2620 PFX
"SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2622 shost_printk(KERN_WARNING
, shost
, PFX
2623 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2625 target
->ib_cm
.orig_dgid
.raw
,
2628 shost_printk(KERN_WARNING
, shost
,
2629 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2630 " opcode 0x%02x\n", opcode
);
2631 ch
->status
= -ECONNRESET
;
2634 case IB_CM_REJ_STALE_CONN
:
2635 shost_printk(KERN_WARNING
, shost
, " REJ reason: stale connection\n");
2636 ch
->status
= SRP_STALE_CONN
;
2640 shost_printk(KERN_WARNING
, shost
, " REJ reason 0x%x\n",
2641 event
->param
.rej_rcvd
.reason
);
2642 ch
->status
= -ECONNRESET
;
2646 static int srp_ib_cm_handler(struct ib_cm_id
*cm_id
, struct ib_cm_event
*event
)
2648 struct srp_rdma_ch
*ch
= cm_id
->context
;
2649 struct srp_target_port
*target
= ch
->target
;
2652 switch (event
->event
) {
2653 case IB_CM_REQ_ERROR
:
2654 shost_printk(KERN_DEBUG
, target
->scsi_host
,
2655 PFX
"Sending CM REQ failed\n");
2657 ch
->status
= -ECONNRESET
;
2660 case IB_CM_REP_RECEIVED
:
2662 srp_cm_rep_handler(cm_id
, event
->private_data
, ch
);
2665 case IB_CM_REJ_RECEIVED
:
2666 shost_printk(KERN_DEBUG
, target
->scsi_host
, PFX
"REJ received\n");
2669 srp_ib_cm_rej_handler(cm_id
, event
, ch
);
2672 case IB_CM_DREQ_RECEIVED
:
2673 shost_printk(KERN_WARNING
, target
->scsi_host
,
2674 PFX
"DREQ received - connection closed\n");
2675 ch
->connected
= false;
2676 if (ib_send_cm_drep(cm_id
, NULL
, 0))
2677 shost_printk(KERN_ERR
, target
->scsi_host
,
2678 PFX
"Sending CM DREP failed\n");
2679 queue_work(system_long_wq
, &target
->tl_err_work
);
2682 case IB_CM_TIMEWAIT_EXIT
:
2683 shost_printk(KERN_ERR
, target
->scsi_host
,
2684 PFX
"connection closed\n");
2690 case IB_CM_MRA_RECEIVED
:
2691 case IB_CM_DREQ_ERROR
:
2692 case IB_CM_DREP_RECEIVED
:
2696 shost_printk(KERN_WARNING
, target
->scsi_host
,
2697 PFX
"Unhandled CM event %d\n", event
->event
);
2702 complete(&ch
->done
);
2707 static void srp_rdma_cm_rej_handler(struct srp_rdma_ch
*ch
,
2708 struct rdma_cm_event
*event
)
2710 struct srp_target_port
*target
= ch
->target
;
2711 struct Scsi_Host
*shost
= target
->scsi_host
;
2714 switch (event
->status
) {
2715 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID
:
2716 shost_printk(KERN_WARNING
, shost
,
2717 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2718 ch
->status
= -ECONNRESET
;
2721 case IB_CM_REJ_CONSUMER_DEFINED
:
2722 opcode
= *(u8
*) event
->param
.conn
.private_data
;
2723 if (opcode
== SRP_LOGIN_REJ
) {
2724 struct srp_login_rej
*rej
=
2725 (struct srp_login_rej
*)
2726 event
->param
.conn
.private_data
;
2727 u32 reason
= be32_to_cpu(rej
->reason
);
2729 if (reason
== SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE
)
2730 shost_printk(KERN_WARNING
, shost
,
2731 PFX
"SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2733 shost_printk(KERN_WARNING
, shost
,
2734 PFX
"SRP LOGIN REJECTED, reason 0x%08x\n", reason
);
2736 shost_printk(KERN_WARNING
, shost
,
2737 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2740 ch
->status
= -ECONNRESET
;
2743 case IB_CM_REJ_STALE_CONN
:
2744 shost_printk(KERN_WARNING
, shost
,
2745 " REJ reason: stale connection\n");
2746 ch
->status
= SRP_STALE_CONN
;
2750 shost_printk(KERN_WARNING
, shost
, " REJ reason 0x%x\n",
2752 ch
->status
= -ECONNRESET
;
2757 static int srp_rdma_cm_handler(struct rdma_cm_id
*cm_id
,
2758 struct rdma_cm_event
*event
)
2760 struct srp_rdma_ch
*ch
= cm_id
->context
;
2761 struct srp_target_port
*target
= ch
->target
;
2764 switch (event
->event
) {
2765 case RDMA_CM_EVENT_ADDR_RESOLVED
:
2770 case RDMA_CM_EVENT_ADDR_ERROR
:
2771 ch
->status
= -ENXIO
;
2775 case RDMA_CM_EVENT_ROUTE_RESOLVED
:
2780 case RDMA_CM_EVENT_ROUTE_ERROR
:
2781 case RDMA_CM_EVENT_UNREACHABLE
:
2782 ch
->status
= -EHOSTUNREACH
;
2786 case RDMA_CM_EVENT_CONNECT_ERROR
:
2787 shost_printk(KERN_DEBUG
, target
->scsi_host
,
2788 PFX
"Sending CM REQ failed\n");
2790 ch
->status
= -ECONNRESET
;
2793 case RDMA_CM_EVENT_ESTABLISHED
:
2795 srp_cm_rep_handler(NULL
, event
->param
.conn
.private_data
, ch
);
2798 case RDMA_CM_EVENT_REJECTED
:
2799 shost_printk(KERN_DEBUG
, target
->scsi_host
, PFX
"REJ received\n");
2802 srp_rdma_cm_rej_handler(ch
, event
);
2805 case RDMA_CM_EVENT_DISCONNECTED
:
2806 if (ch
->connected
) {
2807 shost_printk(KERN_WARNING
, target
->scsi_host
,
2808 PFX
"received DREQ\n");
2809 rdma_disconnect(ch
->rdma_cm
.cm_id
);
2812 queue_work(system_long_wq
, &target
->tl_err_work
);
2816 case RDMA_CM_EVENT_TIMEWAIT_EXIT
:
2817 shost_printk(KERN_ERR
, target
->scsi_host
,
2818 PFX
"connection closed\n");
2825 shost_printk(KERN_WARNING
, target
->scsi_host
,
2826 PFX
"Unhandled CM event %d\n", event
->event
);
2831 complete(&ch
->done
);
2837 * srp_change_queue_depth - setting device queue depth
2838 * @sdev: scsi device struct
2839 * @qdepth: requested queue depth
2841 * Returns queue depth.
2844 srp_change_queue_depth(struct scsi_device
*sdev
, int qdepth
)
2846 if (!sdev
->tagged_supported
)
2848 return scsi_change_queue_depth(sdev
, qdepth
);
2851 static int srp_send_tsk_mgmt(struct srp_rdma_ch
*ch
, u64 req_tag
, u64 lun
,
2852 u8 func
, u8
*status
)
2854 struct srp_target_port
*target
= ch
->target
;
2855 struct srp_rport
*rport
= target
->rport
;
2856 struct ib_device
*dev
= target
->srp_host
->srp_dev
->dev
;
2858 struct srp_tsk_mgmt
*tsk_mgmt
;
2861 if (!ch
->connected
|| target
->qp_in_error
)
2865 * Lock the rport mutex to avoid that srp_create_ch_ib() is
2866 * invoked while a task management function is being sent.
2868 mutex_lock(&rport
->mutex
);
2869 spin_lock_irq(&ch
->lock
);
2870 iu
= __srp_get_tx_iu(ch
, SRP_IU_TSK_MGMT
);
2871 spin_unlock_irq(&ch
->lock
);
2874 mutex_unlock(&rport
->mutex
);
2879 ib_dma_sync_single_for_cpu(dev
, iu
->dma
, sizeof *tsk_mgmt
,
2882 memset(tsk_mgmt
, 0, sizeof *tsk_mgmt
);
2884 tsk_mgmt
->opcode
= SRP_TSK_MGMT
;
2885 int_to_scsilun(lun
, &tsk_mgmt
->lun
);
2886 tsk_mgmt
->tsk_mgmt_func
= func
;
2887 tsk_mgmt
->task_tag
= req_tag
;
2889 spin_lock_irq(&ch
->lock
);
2890 ch
->tsk_mgmt_tag
= (ch
->tsk_mgmt_tag
+ 1) | SRP_TAG_TSK_MGMT
;
2891 tsk_mgmt
->tag
= ch
->tsk_mgmt_tag
;
2892 spin_unlock_irq(&ch
->lock
);
2894 init_completion(&ch
->tsk_mgmt_done
);
2896 ib_dma_sync_single_for_device(dev
, iu
->dma
, sizeof *tsk_mgmt
,
2898 if (srp_post_send(ch
, iu
, sizeof(*tsk_mgmt
))) {
2899 srp_put_tx_iu(ch
, iu
, SRP_IU_TSK_MGMT
);
2900 mutex_unlock(&rport
->mutex
);
2904 res
= wait_for_completion_timeout(&ch
->tsk_mgmt_done
,
2905 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS
));
2906 if (res
> 0 && status
)
2907 *status
= ch
->tsk_mgmt_status
;
2908 mutex_unlock(&rport
->mutex
);
2910 WARN_ON_ONCE(res
< 0);
2912 return res
> 0 ? 0 : -1;
2915 static int srp_abort(struct scsi_cmnd
*scmnd
)
2917 struct srp_target_port
*target
= host_to_target(scmnd
->device
->host
);
2918 struct srp_request
*req
= (struct srp_request
*) scmnd
->host_scribble
;
2921 struct srp_rdma_ch
*ch
;
2924 shost_printk(KERN_ERR
, target
->scsi_host
, "SRP abort called\n");
2928 tag
= blk_mq_unique_tag(scmnd
->request
);
2929 ch_idx
= blk_mq_unique_tag_to_hwq(tag
);
2930 if (WARN_ON_ONCE(ch_idx
>= target
->ch_count
))
2932 ch
= &target
->ch
[ch_idx
];
2933 if (!srp_claim_req(ch
, req
, NULL
, scmnd
))
2935 shost_printk(KERN_ERR
, target
->scsi_host
,
2936 "Sending SRP abort for tag %#x\n", tag
);
2937 if (srp_send_tsk_mgmt(ch
, tag
, scmnd
->device
->lun
,
2938 SRP_TSK_ABORT_TASK
, NULL
) == 0)
2940 else if (target
->rport
->state
== SRP_RPORT_LOST
)
2944 if (ret
== SUCCESS
) {
2945 srp_free_req(ch
, req
, scmnd
, 0);
2946 scmnd
->result
= DID_ABORT
<< 16;
2947 scmnd
->scsi_done(scmnd
);
2953 static int srp_reset_device(struct scsi_cmnd
*scmnd
)
2955 struct srp_target_port
*target
= host_to_target(scmnd
->device
->host
);
2956 struct srp_rdma_ch
*ch
;
2960 shost_printk(KERN_ERR
, target
->scsi_host
, "SRP reset_device called\n");
2962 ch
= &target
->ch
[0];
2963 if (srp_send_tsk_mgmt(ch
, SRP_TAG_NO_REQ
, scmnd
->device
->lun
,
2964 SRP_TSK_LUN_RESET
, &status
))
2969 for (i
= 0; i
< target
->ch_count
; i
++) {
2970 ch
= &target
->ch
[i
];
2971 for (i
= 0; i
< target
->req_ring_size
; ++i
) {
2972 struct srp_request
*req
= &ch
->req_ring
[i
];
2974 srp_finish_req(ch
, req
, scmnd
->device
, DID_RESET
<< 16);
2981 static int srp_reset_host(struct scsi_cmnd
*scmnd
)
2983 struct srp_target_port
*target
= host_to_target(scmnd
->device
->host
);
2985 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
"SRP reset_host called\n");
2987 return srp_reconnect_rport(target
->rport
) == 0 ? SUCCESS
: FAILED
;
2990 static int srp_target_alloc(struct scsi_target
*starget
)
2992 struct Scsi_Host
*shost
= dev_to_shost(starget
->dev
.parent
);
2993 struct srp_target_port
*target
= host_to_target(shost
);
2995 if (target
->target_can_queue
)
2996 starget
->can_queue
= target
->target_can_queue
;
3000 static int srp_slave_alloc(struct scsi_device
*sdev
)
3002 struct Scsi_Host
*shost
= sdev
->host
;
3003 struct srp_target_port
*target
= host_to_target(shost
);
3004 struct srp_device
*srp_dev
= target
->srp_host
->srp_dev
;
3005 struct ib_device
*ibdev
= srp_dev
->dev
;
3007 if (!(ibdev
->attrs
.device_cap_flags
& IB_DEVICE_SG_GAPS_REG
))
3008 blk_queue_virt_boundary(sdev
->request_queue
,
3009 ~srp_dev
->mr_page_mask
);
3014 static int srp_slave_configure(struct scsi_device
*sdev
)
3016 struct Scsi_Host
*shost
= sdev
->host
;
3017 struct srp_target_port
*target
= host_to_target(shost
);
3018 struct request_queue
*q
= sdev
->request_queue
;
3019 unsigned long timeout
;
3021 if (sdev
->type
== TYPE_DISK
) {
3022 timeout
= max_t(unsigned, 30 * HZ
, target
->rq_tmo_jiffies
);
3023 blk_queue_rq_timeout(q
, timeout
);
3029 static ssize_t
show_id_ext(struct device
*dev
, struct device_attribute
*attr
,
3032 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3034 return sprintf(buf
, "0x%016llx\n", be64_to_cpu(target
->id_ext
));
3037 static ssize_t
show_ioc_guid(struct device
*dev
, struct device_attribute
*attr
,
3040 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3042 return sprintf(buf
, "0x%016llx\n", be64_to_cpu(target
->ioc_guid
));
3045 static ssize_t
show_service_id(struct device
*dev
,
3046 struct device_attribute
*attr
, char *buf
)
3048 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3050 if (target
->using_rdma_cm
)
3052 return sprintf(buf
, "0x%016llx\n",
3053 be64_to_cpu(target
->ib_cm
.service_id
));
3056 static ssize_t
show_pkey(struct device
*dev
, struct device_attribute
*attr
,
3059 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3061 if (target
->using_rdma_cm
)
3063 return sprintf(buf
, "0x%04x\n", be16_to_cpu(target
->ib_cm
.pkey
));
3066 static ssize_t
show_sgid(struct device
*dev
, struct device_attribute
*attr
,
3069 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3071 return sprintf(buf
, "%pI6\n", target
->sgid
.raw
);
3074 static ssize_t
show_dgid(struct device
*dev
, struct device_attribute
*attr
,
3077 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3078 struct srp_rdma_ch
*ch
= &target
->ch
[0];
3080 if (target
->using_rdma_cm
)
3082 return sprintf(buf
, "%pI6\n", ch
->ib_cm
.path
.dgid
.raw
);
3085 static ssize_t
show_orig_dgid(struct device
*dev
,
3086 struct device_attribute
*attr
, char *buf
)
3088 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3090 if (target
->using_rdma_cm
)
3092 return sprintf(buf
, "%pI6\n", target
->ib_cm
.orig_dgid
.raw
);
3095 static ssize_t
show_req_lim(struct device
*dev
,
3096 struct device_attribute
*attr
, char *buf
)
3098 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3099 struct srp_rdma_ch
*ch
;
3100 int i
, req_lim
= INT_MAX
;
3102 for (i
= 0; i
< target
->ch_count
; i
++) {
3103 ch
= &target
->ch
[i
];
3104 req_lim
= min(req_lim
, ch
->req_lim
);
3106 return sprintf(buf
, "%d\n", req_lim
);
3109 static ssize_t
show_zero_req_lim(struct device
*dev
,
3110 struct device_attribute
*attr
, char *buf
)
3112 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3114 return sprintf(buf
, "%d\n", target
->zero_req_lim
);
3117 static ssize_t
show_local_ib_port(struct device
*dev
,
3118 struct device_attribute
*attr
, char *buf
)
3120 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3122 return sprintf(buf
, "%d\n", target
->srp_host
->port
);
3125 static ssize_t
show_local_ib_device(struct device
*dev
,
3126 struct device_attribute
*attr
, char *buf
)
3128 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3130 return sprintf(buf
, "%s\n", target
->srp_host
->srp_dev
->dev
->name
);
3133 static ssize_t
show_ch_count(struct device
*dev
, struct device_attribute
*attr
,
3136 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3138 return sprintf(buf
, "%d\n", target
->ch_count
);
3141 static ssize_t
show_comp_vector(struct device
*dev
,
3142 struct device_attribute
*attr
, char *buf
)
3144 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3146 return sprintf(buf
, "%d\n", target
->comp_vector
);
3149 static ssize_t
show_tl_retry_count(struct device
*dev
,
3150 struct device_attribute
*attr
, char *buf
)
3152 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3154 return sprintf(buf
, "%d\n", target
->tl_retry_count
);
3157 static ssize_t
show_cmd_sg_entries(struct device
*dev
,
3158 struct device_attribute
*attr
, char *buf
)
3160 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3162 return sprintf(buf
, "%u\n", target
->cmd_sg_cnt
);
3165 static ssize_t
show_allow_ext_sg(struct device
*dev
,
3166 struct device_attribute
*attr
, char *buf
)
3168 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3170 return sprintf(buf
, "%s\n", target
->allow_ext_sg
? "true" : "false");
/* Read-only sysfs attributes exported for each SRP SCSI host. */
static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);
static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);
static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);
static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);
static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
static DEVICE_ATTR(req_lim,	    S_IRUGO, show_req_lim,	   NULL);
static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
static DEVICE_ATTR(ch_count,	    S_IRUGO, show_ch_count,	   NULL);
static DEVICE_ATTR(comp_vector,	    S_IRUGO, show_comp_vector,	   NULL);
static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,	   NULL);
3190 static struct device_attribute
*srp_host_attrs
[] = {
3193 &dev_attr_service_id
,
3197 &dev_attr_orig_dgid
,
3199 &dev_attr_zero_req_lim
,
3200 &dev_attr_local_ib_port
,
3201 &dev_attr_local_ib_device
,
3203 &dev_attr_comp_vector
,
3204 &dev_attr_tl_retry_count
,
3205 &dev_attr_cmd_sg_entries
,
3206 &dev_attr_allow_ext_sg
,
3210 static struct scsi_host_template srp_template
= {
3211 .module
= THIS_MODULE
,
3212 .name
= "InfiniBand SRP initiator",
3213 .proc_name
= DRV_NAME
,
3214 .target_alloc
= srp_target_alloc
,
3215 .slave_alloc
= srp_slave_alloc
,
3216 .slave_configure
= srp_slave_configure
,
3217 .info
= srp_target_info
,
3218 .queuecommand
= srp_queuecommand
,
3219 .change_queue_depth
= srp_change_queue_depth
,
3220 .eh_timed_out
= srp_timed_out
,
3221 .eh_abort_handler
= srp_abort
,
3222 .eh_device_reset_handler
= srp_reset_device
,
3223 .eh_host_reset_handler
= srp_reset_host
,
3224 .skip_settle_delay
= true,
3225 .sg_tablesize
= SRP_DEF_SG_TABLESIZE
,
3226 .can_queue
= SRP_DEFAULT_CMD_SQ_SIZE
,
3228 .cmd_per_lun
= SRP_DEFAULT_CMD_SQ_SIZE
,
3229 .use_clustering
= ENABLE_CLUSTERING
,
3230 .shost_attrs
= srp_host_attrs
,
3231 .track_queue_depth
= 1,
3234 static int srp_sdev_count(struct Scsi_Host
*host
)
3236 struct scsi_device
*sdev
;
3239 shost_for_each_device(sdev
, host
)
3247 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
3248 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
3249 * removal has been scheduled.
3250 * 0 and target->state != SRP_TARGET_REMOVED upon success.
3252 static int srp_add_target(struct srp_host
*host
, struct srp_target_port
*target
)
3254 struct srp_rport_identifiers ids
;
3255 struct srp_rport
*rport
;
3257 target
->state
= SRP_TARGET_SCANNING
;
3258 sprintf(target
->target_name
, "SRP.T10:%016llX",
3259 be64_to_cpu(target
->id_ext
));
3261 if (scsi_add_host(target
->scsi_host
, host
->srp_dev
->dev
->dev
.parent
))
3264 memcpy(ids
.port_id
, &target
->id_ext
, 8);
3265 memcpy(ids
.port_id
+ 8, &target
->ioc_guid
, 8);
3266 ids
.roles
= SRP_RPORT_ROLE_TARGET
;
3267 rport
= srp_rport_add(target
->scsi_host
, &ids
);
3268 if (IS_ERR(rport
)) {
3269 scsi_remove_host(target
->scsi_host
);
3270 return PTR_ERR(rport
);
3273 rport
->lld_data
= target
;
3274 target
->rport
= rport
;
3276 spin_lock(&host
->target_lock
);
3277 list_add_tail(&target
->list
, &host
->target_list
);
3278 spin_unlock(&host
->target_lock
);
3280 scsi_scan_target(&target
->scsi_host
->shost_gendev
,
3281 0, target
->scsi_id
, SCAN_WILD_CARD
, SCSI_SCAN_INITIAL
);
3283 if (srp_connected_ch(target
) < target
->ch_count
||
3284 target
->qp_in_error
) {
3285 shost_printk(KERN_INFO
, target
->scsi_host
,
3286 PFX
"SCSI scan failed - removing SCSI host\n");
3287 srp_queue_remove_work(target
);
3291 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
3292 dev_name(&target
->scsi_host
->shost_gendev
),
3293 srp_sdev_count(target
->scsi_host
));
3295 spin_lock_irq(&target
->lock
);
3296 if (target
->state
== SRP_TARGET_SCANNING
)
3297 target
->state
= SRP_TARGET_LIVE
;
3298 spin_unlock_irq(&target
->lock
);
3304 static void srp_release_dev(struct device
*dev
)
3306 struct srp_host
*host
=
3307 container_of(dev
, struct srp_host
, dev
);
3309 complete(&host
->released
);
3312 static struct class srp_class
= {
3313 .name
= "infiniband_srp",
3314 .dev_release
= srp_release_dev
3318 * srp_conn_unique() - check whether the connection to a target is unique
3320 * @target: SRP target port.
3322 static bool srp_conn_unique(struct srp_host
*host
,
3323 struct srp_target_port
*target
)
3325 struct srp_target_port
*t
;
3328 if (target
->state
== SRP_TARGET_REMOVED
)
3333 spin_lock(&host
->target_lock
);
3334 list_for_each_entry(t
, &host
->target_list
, list
) {
3336 target
->id_ext
== t
->id_ext
&&
3337 target
->ioc_guid
== t
->ioc_guid
&&
3338 target
->initiator_ext
== t
->initiator_ext
) {
3343 spin_unlock(&host
->target_lock
);
/*
 * Target ports are added by writing
 *
 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
 * pkey=<P_Key>,service_id=<service ID>
 * or
 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,
 * [src=<IPv4 address>,]dest=<IPv4 address>:<port number>
 *
 * to the add_target sysfs attribute.
 */
enum {
	SRP_OPT_ERR		= 0,
	SRP_OPT_ID_EXT		= 1 << 0,
	SRP_OPT_IOC_GUID	= 1 << 1,
	SRP_OPT_DGID		= 1 << 2,
	SRP_OPT_PKEY		= 1 << 3,
	SRP_OPT_SERVICE_ID	= 1 << 4,
	SRP_OPT_MAX_SECT	= 1 << 5,
	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
	SRP_OPT_IO_CLASS	= 1 << 7,
	SRP_OPT_INITIATOR_EXT	= 1 << 8,
	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
	SRP_OPT_SG_TABLESIZE	= 1 << 11,
	SRP_OPT_COMP_VECTOR	= 1 << 12,
	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
	SRP_OPT_QUEUE_SIZE	= 1 << 14,
	SRP_OPT_IP_SRC		= 1 << 15,
	SRP_OPT_IP_DEST		= 1 << 16,
	SRP_OPT_TARGET_CAN_QUEUE= 1 << 17,
};
/*
 * Masks of login options that must all be present for a valid add_target
 * request (one entry per supported transport).  NOTE(review): the array
 * contents were lost in extraction — confirm against the original file.
 */
3382 static unsigned int srp_opt_mandatory
[] = {
3393 static const match_table_t srp_opt_tokens
= {
3394 { SRP_OPT_ID_EXT
, "id_ext=%s" },
3395 { SRP_OPT_IOC_GUID
, "ioc_guid=%s" },
3396 { SRP_OPT_DGID
, "dgid=%s" },
3397 { SRP_OPT_PKEY
, "pkey=%x" },
3398 { SRP_OPT_SERVICE_ID
, "service_id=%s" },
3399 { SRP_OPT_MAX_SECT
, "max_sect=%d" },
3400 { SRP_OPT_MAX_CMD_PER_LUN
, "max_cmd_per_lun=%d" },
3401 { SRP_OPT_TARGET_CAN_QUEUE
, "target_can_queue=%d" },
3402 { SRP_OPT_IO_CLASS
, "io_class=%x" },
3403 { SRP_OPT_INITIATOR_EXT
, "initiator_ext=%s" },
3404 { SRP_OPT_CMD_SG_ENTRIES
, "cmd_sg_entries=%u" },
3405 { SRP_OPT_ALLOW_EXT_SG
, "allow_ext_sg=%u" },
3406 { SRP_OPT_SG_TABLESIZE
, "sg_tablesize=%u" },
3407 { SRP_OPT_COMP_VECTOR
, "comp_vector=%u" },
3408 { SRP_OPT_TL_RETRY_COUNT
, "tl_retry_count=%u" },
3409 { SRP_OPT_QUEUE_SIZE
, "queue_size=%d" },
3410 { SRP_OPT_IP_SRC
, "src=%s" },
3411 { SRP_OPT_IP_DEST
, "dest=%s" },
3412 { SRP_OPT_ERR
, NULL
}
3416 * srp_parse_in - parse an IP address and port number combination
3418 * Parse the following address formats:
3419 * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
3420 * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5.
3422 static int srp_parse_in(struct net
*net
, struct sockaddr_storage
*sa
,
3423 const char *addr_port_str
)
3425 char *addr_end
, *addr
= kstrdup(addr_port_str
, GFP_KERNEL
);
3431 port_str
= strrchr(addr
, ':');
3435 ret
= inet_pton_with_scope(net
, AF_INET
, addr
, port_str
, sa
);
3436 if (ret
&& addr
[0]) {
3437 addr_end
= addr
+ strlen(addr
) - 1;
3438 if (addr
[0] == '[' && *addr_end
== ']') {
3440 ret
= inet_pton_with_scope(net
, AF_INET6
, addr
+ 1,
3445 pr_debug("%s -> %pISpfsc\n", addr_port_str
, sa
);
3449 static int srp_parse_options(struct net
*net
, const char *buf
,
3450 struct srp_target_port
*target
)
3452 char *options
, *sep_opt
;
3454 substring_t args
[MAX_OPT_ARGS
];
3455 unsigned long long ull
;
3461 options
= kstrdup(buf
, GFP_KERNEL
);
3466 while ((p
= strsep(&sep_opt
, ",\n")) != NULL
) {
3470 token
= match_token(p
, srp_opt_tokens
, args
);
3474 case SRP_OPT_ID_EXT
:
3475 p
= match_strdup(args
);
3480 ret
= kstrtoull(p
, 16, &ull
);
3482 pr_warn("invalid id_ext parameter '%s'\n", p
);
3486 target
->id_ext
= cpu_to_be64(ull
);
3490 case SRP_OPT_IOC_GUID
:
3491 p
= match_strdup(args
);
3496 ret
= kstrtoull(p
, 16, &ull
);
3498 pr_warn("invalid ioc_guid parameter '%s'\n", p
);
3502 target
->ioc_guid
= cpu_to_be64(ull
);
3507 p
= match_strdup(args
);
3512 if (strlen(p
) != 32) {
3513 pr_warn("bad dest GID parameter '%s'\n", p
);
3518 ret
= hex2bin(target
->ib_cm
.orig_dgid
.raw
, p
, 16);
3525 if (match_hex(args
, &token
)) {
3526 pr_warn("bad P_Key parameter '%s'\n", p
);
3529 target
->ib_cm
.pkey
= cpu_to_be16(token
);
3532 case SRP_OPT_SERVICE_ID
:
3533 p
= match_strdup(args
);
3538 ret
= kstrtoull(p
, 16, &ull
);
3540 pr_warn("bad service_id parameter '%s'\n", p
);
3544 target
->ib_cm
.service_id
= cpu_to_be64(ull
);
3548 case SRP_OPT_IP_SRC
:
3549 p
= match_strdup(args
);
3554 ret
= srp_parse_in(net
, &target
->rdma_cm
.src
.ss
, p
);
3556 pr_warn("bad source parameter '%s'\n", p
);
3560 target
->rdma_cm
.src_specified
= true;
3564 case SRP_OPT_IP_DEST
:
3565 p
= match_strdup(args
);
3570 ret
= srp_parse_in(net
, &target
->rdma_cm
.dst
.ss
, p
);
3572 pr_warn("bad dest parameter '%s'\n", p
);
3576 target
->using_rdma_cm
= true;
3580 case SRP_OPT_MAX_SECT
:
3581 if (match_int(args
, &token
)) {
3582 pr_warn("bad max sect parameter '%s'\n", p
);
3585 target
->scsi_host
->max_sectors
= token
;
3588 case SRP_OPT_QUEUE_SIZE
:
3589 if (match_int(args
, &token
) || token
< 1) {
3590 pr_warn("bad queue_size parameter '%s'\n", p
);
3593 target
->scsi_host
->can_queue
= token
;
3594 target
->queue_size
= token
+ SRP_RSP_SQ_SIZE
+
3595 SRP_TSK_MGMT_SQ_SIZE
;
3596 if (!(opt_mask
& SRP_OPT_MAX_CMD_PER_LUN
))
3597 target
->scsi_host
->cmd_per_lun
= token
;
3600 case SRP_OPT_MAX_CMD_PER_LUN
:
3601 if (match_int(args
, &token
) || token
< 1) {
3602 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3606 target
->scsi_host
->cmd_per_lun
= token
;
3609 case SRP_OPT_TARGET_CAN_QUEUE
:
3610 if (match_int(args
, &token
) || token
< 1) {
3611 pr_warn("bad max target_can_queue parameter '%s'\n",
3615 target
->target_can_queue
= token
;
3618 case SRP_OPT_IO_CLASS
:
3619 if (match_hex(args
, &token
)) {
3620 pr_warn("bad IO class parameter '%s'\n", p
);
3623 if (token
!= SRP_REV10_IB_IO_CLASS
&&
3624 token
!= SRP_REV16A_IB_IO_CLASS
) {
3625 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3626 token
, SRP_REV10_IB_IO_CLASS
,
3627 SRP_REV16A_IB_IO_CLASS
);
3630 target
->io_class
= token
;
3633 case SRP_OPT_INITIATOR_EXT
:
3634 p
= match_strdup(args
);
3639 ret
= kstrtoull(p
, 16, &ull
);
3641 pr_warn("bad initiator_ext value '%s'\n", p
);
3645 target
->initiator_ext
= cpu_to_be64(ull
);
3649 case SRP_OPT_CMD_SG_ENTRIES
:
3650 if (match_int(args
, &token
) || token
< 1 || token
> 255) {
3651 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3655 target
->cmd_sg_cnt
= token
;
3658 case SRP_OPT_ALLOW_EXT_SG
:
3659 if (match_int(args
, &token
)) {
3660 pr_warn("bad allow_ext_sg parameter '%s'\n", p
);
3663 target
->allow_ext_sg
= !!token
;
3666 case SRP_OPT_SG_TABLESIZE
:
3667 if (match_int(args
, &token
) || token
< 1 ||
3668 token
> SG_MAX_SEGMENTS
) {
3669 pr_warn("bad max sg_tablesize parameter '%s'\n",
3673 target
->sg_tablesize
= token
;
3676 case SRP_OPT_COMP_VECTOR
:
3677 if (match_int(args
, &token
) || token
< 0) {
3678 pr_warn("bad comp_vector parameter '%s'\n", p
);
3681 target
->comp_vector
= token
;
3684 case SRP_OPT_TL_RETRY_COUNT
:
3685 if (match_int(args
, &token
) || token
< 2 || token
> 7) {
3686 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3690 target
->tl_retry_count
= token
;
3694 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3700 for (i
= 0; i
< ARRAY_SIZE(srp_opt_mandatory
); i
++) {
3701 if ((opt_mask
& srp_opt_mandatory
[i
]) == srp_opt_mandatory
[i
]) {
3707 pr_warn("target creation request is missing one or more parameters\n");
3709 if (target
->scsi_host
->cmd_per_lun
> target
->scsi_host
->can_queue
3710 && (opt_mask
& SRP_OPT_MAX_CMD_PER_LUN
))
3711 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3712 target
->scsi_host
->cmd_per_lun
,
3713 target
->scsi_host
->can_queue
);
3720 static ssize_t
srp_create_target(struct device
*dev
,
3721 struct device_attribute
*attr
,
3722 const char *buf
, size_t count
)
3724 struct srp_host
*host
=
3725 container_of(dev
, struct srp_host
, dev
);
3726 struct Scsi_Host
*target_host
;
3727 struct srp_target_port
*target
;
3728 struct srp_rdma_ch
*ch
;
3729 struct srp_device
*srp_dev
= host
->srp_dev
;
3730 struct ib_device
*ibdev
= srp_dev
->dev
;
3731 int ret
, node_idx
, node
, cpu
, i
;
3732 unsigned int max_sectors_per_mr
, mr_per_cmd
= 0;
3733 bool multich
= false;
3735 target_host
= scsi_host_alloc(&srp_template
,
3736 sizeof (struct srp_target_port
));
3740 target_host
->transportt
= ib_srp_transport_template
;
3741 target_host
->max_channel
= 0;
3742 target_host
->max_id
= 1;
3743 target_host
->max_lun
= -1LL;
3744 target_host
->max_cmd_len
= sizeof ((struct srp_cmd
*) (void *) 0L)->cdb
;
3746 target
= host_to_target(target_host
);
3748 target
->net
= kobj_ns_grab_current(KOBJ_NS_TYPE_NET
);
3749 target
->io_class
= SRP_REV16A_IB_IO_CLASS
;
3750 target
->scsi_host
= target_host
;
3751 target
->srp_host
= host
;
3752 target
->lkey
= host
->srp_dev
->pd
->local_dma_lkey
;
3753 target
->global_rkey
= host
->srp_dev
->global_rkey
;
3754 target
->cmd_sg_cnt
= cmd_sg_entries
;
3755 target
->sg_tablesize
= indirect_sg_entries
? : cmd_sg_entries
;
3756 target
->allow_ext_sg
= allow_ext_sg
;
3757 target
->tl_retry_count
= 7;
3758 target
->queue_size
= SRP_DEFAULT_QUEUE_SIZE
;
3761 * Avoid that the SCSI host can be removed by srp_remove_target()
3762 * before this function returns.
3764 scsi_host_get(target
->scsi_host
);
3766 ret
= mutex_lock_interruptible(&host
->add_target_mutex
);
3770 ret
= srp_parse_options(target
->net
, buf
, target
);
3774 target
->req_ring_size
= target
->queue_size
- SRP_TSK_MGMT_SQ_SIZE
;
3776 if (!srp_conn_unique(target
->srp_host
, target
)) {
3777 if (target
->using_rdma_cm
) {
3778 shost_printk(KERN_INFO
, target
->scsi_host
,
3779 PFX
"Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n",
3780 be64_to_cpu(target
->id_ext
),
3781 be64_to_cpu(target
->ioc_guid
),
3782 &target
->rdma_cm
.dst
);
3784 shost_printk(KERN_INFO
, target
->scsi_host
,
3785 PFX
"Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3786 be64_to_cpu(target
->id_ext
),
3787 be64_to_cpu(target
->ioc_guid
),
3788 be64_to_cpu(target
->initiator_ext
));
3794 if (!srp_dev
->has_fmr
&& !srp_dev
->has_fr
&& !target
->allow_ext_sg
&&
3795 target
->cmd_sg_cnt
< target
->sg_tablesize
) {
3796 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3797 target
->sg_tablesize
= target
->cmd_sg_cnt
;
3800 if (srp_dev
->use_fast_reg
|| srp_dev
->use_fmr
) {
3801 bool gaps_reg
= (ibdev
->attrs
.device_cap_flags
&
3802 IB_DEVICE_SG_GAPS_REG
);
3804 max_sectors_per_mr
= srp_dev
->max_pages_per_mr
<<
3805 (ilog2(srp_dev
->mr_page_size
) - 9);
3808 * FR and FMR can only map one HCA page per entry. If
3809 * the start address is not aligned on a HCA page
3810 * boundary two entries will be used for the head and
3811 * the tail although these two entries combined
3812 * contain at most one HCA page of data. Hence the "+
3813 * 1" in the calculation below.
3815 * The indirect data buffer descriptor is contiguous
3816 * so the memory for that buffer will only be
3817 * registered if register_always is true. Hence add
3818 * one to mr_per_cmd if register_always has been set.
3820 mr_per_cmd
= register_always
+
3821 (target
->scsi_host
->max_sectors
+ 1 +
3822 max_sectors_per_mr
- 1) / max_sectors_per_mr
;
3824 mr_per_cmd
= register_always
+
3825 (target
->sg_tablesize
+
3826 srp_dev
->max_pages_per_mr
- 1) /
3827 srp_dev
->max_pages_per_mr
;
3829 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3830 target
->scsi_host
->max_sectors
, srp_dev
->max_pages_per_mr
, srp_dev
->mr_page_size
,
3831 max_sectors_per_mr
, mr_per_cmd
);
3834 target_host
->sg_tablesize
= target
->sg_tablesize
;
3835 target
->mr_pool_size
= target
->scsi_host
->can_queue
* mr_per_cmd
;
3836 target
->mr_per_cmd
= mr_per_cmd
;
3837 target
->indirect_size
= target
->sg_tablesize
*
3838 sizeof (struct srp_direct_buf
);
3839 target
->max_iu_len
= sizeof (struct srp_cmd
) +
3840 sizeof (struct srp_indirect_buf
) +
3841 target
->cmd_sg_cnt
* sizeof (struct srp_direct_buf
);
3843 INIT_WORK(&target
->tl_err_work
, srp_tl_err_work
);
3844 INIT_WORK(&target
->remove_work
, srp_remove_work
);
3845 spin_lock_init(&target
->lock
);
3846 ret
= ib_query_gid(ibdev
, host
->port
, 0, &target
->sgid
, NULL
);
3851 target
->ch_count
= max_t(unsigned, num_online_nodes(),
3853 min(4 * num_online_nodes(),
3854 ibdev
->num_comp_vectors
),
3855 num_online_cpus()));
3856 target
->ch
= kcalloc(target
->ch_count
, sizeof(*target
->ch
),
3862 for_each_online_node(node
) {
3863 const int ch_start
= (node_idx
* target
->ch_count
/
3864 num_online_nodes());
3865 const int ch_end
= ((node_idx
+ 1) * target
->ch_count
/
3866 num_online_nodes());
3867 const int cv_start
= node_idx
* ibdev
->num_comp_vectors
/
3869 const int cv_end
= (node_idx
+ 1) * ibdev
->num_comp_vectors
/
3873 for_each_online_cpu(cpu
) {
3874 if (cpu_to_node(cpu
) != node
)
3876 if (ch_start
+ cpu_idx
>= ch_end
)
3878 ch
= &target
->ch
[ch_start
+ cpu_idx
];
3879 ch
->target
= target
;
3880 ch
->comp_vector
= cv_start
== cv_end
? cv_start
:
3881 cv_start
+ cpu_idx
% (cv_end
- cv_start
);
3882 spin_lock_init(&ch
->lock
);
3883 INIT_LIST_HEAD(&ch
->free_tx
);
3884 ret
= srp_new_cm_id(ch
);
3886 goto err_disconnect
;
3888 ret
= srp_create_ch_ib(ch
);
3890 goto err_disconnect
;
3892 ret
= srp_alloc_req_data(ch
);
3894 goto err_disconnect
;
3896 ret
= srp_connect_ch(ch
, multich
);
3900 if (target
->using_rdma_cm
)
3901 snprintf(dst
, sizeof(dst
), "%pIS",
3902 &target
->rdma_cm
.dst
);
3904 snprintf(dst
, sizeof(dst
), "%pI6",
3905 target
->ib_cm
.orig_dgid
.raw
);
3906 shost_printk(KERN_ERR
, target
->scsi_host
,
3907 PFX
"Connection %d/%d to %s failed\n",
3909 target
->ch_count
, dst
);
3910 if (node_idx
== 0 && cpu_idx
== 0) {
3913 srp_free_ch_ib(target
, ch
);
3914 srp_free_req_data(target
, ch
);
3915 target
->ch_count
= ch
- target
->ch
;
3927 target
->scsi_host
->nr_hw_queues
= target
->ch_count
;
3929 ret
= srp_add_target(host
, target
);
3931 goto err_disconnect
;
3933 if (target
->state
!= SRP_TARGET_REMOVED
) {
3934 if (target
->using_rdma_cm
) {
3935 shost_printk(KERN_DEBUG
, target
->scsi_host
, PFX
3936 "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
3937 be64_to_cpu(target
->id_ext
),
3938 be64_to_cpu(target
->ioc_guid
),
3939 target
->sgid
.raw
, &target
->rdma_cm
.dst
);
3941 shost_printk(KERN_DEBUG
, target
->scsi_host
, PFX
3942 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3943 be64_to_cpu(target
->id_ext
),
3944 be64_to_cpu(target
->ioc_guid
),
3945 be16_to_cpu(target
->ib_cm
.pkey
),
3946 be64_to_cpu(target
->ib_cm
.service_id
),
3948 target
->ib_cm
.orig_dgid
.raw
);
3955 mutex_unlock(&host
->add_target_mutex
);
3958 scsi_host_put(target
->scsi_host
);
3961 * If a call to srp_remove_target() has not been scheduled,
3962 * drop the network namespace reference now that was obtained
3963 * earlier in this function.
3965 if (target
->state
!= SRP_TARGET_REMOVED
)
3966 kobj_ns_drop(KOBJ_NS_TYPE_NET
, target
->net
);
3967 scsi_host_put(target
->scsi_host
);
3973 srp_disconnect_target(target
);
3976 for (i
= 0; i
< target
->ch_count
; i
++) {
3977 ch
= &target
->ch
[i
];
3978 srp_free_ch_ib(target
, ch
);
3979 srp_free_req_data(target
, ch
);
/*
 * sysfs "add_target" attribute: write-only (root only). Writing a login
 * parameter string creates and connects a new SRP target port via
 * srp_create_target().
 */
static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3988 static ssize_t
show_ibdev(struct device
*dev
, struct device_attribute
*attr
,
3991 struct srp_host
*host
= container_of(dev
, struct srp_host
, dev
);
3993 return sprintf(buf
, "%s\n", host
->srp_dev
->dev
->name
);
/* sysfs "ibdev" attribute: read-only name of the RDMA device for this host. */
static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3998 static ssize_t
show_port(struct device
*dev
, struct device_attribute
*attr
,
4001 struct srp_host
*host
= container_of(dev
, struct srp_host
, dev
);
4003 return sprintf(buf
, "%d\n", host
->port
);
/* sysfs "port" attribute: read-only RDMA port number for this host. */
static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
4008 static struct srp_host
*srp_add_port(struct srp_device
*device
, u8 port
)
4010 struct srp_host
*host
;
4012 host
= kzalloc(sizeof *host
, GFP_KERNEL
);
4016 INIT_LIST_HEAD(&host
->target_list
);
4017 spin_lock_init(&host
->target_lock
);
4018 init_completion(&host
->released
);
4019 mutex_init(&host
->add_target_mutex
);
4020 host
->srp_dev
= device
;
4023 host
->dev
.class = &srp_class
;
4024 host
->dev
.parent
= device
->dev
->dev
.parent
;
4025 dev_set_name(&host
->dev
, "srp-%s-%d", device
->dev
->name
, port
);
4027 if (device_register(&host
->dev
))
4029 if (device_create_file(&host
->dev
, &dev_attr_add_target
))
4031 if (device_create_file(&host
->dev
, &dev_attr_ibdev
))
4033 if (device_create_file(&host
->dev
, &dev_attr_port
))
4039 device_unregister(&host
->dev
);
4047 static void srp_add_one(struct ib_device
*device
)
4049 struct srp_device
*srp_dev
;
4050 struct ib_device_attr
*attr
= &device
->attrs
;
4051 struct srp_host
*host
;
4052 int mr_page_shift
, p
;
4053 u64 max_pages_per_mr
;
4054 unsigned int flags
= 0;
4056 srp_dev
= kzalloc(sizeof(*srp_dev
), GFP_KERNEL
);
4061 * Use the smallest page size supported by the HCA, down to a
4062 * minimum of 4096 bytes. We're unlikely to build large sglists
4063 * out of smaller entries.
4065 mr_page_shift
= max(12, ffs(attr
->page_size_cap
) - 1);
4066 srp_dev
->mr_page_size
= 1 << mr_page_shift
;
4067 srp_dev
->mr_page_mask
= ~((u64
) srp_dev
->mr_page_size
- 1);
4068 max_pages_per_mr
= attr
->max_mr_size
;
4069 do_div(max_pages_per_mr
, srp_dev
->mr_page_size
);
4070 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__
,
4071 attr
->max_mr_size
, srp_dev
->mr_page_size
,
4072 max_pages_per_mr
, SRP_MAX_PAGES_PER_MR
);
4073 srp_dev
->max_pages_per_mr
= min_t(u64
, SRP_MAX_PAGES_PER_MR
,
4076 srp_dev
->has_fmr
= (device
->alloc_fmr
&& device
->dealloc_fmr
&&
4077 device
->map_phys_fmr
&& device
->unmap_fmr
);
4078 srp_dev
->has_fr
= (attr
->device_cap_flags
&
4079 IB_DEVICE_MEM_MGT_EXTENSIONS
);
4080 if (!never_register
&& !srp_dev
->has_fmr
&& !srp_dev
->has_fr
) {
4081 dev_warn(&device
->dev
, "neither FMR nor FR is supported\n");
4082 } else if (!never_register
&&
4083 attr
->max_mr_size
>= 2 * srp_dev
->mr_page_size
) {
4084 srp_dev
->use_fast_reg
= (srp_dev
->has_fr
&&
4085 (!srp_dev
->has_fmr
|| prefer_fr
));
4086 srp_dev
->use_fmr
= !srp_dev
->use_fast_reg
&& srp_dev
->has_fmr
;
4089 if (never_register
|| !register_always
||
4090 (!srp_dev
->has_fmr
&& !srp_dev
->has_fr
))
4091 flags
|= IB_PD_UNSAFE_GLOBAL_RKEY
;
4093 if (srp_dev
->use_fast_reg
) {
4094 srp_dev
->max_pages_per_mr
=
4095 min_t(u32
, srp_dev
->max_pages_per_mr
,
4096 attr
->max_fast_reg_page_list_len
);
4098 srp_dev
->mr_max_size
= srp_dev
->mr_page_size
*
4099 srp_dev
->max_pages_per_mr
;
4100 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
4101 device
->name
, mr_page_shift
, attr
->max_mr_size
,
4102 attr
->max_fast_reg_page_list_len
,
4103 srp_dev
->max_pages_per_mr
, srp_dev
->mr_max_size
);
4105 INIT_LIST_HEAD(&srp_dev
->dev_list
);
4107 srp_dev
->dev
= device
;
4108 srp_dev
->pd
= ib_alloc_pd(device
, flags
);
4109 if (IS_ERR(srp_dev
->pd
))
4112 if (flags
& IB_PD_UNSAFE_GLOBAL_RKEY
) {
4113 srp_dev
->global_rkey
= srp_dev
->pd
->unsafe_global_rkey
;
4114 WARN_ON_ONCE(srp_dev
->global_rkey
== 0);
4117 for (p
= rdma_start_port(device
); p
<= rdma_end_port(device
); ++p
) {
4118 host
= srp_add_port(srp_dev
, p
);
4120 list_add_tail(&host
->list
, &srp_dev
->dev_list
);
4123 ib_set_client_data(device
, &srp_client
, srp_dev
);
4130 static void srp_remove_one(struct ib_device
*device
, void *client_data
)
4132 struct srp_device
*srp_dev
;
4133 struct srp_host
*host
, *tmp_host
;
4134 struct srp_target_port
*target
;
4136 srp_dev
= client_data
;
4140 list_for_each_entry_safe(host
, tmp_host
, &srp_dev
->dev_list
, list
) {
4141 device_unregister(&host
->dev
);
4143 * Wait for the sysfs entry to go away, so that no new
4144 * target ports can be created.
4146 wait_for_completion(&host
->released
);
4149 * Remove all target ports.
4151 spin_lock(&host
->target_lock
);
4152 list_for_each_entry(target
, &host
->target_list
, list
)
4153 srp_queue_remove_work(target
);
4154 spin_unlock(&host
->target_lock
);
4157 * Wait for tl_err and target port removal tasks.
4159 flush_workqueue(system_long_wq
);
4160 flush_workqueue(srp_remove_wq
);
4165 ib_dealloc_pd(srp_dev
->pd
);
/*
 * Callbacks and tunables handed to the SRP transport class
 * (scsi_transport_srp); they wire rport state handling, reconnect and
 * I/O-termination into this driver.
 */
static struct srp_function_template ib_srp_transport_functions = {
	.has_rport_state	 = true,
	.reset_timer_if_blocked	 = true,
	.reconnect_delay	 = &srp_reconnect_delay,
	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
	.dev_loss_tmo		 = &srp_dev_loss_tmo,
	.reconnect		 = srp_rport_reconnect,
	.rport_delete		 = srp_rport_delete,
	.terminate_rport_io	 = srp_terminate_io,
};
4181 static int __init
srp_init_module(void)
4185 if (srp_sg_tablesize
) {
4186 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
4187 if (!cmd_sg_entries
)
4188 cmd_sg_entries
= srp_sg_tablesize
;
4191 if (!cmd_sg_entries
)
4192 cmd_sg_entries
= SRP_DEF_SG_TABLESIZE
;
4194 if (cmd_sg_entries
> 255) {
4195 pr_warn("Clamping cmd_sg_entries to 255\n");
4196 cmd_sg_entries
= 255;
4199 if (!indirect_sg_entries
)
4200 indirect_sg_entries
= cmd_sg_entries
;
4201 else if (indirect_sg_entries
< cmd_sg_entries
) {
4202 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
4204 indirect_sg_entries
= cmd_sg_entries
;
4207 if (indirect_sg_entries
> SG_MAX_SEGMENTS
) {
4208 pr_warn("Clamping indirect_sg_entries to %u\n",
4210 indirect_sg_entries
= SG_MAX_SEGMENTS
;
4213 srp_remove_wq
= create_workqueue("srp_remove");
4214 if (!srp_remove_wq
) {
4220 ib_srp_transport_template
=
4221 srp_attach_transport(&ib_srp_transport_functions
);
4222 if (!ib_srp_transport_template
)
4225 ret
= class_register(&srp_class
);
4227 pr_err("couldn't register class infiniband_srp\n");
4231 ib_sa_register_client(&srp_sa_client
);
4233 ret
= ib_register_client(&srp_client
);
4235 pr_err("couldn't register IB client\n");
4243 ib_sa_unregister_client(&srp_sa_client
);
4244 class_unregister(&srp_class
);
4247 srp_release_transport(ib_srp_transport_template
);
4250 destroy_workqueue(srp_remove_wq
);
4254 static void __exit
srp_cleanup_module(void)
4256 ib_unregister_client(&srp_client
);
4257 ib_sa_unregister_client(&srp_sa_client
);
4258 class_unregister(&srp_class
);
4259 srp_release_transport(ib_srp_transport_template
);
4260 destroy_workqueue(srp_remove_wq
);
/* Hook module load/unload to the init and cleanup routines above. */
module_init(srp_init_module);
module_exit(srp_cleanup_module);