2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <linux/lockdep.h>
44 #include <linux/inet.h>
45 #include <rdma/ib_cache.h>
47 #include <linux/atomic.h>
49 #include <scsi/scsi.h>
50 #include <scsi/scsi_device.h>
51 #include <scsi/scsi_dbg.h>
52 #include <scsi/scsi_tcq.h>
54 #include <scsi/scsi_transport_srp.h>
58 #define DRV_NAME "ib_srp"
59 #define PFX DRV_NAME ": "
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
65 #if !defined(CONFIG_DYNAMIC_DEBUG)
66 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
67 #define DYNAMIC_DEBUG_BRANCH(descriptor) false
70 static unsigned int srp_sg_tablesize
;
71 static unsigned int cmd_sg_entries
;
72 static unsigned int indirect_sg_entries
;
73 static bool allow_ext_sg
;
74 static bool prefer_fr
= true;
75 static bool register_always
= true;
76 static bool never_register
;
77 static int topspin_workarounds
= 1;
79 module_param(srp_sg_tablesize
, uint
, 0444);
80 MODULE_PARM_DESC(srp_sg_tablesize
, "Deprecated name for cmd_sg_entries");
82 module_param(cmd_sg_entries
, uint
, 0444);
83 MODULE_PARM_DESC(cmd_sg_entries
,
84 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
86 module_param(indirect_sg_entries
, uint
, 0444);
87 MODULE_PARM_DESC(indirect_sg_entries
,
88 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS
) ")");
90 module_param(allow_ext_sg
, bool, 0444);
91 MODULE_PARM_DESC(allow_ext_sg
,
92 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
94 module_param(topspin_workarounds
, int, 0444);
95 MODULE_PARM_DESC(topspin_workarounds
,
96 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
98 module_param(prefer_fr
, bool, 0444);
99 MODULE_PARM_DESC(prefer_fr
,
100 "Whether to use fast registration if both FMR and fast registration are supported");
102 module_param(register_always
, bool, 0444);
103 MODULE_PARM_DESC(register_always
,
104 "Use memory registration even for contiguous memory regions");
106 module_param(never_register
, bool, 0444);
107 MODULE_PARM_DESC(never_register
, "Never register memory");
109 static const struct kernel_param_ops srp_tmo_ops
;
111 static int srp_reconnect_delay
= 10;
112 module_param_cb(reconnect_delay
, &srp_tmo_ops
, &srp_reconnect_delay
,
114 MODULE_PARM_DESC(reconnect_delay
, "Time between successive reconnect attempts");
116 static int srp_fast_io_fail_tmo
= 15;
117 module_param_cb(fast_io_fail_tmo
, &srp_tmo_ops
, &srp_fast_io_fail_tmo
,
119 MODULE_PARM_DESC(fast_io_fail_tmo
,
120 "Number of seconds between the observation of a transport"
121 " layer error and failing all I/O. \"off\" means that this"
122 " functionality is disabled.");
124 static int srp_dev_loss_tmo
= 600;
125 module_param_cb(dev_loss_tmo
, &srp_tmo_ops
, &srp_dev_loss_tmo
,
127 MODULE_PARM_DESC(dev_loss_tmo
,
128 "Maximum number of seconds that the SRP transport should"
129 " insulate transport layer errors. After this time has been"
130 " exceeded the SCSI host is removed. Should be"
131 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT
)
132 " if fast_io_fail_tmo has not been set. \"off\" means that"
133 " this functionality is disabled.");
135 static unsigned ch_count
;
136 module_param(ch_count
, uint
, 0444);
137 MODULE_PARM_DESC(ch_count
,
138 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
140 static void srp_add_one(struct ib_device
*device
);
141 static void srp_remove_one(struct ib_device
*device
, void *client_data
);
142 static void srp_recv_done(struct ib_cq
*cq
, struct ib_wc
*wc
);
143 static void srp_handle_qp_err(struct ib_cq
*cq
, struct ib_wc
*wc
,
145 static int srp_ib_cm_handler(struct ib_cm_id
*cm_id
,
146 const struct ib_cm_event
*event
);
147 static int srp_rdma_cm_handler(struct rdma_cm_id
*cm_id
,
148 struct rdma_cm_event
*event
);
150 static struct scsi_transport_template
*ib_srp_transport_template
;
151 static struct workqueue_struct
*srp_remove_wq
;
153 static struct ib_client srp_client
= {
156 .remove
= srp_remove_one
159 static struct ib_sa_client srp_sa_client
;
161 static int srp_tmo_get(char *buffer
, const struct kernel_param
*kp
)
163 int tmo
= *(int *)kp
->arg
;
166 return sprintf(buffer
, "%d", tmo
);
168 return sprintf(buffer
, "off");
171 static int srp_tmo_set(const char *val
, const struct kernel_param
*kp
)
175 res
= srp_parse_tmo(&tmo
, val
);
179 if (kp
->arg
== &srp_reconnect_delay
)
180 res
= srp_tmo_valid(tmo
, srp_fast_io_fail_tmo
,
182 else if (kp
->arg
== &srp_fast_io_fail_tmo
)
183 res
= srp_tmo_valid(srp_reconnect_delay
, tmo
, srp_dev_loss_tmo
);
185 res
= srp_tmo_valid(srp_reconnect_delay
, srp_fast_io_fail_tmo
,
189 *(int *)kp
->arg
= tmo
;
195 static const struct kernel_param_ops srp_tmo_ops
= {
200 static inline struct srp_target_port
*host_to_target(struct Scsi_Host
*host
)
202 return (struct srp_target_port
*) host
->hostdata
;
205 static const char *srp_target_info(struct Scsi_Host
*host
)
207 return host_to_target(host
)->target_name
;
210 static int srp_target_is_topspin(struct srp_target_port
*target
)
212 static const u8 topspin_oui
[3] = { 0x00, 0x05, 0xad };
213 static const u8 cisco_oui
[3] = { 0x00, 0x1b, 0x0d };
215 return topspin_workarounds
&&
216 (!memcmp(&target
->ioc_guid
, topspin_oui
, sizeof topspin_oui
) ||
217 !memcmp(&target
->ioc_guid
, cisco_oui
, sizeof cisco_oui
));
220 static struct srp_iu
*srp_alloc_iu(struct srp_host
*host
, size_t size
,
222 enum dma_data_direction direction
)
226 iu
= kmalloc(sizeof *iu
, gfp_mask
);
230 iu
->buf
= kzalloc(size
, gfp_mask
);
234 iu
->dma
= ib_dma_map_single(host
->srp_dev
->dev
, iu
->buf
, size
,
236 if (ib_dma_mapping_error(host
->srp_dev
->dev
, iu
->dma
))
240 iu
->direction
= direction
;
252 static void srp_free_iu(struct srp_host
*host
, struct srp_iu
*iu
)
257 ib_dma_unmap_single(host
->srp_dev
->dev
, iu
->dma
, iu
->size
,
263 static void srp_qp_event(struct ib_event
*event
, void *context
)
265 pr_debug("QP event %s (%d)\n",
266 ib_event_msg(event
->event
), event
->event
);
269 static int srp_init_ib_qp(struct srp_target_port
*target
,
272 struct ib_qp_attr
*attr
;
275 attr
= kmalloc(sizeof *attr
, GFP_KERNEL
);
279 ret
= ib_find_cached_pkey(target
->srp_host
->srp_dev
->dev
,
280 target
->srp_host
->port
,
281 be16_to_cpu(target
->ib_cm
.pkey
),
286 attr
->qp_state
= IB_QPS_INIT
;
287 attr
->qp_access_flags
= (IB_ACCESS_REMOTE_READ
|
288 IB_ACCESS_REMOTE_WRITE
);
289 attr
->port_num
= target
->srp_host
->port
;
291 ret
= ib_modify_qp(qp
, attr
,
302 static int srp_new_ib_cm_id(struct srp_rdma_ch
*ch
)
304 struct srp_target_port
*target
= ch
->target
;
305 struct ib_cm_id
*new_cm_id
;
307 new_cm_id
= ib_create_cm_id(target
->srp_host
->srp_dev
->dev
,
308 srp_ib_cm_handler
, ch
);
309 if (IS_ERR(new_cm_id
))
310 return PTR_ERR(new_cm_id
);
313 ib_destroy_cm_id(ch
->ib_cm
.cm_id
);
314 ch
->ib_cm
.cm_id
= new_cm_id
;
315 if (rdma_cap_opa_ah(target
->srp_host
->srp_dev
->dev
,
316 target
->srp_host
->port
))
317 ch
->ib_cm
.path
.rec_type
= SA_PATH_REC_TYPE_OPA
;
319 ch
->ib_cm
.path
.rec_type
= SA_PATH_REC_TYPE_IB
;
320 ch
->ib_cm
.path
.sgid
= target
->sgid
;
321 ch
->ib_cm
.path
.dgid
= target
->ib_cm
.orig_dgid
;
322 ch
->ib_cm
.path
.pkey
= target
->ib_cm
.pkey
;
323 ch
->ib_cm
.path
.service_id
= target
->ib_cm
.service_id
;
328 static int srp_new_rdma_cm_id(struct srp_rdma_ch
*ch
)
330 struct srp_target_port
*target
= ch
->target
;
331 struct rdma_cm_id
*new_cm_id
;
334 new_cm_id
= rdma_create_id(target
->net
, srp_rdma_cm_handler
, ch
,
335 RDMA_PS_TCP
, IB_QPT_RC
);
336 if (IS_ERR(new_cm_id
)) {
337 ret
= PTR_ERR(new_cm_id
);
342 init_completion(&ch
->done
);
343 ret
= rdma_resolve_addr(new_cm_id
, target
->rdma_cm
.src_specified
?
344 (struct sockaddr
*)&target
->rdma_cm
.src
: NULL
,
345 (struct sockaddr
*)&target
->rdma_cm
.dst
,
346 SRP_PATH_REC_TIMEOUT_MS
);
348 pr_err("No route available from %pIS to %pIS (%d)\n",
349 &target
->rdma_cm
.src
, &target
->rdma_cm
.dst
, ret
);
352 ret
= wait_for_completion_interruptible(&ch
->done
);
358 pr_err("Resolving address %pIS failed (%d)\n",
359 &target
->rdma_cm
.dst
, ret
);
363 swap(ch
->rdma_cm
.cm_id
, new_cm_id
);
367 rdma_destroy_id(new_cm_id
);
372 static int srp_new_cm_id(struct srp_rdma_ch
*ch
)
374 struct srp_target_port
*target
= ch
->target
;
376 return target
->using_rdma_cm
? srp_new_rdma_cm_id(ch
) :
377 srp_new_ib_cm_id(ch
);
380 static struct ib_fmr_pool
*srp_alloc_fmr_pool(struct srp_target_port
*target
)
382 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
383 struct ib_fmr_pool_param fmr_param
;
385 memset(&fmr_param
, 0, sizeof(fmr_param
));
386 fmr_param
.pool_size
= target
->mr_pool_size
;
387 fmr_param
.dirty_watermark
= fmr_param
.pool_size
/ 4;
389 fmr_param
.max_pages_per_fmr
= dev
->max_pages_per_mr
;
390 fmr_param
.page_shift
= ilog2(dev
->mr_page_size
);
391 fmr_param
.access
= (IB_ACCESS_LOCAL_WRITE
|
392 IB_ACCESS_REMOTE_WRITE
|
393 IB_ACCESS_REMOTE_READ
);
395 return ib_create_fmr_pool(dev
->pd
, &fmr_param
);
399 * srp_destroy_fr_pool() - free the resources owned by a pool
400 * @pool: Fast registration pool to be destroyed.
402 static void srp_destroy_fr_pool(struct srp_fr_pool
*pool
)
405 struct srp_fr_desc
*d
;
410 for (i
= 0, d
= &pool
->desc
[0]; i
< pool
->size
; i
++, d
++) {
418 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
419 * @device: IB device to allocate fast registration descriptors for.
420 * @pd: Protection domain associated with the FR descriptors.
421 * @pool_size: Number of descriptors to allocate.
422 * @max_page_list_len: Maximum fast registration work request page list length.
424 static struct srp_fr_pool
*srp_create_fr_pool(struct ib_device
*device
,
425 struct ib_pd
*pd
, int pool_size
,
426 int max_page_list_len
)
428 struct srp_fr_pool
*pool
;
429 struct srp_fr_desc
*d
;
431 int i
, ret
= -EINVAL
;
432 enum ib_mr_type mr_type
;
437 pool
= kzalloc(sizeof(struct srp_fr_pool
) +
438 pool_size
* sizeof(struct srp_fr_desc
), GFP_KERNEL
);
441 pool
->size
= pool_size
;
442 pool
->max_page_list_len
= max_page_list_len
;
443 spin_lock_init(&pool
->lock
);
444 INIT_LIST_HEAD(&pool
->free_list
);
446 if (device
->attrs
.device_cap_flags
& IB_DEVICE_SG_GAPS_REG
)
447 mr_type
= IB_MR_TYPE_SG_GAPS
;
449 mr_type
= IB_MR_TYPE_MEM_REG
;
451 for (i
= 0, d
= &pool
->desc
[0]; i
< pool
->size
; i
++, d
++) {
452 mr
= ib_alloc_mr(pd
, mr_type
, max_page_list_len
);
456 pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
457 dev_name(&device
->dev
));
461 list_add_tail(&d
->entry
, &pool
->free_list
);
468 srp_destroy_fr_pool(pool
);
476 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
477 * @pool: Pool to obtain descriptor from.
479 static struct srp_fr_desc
*srp_fr_pool_get(struct srp_fr_pool
*pool
)
481 struct srp_fr_desc
*d
= NULL
;
484 spin_lock_irqsave(&pool
->lock
, flags
);
485 if (!list_empty(&pool
->free_list
)) {
486 d
= list_first_entry(&pool
->free_list
, typeof(*d
), entry
);
489 spin_unlock_irqrestore(&pool
->lock
, flags
);
495 * srp_fr_pool_put() - put an FR descriptor back in the free list
496 * @pool: Pool the descriptor was allocated from.
497 * @desc: Pointer to an array of fast registration descriptor pointers.
498 * @n: Number of descriptors to put back.
500 * Note: The caller must already have queued an invalidation request for
501 * desc->mr->rkey before calling this function.
503 static void srp_fr_pool_put(struct srp_fr_pool
*pool
, struct srp_fr_desc
**desc
,
509 spin_lock_irqsave(&pool
->lock
, flags
);
510 for (i
= 0; i
< n
; i
++)
511 list_add(&desc
[i
]->entry
, &pool
->free_list
);
512 spin_unlock_irqrestore(&pool
->lock
, flags
);
515 static struct srp_fr_pool
*srp_alloc_fr_pool(struct srp_target_port
*target
)
517 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
519 return srp_create_fr_pool(dev
->dev
, dev
->pd
, target
->mr_pool_size
,
520 dev
->max_pages_per_mr
);
524 * srp_destroy_qp() - destroy an RDMA queue pair
525 * @ch: SRP RDMA channel.
527 * Drain the qp before destroying it. This avoids that the receive
528 * completion handler can access the queue pair while it is
531 static void srp_destroy_qp(struct srp_rdma_ch
*ch
)
533 spin_lock_irq(&ch
->lock
);
534 ib_process_cq_direct(ch
->send_cq
, -1);
535 spin_unlock_irq(&ch
->lock
);
538 ib_destroy_qp(ch
->qp
);
541 static int srp_create_ch_ib(struct srp_rdma_ch
*ch
)
543 struct srp_target_port
*target
= ch
->target
;
544 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
545 struct ib_qp_init_attr
*init_attr
;
546 struct ib_cq
*recv_cq
, *send_cq
;
548 struct ib_fmr_pool
*fmr_pool
= NULL
;
549 struct srp_fr_pool
*fr_pool
= NULL
;
550 const int m
= 1 + dev
->use_fast_reg
* target
->mr_per_cmd
* 2;
553 init_attr
= kzalloc(sizeof *init_attr
, GFP_KERNEL
);
557 /* queue_size + 1 for ib_drain_rq() */
558 recv_cq
= ib_alloc_cq(dev
->dev
, ch
, target
->queue_size
+ 1,
559 ch
->comp_vector
, IB_POLL_SOFTIRQ
);
560 if (IS_ERR(recv_cq
)) {
561 ret
= PTR_ERR(recv_cq
);
565 send_cq
= ib_alloc_cq(dev
->dev
, ch
, m
* target
->queue_size
,
566 ch
->comp_vector
, IB_POLL_DIRECT
);
567 if (IS_ERR(send_cq
)) {
568 ret
= PTR_ERR(send_cq
);
572 init_attr
->event_handler
= srp_qp_event
;
573 init_attr
->cap
.max_send_wr
= m
* target
->queue_size
;
574 init_attr
->cap
.max_recv_wr
= target
->queue_size
+ 1;
575 init_attr
->cap
.max_recv_sge
= 1;
576 init_attr
->cap
.max_send_sge
= 1;
577 init_attr
->sq_sig_type
= IB_SIGNAL_REQ_WR
;
578 init_attr
->qp_type
= IB_QPT_RC
;
579 init_attr
->send_cq
= send_cq
;
580 init_attr
->recv_cq
= recv_cq
;
582 if (target
->using_rdma_cm
) {
583 ret
= rdma_create_qp(ch
->rdma_cm
.cm_id
, dev
->pd
, init_attr
);
584 qp
= ch
->rdma_cm
.cm_id
->qp
;
586 qp
= ib_create_qp(dev
->pd
, init_attr
);
588 ret
= srp_init_ib_qp(target
, qp
);
596 pr_err("QP creation failed for dev %s: %d\n",
597 dev_name(&dev
->dev
->dev
), ret
);
601 if (dev
->use_fast_reg
) {
602 fr_pool
= srp_alloc_fr_pool(target
);
603 if (IS_ERR(fr_pool
)) {
604 ret
= PTR_ERR(fr_pool
);
605 shost_printk(KERN_WARNING
, target
->scsi_host
, PFX
606 "FR pool allocation failed (%d)\n", ret
);
609 } else if (dev
->use_fmr
) {
610 fmr_pool
= srp_alloc_fmr_pool(target
);
611 if (IS_ERR(fmr_pool
)) {
612 ret
= PTR_ERR(fmr_pool
);
613 shost_printk(KERN_WARNING
, target
->scsi_host
, PFX
614 "FMR pool allocation failed (%d)\n", ret
);
622 ib_free_cq(ch
->recv_cq
);
624 ib_free_cq(ch
->send_cq
);
627 ch
->recv_cq
= recv_cq
;
628 ch
->send_cq
= send_cq
;
630 if (dev
->use_fast_reg
) {
632 srp_destroy_fr_pool(ch
->fr_pool
);
633 ch
->fr_pool
= fr_pool
;
634 } else if (dev
->use_fmr
) {
636 ib_destroy_fmr_pool(ch
->fmr_pool
);
637 ch
->fmr_pool
= fmr_pool
;
644 if (target
->using_rdma_cm
)
645 rdma_destroy_qp(ch
->rdma_cm
.cm_id
);
661 * Note: this function may be called without srp_alloc_iu_bufs() having been
662 * invoked. Hence the ch->[rt]x_ring checks.
664 static void srp_free_ch_ib(struct srp_target_port
*target
,
665 struct srp_rdma_ch
*ch
)
667 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
673 if (target
->using_rdma_cm
) {
674 if (ch
->rdma_cm
.cm_id
) {
675 rdma_destroy_id(ch
->rdma_cm
.cm_id
);
676 ch
->rdma_cm
.cm_id
= NULL
;
679 if (ch
->ib_cm
.cm_id
) {
680 ib_destroy_cm_id(ch
->ib_cm
.cm_id
);
681 ch
->ib_cm
.cm_id
= NULL
;
685 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
689 if (dev
->use_fast_reg
) {
691 srp_destroy_fr_pool(ch
->fr_pool
);
692 } else if (dev
->use_fmr
) {
694 ib_destroy_fmr_pool(ch
->fmr_pool
);
698 ib_free_cq(ch
->send_cq
);
699 ib_free_cq(ch
->recv_cq
);
702 * Avoid that the SCSI error handler tries to use this channel after
703 * it has been freed. The SCSI error handler can namely continue
704 * trying to perform recovery actions after scsi_remove_host()
710 ch
->send_cq
= ch
->recv_cq
= NULL
;
713 for (i
= 0; i
< target
->queue_size
; ++i
)
714 srp_free_iu(target
->srp_host
, ch
->rx_ring
[i
]);
719 for (i
= 0; i
< target
->queue_size
; ++i
)
720 srp_free_iu(target
->srp_host
, ch
->tx_ring
[i
]);
726 static void srp_path_rec_completion(int status
,
727 struct sa_path_rec
*pathrec
,
730 struct srp_rdma_ch
*ch
= ch_ptr
;
731 struct srp_target_port
*target
= ch
->target
;
735 shost_printk(KERN_ERR
, target
->scsi_host
,
736 PFX
"Got failed path rec status %d\n", status
);
738 ch
->ib_cm
.path
= *pathrec
;
742 static int srp_ib_lookup_path(struct srp_rdma_ch
*ch
)
744 struct srp_target_port
*target
= ch
->target
;
747 ch
->ib_cm
.path
.numb_path
= 1;
749 init_completion(&ch
->done
);
751 ch
->ib_cm
.path_query_id
= ib_sa_path_rec_get(&srp_sa_client
,
752 target
->srp_host
->srp_dev
->dev
,
753 target
->srp_host
->port
,
755 IB_SA_PATH_REC_SERVICE_ID
|
756 IB_SA_PATH_REC_DGID
|
757 IB_SA_PATH_REC_SGID
|
758 IB_SA_PATH_REC_NUMB_PATH
|
760 SRP_PATH_REC_TIMEOUT_MS
,
762 srp_path_rec_completion
,
763 ch
, &ch
->ib_cm
.path_query
);
764 if (ch
->ib_cm
.path_query_id
< 0)
765 return ch
->ib_cm
.path_query_id
;
767 ret
= wait_for_completion_interruptible(&ch
->done
);
772 shost_printk(KERN_WARNING
, target
->scsi_host
,
773 PFX
"Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
774 ch
->ib_cm
.path
.sgid
.raw
, ch
->ib_cm
.path
.dgid
.raw
,
775 be16_to_cpu(target
->ib_cm
.pkey
),
776 be64_to_cpu(target
->ib_cm
.service_id
));
781 static int srp_rdma_lookup_path(struct srp_rdma_ch
*ch
)
783 struct srp_target_port
*target
= ch
->target
;
786 init_completion(&ch
->done
);
788 ret
= rdma_resolve_route(ch
->rdma_cm
.cm_id
, SRP_PATH_REC_TIMEOUT_MS
);
792 wait_for_completion_interruptible(&ch
->done
);
795 shost_printk(KERN_WARNING
, target
->scsi_host
,
796 PFX
"Path resolution failed\n");
801 static int srp_lookup_path(struct srp_rdma_ch
*ch
)
803 struct srp_target_port
*target
= ch
->target
;
805 return target
->using_rdma_cm
? srp_rdma_lookup_path(ch
) :
806 srp_ib_lookup_path(ch
);
809 static u8
srp_get_subnet_timeout(struct srp_host
*host
)
811 struct ib_port_attr attr
;
813 u8 subnet_timeout
= 18;
815 ret
= ib_query_port(host
->srp_dev
->dev
, host
->port
, &attr
);
817 subnet_timeout
= attr
.subnet_timeout
;
819 if (unlikely(subnet_timeout
< 15))
820 pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n",
821 dev_name(&host
->srp_dev
->dev
->dev
), subnet_timeout
);
823 return subnet_timeout
;
826 static int srp_send_req(struct srp_rdma_ch
*ch
, bool multich
)
828 struct srp_target_port
*target
= ch
->target
;
830 struct rdma_conn_param rdma_param
;
831 struct srp_login_req_rdma rdma_req
;
832 struct ib_cm_req_param ib_param
;
833 struct srp_login_req ib_req
;
838 req
= kzalloc(sizeof *req
, GFP_KERNEL
);
842 req
->ib_param
.flow_control
= 1;
843 req
->ib_param
.retry_count
= target
->tl_retry_count
;
846 * Pick some arbitrary defaults here; we could make these
847 * module parameters if anyone cared about setting them.
849 req
->ib_param
.responder_resources
= 4;
850 req
->ib_param
.rnr_retry_count
= 7;
851 req
->ib_param
.max_cm_retries
= 15;
853 req
->ib_req
.opcode
= SRP_LOGIN_REQ
;
855 req
->ib_req
.req_it_iu_len
= cpu_to_be32(target
->max_iu_len
);
856 req
->ib_req
.req_buf_fmt
= cpu_to_be16(SRP_BUF_FORMAT_DIRECT
|
857 SRP_BUF_FORMAT_INDIRECT
);
858 req
->ib_req
.req_flags
= (multich
? SRP_MULTICHAN_MULTI
:
859 SRP_MULTICHAN_SINGLE
);
861 if (target
->using_rdma_cm
) {
862 req
->rdma_param
.flow_control
= req
->ib_param
.flow_control
;
863 req
->rdma_param
.responder_resources
=
864 req
->ib_param
.responder_resources
;
865 req
->rdma_param
.initiator_depth
= req
->ib_param
.initiator_depth
;
866 req
->rdma_param
.retry_count
= req
->ib_param
.retry_count
;
867 req
->rdma_param
.rnr_retry_count
= req
->ib_param
.rnr_retry_count
;
868 req
->rdma_param
.private_data
= &req
->rdma_req
;
869 req
->rdma_param
.private_data_len
= sizeof(req
->rdma_req
);
871 req
->rdma_req
.opcode
= req
->ib_req
.opcode
;
872 req
->rdma_req
.tag
= req
->ib_req
.tag
;
873 req
->rdma_req
.req_it_iu_len
= req
->ib_req
.req_it_iu_len
;
874 req
->rdma_req
.req_buf_fmt
= req
->ib_req
.req_buf_fmt
;
875 req
->rdma_req
.req_flags
= req
->ib_req
.req_flags
;
877 ipi
= req
->rdma_req
.initiator_port_id
;
878 tpi
= req
->rdma_req
.target_port_id
;
882 subnet_timeout
= srp_get_subnet_timeout(target
->srp_host
);
884 req
->ib_param
.primary_path
= &ch
->ib_cm
.path
;
885 req
->ib_param
.alternate_path
= NULL
;
886 req
->ib_param
.service_id
= target
->ib_cm
.service_id
;
887 get_random_bytes(&req
->ib_param
.starting_psn
, 4);
888 req
->ib_param
.starting_psn
&= 0xffffff;
889 req
->ib_param
.qp_num
= ch
->qp
->qp_num
;
890 req
->ib_param
.qp_type
= ch
->qp
->qp_type
;
891 req
->ib_param
.local_cm_response_timeout
= subnet_timeout
+ 2;
892 req
->ib_param
.remote_cm_response_timeout
= subnet_timeout
+ 2;
893 req
->ib_param
.private_data
= &req
->ib_req
;
894 req
->ib_param
.private_data_len
= sizeof(req
->ib_req
);
896 ipi
= req
->ib_req
.initiator_port_id
;
897 tpi
= req
->ib_req
.target_port_id
;
901 * In the published SRP specification (draft rev. 16a), the
902 * port identifier format is 8 bytes of ID extension followed
903 * by 8 bytes of GUID. Older drafts put the two halves in the
904 * opposite order, so that the GUID comes first.
906 * Targets conforming to these obsolete drafts can be
907 * recognized by the I/O Class they report.
909 if (target
->io_class
== SRP_REV10_IB_IO_CLASS
) {
910 memcpy(ipi
, &target
->sgid
.global
.interface_id
, 8);
911 memcpy(ipi
+ 8, &target
->initiator_ext
, 8);
912 memcpy(tpi
, &target
->ioc_guid
, 8);
913 memcpy(tpi
+ 8, &target
->id_ext
, 8);
915 memcpy(ipi
, &target
->initiator_ext
, 8);
916 memcpy(ipi
+ 8, &target
->sgid
.global
.interface_id
, 8);
917 memcpy(tpi
, &target
->id_ext
, 8);
918 memcpy(tpi
+ 8, &target
->ioc_guid
, 8);
922 * Topspin/Cisco SRP targets will reject our login unless we
923 * zero out the first 8 bytes of our initiator port ID and set
924 * the second 8 bytes to the local node GUID.
926 if (srp_target_is_topspin(target
)) {
927 shost_printk(KERN_DEBUG
, target
->scsi_host
,
928 PFX
"Topspin/Cisco initiator port ID workaround "
929 "activated for target GUID %016llx\n",
930 be64_to_cpu(target
->ioc_guid
));
932 memcpy(ipi
+ 8, &target
->srp_host
->srp_dev
->dev
->node_guid
, 8);
935 if (target
->using_rdma_cm
)
936 status
= rdma_connect(ch
->rdma_cm
.cm_id
, &req
->rdma_param
);
938 status
= ib_send_cm_req(ch
->ib_cm
.cm_id
, &req
->ib_param
);
945 static bool srp_queue_remove_work(struct srp_target_port
*target
)
947 bool changed
= false;
949 spin_lock_irq(&target
->lock
);
950 if (target
->state
!= SRP_TARGET_REMOVED
) {
951 target
->state
= SRP_TARGET_REMOVED
;
954 spin_unlock_irq(&target
->lock
);
957 queue_work(srp_remove_wq
, &target
->remove_work
);
962 static void srp_disconnect_target(struct srp_target_port
*target
)
964 struct srp_rdma_ch
*ch
;
967 /* XXX should send SRP_I_LOGOUT request */
969 for (i
= 0; i
< target
->ch_count
; i
++) {
971 ch
->connected
= false;
973 if (target
->using_rdma_cm
) {
974 if (ch
->rdma_cm
.cm_id
)
975 rdma_disconnect(ch
->rdma_cm
.cm_id
);
978 ret
= ib_send_cm_dreq(ch
->ib_cm
.cm_id
,
982 shost_printk(KERN_DEBUG
, target
->scsi_host
,
983 PFX
"Sending CM DREQ failed\n");
988 static void srp_free_req_data(struct srp_target_port
*target
,
989 struct srp_rdma_ch
*ch
)
991 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
992 struct ib_device
*ibdev
= dev
->dev
;
993 struct srp_request
*req
;
999 for (i
= 0; i
< target
->req_ring_size
; ++i
) {
1000 req
= &ch
->req_ring
[i
];
1001 if (dev
->use_fast_reg
) {
1002 kfree(req
->fr_list
);
1004 kfree(req
->fmr_list
);
1005 kfree(req
->map_page
);
1007 if (req
->indirect_dma_addr
) {
1008 ib_dma_unmap_single(ibdev
, req
->indirect_dma_addr
,
1009 target
->indirect_size
,
1012 kfree(req
->indirect_desc
);
1015 kfree(ch
->req_ring
);
1016 ch
->req_ring
= NULL
;
1019 static int srp_alloc_req_data(struct srp_rdma_ch
*ch
)
1021 struct srp_target_port
*target
= ch
->target
;
1022 struct srp_device
*srp_dev
= target
->srp_host
->srp_dev
;
1023 struct ib_device
*ibdev
= srp_dev
->dev
;
1024 struct srp_request
*req
;
1026 dma_addr_t dma_addr
;
1027 int i
, ret
= -ENOMEM
;
1029 ch
->req_ring
= kcalloc(target
->req_ring_size
, sizeof(*ch
->req_ring
),
1034 for (i
= 0; i
< target
->req_ring_size
; ++i
) {
1035 req
= &ch
->req_ring
[i
];
1036 mr_list
= kmalloc_array(target
->mr_per_cmd
, sizeof(void *),
1040 if (srp_dev
->use_fast_reg
) {
1041 req
->fr_list
= mr_list
;
1043 req
->fmr_list
= mr_list
;
1044 req
->map_page
= kmalloc_array(srp_dev
->max_pages_per_mr
,
1050 req
->indirect_desc
= kmalloc(target
->indirect_size
, GFP_KERNEL
);
1051 if (!req
->indirect_desc
)
1054 dma_addr
= ib_dma_map_single(ibdev
, req
->indirect_desc
,
1055 target
->indirect_size
,
1057 if (ib_dma_mapping_error(ibdev
, dma_addr
))
1060 req
->indirect_dma_addr
= dma_addr
;
1069 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
1070 * @shost: SCSI host whose attributes to remove from sysfs.
1072 * Note: Any attributes defined in the host template and that did not exist
1073 * before invocation of this function will be ignored.
1075 static void srp_del_scsi_host_attr(struct Scsi_Host
*shost
)
1077 struct device_attribute
**attr
;
1079 for (attr
= shost
->hostt
->shost_attrs
; attr
&& *attr
; ++attr
)
1080 device_remove_file(&shost
->shost_dev
, *attr
);
1083 static void srp_remove_target(struct srp_target_port
*target
)
1085 struct srp_rdma_ch
*ch
;
1088 WARN_ON_ONCE(target
->state
!= SRP_TARGET_REMOVED
);
1090 srp_del_scsi_host_attr(target
->scsi_host
);
1091 srp_rport_get(target
->rport
);
1092 srp_remove_host(target
->scsi_host
);
1093 scsi_remove_host(target
->scsi_host
);
1094 srp_stop_rport_timers(target
->rport
);
1095 srp_disconnect_target(target
);
1096 kobj_ns_drop(KOBJ_NS_TYPE_NET
, target
->net
);
1097 for (i
= 0; i
< target
->ch_count
; i
++) {
1098 ch
= &target
->ch
[i
];
1099 srp_free_ch_ib(target
, ch
);
1101 cancel_work_sync(&target
->tl_err_work
);
1102 srp_rport_put(target
->rport
);
1103 for (i
= 0; i
< target
->ch_count
; i
++) {
1104 ch
= &target
->ch
[i
];
1105 srp_free_req_data(target
, ch
);
1110 spin_lock(&target
->srp_host
->target_lock
);
1111 list_del(&target
->list
);
1112 spin_unlock(&target
->srp_host
->target_lock
);
1114 scsi_host_put(target
->scsi_host
);
1117 static void srp_remove_work(struct work_struct
*work
)
1119 struct srp_target_port
*target
=
1120 container_of(work
, struct srp_target_port
, remove_work
);
1122 WARN_ON_ONCE(target
->state
!= SRP_TARGET_REMOVED
);
1124 srp_remove_target(target
);
1127 static void srp_rport_delete(struct srp_rport
*rport
)
1129 struct srp_target_port
*target
= rport
->lld_data
;
1131 srp_queue_remove_work(target
);
1135 * srp_connected_ch() - number of connected channels
1136 * @target: SRP target port.
1138 static int srp_connected_ch(struct srp_target_port
*target
)
1142 for (i
= 0; i
< target
->ch_count
; i
++)
1143 c
+= target
->ch
[i
].connected
;
1148 static int srp_connect_ch(struct srp_rdma_ch
*ch
, bool multich
)
1150 struct srp_target_port
*target
= ch
->target
;
1153 WARN_ON_ONCE(!multich
&& srp_connected_ch(target
) > 0);
1155 ret
= srp_lookup_path(ch
);
1160 init_completion(&ch
->done
);
1161 ret
= srp_send_req(ch
, multich
);
1164 ret
= wait_for_completion_interruptible(&ch
->done
);
1169 * The CM event handling code will set status to
1170 * SRP_PORT_REDIRECT if we get a port redirect REJ
1171 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1172 * redirect REJ back.
1177 ch
->connected
= true;
1180 case SRP_PORT_REDIRECT
:
1181 ret
= srp_lookup_path(ch
);
1186 case SRP_DLID_REDIRECT
:
1189 case SRP_STALE_CONN
:
1190 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
1191 "giving up on stale connection\n");
1201 return ret
<= 0 ? ret
: -ENODEV
;
/**
 * srp_inv_rkey_err_done() - completion callback for rkey invalidation requests
 * @cq: Completion queue the work completion was reported on.
 * @wc: Work completion.
 *
 * Forwards the completion to srp_handle_qp_err() with "INV RKEY" as the
 * operation name.
 */
static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	const char *opname = "INV RKEY";

	srp_handle_qp_err(cq, wc, opname);
}
1209 static int srp_inv_rkey(struct srp_request
*req
, struct srp_rdma_ch
*ch
,
1212 struct ib_send_wr wr
= {
1213 .opcode
= IB_WR_LOCAL_INV
,
1217 .ex
.invalidate_rkey
= rkey
,
1220 wr
.wr_cqe
= &req
->reg_cqe
;
1221 req
->reg_cqe
.done
= srp_inv_rkey_err_done
;
1222 return ib_post_send(ch
->qp
, &wr
, NULL
);
1225 static void srp_unmap_data(struct scsi_cmnd
*scmnd
,
1226 struct srp_rdma_ch
*ch
,
1227 struct srp_request
*req
)
1229 struct srp_target_port
*target
= ch
->target
;
1230 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1231 struct ib_device
*ibdev
= dev
->dev
;
1234 if (!scsi_sglist(scmnd
) ||
1235 (scmnd
->sc_data_direction
!= DMA_TO_DEVICE
&&
1236 scmnd
->sc_data_direction
!= DMA_FROM_DEVICE
))
1239 if (dev
->use_fast_reg
) {
1240 struct srp_fr_desc
**pfr
;
1242 for (i
= req
->nmdesc
, pfr
= req
->fr_list
; i
> 0; i
--, pfr
++) {
1243 res
= srp_inv_rkey(req
, ch
, (*pfr
)->mr
->rkey
);
1245 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
1246 "Queueing INV WR for rkey %#x failed (%d)\n",
1247 (*pfr
)->mr
->rkey
, res
);
1248 queue_work(system_long_wq
,
1249 &target
->tl_err_work
);
1253 srp_fr_pool_put(ch
->fr_pool
, req
->fr_list
,
1255 } else if (dev
->use_fmr
) {
1256 struct ib_pool_fmr
**pfmr
;
1258 for (i
= req
->nmdesc
, pfmr
= req
->fmr_list
; i
> 0; i
--, pfmr
++)
1259 ib_fmr_pool_unmap(*pfmr
);
1262 ib_dma_unmap_sg(ibdev
, scsi_sglist(scmnd
), scsi_sg_count(scmnd
),
1263 scmnd
->sc_data_direction
);
1267 * srp_claim_req - Take ownership of the scmnd associated with a request.
1268 * @ch: SRP RDMA channel.
1269 * @req: SRP request.
1270 * @sdev: If not NULL, only take ownership for this SCSI device.
1271 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1272 * ownership of @req->scmnd if it equals @scmnd.
1275 * Either NULL or a pointer to the SCSI command the caller became owner of.
1277 static struct scsi_cmnd
*srp_claim_req(struct srp_rdma_ch
*ch
,
1278 struct srp_request
*req
,
1279 struct scsi_device
*sdev
,
1280 struct scsi_cmnd
*scmnd
)
1282 unsigned long flags
;
1284 spin_lock_irqsave(&ch
->lock
, flags
);
1286 (!sdev
|| req
->scmnd
->device
== sdev
) &&
1287 (!scmnd
|| req
->scmnd
== scmnd
)) {
1293 spin_unlock_irqrestore(&ch
->lock
, flags
);
1299 * srp_free_req() - Unmap data and adjust ch->req_lim.
1300 * @ch: SRP RDMA channel.
1301 * @req: Request to be freed.
1302 * @scmnd: SCSI command associated with @req.
1303 * @req_lim_delta: Amount to be added to @target->req_lim.
1305 static void srp_free_req(struct srp_rdma_ch
*ch
, struct srp_request
*req
,
1306 struct scsi_cmnd
*scmnd
, s32 req_lim_delta
)
1308 unsigned long flags
;
1310 srp_unmap_data(scmnd
, ch
, req
);
1312 spin_lock_irqsave(&ch
->lock
, flags
);
1313 ch
->req_lim
+= req_lim_delta
;
1314 spin_unlock_irqrestore(&ch
->lock
, flags
);
1317 static void srp_finish_req(struct srp_rdma_ch
*ch
, struct srp_request
*req
,
1318 struct scsi_device
*sdev
, int result
)
1320 struct scsi_cmnd
*scmnd
= srp_claim_req(ch
, req
, sdev
, NULL
);
1323 srp_free_req(ch
, req
, scmnd
, 0);
1324 scmnd
->result
= result
;
1325 scmnd
->scsi_done(scmnd
);
1329 static void srp_terminate_io(struct srp_rport
*rport
)
1331 struct srp_target_port
*target
= rport
->lld_data
;
1332 struct srp_rdma_ch
*ch
;
1333 struct Scsi_Host
*shost
= target
->scsi_host
;
1334 struct scsi_device
*sdev
;
1338 * Invoking srp_terminate_io() while srp_queuecommand() is running
1339 * is not safe. Hence the warning statement below.
1341 shost_for_each_device(sdev
, shost
)
1342 WARN_ON_ONCE(sdev
->request_queue
->request_fn_active
);
1344 for (i
= 0; i
< target
->ch_count
; i
++) {
1345 ch
= &target
->ch
[i
];
1347 for (j
= 0; j
< target
->req_ring_size
; ++j
) {
1348 struct srp_request
*req
= &ch
->req_ring
[j
];
1350 srp_finish_req(ch
, req
, NULL
,
1351 DID_TRANSPORT_FAILFAST
<< 16);
1357 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1358 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1359 * srp_reset_device() or srp_reset_host() calls will occur while this function
1360 * is in progress. One way to realize that is not to call this function
1361 * directly but to call srp_reconnect_rport() instead since that last function
1362 * serializes calls of this function via rport->mutex and also blocks
1363 * srp_queuecommand() calls before invoking this function.
1365 static int srp_rport_reconnect(struct srp_rport
*rport
)
1367 struct srp_target_port
*target
= rport
->lld_data
;
1368 struct srp_rdma_ch
*ch
;
1370 bool multich
= false;
1372 srp_disconnect_target(target
);
1374 if (target
->state
== SRP_TARGET_SCANNING
)
1378 * Now get a new local CM ID so that we avoid confusing the target in
1379 * case things are really fouled up. Doing so also ensures that all CM
1380 * callbacks will have finished before a new QP is allocated.
1382 for (i
= 0; i
< target
->ch_count
; i
++) {
1383 ch
= &target
->ch
[i
];
1384 ret
+= srp_new_cm_id(ch
);
1386 for (i
= 0; i
< target
->ch_count
; i
++) {
1387 ch
= &target
->ch
[i
];
1388 for (j
= 0; j
< target
->req_ring_size
; ++j
) {
1389 struct srp_request
*req
= &ch
->req_ring
[j
];
1391 srp_finish_req(ch
, req
, NULL
, DID_RESET
<< 16);
1394 for (i
= 0; i
< target
->ch_count
; i
++) {
1395 ch
= &target
->ch
[i
];
1397 * Whether or not creating a new CM ID succeeded, create a new
1398 * QP. This guarantees that all completion callback function
1399 * invocations have finished before request resetting starts.
1401 ret
+= srp_create_ch_ib(ch
);
1403 INIT_LIST_HEAD(&ch
->free_tx
);
1404 for (j
= 0; j
< target
->queue_size
; ++j
)
1405 list_add(&ch
->tx_ring
[j
]->list
, &ch
->free_tx
);
1408 target
->qp_in_error
= false;
1410 for (i
= 0; i
< target
->ch_count
; i
++) {
1411 ch
= &target
->ch
[i
];
1414 ret
= srp_connect_ch(ch
, multich
);
1419 shost_printk(KERN_INFO
, target
->scsi_host
,
1420 PFX
"reconnect succeeded\n");
1425 static void srp_map_desc(struct srp_map_state
*state
, dma_addr_t dma_addr
,
1426 unsigned int dma_len
, u32 rkey
)
1428 struct srp_direct_buf
*desc
= state
->desc
;
1430 WARN_ON_ONCE(!dma_len
);
1432 desc
->va
= cpu_to_be64(dma_addr
);
1433 desc
->key
= cpu_to_be32(rkey
);
1434 desc
->len
= cpu_to_be32(dma_len
);
1436 state
->total_len
+= dma_len
;
1441 static int srp_map_finish_fmr(struct srp_map_state
*state
,
1442 struct srp_rdma_ch
*ch
)
1444 struct srp_target_port
*target
= ch
->target
;
1445 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1446 struct ib_pool_fmr
*fmr
;
1449 if (state
->fmr
.next
>= state
->fmr
.end
) {
1450 shost_printk(KERN_ERR
, ch
->target
->scsi_host
,
1451 PFX
"Out of MRs (mr_per_cmd = %d)\n",
1452 ch
->target
->mr_per_cmd
);
1456 WARN_ON_ONCE(!dev
->use_fmr
);
1458 if (state
->npages
== 0)
1461 if (state
->npages
== 1 && target
->global_rkey
) {
1462 srp_map_desc(state
, state
->base_dma_addr
, state
->dma_len
,
1463 target
->global_rkey
);
1467 fmr
= ib_fmr_pool_map_phys(ch
->fmr_pool
, state
->pages
,
1468 state
->npages
, io_addr
);
1470 return PTR_ERR(fmr
);
1472 *state
->fmr
.next
++ = fmr
;
1475 srp_map_desc(state
, state
->base_dma_addr
& ~dev
->mr_page_mask
,
1476 state
->dma_len
, fmr
->fmr
->rkey
);
/*
 * Completion handler installed by srp_map_finish_fr() for its registration
 * work requests. It reports the completion as a "FAST REG" QP error.
 */
static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
	srp_handle_qp_err(cq, wc, "FAST REG");
}
1491 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1492 * where to start in the first element. If sg_offset_p != NULL then
1493 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1494 * byte that has not yet been mapped.
1496 static int srp_map_finish_fr(struct srp_map_state
*state
,
1497 struct srp_request
*req
,
1498 struct srp_rdma_ch
*ch
, int sg_nents
,
1499 unsigned int *sg_offset_p
)
1501 struct srp_target_port
*target
= ch
->target
;
1502 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1503 struct ib_reg_wr wr
;
1504 struct srp_fr_desc
*desc
;
1508 if (state
->fr
.next
>= state
->fr
.end
) {
1509 shost_printk(KERN_ERR
, ch
->target
->scsi_host
,
1510 PFX
"Out of MRs (mr_per_cmd = %d)\n",
1511 ch
->target
->mr_per_cmd
);
1515 WARN_ON_ONCE(!dev
->use_fast_reg
);
1517 if (sg_nents
== 1 && target
->global_rkey
) {
1518 unsigned int sg_offset
= sg_offset_p
? *sg_offset_p
: 0;
1520 srp_map_desc(state
, sg_dma_address(state
->sg
) + sg_offset
,
1521 sg_dma_len(state
->sg
) - sg_offset
,
1522 target
->global_rkey
);
1528 desc
= srp_fr_pool_get(ch
->fr_pool
);
1532 rkey
= ib_inc_rkey(desc
->mr
->rkey
);
1533 ib_update_fast_reg_key(desc
->mr
, rkey
);
1535 n
= ib_map_mr_sg(desc
->mr
, state
->sg
, sg_nents
, sg_offset_p
,
1537 if (unlikely(n
< 0)) {
1538 srp_fr_pool_put(ch
->fr_pool
, &desc
, 1);
1539 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1540 dev_name(&req
->scmnd
->device
->sdev_gendev
), sg_nents
,
1541 sg_offset_p
? *sg_offset_p
: -1, n
);
1545 WARN_ON_ONCE(desc
->mr
->length
== 0);
1547 req
->reg_cqe
.done
= srp_reg_mr_err_done
;
1550 wr
.wr
.opcode
= IB_WR_REG_MR
;
1551 wr
.wr
.wr_cqe
= &req
->reg_cqe
;
1553 wr
.wr
.send_flags
= 0;
1555 wr
.key
= desc
->mr
->rkey
;
1556 wr
.access
= (IB_ACCESS_LOCAL_WRITE
|
1557 IB_ACCESS_REMOTE_READ
|
1558 IB_ACCESS_REMOTE_WRITE
);
1560 *state
->fr
.next
++ = desc
;
1563 srp_map_desc(state
, desc
->mr
->iova
,
1564 desc
->mr
->length
, desc
->mr
->rkey
);
1566 err
= ib_post_send(ch
->qp
, &wr
.wr
, NULL
);
1567 if (unlikely(err
)) {
1568 WARN_ON_ONCE(err
== -ENOMEM
);
1575 static int srp_map_sg_entry(struct srp_map_state
*state
,
1576 struct srp_rdma_ch
*ch
,
1577 struct scatterlist
*sg
)
1579 struct srp_target_port
*target
= ch
->target
;
1580 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1581 struct ib_device
*ibdev
= dev
->dev
;
1582 dma_addr_t dma_addr
= ib_sg_dma_address(ibdev
, sg
);
1583 unsigned int dma_len
= ib_sg_dma_len(ibdev
, sg
);
1584 unsigned int len
= 0;
1587 WARN_ON_ONCE(!dma_len
);
1590 unsigned offset
= dma_addr
& ~dev
->mr_page_mask
;
1592 if (state
->npages
== dev
->max_pages_per_mr
||
1593 (state
->npages
> 0 && offset
!= 0)) {
1594 ret
= srp_map_finish_fmr(state
, ch
);
1599 len
= min_t(unsigned int, dma_len
, dev
->mr_page_size
- offset
);
1602 state
->base_dma_addr
= dma_addr
;
1603 state
->pages
[state
->npages
++] = dma_addr
& dev
->mr_page_mask
;
1604 state
->dma_len
+= len
;
1610 * If the end of the MR is not on a page boundary then we need to
1611 * close it out and start a new one -- we can only merge at page
1615 if ((dma_addr
& ~dev
->mr_page_mask
) != 0)
1616 ret
= srp_map_finish_fmr(state
, ch
);
1620 static int srp_map_sg_fmr(struct srp_map_state
*state
, struct srp_rdma_ch
*ch
,
1621 struct srp_request
*req
, struct scatterlist
*scat
,
1624 struct scatterlist
*sg
;
1627 state
->pages
= req
->map_page
;
1628 state
->fmr
.next
= req
->fmr_list
;
1629 state
->fmr
.end
= req
->fmr_list
+ ch
->target
->mr_per_cmd
;
1631 for_each_sg(scat
, sg
, count
, i
) {
1632 ret
= srp_map_sg_entry(state
, ch
, sg
);
1637 ret
= srp_map_finish_fmr(state
, ch
);
1644 static int srp_map_sg_fr(struct srp_map_state
*state
, struct srp_rdma_ch
*ch
,
1645 struct srp_request
*req
, struct scatterlist
*scat
,
1648 unsigned int sg_offset
= 0;
1650 state
->fr
.next
= req
->fr_list
;
1651 state
->fr
.end
= req
->fr_list
+ ch
->target
->mr_per_cmd
;
1660 n
= srp_map_finish_fr(state
, req
, ch
, count
, &sg_offset
);
1661 if (unlikely(n
< 0))
1665 for (i
= 0; i
< n
; i
++)
1666 state
->sg
= sg_next(state
->sg
);
1672 static int srp_map_sg_dma(struct srp_map_state
*state
, struct srp_rdma_ch
*ch
,
1673 struct srp_request
*req
, struct scatterlist
*scat
,
1676 struct srp_target_port
*target
= ch
->target
;
1677 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1678 struct scatterlist
*sg
;
1681 for_each_sg(scat
, sg
, count
, i
) {
1682 srp_map_desc(state
, ib_sg_dma_address(dev
->dev
, sg
),
1683 ib_sg_dma_len(dev
->dev
, sg
),
1684 target
->global_rkey
);
1691 * Register the indirect data buffer descriptor with the HCA.
1693 * Note: since the indirect data buffer descriptor has been allocated with
1694 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1697 static int srp_map_idb(struct srp_rdma_ch
*ch
, struct srp_request
*req
,
1698 void **next_mr
, void **end_mr
, u32 idb_len
,
1701 struct srp_target_port
*target
= ch
->target
;
1702 struct srp_device
*dev
= target
->srp_host
->srp_dev
;
1703 struct srp_map_state state
;
1704 struct srp_direct_buf idb_desc
;
1706 struct scatterlist idb_sg
[1];
1709 memset(&state
, 0, sizeof(state
));
1710 memset(&idb_desc
, 0, sizeof(idb_desc
));
1711 state
.gen
.next
= next_mr
;
1712 state
.gen
.end
= end_mr
;
1713 state
.desc
= &idb_desc
;
1714 state
.base_dma_addr
= req
->indirect_dma_addr
;
1715 state
.dma_len
= idb_len
;
1717 if (dev
->use_fast_reg
) {
1719 sg_init_one(idb_sg
, req
->indirect_desc
, idb_len
);
1720 idb_sg
->dma_address
= req
->indirect_dma_addr
; /* hack! */
1721 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1722 idb_sg
->dma_length
= idb_sg
->length
; /* hack^2 */
1724 ret
= srp_map_finish_fr(&state
, req
, ch
, 1, NULL
);
1727 WARN_ON_ONCE(ret
< 1);
1728 } else if (dev
->use_fmr
) {
1729 state
.pages
= idb_pages
;
1730 state
.pages
[0] = (req
->indirect_dma_addr
&
1733 ret
= srp_map_finish_fmr(&state
, ch
);
1740 *idb_rkey
= idb_desc
.key
;
1745 static void srp_check_mapping(struct srp_map_state
*state
,
1746 struct srp_rdma_ch
*ch
, struct srp_request
*req
,
1747 struct scatterlist
*scat
, int count
)
1749 struct srp_device
*dev
= ch
->target
->srp_host
->srp_dev
;
1750 struct srp_fr_desc
**pfr
;
1751 u64 desc_len
= 0, mr_len
= 0;
1754 for (i
= 0; i
< state
->ndesc
; i
++)
1755 desc_len
+= be32_to_cpu(req
->indirect_desc
[i
].len
);
1756 if (dev
->use_fast_reg
)
1757 for (i
= 0, pfr
= req
->fr_list
; i
< state
->nmdesc
; i
++, pfr
++)
1758 mr_len
+= (*pfr
)->mr
->length
;
1759 else if (dev
->use_fmr
)
1760 for (i
= 0; i
< state
->nmdesc
; i
++)
1761 mr_len
+= be32_to_cpu(req
->indirect_desc
[i
].len
);
1762 if (desc_len
!= scsi_bufflen(req
->scmnd
) ||
1763 mr_len
> scsi_bufflen(req
->scmnd
))
1764 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1765 scsi_bufflen(req
->scmnd
), desc_len
, mr_len
,
1766 state
->ndesc
, state
->nmdesc
);
1770 * srp_map_data() - map SCSI data buffer onto an SRP request
1771 * @scmnd: SCSI command to map
1772 * @ch: SRP RDMA channel
1775 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1778 static int srp_map_data(struct scsi_cmnd
*scmnd
, struct srp_rdma_ch
*ch
,
1779 struct srp_request
*req
)
1781 struct srp_target_port
*target
= ch
->target
;
1782 struct scatterlist
*scat
;
1783 struct srp_cmd
*cmd
= req
->cmd
->buf
;
1784 int len
, nents
, count
, ret
;
1785 struct srp_device
*dev
;
1786 struct ib_device
*ibdev
;
1787 struct srp_map_state state
;
1788 struct srp_indirect_buf
*indirect_hdr
;
1789 u32 idb_len
, table_len
;
1793 if (!scsi_sglist(scmnd
) || scmnd
->sc_data_direction
== DMA_NONE
)
1794 return sizeof (struct srp_cmd
);
1796 if (scmnd
->sc_data_direction
!= DMA_FROM_DEVICE
&&
1797 scmnd
->sc_data_direction
!= DMA_TO_DEVICE
) {
1798 shost_printk(KERN_WARNING
, target
->scsi_host
,
1799 PFX
"Unhandled data direction %d\n",
1800 scmnd
->sc_data_direction
);
1804 nents
= scsi_sg_count(scmnd
);
1805 scat
= scsi_sglist(scmnd
);
1807 dev
= target
->srp_host
->srp_dev
;
1810 count
= ib_dma_map_sg(ibdev
, scat
, nents
, scmnd
->sc_data_direction
);
1811 if (unlikely(count
== 0))
1814 fmt
= SRP_DATA_DESC_DIRECT
;
1815 len
= sizeof (struct srp_cmd
) + sizeof (struct srp_direct_buf
);
1817 if (count
== 1 && target
->global_rkey
) {
1819 * The midlayer only generated a single gather/scatter
1820 * entry, or DMA mapping coalesced everything to a
1821 * single entry. So a direct descriptor along with
1822 * the DMA MR suffices.
1824 struct srp_direct_buf
*buf
= (void *) cmd
->add_data
;
1826 buf
->va
= cpu_to_be64(ib_sg_dma_address(ibdev
, scat
));
1827 buf
->key
= cpu_to_be32(target
->global_rkey
);
1828 buf
->len
= cpu_to_be32(ib_sg_dma_len(ibdev
, scat
));
1835 * We have more than one scatter/gather entry, so build our indirect
1836 * descriptor table, trying to merge as many entries as we can.
1838 indirect_hdr
= (void *) cmd
->add_data
;
1840 ib_dma_sync_single_for_cpu(ibdev
, req
->indirect_dma_addr
,
1841 target
->indirect_size
, DMA_TO_DEVICE
);
1843 memset(&state
, 0, sizeof(state
));
1844 state
.desc
= req
->indirect_desc
;
1845 if (dev
->use_fast_reg
)
1846 ret
= srp_map_sg_fr(&state
, ch
, req
, scat
, count
);
1847 else if (dev
->use_fmr
)
1848 ret
= srp_map_sg_fmr(&state
, ch
, req
, scat
, count
);
1850 ret
= srp_map_sg_dma(&state
, ch
, req
, scat
, count
);
1851 req
->nmdesc
= state
.nmdesc
;
1856 DEFINE_DYNAMIC_DEBUG_METADATA(ddm
,
1857 "Memory mapping consistency check");
1858 if (DYNAMIC_DEBUG_BRANCH(ddm
))
1859 srp_check_mapping(&state
, ch
, req
, scat
, count
);
1862 /* We've mapped the request, now pull as much of the indirect
1863 * descriptor table as we can into the command buffer. If this
1864 * target is not using an external indirect table, we are
1865 * guaranteed to fit into the command, as the SCSI layer won't
1866 * give us more S/G entries than we allow.
1868 if (state
.ndesc
== 1) {
1870 * Memory registration collapsed the sg-list into one entry,
1871 * so use a direct descriptor.
1873 struct srp_direct_buf
*buf
= (void *) cmd
->add_data
;
1875 *buf
= req
->indirect_desc
[0];
1879 if (unlikely(target
->cmd_sg_cnt
< state
.ndesc
&&
1880 !target
->allow_ext_sg
)) {
1881 shost_printk(KERN_ERR
, target
->scsi_host
,
1882 "Could not fit S/G list into SRP_CMD\n");
1887 count
= min(state
.ndesc
, target
->cmd_sg_cnt
);
1888 table_len
= state
.ndesc
* sizeof (struct srp_direct_buf
);
1889 idb_len
= sizeof(struct srp_indirect_buf
) + table_len
;
1891 fmt
= SRP_DATA_DESC_INDIRECT
;
1892 len
= sizeof(struct srp_cmd
) + sizeof (struct srp_indirect_buf
);
1893 len
+= count
* sizeof (struct srp_direct_buf
);
1895 memcpy(indirect_hdr
->desc_list
, req
->indirect_desc
,
1896 count
* sizeof (struct srp_direct_buf
));
1898 if (!target
->global_rkey
) {
1899 ret
= srp_map_idb(ch
, req
, state
.gen
.next
, state
.gen
.end
,
1900 idb_len
, &idb_rkey
);
1905 idb_rkey
= cpu_to_be32(target
->global_rkey
);
1908 indirect_hdr
->table_desc
.va
= cpu_to_be64(req
->indirect_dma_addr
);
1909 indirect_hdr
->table_desc
.key
= idb_rkey
;
1910 indirect_hdr
->table_desc
.len
= cpu_to_be32(table_len
);
1911 indirect_hdr
->len
= cpu_to_be32(state
.total_len
);
1913 if (scmnd
->sc_data_direction
== DMA_TO_DEVICE
)
1914 cmd
->data_out_desc_cnt
= count
;
1916 cmd
->data_in_desc_cnt
= count
;
1918 ib_dma_sync_single_for_device(ibdev
, req
->indirect_dma_addr
, table_len
,
1922 if (scmnd
->sc_data_direction
== DMA_TO_DEVICE
)
1923 cmd
->buf_fmt
= fmt
<< 4;
1930 srp_unmap_data(scmnd
, ch
, req
);
1931 if (ret
== -ENOMEM
&& req
->nmdesc
>= target
->mr_pool_size
)
1937 * Return an IU and possible credit to the free pool
1939 static void srp_put_tx_iu(struct srp_rdma_ch
*ch
, struct srp_iu
*iu
,
1940 enum srp_iu_type iu_type
)
1942 unsigned long flags
;
1944 spin_lock_irqsave(&ch
->lock
, flags
);
1945 list_add(&iu
->list
, &ch
->free_tx
);
1946 if (iu_type
!= SRP_IU_RSP
)
1948 spin_unlock_irqrestore(&ch
->lock
, flags
);
1952 * Must be called with ch->lock held to protect req_lim and free_tx.
1953 * If IU is not sent, it must be returned using srp_put_tx_iu().
1956 * An upper limit for the number of allocated information units for each
1958 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1959 * more than Scsi_Host.can_queue requests.
1960 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1961 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1962 * one unanswered SRP request to an initiator.
1964 static struct srp_iu
*__srp_get_tx_iu(struct srp_rdma_ch
*ch
,
1965 enum srp_iu_type iu_type
)
1967 struct srp_target_port
*target
= ch
->target
;
1968 s32 rsv
= (iu_type
== SRP_IU_TSK_MGMT
) ? 0 : SRP_TSK_MGMT_SQ_SIZE
;
1971 lockdep_assert_held(&ch
->lock
);
1973 ib_process_cq_direct(ch
->send_cq
, -1);
1975 if (list_empty(&ch
->free_tx
))
1978 /* Initiator responses to target requests do not consume credits */
1979 if (iu_type
!= SRP_IU_RSP
) {
1980 if (ch
->req_lim
<= rsv
) {
1981 ++target
->zero_req_lim
;
1988 iu
= list_first_entry(&ch
->free_tx
, struct srp_iu
, list
);
1989 list_del(&iu
->list
);
1994 * Note: if this function is called from inside ib_drain_sq() then it will
1995 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
1996 * with status IB_WC_SUCCESS then that's a bug.
1998 static void srp_send_done(struct ib_cq
*cq
, struct ib_wc
*wc
)
2000 struct srp_iu
*iu
= container_of(wc
->wr_cqe
, struct srp_iu
, cqe
);
2001 struct srp_rdma_ch
*ch
= cq
->cq_context
;
2003 if (unlikely(wc
->status
!= IB_WC_SUCCESS
)) {
2004 srp_handle_qp_err(cq
, wc
, "SEND");
2008 lockdep_assert_held(&ch
->lock
);
2010 list_add(&iu
->list
, &ch
->free_tx
);
2013 static int srp_post_send(struct srp_rdma_ch
*ch
, struct srp_iu
*iu
, int len
)
2015 struct srp_target_port
*target
= ch
->target
;
2017 struct ib_send_wr wr
;
2019 list
.addr
= iu
->dma
;
2021 list
.lkey
= target
->lkey
;
2023 iu
->cqe
.done
= srp_send_done
;
2026 wr
.wr_cqe
= &iu
->cqe
;
2029 wr
.opcode
= IB_WR_SEND
;
2030 wr
.send_flags
= IB_SEND_SIGNALED
;
2032 return ib_post_send(ch
->qp
, &wr
, NULL
);
2035 static int srp_post_recv(struct srp_rdma_ch
*ch
, struct srp_iu
*iu
)
2037 struct srp_target_port
*target
= ch
->target
;
2038 struct ib_recv_wr wr
;
2041 list
.addr
= iu
->dma
;
2042 list
.length
= iu
->size
;
2043 list
.lkey
= target
->lkey
;
2045 iu
->cqe
.done
= srp_recv_done
;
2048 wr
.wr_cqe
= &iu
->cqe
;
2052 return ib_post_recv(ch
->qp
, &wr
, NULL
);
2055 static void srp_process_rsp(struct srp_rdma_ch
*ch
, struct srp_rsp
*rsp
)
2057 struct srp_target_port
*target
= ch
->target
;
2058 struct srp_request
*req
;
2059 struct scsi_cmnd
*scmnd
;
2060 unsigned long flags
;
2062 if (unlikely(rsp
->tag
& SRP_TAG_TSK_MGMT
)) {
2063 spin_lock_irqsave(&ch
->lock
, flags
);
2064 ch
->req_lim
+= be32_to_cpu(rsp
->req_lim_delta
);
2065 if (rsp
->tag
== ch
->tsk_mgmt_tag
) {
2066 ch
->tsk_mgmt_status
= -1;
2067 if (be32_to_cpu(rsp
->resp_data_len
) >= 4)
2068 ch
->tsk_mgmt_status
= rsp
->data
[3];
2069 complete(&ch
->tsk_mgmt_done
);
2071 shost_printk(KERN_ERR
, target
->scsi_host
,
2072 "Received tsk mgmt response too late for tag %#llx\n",
2075 spin_unlock_irqrestore(&ch
->lock
, flags
);
2077 scmnd
= scsi_host_find_tag(target
->scsi_host
, rsp
->tag
);
2078 if (scmnd
&& scmnd
->host_scribble
) {
2079 req
= (void *)scmnd
->host_scribble
;
2080 scmnd
= srp_claim_req(ch
, req
, NULL
, scmnd
);
2085 shost_printk(KERN_ERR
, target
->scsi_host
,
2086 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
2087 rsp
->tag
, ch
- target
->ch
, ch
->qp
->qp_num
);
2089 spin_lock_irqsave(&ch
->lock
, flags
);
2090 ch
->req_lim
+= be32_to_cpu(rsp
->req_lim_delta
);
2091 spin_unlock_irqrestore(&ch
->lock
, flags
);
2095 scmnd
->result
= rsp
->status
;
2097 if (rsp
->flags
& SRP_RSP_FLAG_SNSVALID
) {
2098 memcpy(scmnd
->sense_buffer
, rsp
->data
+
2099 be32_to_cpu(rsp
->resp_data_len
),
2100 min_t(int, be32_to_cpu(rsp
->sense_data_len
),
2101 SCSI_SENSE_BUFFERSIZE
));
2104 if (unlikely(rsp
->flags
& SRP_RSP_FLAG_DIUNDER
))
2105 scsi_set_resid(scmnd
, be32_to_cpu(rsp
->data_in_res_cnt
));
2106 else if (unlikely(rsp
->flags
& SRP_RSP_FLAG_DIOVER
))
2107 scsi_set_resid(scmnd
, -be32_to_cpu(rsp
->data_in_res_cnt
));
2108 else if (unlikely(rsp
->flags
& SRP_RSP_FLAG_DOUNDER
))
2109 scsi_set_resid(scmnd
, be32_to_cpu(rsp
->data_out_res_cnt
));
2110 else if (unlikely(rsp
->flags
& SRP_RSP_FLAG_DOOVER
))
2111 scsi_set_resid(scmnd
, -be32_to_cpu(rsp
->data_out_res_cnt
));
2113 srp_free_req(ch
, req
, scmnd
,
2114 be32_to_cpu(rsp
->req_lim_delta
));
2116 scmnd
->host_scribble
= NULL
;
2117 scmnd
->scsi_done(scmnd
);
2121 static int srp_response_common(struct srp_rdma_ch
*ch
, s32 req_delta
,
2124 struct srp_target_port
*target
= ch
->target
;
2125 struct ib_device
*dev
= target
->srp_host
->srp_dev
->dev
;
2126 unsigned long flags
;
2130 spin_lock_irqsave(&ch
->lock
, flags
);
2131 ch
->req_lim
+= req_delta
;
2132 iu
= __srp_get_tx_iu(ch
, SRP_IU_RSP
);
2133 spin_unlock_irqrestore(&ch
->lock
, flags
);
2136 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
2137 "no IU available to send response\n");
2141 ib_dma_sync_single_for_cpu(dev
, iu
->dma
, len
, DMA_TO_DEVICE
);
2142 memcpy(iu
->buf
, rsp
, len
);
2143 ib_dma_sync_single_for_device(dev
, iu
->dma
, len
, DMA_TO_DEVICE
);
2145 err
= srp_post_send(ch
, iu
, len
);
2147 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
2148 "unable to post response: %d\n", err
);
2149 srp_put_tx_iu(ch
, iu
, SRP_IU_RSP
);
2155 static void srp_process_cred_req(struct srp_rdma_ch
*ch
,
2156 struct srp_cred_req
*req
)
2158 struct srp_cred_rsp rsp
= {
2159 .opcode
= SRP_CRED_RSP
,
2162 s32 delta
= be32_to_cpu(req
->req_lim_delta
);
2164 if (srp_response_common(ch
, delta
, &rsp
, sizeof(rsp
)))
2165 shost_printk(KERN_ERR
, ch
->target
->scsi_host
, PFX
2166 "problems processing SRP_CRED_REQ\n");
2169 static void srp_process_aer_req(struct srp_rdma_ch
*ch
,
2170 struct srp_aer_req
*req
)
2172 struct srp_target_port
*target
= ch
->target
;
2173 struct srp_aer_rsp rsp
= {
2174 .opcode
= SRP_AER_RSP
,
2177 s32 delta
= be32_to_cpu(req
->req_lim_delta
);
2179 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
2180 "ignoring AER for LUN %llu\n", scsilun_to_int(&req
->lun
));
2182 if (srp_response_common(ch
, delta
, &rsp
, sizeof(rsp
)))
2183 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
2184 "problems processing SRP_AER_REQ\n");
2187 static void srp_recv_done(struct ib_cq
*cq
, struct ib_wc
*wc
)
2189 struct srp_iu
*iu
= container_of(wc
->wr_cqe
, struct srp_iu
, cqe
);
2190 struct srp_rdma_ch
*ch
= cq
->cq_context
;
2191 struct srp_target_port
*target
= ch
->target
;
2192 struct ib_device
*dev
= target
->srp_host
->srp_dev
->dev
;
2196 if (unlikely(wc
->status
!= IB_WC_SUCCESS
)) {
2197 srp_handle_qp_err(cq
, wc
, "RECV");
2201 ib_dma_sync_single_for_cpu(dev
, iu
->dma
, ch
->max_ti_iu_len
,
2204 opcode
= *(u8
*) iu
->buf
;
2207 shost_printk(KERN_ERR
, target
->scsi_host
,
2208 PFX
"recv completion, opcode 0x%02x\n", opcode
);
2209 print_hex_dump(KERN_ERR
, "", DUMP_PREFIX_OFFSET
, 8, 1,
2210 iu
->buf
, wc
->byte_len
, true);
2215 srp_process_rsp(ch
, iu
->buf
);
2219 srp_process_cred_req(ch
, iu
->buf
);
2223 srp_process_aer_req(ch
, iu
->buf
);
2227 /* XXX Handle target logout */
2228 shost_printk(KERN_WARNING
, target
->scsi_host
,
2229 PFX
"Got target logout request\n");
2233 shost_printk(KERN_WARNING
, target
->scsi_host
,
2234 PFX
"Unhandled SRP opcode 0x%02x\n", opcode
);
2238 ib_dma_sync_single_for_device(dev
, iu
->dma
, ch
->max_ti_iu_len
,
2241 res
= srp_post_recv(ch
, iu
);
2243 shost_printk(KERN_ERR
, target
->scsi_host
,
2244 PFX
"Recv failed with error code %d\n", res
);
2248 * srp_tl_err_work() - handle a transport layer error
2249 * @work: Work structure embedded in an SRP target port.
2251 * Note: This function may get invoked before the rport has been created,
2252 * hence the target->rport test.
2254 static void srp_tl_err_work(struct work_struct
*work
)
2256 struct srp_target_port
*target
;
2258 target
= container_of(work
, struct srp_target_port
, tl_err_work
);
2260 srp_start_tl_fail_timers(target
->rport
);
2263 static void srp_handle_qp_err(struct ib_cq
*cq
, struct ib_wc
*wc
,
2266 struct srp_rdma_ch
*ch
= cq
->cq_context
;
2267 struct srp_target_port
*target
= ch
->target
;
2269 if (ch
->connected
&& !target
->qp_in_error
) {
2270 shost_printk(KERN_ERR
, target
->scsi_host
,
2271 PFX
"failed %s status %s (%d) for CQE %p\n",
2272 opname
, ib_wc_status_msg(wc
->status
), wc
->status
,
2274 queue_work(system_long_wq
, &target
->tl_err_work
);
2276 target
->qp_in_error
= true;
2279 static int srp_queuecommand(struct Scsi_Host
*shost
, struct scsi_cmnd
*scmnd
)
2281 struct srp_target_port
*target
= host_to_target(shost
);
2282 struct srp_rport
*rport
= target
->rport
;
2283 struct srp_rdma_ch
*ch
;
2284 struct srp_request
*req
;
2286 struct srp_cmd
*cmd
;
2287 struct ib_device
*dev
;
2288 unsigned long flags
;
2292 const bool in_scsi_eh
= !in_interrupt() && current
== shost
->ehandler
;
2295 * The SCSI EH thread is the only context from which srp_queuecommand()
2296 * can get invoked for blocked devices (SDEV_BLOCK /
2297 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2298 * locking the rport mutex if invoked from inside the SCSI EH.
2301 mutex_lock(&rport
->mutex
);
2303 scmnd
->result
= srp_chkready(target
->rport
);
2304 if (unlikely(scmnd
->result
))
2307 WARN_ON_ONCE(scmnd
->request
->tag
< 0);
2308 tag
= blk_mq_unique_tag(scmnd
->request
);
2309 ch
= &target
->ch
[blk_mq_unique_tag_to_hwq(tag
)];
2310 idx
= blk_mq_unique_tag_to_tag(tag
);
2311 WARN_ONCE(idx
>= target
->req_ring_size
, "%s: tag %#x: idx %d >= %d\n",
2312 dev_name(&shost
->shost_gendev
), tag
, idx
,
2313 target
->req_ring_size
);
2315 spin_lock_irqsave(&ch
->lock
, flags
);
2316 iu
= __srp_get_tx_iu(ch
, SRP_IU_CMD
);
2317 spin_unlock_irqrestore(&ch
->lock
, flags
);
2322 req
= &ch
->req_ring
[idx
];
2323 dev
= target
->srp_host
->srp_dev
->dev
;
2324 ib_dma_sync_single_for_cpu(dev
, iu
->dma
, target
->max_iu_len
,
2327 scmnd
->host_scribble
= (void *) req
;
2330 memset(cmd
, 0, sizeof *cmd
);
2332 cmd
->opcode
= SRP_CMD
;
2333 int_to_scsilun(scmnd
->device
->lun
, &cmd
->lun
);
2335 memcpy(cmd
->cdb
, scmnd
->cmnd
, scmnd
->cmd_len
);
2340 len
= srp_map_data(scmnd
, ch
, req
);
2342 shost_printk(KERN_ERR
, target
->scsi_host
,
2343 PFX
"Failed to map data (%d)\n", len
);
2345 * If we ran out of memory descriptors (-ENOMEM) because an
2346 * application is queuing many requests with more than
2347 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2348 * to reduce queue depth temporarily.
2350 scmnd
->result
= len
== -ENOMEM
?
2351 DID_OK
<< 16 | QUEUE_FULL
<< 1 : DID_ERROR
<< 16;
2355 ib_dma_sync_single_for_device(dev
, iu
->dma
, target
->max_iu_len
,
2358 if (srp_post_send(ch
, iu
, len
)) {
2359 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
"Send failed\n");
2367 mutex_unlock(&rport
->mutex
);
2372 srp_unmap_data(scmnd
, ch
, req
);
2375 srp_put_tx_iu(ch
, iu
, SRP_IU_CMD
);
2378 * Avoid that the loops that iterate over the request ring can
2379 * encounter a dangling SCSI command pointer.
2384 if (scmnd
->result
) {
2385 scmnd
->scsi_done(scmnd
);
2388 ret
= SCSI_MLQUEUE_HOST_BUSY
;
2395 * Note: the resources allocated in this function are freed in
2398 static int srp_alloc_iu_bufs(struct srp_rdma_ch
*ch
)
2400 struct srp_target_port
*target
= ch
->target
;
2403 ch
->rx_ring
= kcalloc(target
->queue_size
, sizeof(*ch
->rx_ring
),
2407 ch
->tx_ring
= kcalloc(target
->queue_size
, sizeof(*ch
->tx_ring
),
2412 for (i
= 0; i
< target
->queue_size
; ++i
) {
2413 ch
->rx_ring
[i
] = srp_alloc_iu(target
->srp_host
,
2415 GFP_KERNEL
, DMA_FROM_DEVICE
);
2416 if (!ch
->rx_ring
[i
])
2420 for (i
= 0; i
< target
->queue_size
; ++i
) {
2421 ch
->tx_ring
[i
] = srp_alloc_iu(target
->srp_host
,
2423 GFP_KERNEL
, DMA_TO_DEVICE
);
2424 if (!ch
->tx_ring
[i
])
2427 list_add(&ch
->tx_ring
[i
]->list
, &ch
->free_tx
);
2433 for (i
= 0; i
< target
->queue_size
; ++i
) {
2434 srp_free_iu(target
->srp_host
, ch
->rx_ring
[i
]);
2435 srp_free_iu(target
->srp_host
, ch
->tx_ring
[i
]);
2448 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr
*qp_attr
, int attr_mask
)
2450 uint64_t T_tr_ns
, max_compl_time_ms
;
2451 uint32_t rq_tmo_jiffies
;
2454 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2455 * table 91), both the QP timeout and the retry count have to be set
2456 * for RC QP's during the RTR to RTS transition.
2458 WARN_ON_ONCE((attr_mask
& (IB_QP_TIMEOUT
| IB_QP_RETRY_CNT
)) !=
2459 (IB_QP_TIMEOUT
| IB_QP_RETRY_CNT
));
2462 * Set target->rq_tmo_jiffies to one second more than the largest time
2463 * it can take before an error completion is generated. See also
2464 * C9-140..142 in the IBTA spec for more information about how to
2465 * convert the QP Local ACK Timeout value to nanoseconds.
2467 T_tr_ns
= 4096 * (1ULL << qp_attr
->timeout
);
2468 max_compl_time_ms
= qp_attr
->retry_cnt
* 4 * T_tr_ns
;
2469 do_div(max_compl_time_ms
, NSEC_PER_MSEC
);
2470 rq_tmo_jiffies
= msecs_to_jiffies(max_compl_time_ms
+ 1000);
2472 return rq_tmo_jiffies
;
2475 static void srp_cm_rep_handler(struct ib_cm_id
*cm_id
,
2476 const struct srp_login_rsp
*lrsp
,
2477 struct srp_rdma_ch
*ch
)
2479 struct srp_target_port
*target
= ch
->target
;
2480 struct ib_qp_attr
*qp_attr
= NULL
;
2485 if (lrsp
->opcode
== SRP_LOGIN_RSP
) {
2486 ch
->max_ti_iu_len
= be32_to_cpu(lrsp
->max_ti_iu_len
);
2487 ch
->req_lim
= be32_to_cpu(lrsp
->req_lim_delta
);
2490 * Reserve credits for task management so we don't
2491 * bounce requests back to the SCSI mid-layer.
2493 target
->scsi_host
->can_queue
2494 = min(ch
->req_lim
- SRP_TSK_MGMT_SQ_SIZE
,
2495 target
->scsi_host
->can_queue
);
2496 target
->scsi_host
->cmd_per_lun
2497 = min_t(int, target
->scsi_host
->can_queue
,
2498 target
->scsi_host
->cmd_per_lun
);
2500 shost_printk(KERN_WARNING
, target
->scsi_host
,
2501 PFX
"Unhandled RSP opcode %#x\n", lrsp
->opcode
);
2507 ret
= srp_alloc_iu_bufs(ch
);
2512 for (i
= 0; i
< target
->queue_size
; i
++) {
2513 struct srp_iu
*iu
= ch
->rx_ring
[i
];
2515 ret
= srp_post_recv(ch
, iu
);
2520 if (!target
->using_rdma_cm
) {
2522 qp_attr
= kmalloc(sizeof(*qp_attr
), GFP_KERNEL
);
2526 qp_attr
->qp_state
= IB_QPS_RTR
;
2527 ret
= ib_cm_init_qp_attr(cm_id
, qp_attr
, &attr_mask
);
2531 ret
= ib_modify_qp(ch
->qp
, qp_attr
, attr_mask
);
2535 qp_attr
->qp_state
= IB_QPS_RTS
;
2536 ret
= ib_cm_init_qp_attr(cm_id
, qp_attr
, &attr_mask
);
2540 target
->rq_tmo_jiffies
= srp_compute_rq_tmo(qp_attr
, attr_mask
);
2542 ret
= ib_modify_qp(ch
->qp
, qp_attr
, attr_mask
);
2546 ret
= ib_send_cm_rtu(cm_id
, NULL
, 0);
2556 static void srp_ib_cm_rej_handler(struct ib_cm_id
*cm_id
,
2557 const struct ib_cm_event
*event
,
2558 struct srp_rdma_ch
*ch
)
2560 struct srp_target_port
*target
= ch
->target
;
2561 struct Scsi_Host
*shost
= target
->scsi_host
;
2562 struct ib_class_port_info
*cpi
;
2566 switch (event
->param
.rej_rcvd
.reason
) {
2567 case IB_CM_REJ_PORT_CM_REDIRECT
:
2568 cpi
= event
->param
.rej_rcvd
.ari
;
2569 dlid
= be16_to_cpu(cpi
->redirect_lid
);
2570 sa_path_set_dlid(&ch
->ib_cm
.path
, dlid
);
2571 ch
->ib_cm
.path
.pkey
= cpi
->redirect_pkey
;
2572 cm_id
->remote_cm_qpn
= be32_to_cpu(cpi
->redirect_qp
) & 0x00ffffff;
2573 memcpy(ch
->ib_cm
.path
.dgid
.raw
, cpi
->redirect_gid
, 16);
2575 ch
->status
= dlid
? SRP_DLID_REDIRECT
: SRP_PORT_REDIRECT
;
2578 case IB_CM_REJ_PORT_REDIRECT
:
2579 if (srp_target_is_topspin(target
)) {
2580 union ib_gid
*dgid
= &ch
->ib_cm
.path
.dgid
;
2583 * Topspin/Cisco SRP gateways incorrectly send
2584 * reject reason code 25 when they mean 24
2587 memcpy(dgid
->raw
, event
->param
.rej_rcvd
.ari
, 16);
2589 shost_printk(KERN_DEBUG
, shost
,
2590 PFX
"Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2591 be64_to_cpu(dgid
->global
.subnet_prefix
),
2592 be64_to_cpu(dgid
->global
.interface_id
));
2594 ch
->status
= SRP_PORT_REDIRECT
;
2596 shost_printk(KERN_WARNING
, shost
,
2597 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2598 ch
->status
= -ECONNRESET
;
2602 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID
:
2603 shost_printk(KERN_WARNING
, shost
,
2604 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2605 ch
->status
= -ECONNRESET
;
2608 case IB_CM_REJ_CONSUMER_DEFINED
:
2609 opcode
= *(u8
*) event
->private_data
;
2610 if (opcode
== SRP_LOGIN_REJ
) {
2611 struct srp_login_rej
*rej
= event
->private_data
;
2612 u32 reason
= be32_to_cpu(rej
->reason
);
2614 if (reason
== SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE
)
2615 shost_printk(KERN_WARNING
, shost
,
2616 PFX
"SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2618 shost_printk(KERN_WARNING
, shost
, PFX
2619 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2621 target
->ib_cm
.orig_dgid
.raw
,
2624 shost_printk(KERN_WARNING
, shost
,
2625 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2626 " opcode 0x%02x\n", opcode
);
2627 ch
->status
= -ECONNRESET
;
2630 case IB_CM_REJ_STALE_CONN
:
2631 shost_printk(KERN_WARNING
, shost
, " REJ reason: stale connection\n");
2632 ch
->status
= SRP_STALE_CONN
;
2636 shost_printk(KERN_WARNING
, shost
, " REJ reason 0x%x\n",
2637 event
->param
.rej_rcvd
.reason
);
2638 ch
->status
= -ECONNRESET
;
/*
 * srp_ib_cm_handler() - IB CM event callback of an SRP RDMA channel.
 * @cm_id: CM identifier; its ->context points at the struct srp_rdma_ch.
 * @event: CM event being delivered.
 *
 * Visible handling per event type:
 *  - IB_CM_REQ_ERROR:     log, set ch->status to -ECONNRESET.
 *  - IB_CM_REP_RECEIVED:  hand the private data to srp_cm_rep_handler().
 *  - IB_CM_REJ_RECEIVED:  log, decode through srp_ib_cm_rej_handler().
 *  - IB_CM_DREQ_RECEIVED: mark the channel disconnected, answer with a
 *                         DREP (logging a failure to send it) and queue
 *                         target->tl_err_work on system_long_wq.
 *  - IB_CM_TIMEWAIT_EXIT: log "connection closed".
 *  - IB_CM_MRA_RECEIVED, IB_CM_DREQ_ERROR, IB_CM_DREP_RECEIVED: listed as
 *    case labels with no visible action.
 *  - "Unhandled CM event" is logged for what is presumably the default
 *    branch.
 * ch->done is completed so that a waiter on the connection attempt can
 * proceed.
 *
 * NOTE(review): the break statements, any condition guarding the
 * complete() call and the return value are not visible in this listing;
 * confirm against the full source.
 */
2642 static int srp_ib_cm_handler(struct ib_cm_id
*cm_id
,
2643 const struct ib_cm_event
*event
)
2645 struct srp_rdma_ch
*ch
= cm_id
->context
;
2646 struct srp_target_port
*target
= ch
->target
;
2649 switch (event
->event
) {
2650 case IB_CM_REQ_ERROR
:
2651 shost_printk(KERN_DEBUG
, target
->scsi_host
,
2652 PFX
"Sending CM REQ failed\n");
2654 ch
->status
= -ECONNRESET
;
2657 case IB_CM_REP_RECEIVED
:
2659 srp_cm_rep_handler(cm_id
, event
->private_data
, ch
);
2662 case IB_CM_REJ_RECEIVED
:
2663 shost_printk(KERN_DEBUG
, target
->scsi_host
, PFX
"REJ received\n");
2666 srp_ib_cm_rej_handler(cm_id
, event
, ch
);
2669 case IB_CM_DREQ_RECEIVED
:
2670 shost_printk(KERN_WARNING
, target
->scsi_host
,
2671 PFX
"DREQ received - connection closed\n");
2672 ch
->connected
= false;
2673 if (ib_send_cm_drep(cm_id
, NULL
, 0))
2674 shost_printk(KERN_ERR
, target
->scsi_host
,
2675 PFX
"Sending CM DREP failed\n");
/* Transport-layer error handling runs from a workqueue, not from here. */
2676 queue_work(system_long_wq
, &target
->tl_err_work
);
2679 case IB_CM_TIMEWAIT_EXIT
:
2680 shost_printk(KERN_ERR
, target
->scsi_host
,
2681 PFX
"connection closed\n");
2687 case IB_CM_MRA_RECEIVED
:
2688 case IB_CM_DREQ_ERROR
:
2689 case IB_CM_DREP_RECEIVED
:
2693 shost_printk(KERN_WARNING
, target
->scsi_host
,
2694 PFX
"Unhandled CM event %d\n", event
->event
);
2699 complete(&ch
->done
);
2704 static void srp_rdma_cm_rej_handler(struct srp_rdma_ch
*ch
,
2705 struct rdma_cm_event
*event
)
2707 struct srp_target_port
*target
= ch
->target
;
2708 struct Scsi_Host
*shost
= target
->scsi_host
;
2711 switch (event
->status
) {
2712 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID
:
2713 shost_printk(KERN_WARNING
, shost
,
2714 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2715 ch
->status
= -ECONNRESET
;
2718 case IB_CM_REJ_CONSUMER_DEFINED
:
2719 opcode
= *(u8
*) event
->param
.conn
.private_data
;
2720 if (opcode
== SRP_LOGIN_REJ
) {
2721 struct srp_login_rej
*rej
=
2722 (struct srp_login_rej
*)
2723 event
->param
.conn
.private_data
;
2724 u32 reason
= be32_to_cpu(rej
->reason
);
2726 if (reason
== SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE
)
2727 shost_printk(KERN_WARNING
, shost
,
2728 PFX
"SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2730 shost_printk(KERN_WARNING
, shost
,
2731 PFX
"SRP LOGIN REJECTED, reason 0x%08x\n", reason
);
2733 shost_printk(KERN_WARNING
, shost
,
2734 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2737 ch
->status
= -ECONNRESET
;
2740 case IB_CM_REJ_STALE_CONN
:
2741 shost_printk(KERN_WARNING
, shost
,
2742 " REJ reason: stale connection\n");
2743 ch
->status
= SRP_STALE_CONN
;
2747 shost_printk(KERN_WARNING
, shost
, " REJ reason 0x%x\n",
2749 ch
->status
= -ECONNRESET
;
/*
 * srp_rdma_cm_handler() - RDMA/CM event callback of an SRP RDMA channel.
 * @cm_id: RDMA/CM identifier; its ->context points at the srp_rdma_ch.
 * @event: RDMA/CM event being delivered.
 *
 * Visible handling per event type:
 *  - ADDR_ERROR:                ch->status = -ENXIO.
 *  - ROUTE_ERROR, UNREACHABLE:  ch->status = -EHOSTUNREACH.
 *  - CONNECT_ERROR:             log, ch->status = -ECONNRESET.
 *  - ESTABLISHED:               pass the connection private data to
 *                               srp_cm_rep_handler() with a NULL IB cm_id.
 *  - REJECTED:                  log, decode via srp_rdma_cm_rej_handler().
 *  - DISCONNECTED:              if the channel was connected, log and call
 *                               rdma_disconnect(); tl_err_work is queued
 *                               on system_long_wq.
 *  - TIMEWAIT_EXIT:             log "connection closed".
 *  - "Unhandled CM event" is logged for what is presumably the default
 *    branch.
 * ch->done is completed so that a waiter on the connection attempt can
 * proceed.
 *
 * NOTE(review): the statements for ADDR_RESOLVED / ROUTE_RESOLVED, the
 * break statements, any condition guarding complete() and the return
 * value are not visible in this listing; confirm against the full source.
 */
2754 static int srp_rdma_cm_handler(struct rdma_cm_id
*cm_id
,
2755 struct rdma_cm_event
*event
)
2757 struct srp_rdma_ch
*ch
= cm_id
->context
;
2758 struct srp_target_port
*target
= ch
->target
;
2761 switch (event
->event
) {
2762 case RDMA_CM_EVENT_ADDR_RESOLVED
:
2767 case RDMA_CM_EVENT_ADDR_ERROR
:
2768 ch
->status
= -ENXIO
;
2772 case RDMA_CM_EVENT_ROUTE_RESOLVED
:
2777 case RDMA_CM_EVENT_ROUTE_ERROR
:
2778 case RDMA_CM_EVENT_UNREACHABLE
:
2779 ch
->status
= -EHOSTUNREACH
;
2783 case RDMA_CM_EVENT_CONNECT_ERROR
:
2784 shost_printk(KERN_DEBUG
, target
->scsi_host
,
2785 PFX
"Sending CM REQ failed\n");
2787 ch
->status
= -ECONNRESET
;
2790 case RDMA_CM_EVENT_ESTABLISHED
:
/* No IB cm_id exists on this path, hence the NULL first argument. */
2792 srp_cm_rep_handler(NULL
, event
->param
.conn
.private_data
, ch
);
2795 case RDMA_CM_EVENT_REJECTED
:
2796 shost_printk(KERN_DEBUG
, target
->scsi_host
, PFX
"REJ received\n");
2799 srp_rdma_cm_rej_handler(ch
, event
);
2802 case RDMA_CM_EVENT_DISCONNECTED
:
2803 if (ch
->connected
) {
2804 shost_printk(KERN_WARNING
, target
->scsi_host
,
2805 PFX
"received DREQ\n");
2806 rdma_disconnect(ch
->rdma_cm
.cm_id
);
2809 queue_work(system_long_wq
, &target
->tl_err_work
);
2813 case RDMA_CM_EVENT_TIMEWAIT_EXIT
:
2814 shost_printk(KERN_ERR
, target
->scsi_host
,
2815 PFX
"connection closed\n");
2822 shost_printk(KERN_WARNING
, target
->scsi_host
,
2823 PFX
"Unhandled CM event %d\n", event
->event
);
2828 complete(&ch
->done
);
2834 * srp_change_queue_depth - setting device queue depth
2835 * @sdev: scsi device struct
2836 * @qdepth: requested queue depth
2838 * Returns queue depth.
/*
 * Forwards the requested depth to scsi_change_queue_depth() and returns
 * that function's result.
 *
 * NOTE(review): the statement controlled by the !sdev->tagged_supported
 * test is not visible in this listing - presumably it restricts qdepth
 * for devices without tagged queuing; confirm against the full source.
 */
2841 srp_change_queue_depth(struct scsi_device
*sdev
, int qdepth
)
2843 if (!sdev
->tagged_supported
)
2845 return scsi_change_queue_depth(sdev
, qdepth
);
/*
 * srp_send_tsk_mgmt() - send an SRP task management request and wait for
 * the response.
 * @ch:      channel to send the request on.
 * @req_tag: tag of the request the function applies to (copied into the
 *           task_tag field of the IU).
 * @lun:     logical unit number, encoded with int_to_scsilun().
 * @func:    SRP task management function code.
 * @status:  optional; when non-NULL and a response arrived in time it
 *           receives ch->tsk_mgmt_status.
 *
 * The function does nothing useful unless the channel is connected and
 * the QP is not in error. A transmit IU of type SRP_IU_TSK_MGMT is taken
 * under ch->lock, filled in between the DMA sync-for-cpu / sync-for-device
 * calls, tagged with the next task management tag (always carrying the
 * SRP_TAG_TSK_MGMT bit) and posted. The caller then sleeps for at most
 * SRP_ABORT_TIMEOUT_MS on ch->tsk_mgmt_done.
 *
 * Return: 0 when the completion fired before the timeout, -1 otherwise
 * (see the final return statement).
 *
 * NOTE(review): the declarations of iu and res, the early-exit statements
 * (not connected, no IU available, post failure), the assignment of
 * tsk_mgmt and the DMA direction arguments are not visible in this
 * listing; confirm against the full source.
 */
2848 static int srp_send_tsk_mgmt(struct srp_rdma_ch
*ch
, u64 req_tag
, u64 lun
,
2849 u8 func
, u8
*status
)
2851 struct srp_target_port
*target
= ch
->target
;
2852 struct srp_rport
*rport
= target
->rport
;
2853 struct ib_device
*dev
= target
->srp_host
->srp_dev
->dev
;
2855 struct srp_tsk_mgmt
*tsk_mgmt
;
2858 if (!ch
->connected
|| target
->qp_in_error
)
2862 * Lock the rport mutex to avoid that srp_create_ch_ib() is
2863 * invoked while a task management function is being sent.
2865 mutex_lock(&rport
->mutex
);
2866 spin_lock_irq(&ch
->lock
);
2867 iu
= __srp_get_tx_iu(ch
, SRP_IU_TSK_MGMT
);
2868 spin_unlock_irq(&ch
->lock
);
2871 mutex_unlock(&rport
->mutex
);
2876 ib_dma_sync_single_for_cpu(dev
, iu
->dma
, sizeof *tsk_mgmt
,
2879 memset(tsk_mgmt
, 0, sizeof *tsk_mgmt
);
2881 tsk_mgmt
->opcode
= SRP_TSK_MGMT
;
2882 int_to_scsilun(lun
, &tsk_mgmt
->lun
);
2883 tsk_mgmt
->tsk_mgmt_func
= func
;
2884 tsk_mgmt
->task_tag
= req_tag
;
/* The tag counter is shared per channel, so advance it under ch->lock. */
2886 spin_lock_irq(&ch
->lock
);
2887 ch
->tsk_mgmt_tag
= (ch
->tsk_mgmt_tag
+ 1) | SRP_TAG_TSK_MGMT
;
2888 tsk_mgmt
->tag
= ch
->tsk_mgmt_tag
;
2889 spin_unlock_irq(&ch
->lock
);
/* Re-arm the completion before posting so the response cannot be missed. */
2891 init_completion(&ch
->tsk_mgmt_done
);
2893 ib_dma_sync_single_for_device(dev
, iu
->dma
, sizeof *tsk_mgmt
,
2895 if (srp_post_send(ch
, iu
, sizeof(*tsk_mgmt
))) {
2896 srp_put_tx_iu(ch
, iu
, SRP_IU_TSK_MGMT
);
2897 mutex_unlock(&rport
->mutex
);
2901 res
= wait_for_completion_timeout(&ch
->tsk_mgmt_done
,
2902 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS
));
2903 if (res
> 0 && status
)
2904 *status
= ch
->tsk_mgmt_status
;
2905 mutex_unlock(&rport
->mutex
);
2907 WARN_ON_ONCE(res
< 0);
2909 return res
> 0 ? 0 : -1;
/*
 * srp_abort() - SCSI error handler callback that aborts one command.
 * @scmnd: command to abort.
 *
 * The SRP request is taken from scmnd->host_scribble and the channel is
 * selected from the hardware queue index encoded in the block layer
 * unique tag (an index beyond target->ch_count triggers a one-time
 * warning). The request is claimed with srp_claim_req(), an
 * SRP_TSK_ABORT_TASK is sent with srp_send_tsk_mgmt(), and when the
 * result is SUCCESS the request is freed and the command is completed
 * with DID_ABORT.
 *
 * NOTE(review): the declarations of tag, ch_idx and ret, the values
 * assigned to ret on each path (task management success, rport lost,
 * other failure) and the early returns are not visible in this listing;
 * confirm against the full source.
 */
2912 static int srp_abort(struct scsi_cmnd
*scmnd
)
2914 struct srp_target_port
*target
= host_to_target(scmnd
->device
->host
);
2915 struct srp_request
*req
= (struct srp_request
*) scmnd
->host_scribble
;
2918 struct srp_rdma_ch
*ch
;
2921 shost_printk(KERN_ERR
, target
->scsi_host
, "SRP abort called\n");
/* The unique tag encodes both the hardware queue (channel) and the tag. */
2925 tag
= blk_mq_unique_tag(scmnd
->request
);
2926 ch_idx
= blk_mq_unique_tag_to_hwq(tag
);
2927 if (WARN_ON_ONCE(ch_idx
>= target
->ch_count
))
2929 ch
= &target
->ch
[ch_idx
];
2930 if (!srp_claim_req(ch
, req
, NULL
, scmnd
))
2932 shost_printk(KERN_ERR
, target
->scsi_host
,
2933 "Sending SRP abort for tag %#x\n", tag
);
2934 if (srp_send_tsk_mgmt(ch
, tag
, scmnd
->device
->lun
,
2935 SRP_TSK_ABORT_TASK
, NULL
) == 0)
2937 else if (target
->rport
->state
== SRP_RPORT_LOST
)
2941 if (ret
== SUCCESS
) {
2942 srp_free_req(ch
, req
, scmnd
, 0);
2943 scmnd
->result
= DID_ABORT
<< 16;
2944 scmnd
->scsi_done(scmnd
);
2950 static int srp_reset_device(struct scsi_cmnd
*scmnd
)
2952 struct srp_target_port
*target
= host_to_target(scmnd
->device
->host
);
2953 struct srp_rdma_ch
*ch
;
2957 shost_printk(KERN_ERR
, target
->scsi_host
, "SRP reset_device called\n");
2959 ch
= &target
->ch
[0];
2960 if (srp_send_tsk_mgmt(ch
, SRP_TAG_NO_REQ
, scmnd
->device
->lun
,
2961 SRP_TSK_LUN_RESET
, &status
))
2966 for (i
= 0; i
< target
->ch_count
; i
++) {
2967 ch
= &target
->ch
[i
];
2968 for (i
= 0; i
< target
->req_ring_size
; ++i
) {
2969 struct srp_request
*req
= &ch
->req_ring
[i
];
2971 srp_finish_req(ch
, req
, scmnd
->device
, DID_RESET
<< 16);
2978 static int srp_reset_host(struct scsi_cmnd
*scmnd
)
2980 struct srp_target_port
*target
= host_to_target(scmnd
->device
->host
);
2982 shost_printk(KERN_ERR
, target
->scsi_host
, PFX
"SRP reset_host called\n");
2984 return srp_reconnect_rport(target
->rport
) == 0 ? SUCCESS
: FAILED
;
2987 static int srp_target_alloc(struct scsi_target
*starget
)
2989 struct Scsi_Host
*shost
= dev_to_shost(starget
->dev
.parent
);
2990 struct srp_target_port
*target
= host_to_target(shost
);
2992 if (target
->target_can_queue
)
2993 starget
->can_queue
= target
->target_can_queue
;
2997 static int srp_slave_alloc(struct scsi_device
*sdev
)
2999 struct Scsi_Host
*shost
= sdev
->host
;
3000 struct srp_target_port
*target
= host_to_target(shost
);
3001 struct srp_device
*srp_dev
= target
->srp_host
->srp_dev
;
3002 struct ib_device
*ibdev
= srp_dev
->dev
;
3004 if (!(ibdev
->attrs
.device_cap_flags
& IB_DEVICE_SG_GAPS_REG
))
3005 blk_queue_virt_boundary(sdev
->request_queue
,
3006 ~srp_dev
->mr_page_mask
);
3011 static int srp_slave_configure(struct scsi_device
*sdev
)
3013 struct Scsi_Host
*shost
= sdev
->host
;
3014 struct srp_target_port
*target
= host_to_target(shost
);
3015 struct request_queue
*q
= sdev
->request_queue
;
3016 unsigned long timeout
;
3018 if (sdev
->type
== TYPE_DISK
) {
3019 timeout
= max_t(unsigned, 30 * HZ
, target
->rq_tmo_jiffies
);
3020 blk_queue_rq_timeout(q
, timeout
);
3026 static ssize_t
show_id_ext(struct device
*dev
, struct device_attribute
*attr
,
3029 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3031 return sprintf(buf
, "0x%016llx\n", be64_to_cpu(target
->id_ext
));
3034 static ssize_t
show_ioc_guid(struct device
*dev
, struct device_attribute
*attr
,
3037 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3039 return sprintf(buf
, "0x%016llx\n", be64_to_cpu(target
->ioc_guid
));
/*
 * sysfs "service_id": prints the IB CM service ID of the target as
 * 64-bit hex.
 *
 * NOTE(review): the statement executed when target->using_rdma_cm is set
 * is not visible in this listing - presumably an error return, because
 * the ib_cm fields do not apply to RDMA/CM logins; confirm against the
 * full source.
 */
3042 static ssize_t
show_service_id(struct device
*dev
,
3043 struct device_attribute
*attr
, char *buf
)
3045 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3047 if (target
->using_rdma_cm
)
3049 return sprintf(buf
, "0x%016llx\n",
3050 be64_to_cpu(target
->ib_cm
.service_id
));
/*
 * sysfs "pkey": prints the IB CM partition key of the target as 16-bit
 * hex.
 *
 * NOTE(review): the statement executed when target->using_rdma_cm is set
 * and the line carrying the buf parameter are not visible in this
 * listing; confirm against the full source.
 */
3053 static ssize_t
show_pkey(struct device
*dev
, struct device_attribute
*attr
,
3056 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3058 if (target
->using_rdma_cm
)
3060 return sprintf(buf
, "0x%04x\n", be16_to_cpu(target
->ib_cm
.pkey
));
3063 static ssize_t
show_sgid(struct device
*dev
, struct device_attribute
*attr
,
3066 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3068 return sprintf(buf
, "%pI6\n", target
->sgid
.raw
);
/*
 * sysfs "dgid": prints the destination GID from the path record of the
 * first channel in IPv6 notation.
 *
 * NOTE(review): the statement executed when target->using_rdma_cm is set
 * and the line carrying the buf parameter are not visible in this
 * listing; confirm against the full source.
 */
3071 static ssize_t
show_dgid(struct device
*dev
, struct device_attribute
*attr
,
3074 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3075 struct srp_rdma_ch
*ch
= &target
->ch
[0];
3077 if (target
->using_rdma_cm
)
3079 return sprintf(buf
, "%pI6\n", ch
->ib_cm
.path
.dgid
.raw
);
/*
 * sysfs "orig_dgid": prints the destination GID that was originally
 * supplied for the target (target->ib_cm.orig_dgid) in IPv6 notation.
 *
 * NOTE(review): the statement executed when target->using_rdma_cm is set
 * is not visible in this listing; confirm against the full source.
 */
3082 static ssize_t
show_orig_dgid(struct device
*dev
,
3083 struct device_attribute
*attr
, char *buf
)
3085 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3087 if (target
->using_rdma_cm
)
3089 return sprintf(buf
, "%pI6\n", target
->ib_cm
.orig_dgid
.raw
);
3092 static ssize_t
show_req_lim(struct device
*dev
,
3093 struct device_attribute
*attr
, char *buf
)
3095 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3096 struct srp_rdma_ch
*ch
;
3097 int i
, req_lim
= INT_MAX
;
3099 for (i
= 0; i
< target
->ch_count
; i
++) {
3100 ch
= &target
->ch
[i
];
3101 req_lim
= min(req_lim
, ch
->req_lim
);
3103 return sprintf(buf
, "%d\n", req_lim
);
3106 static ssize_t
show_zero_req_lim(struct device
*dev
,
3107 struct device_attribute
*attr
, char *buf
)
3109 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3111 return sprintf(buf
, "%d\n", target
->zero_req_lim
);
3114 static ssize_t
show_local_ib_port(struct device
*dev
,
3115 struct device_attribute
*attr
, char *buf
)
3117 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3119 return sprintf(buf
, "%d\n", target
->srp_host
->port
);
3122 static ssize_t
show_local_ib_device(struct device
*dev
,
3123 struct device_attribute
*attr
, char *buf
)
3125 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3127 return sprintf(buf
, "%s\n", target
->srp_host
->srp_dev
->dev
->name
);
3130 static ssize_t
show_ch_count(struct device
*dev
, struct device_attribute
*attr
,
3133 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3135 return sprintf(buf
, "%d\n", target
->ch_count
);
3138 static ssize_t
show_comp_vector(struct device
*dev
,
3139 struct device_attribute
*attr
, char *buf
)
3141 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3143 return sprintf(buf
, "%d\n", target
->comp_vector
);
3146 static ssize_t
show_tl_retry_count(struct device
*dev
,
3147 struct device_attribute
*attr
, char *buf
)
3149 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3151 return sprintf(buf
, "%d\n", target
->tl_retry_count
);
3154 static ssize_t
show_cmd_sg_entries(struct device
*dev
,
3155 struct device_attribute
*attr
, char *buf
)
3157 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3159 return sprintf(buf
, "%u\n", target
->cmd_sg_cnt
);
3162 static ssize_t
show_allow_ext_sg(struct device
*dev
,
3163 struct device_attribute
*attr
, char *buf
)
3165 struct srp_target_port
*target
= host_to_target(class_to_shost(dev
));
3167 return sprintf(buf
, "%s\n", target
->allow_ext_sg
? "true" : "false");
3170 static DEVICE_ATTR(id_ext
, S_IRUGO
, show_id_ext
, NULL
);
3171 static DEVICE_ATTR(ioc_guid
, S_IRUGO
, show_ioc_guid
, NULL
);
3172 static DEVICE_ATTR(service_id
, S_IRUGO
, show_service_id
, NULL
);
3173 static DEVICE_ATTR(pkey
, S_IRUGO
, show_pkey
, NULL
);
3174 static DEVICE_ATTR(sgid
, S_IRUGO
, show_sgid
, NULL
);
3175 static DEVICE_ATTR(dgid
, S_IRUGO
, show_dgid
, NULL
);
3176 static DEVICE_ATTR(orig_dgid
, S_IRUGO
, show_orig_dgid
, NULL
);
3177 static DEVICE_ATTR(req_lim
, S_IRUGO
, show_req_lim
, NULL
);
3178 static DEVICE_ATTR(zero_req_lim
, S_IRUGO
, show_zero_req_lim
, NULL
);
3179 static DEVICE_ATTR(local_ib_port
, S_IRUGO
, show_local_ib_port
, NULL
);
3180 static DEVICE_ATTR(local_ib_device
, S_IRUGO
, show_local_ib_device
, NULL
);
3181 static DEVICE_ATTR(ch_count
, S_IRUGO
, show_ch_count
, NULL
);
3182 static DEVICE_ATTR(comp_vector
, S_IRUGO
, show_comp_vector
, NULL
);
3183 static DEVICE_ATTR(tl_retry_count
, S_IRUGO
, show_tl_retry_count
, NULL
);
3184 static DEVICE_ATTR(cmd_sg_entries
, S_IRUGO
, show_cmd_sg_entries
, NULL
);
3185 static DEVICE_ATTR(allow_ext_sg
, S_IRUGO
, show_allow_ext_sg
, NULL
);
/*
 * Table of the sysfs attributes attached to every SRP SCSI host; it is
 * referenced by the shost_attrs member of srp_template.
 *
 * NOTE(review): this listing shows only part of the table - several
 * entries and the terminator are not visible; confirm against the full
 * source before editing.
 */
3187 static struct device_attribute
*srp_host_attrs
[] = {
3190 &dev_attr_service_id
,
3194 &dev_attr_orig_dgid
,
3196 &dev_attr_zero_req_lim
,
3197 &dev_attr_local_ib_port
,
3198 &dev_attr_local_ib_device
,
3200 &dev_attr_comp_vector
,
3201 &dev_attr_tl_retry_count
,
3202 &dev_attr_cmd_sg_entries
,
3203 &dev_attr_allow_ext_sg
,
/*
 * SCSI host template of the SRP initiator: wires the SCSI mid-layer
 * callbacks (allocation/configuration, queuecommand, queue depth changes,
 * timeout and error handlers) to the srp_*() functions of this file and
 * sets the default queue sizes, S/G table size and sysfs attributes.
 *
 * NOTE(review): at least one member line and the closing of the
 * initializer are not visible in this listing; confirm against the full
 * source.
 */
3207 static struct scsi_host_template srp_template
= {
3208 .module
= THIS_MODULE
,
3209 .name
= "InfiniBand SRP initiator",
3210 .proc_name
= DRV_NAME
,
3211 .target_alloc
= srp_target_alloc
,
3212 .slave_alloc
= srp_slave_alloc
,
3213 .slave_configure
= srp_slave_configure
,
3214 .info
= srp_target_info
,
3215 .queuecommand
= srp_queuecommand
,
3216 .change_queue_depth
= srp_change_queue_depth
,
3217 .eh_timed_out
= srp_timed_out
,
3218 .eh_abort_handler
= srp_abort
,
3219 .eh_device_reset_handler
= srp_reset_device
,
3220 .eh_host_reset_handler
= srp_reset_host
,
3221 .skip_settle_delay
= true,
3222 .sg_tablesize
= SRP_DEF_SG_TABLESIZE
,
3223 .can_queue
= SRP_DEFAULT_CMD_SQ_SIZE
,
3225 .cmd_per_lun
= SRP_DEFAULT_CMD_SQ_SIZE
,
3226 .use_clustering
= ENABLE_CLUSTERING
,
3227 .shost_attrs
= srp_host_attrs
,
3228 .track_queue_depth
= 1,
/*
 * Walks all SCSI devices of @host with shost_for_each_device(). The only
 * caller in this file prints the result as the number of detected LUNs.
 *
 * NOTE(review): the loop body, the counter and the return statement are
 * not visible in this listing - presumably one is counted per device;
 * confirm against the full source.
 */
3231 static int srp_sdev_count(struct Scsi_Host
*host
)
3233 struct scsi_device
*sdev
;
3236 shost_for_each_device(sdev
, host
)
3244 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
3245 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
3246 * removal has been scheduled.
3247 * 0 and target->state != SRP_TARGET_REMOVED upon success.
/*
 * srp_add_target() - register a connected target port with the SCSI layer
 * @host:   SRP host (local port) the target belongs to.
 * @target: target port to register.
 *
 * Steps visible in this listing:
 *  1. mark the target as SRP_TARGET_SCANNING and build its name
 *     ("SRP.T10:" followed by the ID extension in upper-case hex);
 *  2. add the SCSI host below the parent of the IB device;
 *  3. create the SRP rport whose 16-byte port ID is id_ext followed by
 *     ioc_guid; on failure the SCSI host is removed again and the error
 *     from srp_rport_add() is returned;
 *  4. link rport and target to each other and append the target to
 *     host->target_list under host->target_lock;
 *  5. scan the target for LUNs;
 *  6. if not all channels are connected any more or the QP is in error,
 *     log it and schedule removal of the target;
 *  7. otherwise move the state from SCANNING to LIVE under target->lock,
 *     unless something else changed the state in the meantime.
 *
 * NOTE(review): the return statements other than the PTR_ERR() one, the
 * jump over step 7 after a failed scan and the closing braces are not
 * visible in this listing; confirm against the full source.
 */
3249 static int srp_add_target(struct srp_host
*host
, struct srp_target_port
*target
)
3251 struct srp_rport_identifiers ids
;
3252 struct srp_rport
*rport
;
3254 target
->state
= SRP_TARGET_SCANNING
;
3255 sprintf(target
->target_name
, "SRP.T10:%016llX",
3256 be64_to_cpu(target
->id_ext
));
3258 if (scsi_add_host(target
->scsi_host
, host
->srp_dev
->dev
->dev
.parent
))
/* Port ID layout: bytes 0-7 = ID extension, bytes 8-15 = IOC GUID. */
3261 memcpy(ids
.port_id
, &target
->id_ext
, 8);
3262 memcpy(ids
.port_id
+ 8, &target
->ioc_guid
, 8);
3263 ids
.roles
= SRP_RPORT_ROLE_TARGET
;
3264 rport
= srp_rport_add(target
->scsi_host
, &ids
);
3265 if (IS_ERR(rport
)) {
3266 scsi_remove_host(target
->scsi_host
);
3267 return PTR_ERR(rport
);
3270 rport
->lld_data
= target
;
3271 target
->rport
= rport
;
3273 spin_lock(&host
->target_lock
);
3274 list_add_tail(&target
->list
, &host
->target_list
);
3275 spin_unlock(&host
->target_lock
);
3277 scsi_scan_target(&target
->scsi_host
->shost_gendev
,
3278 0, target
->scsi_id
, SCAN_WILD_CARD
, SCSI_SCAN_INITIAL
);
/* A connection lost during the scan means the scan result is unusable. */
3280 if (srp_connected_ch(target
) < target
->ch_count
||
3281 target
->qp_in_error
) {
3282 shost_printk(KERN_INFO
, target
->scsi_host
,
3283 PFX
"SCSI scan failed - removing SCSI host\n");
3284 srp_queue_remove_work(target
);
3288 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
3289 dev_name(&target
->scsi_host
->shost_gendev
),
3290 srp_sdev_count(target
->scsi_host
));
3292 spin_lock_irq(&target
->lock
);
3293 if (target
->state
== SRP_TARGET_SCANNING
)
3294 target
->state
= SRP_TARGET_LIVE
;
3295 spin_unlock_irq(&target
->lock
);
3301 static void srp_release_dev(struct device
*dev
)
3303 struct srp_host
*host
=
3304 container_of(dev
, struct srp_host
, dev
);
3306 complete(&host
->released
);
3309 static struct class srp_class
= {
3310 .name
= "infiniband_srp",
3311 .dev_release
= srp_release_dev
3315 * srp_conn_unique() - check whether the connection to a target is unique
3317 * @target: SRP target port.
/*
 * Checks @target against every target already on host->target_list
 * (walked under host->target_lock), comparing id_ext, ioc_guid and
 * initiator_ext. A target in state SRP_TARGET_REMOVED is handled before
 * the list is searched.
 *
 * NOTE(review): the first line of the comparison, the result variable,
 * what is returned for a removed target and the return statement are not
 * visible in this listing. Judging by the function name and its caller,
 * a match presumably makes the result false; confirm against the full
 * source.
 */
3319 static bool srp_conn_unique(struct srp_host
*host
,
3320 struct srp_target_port
*target
)
3322 struct srp_target_port
*t
;
3325 if (target
->state
== SRP_TARGET_REMOVED
)
3330 spin_lock(&host
->target_lock
);
3331 list_for_each_entry(t
, &host
->target_list
, list
) {
3333 target
->id_ext
== t
->id_ext
&&
3334 target
->ioc_guid
== t
->ioc_guid
&&
3335 target
->initiator_ext
== t
->initiator_ext
) {
3340 spin_unlock(&host
->target_lock
);
3347 * Target ports are added by writing
3349 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3350 * pkey=<P_Key>,service_id=<service ID>
3352 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,
3353 * [src=<IPv4 address>,]dest=<IPv4 address>:<port number>
3355 * to the add_target sysfs attribute.
/*
 * One bit per option accepted in a target creation request. The values
 * serve as token IDs in srp_opt_tokens and as the bits collected in
 * opt_mask by srp_parse_options().
 *
 * NOTE(review): the opening of the enum, the closing brace and the
 * SRP_OPT_ERR enumerator used by srp_opt_tokens are not visible in this
 * listing; confirm against the full source.
 */
3359 SRP_OPT_ID_EXT
= 1 << 0,
3360 SRP_OPT_IOC_GUID
= 1 << 1,
3361 SRP_OPT_DGID
= 1 << 2,
3362 SRP_OPT_PKEY
= 1 << 3,
3363 SRP_OPT_SERVICE_ID
= 1 << 4,
3364 SRP_OPT_MAX_SECT
= 1 << 5,
3365 SRP_OPT_MAX_CMD_PER_LUN
= 1 << 6,
3366 SRP_OPT_IO_CLASS
= 1 << 7,
3367 SRP_OPT_INITIATOR_EXT
= 1 << 8,
3368 SRP_OPT_CMD_SG_ENTRIES
= 1 << 9,
3369 SRP_OPT_ALLOW_EXT_SG
= 1 << 10,
3370 SRP_OPT_SG_TABLESIZE
= 1 << 11,
3371 SRP_OPT_COMP_VECTOR
= 1 << 12,
3372 SRP_OPT_TL_RETRY_COUNT
= 1 << 13,
3373 SRP_OPT_QUEUE_SIZE
= 1 << 14,
3374 SRP_OPT_IP_SRC
= 1 << 15,
3375 SRP_OPT_IP_DEST
= 1 << 16,
3376 SRP_OPT_TARGET_CAN_QUEUE
= 1 << 17,
/*
 * Option masks checked at the end of srp_parse_options(): each entry is
 * compared against the options that were actually supplied, and an entry
 * matches when all of its bits are present in opt_mask.
 *
 * NOTE(review): the entries themselves are not visible in this listing.
 * Presumably there is one per supported login flavour (IB CM and
 * RDMA/CM); confirm against the full source.
 */
3379 static unsigned int srp_opt_mandatory
[] = {
3390 static const match_table_t srp_opt_tokens
= {
3391 { SRP_OPT_ID_EXT
, "id_ext=%s" },
3392 { SRP_OPT_IOC_GUID
, "ioc_guid=%s" },
3393 { SRP_OPT_DGID
, "dgid=%s" },
3394 { SRP_OPT_PKEY
, "pkey=%x" },
3395 { SRP_OPT_SERVICE_ID
, "service_id=%s" },
3396 { SRP_OPT_MAX_SECT
, "max_sect=%d" },
3397 { SRP_OPT_MAX_CMD_PER_LUN
, "max_cmd_per_lun=%d" },
3398 { SRP_OPT_TARGET_CAN_QUEUE
, "target_can_queue=%d" },
3399 { SRP_OPT_IO_CLASS
, "io_class=%x" },
3400 { SRP_OPT_INITIATOR_EXT
, "initiator_ext=%s" },
3401 { SRP_OPT_CMD_SG_ENTRIES
, "cmd_sg_entries=%u" },
3402 { SRP_OPT_ALLOW_EXT_SG
, "allow_ext_sg=%u" },
3403 { SRP_OPT_SG_TABLESIZE
, "sg_tablesize=%u" },
3404 { SRP_OPT_COMP_VECTOR
, "comp_vector=%u" },
3405 { SRP_OPT_TL_RETRY_COUNT
, "tl_retry_count=%u" },
3406 { SRP_OPT_QUEUE_SIZE
, "queue_size=%d" },
3407 { SRP_OPT_IP_SRC
, "src=%s" },
3408 { SRP_OPT_IP_DEST
, "dest=%s" },
3409 { SRP_OPT_ERR
, NULL
}
3413 * srp_parse_in - parse an IP address and port number combination
3415 * Parse the following address formats:
3416 * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
3417 * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5.
/*
 * srp_parse_in() - parse "<address>:<port>" into a socket address
 * @net:           network namespace used for scope resolution.
 * @sa:            result buffer.
 * @addr_port_str: text to parse; it is duplicated so that the caller's
 *                 string is left untouched.
 *
 * The port is located by searching for the last ':' in the copy. The
 * address part is first tried as IPv4; if that fails and the address is
 * enclosed in square brackets it is tried again as IPv6 without the
 * opening bracket.
 *
 * NOTE(review): the declarations of ret and port_str, the NULL checks,
 * the statements that terminate the address and strip the closing
 * bracket, the freeing of the copy and the return statement are not
 * visible in this listing; confirm against the full source.
 */
3419 static int srp_parse_in(struct net
*net
, struct sockaddr_storage
*sa
,
3420 const char *addr_port_str
)
3422 char *addr_end
, *addr
= kstrdup(addr_port_str
, GFP_KERNEL
);
3428 port_str
= strrchr(addr
, ':');
3432 ret
= inet_pton_with_scope(net
, AF_INET
, addr
, port_str
, sa
);
3433 if (ret
&& addr
[0]) {
3434 addr_end
= addr
+ strlen(addr
) - 1;
3435 if (addr
[0] == '[' && *addr_end
== ']') {
3437 ret
= inet_pton_with_scope(net
, AF_INET6
, addr
+ 1,
3442 pr_debug("%s -> %pISpfsc\n", addr_port_str
, sa
);
/*
 * srp_parse_options() - parse a target creation request
 * @net:    network namespace, used when parsing src= / dest= addresses.
 * @buf:    request text: option=value pairs separated by ',' or newline.
 * @target: target port that receives the parsed settings.
 *
 * A private copy of @buf is split with strsep() and every piece is
 * matched against srp_opt_tokens. Per option:
 *  - id_ext, ioc_guid, service_id, initiator_ext: 64-bit hex numbers,
 *    stored big-endian;
 *  - dgid: exactly 32 hex digits, converted with hex2bin() into
 *    ib_cm.orig_dgid;
 *  - pkey: hex number, stored big-endian in ib_cm.pkey;
 *  - src / dest: parsed by srp_parse_in(); dest= also switches the target
 *    to RDMA/CM (using_rdma_cm);
 *  - max_sect: sets scsi_host->max_sectors;
 *  - queue_size (>= 1): sets can_queue, sets queue_size to the value plus
 *    SRP_RSP_SQ_SIZE and SRP_TSK_MGMT_SQ_SIZE, and also sets cmd_per_lun
 *    unless max_cmd_per_lun was given earlier in the request;
 *  - max_cmd_per_lun (>= 1), target_can_queue (>= 1);
 *  - io_class: must be SRP_REV10_IB_IO_CLASS or SRP_REV16A_IB_IO_CLASS;
 *  - cmd_sg_entries (1..255), allow_ext_sg (stored as a boolean),
 *    sg_tablesize (1..SG_MAX_SEGMENTS), comp_vector (>= 0),
 *    tl_retry_count (2..7).
 * Anything else is reported as an unknown parameter. Afterwards the
 * supplied options are compared against srp_opt_mandatory, and a warning
 * is printed when an explicitly requested cmd_per_lun exceeds can_queue.
 *
 * NOTE(review): the declarations of p, opt_mask, token, ret and i, the
 * updating of opt_mask, several case labels, all error exits, the
 * freeing of duplicated strings and the return statement are not visible
 * in this listing; confirm against the full source.
 */
3446 static int srp_parse_options(struct net
*net
, const char *buf
,
3447 struct srp_target_port
*target
)
3449 char *options
, *sep_opt
;
3451 substring_t args
[MAX_OPT_ARGS
];
3452 unsigned long long ull
;
3458 options
= kstrdup(buf
, GFP_KERNEL
);
3463 while ((p
= strsep(&sep_opt
, ",\n")) != NULL
) {
3467 token
= match_token(p
, srp_opt_tokens
, args
);
3471 case SRP_OPT_ID_EXT
:
3472 p
= match_strdup(args
);
3477 ret
= kstrtoull(p
, 16, &ull
);
3479 pr_warn("invalid id_ext parameter '%s'\n", p
);
3483 target
->id_ext
= cpu_to_be64(ull
);
3487 case SRP_OPT_IOC_GUID
:
3488 p
= match_strdup(args
);
3493 ret
= kstrtoull(p
, 16, &ull
);
3495 pr_warn("invalid ioc_guid parameter '%s'\n", p
);
3499 target
->ioc_guid
= cpu_to_be64(ull
);
3504 p
= match_strdup(args
);
/* A GID is 16 bytes, i.e. exactly 32 hex digits. */
3509 if (strlen(p
) != 32) {
3510 pr_warn("bad dest GID parameter '%s'\n", p
);
3515 ret
= hex2bin(target
->ib_cm
.orig_dgid
.raw
, p
, 16);
3522 if (match_hex(args
, &token
)) {
3523 pr_warn("bad P_Key parameter '%s'\n", p
);
3526 target
->ib_cm
.pkey
= cpu_to_be16(token
);
3529 case SRP_OPT_SERVICE_ID
:
3530 p
= match_strdup(args
);
3535 ret
= kstrtoull(p
, 16, &ull
);
3537 pr_warn("bad service_id parameter '%s'\n", p
);
3541 target
->ib_cm
.service_id
= cpu_to_be64(ull
);
3545 case SRP_OPT_IP_SRC
:
3546 p
= match_strdup(args
);
3551 ret
= srp_parse_in(net
, &target
->rdma_cm
.src
.ss
, p
);
3553 pr_warn("bad source parameter '%s'\n", p
);
3557 target
->rdma_cm
.src_specified
= true;
3561 case SRP_OPT_IP_DEST
:
3562 p
= match_strdup(args
);
3567 ret
= srp_parse_in(net
, &target
->rdma_cm
.dst
.ss
, p
);
3569 pr_warn("bad dest parameter '%s'\n", p
);
/* Supplying dest= is what selects the RDMA/CM login path. */
3573 target
->using_rdma_cm
= true;
3577 case SRP_OPT_MAX_SECT
:
3578 if (match_int(args
, &token
)) {
3579 pr_warn("bad max sect parameter '%s'\n", p
);
3582 target
->scsi_host
->max_sectors
= token
;
3585 case SRP_OPT_QUEUE_SIZE
:
3586 if (match_int(args
, &token
) || token
< 1) {
3587 pr_warn("bad queue_size parameter '%s'\n", p
);
3590 target
->scsi_host
->can_queue
= token
;
/* Room for responses and task management on top of the SCSI commands. */
3591 target
->queue_size
= token
+ SRP_RSP_SQ_SIZE
+
3592 SRP_TSK_MGMT_SQ_SIZE
;
3593 if (!(opt_mask
& SRP_OPT_MAX_CMD_PER_LUN
))
3594 target
->scsi_host
->cmd_per_lun
= token
;
3597 case SRP_OPT_MAX_CMD_PER_LUN
:
3598 if (match_int(args
, &token
) || token
< 1) {
3599 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3603 target
->scsi_host
->cmd_per_lun
= token
;
3606 case SRP_OPT_TARGET_CAN_QUEUE
:
3607 if (match_int(args
, &token
) || token
< 1) {
3608 pr_warn("bad max target_can_queue parameter '%s'\n",
3612 target
->target_can_queue
= token
;
3615 case SRP_OPT_IO_CLASS
:
3616 if (match_hex(args
, &token
)) {
3617 pr_warn("bad IO class parameter '%s'\n", p
);
3620 if (token
!= SRP_REV10_IB_IO_CLASS
&&
3621 token
!= SRP_REV16A_IB_IO_CLASS
) {
3622 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3623 token
, SRP_REV10_IB_IO_CLASS
,
3624 SRP_REV16A_IB_IO_CLASS
);
3627 target
->io_class
= token
;
3630 case SRP_OPT_INITIATOR_EXT
:
3631 p
= match_strdup(args
);
3636 ret
= kstrtoull(p
, 16, &ull
);
3638 pr_warn("bad initiator_ext value '%s'\n", p
);
3642 target
->initiator_ext
= cpu_to_be64(ull
);
3646 case SRP_OPT_CMD_SG_ENTRIES
:
3647 if (match_int(args
, &token
) || token
< 1 || token
> 255) {
3648 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3652 target
->cmd_sg_cnt
= token
;
3655 case SRP_OPT_ALLOW_EXT_SG
:
3656 if (match_int(args
, &token
)) {
3657 pr_warn("bad allow_ext_sg parameter '%s'\n", p
);
3660 target
->allow_ext_sg
= !!token
;
3663 case SRP_OPT_SG_TABLESIZE
:
3664 if (match_int(args
, &token
) || token
< 1 ||
3665 token
> SG_MAX_SEGMENTS
) {
3666 pr_warn("bad max sg_tablesize parameter '%s'\n",
3670 target
->sg_tablesize
= token
;
3673 case SRP_OPT_COMP_VECTOR
:
3674 if (match_int(args
, &token
) || token
< 0) {
3675 pr_warn("bad comp_vector parameter '%s'\n", p
);
3678 target
->comp_vector
= token
;
3681 case SRP_OPT_TL_RETRY_COUNT
:
3682 if (match_int(args
, &token
) || token
< 2 || token
> 7) {
3683 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3687 target
->tl_retry_count
= token
;
3691 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
/* An entry matches when every option bit it names was supplied. */
3697 for (i
= 0; i
< ARRAY_SIZE(srp_opt_mandatory
); i
++) {
3698 if ((opt_mask
& srp_opt_mandatory
[i
]) == srp_opt_mandatory
[i
]) {
3704 pr_warn("target creation request is missing one or more parameters\n");
3706 if (target
->scsi_host
->cmd_per_lun
> target
->scsi_host
->can_queue
3707 && (opt_mask
& SRP_OPT_MAX_CMD_PER_LUN
))
3708 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3709 target
->scsi_host
->cmd_per_lun
,
3710 target
->scsi_host
->can_queue
);
3717 static ssize_t
srp_create_target(struct device
*dev
,
3718 struct device_attribute
*attr
,
3719 const char *buf
, size_t count
)
3721 struct srp_host
*host
=
3722 container_of(dev
, struct srp_host
, dev
);
3723 struct Scsi_Host
*target_host
;
3724 struct srp_target_port
*target
;
3725 struct srp_rdma_ch
*ch
;
3726 struct srp_device
*srp_dev
= host
->srp_dev
;
3727 struct ib_device
*ibdev
= srp_dev
->dev
;
3728 int ret
, node_idx
, node
, cpu
, i
;
3729 unsigned int max_sectors_per_mr
, mr_per_cmd
= 0;
3730 bool multich
= false;
3732 target_host
= scsi_host_alloc(&srp_template
,
3733 sizeof (struct srp_target_port
));
3737 target_host
->transportt
= ib_srp_transport_template
;
3738 target_host
->max_channel
= 0;
3739 target_host
->max_id
= 1;
3740 target_host
->max_lun
= -1LL;
3741 target_host
->max_cmd_len
= sizeof ((struct srp_cmd
*) (void *) 0L)->cdb
;
3743 target
= host_to_target(target_host
);
3745 target
->net
= kobj_ns_grab_current(KOBJ_NS_TYPE_NET
);
3746 target
->io_class
= SRP_REV16A_IB_IO_CLASS
;
3747 target
->scsi_host
= target_host
;
3748 target
->srp_host
= host
;
3749 target
->lkey
= host
->srp_dev
->pd
->local_dma_lkey
;
3750 target
->global_rkey
= host
->srp_dev
->global_rkey
;
3751 target
->cmd_sg_cnt
= cmd_sg_entries
;
3752 target
->sg_tablesize
= indirect_sg_entries
? : cmd_sg_entries
;
3753 target
->allow_ext_sg
= allow_ext_sg
;
3754 target
->tl_retry_count
= 7;
3755 target
->queue_size
= SRP_DEFAULT_QUEUE_SIZE
;
3758 * Avoid that the SCSI host can be removed by srp_remove_target()
3759 * before this function returns.
3761 scsi_host_get(target
->scsi_host
);
3763 ret
= mutex_lock_interruptible(&host
->add_target_mutex
);
3767 ret
= srp_parse_options(target
->net
, buf
, target
);
3771 target
->req_ring_size
= target
->queue_size
- SRP_TSK_MGMT_SQ_SIZE
;
3773 if (!srp_conn_unique(target
->srp_host
, target
)) {
3774 if (target
->using_rdma_cm
) {
3775 shost_printk(KERN_INFO
, target
->scsi_host
,
3776 PFX
"Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n",
3777 be64_to_cpu(target
->id_ext
),
3778 be64_to_cpu(target
->ioc_guid
),
3779 &target
->rdma_cm
.dst
);
3781 shost_printk(KERN_INFO
, target
->scsi_host
,
3782 PFX
"Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3783 be64_to_cpu(target
->id_ext
),
3784 be64_to_cpu(target
->ioc_guid
),
3785 be64_to_cpu(target
->initiator_ext
));
3791 if (!srp_dev
->has_fmr
&& !srp_dev
->has_fr
&& !target
->allow_ext_sg
&&
3792 target
->cmd_sg_cnt
< target
->sg_tablesize
) {
3793 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3794 target
->sg_tablesize
= target
->cmd_sg_cnt
;
3797 if (srp_dev
->use_fast_reg
|| srp_dev
->use_fmr
) {
3798 bool gaps_reg
= (ibdev
->attrs
.device_cap_flags
&
3799 IB_DEVICE_SG_GAPS_REG
);
3801 max_sectors_per_mr
= srp_dev
->max_pages_per_mr
<<
3802 (ilog2(srp_dev
->mr_page_size
) - 9);
3805 * FR and FMR can only map one HCA page per entry. If
3806 * the start address is not aligned on a HCA page
3807 * boundary two entries will be used for the head and
3808 * the tail although these two entries combined
3809 * contain at most one HCA page of data. Hence the "+
3810 * 1" in the calculation below.
3812 * The indirect data buffer descriptor is contiguous
3813 * so the memory for that buffer will only be
3814 * registered if register_always is true. Hence add
3815 * one to mr_per_cmd if register_always has been set.
3817 mr_per_cmd
= register_always
+
3818 (target
->scsi_host
->max_sectors
+ 1 +
3819 max_sectors_per_mr
- 1) / max_sectors_per_mr
;
3821 mr_per_cmd
= register_always
+
3822 (target
->sg_tablesize
+
3823 srp_dev
->max_pages_per_mr
- 1) /
3824 srp_dev
->max_pages_per_mr
;
3826 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3827 target
->scsi_host
->max_sectors
, srp_dev
->max_pages_per_mr
, srp_dev
->mr_page_size
,
3828 max_sectors_per_mr
, mr_per_cmd
);
3831 target_host
->sg_tablesize
= target
->sg_tablesize
;
3832 target
->mr_pool_size
= target
->scsi_host
->can_queue
* mr_per_cmd
;
3833 target
->mr_per_cmd
= mr_per_cmd
;
3834 target
->indirect_size
= target
->sg_tablesize
*
3835 sizeof (struct srp_direct_buf
);
3836 target
->max_iu_len
= sizeof (struct srp_cmd
) +
3837 sizeof (struct srp_indirect_buf
) +
3838 target
->cmd_sg_cnt
* sizeof (struct srp_direct_buf
);
3840 INIT_WORK(&target
->tl_err_work
, srp_tl_err_work
);
3841 INIT_WORK(&target
->remove_work
, srp_remove_work
);
3842 spin_lock_init(&target
->lock
);
3843 ret
= rdma_query_gid(ibdev
, host
->port
, 0, &target
->sgid
);
3848 target
->ch_count
= max_t(unsigned, num_online_nodes(),
3850 min(4 * num_online_nodes(),
3851 ibdev
->num_comp_vectors
),
3852 num_online_cpus()));
3853 target
->ch
= kcalloc(target
->ch_count
, sizeof(*target
->ch
),
3859 for_each_online_node(node
) {
3860 const int ch_start
= (node_idx
* target
->ch_count
/
3861 num_online_nodes());
3862 const int ch_end
= ((node_idx
+ 1) * target
->ch_count
/
3863 num_online_nodes());
3864 const int cv_start
= node_idx
* ibdev
->num_comp_vectors
/
3866 const int cv_end
= (node_idx
+ 1) * ibdev
->num_comp_vectors
/
3870 for_each_online_cpu(cpu
) {
3871 if (cpu_to_node(cpu
) != node
)
3873 if (ch_start
+ cpu_idx
>= ch_end
)
3875 ch
= &target
->ch
[ch_start
+ cpu_idx
];
3876 ch
->target
= target
;
3877 ch
->comp_vector
= cv_start
== cv_end
? cv_start
:
3878 cv_start
+ cpu_idx
% (cv_end
- cv_start
);
3879 spin_lock_init(&ch
->lock
);
3880 INIT_LIST_HEAD(&ch
->free_tx
);
3881 ret
= srp_new_cm_id(ch
);
3883 goto err_disconnect
;
3885 ret
= srp_create_ch_ib(ch
);
3887 goto err_disconnect
;
3889 ret
= srp_alloc_req_data(ch
);
3891 goto err_disconnect
;
3893 ret
= srp_connect_ch(ch
, multich
);
3897 if (target
->using_rdma_cm
)
3898 snprintf(dst
, sizeof(dst
), "%pIS",
3899 &target
->rdma_cm
.dst
);
3901 snprintf(dst
, sizeof(dst
), "%pI6",
3902 target
->ib_cm
.orig_dgid
.raw
);
3903 shost_printk(KERN_ERR
, target
->scsi_host
,
3904 PFX
"Connection %d/%d to %s failed\n",
3906 target
->ch_count
, dst
);
3907 if (node_idx
== 0 && cpu_idx
== 0) {
3910 srp_free_ch_ib(target
, ch
);
3911 srp_free_req_data(target
, ch
);
3912 target
->ch_count
= ch
- target
->ch
;
3924 target
->scsi_host
->nr_hw_queues
= target
->ch_count
;
3926 ret
= srp_add_target(host
, target
);
3928 goto err_disconnect
;
3930 if (target
->state
!= SRP_TARGET_REMOVED
) {
3931 if (target
->using_rdma_cm
) {
3932 shost_printk(KERN_DEBUG
, target
->scsi_host
, PFX
3933 "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
3934 be64_to_cpu(target
->id_ext
),
3935 be64_to_cpu(target
->ioc_guid
),
3936 target
->sgid
.raw
, &target
->rdma_cm
.dst
);
3938 shost_printk(KERN_DEBUG
, target
->scsi_host
, PFX
3939 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3940 be64_to_cpu(target
->id_ext
),
3941 be64_to_cpu(target
->ioc_guid
),
3942 be16_to_cpu(target
->ib_cm
.pkey
),
3943 be64_to_cpu(target
->ib_cm
.service_id
),
3945 target
->ib_cm
.orig_dgid
.raw
);
3952 mutex_unlock(&host
->add_target_mutex
);
3955 scsi_host_put(target
->scsi_host
);
3958 * If a call to srp_remove_target() has not been scheduled,
3959 * drop the network namespace reference now that was obtained
3960 * earlier in this function.
3962 if (target
->state
!= SRP_TARGET_REMOVED
)
3963 kobj_ns_drop(KOBJ_NS_TYPE_NET
, target
->net
);
3964 scsi_host_put(target
->scsi_host
);
3970 srp_disconnect_target(target
);
3973 for (i
= 0; i
< target
->ch_count
; i
++) {
3974 ch
= &target
->ch
[i
];
3975 srp_free_ch_ib(target
, ch
);
3976 srp_free_req_data(target
, ch
);
3983 static DEVICE_ATTR(add_target
, S_IWUSR
, NULL
, srp_create_target
);
3985 static ssize_t
show_ibdev(struct device
*dev
, struct device_attribute
*attr
,
3988 struct srp_host
*host
= container_of(dev
, struct srp_host
, dev
);
3990 return sprintf(buf
, "%s\n", host
->srp_dev
->dev
->name
);
3993 static DEVICE_ATTR(ibdev
, S_IRUGO
, show_ibdev
, NULL
);
3995 static ssize_t
show_port(struct device
*dev
, struct device_attribute
*attr
,
3998 struct srp_host
*host
= container_of(dev
, struct srp_host
, dev
);
4000 return sprintf(buf
, "%d\n", host
->port
);
4003 static DEVICE_ATTR(port
, S_IRUGO
, show_port
, NULL
);
4005 static struct srp_host
*srp_add_port(struct srp_device
*device
, u8 port
)
4007 struct srp_host
*host
;
4009 host
= kzalloc(sizeof *host
, GFP_KERNEL
);
4013 INIT_LIST_HEAD(&host
->target_list
);
4014 spin_lock_init(&host
->target_lock
);
4015 init_completion(&host
->released
);
4016 mutex_init(&host
->add_target_mutex
);
4017 host
->srp_dev
= device
;
4020 host
->dev
.class = &srp_class
;
4021 host
->dev
.parent
= device
->dev
->dev
.parent
;
4022 dev_set_name(&host
->dev
, "srp-%s-%d", device
->dev
->name
, port
);
4024 if (device_register(&host
->dev
))
4026 if (device_create_file(&host
->dev
, &dev_attr_add_target
))
4028 if (device_create_file(&host
->dev
, &dev_attr_ibdev
))
4030 if (device_create_file(&host
->dev
, &dev_attr_port
))
4036 device_unregister(&host
->dev
);
4044 static void srp_add_one(struct ib_device
*device
)
4046 struct srp_device
*srp_dev
;
4047 struct ib_device_attr
*attr
= &device
->attrs
;
4048 struct srp_host
*host
;
4049 int mr_page_shift
, p
;
4050 u64 max_pages_per_mr
;
4051 unsigned int flags
= 0;
4053 srp_dev
= kzalloc(sizeof(*srp_dev
), GFP_KERNEL
);
4058 * Use the smallest page size supported by the HCA, down to a
4059 * minimum of 4096 bytes. We're unlikely to build large sglists
4060 * out of smaller entries.
4062 mr_page_shift
= max(12, ffs(attr
->page_size_cap
) - 1);
4063 srp_dev
->mr_page_size
= 1 << mr_page_shift
;
4064 srp_dev
->mr_page_mask
= ~((u64
) srp_dev
->mr_page_size
- 1);
4065 max_pages_per_mr
= attr
->max_mr_size
;
4066 do_div(max_pages_per_mr
, srp_dev
->mr_page_size
);
4067 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__
,
4068 attr
->max_mr_size
, srp_dev
->mr_page_size
,
4069 max_pages_per_mr
, SRP_MAX_PAGES_PER_MR
);
4070 srp_dev
->max_pages_per_mr
= min_t(u64
, SRP_MAX_PAGES_PER_MR
,
4073 srp_dev
->has_fmr
= (device
->alloc_fmr
&& device
->dealloc_fmr
&&
4074 device
->map_phys_fmr
&& device
->unmap_fmr
);
4075 srp_dev
->has_fr
= (attr
->device_cap_flags
&
4076 IB_DEVICE_MEM_MGT_EXTENSIONS
);
4077 if (!never_register
&& !srp_dev
->has_fmr
&& !srp_dev
->has_fr
) {
4078 dev_warn(&device
->dev
, "neither FMR nor FR is supported\n");
4079 } else if (!never_register
&&
4080 attr
->max_mr_size
>= 2 * srp_dev
->mr_page_size
) {
4081 srp_dev
->use_fast_reg
= (srp_dev
->has_fr
&&
4082 (!srp_dev
->has_fmr
|| prefer_fr
));
4083 srp_dev
->use_fmr
= !srp_dev
->use_fast_reg
&& srp_dev
->has_fmr
;
4086 if (never_register
|| !register_always
||
4087 (!srp_dev
->has_fmr
&& !srp_dev
->has_fr
))
4088 flags
|= IB_PD_UNSAFE_GLOBAL_RKEY
;
4090 if (srp_dev
->use_fast_reg
) {
4091 srp_dev
->max_pages_per_mr
=
4092 min_t(u32
, srp_dev
->max_pages_per_mr
,
4093 attr
->max_fast_reg_page_list_len
);
4095 srp_dev
->mr_max_size
= srp_dev
->mr_page_size
*
4096 srp_dev
->max_pages_per_mr
;
4097 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
4098 device
->name
, mr_page_shift
, attr
->max_mr_size
,
4099 attr
->max_fast_reg_page_list_len
,
4100 srp_dev
->max_pages_per_mr
, srp_dev
->mr_max_size
);
4102 INIT_LIST_HEAD(&srp_dev
->dev_list
);
4104 srp_dev
->dev
= device
;
4105 srp_dev
->pd
= ib_alloc_pd(device
, flags
);
4106 if (IS_ERR(srp_dev
->pd
))
4109 if (flags
& IB_PD_UNSAFE_GLOBAL_RKEY
) {
4110 srp_dev
->global_rkey
= srp_dev
->pd
->unsafe_global_rkey
;
4111 WARN_ON_ONCE(srp_dev
->global_rkey
== 0);
4114 for (p
= rdma_start_port(device
); p
<= rdma_end_port(device
); ++p
) {
4115 host
= srp_add_port(srp_dev
, p
);
4117 list_add_tail(&host
->list
, &srp_dev
->dev_list
);
4120 ib_set_client_data(device
, &srp_client
, srp_dev
);
4127 static void srp_remove_one(struct ib_device
*device
, void *client_data
)
4129 struct srp_device
*srp_dev
;
4130 struct srp_host
*host
, *tmp_host
;
4131 struct srp_target_port
*target
;
4133 srp_dev
= client_data
;
4137 list_for_each_entry_safe(host
, tmp_host
, &srp_dev
->dev_list
, list
) {
4138 device_unregister(&host
->dev
);
4140 * Wait for the sysfs entry to go away, so that no new
4141 * target ports can be created.
4143 wait_for_completion(&host
->released
);
4146 * Remove all target ports.
4148 spin_lock(&host
->target_lock
);
4149 list_for_each_entry(target
, &host
->target_list
, list
)
4150 srp_queue_remove_work(target
);
4151 spin_unlock(&host
->target_lock
);
4154 * Wait for tl_err and target port removal tasks.
4156 flush_workqueue(system_long_wq
);
4157 flush_workqueue(srp_remove_wq
);
4162 ib_dealloc_pd(srp_dev
->pd
);
4167 static struct srp_function_template ib_srp_transport_functions
= {
4168 .has_rport_state
= true,
4169 .reset_timer_if_blocked
= true,
4170 .reconnect_delay
= &srp_reconnect_delay
,
4171 .fast_io_fail_tmo
= &srp_fast_io_fail_tmo
,
4172 .dev_loss_tmo
= &srp_dev_loss_tmo
,
4173 .reconnect
= srp_rport_reconnect
,
4174 .rport_delete
= srp_rport_delete
,
4175 .terminate_rport_io
= srp_terminate_io
,
4178 static int __init
srp_init_module(void)
4182 if (srp_sg_tablesize
) {
4183 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
4184 if (!cmd_sg_entries
)
4185 cmd_sg_entries
= srp_sg_tablesize
;
4188 if (!cmd_sg_entries
)
4189 cmd_sg_entries
= SRP_DEF_SG_TABLESIZE
;
4191 if (cmd_sg_entries
> 255) {
4192 pr_warn("Clamping cmd_sg_entries to 255\n");
4193 cmd_sg_entries
= 255;
4196 if (!indirect_sg_entries
)
4197 indirect_sg_entries
= cmd_sg_entries
;
4198 else if (indirect_sg_entries
< cmd_sg_entries
) {
4199 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
4201 indirect_sg_entries
= cmd_sg_entries
;
4204 if (indirect_sg_entries
> SG_MAX_SEGMENTS
) {
4205 pr_warn("Clamping indirect_sg_entries to %u\n",
4207 indirect_sg_entries
= SG_MAX_SEGMENTS
;
4210 srp_remove_wq
= create_workqueue("srp_remove");
4211 if (!srp_remove_wq
) {
4217 ib_srp_transport_template
=
4218 srp_attach_transport(&ib_srp_transport_functions
);
4219 if (!ib_srp_transport_template
)
4222 ret
= class_register(&srp_class
);
4224 pr_err("couldn't register class infiniband_srp\n");
4228 ib_sa_register_client(&srp_sa_client
);
4230 ret
= ib_register_client(&srp_client
);
4232 pr_err("couldn't register IB client\n");
4240 ib_sa_unregister_client(&srp_sa_client
);
4241 class_unregister(&srp_class
);
4244 srp_release_transport(ib_srp_transport_template
);
4247 destroy_workqueue(srp_remove_wq
);
4251 static void __exit
srp_cleanup_module(void)
4253 ib_unregister_client(&srp_client
);
4254 ib_sa_unregister_client(&srp_sa_client
);
4255 class_unregister(&srp_class
);
4256 srp_release_transport(ib_srp_transport_template
);
4257 destroy_workqueue(srp_remove_wq
);
/* Module entry and exit points. */
module_init(srp_init_module);
module_exit(srp_cleanup_module);