drivers/infiniband/ulp/srp/ib_srp.c
1 /*
2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <linux/lockdep.h>
44 #include <linux/inet.h>
45 #include <rdma/ib_cache.h>
46
47 #include <linux/atomic.h>
48
49 #include <scsi/scsi.h>
50 #include <scsi/scsi_device.h>
51 #include <scsi/scsi_dbg.h>
52 #include <scsi/scsi_tcq.h>
53 #include <scsi/srp.h>
54 #include <scsi/scsi_transport_srp.h>
55
56 #include "ib_srp.h"
57
58 #define DRV_NAME "ib_srp"
59 #define PFX DRV_NAME ": "
60 #define DRV_VERSION "2.0"
61 #define DRV_RELDATE "July 26, 2015"
62
63 MODULE_AUTHOR("Roland Dreier");
64 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
65 MODULE_LICENSE("Dual BSD/GPL");
66 MODULE_INFO(release_date, DRV_RELDATE);
67
68 #if !defined(CONFIG_DYNAMIC_DEBUG)
69 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
70 #define DYNAMIC_DEBUG_BRANCH(descriptor) false
71 #endif
72
73 static unsigned int srp_sg_tablesize;
74 static unsigned int cmd_sg_entries;
75 static unsigned int indirect_sg_entries;
76 static bool allow_ext_sg;
77 static bool prefer_fr = true;
78 static bool register_always = true;
79 static bool never_register;
80 static int topspin_workarounds = 1;
81
82 module_param(srp_sg_tablesize, uint, 0444);
83 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
84
85 module_param(cmd_sg_entries, uint, 0444);
86 MODULE_PARM_DESC(cmd_sg_entries,
87 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
88
89 module_param(indirect_sg_entries, uint, 0444);
90 MODULE_PARM_DESC(indirect_sg_entries,
91 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
92
93 module_param(allow_ext_sg, bool, 0444);
94 MODULE_PARM_DESC(allow_ext_sg,
95 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
96
97 module_param(topspin_workarounds, int, 0444);
98 MODULE_PARM_DESC(topspin_workarounds,
99 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
100
101 module_param(prefer_fr, bool, 0444);
102 MODULE_PARM_DESC(prefer_fr,
103 "Whether to use fast registration if both FMR and fast registration are supported");
104
105 module_param(register_always, bool, 0444);
106 MODULE_PARM_DESC(register_always,
107 "Use memory registration even for contiguous memory regions");
108
109 module_param(never_register, bool, 0444);
110 MODULE_PARM_DESC(never_register, "Never register memory");
111
112 static const struct kernel_param_ops srp_tmo_ops;
113
114 static int srp_reconnect_delay = 10;
115 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
116 S_IRUGO | S_IWUSR);
117 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
118
119 static int srp_fast_io_fail_tmo = 15;
120 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
121 S_IRUGO | S_IWUSR);
122 MODULE_PARM_DESC(fast_io_fail_tmo,
123 "Number of seconds between the observation of a transport"
124 " layer error and failing all I/O. \"off\" means that this"
125 " functionality is disabled.");
126
127 static int srp_dev_loss_tmo = 600;
128 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
129 S_IRUGO | S_IWUSR);
130 MODULE_PARM_DESC(dev_loss_tmo,
131 "Maximum number of seconds that the SRP transport should"
132 " insulate transport layer errors. After this time has been"
133 " exceeded the SCSI host is removed. Should be"
134 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
135 " if fast_io_fail_tmo has not been set. \"off\" means that"
136 " this functionality is disabled.");
137
138 static unsigned ch_count;
139 module_param(ch_count, uint, 0444);
140 MODULE_PARM_DESC(ch_count,
141 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
142
143 static void srp_add_one(struct ib_device *device);
144 static void srp_remove_one(struct ib_device *device, void *client_data);
145 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
146 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
147 const char *opname);
148 static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
149 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
150 struct rdma_cm_event *event);
151
152 static struct scsi_transport_template *ib_srp_transport_template;
153 static struct workqueue_struct *srp_remove_wq;
154
155 static struct ib_client srp_client = {
156 .name = "srp",
157 .add = srp_add_one,
158 .remove = srp_remove_one
159 };
160
161 static struct ib_sa_client srp_sa_client;
162
163 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
164 {
165 int tmo = *(int *)kp->arg;
166
167 if (tmo >= 0)
168 return sprintf(buffer, "%d", tmo);
169 else
170 return sprintf(buffer, "off");
171 }
172
173 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
174 {
175 int tmo, res;
176
177 res = srp_parse_tmo(&tmo, val);
178 if (res)
179 goto out;
180
181 if (kp->arg == &srp_reconnect_delay)
182 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
183 srp_dev_loss_tmo);
184 else if (kp->arg == &srp_fast_io_fail_tmo)
185 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
186 else
187 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
188 tmo);
189 if (res)
190 goto out;
191 *(int *)kp->arg = tmo;
192
193 out:
194 return res;
195 }
196
197 static const struct kernel_param_ops srp_tmo_ops = {
198 .get = srp_tmo_get,
199 .set = srp_tmo_set,
200 };
201
202 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
203 {
204 return (struct srp_target_port *) host->hostdata;
205 }
206
207 static const char *srp_target_info(struct Scsi_Host *host)
208 {
209 return host_to_target(host)->target_name;
210 }
211
212 static int srp_target_is_topspin(struct srp_target_port *target)
213 {
214 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
215 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
216
217 return topspin_workarounds &&
218 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
219 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
220 }
221
222 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
223 gfp_t gfp_mask,
224 enum dma_data_direction direction)
225 {
226 struct srp_iu *iu;
227
228 iu = kmalloc(sizeof *iu, gfp_mask);
229 if (!iu)
230 goto out;
231
232 iu->buf = kzalloc(size, gfp_mask);
233 if (!iu->buf)
234 goto out_free_iu;
235
236 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
237 direction);
238 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
239 goto out_free_buf;
240
241 iu->size = size;
242 iu->direction = direction;
243
244 return iu;
245
246 out_free_buf:
247 kfree(iu->buf);
248 out_free_iu:
249 kfree(iu);
250 out:
251 return NULL;
252 }
253
254 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
255 {
256 if (!iu)
257 return;
258
259 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
260 iu->direction);
261 kfree(iu->buf);
262 kfree(iu);
263 }
264
265 static void srp_qp_event(struct ib_event *event, void *context)
266 {
267 pr_debug("QP event %s (%d)\n",
268 ib_event_msg(event->event), event->event);
269 }
270
271 static int srp_init_ib_qp(struct srp_target_port *target,
272 struct ib_qp *qp)
273 {
274 struct ib_qp_attr *attr;
275 int ret;
276
277 attr = kmalloc(sizeof *attr, GFP_KERNEL);
278 if (!attr)
279 return -ENOMEM;
280
281 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
282 target->srp_host->port,
283 be16_to_cpu(target->ib_cm.pkey),
284 &attr->pkey_index);
285 if (ret)
286 goto out;
287
288 attr->qp_state = IB_QPS_INIT;
289 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
290 IB_ACCESS_REMOTE_WRITE);
291 attr->port_num = target->srp_host->port;
292
293 ret = ib_modify_qp(qp, attr,
294 IB_QP_STATE |
295 IB_QP_PKEY_INDEX |
296 IB_QP_ACCESS_FLAGS |
297 IB_QP_PORT);
298
299 out:
300 kfree(attr);
301 return ret;
302 }
303
304 static int srp_new_ib_cm_id(struct srp_rdma_ch *ch)
305 {
306 struct srp_target_port *target = ch->target;
307 struct ib_cm_id *new_cm_id;
308
309 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
310 srp_ib_cm_handler, ch);
311 if (IS_ERR(new_cm_id))
312 return PTR_ERR(new_cm_id);
313
314 if (ch->ib_cm.cm_id)
315 ib_destroy_cm_id(ch->ib_cm.cm_id);
316 ch->ib_cm.cm_id = new_cm_id;
317 if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
318 target->srp_host->port))
319 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA;
320 else
321 ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB;
322 ch->ib_cm.path.sgid = target->sgid;
323 ch->ib_cm.path.dgid = target->ib_cm.orig_dgid;
324 ch->ib_cm.path.pkey = target->ib_cm.pkey;
325 ch->ib_cm.path.service_id = target->ib_cm.service_id;
326
327 return 0;
328 }
329
330 static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
331 {
332 struct srp_target_port *target = ch->target;
333 struct rdma_cm_id *new_cm_id;
334 int ret;
335
336 new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch,
337 RDMA_PS_TCP, IB_QPT_RC);
338 if (IS_ERR(new_cm_id)) {
339 ret = PTR_ERR(new_cm_id);
340 new_cm_id = NULL;
341 goto out;
342 }
343
344 init_completion(&ch->done);
345 ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ?
346 (struct sockaddr *)&target->rdma_cm.src : NULL,
347 (struct sockaddr *)&target->rdma_cm.dst,
348 SRP_PATH_REC_TIMEOUT_MS);
349 if (ret) {
350 pr_err("No route available from %pIS to %pIS (%d)\n",
351 &target->rdma_cm.src, &target->rdma_cm.dst, ret);
352 goto out;
353 }
354 ret = wait_for_completion_interruptible(&ch->done);
355 if (ret < 0)
356 goto out;
357
358 ret = ch->status;
359 if (ret) {
360 pr_err("Resolving address %pIS failed (%d)\n",
361 &target->rdma_cm.dst, ret);
362 goto out;
363 }
364
365 swap(ch->rdma_cm.cm_id, new_cm_id);
366
367 out:
368 if (new_cm_id)
369 rdma_destroy_id(new_cm_id);
370
371 return ret;
372 }
373
374 static int srp_new_cm_id(struct srp_rdma_ch *ch)
375 {
376 struct srp_target_port *target = ch->target;
377
378 return target->using_rdma_cm ? srp_new_rdma_cm_id(ch) :
379 srp_new_ib_cm_id(ch);
380 }
381
382 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
383 {
384 struct srp_device *dev = target->srp_host->srp_dev;
385 struct ib_fmr_pool_param fmr_param;
386
387 memset(&fmr_param, 0, sizeof(fmr_param));
388 fmr_param.pool_size = target->mr_pool_size;
389 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
390 fmr_param.cache = 1;
391 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
392 fmr_param.page_shift = ilog2(dev->mr_page_size);
393 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
394 IB_ACCESS_REMOTE_WRITE |
395 IB_ACCESS_REMOTE_READ);
396
397 return ib_create_fmr_pool(dev->pd, &fmr_param);
398 }
399
400 /**
401 * srp_destroy_fr_pool() - free the resources owned by a pool
402 * @pool: Fast registration pool to be destroyed.
403 */
404 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
405 {
406 int i;
407 struct srp_fr_desc *d;
408
409 if (!pool)
410 return;
411
412 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
413 if (d->mr)
414 ib_dereg_mr(d->mr);
415 }
416 kfree(pool);
417 }
418
419 /**
420 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
421 * @device: IB device to allocate fast registration descriptors for.
422 * @pd: Protection domain associated with the FR descriptors.
423 * @pool_size: Number of descriptors to allocate.
424 * @max_page_list_len: Maximum fast registration work request page list length.
425 */
426 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
427 struct ib_pd *pd, int pool_size,
428 int max_page_list_len)
429 {
430 struct srp_fr_pool *pool;
431 struct srp_fr_desc *d;
432 struct ib_mr *mr;
433 int i, ret = -EINVAL;
434 enum ib_mr_type mr_type;
435
436 if (pool_size <= 0)
437 goto err;
438 ret = -ENOMEM;
439 pool = kzalloc(sizeof(struct srp_fr_pool) +
440 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
441 if (!pool)
442 goto err;
443 pool->size = pool_size;
444 pool->max_page_list_len = max_page_list_len;
445 spin_lock_init(&pool->lock);
446 INIT_LIST_HEAD(&pool->free_list);
447
448 if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
449 mr_type = IB_MR_TYPE_SG_GAPS;
450 else
451 mr_type = IB_MR_TYPE_MEM_REG;
452
453 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
454 mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
455 if (IS_ERR(mr)) {
456 ret = PTR_ERR(mr);
457 if (ret == -ENOMEM)
458 pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
459 dev_name(&device->dev));
460 goto destroy_pool;
461 }
462 d->mr = mr;
463 list_add_tail(&d->entry, &pool->free_list);
464 }
465
466 out:
467 return pool;
468
469 destroy_pool:
470 srp_destroy_fr_pool(pool);
471
472 err:
473 pool = ERR_PTR(ret);
474 goto out;
475 }
476
477 /**
478 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
479 * @pool: Pool to obtain descriptor from.
480 */
481 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
482 {
483 struct srp_fr_desc *d = NULL;
484 unsigned long flags;
485
486 spin_lock_irqsave(&pool->lock, flags);
487 if (!list_empty(&pool->free_list)) {
488 d = list_first_entry(&pool->free_list, typeof(*d), entry);
489 list_del(&d->entry);
490 }
491 spin_unlock_irqrestore(&pool->lock, flags);
492
493 return d;
494 }
495
496 /**
497 * srp_fr_pool_put() - put an FR descriptor back in the free list
498 * @pool: Pool the descriptor was allocated from.
499 * @desc: Pointer to an array of fast registration descriptor pointers.
500 * @n: Number of descriptors to put back.
501 *
502 * Note: The caller must already have queued an invalidation request for
503 * desc->mr->rkey before calling this function.
504 */
505 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
506 int n)
507 {
508 unsigned long flags;
509 int i;
510
511 spin_lock_irqsave(&pool->lock, flags);
512 for (i = 0; i < n; i++)
513 list_add(&desc[i]->entry, &pool->free_list);
514 spin_unlock_irqrestore(&pool->lock, flags);
515 }
516
517 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
518 {
519 struct srp_device *dev = target->srp_host->srp_dev;
520
521 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
522 dev->max_pages_per_mr);
523 }
524
525 /**
526 * srp_destroy_qp() - destroy an RDMA queue pair
527 * @ch: SRP RDMA channel.
528 *
529 * Drain the qp before destroying it. This avoids that the receive
530 * completion handler can access the queue pair while it is
531 * being destroyed.
532 */
533 static void srp_destroy_qp(struct srp_rdma_ch *ch)
534 {
535 spin_lock_irq(&ch->lock);
536 ib_process_cq_direct(ch->send_cq, -1);
537 spin_unlock_irq(&ch->lock);
538
539 ib_drain_qp(ch->qp);
540 ib_destroy_qp(ch->qp);
541 }
542
543 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
544 {
545 struct srp_target_port *target = ch->target;
546 struct srp_device *dev = target->srp_host->srp_dev;
547 struct ib_qp_init_attr *init_attr;
548 struct ib_cq *recv_cq, *send_cq;
549 struct ib_qp *qp;
550 struct ib_fmr_pool *fmr_pool = NULL;
551 struct srp_fr_pool *fr_pool = NULL;
552 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
553 int ret;
554
555 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
556 if (!init_attr)
557 return -ENOMEM;
558
559 /* queue_size + 1 for ib_drain_rq() */
560 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
561 ch->comp_vector, IB_POLL_SOFTIRQ);
562 if (IS_ERR(recv_cq)) {
563 ret = PTR_ERR(recv_cq);
564 goto err;
565 }
566
567 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
568 ch->comp_vector, IB_POLL_DIRECT);
569 if (IS_ERR(send_cq)) {
570 ret = PTR_ERR(send_cq);
571 goto err_recv_cq;
572 }
573
574 init_attr->event_handler = srp_qp_event;
575 init_attr->cap.max_send_wr = m * target->queue_size;
576 init_attr->cap.max_recv_wr = target->queue_size + 1;
577 init_attr->cap.max_recv_sge = 1;
578 init_attr->cap.max_send_sge = 1;
579 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
580 init_attr->qp_type = IB_QPT_RC;
581 init_attr->send_cq = send_cq;
582 init_attr->recv_cq = recv_cq;
583
584 if (target->using_rdma_cm) {
585 ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr);
586 qp = ch->rdma_cm.cm_id->qp;
587 } else {
588 qp = ib_create_qp(dev->pd, init_attr);
589 if (!IS_ERR(qp)) {
590 ret = srp_init_ib_qp(target, qp);
591 if (ret)
592 ib_destroy_qp(qp);
593 } else {
594 ret = PTR_ERR(qp);
595 }
596 }
597 if (ret) {
598 pr_err("QP creation failed for dev %s: %d\n",
599 dev_name(&dev->dev->dev), ret);
600 goto err_send_cq;
601 }
602
603 if (dev->use_fast_reg) {
604 fr_pool = srp_alloc_fr_pool(target);
605 if (IS_ERR(fr_pool)) {
606 ret = PTR_ERR(fr_pool);
607 shost_printk(KERN_WARNING, target->scsi_host, PFX
608 "FR pool allocation failed (%d)\n", ret);
609 goto err_qp;
610 }
611 } else if (dev->use_fmr) {
612 fmr_pool = srp_alloc_fmr_pool(target);
613 if (IS_ERR(fmr_pool)) {
614 ret = PTR_ERR(fmr_pool);
615 shost_printk(KERN_WARNING, target->scsi_host, PFX
616 "FMR pool allocation failed (%d)\n", ret);
617 goto err_qp;
618 }
619 }
620
621 if (ch->qp)
622 srp_destroy_qp(ch);
623 if (ch->recv_cq)
624 ib_free_cq(ch->recv_cq);
625 if (ch->send_cq)
626 ib_free_cq(ch->send_cq);
627
628 ch->qp = qp;
629 ch->recv_cq = recv_cq;
630 ch->send_cq = send_cq;
631
632 if (dev->use_fast_reg) {
633 if (ch->fr_pool)
634 srp_destroy_fr_pool(ch->fr_pool);
635 ch->fr_pool = fr_pool;
636 } else if (dev->use_fmr) {
637 if (ch->fmr_pool)
638 ib_destroy_fmr_pool(ch->fmr_pool);
639 ch->fmr_pool = fmr_pool;
640 }
641
642 kfree(init_attr);
643 return 0;
644
645 err_qp:
646 if (target->using_rdma_cm)
647 rdma_destroy_qp(ch->rdma_cm.cm_id);
648 else
649 ib_destroy_qp(qp);
650
651 err_send_cq:
652 ib_free_cq(send_cq);
653
654 err_recv_cq:
655 ib_free_cq(recv_cq);
656
657 err:
658 kfree(init_attr);
659 return ret;
660 }
661
662 /*
663 * Note: this function may be called without srp_alloc_iu_bufs() having been
664 * invoked. Hence the ch->[rt]x_ring checks.
665 */
666 static void srp_free_ch_ib(struct srp_target_port *target,
667 struct srp_rdma_ch *ch)
668 {
669 struct srp_device *dev = target->srp_host->srp_dev;
670 int i;
671
672 if (!ch->target)
673 return;
674
675 if (target->using_rdma_cm) {
676 if (ch->rdma_cm.cm_id) {
677 rdma_destroy_id(ch->rdma_cm.cm_id);
678 ch->rdma_cm.cm_id = NULL;
679 }
680 } else {
681 if (ch->ib_cm.cm_id) {
682 ib_destroy_cm_id(ch->ib_cm.cm_id);
683 ch->ib_cm.cm_id = NULL;
684 }
685 }
686
687 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
688 if (!ch->qp)
689 return;
690
691 if (dev->use_fast_reg) {
692 if (ch->fr_pool)
693 srp_destroy_fr_pool(ch->fr_pool);
694 } else if (dev->use_fmr) {
695 if (ch->fmr_pool)
696 ib_destroy_fmr_pool(ch->fmr_pool);
697 }
698
699 srp_destroy_qp(ch);
700 ib_free_cq(ch->send_cq);
701 ib_free_cq(ch->recv_cq);
702
703 /*
704 * Avoid that the SCSI error handler tries to use this channel after
705 * it has been freed. The SCSI error handler can namely continue
706 * trying to perform recovery actions after scsi_remove_host()
707 * returned.
708 */
709 ch->target = NULL;
710
711 ch->qp = NULL;
712 ch->send_cq = ch->recv_cq = NULL;
713
714 if (ch->rx_ring) {
715 for (i = 0; i < target->queue_size; ++i)
716 srp_free_iu(target->srp_host, ch->rx_ring[i]);
717 kfree(ch->rx_ring);
718 ch->rx_ring = NULL;
719 }
720 if (ch->tx_ring) {
721 for (i = 0; i < target->queue_size; ++i)
722 srp_free_iu(target->srp_host, ch->tx_ring[i]);
723 kfree(ch->tx_ring);
724 ch->tx_ring = NULL;
725 }
726 }
727
728 static void srp_path_rec_completion(int status,
729 struct sa_path_rec *pathrec,
730 void *ch_ptr)
731 {
732 struct srp_rdma_ch *ch = ch_ptr;
733 struct srp_target_port *target = ch->target;
734
735 ch->status = status;
736 if (status)
737 shost_printk(KERN_ERR, target->scsi_host,
738 PFX "Got failed path rec status %d\n", status);
739 else
740 ch->ib_cm.path = *pathrec;
741 complete(&ch->done);
742 }
743
744 static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
745 {
746 struct srp_target_port *target = ch->target;
747 int ret;
748
749 ch->ib_cm.path.numb_path = 1;
750
751 init_completion(&ch->done);
752
753 ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client,
754 target->srp_host->srp_dev->dev,
755 target->srp_host->port,
756 &ch->ib_cm.path,
757 IB_SA_PATH_REC_SERVICE_ID |
758 IB_SA_PATH_REC_DGID |
759 IB_SA_PATH_REC_SGID |
760 IB_SA_PATH_REC_NUMB_PATH |
761 IB_SA_PATH_REC_PKEY,
762 SRP_PATH_REC_TIMEOUT_MS,
763 GFP_KERNEL,
764 srp_path_rec_completion,
765 ch, &ch->ib_cm.path_query);
766 if (ch->ib_cm.path_query_id < 0)
767 return ch->ib_cm.path_query_id;
768
769 ret = wait_for_completion_interruptible(&ch->done);
770 if (ret < 0)
771 return ret;
772
773 if (ch->status < 0)
774 shost_printk(KERN_WARNING, target->scsi_host,
775 PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
776 ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw,
777 be16_to_cpu(target->ib_cm.pkey),
778 be64_to_cpu(target->ib_cm.service_id));
779
780 return ch->status;
781 }
782
783 static int srp_rdma_lookup_path(struct srp_rdma_ch *ch)
784 {
785 struct srp_target_port *target = ch->target;
786 int ret;
787
788 init_completion(&ch->done);
789
790 ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS);
791 if (ret)
792 return ret;
793
794 wait_for_completion_interruptible(&ch->done);
795
796 if (ch->status != 0)
797 shost_printk(KERN_WARNING, target->scsi_host,
798 PFX "Path resolution failed\n");
799
800 return ch->status;
801 }
802
803 static int srp_lookup_path(struct srp_rdma_ch *ch)
804 {
805 struct srp_target_port *target = ch->target;
806
807 return target->using_rdma_cm ? srp_rdma_lookup_path(ch) :
808 srp_ib_lookup_path(ch);
809 }
810
811 static u8 srp_get_subnet_timeout(struct srp_host *host)
812 {
813 struct ib_port_attr attr;
814 int ret;
815 u8 subnet_timeout = 18;
816
817 ret = ib_query_port(host->srp_dev->dev, host->port, &attr);
818 if (ret == 0)
819 subnet_timeout = attr.subnet_timeout;
820
821 if (unlikely(subnet_timeout < 15))
822 pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n",
823 dev_name(&host->srp_dev->dev->dev), subnet_timeout);
824
825 return subnet_timeout;
826 }
827
828 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
829 {
830 struct srp_target_port *target = ch->target;
831 struct {
832 struct rdma_conn_param rdma_param;
833 struct srp_login_req_rdma rdma_req;
834 struct ib_cm_req_param ib_param;
835 struct srp_login_req ib_req;
836 } *req = NULL;
837 char *ipi, *tpi;
838 int status;
839
840 req = kzalloc(sizeof *req, GFP_KERNEL);
841 if (!req)
842 return -ENOMEM;
843
844 req->ib_param.flow_control = 1;
845 req->ib_param.retry_count = target->tl_retry_count;
846
847 /*
848 * Pick some arbitrary defaults here; we could make these
849 * module parameters if anyone cared about setting them.
850 */
851 req->ib_param.responder_resources = 4;
852 req->ib_param.rnr_retry_count = 7;
853 req->ib_param.max_cm_retries = 15;
854
855 req->ib_req.opcode = SRP_LOGIN_REQ;
856 req->ib_req.tag = 0;
857 req->ib_req.req_it_iu_len = cpu_to_be32(target->max_iu_len);
858 req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
859 SRP_BUF_FORMAT_INDIRECT);
860 req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI :
861 SRP_MULTICHAN_SINGLE);
862
863 if (target->using_rdma_cm) {
864 req->rdma_param.flow_control = req->ib_param.flow_control;
865 req->rdma_param.responder_resources =
866 req->ib_param.responder_resources;
867 req->rdma_param.initiator_depth = req->ib_param.initiator_depth;
868 req->rdma_param.retry_count = req->ib_param.retry_count;
869 req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count;
870 req->rdma_param.private_data = &req->rdma_req;
871 req->rdma_param.private_data_len = sizeof(req->rdma_req);
872
873 req->rdma_req.opcode = req->ib_req.opcode;
874 req->rdma_req.tag = req->ib_req.tag;
875 req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len;
876 req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt;
877 req->rdma_req.req_flags = req->ib_req.req_flags;
878
879 ipi = req->rdma_req.initiator_port_id;
880 tpi = req->rdma_req.target_port_id;
881 } else {
882 u8 subnet_timeout;
883
884 subnet_timeout = srp_get_subnet_timeout(target->srp_host);
885
886 req->ib_param.primary_path = &ch->ib_cm.path;
887 req->ib_param.alternate_path = NULL;
888 req->ib_param.service_id = target->ib_cm.service_id;
889 get_random_bytes(&req->ib_param.starting_psn, 4);
890 req->ib_param.starting_psn &= 0xffffff;
891 req->ib_param.qp_num = ch->qp->qp_num;
892 req->ib_param.qp_type = ch->qp->qp_type;
893 req->ib_param.local_cm_response_timeout = subnet_timeout + 2;
894 req->ib_param.remote_cm_response_timeout = subnet_timeout + 2;
895 req->ib_param.private_data = &req->ib_req;
896 req->ib_param.private_data_len = sizeof(req->ib_req);
897
898 ipi = req->ib_req.initiator_port_id;
899 tpi = req->ib_req.target_port_id;
900 }
901
902 /*
903 * In the published SRP specification (draft rev. 16a), the
904 * port identifier format is 8 bytes of ID extension followed
905 * by 8 bytes of GUID. Older drafts put the two halves in the
906 * opposite order, so that the GUID comes first.
907 *
908 * Targets conforming to these obsolete drafts can be
909 * recognized by the I/O Class they report.
910 */
911 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
912 memcpy(ipi, &target->sgid.global.interface_id, 8);
913 memcpy(ipi + 8, &target->initiator_ext, 8);
914 memcpy(tpi, &target->ioc_guid, 8);
915 memcpy(tpi + 8, &target->id_ext, 8);
916 } else {
917 memcpy(ipi, &target->initiator_ext, 8);
918 memcpy(ipi + 8, &target->sgid.global.interface_id, 8);
919 memcpy(tpi, &target->id_ext, 8);
920 memcpy(tpi + 8, &target->ioc_guid, 8);
921 }
922
923 /*
924 * Topspin/Cisco SRP targets will reject our login unless we
925 * zero out the first 8 bytes of our initiator port ID and set
926 * the second 8 bytes to the local node GUID.
927 */
928 if (srp_target_is_topspin(target)) {
929 shost_printk(KERN_DEBUG, target->scsi_host,
930 PFX "Topspin/Cisco initiator port ID workaround "
931 "activated for target GUID %016llx\n",
932 be64_to_cpu(target->ioc_guid));
933 memset(ipi, 0, 8);
934 memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8);
935 }
936
937 if (target->using_rdma_cm)
938 status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param);
939 else
940 status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param);
941
942 kfree(req);
943
944 return status;
945 }
946
947 static bool srp_queue_remove_work(struct srp_target_port *target)
948 {
949 bool changed = false;
950
951 spin_lock_irq(&target->lock);
952 if (target->state != SRP_TARGET_REMOVED) {
953 target->state = SRP_TARGET_REMOVED;
954 changed = true;
955 }
956 spin_unlock_irq(&target->lock);
957
958 if (changed)
959 queue_work(srp_remove_wq, &target->remove_work);
960
961 return changed;
962 }
963
964 static void srp_disconnect_target(struct srp_target_port *target)
965 {
966 struct srp_rdma_ch *ch;
967 int i, ret;
968
969 /* XXX should send SRP_I_LOGOUT request */
970
971 for (i = 0; i < target->ch_count; i++) {
972 ch = &target->ch[i];
973 ch->connected = false;
974 ret = 0;
975 if (target->using_rdma_cm) {
976 if (ch->rdma_cm.cm_id)
977 rdma_disconnect(ch->rdma_cm.cm_id);
978 } else {
979 if (ch->ib_cm.cm_id)
980 ret = ib_send_cm_dreq(ch->ib_cm.cm_id,
981 NULL, 0);
982 }
983 if (ret < 0) {
984 shost_printk(KERN_DEBUG, target->scsi_host,
985 PFX "Sending CM DREQ failed\n");
986 }
987 }
988 }
989
990 static void srp_free_req_data(struct srp_target_port *target,
991 struct srp_rdma_ch *ch)
992 {
993 struct srp_device *dev = target->srp_host->srp_dev;
994 struct ib_device *ibdev = dev->dev;
995 struct srp_request *req;
996 int i;
997
998 if (!ch->req_ring)
999 return;
1000
1001 for (i = 0; i < target->req_ring_size; ++i) {
1002 req = &ch->req_ring[i];
1003 if (dev->use_fast_reg) {
1004 kfree(req->fr_list);
1005 } else {
1006 kfree(req->fmr_list);
1007 kfree(req->map_page);
1008 }
1009 if (req->indirect_dma_addr) {
1010 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
1011 target->indirect_size,
1012 DMA_TO_DEVICE);
1013 }
1014 kfree(req->indirect_desc);
1015 }
1016
1017 kfree(ch->req_ring);
1018 ch->req_ring = NULL;
1019 }
1020
1021 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
1022 {
1023 struct srp_target_port *target = ch->target;
1024 struct srp_device *srp_dev = target->srp_host->srp_dev;
1025 struct ib_device *ibdev = srp_dev->dev;
1026 struct srp_request *req;
1027 void *mr_list;
1028 dma_addr_t dma_addr;
1029 int i, ret = -ENOMEM;
1030
1031 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
1032 GFP_KERNEL);
1033 if (!ch->req_ring)
1034 goto out;
1035
1036 for (i = 0; i < target->req_ring_size; ++i) {
1037 req = &ch->req_ring[i];
1038 mr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *),
1039 GFP_KERNEL);
1040 if (!mr_list)
1041 goto out;
1042 if (srp_dev->use_fast_reg) {
1043 req->fr_list = mr_list;
1044 } else {
1045 req->fmr_list = mr_list;
1046 req->map_page = kmalloc_array(srp_dev->max_pages_per_mr,
1047 sizeof(void *),
1048 GFP_KERNEL);
1049 if (!req->map_page)
1050 goto out;
1051 }
1052 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
1053 if (!req->indirect_desc)
1054 goto out;
1055
1056 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
1057 target->indirect_size,
1058 DMA_TO_DEVICE);
1059 if (ib_dma_mapping_error(ibdev, dma_addr))
1060 goto out;
1061
1062 req->indirect_dma_addr = dma_addr;
1063 }
1064 ret = 0;
1065
1066 out:
1067 return ret;
1068 }
1069
1070 /**
1071 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
1072 * @shost: SCSI host whose attributes to remove from sysfs.
1073 *
1074 * Note: Any attributes defined in the host template and that did not exist
1075 * before invocation of this function will be ignored.
1076 */
1077 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
1078 {
1079 struct device_attribute **attr;
1080
1081 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
1082 device_remove_file(&shost->shost_dev, *attr);
1083 }
1084
1085 static void srp_remove_target(struct srp_target_port *target)
1086 {
1087 struct srp_rdma_ch *ch;
1088 int i;
1089
1090 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
1091
1092 srp_del_scsi_host_attr(target->scsi_host);
1093 srp_rport_get(target->rport);
1094 srp_remove_host(target->scsi_host);
1095 scsi_remove_host(target->scsi_host);
1096 srp_stop_rport_timers(target->rport);
1097 srp_disconnect_target(target);
1098 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
1099 for (i = 0; i < target->ch_count; i++) {
1100 ch = &target->ch[i];
1101 srp_free_ch_ib(target, ch);
1102 }
1103 cancel_work_sync(&target->tl_err_work);
1104 srp_rport_put(target->rport);
1105 for (i = 0; i < target->ch_count; i++) {
1106 ch = &target->ch[i];
1107 srp_free_req_data(target, ch);
1108 }
1109 kfree(target->ch);
1110 target->ch = NULL;
1111
1112 spin_lock(&target->srp_host->target_lock);
1113 list_del(&target->list);
1114 spin_unlock(&target->srp_host->target_lock);
1115
1116 scsi_host_put(target->scsi_host);
1117 }
1118
1119 static void srp_remove_work(struct work_struct *work)
1120 {
1121 struct srp_target_port *target =
1122 container_of(work, struct srp_target_port, remove_work);
1123
1124 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
1125
1126 srp_remove_target(target);
1127 }
1128
1129 static void srp_rport_delete(struct srp_rport *rport)
1130 {
1131 struct srp_target_port *target = rport->lld_data;
1132
1133 srp_queue_remove_work(target);
1134 }
1135
1136 /**
1137 * srp_connected_ch() - number of connected channels
1138 * @target: SRP target port.
1139 */
1140 static int srp_connected_ch(struct srp_target_port *target)
1141 {
1142 int i, c = 0;
1143
1144 for (i = 0; i < target->ch_count; i++)
1145 c += target->ch[i].connected;
1146
1147 return c;
1148 }
1149
1150 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
1151 {
1152 struct srp_target_port *target = ch->target;
1153 int ret;
1154
1155 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
1156
1157 ret = srp_lookup_path(ch);
1158 if (ret)
1159 goto out;
1160
1161 while (1) {
1162 init_completion(&ch->done);
1163 ret = srp_send_req(ch, multich);
1164 if (ret)
1165 goto out;
1166 ret = wait_for_completion_interruptible(&ch->done);
1167 if (ret < 0)
1168 goto out;
1169
1170 /*
1171 * The CM event handling code will set status to
1172 * SRP_PORT_REDIRECT if we get a port redirect REJ
1173 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1174 * redirect REJ back.
1175 */
1176 ret = ch->status;
1177 switch (ret) {
1178 case 0:
1179 ch->connected = true;
1180 goto out;
1181
1182 case SRP_PORT_REDIRECT:
1183 ret = srp_lookup_path(ch);
1184 if (ret)
1185 goto out;
1186 break;
1187
1188 case SRP_DLID_REDIRECT:
1189 break;
1190
1191 case SRP_STALE_CONN:
1192 shost_printk(KERN_ERR, target->scsi_host, PFX
1193 "giving up on stale connection\n");
1194 ret = -ECONNRESET;
1195 goto out;
1196
1197 default:
1198 goto out;
1199 }
1200 }
1201
1202 out:
1203 return ret <= 0 ? ret : -ENODEV;
1204 }
1205
1206 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1207 {
1208 srp_handle_qp_err(cq, wc, "INV RKEY");
1209 }
1210
1211 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1212 u32 rkey)
1213 {
1214 struct ib_send_wr *bad_wr;
1215 struct ib_send_wr wr = {
1216 .opcode = IB_WR_LOCAL_INV,
1217 .next = NULL,
1218 .num_sge = 0,
1219 .send_flags = 0,
1220 .ex.invalidate_rkey = rkey,
1221 };
1222
1223 wr.wr_cqe = &req->reg_cqe;
1224 req->reg_cqe.done = srp_inv_rkey_err_done;
1225 return ib_post_send(ch->qp, &wr, &bad_wr);
1226 }
1227
1228 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1229 struct srp_rdma_ch *ch,
1230 struct srp_request *req)
1231 {
1232 struct srp_target_port *target = ch->target;
1233 struct srp_device *dev = target->srp_host->srp_dev;
1234 struct ib_device *ibdev = dev->dev;
1235 int i, res;
1236
1237 if (!scsi_sglist(scmnd) ||
1238 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1239 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1240 return;
1241
1242 if (dev->use_fast_reg) {
1243 struct srp_fr_desc **pfr;
1244
1245 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1246 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1247 if (res < 0) {
1248 shost_printk(KERN_ERR, target->scsi_host, PFX
1249 "Queueing INV WR for rkey %#x failed (%d)\n",
1250 (*pfr)->mr->rkey, res);
1251 queue_work(system_long_wq,
1252 &target->tl_err_work);
1253 }
1254 }
1255 if (req->nmdesc)
1256 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1257 req->nmdesc);
1258 } else if (dev->use_fmr) {
1259 struct ib_pool_fmr **pfmr;
1260
1261 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1262 ib_fmr_pool_unmap(*pfmr);
1263 }
1264
1265 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1266 scmnd->sc_data_direction);
1267 }
1268
1269 /**
1270 * srp_claim_req - Take ownership of the scmnd associated with a request.
1271 * @ch: SRP RDMA channel.
1272 * @req: SRP request.
1273 * @sdev: If not NULL, only take ownership for this SCSI device.
1274 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1275 * ownership of @req->scmnd if it equals @scmnd.
1276 *
1277 * Return value:
1278 * Either NULL or a pointer to the SCSI command the caller became owner of.
1279 */
1280 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1281 struct srp_request *req,
1282 struct scsi_device *sdev,
1283 struct scsi_cmnd *scmnd)
1284 {
1285 unsigned long flags;
1286
1287 spin_lock_irqsave(&ch->lock, flags);
1288 if (req->scmnd &&
1289 (!sdev || req->scmnd->device == sdev) &&
1290 (!scmnd || req->scmnd == scmnd)) {
1291 scmnd = req->scmnd;
1292 req->scmnd = NULL;
1293 } else {
1294 scmnd = NULL;
1295 }
1296 spin_unlock_irqrestore(&ch->lock, flags);
1297
1298 return scmnd;
1299 }
1300
1301 /**
1302 * srp_free_req() - Unmap data and adjust ch->req_lim.
1303 * @ch: SRP RDMA channel.
1304 * @req: Request to be freed.
1305 * @scmnd: SCSI command associated with @req.
1306 * @req_lim_delta: Amount to be added to @target->req_lim.
1307 */
1308 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1309 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1310 {
1311 unsigned long flags;
1312
1313 srp_unmap_data(scmnd, ch, req);
1314
1315 spin_lock_irqsave(&ch->lock, flags);
1316 ch->req_lim += req_lim_delta;
1317 spin_unlock_irqrestore(&ch->lock, flags);
1318 }
1319
1320 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1321 struct scsi_device *sdev, int result)
1322 {
1323 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1324
1325 if (scmnd) {
1326 srp_free_req(ch, req, scmnd, 0);
1327 scmnd->result = result;
1328 scmnd->scsi_done(scmnd);
1329 }
1330 }
1331
1332 static void srp_terminate_io(struct srp_rport *rport)
1333 {
1334 struct srp_target_port *target = rport->lld_data;
1335 struct srp_rdma_ch *ch;
1336 struct Scsi_Host *shost = target->scsi_host;
1337 struct scsi_device *sdev;
1338 int i, j;
1339
1340 /*
1341 * Invoking srp_terminate_io() while srp_queuecommand() is running
1342 * is not safe. Hence the warning statement below.
1343 */
1344 shost_for_each_device(sdev, shost)
1345 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1346
1347 for (i = 0; i < target->ch_count; i++) {
1348 ch = &target->ch[i];
1349
1350 for (j = 0; j < target->req_ring_size; ++j) {
1351 struct srp_request *req = &ch->req_ring[j];
1352
1353 srp_finish_req(ch, req, NULL,
1354 DID_TRANSPORT_FAILFAST << 16);
1355 }
1356 }
1357 }
1358
1359 /*
1360 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1361 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1362 * srp_reset_device() or srp_reset_host() calls will occur while this function
1363 * is in progress. One way to realize that is not to call this function
1364 * directly but to call srp_reconnect_rport() instead since that last function
1365 * serializes calls of this function via rport->mutex and also blocks
1366 * srp_queuecommand() calls before invoking this function.
1367 */
1368 static int srp_rport_reconnect(struct srp_rport *rport)
1369 {
1370 struct srp_target_port *target = rport->lld_data;
1371 struct srp_rdma_ch *ch;
1372 int i, j, ret = 0;
1373 bool multich = false;
1374
1375 srp_disconnect_target(target);
1376
1377 if (target->state == SRP_TARGET_SCANNING)
1378 return -ENODEV;
1379
1380 /*
1381 * Now get a new local CM ID so that we avoid confusing the target in
1382 * case things are really fouled up. Doing so also ensures that all CM
1383 * callbacks will have finished before a new QP is allocated.
1384 */
1385 for (i = 0; i < target->ch_count; i++) {
1386 ch = &target->ch[i];
1387 ret += srp_new_cm_id(ch);
1388 }
1389 for (i = 0; i < target->ch_count; i++) {
1390 ch = &target->ch[i];
1391 for (j = 0; j < target->req_ring_size; ++j) {
1392 struct srp_request *req = &ch->req_ring[j];
1393
1394 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1395 }
1396 }
1397 for (i = 0; i < target->ch_count; i++) {
1398 ch = &target->ch[i];
1399 /*
1400 * Whether or not creating a new CM ID succeeded, create a new
1401 * QP. This guarantees that all completion callback function
1402 * invocations have finished before request resetting starts.
1403 */
1404 ret += srp_create_ch_ib(ch);
1405
1406 INIT_LIST_HEAD(&ch->free_tx);
1407 for (j = 0; j < target->queue_size; ++j)
1408 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1409 }
1410
1411 target->qp_in_error = false;
1412
1413 for (i = 0; i < target->ch_count; i++) {
1414 ch = &target->ch[i];
1415 if (ret)
1416 break;
1417 ret = srp_connect_ch(ch, multich);
1418 multich = true;
1419 }
1420
1421 if (ret == 0)
1422 shost_printk(KERN_INFO, target->scsi_host,
1423 PFX "reconnect succeeded\n");
1424
1425 return ret;
1426 }
1427
1428 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1429 unsigned int dma_len, u32 rkey)
1430 {
1431 struct srp_direct_buf *desc = state->desc;
1432
1433 WARN_ON_ONCE(!dma_len);
1434
1435 desc->va = cpu_to_be64(dma_addr);
1436 desc->key = cpu_to_be32(rkey);
1437 desc->len = cpu_to_be32(dma_len);
1438
1439 state->total_len += dma_len;
1440 state->desc++;
1441 state->ndesc++;
1442 }
1443
1444 static int srp_map_finish_fmr(struct srp_map_state *state,
1445 struct srp_rdma_ch *ch)
1446 {
1447 struct srp_target_port *target = ch->target;
1448 struct srp_device *dev = target->srp_host->srp_dev;
1449 struct ib_pool_fmr *fmr;
1450 u64 io_addr = 0;
1451
1452 if (state->fmr.next >= state->fmr.end) {
1453 shost_printk(KERN_ERR, ch->target->scsi_host,
1454 PFX "Out of MRs (mr_per_cmd = %d)\n",
1455 ch->target->mr_per_cmd);
1456 return -ENOMEM;
1457 }
1458
1459 WARN_ON_ONCE(!dev->use_fmr);
1460
1461 if (state->npages == 0)
1462 return 0;
1463
1464 if (state->npages == 1 && target->global_rkey) {
1465 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1466 target->global_rkey);
1467 goto reset_state;
1468 }
1469
1470 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1471 state->npages, io_addr);
1472 if (IS_ERR(fmr))
1473 return PTR_ERR(fmr);
1474
1475 *state->fmr.next++ = fmr;
1476 state->nmdesc++;
1477
1478 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1479 state->dma_len, fmr->fmr->rkey);
1480
1481 reset_state:
1482 state->npages = 0;
1483 state->dma_len = 0;
1484
1485 return 0;
1486 }
1487
1488 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1489 {
1490 srp_handle_qp_err(cq, wc, "FAST REG");
1491 }
1492
1493 /*
1494 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1495 * where to start in the first element. If sg_offset_p != NULL then
1496 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1497 * byte that has not yet been mapped.
1498 */
1499 static int srp_map_finish_fr(struct srp_map_state *state,
1500 struct srp_request *req,
1501 struct srp_rdma_ch *ch, int sg_nents,
1502 unsigned int *sg_offset_p)
1503 {
1504 struct srp_target_port *target = ch->target;
1505 struct srp_device *dev = target->srp_host->srp_dev;
1506 struct ib_send_wr *bad_wr;
1507 struct ib_reg_wr wr;
1508 struct srp_fr_desc *desc;
1509 u32 rkey;
1510 int n, err;
1511
1512 if (state->fr.next >= state->fr.end) {
1513 shost_printk(KERN_ERR, ch->target->scsi_host,
1514 PFX "Out of MRs (mr_per_cmd = %d)\n",
1515 ch->target->mr_per_cmd);
1516 return -ENOMEM;
1517 }
1518
1519 WARN_ON_ONCE(!dev->use_fast_reg);
1520
1521 if (sg_nents == 1 && target->global_rkey) {
1522 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1523
1524 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1525 sg_dma_len(state->sg) - sg_offset,
1526 target->global_rkey);
1527 if (sg_offset_p)
1528 *sg_offset_p = 0;
1529 return 1;
1530 }
1531
1532 desc = srp_fr_pool_get(ch->fr_pool);
1533 if (!desc)
1534 return -ENOMEM;
1535
1536 rkey = ib_inc_rkey(desc->mr->rkey);
1537 ib_update_fast_reg_key(desc->mr, rkey);
1538
1539 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1540 dev->mr_page_size);
1541 if (unlikely(n < 0)) {
1542 srp_fr_pool_put(ch->fr_pool, &desc, 1);
1543 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1544 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1545 sg_offset_p ? *sg_offset_p : -1, n);
1546 return n;
1547 }
1548
1549 WARN_ON_ONCE(desc->mr->length == 0);
1550
1551 req->reg_cqe.done = srp_reg_mr_err_done;
1552
1553 wr.wr.next = NULL;
1554 wr.wr.opcode = IB_WR_REG_MR;
1555 wr.wr.wr_cqe = &req->reg_cqe;
1556 wr.wr.num_sge = 0;
1557 wr.wr.send_flags = 0;
1558 wr.mr = desc->mr;
1559 wr.key = desc->mr->rkey;
1560 wr.access = (IB_ACCESS_LOCAL_WRITE |
1561 IB_ACCESS_REMOTE_READ |
1562 IB_ACCESS_REMOTE_WRITE);
1563
1564 *state->fr.next++ = desc;
1565 state->nmdesc++;
1566
1567 srp_map_desc(state, desc->mr->iova,
1568 desc->mr->length, desc->mr->rkey);
1569
1570 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1571 if (unlikely(err)) {
1572 WARN_ON_ONCE(err == -ENOMEM);
1573 return err;
1574 }
1575
1576 return n;
1577 }
1578
1579 static int srp_map_sg_entry(struct srp_map_state *state,
1580 struct srp_rdma_ch *ch,
1581 struct scatterlist *sg)
1582 {
1583 struct srp_target_port *target = ch->target;
1584 struct srp_device *dev = target->srp_host->srp_dev;
1585 struct ib_device *ibdev = dev->dev;
1586 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1587 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1588 unsigned int len = 0;
1589 int ret;
1590
1591 WARN_ON_ONCE(!dma_len);
1592
1593 while (dma_len) {
1594 unsigned offset = dma_addr & ~dev->mr_page_mask;
1595
1596 if (state->npages == dev->max_pages_per_mr ||
1597 (state->npages > 0 && offset != 0)) {
1598 ret = srp_map_finish_fmr(state, ch);
1599 if (ret)
1600 return ret;
1601 }
1602
1603 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1604
1605 if (!state->npages)
1606 state->base_dma_addr = dma_addr;
1607 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1608 state->dma_len += len;
1609 dma_addr += len;
1610 dma_len -= len;
1611 }
1612
1613 /*
1614 * If the end of the MR is not on a page boundary then we need to
1615 * close it out and start a new one -- we can only merge at page
1616 * boundaries.
1617 */
1618 ret = 0;
1619 if ((dma_addr & ~dev->mr_page_mask) != 0)
1620 ret = srp_map_finish_fmr(state, ch);
1621 return ret;
1622 }
1623
1624 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1625 struct srp_request *req, struct scatterlist *scat,
1626 int count)
1627 {
1628 struct scatterlist *sg;
1629 int i, ret;
1630
1631 state->pages = req->map_page;
1632 state->fmr.next = req->fmr_list;
1633 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1634
1635 for_each_sg(scat, sg, count, i) {
1636 ret = srp_map_sg_entry(state, ch, sg);
1637 if (ret)
1638 return ret;
1639 }
1640
1641 ret = srp_map_finish_fmr(state, ch);
1642 if (ret)
1643 return ret;
1644
1645 return 0;
1646 }
1647
1648 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1649 struct srp_request *req, struct scatterlist *scat,
1650 int count)
1651 {
1652 unsigned int sg_offset = 0;
1653
1654 state->fr.next = req->fr_list;
1655 state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1656 state->sg = scat;
1657
1658 if (count == 0)
1659 return 0;
1660
1661 while (count) {
1662 int i, n;
1663
1664 n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1665 if (unlikely(n < 0))
1666 return n;
1667
1668 count -= n;
1669 for (i = 0; i < n; i++)
1670 state->sg = sg_next(state->sg);
1671 }
1672
1673 return 0;
1674 }
1675
1676 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1677 struct srp_request *req, struct scatterlist *scat,
1678 int count)
1679 {
1680 struct srp_target_port *target = ch->target;
1681 struct srp_device *dev = target->srp_host->srp_dev;
1682 struct scatterlist *sg;
1683 int i;
1684
1685 for_each_sg(scat, sg, count, i) {
1686 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1687 ib_sg_dma_len(dev->dev, sg),
1688 target->global_rkey);
1689 }
1690
1691 return 0;
1692 }
1693
1694 /*
1695 * Register the indirect data buffer descriptor with the HCA.
1696 *
1697 * Note: since the indirect data buffer descriptor has been allocated with
1698 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1699 * memory buffer.
1700 */
1701 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1702 void **next_mr, void **end_mr, u32 idb_len,
1703 __be32 *idb_rkey)
1704 {
1705 struct srp_target_port *target = ch->target;
1706 struct srp_device *dev = target->srp_host->srp_dev;
1707 struct srp_map_state state;
1708 struct srp_direct_buf idb_desc;
1709 u64 idb_pages[1];
1710 struct scatterlist idb_sg[1];
1711 int ret;
1712
1713 memset(&state, 0, sizeof(state));
1714 memset(&idb_desc, 0, sizeof(idb_desc));
1715 state.gen.next = next_mr;
1716 state.gen.end = end_mr;
1717 state.desc = &idb_desc;
1718 state.base_dma_addr = req->indirect_dma_addr;
1719 state.dma_len = idb_len;
1720
1721 if (dev->use_fast_reg) {
1722 state.sg = idb_sg;
1723 sg_init_one(idb_sg, req->indirect_desc, idb_len);
1724 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1725 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1726 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1727 #endif
1728 ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1729 if (ret < 0)
1730 return ret;
1731 WARN_ON_ONCE(ret < 1);
1732 } else if (dev->use_fmr) {
1733 state.pages = idb_pages;
1734 state.pages[0] = (req->indirect_dma_addr &
1735 dev->mr_page_mask);
1736 state.npages = 1;
1737 ret = srp_map_finish_fmr(&state, ch);
1738 if (ret < 0)
1739 return ret;
1740 } else {
1741 return -EINVAL;
1742 }
1743
1744 *idb_rkey = idb_desc.key;
1745
1746 return 0;
1747 }
1748
1749 static void srp_check_mapping(struct srp_map_state *state,
1750 struct srp_rdma_ch *ch, struct srp_request *req,
1751 struct scatterlist *scat, int count)
1752 {
1753 struct srp_device *dev = ch->target->srp_host->srp_dev;
1754 struct srp_fr_desc **pfr;
1755 u64 desc_len = 0, mr_len = 0;
1756 int i;
1757
1758 for (i = 0; i < state->ndesc; i++)
1759 desc_len += be32_to_cpu(req->indirect_desc[i].len);
1760 if (dev->use_fast_reg)
1761 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1762 mr_len += (*pfr)->mr->length;
1763 else if (dev->use_fmr)
1764 for (i = 0; i < state->nmdesc; i++)
1765 mr_len += be32_to_cpu(req->indirect_desc[i].len);
1766 if (desc_len != scsi_bufflen(req->scmnd) ||
1767 mr_len > scsi_bufflen(req->scmnd))
1768 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1769 scsi_bufflen(req->scmnd), desc_len, mr_len,
1770 state->ndesc, state->nmdesc);
1771 }
1772
1773 /**
1774 * srp_map_data() - map SCSI data buffer onto an SRP request
1775 * @scmnd: SCSI command to map
1776 * @ch: SRP RDMA channel
1777 * @req: SRP request
1778 *
1779 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1780 * mapping failed.
1781 */
1782 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1783 struct srp_request *req)
1784 {
1785 struct srp_target_port *target = ch->target;
1786 struct scatterlist *scat;
1787 struct srp_cmd *cmd = req->cmd->buf;
1788 int len, nents, count, ret;
1789 struct srp_device *dev;
1790 struct ib_device *ibdev;
1791 struct srp_map_state state;
1792 struct srp_indirect_buf *indirect_hdr;
1793 u32 idb_len, table_len;
1794 __be32 idb_rkey;
1795 u8 fmt;
1796
1797 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1798 return sizeof (struct srp_cmd);
1799
1800 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1801 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1802 shost_printk(KERN_WARNING, target->scsi_host,
1803 PFX "Unhandled data direction %d\n",
1804 scmnd->sc_data_direction);
1805 return -EINVAL;
1806 }
1807
1808 nents = scsi_sg_count(scmnd);
1809 scat = scsi_sglist(scmnd);
1810
1811 dev = target->srp_host->srp_dev;
1812 ibdev = dev->dev;
1813
1814 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1815 if (unlikely(count == 0))
1816 return -EIO;
1817
1818 fmt = SRP_DATA_DESC_DIRECT;
1819 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1820
1821 if (count == 1 && target->global_rkey) {
1822 /*
1823 * The midlayer only generated a single gather/scatter
1824 * entry, or DMA mapping coalesced everything to a
1825 * single entry. So a direct descriptor along with
1826 * the DMA MR suffices.
1827 */
1828 struct srp_direct_buf *buf = (void *) cmd->add_data;
1829
1830 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1831 buf->key = cpu_to_be32(target->global_rkey);
1832 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1833
1834 req->nmdesc = 0;
1835 goto map_complete;
1836 }
1837
1838 /*
1839 * We have more than one scatter/gather entry, so build our indirect
1840 * descriptor table, trying to merge as many entries as we can.
1841 */
1842 indirect_hdr = (void *) cmd->add_data;
1843
1844 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1845 target->indirect_size, DMA_TO_DEVICE);
1846
1847 memset(&state, 0, sizeof(state));
1848 state.desc = req->indirect_desc;
1849 if (dev->use_fast_reg)
1850 ret = srp_map_sg_fr(&state, ch, req, scat, count);
1851 else if (dev->use_fmr)
1852 ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1853 else
1854 ret = srp_map_sg_dma(&state, ch, req, scat, count);
1855 req->nmdesc = state.nmdesc;
1856 if (ret < 0)
1857 goto unmap;
1858
1859 {
1860 DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1861 "Memory mapping consistency check");
1862 if (DYNAMIC_DEBUG_BRANCH(ddm))
1863 srp_check_mapping(&state, ch, req, scat, count);
1864 }
1865
1866 /* We've mapped the request, now pull as much of the indirect
1867 * descriptor table as we can into the command buffer. If this
1868 * target is not using an external indirect table, we are
1869 * guaranteed to fit into the command, as the SCSI layer won't
1870 * give us more S/G entries than we allow.
1871 */
1872 if (state.ndesc == 1) {
1873 /*
1874 * Memory registration collapsed the sg-list into one entry,
1875 * so use a direct descriptor.
1876 */
1877 struct srp_direct_buf *buf = (void *) cmd->add_data;
1878
1879 *buf = req->indirect_desc[0];
1880 goto map_complete;
1881 }
1882
1883 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1884 !target->allow_ext_sg)) {
1885 shost_printk(KERN_ERR, target->scsi_host,
1886 "Could not fit S/G list into SRP_CMD\n");
1887 ret = -EIO;
1888 goto unmap;
1889 }
1890
1891 count = min(state.ndesc, target->cmd_sg_cnt);
1892 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1893 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1894
1895 fmt = SRP_DATA_DESC_INDIRECT;
1896 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1897 len += count * sizeof (struct srp_direct_buf);
1898
1899 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1900 count * sizeof (struct srp_direct_buf));
1901
1902 if (!target->global_rkey) {
1903 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1904 idb_len, &idb_rkey);
1905 if (ret < 0)
1906 goto unmap;
1907 req->nmdesc++;
1908 } else {
1909 idb_rkey = cpu_to_be32(target->global_rkey);
1910 }
1911
1912 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1913 indirect_hdr->table_desc.key = idb_rkey;
1914 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1915 indirect_hdr->len = cpu_to_be32(state.total_len);
1916
1917 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1918 cmd->data_out_desc_cnt = count;
1919 else
1920 cmd->data_in_desc_cnt = count;
1921
1922 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1923 DMA_TO_DEVICE);
1924
1925 map_complete:
1926 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1927 cmd->buf_fmt = fmt << 4;
1928 else
1929 cmd->buf_fmt = fmt;
1930
1931 return len;
1932
1933 unmap:
1934 srp_unmap_data(scmnd, ch, req);
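/*
 * If this single request already needed at least as many descriptors as
 * the whole MR pool holds, reducing the queue depth cannot help, so
 * report -E2BIG (fail the command) instead of -ENOMEM (retry later).
 */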
1935 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1936 ret = -E2BIG;
1937 return ret;
1938 }
1939
1940 /*
1941 * Return an IU and possible credit to the free pool
1942 */
1943 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1944 enum srp_iu_type iu_type)
1945 {
1946 unsigned long flags;
1947
1948 spin_lock_irqsave(&ch->lock, flags);
1949 list_add(&iu->list, &ch->free_tx);
1950 if (iu_type != SRP_IU_RSP)
1951 ++ch->req_lim;
1952 spin_unlock_irqrestore(&ch->lock, flags);
1953 }
1954
1955 /*
1956 * Must be called with ch->lock held to protect req_lim and free_tx.
1957 * If IU is not sent, it must be returned using srp_put_tx_iu().
1958 *
1959 * Note:
1960 * An upper limit for the number of allocated information units for each
1961 * request type is:
1962 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1963 * more than Scsi_Host.can_queue requests.
1964 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1965 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1966 * one unanswered SRP request to an initiator.
1967 */
1968 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1969 enum srp_iu_type iu_type)
1970 {
1971 struct srp_target_port *target = ch->target;
1972 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1973 struct srp_iu *iu;
1974
1975 lockdep_assert_held(&ch->lock);
1976
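/*
 * Poll the send CQ so that IUs whose send has completed are returned to
 * ch->free_tx before that list is checked below.
 */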
1977 ib_process_cq_direct(ch->send_cq, -1);
1978
1979 if (list_empty(&ch->free_tx))
1980 return NULL;
1981
1982 /* Initiator responses to target requests do not consume credits */
1983 if (iu_type != SRP_IU_RSP) {
1984 if (ch->req_lim <= rsv) {
1985 ++target->zero_req_lim;
1986 return NULL;
1987 }
1988
1989 --ch->req_lim;
1990 }
1991
1992 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1993 list_del(&iu->list);
1994 return iu;
1995 }
1996
1997 /*
1998 * Note: if this function is called from inside ib_drain_sq() then it will
1999 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
2000 * with status IB_WC_SUCCESS then that's a bug.
2001 */
2002 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
2003 {
2004 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2005 struct srp_rdma_ch *ch = cq->cq_context;
2006
2007 if (unlikely(wc->status != IB_WC_SUCCESS)) {
2008 srp_handle_qp_err(cq, wc, "SEND");
2009 return;
2010 }
2011
2012 lockdep_assert_held(&ch->lock);
2013
2014 list_add(&iu->list, &ch->free_tx);
2015 }
2016
2017 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
2018 {
2019 struct srp_target_port *target = ch->target;
2020 struct ib_sge list;
2021 struct ib_send_wr wr, *bad_wr;
2022
2023 list.addr = iu->dma;
2024 list.length = len;
2025 list.lkey = target->lkey;
2026
2027 iu->cqe.done = srp_send_done;
2028
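/*
 * Every send is posted as a signaled work request so that srp_send_done()
 * runs for each IU and puts it back on ch->free_tx.
 */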
2029 wr.next = NULL;
2030 wr.wr_cqe = &iu->cqe;
2031 wr.sg_list = &list;
2032 wr.num_sge = 1;
2033 wr.opcode = IB_WR_SEND;
2034 wr.send_flags = IB_SEND_SIGNALED;
2035
2036 return ib_post_send(ch->qp, &wr, &bad_wr);
2037 }
2038
2039 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
2040 {
2041 struct srp_target_port *target = ch->target;
2042 struct ib_recv_wr wr, *bad_wr;
2043 struct ib_sge list;
2044
2045 list.addr = iu->dma;
2046 list.length = iu->size;
2047 list.lkey = target->lkey;
2048
2049 iu->cqe.done = srp_recv_done;
2050
2051 wr.next = NULL;
2052 wr.wr_cqe = &iu->cqe;
2053 wr.sg_list = &list;
2054 wr.num_sge = 1;
2055
2056 return ib_post_recv(ch->qp, &wr, &bad_wr);
2057 }
2058
2059 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
2060 {
2061 struct srp_target_port *target = ch->target;
2062 struct srp_request *req;
2063 struct scsi_cmnd *scmnd;
2064 unsigned long flags;
2065
2066 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
2067 spin_lock_irqsave(&ch->lock, flags);
2068 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
2069 if (rsp->tag == ch->tsk_mgmt_tag) {
2070 ch->tsk_mgmt_status = -1;
2071 if (be32_to_cpu(rsp->resp_data_len) >= 4)
2072 ch->tsk_mgmt_status = rsp->data[3];
2073 complete(&ch->tsk_mgmt_done);
2074 } else {
2075 shost_printk(KERN_ERR, target->scsi_host,
2076 "Received tsk mgmt response too late for tag %#llx\n",
2077 rsp->tag);
2078 }
2079 spin_unlock_irqrestore(&ch->lock, flags);
2080 } else {
2081 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
2082 if (scmnd && scmnd->host_scribble) {
2083 req = (void *)scmnd->host_scribble;
2084 scmnd = srp_claim_req(ch, req, NULL, scmnd);
2085 } else {
2086 scmnd = NULL;
2087 }
2088 if (!scmnd) {
2089 shost_printk(KERN_ERR, target->scsi_host,
2090 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
2091 rsp->tag, ch - target->ch, ch->qp->qp_num);
2092
2093 spin_lock_irqsave(&ch->lock, flags);
2094 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
2095 spin_unlock_irqrestore(&ch->lock, flags);
2096
2097 return;
2098 }
2099 scmnd->result = rsp->status;
2100
2101 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
2102 memcpy(scmnd->sense_buffer, rsp->data +
2103 be32_to_cpu(rsp->resp_data_len),
2104 min_t(int, be32_to_cpu(rsp->sense_data_len),
2105 SCSI_SENSE_BUFFERSIZE));
2106 }
2107
2108 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
2109 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
2110 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
2111 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
2112 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
2113 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
2114 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
2115 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
2116
2117 srp_free_req(ch, req, scmnd,
2118 be32_to_cpu(rsp->req_lim_delta));
2119
2120 scmnd->host_scribble = NULL;
2121 scmnd->scsi_done(scmnd);
2122 }
2123 }
2124
2125 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
2126 void *rsp, int len)
2127 {
2128 struct srp_target_port *target = ch->target;
2129 struct ib_device *dev = target->srp_host->srp_dev->dev;
2130 unsigned long flags;
2131 struct srp_iu *iu;
2132 int err;
2133
2134 spin_lock_irqsave(&ch->lock, flags);
2135 ch->req_lim += req_delta;
2136 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
2137 spin_unlock_irqrestore(&ch->lock, flags);
2138
2139 if (!iu) {
2140 shost_printk(KERN_ERR, target->scsi_host, PFX
2141 "no IU available to send response\n");
2142 return 1;
2143 }
2144
2145 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
2146 memcpy(iu->buf, rsp, len);
2147 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
2148
2149 err = srp_post_send(ch, iu, len);
2150 if (err) {
2151 shost_printk(KERN_ERR, target->scsi_host, PFX
2152 "unable to post response: %d\n", err);
2153 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
2154 }
2155
2156 return err;
2157 }
2158
2159 static void srp_process_cred_req(struct srp_rdma_ch *ch,
2160 struct srp_cred_req *req)
2161 {
2162 struct srp_cred_rsp rsp = {
2163 .opcode = SRP_CRED_RSP,
2164 .tag = req->tag,
2165 };
2166 s32 delta = be32_to_cpu(req->req_lim_delta);
2167
2168 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2169 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
2170 "problems processing SRP_CRED_REQ\n");
2171 }
2172
2173 static void srp_process_aer_req(struct srp_rdma_ch *ch,
2174 struct srp_aer_req *req)
2175 {
2176 struct srp_target_port *target = ch->target;
2177 struct srp_aer_rsp rsp = {
2178 .opcode = SRP_AER_RSP,
2179 .tag = req->tag,
2180 };
2181 s32 delta = be32_to_cpu(req->req_lim_delta);
2182
2183 shost_printk(KERN_ERR, target->scsi_host, PFX
2184 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2185
2186 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2187 shost_printk(KERN_ERR, target->scsi_host, PFX
2188 "problems processing SRP_AER_REQ\n");
2189 }
2190
2191 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2192 {
2193 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2194 struct srp_rdma_ch *ch = cq->cq_context;
2195 struct srp_target_port *target = ch->target;
2196 struct ib_device *dev = target->srp_host->srp_dev->dev;
2197 int res;
2198 u8 opcode;
2199
2200 if (unlikely(wc->status != IB_WC_SUCCESS)) {
2201 srp_handle_qp_err(cq, wc, "RECV");
2202 return;
2203 }
2204
2205 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2206 DMA_FROM_DEVICE);
2207
2208 opcode = *(u8 *) iu->buf;
2209
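/* Debugging aid: change "if (0)" into "if (1)" to dump every received IU. */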
2210 if (0) {
2211 shost_printk(KERN_ERR, target->scsi_host,
2212 PFX "recv completion, opcode 0x%02x\n", opcode);
2213 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2214 iu->buf, wc->byte_len, true);
2215 }
2216
2217 switch (opcode) {
2218 case SRP_RSP:
2219 srp_process_rsp(ch, iu->buf);
2220 break;
2221
2222 case SRP_CRED_REQ:
2223 srp_process_cred_req(ch, iu->buf);
2224 break;
2225
2226 case SRP_AER_REQ:
2227 srp_process_aer_req(ch, iu->buf);
2228 break;
2229
2230 case SRP_T_LOGOUT:
2231 /* XXX Handle target logout */
2232 shost_printk(KERN_WARNING, target->scsi_host,
2233 PFX "Got target logout request\n");
2234 break;
2235
2236 default:
2237 shost_printk(KERN_WARNING, target->scsi_host,
2238 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2239 break;
2240 }
2241
2242 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2243 DMA_FROM_DEVICE);
2244
2245 res = srp_post_recv(ch, iu);
2246 if (res != 0)
2247 shost_printk(KERN_ERR, target->scsi_host,
2248 PFX "Recv failed with error code %d\n", res);
2249 }
2250
2251 /**
2252 * srp_tl_err_work() - handle a transport layer error
2253 * @work: Work structure embedded in an SRP target port.
2254 *
2255 * Note: This function may get invoked before the rport has been created,
2256 * hence the target->rport test.
2257 */
2258 static void srp_tl_err_work(struct work_struct *work)
2259 {
2260 struct srp_target_port *target;
2261
2262 target = container_of(work, struct srp_target_port, tl_err_work);
2263 if (target->rport)
2264 srp_start_tl_fail_timers(target->rport);
2265 }
2266
2267 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2268 const char *opname)
2269 {
2270 struct srp_rdma_ch *ch = cq->cq_context;
2271 struct srp_target_port *target = ch->target;
2272
2273 if (ch->connected && !target->qp_in_error) {
2274 shost_printk(KERN_ERR, target->scsi_host,
2275 PFX "failed %s status %s (%d) for CQE %p\n",
2276 opname, ib_wc_status_msg(wc->status), wc->status,
2277 wc->wr_cqe);
2278 queue_work(system_long_wq, &target->tl_err_work);
2279 }
2280 target->qp_in_error = true;
2281 }
2282
2283 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2284 {
2285 struct srp_target_port *target = host_to_target(shost);
2286 struct srp_rport *rport = target->rport;
2287 struct srp_rdma_ch *ch;
2288 struct srp_request *req;
2289 struct srp_iu *iu;
2290 struct srp_cmd *cmd;
2291 struct ib_device *dev;
2292 unsigned long flags;
2293 u32 tag;
2294 u16 idx;
2295 int len, ret;
2296 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2297
2298 /*
2299 * The SCSI EH thread is the only context from which srp_queuecommand()
2300 * can get invoked for blocked devices (SDEV_BLOCK /
2301 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2302 * locking the rport mutex if invoked from inside the SCSI EH.
2303 */
2304 if (in_scsi_eh)
2305 mutex_lock(&rport->mutex);
2306
2307 scmnd->result = srp_chkready(target->rport);
2308 if (unlikely(scmnd->result))
2309 goto err;
2310
2311 WARN_ON_ONCE(scmnd->request->tag < 0);
2312 tag = blk_mq_unique_tag(scmnd->request);
2313 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2314 idx = blk_mq_unique_tag_to_tag(tag);
2315 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2316 dev_name(&shost->shost_gendev), tag, idx,
2317 target->req_ring_size);
2318
2319 spin_lock_irqsave(&ch->lock, flags);
2320 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2321 spin_unlock_irqrestore(&ch->lock, flags);
2322
2323 if (!iu)
2324 goto err;
2325
2326 req = &ch->req_ring[idx];
2327 dev = target->srp_host->srp_dev->dev;
2328 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2329 DMA_TO_DEVICE);
2330
2331 scmnd->host_scribble = (void *) req;
2332
2333 cmd = iu->buf;
2334 memset(cmd, 0, sizeof *cmd);
2335
2336 cmd->opcode = SRP_CMD;
2337 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2338 cmd->tag = tag;
2339 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2340
2341 req->scmnd = scmnd;
2342 req->cmd = iu;
2343
2344 len = srp_map_data(scmnd, ch, req);
2345 if (len < 0) {
2346 shost_printk(KERN_ERR, target->scsi_host,
2347 PFX "Failed to map data (%d)\n", len);
2348 /*
2349 * If we ran out of memory descriptors (-ENOMEM) because an
2350 * application is queuing many requests with more than
2351 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2352 * to reduce queue depth temporarily.
2353 */
2354 scmnd->result = len == -ENOMEM ?
2355 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2356 goto err_iu;
2357 }
2358
2359 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2360 DMA_TO_DEVICE);
2361
2362 if (srp_post_send(ch, iu, len)) {
2363 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2364 goto err_unmap;
2365 }
2366
2367 ret = 0;
2368
2369 unlock_rport:
2370 if (in_scsi_eh)
2371 mutex_unlock(&rport->mutex);
2372
2373 return ret;
2374
2375 err_unmap:
2376 srp_unmap_data(scmnd, ch, req);
2377
2378 err_iu:
2379 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2380
2381 /*
2382 * Make sure that the loops that iterate over the request ring never
2383 * encounter a dangling SCSI command pointer.
2384 */
2385 req->scmnd = NULL;
2386
2387 err:
2388 if (scmnd->result) {
2389 scmnd->scsi_done(scmnd);
2390 ret = 0;
2391 } else {
2392 ret = SCSI_MLQUEUE_HOST_BUSY;
2393 }
2394
2395 goto unlock_rport;
2396 }
2397
2398 /*
2399 * Note: the resources allocated in this function are freed in
2400 * srp_free_ch_ib().
2401 */
2402 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2403 {
2404 struct srp_target_port *target = ch->target;
2405 int i;
2406
2407 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2408 GFP_KERNEL);
2409 if (!ch->rx_ring)
2410 goto err_no_ring;
2411 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2412 GFP_KERNEL);
2413 if (!ch->tx_ring)
2414 goto err_no_ring;
2415
2416 for (i = 0; i < target->queue_size; ++i) {
2417 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2418 ch->max_ti_iu_len,
2419 GFP_KERNEL, DMA_FROM_DEVICE);
2420 if (!ch->rx_ring[i])
2421 goto err;
2422 }
2423
2424 for (i = 0; i < target->queue_size; ++i) {
2425 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2426 target->max_iu_len,
2427 GFP_KERNEL, DMA_TO_DEVICE);
2428 if (!ch->tx_ring[i])
2429 goto err;
2430
2431 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2432 }
2433
2434 return 0;
2435
2436 err:
2437 for (i = 0; i < target->queue_size; ++i) {
2438 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2439 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2440 }
2441
2443 err_no_ring:
2444 kfree(ch->tx_ring);
2445 ch->tx_ring = NULL;
2446 kfree(ch->rx_ring);
2447 ch->rx_ring = NULL;
2448
2449 return -ENOMEM;
2450 }
2451
2452 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2453 {
2454 uint64_t T_tr_ns, max_compl_time_ms;
2455 uint32_t rq_tmo_jiffies;
2456
2457 /*
2458 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2459 * table 91), both the QP timeout and the retry count have to be set
2460 * for RC QP's during the RTR to RTS transition.
2461 */
2462 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2463 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2464
2465 /*
2466 * Set target->rq_tmo_jiffies to one second more than the largest time
2467 * it can take before an error completion is generated. See also
2468 * C9-140..142 in the IBTA spec for more information about how to
2469 * convert the QP Local ACK Timeout value to nanoseconds.
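 *
 * For example (illustrative values only): timeout = 19 and retry_cnt = 7
 * yield T_tr_ns = 4096 ns * 2^19, i.e. about 2.1 s, so max_compl_time_ms
 * is roughly 7 * 4 * 2.1 s = 60 s and the resulting timeout is about
 * 61 seconds.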
2470 */
2471 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2472 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2473 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2474 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2475
2476 return rq_tmo_jiffies;
2477 }
2478
2479 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2480 const struct srp_login_rsp *lrsp,
2481 struct srp_rdma_ch *ch)
2482 {
2483 struct srp_target_port *target = ch->target;
2484 struct ib_qp_attr *qp_attr = NULL;
2485 int attr_mask = 0;
2486 int ret = 0;
2487 int i;
2488
2489 if (lrsp->opcode == SRP_LOGIN_RSP) {
2490 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2491 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2492
2493 /*
2494 * Reserve credits for task management so we don't
2495 * bounce requests back to the SCSI mid-layer.
2496 */
2497 target->scsi_host->can_queue
2498 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2499 target->scsi_host->can_queue);
2500 target->scsi_host->cmd_per_lun
2501 = min_t(int, target->scsi_host->can_queue,
2502 target->scsi_host->cmd_per_lun);
2503 } else {
2504 shost_printk(KERN_WARNING, target->scsi_host,
2505 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2506 ret = -ECONNRESET;
2507 goto error;
2508 }
2509
2510 if (!ch->rx_ring) {
2511 ret = srp_alloc_iu_bufs(ch);
2512 if (ret)
2513 goto error;
2514 }
2515
2516 for (i = 0; i < target->queue_size; i++) {
2517 struct srp_iu *iu = ch->rx_ring[i];
2518
2519 ret = srp_post_recv(ch, iu);
2520 if (ret)
2521 goto error;
2522 }
2523
2524 if (!target->using_rdma_cm) {
2525 ret = -ENOMEM;
2526 qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
2527 if (!qp_attr)
2528 goto error;
2529
2530 qp_attr->qp_state = IB_QPS_RTR;
2531 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2532 if (ret)
2533 goto error_free;
2534
2535 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2536 if (ret)
2537 goto error_free;
2538
2539 qp_attr->qp_state = IB_QPS_RTS;
2540 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2541 if (ret)
2542 goto error_free;
2543
2544 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2545
2546 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2547 if (ret)
2548 goto error_free;
2549
2550 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2551 }
2552
2553 error_free:
2554 kfree(qp_attr);
2555
2556 error:
2557 ch->status = ret;
2558 }
2559
2560 static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
2561 struct ib_cm_event *event,
2562 struct srp_rdma_ch *ch)
2563 {
2564 struct srp_target_port *target = ch->target;
2565 struct Scsi_Host *shost = target->scsi_host;
2566 struct ib_class_port_info *cpi;
2567 int opcode;
2568 u16 dlid;
2569
2570 switch (event->param.rej_rcvd.reason) {
2571 case IB_CM_REJ_PORT_CM_REDIRECT:
2572 cpi = event->param.rej_rcvd.ari;
2573 dlid = be16_to_cpu(cpi->redirect_lid);
2574 sa_path_set_dlid(&ch->ib_cm.path, dlid);
2575 ch->ib_cm.path.pkey = cpi->redirect_pkey;
2576 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2577 memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16);
2578
2579 ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2580 break;
2581
2582 case IB_CM_REJ_PORT_REDIRECT:
2583 if (srp_target_is_topspin(target)) {
2584 union ib_gid *dgid = &ch->ib_cm.path.dgid;
2585
2586 /*
2587 * Topspin/Cisco SRP gateways incorrectly send
2588 * reject reason code 25 when they mean 24
2589 * (port redirect).
2590 */
2591 memcpy(dgid->raw, event->param.rej_rcvd.ari, 16);
2592
2593 shost_printk(KERN_DEBUG, shost,
2594 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2595 be64_to_cpu(dgid->global.subnet_prefix),
2596 be64_to_cpu(dgid->global.interface_id));
2597
2598 ch->status = SRP_PORT_REDIRECT;
2599 } else {
2600 shost_printk(KERN_WARNING, shost,
2601 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2602 ch->status = -ECONNRESET;
2603 }
2604 break;
2605
2606 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2607 shost_printk(KERN_WARNING, shost,
2608 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2609 ch->status = -ECONNRESET;
2610 break;
2611
2612 case IB_CM_REJ_CONSUMER_DEFINED:
2613 opcode = *(u8 *) event->private_data;
2614 if (opcode == SRP_LOGIN_REJ) {
2615 struct srp_login_rej *rej = event->private_data;
2616 u32 reason = be32_to_cpu(rej->reason);
2617
2618 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2619 shost_printk(KERN_WARNING, shost,
2620 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2621 else
2622 shost_printk(KERN_WARNING, shost, PFX
2623 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2624 target->sgid.raw,
2625 target->ib_cm.orig_dgid.raw,
2626 reason);
2627 } else
2628 shost_printk(KERN_WARNING, shost,
2629 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2630 " opcode 0x%02x\n", opcode);
2631 ch->status = -ECONNRESET;
2632 break;
2633
2634 case IB_CM_REJ_STALE_CONN:
2635 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2636 ch->status = SRP_STALE_CONN;
2637 break;
2638
2639 default:
2640 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2641 event->param.rej_rcvd.reason);
2642 ch->status = -ECONNRESET;
2643 }
2644 }
2645
2646 static int srp_ib_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2647 {
2648 struct srp_rdma_ch *ch = cm_id->context;
2649 struct srp_target_port *target = ch->target;
2650 int comp = 0;
2651
2652 switch (event->event) {
2653 case IB_CM_REQ_ERROR:
2654 shost_printk(KERN_DEBUG, target->scsi_host,
2655 PFX "Sending CM REQ failed\n");
2656 comp = 1;
2657 ch->status = -ECONNRESET;
2658 break;
2659
2660 case IB_CM_REP_RECEIVED:
2661 comp = 1;
2662 srp_cm_rep_handler(cm_id, event->private_data, ch);
2663 break;
2664
2665 case IB_CM_REJ_RECEIVED:
2666 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2667 comp = 1;
2668
2669 srp_ib_cm_rej_handler(cm_id, event, ch);
2670 break;
2671
2672 case IB_CM_DREQ_RECEIVED:
2673 shost_printk(KERN_WARNING, target->scsi_host,
2674 PFX "DREQ received - connection closed\n");
2675 ch->connected = false;
2676 if (ib_send_cm_drep(cm_id, NULL, 0))
2677 shost_printk(KERN_ERR, target->scsi_host,
2678 PFX "Sending CM DREP failed\n");
2679 queue_work(system_long_wq, &target->tl_err_work);
2680 break;
2681
2682 case IB_CM_TIMEWAIT_EXIT:
2683 shost_printk(KERN_ERR, target->scsi_host,
2684 PFX "connection closed\n");
2685 comp = 1;
2686
2687 ch->status = 0;
2688 break;
2689
2690 case IB_CM_MRA_RECEIVED:
2691 case IB_CM_DREQ_ERROR:
2692 case IB_CM_DREP_RECEIVED:
2693 break;
2694
2695 default:
2696 shost_printk(KERN_WARNING, target->scsi_host,
2697 PFX "Unhandled CM event %d\n", event->event);
2698 break;
2699 }
2700
2701 if (comp)
2702 complete(&ch->done);
2703
2704 return 0;
2705 }
2706
2707 static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch,
2708 struct rdma_cm_event *event)
2709 {
2710 struct srp_target_port *target = ch->target;
2711 struct Scsi_Host *shost = target->scsi_host;
2712 int opcode;
2713
2714 switch (event->status) {
2715 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2716 shost_printk(KERN_WARNING, shost,
2717 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2718 ch->status = -ECONNRESET;
2719 break;
2720
2721 case IB_CM_REJ_CONSUMER_DEFINED:
2722 opcode = *(u8 *) event->param.conn.private_data;
2723 if (opcode == SRP_LOGIN_REJ) {
2724 struct srp_login_rej *rej =
2725 (struct srp_login_rej *)
2726 event->param.conn.private_data;
2727 u32 reason = be32_to_cpu(rej->reason);
2728
2729 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2730 shost_printk(KERN_WARNING, shost,
2731 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2732 else
2733 shost_printk(KERN_WARNING, shost,
2734 PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
2735 } else {
2736 shost_printk(KERN_WARNING, shost,
2737 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2738 opcode);
2739 }
2740 ch->status = -ECONNRESET;
2741 break;
2742
2743 case IB_CM_REJ_STALE_CONN:
2744 shost_printk(KERN_WARNING, shost,
2745 " REJ reason: stale connection\n");
2746 ch->status = SRP_STALE_CONN;
2747 break;
2748
2749 default:
2750 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2751 event->status);
2752 ch->status = -ECONNRESET;
2753 break;
2754 }
2755 }
2756
2757 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
2758 struct rdma_cm_event *event)
2759 {
2760 struct srp_rdma_ch *ch = cm_id->context;
2761 struct srp_target_port *target = ch->target;
2762 int comp = 0;
2763
2764 switch (event->event) {
2765 case RDMA_CM_EVENT_ADDR_RESOLVED:
2766 ch->status = 0;
2767 comp = 1;
2768 break;
2769
2770 case RDMA_CM_EVENT_ADDR_ERROR:
2771 ch->status = -ENXIO;
2772 comp = 1;
2773 break;
2774
2775 case RDMA_CM_EVENT_ROUTE_RESOLVED:
2776 ch->status = 0;
2777 comp = 1;
2778 break;
2779
2780 case RDMA_CM_EVENT_ROUTE_ERROR:
2781 case RDMA_CM_EVENT_UNREACHABLE:
2782 ch->status = -EHOSTUNREACH;
2783 comp = 1;
2784 break;
2785
2786 case RDMA_CM_EVENT_CONNECT_ERROR:
2787 shost_printk(KERN_DEBUG, target->scsi_host,
2788 PFX "Sending CM REQ failed\n");
2789 comp = 1;
2790 ch->status = -ECONNRESET;
2791 break;
2792
2793 case RDMA_CM_EVENT_ESTABLISHED:
2794 comp = 1;
2795 srp_cm_rep_handler(NULL, event->param.conn.private_data, ch);
2796 break;
2797
2798 case RDMA_CM_EVENT_REJECTED:
2799 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2800 comp = 1;
2801
2802 srp_rdma_cm_rej_handler(ch, event);
2803 break;
2804
2805 case RDMA_CM_EVENT_DISCONNECTED:
2806 if (ch->connected) {
2807 shost_printk(KERN_WARNING, target->scsi_host,
2808 PFX "received DREQ\n");
2809 rdma_disconnect(ch->rdma_cm.cm_id);
2810 comp = 1;
2811 ch->status = 0;
2812 queue_work(system_long_wq, &target->tl_err_work);
2813 }
2814 break;
2815
2816 case RDMA_CM_EVENT_TIMEWAIT_EXIT:
2817 shost_printk(KERN_ERR, target->scsi_host,
2818 PFX "connection closed\n");
2819
2820 comp = 1;
2821 ch->status = 0;
2822 break;
2823
2824 default:
2825 shost_printk(KERN_WARNING, target->scsi_host,
2826 PFX "Unhandled CM event %d\n", event->event);
2827 break;
2828 }
2829
2830 if (comp)
2831 complete(&ch->done);
2832
2833 return 0;
2834 }
2835
2836 /**
2837 * srp_change_queue_depth - set the device queue depth
2838 * @sdev: scsi device struct
2839 * @qdepth: requested queue depth
2840 *
2841 * Returns queue depth.
2842 */
2843 static int
2844 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2845 {
2846 if (!sdev->tagged_supported)
2847 qdepth = 1;
2848 return scsi_change_queue_depth(sdev, qdepth);
2849 }
2850
2851 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2852 u8 func, u8 *status)
2853 {
2854 struct srp_target_port *target = ch->target;
2855 struct srp_rport *rport = target->rport;
2856 struct ib_device *dev = target->srp_host->srp_dev->dev;
2857 struct srp_iu *iu;
2858 struct srp_tsk_mgmt *tsk_mgmt;
2859 int res;
2860
2861 if (!ch->connected || target->qp_in_error)
2862 return -1;
2863
2864 /*
2865 * Lock the rport mutex to prevent srp_create_ch_ib() from being
2866 * invoked while a task management function is being sent.
2867 */
2868 mutex_lock(&rport->mutex);
2869 spin_lock_irq(&ch->lock);
2870 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2871 spin_unlock_irq(&ch->lock);
2872
2873 if (!iu) {
2874 mutex_unlock(&rport->mutex);
2875
2876 return -1;
2877 }
2878
2879 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2880 DMA_TO_DEVICE);
2881 tsk_mgmt = iu->buf;
2882 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2883
2884 tsk_mgmt->opcode = SRP_TSK_MGMT;
2885 int_to_scsilun(lun, &tsk_mgmt->lun);
2886 tsk_mgmt->tsk_mgmt_func = func;
2887 tsk_mgmt->task_tag = req_tag;
2888
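/*
 * Setting the SRP_TAG_TSK_MGMT bit lets srp_process_rsp() distinguish
 * task management responses from responses to regular SCSI commands.
 */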
2889 spin_lock_irq(&ch->lock);
2890 ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2891 tsk_mgmt->tag = ch->tsk_mgmt_tag;
2892 spin_unlock_irq(&ch->lock);
2893
2894 init_completion(&ch->tsk_mgmt_done);
2895
2896 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2897 DMA_TO_DEVICE);
2898 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2899 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2900 mutex_unlock(&rport->mutex);
2901
2902 return -1;
2903 }
2904 res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2905 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2906 if (res > 0 && status)
2907 *status = ch->tsk_mgmt_status;
2908 mutex_unlock(&rport->mutex);
2909
2910 WARN_ON_ONCE(res < 0);
2911
2912 return res > 0 ? 0 : -1;
2913 }
2914
2915 static int srp_abort(struct scsi_cmnd *scmnd)
2916 {
2917 struct srp_target_port *target = host_to_target(scmnd->device->host);
2918 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2919 u32 tag;
2920 u16 ch_idx;
2921 struct srp_rdma_ch *ch;
2922 int ret;
2923
2924 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2925
2926 if (!req)
2927 return SUCCESS;
2928 tag = blk_mq_unique_tag(scmnd->request);
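/*
 * The hardware queue number encoded in the blk-mq tag identifies the
 * RDMA channel that srp_queuecommand() used for this command.
 */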
2929 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2930 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2931 return SUCCESS;
2932 ch = &target->ch[ch_idx];
2933 if (!srp_claim_req(ch, req, NULL, scmnd))
2934 return SUCCESS;
2935 shost_printk(KERN_ERR, target->scsi_host,
2936 "Sending SRP abort for tag %#x\n", tag);
2937 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2938 SRP_TSK_ABORT_TASK, NULL) == 0)
2939 ret = SUCCESS;
2940 else if (target->rport->state == SRP_RPORT_LOST)
2941 ret = FAST_IO_FAIL;
2942 else
2943 ret = FAILED;
2944 if (ret == SUCCESS) {
2945 srp_free_req(ch, req, scmnd, 0);
2946 scmnd->result = DID_ABORT << 16;
2947 scmnd->scsi_done(scmnd);
2948 }
2949
2950 return ret;
2951 }
2952
2953 static int srp_reset_device(struct scsi_cmnd *scmnd)
2954 {
2955 struct srp_target_port *target = host_to_target(scmnd->device->host);
2956 struct srp_rdma_ch *ch;
2957 int i, j;
2958 u8 status;
2959
2960 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2961
2962 ch = &target->ch[0];
2963 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2964 SRP_TSK_LUN_RESET, &status))
2965 return FAILED;
2966 if (status)
2967 return FAILED;
2968
2969 for (i = 0; i < target->ch_count; i++) {
2970 ch = &target->ch[i];
2971 for (j = 0; j < target->req_ring_size; ++j) {
2972 struct srp_request *req = &ch->req_ring[j];
2973
2974 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2975 }
2976 }
2977
2978 return SUCCESS;
2979 }
2980
2981 static int srp_reset_host(struct scsi_cmnd *scmnd)
2982 {
2983 struct srp_target_port *target = host_to_target(scmnd->device->host);
2984
2985 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2986
2987 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2988 }
2989
2990 static int srp_target_alloc(struct scsi_target *starget)
2991 {
2992 struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
2993 struct srp_target_port *target = host_to_target(shost);
2994
2995 if (target->target_can_queue)
2996 starget->can_queue = target->target_can_queue;
2997 return 0;
2998 }
2999
3000 static int srp_slave_alloc(struct scsi_device *sdev)
3001 {
3002 struct Scsi_Host *shost = sdev->host;
3003 struct srp_target_port *target = host_to_target(shost);
3004 struct srp_device *srp_dev = target->srp_host->srp_dev;
3005 struct ib_device *ibdev = srp_dev->dev;
3006
3007 if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
3008 blk_queue_virt_boundary(sdev->request_queue,
3009 ~srp_dev->mr_page_mask);
3010
3011 return 0;
3012 }
3013
3014 static int srp_slave_configure(struct scsi_device *sdev)
3015 {
3016 struct Scsi_Host *shost = sdev->host;
3017 struct srp_target_port *target = host_to_target(shost);
3018 struct request_queue *q = sdev->request_queue;
3019 unsigned long timeout;
3020
3021 if (sdev->type == TYPE_DISK) {
3022 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
3023 blk_queue_rq_timeout(q, timeout);
3024 }
3025
3026 return 0;
3027 }
3028
3029 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
3030 char *buf)
3031 {
3032 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3033
3034 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
3035 }
3036
3037 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
3038 char *buf)
3039 {
3040 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3041
3042 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
3043 }
3044
3045 static ssize_t show_service_id(struct device *dev,
3046 struct device_attribute *attr, char *buf)
3047 {
3048 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3049
3050 if (target->using_rdma_cm)
3051 return -ENOENT;
3052 return sprintf(buf, "0x%016llx\n",
3053 be64_to_cpu(target->ib_cm.service_id));
3054 }
3055
3056 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
3057 char *buf)
3058 {
3059 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3060
3061 if (target->using_rdma_cm)
3062 return -ENOENT;
3063 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
3064 }
3065
3066 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
3067 char *buf)
3068 {
3069 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3070
3071 return sprintf(buf, "%pI6\n", target->sgid.raw);
3072 }
3073
3074 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
3075 char *buf)
3076 {
3077 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3078 struct srp_rdma_ch *ch = &target->ch[0];
3079
3080 if (target->using_rdma_cm)
3081 return -ENOENT;
3082 return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
3083 }
3084
3085 static ssize_t show_orig_dgid(struct device *dev,
3086 struct device_attribute *attr, char *buf)
3087 {
3088 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3089
3090 if (target->using_rdma_cm)
3091 return -ENOENT;
3092 return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
3093 }
3094
3095 static ssize_t show_req_lim(struct device *dev,
3096 struct device_attribute *attr, char *buf)
3097 {
3098 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3099 struct srp_rdma_ch *ch;
3100 int i, req_lim = INT_MAX;
3101
3102 for (i = 0; i < target->ch_count; i++) {
3103 ch = &target->ch[i];
3104 req_lim = min(req_lim, ch->req_lim);
3105 }
3106 return sprintf(buf, "%d\n", req_lim);
3107 }
3108
3109 static ssize_t show_zero_req_lim(struct device *dev,
3110 struct device_attribute *attr, char *buf)
3111 {
3112 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3113
3114 return sprintf(buf, "%d\n", target->zero_req_lim);
3115 }
3116
3117 static ssize_t show_local_ib_port(struct device *dev,
3118 struct device_attribute *attr, char *buf)
3119 {
3120 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3121
3122 return sprintf(buf, "%d\n", target->srp_host->port);
3123 }
3124
3125 static ssize_t show_local_ib_device(struct device *dev,
3126 struct device_attribute *attr, char *buf)
3127 {
3128 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3129
3130 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
3131 }
3132
3133 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
3134 char *buf)
3135 {
3136 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3137
3138 return sprintf(buf, "%d\n", target->ch_count);
3139 }
3140
3141 static ssize_t show_comp_vector(struct device *dev,
3142 struct device_attribute *attr, char *buf)
3143 {
3144 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3145
3146 return sprintf(buf, "%d\n", target->comp_vector);
3147 }
3148
3149 static ssize_t show_tl_retry_count(struct device *dev,
3150 struct device_attribute *attr, char *buf)
3151 {
3152 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3153
3154 return sprintf(buf, "%d\n", target->tl_retry_count);
3155 }
3156
3157 static ssize_t show_cmd_sg_entries(struct device *dev,
3158 struct device_attribute *attr, char *buf)
3159 {
3160 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3161
3162 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
3163 }
3164
3165 static ssize_t show_allow_ext_sg(struct device *dev,
3166 struct device_attribute *attr, char *buf)
3167 {
3168 struct srp_target_port *target = host_to_target(class_to_shost(dev));
3169
3170 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
3171 }
3172
3173 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
3174 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
3175 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
3176 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
3177 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
3178 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
3179 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
3180 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
3181 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
3182 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
3183 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
3184 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
3185 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
3186 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
3187 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
3188 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
3189
3190 static struct device_attribute *srp_host_attrs[] = {
3191 &dev_attr_id_ext,
3192 &dev_attr_ioc_guid,
3193 &dev_attr_service_id,
3194 &dev_attr_pkey,
3195 &dev_attr_sgid,
3196 &dev_attr_dgid,
3197 &dev_attr_orig_dgid,
3198 &dev_attr_req_lim,
3199 &dev_attr_zero_req_lim,
3200 &dev_attr_local_ib_port,
3201 &dev_attr_local_ib_device,
3202 &dev_attr_ch_count,
3203 &dev_attr_comp_vector,
3204 &dev_attr_tl_retry_count,
3205 &dev_attr_cmd_sg_entries,
3206 &dev_attr_allow_ext_sg,
3207 NULL
3208 };
3209
3210 static struct scsi_host_template srp_template = {
3211 .module = THIS_MODULE,
3212 .name = "InfiniBand SRP initiator",
3213 .proc_name = DRV_NAME,
3214 .target_alloc = srp_target_alloc,
3215 .slave_alloc = srp_slave_alloc,
3216 .slave_configure = srp_slave_configure,
3217 .info = srp_target_info,
3218 .queuecommand = srp_queuecommand,
3219 .change_queue_depth = srp_change_queue_depth,
3220 .eh_timed_out = srp_timed_out,
3221 .eh_abort_handler = srp_abort,
3222 .eh_device_reset_handler = srp_reset_device,
3223 .eh_host_reset_handler = srp_reset_host,
3224 .skip_settle_delay = true,
3225 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
3226 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
3227 .this_id = -1,
3228 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
3229 .use_clustering = ENABLE_CLUSTERING,
3230 .shost_attrs = srp_host_attrs,
3231 .track_queue_depth = 1,
3232 };
3233
3234 static int srp_sdev_count(struct Scsi_Host *host)
3235 {
3236 struct scsi_device *sdev;
3237 int c = 0;
3238
3239 shost_for_each_device(sdev, host)
3240 c++;
3241
3242 return c;
3243 }
3244
3245 /*
3246 * Return values:
3247 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
3248 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
3249 * removal has been scheduled.
3250 * 0 and target->state != SRP_TARGET_REMOVED upon success.
3251 */
3252 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
3253 {
3254 struct srp_rport_identifiers ids;
3255 struct srp_rport *rport;
3256
3257 target->state = SRP_TARGET_SCANNING;
3258 sprintf(target->target_name, "SRP.T10:%016llX",
3259 be64_to_cpu(target->id_ext));
3260
3261 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
3262 return -ENODEV;
3263
3264 memcpy(ids.port_id, &target->id_ext, 8);
3265 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
3266 ids.roles = SRP_RPORT_ROLE_TARGET;
3267 rport = srp_rport_add(target->scsi_host, &ids);
3268 if (IS_ERR(rport)) {
3269 scsi_remove_host(target->scsi_host);
3270 return PTR_ERR(rport);
3271 }
3272
3273 rport->lld_data = target;
3274 target->rport = rport;
3275
3276 spin_lock(&host->target_lock);
3277 list_add_tail(&target->list, &host->target_list);
3278 spin_unlock(&host->target_lock);
3279
3280 scsi_scan_target(&target->scsi_host->shost_gendev,
3281 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
3282
3283 if (srp_connected_ch(target) < target->ch_count ||
3284 target->qp_in_error) {
3285 shost_printk(KERN_INFO, target->scsi_host,
3286 PFX "SCSI scan failed - removing SCSI host\n");
3287 srp_queue_remove_work(target);
3288 goto out;
3289 }
3290
3291 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
3292 dev_name(&target->scsi_host->shost_gendev),
3293 srp_sdev_count(target->scsi_host));
3294
3295 spin_lock_irq(&target->lock);
3296 if (target->state == SRP_TARGET_SCANNING)
3297 target->state = SRP_TARGET_LIVE;
3298 spin_unlock_irq(&target->lock);
3299
3300 out:
3301 return 0;
3302 }
3303
3304 static void srp_release_dev(struct device *dev)
3305 {
3306 struct srp_host *host =
3307 container_of(dev, struct srp_host, dev);
3308
3309 complete(&host->released);
3310 }
3311
3312 static struct class srp_class = {
3313 .name = "infiniband_srp",
3314 .dev_release = srp_release_dev
3315 };
3316
3317 /**
3318 * srp_conn_unique() - check whether the connection to a target is unique
3319 * @host: SRP host.
3320 * @target: SRP target port.
3321 */
3322 static bool srp_conn_unique(struct srp_host *host,
3323 struct srp_target_port *target)
3324 {
3325 struct srp_target_port *t;
3326 bool ret = false;
3327
3328 if (target->state == SRP_TARGET_REMOVED)
3329 goto out;
3330
3331 ret = true;
3332
3333 spin_lock(&host->target_lock);
3334 list_for_each_entry(t, &host->target_list, list) {
3335 if (t != target &&
3336 target->id_ext == t->id_ext &&
3337 target->ioc_guid == t->ioc_guid &&
3338 target->initiator_ext == t->initiator_ext) {
3339 ret = false;
3340 break;
3341 }
3342 }
3343 spin_unlock(&host->target_lock);
3344
3345 out:
3346 return ret;
3347 }
3348
3349 /*
3350 * Target ports are added by writing
3351 *
3352 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3353 * pkey=<P_Key>,service_id=<service ID>
3354 * or
3355 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,
3356 * [src=<IPv4 address>,]dest=<IPv4 address>:<port number>
3357 *
3358 * to the add_target sysfs attribute.
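 *
 * For example (the sysfs path and all parameter values below are
 * illustrative only; the attribute lives under the infiniband_srp class,
 * typically /sys/class/infiniband_srp/srp-<hca>-<port>/add_target):
 *
 *   echo "id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,\
 *         pkey=<P_Key>,service_id=<service ID>" \
 *       > /sys/class/infiniband_srp/srp-<hca>-<port>/add_target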
3359 */
3360 enum {
3361 SRP_OPT_ERR = 0,
3362 SRP_OPT_ID_EXT = 1 << 0,
3363 SRP_OPT_IOC_GUID = 1 << 1,
3364 SRP_OPT_DGID = 1 << 2,
3365 SRP_OPT_PKEY = 1 << 3,
3366 SRP_OPT_SERVICE_ID = 1 << 4,
3367 SRP_OPT_MAX_SECT = 1 << 5,
3368 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
3369 SRP_OPT_IO_CLASS = 1 << 7,
3370 SRP_OPT_INITIATOR_EXT = 1 << 8,
3371 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
3372 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
3373 SRP_OPT_SG_TABLESIZE = 1 << 11,
3374 SRP_OPT_COMP_VECTOR = 1 << 12,
3375 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
3376 SRP_OPT_QUEUE_SIZE = 1 << 14,
3377 SRP_OPT_IP_SRC = 1 << 15,
3378 SRP_OPT_IP_DEST = 1 << 16,
3379 SRP_OPT_TARGET_CAN_QUEUE= 1 << 17,
3380 };
3381
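/*
 * A target creation request must include at least one of the parameter
 * combinations below: the IB CM set (id_ext, ioc_guid, dgid, pkey and
 * service_id) or the RDMA/CM set (id_ext, ioc_guid and dest).
 */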
3382 static unsigned int srp_opt_mandatory[] = {
3383 SRP_OPT_ID_EXT |
3384 SRP_OPT_IOC_GUID |
3385 SRP_OPT_DGID |
3386 SRP_OPT_PKEY |
3387 SRP_OPT_SERVICE_ID,
3388 SRP_OPT_ID_EXT |
3389 SRP_OPT_IOC_GUID |
3390 SRP_OPT_IP_DEST,
3391 };
3392
3393 static const match_table_t srp_opt_tokens = {
3394 { SRP_OPT_ID_EXT, "id_ext=%s" },
3395 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
3396 { SRP_OPT_DGID, "dgid=%s" },
3397 { SRP_OPT_PKEY, "pkey=%x" },
3398 { SRP_OPT_SERVICE_ID, "service_id=%s" },
3399 { SRP_OPT_MAX_SECT, "max_sect=%d" },
3400 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
3401 { SRP_OPT_TARGET_CAN_QUEUE, "target_can_queue=%d" },
3402 { SRP_OPT_IO_CLASS, "io_class=%x" },
3403 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
3404 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
3405 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
3406 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
3407 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
3408 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
3409 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
3410 { SRP_OPT_IP_SRC, "src=%s" },
3411 { SRP_OPT_IP_DEST, "dest=%s" },
3412 { SRP_OPT_ERR, NULL }
3413 };
3414
3415 /**
3416 * srp_parse_in - parse an IP address and port number combination
 * @net: [in] Network namespace.
 * @sa: [out] Address family, IP address and port number.
 * @addr_port_str: [in] IP address and port number in the form "ip:port".
3417 *
3418 * Parse the following address formats:
3419 * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
3420 * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5.
3421 */
3422 static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
3423 const char *addr_port_str)
3424 {
3425 char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL);
3426 char *port_str;
3427 int ret;
3428
3429 if (!addr)
3430 return -ENOMEM;
3431 port_str = strrchr(addr, ':');
3432 if (!port_str)
3433 return -EINVAL;
3434 *port_str++ = '\0';
3435 ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa);
3436 if (ret && addr[0]) {
3437 addr_end = addr + strlen(addr) - 1;
3438 if (addr[0] == '[' && *addr_end == ']') {
3439 *addr_end = '\0';
3440 ret = inet_pton_with_scope(net, AF_INET6, addr + 1,
3441 port_str, sa);
3442 }
3443 }
3444 kfree(addr);
3445 pr_debug("%s -> %pISpfsc\n", addr_port_str, sa);
3446 return ret;
3447 }
3448
3449 static int srp_parse_options(struct net *net, const char *buf,
3450 struct srp_target_port *target)
3451 {
3452 char *options, *sep_opt;
3453 char *p;
3454 substring_t args[MAX_OPT_ARGS];
3455 unsigned long long ull;
3456 int opt_mask = 0;
3457 int token;
3458 int ret = -EINVAL;
3459 int i;
3460
3461 options = kstrdup(buf, GFP_KERNEL);
3462 if (!options)
3463 return -ENOMEM;
3464
3465 sep_opt = options;
3466 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3467 if (!*p)
3468 continue;
3469
3470 token = match_token(p, srp_opt_tokens, args);
3471 opt_mask |= token;
3472
3473 switch (token) {
3474 case SRP_OPT_ID_EXT:
3475 p = match_strdup(args);
3476 if (!p) {
3477 ret = -ENOMEM;
3478 goto out;
3479 }
3480 ret = kstrtoull(p, 16, &ull);
3481 if (ret) {
3482 pr_warn("invalid id_ext parameter '%s'\n", p);
3483 kfree(p);
3484 goto out;
3485 }
3486 target->id_ext = cpu_to_be64(ull);
3487 kfree(p);
3488 break;
3489
3490 case SRP_OPT_IOC_GUID:
3491 p = match_strdup(args);
3492 if (!p) {
3493 ret = -ENOMEM;
3494 goto out;
3495 }
3496 ret = kstrtoull(p, 16, &ull);
3497 if (ret) {
3498 pr_warn("invalid ioc_guid parameter '%s'\n", p);
3499 kfree(p);
3500 goto out;
3501 }
3502 target->ioc_guid = cpu_to_be64(ull);
3503 kfree(p);
3504 break;
3505
3506 case SRP_OPT_DGID:
3507 p = match_strdup(args);
3508 if (!p) {
3509 ret = -ENOMEM;
3510 goto out;
3511 }
3512 if (strlen(p) != 32) {
3513 pr_warn("bad dest GID parameter '%s'\n", p);
3514 kfree(p);
3515 goto out;
3516 }
3517
3518 ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16);
3519 kfree(p);
3520 if (ret < 0)
3521 goto out;
3522 break;
3523
3524 case SRP_OPT_PKEY:
3525 if (match_hex(args, &token)) {
3526 pr_warn("bad P_Key parameter '%s'\n", p);
3527 goto out;
3528 }
3529 target->ib_cm.pkey = cpu_to_be16(token);
3530 break;
3531
3532 case SRP_OPT_SERVICE_ID:
3533 p = match_strdup(args);
3534 if (!p) {
3535 ret = -ENOMEM;
3536 goto out;
3537 }
3538 ret = kstrtoull(p, 16, &ull);
3539 if (ret) {
3540 pr_warn("bad service_id parameter '%s'\n", p);
3541 kfree(p);
3542 goto out;
3543 }
3544 target->ib_cm.service_id = cpu_to_be64(ull);
3545 kfree(p);
3546 break;
3547
3548 case SRP_OPT_IP_SRC:
3549 p = match_strdup(args);
3550 if (!p) {
3551 ret = -ENOMEM;
3552 goto out;
3553 }
3554 ret = srp_parse_in(net, &target->rdma_cm.src.ss, p);
3555 if (ret < 0) {
3556 pr_warn("bad source parameter '%s'\n", p);
3557 kfree(p);
3558 goto out;
3559 }
3560 target->rdma_cm.src_specified = true;
3561 kfree(p);
3562 break;
3563
3564 case SRP_OPT_IP_DEST:
3565 p = match_strdup(args);
3566 if (!p) {
3567 ret = -ENOMEM;
3568 goto out;
3569 }
3570 ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p);
3571 if (ret < 0) {
3572 pr_warn("bad dest parameter '%s'\n", p);
3573 kfree(p);
3574 goto out;
3575 }
3576 target->using_rdma_cm = true;
3577 kfree(p);
3578 break;
3579
3580 case SRP_OPT_MAX_SECT:
3581 if (match_int(args, &token)) {
3582 pr_warn("bad max sect parameter '%s'\n", p);
3583 goto out;
3584 }
3585 target->scsi_host->max_sectors = token;
3586 break;
3587
3588 case SRP_OPT_QUEUE_SIZE:
3589 if (match_int(args, &token) || token < 1) {
3590 pr_warn("bad queue_size parameter '%s'\n", p);
3591 goto out;
3592 }
3593 target->scsi_host->can_queue = token;
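/* Reserve extra slots for responses to target requests and for task management IUs. */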
3594 target->queue_size = token + SRP_RSP_SQ_SIZE +
3595 SRP_TSK_MGMT_SQ_SIZE;
3596 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3597 target->scsi_host->cmd_per_lun = token;
3598 break;
3599
3600 case SRP_OPT_MAX_CMD_PER_LUN:
3601 if (match_int(args, &token) || token < 1) {
3602 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3603 p);
3604 goto out;
3605 }
3606 target->scsi_host->cmd_per_lun = token;
3607 break;
3608
3609 case SRP_OPT_TARGET_CAN_QUEUE:
3610 if (match_int(args, &token) || token < 1) {
3611 pr_warn("bad max target_can_queue parameter '%s'\n",
3612 p);
3613 goto out;
3614 }
3615 target->target_can_queue = token;
3616 break;
3617
3618 case SRP_OPT_IO_CLASS:
3619 if (match_hex(args, &token)) {
3620 pr_warn("bad IO class parameter '%s'\n", p);
3621 goto out;
3622 }
3623 if (token != SRP_REV10_IB_IO_CLASS &&
3624 token != SRP_REV16A_IB_IO_CLASS) {
3625 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3626 token, SRP_REV10_IB_IO_CLASS,
3627 SRP_REV16A_IB_IO_CLASS);
3628 goto out;
3629 }
3630 target->io_class = token;
3631 break;
3632
3633 case SRP_OPT_INITIATOR_EXT:
3634 p = match_strdup(args);
3635 if (!p) {
3636 ret = -ENOMEM;
3637 goto out;
3638 }
3639 ret = kstrtoull(p, 16, &ull);
3640 if (ret) {
3641 pr_warn("bad initiator_ext value '%s'\n", p);
3642 kfree(p);
3643 goto out;
3644 }
3645 target->initiator_ext = cpu_to_be64(ull);
3646 kfree(p);
3647 break;
3648
3649 case SRP_OPT_CMD_SG_ENTRIES:
3650 if (match_int(args, &token) || token < 1 || token > 255) {
3651 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3652 p);
3653 goto out;
3654 }
3655 target->cmd_sg_cnt = token;
3656 break;
3657
3658 case SRP_OPT_ALLOW_EXT_SG:
3659 if (match_int(args, &token)) {
3660 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3661 goto out;
3662 }
3663 target->allow_ext_sg = !!token;
3664 break;
3665
3666 case SRP_OPT_SG_TABLESIZE:
3667 if (match_int(args, &token) || token < 1 ||
3668 token > SG_MAX_SEGMENTS) {
3669 pr_warn("bad max sg_tablesize parameter '%s'\n",
3670 p);
3671 goto out;
3672 }
3673 target->sg_tablesize = token;
3674 break;
3675
3676 case SRP_OPT_COMP_VECTOR:
3677 if (match_int(args, &token) || token < 0) {
3678 pr_warn("bad comp_vector parameter '%s'\n", p);
3679 goto out;
3680 }
3681 target->comp_vector = token;
3682 break;
3683
3684 case SRP_OPT_TL_RETRY_COUNT:
3685 if (match_int(args, &token) || token < 2 || token > 7) {
3686 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3687 p);
3688 goto out;
3689 }
3690 target->tl_retry_count = token;
3691 break;
3692
3693 default:
3694 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3695 p);
3696 goto out;
3697 }
3698 }
3699
3700 for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) {
3701 if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) {
3702 ret = 0;
3703 break;
3704 }
3705 }
3706 if (ret)
3707 pr_warn("target creation request is missing one or more parameters\n");
3708
3709 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3710 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3711 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3712 target->scsi_host->cmd_per_lun,
3713 target->scsi_host->can_queue);
3714
3715 out:
3716 kfree(options);
3717 return ret;
3718 }
3719
3720 static ssize_t srp_create_target(struct device *dev,
3721 struct device_attribute *attr,
3722 const char *buf, size_t count)
3723 {
3724 struct srp_host *host =
3725 container_of(dev, struct srp_host, dev);
3726 struct Scsi_Host *target_host;
3727 struct srp_target_port *target;
3728 struct srp_rdma_ch *ch;
3729 struct srp_device *srp_dev = host->srp_dev;
3730 struct ib_device *ibdev = srp_dev->dev;
3731 int ret, node_idx, node, cpu, i;
3732 unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3733 bool multich = false;
3734
3735 target_host = scsi_host_alloc(&srp_template,
3736 sizeof (struct srp_target_port));
3737 if (!target_host)
3738 return -ENOMEM;
3739
3740 target_host->transportt = ib_srp_transport_template;
3741 target_host->max_channel = 0;
3742 target_host->max_id = 1;
3743 target_host->max_lun = -1LL;
3744 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3745
3746 target = host_to_target(target_host);
3747
3748 target->net = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
3749 target->io_class = SRP_REV16A_IB_IO_CLASS;
3750 target->scsi_host = target_host;
3751 target->srp_host = host;
3752 target->lkey = host->srp_dev->pd->local_dma_lkey;
3753 target->global_rkey = host->srp_dev->global_rkey;
3754 target->cmd_sg_cnt = cmd_sg_entries;
3755 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3756 target->allow_ext_sg = allow_ext_sg;
3757 target->tl_retry_count = 7;
3758 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3759
3760 /*
3761 * Prevent the SCSI host from being removed by srp_remove_target()
3762 * before this function returns.
3763 */
3764 scsi_host_get(target->scsi_host);
3765
3766 ret = mutex_lock_interruptible(&host->add_target_mutex);
3767 if (ret < 0)
3768 goto put;
3769
3770 ret = srp_parse_options(target->net, buf, target);
3771 if (ret)
3772 goto out;
3773
3774 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3775
3776 if (!srp_conn_unique(target->srp_host, target)) {
3777 if (target->using_rdma_cm) {
3778 shost_printk(KERN_INFO, target->scsi_host,
3779 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n",
3780 be64_to_cpu(target->id_ext),
3781 be64_to_cpu(target->ioc_guid),
3782 &target->rdma_cm.dst);
3783 } else {
3784 shost_printk(KERN_INFO, target->scsi_host,
3785 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3786 be64_to_cpu(target->id_ext),
3787 be64_to_cpu(target->ioc_guid),
3788 be64_to_cpu(target->initiator_ext));
3789 }
3790 ret = -EEXIST;
3791 goto out;
3792 }
3793
3794 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3795 target->cmd_sg_cnt < target->sg_tablesize) {
3796 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3797 target->sg_tablesize = target->cmd_sg_cnt;
3798 }
3799
3800 if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3801 bool gaps_reg = (ibdev->attrs.device_cap_flags &
3802 IB_DEVICE_SG_GAPS_REG);
3803
3804 max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3805 (ilog2(srp_dev->mr_page_size) - 9);
3806 if (!gaps_reg) {
3807 /*
3808 * FR and FMR can only map one HCA page per entry. If
3809 * the start address is not aligned on a HCA page
3810 * boundary two entries will be used for the head and
3811 * the tail although these two entries combined
3812 * contain at most one HCA page of data. Hence the "+
3813 * 1" in the calculation below.
3814 *
3815 * The indirect data buffer descriptor is contiguous
3816 * so the memory for that buffer will only be
3817 * registered if register_always is true. Hence add
3818 * one to mr_per_cmd if register_always has been set.
3819 */
3820 mr_per_cmd = register_always +
3821 (target->scsi_host->max_sectors + 1 +
3822 max_sectors_per_mr - 1) / max_sectors_per_mr;
3823 } else {
3824 mr_per_cmd = register_always +
3825 (target->sg_tablesize +
3826 srp_dev->max_pages_per_mr - 1) /
3827 srp_dev->max_pages_per_mr;
3828 }
3829 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3830 target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3831 max_sectors_per_mr, mr_per_cmd);
3832 }
3833
3834 target_host->sg_tablesize = target->sg_tablesize;
3835 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3836 target->mr_per_cmd = mr_per_cmd;
3837 target->indirect_size = target->sg_tablesize *
3838 sizeof (struct srp_direct_buf);
3839 target->max_iu_len = sizeof (struct srp_cmd) +
3840 sizeof (struct srp_indirect_buf) +
3841 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3842
3843 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3844 INIT_WORK(&target->remove_work, srp_remove_work);
3845 spin_lock_init(&target->lock);
3846 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3847 if (ret)
3848 goto out;
3849
3850 ret = -ENOMEM;
3851 target->ch_count = max_t(unsigned, num_online_nodes(),
3852 min(ch_count ? :
3853 min(4 * num_online_nodes(),
3854 ibdev->num_comp_vectors),
3855 num_online_cpus()));
3856 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3857 GFP_KERNEL);
3858 if (!target->ch)
3859 goto out;
3860
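/*
 * Spread the channels evenly over the online NUMA nodes and give each
 * node's channels that node's share of the HCA completion vectors;
 * each channel is associated with one online CPU of its node.
 */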
3861 node_idx = 0;
3862 for_each_online_node(node) {
3863 const int ch_start = (node_idx * target->ch_count /
3864 num_online_nodes());
3865 const int ch_end = ((node_idx + 1) * target->ch_count /
3866 num_online_nodes());
3867 const int cv_start = node_idx * ibdev->num_comp_vectors /
3868 num_online_nodes();
3869 const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
3870 num_online_nodes();
3871 int cpu_idx = 0;
3872
3873 for_each_online_cpu(cpu) {
3874 if (cpu_to_node(cpu) != node)
3875 continue;
3876 if (ch_start + cpu_idx >= ch_end)
3877 continue;
3878 ch = &target->ch[ch_start + cpu_idx];
3879 ch->target = target;
3880 ch->comp_vector = cv_start == cv_end ? cv_start :
3881 cv_start + cpu_idx % (cv_end - cv_start);
3882 spin_lock_init(&ch->lock);
3883 INIT_LIST_HEAD(&ch->free_tx);
3884 ret = srp_new_cm_id(ch);
3885 if (ret)
3886 goto err_disconnect;
3887
3888 ret = srp_create_ch_ib(ch);
3889 if (ret)
3890 goto err_disconnect;
3891
3892 ret = srp_alloc_req_data(ch);
3893 if (ret)
3894 goto err_disconnect;
3895
3896 ret = srp_connect_ch(ch, multich);
3897 if (ret) {
3898 char dst[64];
3899
3900 if (target->using_rdma_cm)
3901 snprintf(dst, sizeof(dst), "%pIS",
3902 &target->rdma_cm.dst);
3903 else
3904 snprintf(dst, sizeof(dst), "%pI6",
3905 target->ib_cm.orig_dgid.raw);
3906 shost_printk(KERN_ERR, target->scsi_host,
3907 PFX "Connection %d/%d to %s failed\n",
3908 ch_start + cpu_idx,
3909 target->ch_count, dst);
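/*
 * Failing to connect the first channel is fatal; a failure on
 * any later channel only reduces the number of channels that
 * will be used for this target.
 */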
3910 if (node_idx == 0 && cpu_idx == 0) {
3911 goto free_ch;
3912 } else {
3913 srp_free_ch_ib(target, ch);
3914 srp_free_req_data(target, ch);
3915 target->ch_count = ch - target->ch;
3916 goto connected;
3917 }
3918 }
3919
3920 multich = true;
3921 cpu_idx++;
3922 }
3923 node_idx++;
3924 }
3925
3926 connected:
3927 target->scsi_host->nr_hw_queues = target->ch_count;
3928
3929 ret = srp_add_target(host, target);
3930 if (ret)
3931 goto err_disconnect;
3932
3933 if (target->state != SRP_TARGET_REMOVED) {
3934 if (target->using_rdma_cm) {
3935 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3936 "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
3937 be64_to_cpu(target->id_ext),
3938 be64_to_cpu(target->ioc_guid),
3939 target->sgid.raw, &target->rdma_cm.dst);
3940 } else {
3941 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3942 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3943 be64_to_cpu(target->id_ext),
3944 be64_to_cpu(target->ioc_guid),
3945 be16_to_cpu(target->ib_cm.pkey),
3946 be64_to_cpu(target->ib_cm.service_id),
3947 target->sgid.raw,
3948 target->ib_cm.orig_dgid.raw);
3949 }
3950 }
3951
3952 ret = count;
3953
3954 out:
3955 mutex_unlock(&host->add_target_mutex);
3956
3957 put:
3958 scsi_host_put(target->scsi_host);
3959 if (ret < 0) {
3960 /*
3961 * If no call to srp_remove_target() has been scheduled, drop
3962 * the network namespace reference that was obtained earlier in
3963 * this function.
3964 */
3965 if (target->state != SRP_TARGET_REMOVED)
3966 kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
3967 scsi_host_put(target->scsi_host);
3968 }
3969
3970 return ret;
3971
3972 err_disconnect:
3973 srp_disconnect_target(target);
3974
3975 free_ch:
3976 for (i = 0; i < target->ch_count; i++) {
3977 ch = &target->ch[i];
3978 srp_free_ch_ib(target, ch);
3979 srp_free_req_data(target, ch);
3980 }
3981
3982 kfree(target->ch);
3983 goto out;
3984 }
3985
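/*
 * Userspace (typically srp_daemon) creates a target port by writing a
 * parameter string to this attribute, for example (illustrative
 * device, port and identifiers):
 *
 *   echo "id_ext=200100e08b000000,ioc_guid=0002c90200402bd4,dgid=fe800000000000000002c90200402bd5,pkey=ffff,service_id=0002c90200402bd4" > \
 *     /sys/class/infiniband_srp/srp-mlx5_0-1/add_target
 */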
3986 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3987
3988 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3989 char *buf)
3990 {
3991 struct srp_host *host = container_of(dev, struct srp_host, dev);
3992
3993 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3994 }
3995
3996 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3997
3998 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3999 char *buf)
4000 {
4001 struct srp_host *host = container_of(dev, struct srp_host, dev);
4002
4003 return sprintf(buf, "%d\n", host->port);
4004 }
4005
4006 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
4007
4008 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
4009 {
4010 struct srp_host *host;
4011
4012 host = kzalloc(sizeof *host, GFP_KERNEL);
4013 if (!host)
4014 return NULL;
4015
4016 INIT_LIST_HEAD(&host->target_list);
4017 spin_lock_init(&host->target_lock);
4018 init_completion(&host->released);
4019 mutex_init(&host->add_target_mutex);
4020 host->srp_dev = device;
4021 host->port = port;
4022
4023 host->dev.class = &srp_class;
4024 host->dev.parent = device->dev->dev.parent;
4025 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
4026
4027 if (device_register(&host->dev))
4028 goto free_host;
4029 if (device_create_file(&host->dev, &dev_attr_add_target))
4030 goto err_class;
4031 if (device_create_file(&host->dev, &dev_attr_ibdev))
4032 goto err_class;
4033 if (device_create_file(&host->dev, &dev_attr_port))
4034 goto err_class;
4035
4036 return host;
4037
4038 err_class:
4039 device_unregister(&host->dev);
4040
4041 free_host:
4042 kfree(host);
4043
4044 return NULL;
4045 }
4046
4047 static void srp_add_one(struct ib_device *device)
4048 {
4049 struct srp_device *srp_dev;
4050 struct ib_device_attr *attr = &device->attrs;
4051 struct srp_host *host;
4052 int mr_page_shift, p;
4053 u64 max_pages_per_mr;
4054 unsigned int flags = 0;
4055
4056 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
4057 if (!srp_dev)
4058 return;
4059
4060 /*
4061 * Use the smallest page size supported by the HCA, down to a
4062 * minimum of 4096 bytes. We're unlikely to build large sglists
4063 * out of smaller entries.
4064 */
4065 mr_page_shift = max(12, ffs(attr->page_size_cap) - 1);
4066 srp_dev->mr_page_size = 1 << mr_page_shift;
4067 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
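/*
 * The number of pages a single MR can map is the device's maximum MR
 * size divided by the chosen MR page size, capped at
 * SRP_MAX_PAGES_PER_MR. For example (illustrative numbers), a 4 GB
 * max_mr_size with 4 KB pages would allow 1048576 pages per MR, so
 * the SRP_MAX_PAGES_PER_MR cap applies.
 */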
4068 max_pages_per_mr = attr->max_mr_size;
4069 do_div(max_pages_per_mr, srp_dev->mr_page_size);
4070 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
4071 attr->max_mr_size, srp_dev->mr_page_size,
4072 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
4073 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
4074 max_pages_per_mr);
4075
4076 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
4077 device->map_phys_fmr && device->unmap_fmr);
4078 srp_dev->has_fr = (attr->device_cap_flags &
4079 IB_DEVICE_MEM_MGT_EXTENSIONS);
4080 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
4081 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
4082 } else if (!never_register &&
4083 attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
4084 srp_dev->use_fast_reg = (srp_dev->has_fr &&
4085 (!srp_dev->has_fmr || prefer_fr));
4086 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
4087 }
4088
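/*
 * Request the PD's unsafe global rkey only when memory registration
 * may be skipped: registration is disabled or optional, or the device
 * supports neither FMR nor FR.
 */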
4089 if (never_register || !register_always ||
4090 (!srp_dev->has_fmr && !srp_dev->has_fr))
4091 flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
4092
4093 if (srp_dev->use_fast_reg) {
4094 srp_dev->max_pages_per_mr =
4095 min_t(u32, srp_dev->max_pages_per_mr,
4096 attr->max_fast_reg_page_list_len);
4097 }
4098 srp_dev->mr_max_size = srp_dev->mr_page_size *
4099 srp_dev->max_pages_per_mr;
4100 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
4101 device->name, mr_page_shift, attr->max_mr_size,
4102 attr->max_fast_reg_page_list_len,
4103 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
4104
4105 INIT_LIST_HEAD(&srp_dev->dev_list);
4106
4107 srp_dev->dev = device;
4108 srp_dev->pd = ib_alloc_pd(device, flags);
4109 if (IS_ERR(srp_dev->pd))
4110 goto free_dev;
4111
4112 if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
4113 srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
4114 WARN_ON_ONCE(srp_dev->global_rkey == 0);
4115 }
4116
4117 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
4118 host = srp_add_port(srp_dev, p);
4119 if (host)
4120 list_add_tail(&host->list, &srp_dev->dev_list);
4121 }
4122
4123 ib_set_client_data(device, &srp_client, srp_dev);
4124 return;
4125
4126 free_dev:
4127 kfree(srp_dev);
4128 }
4129
4130 static void srp_remove_one(struct ib_device *device, void *client_data)
4131 {
4132 struct srp_device *srp_dev;
4133 struct srp_host *host, *tmp_host;
4134 struct srp_target_port *target;
4135
4136 srp_dev = client_data;
4137 if (!srp_dev)
4138 return;
4139
4140 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
4141 device_unregister(&host->dev);
4142 /*
4143 * Wait for the sysfs entry to go away, so that no new
4144 * target ports can be created.
4145 */
4146 wait_for_completion(&host->released);
4147
4148 /*
4149 * Remove all target ports.
4150 */
4151 spin_lock(&host->target_lock);
4152 list_for_each_entry(target, &host->target_list, list)
4153 srp_queue_remove_work(target);
4154 spin_unlock(&host->target_lock);
4155
4156 /*
4157 * Wait for tl_err and target port removal tasks.
4158 */
4159 flush_workqueue(system_long_wq);
4160 flush_workqueue(srp_remove_wq);
4161
4162 kfree(host);
4163 }
4164
4165 ib_dealloc_pd(srp_dev->pd);
4166
4167 kfree(srp_dev);
4168 }
4169
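/*
 * Callbacks registered with the SCSI SRP transport class; they drive
 * rport reconnect, fast_io_fail and dev_loss handling for every
 * target port created by this driver.
 */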
4170 static struct srp_function_template ib_srp_transport_functions = {
4171 .has_rport_state = true,
4172 .reset_timer_if_blocked = true,
4173 .reconnect_delay = &srp_reconnect_delay,
4174 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
4175 .dev_loss_tmo = &srp_dev_loss_tmo,
4176 .reconnect = srp_rport_reconnect,
4177 .rport_delete = srp_rport_delete,
4178 .terminate_rport_io = srp_terminate_io,
4179 };
4180
4181 static int __init srp_init_module(void)
4182 {
4183 int ret;
4184
4185 if (srp_sg_tablesize) {
4186 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
4187 if (!cmd_sg_entries)
4188 cmd_sg_entries = srp_sg_tablesize;
4189 }
4190
4191 if (!cmd_sg_entries)
4192 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
4193
4194 if (cmd_sg_entries > 255) {
4195 pr_warn("Clamping cmd_sg_entries to 255\n");
4196 cmd_sg_entries = 255;
4197 }
4198
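/*
 * indirect_sg_entries sizes the indirect descriptor table, so it must
 * cover at least cmd_sg_entries and may not exceed the SG_MAX_SEGMENTS
 * limit.
 */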
4199 if (!indirect_sg_entries)
4200 indirect_sg_entries = cmd_sg_entries;
4201 else if (indirect_sg_entries < cmd_sg_entries) {
4202 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
4203 cmd_sg_entries);
4204 indirect_sg_entries = cmd_sg_entries;
4205 }
4206
4207 if (indirect_sg_entries > SG_MAX_SEGMENTS) {
4208 pr_warn("Clamping indirect_sg_entries to %u\n",
4209 SG_MAX_SEGMENTS);
4210 indirect_sg_entries = SG_MAX_SEGMENTS;
4211 }
4212
4213 srp_remove_wq = create_workqueue("srp_remove");
4214 if (!srp_remove_wq) {
4215 ret = -ENOMEM;
4216 goto out;
4217 }
4218
4219 ret = -ENOMEM;
4220 ib_srp_transport_template =
4221 srp_attach_transport(&ib_srp_transport_functions);
4222 if (!ib_srp_transport_template)
4223 goto destroy_wq;
4224
4225 ret = class_register(&srp_class);
4226 if (ret) {
4227 pr_err("couldn't register class infiniband_srp\n");
4228 goto release_tr;
4229 }
4230
4231 ib_sa_register_client(&srp_sa_client);
4232
4233 ret = ib_register_client(&srp_client);
4234 if (ret) {
4235 pr_err("couldn't register IB client\n");
4236 goto unreg_sa;
4237 }
4238
4239 out:
4240 return ret;
4241
4242 unreg_sa:
4243 ib_sa_unregister_client(&srp_sa_client);
4244 class_unregister(&srp_class);
4245
4246 release_tr:
4247 srp_release_transport(ib_srp_transport_template);
4248
4249 destroy_wq:
4250 destroy_workqueue(srp_remove_wq);
4251 goto out;
4252 }
4253
4254 static void __exit srp_cleanup_module(void)
4255 {
4256 ib_unregister_client(&srp_client);
4257 ib_sa_unregister_client(&srp_sa_client);
4258 class_unregister(&srp_class);
4259 srp_release_transport(ib_srp_transport_template);
4260 destroy_workqueue(srp_remove_wq);
4261 }
4262
4263 module_init(srp_init_module);
4264 module_exit(srp_cleanup_module);