drivers/infiniband/ulp/srp/ib_srp.c
1 /*
2 * Copyright (c) 2005 Cisco Systems. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/slab.h>
38 #include <linux/err.h>
39 #include <linux/string.h>
40 #include <linux/parser.h>
41 #include <linux/random.h>
42 #include <linux/jiffies.h>
43 #include <rdma/ib_cache.h>
44
45 #include <linux/atomic.h>
46
47 #include <scsi/scsi.h>
48 #include <scsi/scsi_device.h>
49 #include <scsi/scsi_dbg.h>
50 #include <scsi/scsi_tcq.h>
51 #include <scsi/srp.h>
52 #include <scsi/scsi_transport_srp.h>
53
54 #include "ib_srp.h"
55
56 #define DRV_NAME "ib_srp"
57 #define PFX DRV_NAME ": "
58 #define DRV_VERSION "2.0"
59 #define DRV_RELDATE "July 26, 2015"
60
61 MODULE_AUTHOR("Roland Dreier");
62 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63 MODULE_LICENSE("Dual BSD/GPL");
64 MODULE_VERSION(DRV_VERSION);
65 MODULE_INFO(release_date, DRV_RELDATE);
66
67 #if !defined(CONFIG_DYNAMIC_DEBUG)
68 #define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
69 #define DYNAMIC_DEBUG_BRANCH(descriptor) false
70 #endif
71
72 static unsigned int srp_sg_tablesize;
73 static unsigned int cmd_sg_entries;
74 static unsigned int indirect_sg_entries;
75 static bool allow_ext_sg;
76 static bool prefer_fr = true;
77 static bool register_always = true;
78 static bool never_register;
79 static int topspin_workarounds = 1;
80
81 module_param(srp_sg_tablesize, uint, 0444);
82 MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
83
84 module_param(cmd_sg_entries, uint, 0444);
85 MODULE_PARM_DESC(cmd_sg_entries,
86 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
87
88 module_param(indirect_sg_entries, uint, 0444);
89 MODULE_PARM_DESC(indirect_sg_entries,
90 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
91
92 module_param(allow_ext_sg, bool, 0444);
93 MODULE_PARM_DESC(allow_ext_sg,
94 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
95
96 module_param(topspin_workarounds, int, 0444);
97 MODULE_PARM_DESC(topspin_workarounds,
98 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
99
100 module_param(prefer_fr, bool, 0444);
101 MODULE_PARM_DESC(prefer_fr,
102 "Whether to use fast registration if both FMR and fast registration are supported");
103
104 module_param(register_always, bool, 0444);
105 MODULE_PARM_DESC(register_always,
106 "Use memory registration even for contiguous memory regions");
107
108 module_param(never_register, bool, 0444);
109 MODULE_PARM_DESC(never_register, "Never register memory");
110
111 static const struct kernel_param_ops srp_tmo_ops;
112
113 static int srp_reconnect_delay = 10;
114 module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
115 S_IRUGO | S_IWUSR);
116 MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
117
118 static int srp_fast_io_fail_tmo = 15;
119 module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
120 S_IRUGO | S_IWUSR);
121 MODULE_PARM_DESC(fast_io_fail_tmo,
122 "Number of seconds between the observation of a transport"
123 " layer error and failing all I/O. \"off\" means that this"
124 " functionality is disabled.");
125
126 static int srp_dev_loss_tmo = 600;
127 module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
128 S_IRUGO | S_IWUSR);
129 MODULE_PARM_DESC(dev_loss_tmo,
130 "Maximum number of seconds that the SRP transport should"
131 " insulate transport layer errors. After this time has been"
132 " exceeded the SCSI host is removed. Should be"
133 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
134 " if fast_io_fail_tmo has not been set. \"off\" means that"
135 " this functionality is disabled.");
136
137 static unsigned ch_count;
138 module_param(ch_count, uint, 0444);
139 MODULE_PARM_DESC(ch_count,
140 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
141
142 static void srp_add_one(struct ib_device *device);
143 static void srp_remove_one(struct ib_device *device, void *client_data);
144 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
145 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
146 const char *opname);
147 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event);
148
149 static struct scsi_transport_template *ib_srp_transport_template;
150 static struct workqueue_struct *srp_remove_wq;
151
152 static struct ib_client srp_client = {
153 .name = "srp",
154 .add = srp_add_one,
155 .remove = srp_remove_one
156 };
157
158 static struct ib_sa_client srp_sa_client;
159
160 static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
161 {
162 int tmo = *(int *)kp->arg;
163
164 if (tmo >= 0)
165 return sprintf(buffer, "%d", tmo);
166 else
167 return sprintf(buffer, "off");
168 }
169
170 static int srp_tmo_set(const char *val, const struct kernel_param *kp)
171 {
172 int tmo, res;
173
174 res = srp_parse_tmo(&tmo, val);
175 if (res)
176 goto out;
177
178 if (kp->arg == &srp_reconnect_delay)
179 res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
180 srp_dev_loss_tmo);
181 else if (kp->arg == &srp_fast_io_fail_tmo)
182 res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
183 else
184 res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
185 tmo);
186 if (res)
187 goto out;
188 *(int *)kp->arg = tmo;
189
190 out:
191 return res;
192 }
193
194 static const struct kernel_param_ops srp_tmo_ops = {
195 .get = srp_tmo_get,
196 .set = srp_tmo_set,
197 };
198
199 static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
200 {
201 return (struct srp_target_port *) host->hostdata;
202 }
203
204 static const char *srp_target_info(struct Scsi_Host *host)
205 {
206 return host_to_target(host)->target_name;
207 }
208
209 static int srp_target_is_topspin(struct srp_target_port *target)
210 {
211 static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
212 static const u8 cisco_oui[3] = { 0x00, 0x1b, 0x0d };
213
214 return topspin_workarounds &&
215 (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
216 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
217 }
218
219 static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
220 gfp_t gfp_mask,
221 enum dma_data_direction direction)
222 {
223 struct srp_iu *iu;
224
225 iu = kmalloc(sizeof *iu, gfp_mask);
226 if (!iu)
227 goto out;
228
229 iu->buf = kzalloc(size, gfp_mask);
230 if (!iu->buf)
231 goto out_free_iu;
232
233 iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
234 direction);
235 if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
236 goto out_free_buf;
237
238 iu->size = size;
239 iu->direction = direction;
240
241 return iu;
242
243 out_free_buf:
244 kfree(iu->buf);
245 out_free_iu:
246 kfree(iu);
247 out:
248 return NULL;
249 }
250
251 static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
252 {
253 if (!iu)
254 return;
255
256 ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
257 iu->direction);
258 kfree(iu->buf);
259 kfree(iu);
260 }
261
262 static void srp_qp_event(struct ib_event *event, void *context)
263 {
264 pr_debug("QP event %s (%d)\n",
265 ib_event_msg(event->event), event->event);
266 }
267
268 static int srp_init_qp(struct srp_target_port *target,
269 struct ib_qp *qp)
270 {
271 struct ib_qp_attr *attr;
272 int ret;
273
274 attr = kmalloc(sizeof *attr, GFP_KERNEL);
275 if (!attr)
276 return -ENOMEM;
277
278 ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
279 target->srp_host->port,
280 be16_to_cpu(target->pkey),
281 &attr->pkey_index);
282 if (ret)
283 goto out;
284
285 attr->qp_state = IB_QPS_INIT;
286 attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
287 IB_ACCESS_REMOTE_WRITE);
288 attr->port_num = target->srp_host->port;
289
290 ret = ib_modify_qp(qp, attr,
291 IB_QP_STATE |
292 IB_QP_PKEY_INDEX |
293 IB_QP_ACCESS_FLAGS |
294 IB_QP_PORT);
295
296 out:
297 kfree(attr);
298 return ret;
299 }
300
301 static int srp_new_cm_id(struct srp_rdma_ch *ch)
302 {
303 struct srp_target_port *target = ch->target;
304 struct ib_cm_id *new_cm_id;
305
306 new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
307 srp_cm_handler, ch);
308 if (IS_ERR(new_cm_id))
309 return PTR_ERR(new_cm_id);
310
311 if (ch->cm_id)
312 ib_destroy_cm_id(ch->cm_id);
313 ch->cm_id = new_cm_id;
314 ch->path.sgid = target->sgid;
315 ch->path.dgid = target->orig_dgid;
316 ch->path.pkey = target->pkey;
317 ch->path.service_id = target->service_id;
318
319 return 0;
320 }
321
322 static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
323 {
324 struct srp_device *dev = target->srp_host->srp_dev;
325 struct ib_fmr_pool_param fmr_param;
326
327 memset(&fmr_param, 0, sizeof(fmr_param));
328 fmr_param.pool_size = target->mr_pool_size;
329 fmr_param.dirty_watermark = fmr_param.pool_size / 4;
330 fmr_param.cache = 1;
331 fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
332 fmr_param.page_shift = ilog2(dev->mr_page_size);
333 fmr_param.access = (IB_ACCESS_LOCAL_WRITE |
334 IB_ACCESS_REMOTE_WRITE |
335 IB_ACCESS_REMOTE_READ);
336
337 return ib_create_fmr_pool(dev->pd, &fmr_param);
338 }
339
340 /**
341 * srp_destroy_fr_pool() - free the resources owned by a pool
342 * @pool: Fast registration pool to be destroyed.
343 */
344 static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
345 {
346 int i;
347 struct srp_fr_desc *d;
348
349 if (!pool)
350 return;
351
352 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
353 if (d->mr)
354 ib_dereg_mr(d->mr);
355 }
356 kfree(pool);
357 }
358
359 /**
360 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
361 * @device: IB device to allocate fast registration descriptors for.
362 * @pd: Protection domain associated with the FR descriptors.
363 * @pool_size: Number of descriptors to allocate.
364 * @max_page_list_len: Maximum fast registration work request page list length.
365 */
366 static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
367 struct ib_pd *pd, int pool_size,
368 int max_page_list_len)
369 {
370 struct srp_fr_pool *pool;
371 struct srp_fr_desc *d;
372 struct ib_mr *mr;
373 int i, ret = -EINVAL;
374
375 if (pool_size <= 0)
376 goto err;
377 ret = -ENOMEM;
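/*
 * The descriptor array is stored inline at the end of struct srp_fr_pool,
 * hence the combined allocation size below.
 */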
378 pool = kzalloc(sizeof(struct srp_fr_pool) +
379 pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
380 if (!pool)
381 goto err;
382 pool->size = pool_size;
383 pool->max_page_list_len = max_page_list_len;
384 spin_lock_init(&pool->lock);
385 INIT_LIST_HEAD(&pool->free_list);
386
387 for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
388 mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG,
389 max_page_list_len);
390 if (IS_ERR(mr)) {
391 ret = PTR_ERR(mr);
392 if (ret == -ENOMEM)
393 pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
394 dev_name(&device->dev));
395 goto destroy_pool;
396 }
397 d->mr = mr;
398 list_add_tail(&d->entry, &pool->free_list);
399 }
400
401 out:
402 return pool;
403
404 destroy_pool:
405 srp_destroy_fr_pool(pool);
406
407 err:
408 pool = ERR_PTR(ret);
409 goto out;
410 }
411
412 /**
413 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
414 * @pool: Pool to obtain descriptor from.
415 */
416 static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
417 {
418 struct srp_fr_desc *d = NULL;
419 unsigned long flags;
420
421 spin_lock_irqsave(&pool->lock, flags);
422 if (!list_empty(&pool->free_list)) {
423 d = list_first_entry(&pool->free_list, typeof(*d), entry);
424 list_del(&d->entry);
425 }
426 spin_unlock_irqrestore(&pool->lock, flags);
427
428 return d;
429 }
430
431 /**
432 * srp_fr_pool_put() - put an FR descriptor back in the free list
433 * @pool: Pool the descriptor was allocated from.
434 * @desc: Pointer to an array of fast registration descriptor pointers.
435 * @n: Number of descriptors to put back.
436 *
437 * Note: The caller must already have queued an invalidation request for
438 * desc->mr->rkey before calling this function.
439 */
440 static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
441 int n)
442 {
443 unsigned long flags;
444 int i;
445
446 spin_lock_irqsave(&pool->lock, flags);
447 for (i = 0; i < n; i++)
448 list_add(&desc[i]->entry, &pool->free_list);
449 spin_unlock_irqrestore(&pool->lock, flags);
450 }
451
452 static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
453 {
454 struct srp_device *dev = target->srp_host->srp_dev;
455
456 return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
457 dev->max_pages_per_mr);
458 }
459
460 /**
461 * srp_destroy_qp() - destroy an RDMA queue pair
462 * @qp: RDMA queue pair.
463 *
464 * Drain the qp before destroying it so that the receive completion
465 * handler cannot access the queue pair while it is being
466 * destroyed.
467 */
468 static void srp_destroy_qp(struct ib_qp *qp)
469 {
470 ib_drain_rq(qp);
471 ib_destroy_qp(qp);
472 }
473
474 static int srp_create_ch_ib(struct srp_rdma_ch *ch)
475 {
476 struct srp_target_port *target = ch->target;
477 struct srp_device *dev = target->srp_host->srp_dev;
478 struct ib_qp_init_attr *init_attr;
479 struct ib_cq *recv_cq, *send_cq;
480 struct ib_qp *qp;
481 struct ib_fmr_pool *fmr_pool = NULL;
482 struct srp_fr_pool *fr_pool = NULL;
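/*
 * Send queue sizing factor: with fast registration each command may post
 * up to mr_per_cmd registration and mr_per_cmd invalidation work requests
 * in addition to the SEND itself.
 */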
483 const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
484 int ret;
485
486 init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
487 if (!init_attr)
488 return -ENOMEM;
489
490 /* queue_size + 1 for ib_drain_rq() */
491 recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
492 ch->comp_vector, IB_POLL_SOFTIRQ);
493 if (IS_ERR(recv_cq)) {
494 ret = PTR_ERR(recv_cq);
495 goto err;
496 }
497
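/*
 * The send CQ uses IB_POLL_DIRECT because it is polled explicitly from
 * __srp_get_tx_iu() via ib_process_cq_direct().
 */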
498 send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
499 ch->comp_vector, IB_POLL_DIRECT);
500 if (IS_ERR(send_cq)) {
501 ret = PTR_ERR(send_cq);
502 goto err_recv_cq;
503 }
504
505 init_attr->event_handler = srp_qp_event;
506 init_attr->cap.max_send_wr = m * target->queue_size;
507 init_attr->cap.max_recv_wr = target->queue_size + 1;
508 init_attr->cap.max_recv_sge = 1;
509 init_attr->cap.max_send_sge = 1;
510 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
511 init_attr->qp_type = IB_QPT_RC;
512 init_attr->send_cq = send_cq;
513 init_attr->recv_cq = recv_cq;
514
515 qp = ib_create_qp(dev->pd, init_attr);
516 if (IS_ERR(qp)) {
517 ret = PTR_ERR(qp);
518 goto err_send_cq;
519 }
520
521 ret = srp_init_qp(target, qp);
522 if (ret)
523 goto err_qp;
524
525 if (dev->use_fast_reg) {
526 fr_pool = srp_alloc_fr_pool(target);
527 if (IS_ERR(fr_pool)) {
528 ret = PTR_ERR(fr_pool);
529 shost_printk(KERN_WARNING, target->scsi_host, PFX
530 "FR pool allocation failed (%d)\n", ret);
531 goto err_qp;
532 }
533 } else if (dev->use_fmr) {
534 fmr_pool = srp_alloc_fmr_pool(target);
535 if (IS_ERR(fmr_pool)) {
536 ret = PTR_ERR(fmr_pool);
537 shost_printk(KERN_WARNING, target->scsi_host, PFX
538 "FMR pool allocation failed (%d)\n", ret);
539 goto err_qp;
540 }
541 }
542
543 if (ch->qp)
544 srp_destroy_qp(ch->qp);
545 if (ch->recv_cq)
546 ib_free_cq(ch->recv_cq);
547 if (ch->send_cq)
548 ib_free_cq(ch->send_cq);
549
550 ch->qp = qp;
551 ch->recv_cq = recv_cq;
552 ch->send_cq = send_cq;
553
554 if (dev->use_fast_reg) {
555 if (ch->fr_pool)
556 srp_destroy_fr_pool(ch->fr_pool);
557 ch->fr_pool = fr_pool;
558 } else if (dev->use_fmr) {
559 if (ch->fmr_pool)
560 ib_destroy_fmr_pool(ch->fmr_pool);
561 ch->fmr_pool = fmr_pool;
562 }
563
564 kfree(init_attr);
565 return 0;
566
567 err_qp:
568 srp_destroy_qp(qp);
569
570 err_send_cq:
571 ib_free_cq(send_cq);
572
573 err_recv_cq:
574 ib_free_cq(recv_cq);
575
576 err:
577 kfree(init_attr);
578 return ret;
579 }
580
581 /*
582 * Note: this function may be called without srp_alloc_iu_bufs() having been
583 * invoked. Hence the ch->[rt]x_ring checks.
584 */
585 static void srp_free_ch_ib(struct srp_target_port *target,
586 struct srp_rdma_ch *ch)
587 {
588 struct srp_device *dev = target->srp_host->srp_dev;
589 int i;
590
591 if (!ch->target)
592 return;
593
594 if (ch->cm_id) {
595 ib_destroy_cm_id(ch->cm_id);
596 ch->cm_id = NULL;
597 }
598
599 /* If srp_new_cm_id() succeeded but srp_create_ch_ib() not, return. */
600 if (!ch->qp)
601 return;
602
603 if (dev->use_fast_reg) {
604 if (ch->fr_pool)
605 srp_destroy_fr_pool(ch->fr_pool);
606 } else if (dev->use_fmr) {
607 if (ch->fmr_pool)
608 ib_destroy_fmr_pool(ch->fmr_pool);
609 }
610
611 srp_destroy_qp(ch->qp);
612 ib_free_cq(ch->send_cq);
613 ib_free_cq(ch->recv_cq);
614
615 /*
616 * Prevent the SCSI error handler from using this channel after it has
617 * been freed: the error handler may continue trying to perform
618 * recovery actions even after scsi_remove_host() has
619 * returned.
620 */
621 ch->target = NULL;
622
623 ch->qp = NULL;
624 ch->send_cq = ch->recv_cq = NULL;
625
626 if (ch->rx_ring) {
627 for (i = 0; i < target->queue_size; ++i)
628 srp_free_iu(target->srp_host, ch->rx_ring[i]);
629 kfree(ch->rx_ring);
630 ch->rx_ring = NULL;
631 }
632 if (ch->tx_ring) {
633 for (i = 0; i < target->queue_size; ++i)
634 srp_free_iu(target->srp_host, ch->tx_ring[i]);
635 kfree(ch->tx_ring);
636 ch->tx_ring = NULL;
637 }
638 }
639
640 static void srp_path_rec_completion(int status,
641 struct ib_sa_path_rec *pathrec,
642 void *ch_ptr)
643 {
644 struct srp_rdma_ch *ch = ch_ptr;
645 struct srp_target_port *target = ch->target;
646
647 ch->status = status;
648 if (status)
649 shost_printk(KERN_ERR, target->scsi_host,
650 PFX "Got failed path rec status %d\n", status);
651 else
652 ch->path = *pathrec;
653 complete(&ch->done);
654 }
655
656 static int srp_lookup_path(struct srp_rdma_ch *ch)
657 {
658 struct srp_target_port *target = ch->target;
659 int ret;
660
661 ch->path.numb_path = 1;
662
663 init_completion(&ch->done);
664
665 ch->path_query_id = ib_sa_path_rec_get(&srp_sa_client,
666 target->srp_host->srp_dev->dev,
667 target->srp_host->port,
668 &ch->path,
669 IB_SA_PATH_REC_SERVICE_ID |
670 IB_SA_PATH_REC_DGID |
671 IB_SA_PATH_REC_SGID |
672 IB_SA_PATH_REC_NUMB_PATH |
673 IB_SA_PATH_REC_PKEY,
674 SRP_PATH_REC_TIMEOUT_MS,
675 GFP_KERNEL,
676 srp_path_rec_completion,
677 ch, &ch->path_query);
678 if (ch->path_query_id < 0)
679 return ch->path_query_id;
680
681 ret = wait_for_completion_interruptible(&ch->done);
682 if (ret < 0)
683 return ret;
684
685 if (ch->status < 0)
686 shost_printk(KERN_WARNING, target->scsi_host,
687 PFX "Path record query failed\n");
688
689 return ch->status;
690 }
691
692 static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
693 {
694 struct srp_target_port *target = ch->target;
695 struct {
696 struct ib_cm_req_param param;
697 struct srp_login_req priv;
698 } *req = NULL;
699 int status;
700
701 req = kzalloc(sizeof *req, GFP_KERNEL);
702 if (!req)
703 return -ENOMEM;
704
705 req->param.primary_path = &ch->path;
706 req->param.alternate_path = NULL;
707 req->param.service_id = target->service_id;
708 req->param.qp_num = ch->qp->qp_num;
709 req->param.qp_type = ch->qp->qp_type;
710 req->param.private_data = &req->priv;
711 req->param.private_data_len = sizeof req->priv;
712 req->param.flow_control = 1;
713
714 get_random_bytes(&req->param.starting_psn, 4);
715 req->param.starting_psn &= 0xffffff;
716
717 /*
718 * Pick some arbitrary defaults here; we could make these
719 * module parameters if anyone cared about setting them.
720 */
721 req->param.responder_resources = 4;
722 req->param.remote_cm_response_timeout = 20;
723 req->param.local_cm_response_timeout = 20;
724 req->param.retry_count = target->tl_retry_count;
725 req->param.rnr_retry_count = 7;
726 req->param.max_cm_retries = 15;
727
728 req->priv.opcode = SRP_LOGIN_REQ;
729 req->priv.tag = 0;
730 req->priv.req_it_iu_len = cpu_to_be32(target->max_iu_len);
731 req->priv.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
732 SRP_BUF_FORMAT_INDIRECT);
733 req->priv.req_flags = (multich ? SRP_MULTICHAN_MULTI :
734 SRP_MULTICHAN_SINGLE);
735 /*
736 * In the published SRP specification (draft rev. 16a), the
737 * port identifier format is 8 bytes of ID extension followed
738 * by 8 bytes of GUID. Older drafts put the two halves in the
739 * opposite order, so that the GUID comes first.
740 *
741 * Targets conforming to these obsolete drafts can be
742 * recognized by the I/O Class they report.
743 */
744 if (target->io_class == SRP_REV10_IB_IO_CLASS) {
745 memcpy(req->priv.initiator_port_id,
746 &target->sgid.global.interface_id, 8);
747 memcpy(req->priv.initiator_port_id + 8,
748 &target->initiator_ext, 8);
749 memcpy(req->priv.target_port_id, &target->ioc_guid, 8);
750 memcpy(req->priv.target_port_id + 8, &target->id_ext, 8);
751 } else {
752 memcpy(req->priv.initiator_port_id,
753 &target->initiator_ext, 8);
754 memcpy(req->priv.initiator_port_id + 8,
755 &target->sgid.global.interface_id, 8);
756 memcpy(req->priv.target_port_id, &target->id_ext, 8);
757 memcpy(req->priv.target_port_id + 8, &target->ioc_guid, 8);
758 }
759
760 /*
761 * Topspin/Cisco SRP targets will reject our login unless we
762 * zero out the first 8 bytes of our initiator port ID and set
763 * the second 8 bytes to the local node GUID.
764 */
765 if (srp_target_is_topspin(target)) {
766 shost_printk(KERN_DEBUG, target->scsi_host,
767 PFX "Topspin/Cisco initiator port ID workaround "
768 "activated for target GUID %016llx\n",
769 be64_to_cpu(target->ioc_guid));
770 memset(req->priv.initiator_port_id, 0, 8);
771 memcpy(req->priv.initiator_port_id + 8,
772 &target->srp_host->srp_dev->dev->node_guid, 8);
773 }
774
775 status = ib_send_cm_req(ch->cm_id, &req->param);
776
777 kfree(req);
778
779 return status;
780 }
781
782 static bool srp_queue_remove_work(struct srp_target_port *target)
783 {
784 bool changed = false;
785
786 spin_lock_irq(&target->lock);
787 if (target->state != SRP_TARGET_REMOVED) {
788 target->state = SRP_TARGET_REMOVED;
789 changed = true;
790 }
791 spin_unlock_irq(&target->lock);
792
793 if (changed)
794 queue_work(srp_remove_wq, &target->remove_work);
795
796 return changed;
797 }
798
799 static void srp_disconnect_target(struct srp_target_port *target)
800 {
801 struct srp_rdma_ch *ch;
802 int i;
803
804 /* XXX should send SRP_I_LOGOUT request */
805
806 for (i = 0; i < target->ch_count; i++) {
807 ch = &target->ch[i];
808 ch->connected = false;
809 if (ch->cm_id && ib_send_cm_dreq(ch->cm_id, NULL, 0)) {
810 shost_printk(KERN_DEBUG, target->scsi_host,
811 PFX "Sending CM DREQ failed\n");
812 }
813 }
814 }
815
816 static void srp_free_req_data(struct srp_target_port *target,
817 struct srp_rdma_ch *ch)
818 {
819 struct srp_device *dev = target->srp_host->srp_dev;
820 struct ib_device *ibdev = dev->dev;
821 struct srp_request *req;
822 int i;
823
824 if (!ch->req_ring)
825 return;
826
827 for (i = 0; i < target->req_ring_size; ++i) {
828 req = &ch->req_ring[i];
829 if (dev->use_fast_reg) {
830 kfree(req->fr_list);
831 } else {
832 kfree(req->fmr_list);
833 kfree(req->map_page);
834 }
835 if (req->indirect_dma_addr) {
836 ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
837 target->indirect_size,
838 DMA_TO_DEVICE);
839 }
840 kfree(req->indirect_desc);
841 }
842
843 kfree(ch->req_ring);
844 ch->req_ring = NULL;
845 }
846
847 static int srp_alloc_req_data(struct srp_rdma_ch *ch)
848 {
849 struct srp_target_port *target = ch->target;
850 struct srp_device *srp_dev = target->srp_host->srp_dev;
851 struct ib_device *ibdev = srp_dev->dev;
852 struct srp_request *req;
853 void *mr_list;
854 dma_addr_t dma_addr;
855 int i, ret = -ENOMEM;
856
857 ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
858 GFP_KERNEL);
859 if (!ch->req_ring)
860 goto out;
861
862 for (i = 0; i < target->req_ring_size; ++i) {
863 req = &ch->req_ring[i];
864 mr_list = kmalloc(target->mr_per_cmd * sizeof(void *),
865 GFP_KERNEL);
866 if (!mr_list)
867 goto out;
868 if (srp_dev->use_fast_reg) {
869 req->fr_list = mr_list;
870 } else {
871 req->fmr_list = mr_list;
872 req->map_page = kmalloc(srp_dev->max_pages_per_mr *
873 sizeof(void *), GFP_KERNEL);
874 if (!req->map_page)
875 goto out;
876 }
877 req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
878 if (!req->indirect_desc)
879 goto out;
880
881 dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
882 target->indirect_size,
883 DMA_TO_DEVICE);
884 if (ib_dma_mapping_error(ibdev, dma_addr))
885 goto out;
886
887 req->indirect_dma_addr = dma_addr;
888 }
889 ret = 0;
890
891 out:
892 return ret;
893 }
894
895 /**
896 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
897 * @shost: SCSI host whose attributes to remove from sysfs.
898 *
899 * Note: Any attributes defined in the host template that did not exist
900 * before this function was invoked will be ignored.
901 */
902 static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
903 {
904 struct device_attribute **attr;
905
906 for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
907 device_remove_file(&shost->shost_dev, *attr);
908 }
909
910 static void srp_remove_target(struct srp_target_port *target)
911 {
912 struct srp_rdma_ch *ch;
913 int i;
914
915 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
916
917 srp_del_scsi_host_attr(target->scsi_host);
918 srp_rport_get(target->rport);
919 srp_remove_host(target->scsi_host);
920 scsi_remove_host(target->scsi_host);
921 srp_stop_rport_timers(target->rport);
922 srp_disconnect_target(target);
923 for (i = 0; i < target->ch_count; i++) {
924 ch = &target->ch[i];
925 srp_free_ch_ib(target, ch);
926 }
927 cancel_work_sync(&target->tl_err_work);
928 srp_rport_put(target->rport);
929 for (i = 0; i < target->ch_count; i++) {
930 ch = &target->ch[i];
931 srp_free_req_data(target, ch);
932 }
933 kfree(target->ch);
934 target->ch = NULL;
935
936 spin_lock(&target->srp_host->target_lock);
937 list_del(&target->list);
938 spin_unlock(&target->srp_host->target_lock);
939
940 scsi_host_put(target->scsi_host);
941 }
942
943 static void srp_remove_work(struct work_struct *work)
944 {
945 struct srp_target_port *target =
946 container_of(work, struct srp_target_port, remove_work);
947
948 WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
949
950 srp_remove_target(target);
951 }
952
953 static void srp_rport_delete(struct srp_rport *rport)
954 {
955 struct srp_target_port *target = rport->lld_data;
956
957 srp_queue_remove_work(target);
958 }
959
960 /**
961 * srp_connected_ch() - number of connected channels
962 * @target: SRP target port.
963 */
964 static int srp_connected_ch(struct srp_target_port *target)
965 {
966 int i, c = 0;
967
968 for (i = 0; i < target->ch_count; i++)
969 c += target->ch[i].connected;
970
971 return c;
972 }
973
974 static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
975 {
976 struct srp_target_port *target = ch->target;
977 int ret;
978
979 WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
980
981 ret = srp_lookup_path(ch);
982 if (ret)
983 goto out;
984
985 while (1) {
986 init_completion(&ch->done);
987 ret = srp_send_req(ch, multich);
988 if (ret)
989 goto out;
990 ret = wait_for_completion_interruptible(&ch->done);
991 if (ret < 0)
992 goto out;
993
994 /*
995 * The CM event handling code will set status to
996 * SRP_PORT_REDIRECT if we get a port redirect REJ
997 * back, or SRP_DLID_REDIRECT if we get a lid/qp
998 * redirect REJ back.
999 */
1000 ret = ch->status;
1001 switch (ret) {
1002 case 0:
1003 ch->connected = true;
1004 goto out;
1005
1006 case SRP_PORT_REDIRECT:
1007 ret = srp_lookup_path(ch);
1008 if (ret)
1009 goto out;
1010 break;
1011
1012 case SRP_DLID_REDIRECT:
1013 break;
1014
1015 case SRP_STALE_CONN:
1016 shost_printk(KERN_ERR, target->scsi_host, PFX
1017 "giving up on stale connection\n");
1018 ret = -ECONNRESET;
1019 goto out;
1020
1021 default:
1022 goto out;
1023 }
1024 }
1025
1026 out:
1027 return ret <= 0 ? ret : -ENODEV;
1028 }
1029
1030 static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1031 {
1032 srp_handle_qp_err(cq, wc, "INV RKEY");
1033 }
1034
1035 static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1036 u32 rkey)
1037 {
1038 struct ib_send_wr *bad_wr;
1039 struct ib_send_wr wr = {
1040 .opcode = IB_WR_LOCAL_INV,
1041 .next = NULL,
1042 .num_sge = 0,
1043 .send_flags = 0,
1044 .ex.invalidate_rkey = rkey,
1045 };
1046
1047 wr.wr_cqe = &req->reg_cqe;
1048 req->reg_cqe.done = srp_inv_rkey_err_done;
1049 return ib_post_send(ch->qp, &wr, &bad_wr);
1050 }
1051
1052 static void srp_unmap_data(struct scsi_cmnd *scmnd,
1053 struct srp_rdma_ch *ch,
1054 struct srp_request *req)
1055 {
1056 struct srp_target_port *target = ch->target;
1057 struct srp_device *dev = target->srp_host->srp_dev;
1058 struct ib_device *ibdev = dev->dev;
1059 int i, res;
1060
1061 if (!scsi_sglist(scmnd) ||
1062 (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1063 scmnd->sc_data_direction != DMA_FROM_DEVICE))
1064 return;
1065
1066 if (dev->use_fast_reg) {
1067 struct srp_fr_desc **pfr;
1068
1069 for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1070 res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1071 if (res < 0) {
1072 shost_printk(KERN_ERR, target->scsi_host, PFX
1073 "Queueing INV WR for rkey %#x failed (%d)\n",
1074 (*pfr)->mr->rkey, res);
1075 queue_work(system_long_wq,
1076 &target->tl_err_work);
1077 }
1078 }
1079 if (req->nmdesc)
1080 srp_fr_pool_put(ch->fr_pool, req->fr_list,
1081 req->nmdesc);
1082 } else if (dev->use_fmr) {
1083 struct ib_pool_fmr **pfmr;
1084
1085 for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
1086 ib_fmr_pool_unmap(*pfmr);
1087 }
1088
1089 ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1090 scmnd->sc_data_direction);
1091 }
1092
1093 /**
1094 * srp_claim_req - Take ownership of the scmnd associated with a request.
1095 * @ch: SRP RDMA channel.
1096 * @req: SRP request.
1097 * @sdev: If not NULL, only take ownership for this SCSI device.
1098 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1099 * ownership of @req->scmnd if it equals @scmnd.
1100 *
1101 * Return value:
1102 * Either NULL or a pointer to the SCSI command the caller became owner of.
1103 */
1104 static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1105 struct srp_request *req,
1106 struct scsi_device *sdev,
1107 struct scsi_cmnd *scmnd)
1108 {
1109 unsigned long flags;
1110
1111 spin_lock_irqsave(&ch->lock, flags);
1112 if (req->scmnd &&
1113 (!sdev || req->scmnd->device == sdev) &&
1114 (!scmnd || req->scmnd == scmnd)) {
1115 scmnd = req->scmnd;
1116 req->scmnd = NULL;
1117 } else {
1118 scmnd = NULL;
1119 }
1120 spin_unlock_irqrestore(&ch->lock, flags);
1121
1122 return scmnd;
1123 }
1124
1125 /**
1126 * srp_free_req() - Unmap data and adjust ch->req_lim.
1127 * @ch: SRP RDMA channel.
1128 * @req: Request to be freed.
1129 * @scmnd: SCSI command associated with @req.
1130 * @req_lim_delta: Amount to be added to @target->req_lim.
1131 */
1132 static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1133 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1134 {
1135 unsigned long flags;
1136
1137 srp_unmap_data(scmnd, ch, req);
1138
1139 spin_lock_irqsave(&ch->lock, flags);
1140 ch->req_lim += req_lim_delta;
1141 spin_unlock_irqrestore(&ch->lock, flags);
1142 }
1143
1144 static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1145 struct scsi_device *sdev, int result)
1146 {
1147 struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1148
1149 if (scmnd) {
1150 srp_free_req(ch, req, scmnd, 0);
1151 scmnd->result = result;
1152 scmnd->scsi_done(scmnd);
1153 }
1154 }
1155
1156 static void srp_terminate_io(struct srp_rport *rport)
1157 {
1158 struct srp_target_port *target = rport->lld_data;
1159 struct srp_rdma_ch *ch;
1160 struct Scsi_Host *shost = target->scsi_host;
1161 struct scsi_device *sdev;
1162 int i, j;
1163
1164 /*
1165 * Invoking srp_terminate_io() while srp_queuecommand() is running
1166 * is not safe. Hence the warning statement below.
1167 */
1168 shost_for_each_device(sdev, shost)
1169 WARN_ON_ONCE(sdev->request_queue->request_fn_active);
1170
1171 for (i = 0; i < target->ch_count; i++) {
1172 ch = &target->ch[i];
1173
1174 for (j = 0; j < target->req_ring_size; ++j) {
1175 struct srp_request *req = &ch->req_ring[j];
1176
1177 srp_finish_req(ch, req, NULL,
1178 DID_TRANSPORT_FAILFAST << 16);
1179 }
1180 }
1181 }
1182
1183 /*
1184 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1185 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1186 * srp_reset_device() or srp_reset_host() calls will occur while this function
1187 * is in progress. One way to achieve this is not to call this function
1188 * directly but to call srp_reconnect_rport() instead, since that function
1189 * serializes calls of this function via rport->mutex and also blocks
1190 * srp_queuecommand() calls before invoking this function.
1191 */
1192 static int srp_rport_reconnect(struct srp_rport *rport)
1193 {
1194 struct srp_target_port *target = rport->lld_data;
1195 struct srp_rdma_ch *ch;
1196 int i, j, ret = 0;
1197 bool multich = false;
1198
1199 srp_disconnect_target(target);
1200
1201 if (target->state == SRP_TARGET_SCANNING)
1202 return -ENODEV;
1203
1204 /*
1205 * Now get a new local CM ID so that we avoid confusing the target in
1206 * case things are really fouled up. Doing so also ensures that all CM
1207 * callbacks will have finished before a new QP is allocated.
1208 */
1209 for (i = 0; i < target->ch_count; i++) {
1210 ch = &target->ch[i];
1211 ret += srp_new_cm_id(ch);
1212 }
1213 for (i = 0; i < target->ch_count; i++) {
1214 ch = &target->ch[i];
1215 for (j = 0; j < target->req_ring_size; ++j) {
1216 struct srp_request *req = &ch->req_ring[j];
1217
1218 srp_finish_req(ch, req, NULL, DID_RESET << 16);
1219 }
1220 }
1221 for (i = 0; i < target->ch_count; i++) {
1222 ch = &target->ch[i];
1223 /*
1224 * Whether or not creating a new CM ID succeeded, create a new
1225 * QP. This guarantees that all completion callback function
1226 * invocations have finished before request resetting starts.
1227 */
1228 ret += srp_create_ch_ib(ch);
1229
1230 INIT_LIST_HEAD(&ch->free_tx);
1231 for (j = 0; j < target->queue_size; ++j)
1232 list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1233 }
1234
1235 target->qp_in_error = false;
1236
1237 for (i = 0; i < target->ch_count; i++) {
1238 ch = &target->ch[i];
1239 if (ret)
1240 break;
1241 ret = srp_connect_ch(ch, multich);
1242 multich = true;
1243 }
1244
1245 if (ret == 0)
1246 shost_printk(KERN_INFO, target->scsi_host,
1247 PFX "reconnect succeeded\n");
1248
1249 return ret;
1250 }
1251
1252 static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1253 unsigned int dma_len, u32 rkey)
1254 {
1255 struct srp_direct_buf *desc = state->desc;
1256
1257 WARN_ON_ONCE(!dma_len);
1258
1259 desc->va = cpu_to_be64(dma_addr);
1260 desc->key = cpu_to_be32(rkey);
1261 desc->len = cpu_to_be32(dma_len);
1262
1263 state->total_len += dma_len;
1264 state->desc++;
1265 state->ndesc++;
1266 }
1267
1268 static int srp_map_finish_fmr(struct srp_map_state *state,
1269 struct srp_rdma_ch *ch)
1270 {
1271 struct srp_target_port *target = ch->target;
1272 struct srp_device *dev = target->srp_host->srp_dev;
1273 struct ib_pd *pd = target->pd;
1274 struct ib_pool_fmr *fmr;
1275 u64 io_addr = 0;
1276
1277 if (state->fmr.next >= state->fmr.end) {
1278 shost_printk(KERN_ERR, ch->target->scsi_host,
1279 PFX "Out of MRs (mr_per_cmd = %d)\n",
1280 ch->target->mr_per_cmd);
1281 return -ENOMEM;
1282 }
1283
1284 WARN_ON_ONCE(!dev->use_fmr);
1285
1286 if (state->npages == 0)
1287 return 0;
1288
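/*
 * A single-page transfer can be described directly with the global rkey,
 * so no FMR mapping is needed in that case.
 */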
1289 if (state->npages == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1290 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1291 pd->unsafe_global_rkey);
1292 goto reset_state;
1293 }
1294
1295 fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1296 state->npages, io_addr);
1297 if (IS_ERR(fmr))
1298 return PTR_ERR(fmr);
1299
1300 *state->fmr.next++ = fmr;
1301 state->nmdesc++;
1302
1303 srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1304 state->dma_len, fmr->fmr->rkey);
1305
1306 reset_state:
1307 state->npages = 0;
1308 state->dma_len = 0;
1309
1310 return 0;
1311 }
1312
1313 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1314 {
1315 srp_handle_qp_err(cq, wc, "FAST REG");
1316 }
1317
1318 /*
1319 * Map up to sg_nents elements of state->sg where *sg_offset_p is the offset
1320 * where to start in the first element. If sg_offset_p != NULL then
1321 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1322 * byte that has not yet been mapped.
1323 */
1324 static int srp_map_finish_fr(struct srp_map_state *state,
1325 struct srp_request *req,
1326 struct srp_rdma_ch *ch, int sg_nents,
1327 unsigned int *sg_offset_p)
1328 {
1329 struct srp_target_port *target = ch->target;
1330 struct srp_device *dev = target->srp_host->srp_dev;
1331 struct ib_pd *pd = target->pd;
1332 struct ib_send_wr *bad_wr;
1333 struct ib_reg_wr wr;
1334 struct srp_fr_desc *desc;
1335 u32 rkey;
1336 int n, err;
1337
1338 if (state->fr.next >= state->fr.end) {
1339 shost_printk(KERN_ERR, ch->target->scsi_host,
1340 PFX "Out of MRs (mr_per_cmd = %d)\n",
1341 ch->target->mr_per_cmd);
1342 return -ENOMEM;
1343 }
1344
1345 WARN_ON_ONCE(!dev->use_fast_reg);
1346
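/*
 * As in the FMR path, a single S/G entry can use the global rkey directly
 * instead of consuming a registration descriptor.
 */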
1347 if (sg_nents == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1348 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1349
1350 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1351 sg_dma_len(state->sg) - sg_offset,
1352 pd->unsafe_global_rkey);
1353 if (sg_offset_p)
1354 *sg_offset_p = 0;
1355 return 1;
1356 }
1357
1358 desc = srp_fr_pool_get(ch->fr_pool);
1359 if (!desc)
1360 return -ENOMEM;
1361
1362 rkey = ib_inc_rkey(desc->mr->rkey);
1363 ib_update_fast_reg_key(desc->mr, rkey);
1364
1365 n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1366 dev->mr_page_size);
1367 if (unlikely(n < 0)) {
1368 srp_fr_pool_put(ch->fr_pool, &desc, 1);
1369 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1370 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1371 sg_offset_p ? *sg_offset_p : -1, n);
1372 return n;
1373 }
1374
1375 WARN_ON_ONCE(desc->mr->length == 0);
1376
1377 req->reg_cqe.done = srp_reg_mr_err_done;
1378
1379 wr.wr.next = NULL;
1380 wr.wr.opcode = IB_WR_REG_MR;
1381 wr.wr.wr_cqe = &req->reg_cqe;
1382 wr.wr.num_sge = 0;
1383 wr.wr.send_flags = 0;
1384 wr.mr = desc->mr;
1385 wr.key = desc->mr->rkey;
1386 wr.access = (IB_ACCESS_LOCAL_WRITE |
1387 IB_ACCESS_REMOTE_READ |
1388 IB_ACCESS_REMOTE_WRITE);
1389
1390 *state->fr.next++ = desc;
1391 state->nmdesc++;
1392
1393 srp_map_desc(state, desc->mr->iova,
1394 desc->mr->length, desc->mr->rkey);
1395
1396 err = ib_post_send(ch->qp, &wr.wr, &bad_wr);
1397 if (unlikely(err)) {
1398 WARN_ON_ONCE(err == -ENOMEM);
1399 return err;
1400 }
1401
1402 return n;
1403 }
1404
1405 static int srp_map_sg_entry(struct srp_map_state *state,
1406 struct srp_rdma_ch *ch,
1407 struct scatterlist *sg)
1408 {
1409 struct srp_target_port *target = ch->target;
1410 struct srp_device *dev = target->srp_host->srp_dev;
1411 struct ib_device *ibdev = dev->dev;
1412 dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1413 unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1414 unsigned int len = 0;
1415 int ret;
1416
1417 WARN_ON_ONCE(!dma_len);
1418
1419 while (dma_len) {
1420 unsigned offset = dma_addr & ~dev->mr_page_mask;
1421
1422 if (state->npages == dev->max_pages_per_mr ||
1423 (state->npages > 0 && offset != 0)) {
1424 ret = srp_map_finish_fmr(state, ch);
1425 if (ret)
1426 return ret;
1427 }
1428
1429 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1430
1431 if (!state->npages)
1432 state->base_dma_addr = dma_addr;
1433 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1434 state->dma_len += len;
1435 dma_addr += len;
1436 dma_len -= len;
1437 }
1438
1439 /*
1440 * If the end of the MR is not on a page boundary then we need to
1441 * close it out and start a new one -- we can only merge at page
1442 * boundaries.
1443 */
1444 ret = 0;
1445 if ((dma_addr & ~dev->mr_page_mask) != 0)
1446 ret = srp_map_finish_fmr(state, ch);
1447 return ret;
1448 }
1449
1450 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1451 struct srp_request *req, struct scatterlist *scat,
1452 int count)
1453 {
1454 struct scatterlist *sg;
1455 int i, ret;
1456
1457 state->pages = req->map_page;
1458 state->fmr.next = req->fmr_list;
1459 state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1460
1461 for_each_sg(scat, sg, count, i) {
1462 ret = srp_map_sg_entry(state, ch, sg);
1463 if (ret)
1464 return ret;
1465 }
1466
1467 ret = srp_map_finish_fmr(state, ch);
1468 if (ret)
1469 return ret;
1470
1471 return 0;
1472 }
1473
1474 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1475 struct srp_request *req, struct scatterlist *scat,
1476 int count)
1477 {
1478 unsigned int sg_offset = 0;
1479
1480 state->fr.next = req->fr_list;
1481 state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1482 state->sg = scat;
1483
1484 if (count == 0)
1485 return 0;
1486
1487 while (count) {
1488 int i, n;
1489
1490 n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1491 if (unlikely(n < 0))
1492 return n;
1493
1494 count -= n;
1495 for (i = 0; i < n; i++)
1496 state->sg = sg_next(state->sg);
1497 }
1498
1499 return 0;
1500 }
1501
1502 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1503 struct srp_request *req, struct scatterlist *scat,
1504 int count)
1505 {
1506 struct srp_target_port *target = ch->target;
1507 struct srp_device *dev = target->srp_host->srp_dev;
1508 struct scatterlist *sg;
1509 int i;
1510
1511 for_each_sg(scat, sg, count, i) {
1512 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1513 ib_sg_dma_len(dev->dev, sg),
1514 target->pd->unsafe_global_rkey);
1515 }
1516
1517 return 0;
1518 }
1519
1520 /*
1521 * Register the indirect data buffer descriptor with the HCA.
1522 *
1523 * Note: since the indirect data buffer descriptor has been allocated with
1524 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1525 * memory buffer.
1526 */
1527 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1528 void **next_mr, void **end_mr, u32 idb_len,
1529 __be32 *idb_rkey)
1530 {
1531 struct srp_target_port *target = ch->target;
1532 struct srp_device *dev = target->srp_host->srp_dev;
1533 struct srp_map_state state;
1534 struct srp_direct_buf idb_desc;
1535 u64 idb_pages[1];
1536 struct scatterlist idb_sg[1];
1537 int ret;
1538
1539 memset(&state, 0, sizeof(state));
1540 memset(&idb_desc, 0, sizeof(idb_desc));
1541 state.gen.next = next_mr;
1542 state.gen.end = end_mr;
1543 state.desc = &idb_desc;
1544 state.base_dma_addr = req->indirect_dma_addr;
1545 state.dma_len = idb_len;
1546
1547 if (dev->use_fast_reg) {
1548 state.sg = idb_sg;
1549 sg_init_one(idb_sg, req->indirect_desc, idb_len);
1550 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1551 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1552 idb_sg->dma_length = idb_sg->length; /* hack^2 */
1553 #endif
1554 ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1555 if (ret < 0)
1556 return ret;
1557 WARN_ON_ONCE(ret < 1);
1558 } else if (dev->use_fmr) {
1559 state.pages = idb_pages;
1560 state.pages[0] = (req->indirect_dma_addr &
1561 dev->mr_page_mask);
1562 state.npages = 1;
1563 ret = srp_map_finish_fmr(&state, ch);
1564 if (ret < 0)
1565 return ret;
1566 } else {
1567 return -EINVAL;
1568 }
1569
1570 *idb_rkey = idb_desc.key;
1571
1572 return 0;
1573 }
1574
1575 static void srp_check_mapping(struct srp_map_state *state,
1576 struct srp_rdma_ch *ch, struct srp_request *req,
1577 struct scatterlist *scat, int count)
1578 {
1579 struct srp_device *dev = ch->target->srp_host->srp_dev;
1580 struct srp_fr_desc **pfr;
1581 u64 desc_len = 0, mr_len = 0;
1582 int i;
1583
1584 for (i = 0; i < state->ndesc; i++)
1585 desc_len += be32_to_cpu(req->indirect_desc[i].len);
1586 if (dev->use_fast_reg)
1587 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1588 mr_len += (*pfr)->mr->length;
1589 else if (dev->use_fmr)
1590 for (i = 0; i < state->nmdesc; i++)
1591 mr_len += be32_to_cpu(req->indirect_desc[i].len);
1592 if (desc_len != scsi_bufflen(req->scmnd) ||
1593 mr_len > scsi_bufflen(req->scmnd))
1594 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1595 scsi_bufflen(req->scmnd), desc_len, mr_len,
1596 state->ndesc, state->nmdesc);
1597 }
1598
1599 /**
1600 * srp_map_data() - map SCSI data buffer onto an SRP request
1601 * @scmnd: SCSI command to map
1602 * @ch: SRP RDMA channel
1603 * @req: SRP request
1604 *
1605 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1606 * mapping failed.
1607 */
1608 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1609 struct srp_request *req)
1610 {
1611 struct srp_target_port *target = ch->target;
1612 struct ib_pd *pd = target->pd;
1613 struct scatterlist *scat;
1614 struct srp_cmd *cmd = req->cmd->buf;
1615 int len, nents, count, ret;
1616 struct srp_device *dev;
1617 struct ib_device *ibdev;
1618 struct srp_map_state state;
1619 struct srp_indirect_buf *indirect_hdr;
1620 u32 idb_len, table_len;
1621 __be32 idb_rkey;
1622 u8 fmt;
1623
1624 if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1625 return sizeof (struct srp_cmd);
1626
1627 if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1628 scmnd->sc_data_direction != DMA_TO_DEVICE) {
1629 shost_printk(KERN_WARNING, target->scsi_host,
1630 PFX "Unhandled data direction %d\n",
1631 scmnd->sc_data_direction);
1632 return -EINVAL;
1633 }
1634
1635 nents = scsi_sg_count(scmnd);
1636 scat = scsi_sglist(scmnd);
1637
1638 dev = target->srp_host->srp_dev;
1639 ibdev = dev->dev;
1640
1641 count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1642 if (unlikely(count == 0))
1643 return -EIO;
1644
1645 fmt = SRP_DATA_DESC_DIRECT;
1646 len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1647
1648 if (count == 1 && (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1649 /*
1650 * The midlayer only generated a single gather/scatter
1651 * entry, or DMA mapping coalesced everything to a
1652 * single entry. So a direct descriptor along with
1653 * the DMA MR suffices.
1654 */
1655 struct srp_direct_buf *buf = (void *) cmd->add_data;
1656
1657 buf->va = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1658 buf->key = cpu_to_be32(pd->unsafe_global_rkey);
1659 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1660
1661 req->nmdesc = 0;
1662 goto map_complete;
1663 }
1664
1665 /*
1666 * We have more than one scatter/gather entry, so build our indirect
1667 * descriptor table, trying to merge as many entries as we can.
1668 */
1669 indirect_hdr = (void *) cmd->add_data;
1670
1671 ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1672 target->indirect_size, DMA_TO_DEVICE);
1673
1674 memset(&state, 0, sizeof(state));
1675 state.desc = req->indirect_desc;
1676 if (dev->use_fast_reg)
1677 ret = srp_map_sg_fr(&state, ch, req, scat, count);
1678 else if (dev->use_fmr)
1679 ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1680 else
1681 ret = srp_map_sg_dma(&state, ch, req, scat, count);
1682 req->nmdesc = state.nmdesc;
1683 if (ret < 0)
1684 goto unmap;
1685
1686 {
1687 DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1688 "Memory mapping consistency check");
1689 if (DYNAMIC_DEBUG_BRANCH(ddm))
1690 srp_check_mapping(&state, ch, req, scat, count);
1691 }
1692
1693 /* We've mapped the request, now pull as much of the indirect
1694 * descriptor table as we can into the command buffer. If this
1695 * target is not using an external indirect table, we are
1696 * guaranteed to fit into the command, as the SCSI layer won't
1697 * give us more S/G entries than we allow.
1698 */
1699 if (state.ndesc == 1) {
1700 /*
1701 * Memory registration collapsed the sg-list into one entry,
1702 * so use a direct descriptor.
1703 */
1704 struct srp_direct_buf *buf = (void *) cmd->add_data;
1705
1706 *buf = req->indirect_desc[0];
1707 goto map_complete;
1708 }
1709
1710 if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1711 !target->allow_ext_sg)) {
1712 shost_printk(KERN_ERR, target->scsi_host,
1713 "Could not fit S/G list into SRP_CMD\n");
1714 ret = -EIO;
1715 goto unmap;
1716 }
1717
1718 count = min(state.ndesc, target->cmd_sg_cnt);
1719 table_len = state.ndesc * sizeof (struct srp_direct_buf);
1720 idb_len = sizeof(struct srp_indirect_buf) + table_len;
1721
1722 fmt = SRP_DATA_DESC_INDIRECT;
1723 len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1724 len += count * sizeof (struct srp_direct_buf);
1725
1726 memcpy(indirect_hdr->desc_list, req->indirect_desc,
1727 count * sizeof (struct srp_direct_buf));
1728
1729 if (!(pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)) {
1730 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1731 idb_len, &idb_rkey);
1732 if (ret < 0)
1733 goto unmap;
1734 req->nmdesc++;
1735 } else {
1736 idb_rkey = cpu_to_be32(pd->unsafe_global_rkey);
1737 }
1738
1739 indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1740 indirect_hdr->table_desc.key = idb_rkey;
1741 indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1742 indirect_hdr->len = cpu_to_be32(state.total_len);
1743
1744 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1745 cmd->data_out_desc_cnt = count;
1746 else
1747 cmd->data_in_desc_cnt = count;
1748
1749 ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1750 DMA_TO_DEVICE);
1751
1752 map_complete:
1753 if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1754 cmd->buf_fmt = fmt << 4;
1755 else
1756 cmd->buf_fmt = fmt;
1757
1758 return len;
1759
1760 unmap:
1761 srp_unmap_data(scmnd, ch, req);
1762 if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1763 ret = -E2BIG;
1764 return ret;
1765 }
1766
1767 /*
1768 * Return an IU and possible credit to the free pool
1769 */
1770 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1771 enum srp_iu_type iu_type)
1772 {
1773 unsigned long flags;
1774
1775 spin_lock_irqsave(&ch->lock, flags);
1776 list_add(&iu->list, &ch->free_tx);
1777 if (iu_type != SRP_IU_RSP)
1778 ++ch->req_lim;
1779 spin_unlock_irqrestore(&ch->lock, flags);
1780 }
1781
1782 /*
1783 * Must be called with ch->lock held to protect req_lim and free_tx.
1784 * If IU is not sent, it must be returned using srp_put_tx_iu().
1785 *
1786 * Note:
1787 * An upper limit for the number of allocated information units for each
1788 * request type is:
1789 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1790 * more than Scsi_Host.can_queue requests.
1791 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1792 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1793 * one unanswered SRP request to an initiator.
1794 */
1795 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1796 enum srp_iu_type iu_type)
1797 {
1798 struct srp_target_port *target = ch->target;
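/*
 * Reserve SRP_TSK_MGMT_SQ_SIZE credits for task management requests so
 * that normal commands cannot exhaust the request limit.
 */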
1799 s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1800 struct srp_iu *iu;
1801
1802 ib_process_cq_direct(ch->send_cq, -1);
1803
1804 if (list_empty(&ch->free_tx))
1805 return NULL;
1806
1807 /* Initiator responses to target requests do not consume credits */
1808 if (iu_type != SRP_IU_RSP) {
1809 if (ch->req_lim <= rsv) {
1810 ++target->zero_req_lim;
1811 return NULL;
1812 }
1813
1814 --ch->req_lim;
1815 }
1816
1817 iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1818 list_del(&iu->list);
1819 return iu;
1820 }
1821
1822 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1823 {
1824 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1825 struct srp_rdma_ch *ch = cq->cq_context;
1826
1827 if (unlikely(wc->status != IB_WC_SUCCESS)) {
1828 srp_handle_qp_err(cq, wc, "SEND");
1829 return;
1830 }
1831
1832 list_add(&iu->list, &ch->free_tx);
1833 }
1834
1835 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1836 {
1837 struct srp_target_port *target = ch->target;
1838 struct ib_sge list;
1839 struct ib_send_wr wr, *bad_wr;
1840
1841 list.addr = iu->dma;
1842 list.length = len;
1843 list.lkey = target->lkey;
1844
1845 iu->cqe.done = srp_send_done;
1846
1847 wr.next = NULL;
1848 wr.wr_cqe = &iu->cqe;
1849 wr.sg_list = &list;
1850 wr.num_sge = 1;
1851 wr.opcode = IB_WR_SEND;
1852 wr.send_flags = IB_SEND_SIGNALED;
1853
1854 return ib_post_send(ch->qp, &wr, &bad_wr);
1855 }
1856
1857 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1858 {
1859 struct srp_target_port *target = ch->target;
1860 struct ib_recv_wr wr, *bad_wr;
1861 struct ib_sge list;
1862
1863 list.addr = iu->dma;
1864 list.length = iu->size;
1865 list.lkey = target->lkey;
1866
1867 iu->cqe.done = srp_recv_done;
1868
1869 wr.next = NULL;
1870 wr.wr_cqe = &iu->cqe;
1871 wr.sg_list = &list;
1872 wr.num_sge = 1;
1873
1874 return ib_post_recv(ch->qp, &wr, &bad_wr);
1875 }
1876
1877 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1878 {
1879 struct srp_target_port *target = ch->target;
1880 struct srp_request *req;
1881 struct scsi_cmnd *scmnd;
1882 unsigned long flags;
1883
1884 if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1885 spin_lock_irqsave(&ch->lock, flags);
1886 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1887 spin_unlock_irqrestore(&ch->lock, flags);
1888
1889 ch->tsk_mgmt_status = -1;
1890 if (be32_to_cpu(rsp->resp_data_len) >= 4)
1891 ch->tsk_mgmt_status = rsp->data[3];
1892 complete(&ch->tsk_mgmt_done);
1893 } else {
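/*
 * host_scribble holds the srp_request pointer that was stored when the
 * command was queued.
 */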
1894 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1895 if (scmnd) {
1896 req = (void *)scmnd->host_scribble;
1897 scmnd = srp_claim_req(ch, req, NULL, scmnd);
1898 }
1899 if (!scmnd) {
1900 shost_printk(KERN_ERR, target->scsi_host,
1901 "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1902 rsp->tag, ch - target->ch, ch->qp->qp_num);
1903
1904 spin_lock_irqsave(&ch->lock, flags);
1905 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1906 spin_unlock_irqrestore(&ch->lock, flags);
1907
1908 return;
1909 }
1910 scmnd->result = rsp->status;
1911
1912 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1913 memcpy(scmnd->sense_buffer, rsp->data +
1914 be32_to_cpu(rsp->resp_data_len),
1915 min_t(int, be32_to_cpu(rsp->sense_data_len),
1916 SCSI_SENSE_BUFFERSIZE));
1917 }
1918
1919 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1920 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1921 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
1922 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
1923 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1924 scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1925 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
1926 scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
1927
1928 srp_free_req(ch, req, scmnd,
1929 be32_to_cpu(rsp->req_lim_delta));
1930
1931 scmnd->host_scribble = NULL;
1932 scmnd->scsi_done(scmnd);
1933 }
1934 }
1935
1936 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1937 void *rsp, int len)
1938 {
1939 struct srp_target_port *target = ch->target;
1940 struct ib_device *dev = target->srp_host->srp_dev->dev;
1941 unsigned long flags;
1942 struct srp_iu *iu;
1943 int err;
1944
1945 spin_lock_irqsave(&ch->lock, flags);
1946 ch->req_lim += req_delta;
1947 iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
1948 spin_unlock_irqrestore(&ch->lock, flags);
1949
1950 if (!iu) {
1951 shost_printk(KERN_ERR, target->scsi_host, PFX
1952 "no IU available to send response\n");
1953 return 1;
1954 }
1955
1956 ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
1957 memcpy(iu->buf, rsp, len);
1958 ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
1959
1960 err = srp_post_send(ch, iu, len);
1961 if (err) {
1962 shost_printk(KERN_ERR, target->scsi_host, PFX
1963 "unable to post response: %d\n", err);
1964 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
1965 }
1966
1967 return err;
1968 }
1969
1970 static void srp_process_cred_req(struct srp_rdma_ch *ch,
1971 struct srp_cred_req *req)
1972 {
1973 struct srp_cred_rsp rsp = {
1974 .opcode = SRP_CRED_RSP,
1975 .tag = req->tag,
1976 };
1977 s32 delta = be32_to_cpu(req->req_lim_delta);
1978
1979 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1980 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
1981 "problems processing SRP_CRED_REQ\n");
1982 }
1983
1984 static void srp_process_aer_req(struct srp_rdma_ch *ch,
1985 struct srp_aer_req *req)
1986 {
1987 struct srp_target_port *target = ch->target;
1988 struct srp_aer_rsp rsp = {
1989 .opcode = SRP_AER_RSP,
1990 .tag = req->tag,
1991 };
1992 s32 delta = be32_to_cpu(req->req_lim_delta);
1993
1994 shost_printk(KERN_ERR, target->scsi_host, PFX
1995 "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
1996
1997 if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
1998 shost_printk(KERN_ERR, target->scsi_host, PFX
1999 "problems processing SRP_AER_REQ\n");
2000 }
2001
2002 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2003 {
2004 struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2005 struct srp_rdma_ch *ch = cq->cq_context;
2006 struct srp_target_port *target = ch->target;
2007 struct ib_device *dev = target->srp_host->srp_dev->dev;
2008 int res;
2009 u8 opcode;
2010
2011 if (unlikely(wc->status != IB_WC_SUCCESS)) {
2012 srp_handle_qp_err(cq, wc, "RECV");
2013 return;
2014 }
2015
2016 ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2017 DMA_FROM_DEVICE);
2018
2019 opcode = *(u8 *) iu->buf;
2020
2021 if (0) {
2022 shost_printk(KERN_ERR, target->scsi_host,
2023 PFX "recv completion, opcode 0x%02x\n", opcode);
2024 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2025 iu->buf, wc->byte_len, true);
2026 }
2027
2028 switch (opcode) {
2029 case SRP_RSP:
2030 srp_process_rsp(ch, iu->buf);
2031 break;
2032
2033 case SRP_CRED_REQ:
2034 srp_process_cred_req(ch, iu->buf);
2035 break;
2036
2037 case SRP_AER_REQ:
2038 srp_process_aer_req(ch, iu->buf);
2039 break;
2040
2041 case SRP_T_LOGOUT:
2042 /* XXX Handle target logout */
2043 shost_printk(KERN_WARNING, target->scsi_host,
2044 PFX "Got target logout request\n");
2045 break;
2046
2047 default:
2048 shost_printk(KERN_WARNING, target->scsi_host,
2049 PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2050 break;
2051 }
2052
2053 ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2054 DMA_FROM_DEVICE);
2055
2056 res = srp_post_recv(ch, iu);
2057 if (res != 0)
2058 shost_printk(KERN_ERR, target->scsi_host,
2059 PFX "Recv failed with error code %d\n", res);
2060 }
2061
2062 /**
2063 * srp_tl_err_work() - handle a transport layer error
2064 * @work: Work structure embedded in an SRP target port.
2065 *
2066 * Note: This function may get invoked before the rport has been created,
2067 * hence the target->rport test.
2068 */
2069 static void srp_tl_err_work(struct work_struct *work)
2070 {
2071 struct srp_target_port *target;
2072
2073 target = container_of(work, struct srp_target_port, tl_err_work);
2074 if (target->rport)
2075 srp_start_tl_fail_timers(target->rport);
2076 }
2077
2078 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2079 const char *opname)
2080 {
2081 struct srp_rdma_ch *ch = cq->cq_context;
2082 struct srp_target_port *target = ch->target;
2083
2084 if (ch->connected && !target->qp_in_error) {
2085 shost_printk(KERN_ERR, target->scsi_host,
2086 PFX "failed %s status %s (%d) for CQE %p\n",
2087 opname, ib_wc_status_msg(wc->status), wc->status,
2088 wc->wr_cqe);
2089 queue_work(system_long_wq, &target->tl_err_work);
2090 }
2091 target->qp_in_error = true;
2092 }
2093
2094 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2095 {
2096 struct srp_target_port *target = host_to_target(shost);
2097 struct srp_rport *rport = target->rport;
2098 struct srp_rdma_ch *ch;
2099 struct srp_request *req;
2100 struct srp_iu *iu;
2101 struct srp_cmd *cmd;
2102 struct ib_device *dev;
2103 unsigned long flags;
2104 u32 tag;
2105 u16 idx;
2106 int len, ret;
2107 const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2108
2109 /*
2110 * The SCSI EH thread is the only context from which srp_queuecommand()
2111 * can get invoked for blocked devices (SDEV_BLOCK /
2112 * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2113 * locking the rport mutex if invoked from inside the SCSI EH.
2114 */
2115 if (in_scsi_eh)
2116 mutex_lock(&rport->mutex);
2117
2118 scmnd->result = srp_chkready(target->rport);
2119 if (unlikely(scmnd->result))
2120 goto err;
2121
2122 WARN_ON_ONCE(scmnd->request->tag < 0);
2123 tag = blk_mq_unique_tag(scmnd->request);
2124 ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2125 idx = blk_mq_unique_tag_to_tag(tag);
2126 WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2127 dev_name(&shost->shost_gendev), tag, idx,
2128 target->req_ring_size);
2129
2130 spin_lock_irqsave(&ch->lock, flags);
2131 iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2132 spin_unlock_irqrestore(&ch->lock, flags);
2133
2134 if (!iu)
2135 goto err;
2136
2137 req = &ch->req_ring[idx];
2138 dev = target->srp_host->srp_dev->dev;
2139 ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2140 DMA_TO_DEVICE);
2141
2142 scmnd->host_scribble = (void *) req;
2143
2144 cmd = iu->buf;
2145 memset(cmd, 0, sizeof *cmd);
2146
2147 cmd->opcode = SRP_CMD;
2148 int_to_scsilun(scmnd->device->lun, &cmd->lun);
2149 cmd->tag = tag;
2150 memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2151
2152 req->scmnd = scmnd;
2153 req->cmd = iu;
2154
2155 len = srp_map_data(scmnd, ch, req);
2156 if (len < 0) {
2157 shost_printk(KERN_ERR, target->scsi_host,
2158 PFX "Failed to map data (%d)\n", len);
2159 /*
2160 * If we ran out of memory descriptors (-ENOMEM) because an
2161 * application is queuing many requests with more than
2162 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2163 * to reduce queue depth temporarily.
2164 */
2165 scmnd->result = len == -ENOMEM ?
2166 DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2167 goto err_iu;
2168 }
2169
2170 ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2171 DMA_TO_DEVICE);
2172
2173 if (srp_post_send(ch, iu, len)) {
2174 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2175 goto err_unmap;
2176 }
2177
2178 ret = 0;
2179
2180 unlock_rport:
2181 if (in_scsi_eh)
2182 mutex_unlock(&rport->mutex);
2183
2184 return ret;
2185
2186 err_unmap:
2187 srp_unmap_data(scmnd, ch, req);
2188
2189 err_iu:
2190 srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2191
2192 /*
2193 * Make sure that the loops that iterate over the request ring
2194 * never encounter a dangling SCSI command pointer.
2195 */
2196 req->scmnd = NULL;
2197
2198 err:
2199 if (scmnd->result) {
2200 scmnd->scsi_done(scmnd);
2201 ret = 0;
2202 } else {
2203 ret = SCSI_MLQUEUE_HOST_BUSY;
2204 }
2205
2206 goto unlock_rport;
2207 }
2208
2209 /*
2210 * Note: the resources allocated in this function are freed in
2211 * srp_free_ch_ib().
2212 */
2213 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2214 {
2215 struct srp_target_port *target = ch->target;
2216 int i;
2217
2218 ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2219 GFP_KERNEL);
2220 if (!ch->rx_ring)
2221 goto err_no_ring;
2222 ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2223 GFP_KERNEL);
2224 if (!ch->tx_ring)
2225 goto err_no_ring;
2226
2227 for (i = 0; i < target->queue_size; ++i) {
2228 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2229 ch->max_ti_iu_len,
2230 GFP_KERNEL, DMA_FROM_DEVICE);
2231 if (!ch->rx_ring[i])
2232 goto err;
2233 }
2234
2235 for (i = 0; i < target->queue_size; ++i) {
2236 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2237 target->max_iu_len,
2238 GFP_KERNEL, DMA_TO_DEVICE);
2239 if (!ch->tx_ring[i])
2240 goto err;
2241
2242 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2243 }
2244
2245 return 0;
2246
2247 err:
2248 for (i = 0; i < target->queue_size; ++i) {
2249 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2250 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2251 }
2252
2253
2254 err_no_ring:
2255 kfree(ch->tx_ring);
2256 ch->tx_ring = NULL;
2257 kfree(ch->rx_ring);
2258 ch->rx_ring = NULL;
2259
2260 return -ENOMEM;
2261 }
2262
2263 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2264 {
2265 uint64_t T_tr_ns, max_compl_time_ms;
2266 uint32_t rq_tmo_jiffies;
2267
2268 /*
2269 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2270 * table 91), both the QP timeout and the retry count have to be set
2271 * for RC QPs during the RTR to RTS transition.
2272 */
2273 WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2274 (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2275
2276 /*
2277 * Set target->rq_tmo_jiffies to one second more than the largest time
2278 * it can take before an error completion is generated. See also
2279 * C9-140..142 in the IBTA spec for more information about how to
2280 * convert the QP Local ACK Timeout value to nanoseconds.
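*
* A worked example with illustrative values (not taken from any
* particular HCA): if qp_attr->timeout == 14 and qp_attr->retry_cnt == 7,
* then T_tr = 4096 ns * 2^14 ~= 67 ms, max_compl_time_ms ~= 7 * 4 * 67 ms
* ~= 1879 ms, and the function returns msecs_to_jiffies(2879).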
2281 */
2282 T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2283 max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2284 do_div(max_compl_time_ms, NSEC_PER_MSEC);
2285 rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2286
2287 return rq_tmo_jiffies;
2288 }
2289
2290 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2291 const struct srp_login_rsp *lrsp,
2292 struct srp_rdma_ch *ch)
2293 {
2294 struct srp_target_port *target = ch->target;
2295 struct ib_qp_attr *qp_attr = NULL;
2296 int attr_mask = 0;
2297 int ret;
2298 int i;
2299
2300 if (lrsp->opcode == SRP_LOGIN_RSP) {
2301 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2302 ch->req_lim = be32_to_cpu(lrsp->req_lim_delta);
2303
2304 /*
2305 * Reserve credits for task management so we don't
2306 * bounce requests back to the SCSI mid-layer.
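*
* For example (illustrative numbers, assuming SRP_TSK_MGMT_SQ_SIZE is 1
* in this tree): a login response granting req_lim = 64 credits caps
* can_queue at 63, leaving one credit for task management requests.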
2307 */
2308 target->scsi_host->can_queue
2309 = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2310 target->scsi_host->can_queue);
2311 target->scsi_host->cmd_per_lun
2312 = min_t(int, target->scsi_host->can_queue,
2313 target->scsi_host->cmd_per_lun);
2314 } else {
2315 shost_printk(KERN_WARNING, target->scsi_host,
2316 PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2317 ret = -ECONNRESET;
2318 goto error;
2319 }
2320
2321 if (!ch->rx_ring) {
2322 ret = srp_alloc_iu_bufs(ch);
2323 if (ret)
2324 goto error;
2325 }
2326
2327 ret = -ENOMEM;
2328 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
2329 if (!qp_attr)
2330 goto error;
2331
2332 qp_attr->qp_state = IB_QPS_RTR;
2333 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2334 if (ret)
2335 goto error_free;
2336
2337 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2338 if (ret)
2339 goto error_free;
2340
2341 for (i = 0; i < target->queue_size; i++) {
2342 struct srp_iu *iu = ch->rx_ring[i];
2343
2344 ret = srp_post_recv(ch, iu);
2345 if (ret)
2346 goto error_free;
2347 }
2348
2349 qp_attr->qp_state = IB_QPS_RTS;
2350 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2351 if (ret)
2352 goto error_free;
2353
2354 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2355
2356 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2357 if (ret)
2358 goto error_free;
2359
2360 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2361
2362 error_free:
2363 kfree(qp_attr);
2364
2365 error:
2366 ch->status = ret;
2367 }
2368
2369 static void srp_cm_rej_handler(struct ib_cm_id *cm_id,
2370 struct ib_cm_event *event,
2371 struct srp_rdma_ch *ch)
2372 {
2373 struct srp_target_port *target = ch->target;
2374 struct Scsi_Host *shost = target->scsi_host;
2375 struct ib_class_port_info *cpi;
2376 int opcode;
2377
2378 switch (event->param.rej_rcvd.reason) {
2379 case IB_CM_REJ_PORT_CM_REDIRECT:
2380 cpi = event->param.rej_rcvd.ari;
2381 ch->path.dlid = cpi->redirect_lid;
2382 ch->path.pkey = cpi->redirect_pkey;
2383 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2384 memcpy(ch->path.dgid.raw, cpi->redirect_gid, 16);
2385
2386 ch->status = ch->path.dlid ?
2387 SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2388 break;
2389
2390 case IB_CM_REJ_PORT_REDIRECT:
2391 if (srp_target_is_topspin(target)) {
2392 /*
2393 * Topspin/Cisco SRP gateways incorrectly send
2394 * reject reason code 25 when they mean 24
2395 * (port redirect).
2396 */
2397 memcpy(ch->path.dgid.raw,
2398 event->param.rej_rcvd.ari, 16);
2399
2400 shost_printk(KERN_DEBUG, shost,
2401 PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2402 be64_to_cpu(ch->path.dgid.global.subnet_prefix),
2403 be64_to_cpu(ch->path.dgid.global.interface_id));
2404
2405 ch->status = SRP_PORT_REDIRECT;
2406 } else {
2407 shost_printk(KERN_WARNING, shost,
2408 " REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2409 ch->status = -ECONNRESET;
2410 }
2411 break;
2412
2413 case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2414 shost_printk(KERN_WARNING, shost,
2415 " REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2416 ch->status = -ECONNRESET;
2417 break;
2418
2419 case IB_CM_REJ_CONSUMER_DEFINED:
2420 opcode = *(u8 *) event->private_data;
2421 if (opcode == SRP_LOGIN_REJ) {
2422 struct srp_login_rej *rej = event->private_data;
2423 u32 reason = be32_to_cpu(rej->reason);
2424
2425 if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2426 shost_printk(KERN_WARNING, shost,
2427 PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2428 else
2429 shost_printk(KERN_WARNING, shost, PFX
2430 "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2431 target->sgid.raw,
2432 target->orig_dgid.raw, reason);
2433 } else
2434 shost_printk(KERN_WARNING, shost,
2435 " REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2436 opcode);
2437 ch->status = -ECONNRESET;
2438 break;
2439
2440 case IB_CM_REJ_STALE_CONN:
2441 shost_printk(KERN_WARNING, shost, " REJ reason: stale connection\n");
2442 ch->status = SRP_STALE_CONN;
2443 break;
2444
2445 default:
2446 shost_printk(KERN_WARNING, shost, " REJ reason 0x%x\n",
2447 event->param.rej_rcvd.reason);
2448 ch->status = -ECONNRESET;
2449 }
2450 }
2451
2452 static int srp_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2453 {
2454 struct srp_rdma_ch *ch = cm_id->context;
2455 struct srp_target_port *target = ch->target;
2456 int comp = 0;
2457
2458 switch (event->event) {
2459 case IB_CM_REQ_ERROR:
2460 shost_printk(KERN_DEBUG, target->scsi_host,
2461 PFX "Sending CM REQ failed\n");
2462 comp = 1;
2463 ch->status = -ECONNRESET;
2464 break;
2465
2466 case IB_CM_REP_RECEIVED:
2467 comp = 1;
2468 srp_cm_rep_handler(cm_id, event->private_data, ch);
2469 break;
2470
2471 case IB_CM_REJ_RECEIVED:
2472 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2473 comp = 1;
2474
2475 srp_cm_rej_handler(cm_id, event, ch);
2476 break;
2477
2478 case IB_CM_DREQ_RECEIVED:
2479 shost_printk(KERN_WARNING, target->scsi_host,
2480 PFX "DREQ received - connection closed\n");
2481 ch->connected = false;
2482 if (ib_send_cm_drep(cm_id, NULL, 0))
2483 shost_printk(KERN_ERR, target->scsi_host,
2484 PFX "Sending CM DREP failed\n");
2485 queue_work(system_long_wq, &target->tl_err_work);
2486 break;
2487
2488 case IB_CM_TIMEWAIT_EXIT:
2489 shost_printk(KERN_ERR, target->scsi_host,
2490 PFX "connection closed\n");
2491 comp = 1;
2492
2493 ch->status = 0;
2494 break;
2495
2496 case IB_CM_MRA_RECEIVED:
2497 case IB_CM_DREQ_ERROR:
2498 case IB_CM_DREP_RECEIVED:
2499 break;
2500
2501 default:
2502 shost_printk(KERN_WARNING, target->scsi_host,
2503 PFX "Unhandled CM event %d\n", event->event);
2504 break;
2505 }
2506
2507 if (comp)
2508 complete(&ch->done);
2509
2510 return 0;
2511 }
2512
2513 /**
2514 * srp_change_queue_depth - set the device queue depth
2515 * @sdev: scsi device struct
2516 * @qdepth: requested queue depth
2517 *
2518 * Returns queue depth.
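*
* For example, a request for qdepth = 64 on a device without tagged
* command queuing support is clamped to a queue depth of 1 before being
* passed on to scsi_change_queue_depth().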
2519 */
2520 static int
2521 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2522 {
2523 if (!sdev->tagged_supported)
2524 qdepth = 1;
2525 return scsi_change_queue_depth(sdev, qdepth);
2526 }
2527
2528 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2529 u8 func)
2530 {
2531 struct srp_target_port *target = ch->target;
2532 struct srp_rport *rport = target->rport;
2533 struct ib_device *dev = target->srp_host->srp_dev->dev;
2534 struct srp_iu *iu;
2535 struct srp_tsk_mgmt *tsk_mgmt;
2536
2537 if (!ch->connected || target->qp_in_error)
2538 return -1;
2539
2540 init_completion(&ch->tsk_mgmt_done);
2541
2542 /*
2543 * Lock the rport mutex to prevent srp_create_ch_ib() from being
2544 * invoked while a task management function is being sent.
2545 */
2546 mutex_lock(&rport->mutex);
2547 spin_lock_irq(&ch->lock);
2548 iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2549 spin_unlock_irq(&ch->lock);
2550
2551 if (!iu) {
2552 mutex_unlock(&rport->mutex);
2553
2554 return -1;
2555 }
2556
2557 ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2558 DMA_TO_DEVICE);
2559 tsk_mgmt = iu->buf;
2560 memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2561
2562 tsk_mgmt->opcode = SRP_TSK_MGMT;
2563 int_to_scsilun(lun, &tsk_mgmt->lun);
2564 tsk_mgmt->tag = req_tag | SRP_TAG_TSK_MGMT;
2565 tsk_mgmt->tsk_mgmt_func = func;
2566 tsk_mgmt->task_tag = req_tag;
2567
2568 ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2569 DMA_TO_DEVICE);
2570 if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2571 srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2572 mutex_unlock(&rport->mutex);
2573
2574 return -1;
2575 }
2576 mutex_unlock(&rport->mutex);
2577
2578 if (!wait_for_completion_timeout(&ch->tsk_mgmt_done,
2579 msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS)))
2580 return -1;
2581
2582 return 0;
2583 }
2584
2585 static int srp_abort(struct scsi_cmnd *scmnd)
2586 {
2587 struct srp_target_port *target = host_to_target(scmnd->device->host);
2588 struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
2589 u32 tag;
2590 u16 ch_idx;
2591 struct srp_rdma_ch *ch;
2592 int ret;
2593
2594 shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2595
2596 if (!req)
2597 return SUCCESS;
2598 tag = blk_mq_unique_tag(scmnd->request);
2599 ch_idx = blk_mq_unique_tag_to_hwq(tag);
2600 if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2601 return SUCCESS;
2602 ch = &target->ch[ch_idx];
2603 if (!srp_claim_req(ch, req, NULL, scmnd))
2604 return SUCCESS;
2605 shost_printk(KERN_ERR, target->scsi_host,
2606 "Sending SRP abort for tag %#x\n", tag);
2607 if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2608 SRP_TSK_ABORT_TASK) == 0)
2609 ret = SUCCESS;
2610 else if (target->rport->state == SRP_RPORT_LOST)
2611 ret = FAST_IO_FAIL;
2612 else
2613 ret = FAILED;
2614 srp_free_req(ch, req, scmnd, 0);
2615 scmnd->result = DID_ABORT << 16;
2616 scmnd->scsi_done(scmnd);
2617
2618 return ret;
2619 }
2620
2621 static int srp_reset_device(struct scsi_cmnd *scmnd)
2622 {
2623 struct srp_target_port *target = host_to_target(scmnd->device->host);
2624 struct srp_rdma_ch *ch;
2625 int i, j;
2626
2627 shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2628
2629 ch = &target->ch[0];
2630 if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2631 SRP_TSK_LUN_RESET))
2632 return FAILED;
2633 if (ch->tsk_mgmt_status)
2634 return FAILED;
2635
2636 for (i = 0; i < target->ch_count; i++) {
2637 ch = &target->ch[i];
2638 for (j = 0; j < target->req_ring_size; ++j) {
2639 struct srp_request *req = &ch->req_ring[j];
2640
2641 srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
2642 }
2643 }
2644
2645 return SUCCESS;
2646 }
2647
2648 static int srp_reset_host(struct scsi_cmnd *scmnd)
2649 {
2650 struct srp_target_port *target = host_to_target(scmnd->device->host);
2651
2652 shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2653
2654 return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2655 }
2656
2657 static int srp_slave_alloc(struct scsi_device *sdev)
2658 {
2659 struct Scsi_Host *shost = sdev->host;
2660 struct srp_target_port *target = host_to_target(shost);
2661 struct srp_device *srp_dev = target->srp_host->srp_dev;
2662 struct ib_device *ibdev = srp_dev->dev;
2663
2664 if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
2665 blk_queue_virt_boundary(sdev->request_queue,
2666 ~srp_dev->mr_page_mask);
2667
2668 return 0;
2669 }
2670
2671 static int srp_slave_configure(struct scsi_device *sdev)
2672 {
2673 struct Scsi_Host *shost = sdev->host;
2674 struct srp_target_port *target = host_to_target(shost);
2675 struct request_queue *q = sdev->request_queue;
2676 unsigned long timeout;
2677
2678 if (sdev->type == TYPE_DISK) {
2679 timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2680 blk_queue_rq_timeout(q, timeout);
2681 }
2682
2683 return 0;
2684 }
2685
2686 static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2687 char *buf)
2688 {
2689 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2690
2691 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2692 }
2693
2694 static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2695 char *buf)
2696 {
2697 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2698
2699 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2700 }
2701
2702 static ssize_t show_service_id(struct device *dev,
2703 struct device_attribute *attr, char *buf)
2704 {
2705 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2706
2707 return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->service_id));
2708 }
2709
2710 static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2711 char *buf)
2712 {
2713 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2714
2715 return sprintf(buf, "0x%04x\n", be16_to_cpu(target->pkey));
2716 }
2717
2718 static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2719 char *buf)
2720 {
2721 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2722
2723 return sprintf(buf, "%pI6\n", target->sgid.raw);
2724 }
2725
2726 static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2727 char *buf)
2728 {
2729 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2730 struct srp_rdma_ch *ch = &target->ch[0];
2731
2732 return sprintf(buf, "%pI6\n", ch->path.dgid.raw);
2733 }
2734
2735 static ssize_t show_orig_dgid(struct device *dev,
2736 struct device_attribute *attr, char *buf)
2737 {
2738 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2739
2740 return sprintf(buf, "%pI6\n", target->orig_dgid.raw);
2741 }
2742
2743 static ssize_t show_req_lim(struct device *dev,
2744 struct device_attribute *attr, char *buf)
2745 {
2746 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2747 struct srp_rdma_ch *ch;
2748 int i, req_lim = INT_MAX;
2749
2750 for (i = 0; i < target->ch_count; i++) {
2751 ch = &target->ch[i];
2752 req_lim = min(req_lim, ch->req_lim);
2753 }
2754 return sprintf(buf, "%d\n", req_lim);
2755 }
2756
2757 static ssize_t show_zero_req_lim(struct device *dev,
2758 struct device_attribute *attr, char *buf)
2759 {
2760 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2761
2762 return sprintf(buf, "%d\n", target->zero_req_lim);
2763 }
2764
2765 static ssize_t show_local_ib_port(struct device *dev,
2766 struct device_attribute *attr, char *buf)
2767 {
2768 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2769
2770 return sprintf(buf, "%d\n", target->srp_host->port);
2771 }
2772
2773 static ssize_t show_local_ib_device(struct device *dev,
2774 struct device_attribute *attr, char *buf)
2775 {
2776 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2777
2778 return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
2779 }
2780
2781 static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2782 char *buf)
2783 {
2784 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2785
2786 return sprintf(buf, "%d\n", target->ch_count);
2787 }
2788
2789 static ssize_t show_comp_vector(struct device *dev,
2790 struct device_attribute *attr, char *buf)
2791 {
2792 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2793
2794 return sprintf(buf, "%d\n", target->comp_vector);
2795 }
2796
2797 static ssize_t show_tl_retry_count(struct device *dev,
2798 struct device_attribute *attr, char *buf)
2799 {
2800 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2801
2802 return sprintf(buf, "%d\n", target->tl_retry_count);
2803 }
2804
2805 static ssize_t show_cmd_sg_entries(struct device *dev,
2806 struct device_attribute *attr, char *buf)
2807 {
2808 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2809
2810 return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2811 }
2812
2813 static ssize_t show_allow_ext_sg(struct device *dev,
2814 struct device_attribute *attr, char *buf)
2815 {
2816 struct srp_target_port *target = host_to_target(class_to_shost(dev));
2817
2818 return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
2819 }
2820
2821 static DEVICE_ATTR(id_ext, S_IRUGO, show_id_ext, NULL);
2822 static DEVICE_ATTR(ioc_guid, S_IRUGO, show_ioc_guid, NULL);
2823 static DEVICE_ATTR(service_id, S_IRUGO, show_service_id, NULL);
2824 static DEVICE_ATTR(pkey, S_IRUGO, show_pkey, NULL);
2825 static DEVICE_ATTR(sgid, S_IRUGO, show_sgid, NULL);
2826 static DEVICE_ATTR(dgid, S_IRUGO, show_dgid, NULL);
2827 static DEVICE_ATTR(orig_dgid, S_IRUGO, show_orig_dgid, NULL);
2828 static DEVICE_ATTR(req_lim, S_IRUGO, show_req_lim, NULL);
2829 static DEVICE_ATTR(zero_req_lim, S_IRUGO, show_zero_req_lim, NULL);
2830 static DEVICE_ATTR(local_ib_port, S_IRUGO, show_local_ib_port, NULL);
2831 static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
2832 static DEVICE_ATTR(ch_count, S_IRUGO, show_ch_count, NULL);
2833 static DEVICE_ATTR(comp_vector, S_IRUGO, show_comp_vector, NULL);
2834 static DEVICE_ATTR(tl_retry_count, S_IRUGO, show_tl_retry_count, NULL);
2835 static DEVICE_ATTR(cmd_sg_entries, S_IRUGO, show_cmd_sg_entries, NULL);
2836 static DEVICE_ATTR(allow_ext_sg, S_IRUGO, show_allow_ext_sg, NULL);
2837
2838 static struct device_attribute *srp_host_attrs[] = {
2839 &dev_attr_id_ext,
2840 &dev_attr_ioc_guid,
2841 &dev_attr_service_id,
2842 &dev_attr_pkey,
2843 &dev_attr_sgid,
2844 &dev_attr_dgid,
2845 &dev_attr_orig_dgid,
2846 &dev_attr_req_lim,
2847 &dev_attr_zero_req_lim,
2848 &dev_attr_local_ib_port,
2849 &dev_attr_local_ib_device,
2850 &dev_attr_ch_count,
2851 &dev_attr_comp_vector,
2852 &dev_attr_tl_retry_count,
2853 &dev_attr_cmd_sg_entries,
2854 &dev_attr_allow_ext_sg,
2855 NULL
2856 };
2857
2858 static struct scsi_host_template srp_template = {
2859 .module = THIS_MODULE,
2860 .name = "InfiniBand SRP initiator",
2861 .proc_name = DRV_NAME,
2862 .slave_alloc = srp_slave_alloc,
2863 .slave_configure = srp_slave_configure,
2864 .info = srp_target_info,
2865 .queuecommand = srp_queuecommand,
2866 .change_queue_depth = srp_change_queue_depth,
2867 .eh_abort_handler = srp_abort,
2868 .eh_device_reset_handler = srp_reset_device,
2869 .eh_host_reset_handler = srp_reset_host,
2870 .skip_settle_delay = true,
2871 .sg_tablesize = SRP_DEF_SG_TABLESIZE,
2872 .can_queue = SRP_DEFAULT_CMD_SQ_SIZE,
2873 .this_id = -1,
2874 .cmd_per_lun = SRP_DEFAULT_CMD_SQ_SIZE,
2875 .use_clustering = ENABLE_CLUSTERING,
2876 .shost_attrs = srp_host_attrs,
2877 .track_queue_depth = 1,
2878 };
2879
2880 static int srp_sdev_count(struct Scsi_Host *host)
2881 {
2882 struct scsi_device *sdev;
2883 int c = 0;
2884
2885 shost_for_each_device(sdev, host)
2886 c++;
2887
2888 return c;
2889 }
2890
2891 /*
2892 * Return values:
2893 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
2894 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
2895 * removal has been scheduled.
2896 * 0 and target->state != SRP_TARGET_REMOVED upon success.
2897 */
2898 static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
2899 {
2900 struct srp_rport_identifiers ids;
2901 struct srp_rport *rport;
2902
2903 target->state = SRP_TARGET_SCANNING;
2904 sprintf(target->target_name, "SRP.T10:%016llX",
2905 be64_to_cpu(target->id_ext));
2906
2907 if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dma_device))
2908 return -ENODEV;
2909
2910 memcpy(ids.port_id, &target->id_ext, 8);
2911 memcpy(ids.port_id + 8, &target->ioc_guid, 8);
2912 ids.roles = SRP_RPORT_ROLE_TARGET;
2913 rport = srp_rport_add(target->scsi_host, &ids);
2914 if (IS_ERR(rport)) {
2915 scsi_remove_host(target->scsi_host);
2916 return PTR_ERR(rport);
2917 }
2918
2919 rport->lld_data = target;
2920 target->rport = rport;
2921
2922 spin_lock(&host->target_lock);
2923 list_add_tail(&target->list, &host->target_list);
2924 spin_unlock(&host->target_lock);
2925
2926 scsi_scan_target(&target->scsi_host->shost_gendev,
2927 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
2928
2929 if (srp_connected_ch(target) < target->ch_count ||
2930 target->qp_in_error) {
2931 shost_printk(KERN_INFO, target->scsi_host,
2932 PFX "SCSI scan failed - removing SCSI host\n");
2933 srp_queue_remove_work(target);
2934 goto out;
2935 }
2936
2937 pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
2938 dev_name(&target->scsi_host->shost_gendev),
2939 srp_sdev_count(target->scsi_host));
2940
2941 spin_lock_irq(&target->lock);
2942 if (target->state == SRP_TARGET_SCANNING)
2943 target->state = SRP_TARGET_LIVE;
2944 spin_unlock_irq(&target->lock);
2945
2946 out:
2947 return 0;
2948 }
2949
2950 static void srp_release_dev(struct device *dev)
2951 {
2952 struct srp_host *host =
2953 container_of(dev, struct srp_host, dev);
2954
2955 complete(&host->released);
2956 }
2957
2958 static struct class srp_class = {
2959 .name = "infiniband_srp",
2960 .dev_release = srp_release_dev
2961 };
2962
2963 /**
2964 * srp_conn_unique() - check whether the connection to a target is unique
2965 * @host: SRP host.
2966 * @target: SRP target port.
2967 */
2968 static bool srp_conn_unique(struct srp_host *host,
2969 struct srp_target_port *target)
2970 {
2971 struct srp_target_port *t;
2972 bool ret = false;
2973
2974 if (target->state == SRP_TARGET_REMOVED)
2975 goto out;
2976
2977 ret = true;
2978
2979 spin_lock(&host->target_lock);
2980 list_for_each_entry(t, &host->target_list, list) {
2981 if (t != target &&
2982 target->id_ext == t->id_ext &&
2983 target->ioc_guid == t->ioc_guid &&
2984 target->initiator_ext == t->initiator_ext) {
2985 ret = false;
2986 break;
2987 }
2988 }
2989 spin_unlock(&host->target_lock);
2990
2991 out:
2992 return ret;
2993 }
2994
2995 /*
2996 * Target ports are added by writing
2997 *
2998 * id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
2999 * pkey=<P_Key>,service_id=<service ID>
3000 *
3001 * to the add_target sysfs attribute.
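*
* Hypothetical example (all values below are placeholders, and the sysfs
* directory name depends on the local HCA name and port number):
*
* echo id_ext=0002c90300a0b0c0,ioc_guid=0002c90300a0b0c0,dgid=fe800000000000000002c90300a0b0c1,pkey=ffff,service_id=0002c90300a0b0c0 \
*     > /sys/class/infiniband_srp/srp-mlx4_0-1/add_target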
3002 */
3003 enum {
3004 SRP_OPT_ERR = 0,
3005 SRP_OPT_ID_EXT = 1 << 0,
3006 SRP_OPT_IOC_GUID = 1 << 1,
3007 SRP_OPT_DGID = 1 << 2,
3008 SRP_OPT_PKEY = 1 << 3,
3009 SRP_OPT_SERVICE_ID = 1 << 4,
3010 SRP_OPT_MAX_SECT = 1 << 5,
3011 SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
3012 SRP_OPT_IO_CLASS = 1 << 7,
3013 SRP_OPT_INITIATOR_EXT = 1 << 8,
3014 SRP_OPT_CMD_SG_ENTRIES = 1 << 9,
3015 SRP_OPT_ALLOW_EXT_SG = 1 << 10,
3016 SRP_OPT_SG_TABLESIZE = 1 << 11,
3017 SRP_OPT_COMP_VECTOR = 1 << 12,
3018 SRP_OPT_TL_RETRY_COUNT = 1 << 13,
3019 SRP_OPT_QUEUE_SIZE = 1 << 14,
3020 SRP_OPT_ALL = (SRP_OPT_ID_EXT |
3021 SRP_OPT_IOC_GUID |
3022 SRP_OPT_DGID |
3023 SRP_OPT_PKEY |
3024 SRP_OPT_SERVICE_ID),
3025 };
3026
3027 static const match_table_t srp_opt_tokens = {
3028 { SRP_OPT_ID_EXT, "id_ext=%s" },
3029 { SRP_OPT_IOC_GUID, "ioc_guid=%s" },
3030 { SRP_OPT_DGID, "dgid=%s" },
3031 { SRP_OPT_PKEY, "pkey=%x" },
3032 { SRP_OPT_SERVICE_ID, "service_id=%s" },
3033 { SRP_OPT_MAX_SECT, "max_sect=%d" },
3034 { SRP_OPT_MAX_CMD_PER_LUN, "max_cmd_per_lun=%d" },
3035 { SRP_OPT_IO_CLASS, "io_class=%x" },
3036 { SRP_OPT_INITIATOR_EXT, "initiator_ext=%s" },
3037 { SRP_OPT_CMD_SG_ENTRIES, "cmd_sg_entries=%u" },
3038 { SRP_OPT_ALLOW_EXT_SG, "allow_ext_sg=%u" },
3039 { SRP_OPT_SG_TABLESIZE, "sg_tablesize=%u" },
3040 { SRP_OPT_COMP_VECTOR, "comp_vector=%u" },
3041 { SRP_OPT_TL_RETRY_COUNT, "tl_retry_count=%u" },
3042 { SRP_OPT_QUEUE_SIZE, "queue_size=%d" },
3043 { SRP_OPT_ERR, NULL }
3044 };
3045
3046 static int srp_parse_options(const char *buf, struct srp_target_port *target)
3047 {
3048 char *options, *sep_opt;
3049 char *p;
3050 char dgid[3];
3051 substring_t args[MAX_OPT_ARGS];
3052 int opt_mask = 0;
3053 int token;
3054 int ret = -EINVAL;
3055 int i;
3056
3057 options = kstrdup(buf, GFP_KERNEL);
3058 if (!options)
3059 return -ENOMEM;
3060
3061 sep_opt = options;
3062 while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3063 if (!*p)
3064 continue;
3065
3066 token = match_token(p, srp_opt_tokens, args);
3067 opt_mask |= token;
3068
3069 switch (token) {
3070 case SRP_OPT_ID_EXT:
3071 p = match_strdup(args);
3072 if (!p) {
3073 ret = -ENOMEM;
3074 goto out;
3075 }
3076 target->id_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3077 kfree(p);
3078 break;
3079
3080 case SRP_OPT_IOC_GUID:
3081 p = match_strdup(args);
3082 if (!p) {
3083 ret = -ENOMEM;
3084 goto out;
3085 }
3086 target->ioc_guid = cpu_to_be64(simple_strtoull(p, NULL, 16));
3087 kfree(p);
3088 break;
3089
3090 case SRP_OPT_DGID:
3091 p = match_strdup(args);
3092 if (!p) {
3093 ret = -ENOMEM;
3094 goto out;
3095 }
3096 if (strlen(p) != 32) {
3097 pr_warn("bad dest GID parameter '%s'\n", p);
3098 kfree(p);
3099 goto out;
3100 }
3101
3102 for (i = 0; i < 16; ++i) {
3103 strlcpy(dgid, p + i * 2, sizeof(dgid));
3104 if (sscanf(dgid, "%hhx",
3105 &target->orig_dgid.raw[i]) < 1) {
3106 ret = -EINVAL;
3107 kfree(p);
3108 goto out;
3109 }
3110 }
3111 kfree(p);
3112 break;
3113
3114 case SRP_OPT_PKEY:
3115 if (match_hex(args, &token)) {
3116 pr_warn("bad P_Key parameter '%s'\n", p);
3117 goto out;
3118 }
3119 target->pkey = cpu_to_be16(token);
3120 break;
3121
3122 case SRP_OPT_SERVICE_ID:
3123 p = match_strdup(args);
3124 if (!p) {
3125 ret = -ENOMEM;
3126 goto out;
3127 }
3128 target->service_id = cpu_to_be64(simple_strtoull(p, NULL, 16));
3129 kfree(p);
3130 break;
3131
3132 case SRP_OPT_MAX_SECT:
3133 if (match_int(args, &token)) {
3134 pr_warn("bad max sect parameter '%s'\n", p);
3135 goto out;
3136 }
3137 target->scsi_host->max_sectors = token;
3138 break;
3139
3140 case SRP_OPT_QUEUE_SIZE:
3141 if (match_int(args, &token) || token < 1) {
3142 pr_warn("bad queue_size parameter '%s'\n", p);
3143 goto out;
3144 }
3145 target->scsi_host->can_queue = token;
3146 target->queue_size = token + SRP_RSP_SQ_SIZE +
3147 SRP_TSK_MGMT_SQ_SIZE;
3148 if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3149 target->scsi_host->cmd_per_lun = token;
3150 break;
3151
3152 case SRP_OPT_MAX_CMD_PER_LUN:
3153 if (match_int(args, &token) || token < 1) {
3154 pr_warn("bad max cmd_per_lun parameter '%s'\n",
3155 p);
3156 goto out;
3157 }
3158 target->scsi_host->cmd_per_lun = token;
3159 break;
3160
3161 case SRP_OPT_IO_CLASS:
3162 if (match_hex(args, &token)) {
3163 pr_warn("bad IO class parameter '%s'\n", p);
3164 goto out;
3165 }
3166 if (token != SRP_REV10_IB_IO_CLASS &&
3167 token != SRP_REV16A_IB_IO_CLASS) {
3168 pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3169 token, SRP_REV10_IB_IO_CLASS,
3170 SRP_REV16A_IB_IO_CLASS);
3171 goto out;
3172 }
3173 target->io_class = token;
3174 break;
3175
3176 case SRP_OPT_INITIATOR_EXT:
3177 p = match_strdup(args);
3178 if (!p) {
3179 ret = -ENOMEM;
3180 goto out;
3181 }
3182 target->initiator_ext = cpu_to_be64(simple_strtoull(p, NULL, 16));
3183 kfree(p);
3184 break;
3185
3186 case SRP_OPT_CMD_SG_ENTRIES:
3187 if (match_int(args, &token) || token < 1 || token > 255) {
3188 pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3189 p);
3190 goto out;
3191 }
3192 target->cmd_sg_cnt = token;
3193 break;
3194
3195 case SRP_OPT_ALLOW_EXT_SG:
3196 if (match_int(args, &token)) {
3197 pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3198 goto out;
3199 }
3200 target->allow_ext_sg = !!token;
3201 break;
3202
3203 case SRP_OPT_SG_TABLESIZE:
3204 if (match_int(args, &token) || token < 1 ||
3205 token > SG_MAX_SEGMENTS) {
3206 pr_warn("bad max sg_tablesize parameter '%s'\n",
3207 p);
3208 goto out;
3209 }
3210 target->sg_tablesize = token;
3211 break;
3212
3213 case SRP_OPT_COMP_VECTOR:
3214 if (match_int(args, &token) || token < 0) {
3215 pr_warn("bad comp_vector parameter '%s'\n", p);
3216 goto out;
3217 }
3218 target->comp_vector = token;
3219 break;
3220
3221 case SRP_OPT_TL_RETRY_COUNT:
3222 if (match_int(args, &token) || token < 2 || token > 7) {
3223 pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3224 p);
3225 goto out;
3226 }
3227 target->tl_retry_count = token;
3228 break;
3229
3230 default:
3231 pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3232 p);
3233 goto out;
3234 }
3235 }
3236
3237 if ((opt_mask & SRP_OPT_ALL) == SRP_OPT_ALL)
3238 ret = 0;
3239 else
3240 for (i = 0; i < ARRAY_SIZE(srp_opt_tokens); ++i)
3241 if ((srp_opt_tokens[i].token & SRP_OPT_ALL) &&
3242 !(srp_opt_tokens[i].token & opt_mask))
3243 pr_warn("target creation request is missing parameter '%s'\n",
3244 srp_opt_tokens[i].pattern);
3245
3246 if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue
3247 && (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3248 pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3249 target->scsi_host->cmd_per_lun,
3250 target->scsi_host->can_queue);
3251
3252 out:
3253 kfree(options);
3254 return ret;
3255 }
3256
3257 static ssize_t srp_create_target(struct device *dev,
3258 struct device_attribute *attr,
3259 const char *buf, size_t count)
3260 {
3261 struct srp_host *host =
3262 container_of(dev, struct srp_host, dev);
3263 struct Scsi_Host *target_host;
3264 struct srp_target_port *target;
3265 struct srp_rdma_ch *ch;
3266 struct srp_device *srp_dev = host->srp_dev;
3267 struct ib_device *ibdev = srp_dev->dev;
3268 int ret, node_idx, node, cpu, i;
3269 unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3270 bool multich = false;
3271
3272 target_host = scsi_host_alloc(&srp_template,
3273 sizeof (struct srp_target_port));
3274 if (!target_host)
3275 return -ENOMEM;
3276
3277 target_host->transportt = ib_srp_transport_template;
3278 target_host->max_channel = 0;
3279 target_host->max_id = 1;
3280 target_host->max_lun = -1LL;
3281 target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3282
3283 target = host_to_target(target_host);
3284
3285 target->io_class = SRP_REV16A_IB_IO_CLASS;
3286 target->scsi_host = target_host;
3287 target->srp_host = host;
3288 target->pd = host->srp_dev->pd;
3289 target->lkey = host->srp_dev->pd->local_dma_lkey;
3290 target->cmd_sg_cnt = cmd_sg_entries;
3291 target->sg_tablesize = indirect_sg_entries ? : cmd_sg_entries;
3292 target->allow_ext_sg = allow_ext_sg;
3293 target->tl_retry_count = 7;
3294 target->queue_size = SRP_DEFAULT_QUEUE_SIZE;
3295
3296 /*
3297 * Prevent the SCSI host from being removed by srp_remove_target()
3298 * before this function returns.
3299 */
3300 scsi_host_get(target->scsi_host);
3301
3302 ret = mutex_lock_interruptible(&host->add_target_mutex);
3303 if (ret < 0)
3304 goto put;
3305
3306 ret = srp_parse_options(buf, target);
3307 if (ret)
3308 goto out;
3309
3310 target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;
3311
3312 if (!srp_conn_unique(target->srp_host, target)) {
3313 shost_printk(KERN_INFO, target->scsi_host,
3314 PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3315 be64_to_cpu(target->id_ext),
3316 be64_to_cpu(target->ioc_guid),
3317 be64_to_cpu(target->initiator_ext));
3318 ret = -EEXIST;
3319 goto out;
3320 }
3321
3322 if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
3323 target->cmd_sg_cnt < target->sg_tablesize) {
3324 pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3325 target->sg_tablesize = target->cmd_sg_cnt;
3326 }
3327
3328 if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
3329 /*
3330 * FR and FMR can only map one HCA page per entry. If the
3331 * start address is not aligned on an HCA page boundary, two
3332 * entries will be used for the head and the tail, although
3333 * these two entries combined contain at most one HCA page of
3334 * data. Hence the "+ 1" in the calculation below.
3335 *
3336 * The indirect data buffer descriptor is contiguous so the
3337 * memory for that buffer will only be registered if
3338 * register_always is true. Hence add one to mr_per_cmd if
3339 * register_always has been set.
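*
* A worked example with illustrative numbers (not driver defaults):
* with max_sectors = 1024, mr_page_size = 4096 and max_pages_per_mr =
* 512, max_sectors_per_mr = 512 << (12 - 9) = 4096, so mr_per_cmd =
* 1 + (1024 + 1 + 4095) / 4096 = 2 when register_always is set.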
3340 */
3341 max_sectors_per_mr = srp_dev->max_pages_per_mr <<
3342 (ilog2(srp_dev->mr_page_size) - 9);
3343 mr_per_cmd = register_always +
3344 (target->scsi_host->max_sectors + 1 +
3345 max_sectors_per_mr - 1) / max_sectors_per_mr;
3346 pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
3347 target->scsi_host->max_sectors,
3348 srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
3349 max_sectors_per_mr, mr_per_cmd);
3350 }
3351
3352 target_host->sg_tablesize = target->sg_tablesize;
3353 target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
3354 target->mr_per_cmd = mr_per_cmd;
3355 target->indirect_size = target->sg_tablesize *
3356 sizeof (struct srp_direct_buf);
3357 target->max_iu_len = sizeof (struct srp_cmd) +
3358 sizeof (struct srp_indirect_buf) +
3359 target->cmd_sg_cnt * sizeof (struct srp_direct_buf);
3360
3361 INIT_WORK(&target->tl_err_work, srp_tl_err_work);
3362 INIT_WORK(&target->remove_work, srp_remove_work);
3363 spin_lock_init(&target->lock);
3364 ret = ib_query_gid(ibdev, host->port, 0, &target->sgid, NULL);
3365 if (ret)
3366 goto out;
3367
3368 ret = -ENOMEM;
3369 target->ch_count = max_t(unsigned, num_online_nodes(),
3370 min(ch_count ? :
3371 min(4 * num_online_nodes(),
3372 ibdev->num_comp_vectors),
3373 num_online_cpus()));
3374 target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
3375 GFP_KERNEL);
3376 if (!target->ch)
3377 goto out;
3378
3379 node_idx = 0;
3380 for_each_online_node(node) {
3381 const int ch_start = (node_idx * target->ch_count /
3382 num_online_nodes());
3383 const int ch_end = ((node_idx + 1) * target->ch_count /
3384 num_online_nodes());
3385 const int cv_start = (node_idx * ibdev->num_comp_vectors /
3386 num_online_nodes() + target->comp_vector)
3387 % ibdev->num_comp_vectors;
3388 const int cv_end = ((node_idx + 1) * ibdev->num_comp_vectors /
3389 num_online_nodes() + target->comp_vector)
3390 % ibdev->num_comp_vectors;
3391 int cpu_idx = 0;
3392
3393 for_each_online_cpu(cpu) {
3394 if (cpu_to_node(cpu) != node)
3395 continue;
3396 if (ch_start + cpu_idx >= ch_end)
3397 continue;
3398 ch = &target->ch[ch_start + cpu_idx];
3399 ch->target = target;
3400 ch->comp_vector = cv_start == cv_end ? cv_start :
3401 cv_start + cpu_idx % (cv_end - cv_start);
3402 spin_lock_init(&ch->lock);
3403 INIT_LIST_HEAD(&ch->free_tx);
3404 ret = srp_new_cm_id(ch);
3405 if (ret)
3406 goto err_disconnect;
3407
3408 ret = srp_create_ch_ib(ch);
3409 if (ret)
3410 goto err_disconnect;
3411
3412 ret = srp_alloc_req_data(ch);
3413 if (ret)
3414 goto err_disconnect;
3415
3416 ret = srp_connect_ch(ch, multich);
3417 if (ret) {
3418 shost_printk(KERN_ERR, target->scsi_host,
3419 PFX "Connection %d/%d failed\n",
3420 ch_start + cpu_idx,
3421 target->ch_count);
3422 if (node_idx == 0 && cpu_idx == 0) {
3423 goto err_disconnect;
3424 } else {
3425 srp_free_ch_ib(target, ch);
3426 srp_free_req_data(target, ch);
3427 target->ch_count = ch - target->ch;
3428 goto connected;
3429 }
3430 }
3431
3432 multich = true;
3433 cpu_idx++;
3434 }
3435 node_idx++;
3436 }
3437
3438 connected:
3439 target->scsi_host->nr_hw_queues = target->ch_count;
3440
3441 ret = srp_add_target(host, target);
3442 if (ret)
3443 goto err_disconnect;
3444
3445 if (target->state != SRP_TARGET_REMOVED) {
3446 shost_printk(KERN_DEBUG, target->scsi_host, PFX
3447 "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
3448 be64_to_cpu(target->id_ext),
3449 be64_to_cpu(target->ioc_guid),
3450 be16_to_cpu(target->pkey),
3451 be64_to_cpu(target->service_id),
3452 target->sgid.raw, target->orig_dgid.raw);
3453 }
3454
3455 ret = count;
3456
3457 out:
3458 mutex_unlock(&host->add_target_mutex);
3459
3460 put:
3461 scsi_host_put(target->scsi_host);
3462 if (ret < 0)
3463 scsi_host_put(target->scsi_host);
3464
3465 return ret;
3466
3467 err_disconnect:
3468 srp_disconnect_target(target);
3469
3470 for (i = 0; i < target->ch_count; i++) {
3471 ch = &target->ch[i];
3472 srp_free_ch_ib(target, ch);
3473 srp_free_req_data(target, ch);
3474 }
3475
3476 kfree(target->ch);
3477 goto out;
3478 }
3479
3480 static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);
3481
3482 static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
3483 char *buf)
3484 {
3485 struct srp_host *host = container_of(dev, struct srp_host, dev);
3486
3487 return sprintf(buf, "%s\n", host->srp_dev->dev->name);
3488 }
3489
3490 static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);
3491
3492 static ssize_t show_port(struct device *dev, struct device_attribute *attr,
3493 char *buf)
3494 {
3495 struct srp_host *host = container_of(dev, struct srp_host, dev);
3496
3497 return sprintf(buf, "%d\n", host->port);
3498 }
3499
3500 static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);
3501
3502 static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
3503 {
3504 struct srp_host *host;
3505
3506 host = kzalloc(sizeof *host, GFP_KERNEL);
3507 if (!host)
3508 return NULL;
3509
3510 INIT_LIST_HEAD(&host->target_list);
3511 spin_lock_init(&host->target_lock);
3512 init_completion(&host->released);
3513 mutex_init(&host->add_target_mutex);
3514 host->srp_dev = device;
3515 host->port = port;
3516
3517 host->dev.class = &srp_class;
3518 host->dev.parent = device->dev->dma_device;
3519 dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);
3520
3521 if (device_register(&host->dev))
3522 goto free_host;
3523 if (device_create_file(&host->dev, &dev_attr_add_target))
3524 goto err_class;
3525 if (device_create_file(&host->dev, &dev_attr_ibdev))
3526 goto err_class;
3527 if (device_create_file(&host->dev, &dev_attr_port))
3528 goto err_class;
3529
3530 return host;
3531
3532 err_class:
3533 device_unregister(&host->dev);
3534
3535 free_host:
3536 kfree(host);
3537
3538 return NULL;
3539 }
3540
3541 static void srp_add_one(struct ib_device *device)
3542 {
3543 struct srp_device *srp_dev;
3544 struct ib_device_attr *attr = &device->attrs;
3545 struct srp_host *host;
3546 int mr_page_shift, p;
3547 u64 max_pages_per_mr;
3548 unsigned int flags = 0;
3549
3550 srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
3551 if (!srp_dev)
3552 return;
3553
3554 /*
3555 * Use the smallest page size supported by the HCA, down to a
3556 * minimum of 4096 bytes. We're unlikely to build large sglists
3557 * out of smaller entries.
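*
* For example, if the lowest bit set in attr->page_size_cap is bit 12
* (i.e. the smallest supported HCA page size is 4 KB), ffs() returns 13
* and mr_page_shift becomes max(12, 12) = 12, i.e. 4096-byte MR pages.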
3558 */
3559 mr_page_shift = max(12, ffs(attr->page_size_cap) - 1);
3560 srp_dev->mr_page_size = 1 << mr_page_shift;
3561 srp_dev->mr_page_mask = ~((u64) srp_dev->mr_page_size - 1);
3562 max_pages_per_mr = attr->max_mr_size;
3563 do_div(max_pages_per_mr, srp_dev->mr_page_size);
3564 pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
3565 attr->max_mr_size, srp_dev->mr_page_size,
3566 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
3567 srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
3568 max_pages_per_mr);
3569
3570 srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
3571 device->map_phys_fmr && device->unmap_fmr);
3572 srp_dev->has_fr = (attr->device_cap_flags &
3573 IB_DEVICE_MEM_MGT_EXTENSIONS);
3574 if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
3575 dev_warn(&device->dev, "neither FMR nor FR is supported\n");
3576 } else if (!never_register &&
3577 attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
3578 srp_dev->use_fast_reg = (srp_dev->has_fr &&
3579 (!srp_dev->has_fmr || prefer_fr));
3580 srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
3581 }
3582
3583 if (never_register || !register_always ||
3584 (!srp_dev->has_fmr && !srp_dev->has_fr))
3585 flags |= IB_PD_UNSAFE_GLOBAL_RKEY;
3586
3587 if (srp_dev->use_fast_reg) {
3588 srp_dev->max_pages_per_mr =
3589 min_t(u32, srp_dev->max_pages_per_mr,
3590 attr->max_fast_reg_page_list_len);
3591 }
3592 srp_dev->mr_max_size = srp_dev->mr_page_size *
3593 srp_dev->max_pages_per_mr;
3594 pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
3595 device->name, mr_page_shift, attr->max_mr_size,
3596 attr->max_fast_reg_page_list_len,
3597 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);
3598
3599 INIT_LIST_HEAD(&srp_dev->dev_list);
3600
3601 srp_dev->dev = device;
3602 srp_dev->pd = ib_alloc_pd(device, flags);
3603 if (IS_ERR(srp_dev->pd))
3604 goto free_dev;
3605
3606
3607 for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
3608 host = srp_add_port(srp_dev, p);
3609 if (host)
3610 list_add_tail(&host->list, &srp_dev->dev_list);
3611 }
3612
3613 ib_set_client_data(device, &srp_client, srp_dev);
3614 return;
3615
3616 free_dev:
3617 kfree(srp_dev);
3618 }
3619
3620 static void srp_remove_one(struct ib_device *device, void *client_data)
3621 {
3622 struct srp_device *srp_dev;
3623 struct srp_host *host, *tmp_host;
3624 struct srp_target_port *target;
3625
3626 srp_dev = client_data;
3627 if (!srp_dev)
3628 return;
3629
3630 list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
3631 device_unregister(&host->dev);
3632 /*
3633 * Wait for the sysfs entry to go away, so that no new
3634 * target ports can be created.
3635 */
3636 wait_for_completion(&host->released);
3637
3638 /*
3639 * Remove all target ports.
3640 */
3641 spin_lock(&host->target_lock);
3642 list_for_each_entry(target, &host->target_list, list)
3643 srp_queue_remove_work(target);
3644 spin_unlock(&host->target_lock);
3645
3646 /*
3647 * Wait for tl_err and target port removal tasks.
3648 */
3649 flush_workqueue(system_long_wq);
3650 flush_workqueue(srp_remove_wq);
3651
3652 kfree(host);
3653 }
3654
3655 ib_dealloc_pd(srp_dev->pd);
3656
3657 kfree(srp_dev);
3658 }
3659
3660 static struct srp_function_template ib_srp_transport_functions = {
3661 .has_rport_state = true,
3662 .reset_timer_if_blocked = true,
3663 .reconnect_delay = &srp_reconnect_delay,
3664 .fast_io_fail_tmo = &srp_fast_io_fail_tmo,
3665 .dev_loss_tmo = &srp_dev_loss_tmo,
3666 .reconnect = srp_rport_reconnect,
3667 .rport_delete = srp_rport_delete,
3668 .terminate_rport_io = srp_terminate_io,
3669 };
3670
3671 static int __init srp_init_module(void)
3672 {
3673 int ret;
3674
3675 if (srp_sg_tablesize) {
3676 pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
3677 if (!cmd_sg_entries)
3678 cmd_sg_entries = srp_sg_tablesize;
3679 }
3680
3681 if (!cmd_sg_entries)
3682 cmd_sg_entries = SRP_DEF_SG_TABLESIZE;
3683
3684 if (cmd_sg_entries > 255) {
3685 pr_warn("Clamping cmd_sg_entries to 255\n");
3686 cmd_sg_entries = 255;
3687 }
3688
3689 if (!indirect_sg_entries)
3690 indirect_sg_entries = cmd_sg_entries;
3691 else if (indirect_sg_entries < cmd_sg_entries) {
3692 pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
3693 cmd_sg_entries);
3694 indirect_sg_entries = cmd_sg_entries;
3695 }
3696
3697 srp_remove_wq = create_workqueue("srp_remove");
3698 if (!srp_remove_wq) {
3699 ret = -ENOMEM;
3700 goto out;
3701 }
3702
3703 ret = -ENOMEM;
3704 ib_srp_transport_template =
3705 srp_attach_transport(&ib_srp_transport_functions);
3706 if (!ib_srp_transport_template)
3707 goto destroy_wq;
3708
3709 ret = class_register(&srp_class);
3710 if (ret) {
3711 pr_err("couldn't register class infiniband_srp\n");
3712 goto release_tr;
3713 }
3714
3715 ib_sa_register_client(&srp_sa_client);
3716
3717 ret = ib_register_client(&srp_client);
3718 if (ret) {
3719 pr_err("couldn't register IB client\n");
3720 goto unreg_sa;
3721 }
3722
3723 out:
3724 return ret;
3725
3726 unreg_sa:
3727 ib_sa_unregister_client(&srp_sa_client);
3728 class_unregister(&srp_class);
3729
3730 release_tr:
3731 srp_release_transport(ib_srp_transport_template);
3732
3733 destroy_wq:
3734 destroy_workqueue(srp_remove_wq);
3735 goto out;
3736 }
3737
3738 static void __exit srp_cleanup_module(void)
3739 {
3740 ib_unregister_client(&srp_client);
3741 ib_sa_unregister_client(&srp_sa_client);
3742 class_unregister(&srp_class);
3743 srp_release_transport(ib_srp_transport_template);
3744 destroy_workqueue(srp_remove_wq);
3745 }
3746
3747 module_init(srp_init_module);
3748 module_exit(srp_cleanup_module);