1 /*
2 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
3 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
4 *
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
10 *
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
13 * conditions are met:
14 *
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
17 * disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
23 *
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 * SOFTWARE.
32 */
33
34 #include <rdma/ib_mad.h>
35 #include <rdma/ib_user_verbs.h>
36 #include <linux/io.h>
37 #include <linux/slab.h>
38 #include <linux/module.h>
39 #include <linux/utsname.h>
40 #include <linux/rculist.h>
41
42 #include "ipath_kernel.h"
43 #include "ipath_verbs.h"
44 #include "ipath_common.h"
45
46 static unsigned int ib_ipath_qp_table_size = 251;
47 module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
48 MODULE_PARM_DESC(qp_table_size, "QP table size");
49
50 unsigned int ib_ipath_lkey_table_size = 12;
51 module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
52 S_IRUGO);
53 MODULE_PARM_DESC(lkey_table_size,
54 "LKEY table size in bits (2^n, 1 <= n <= 23)");
55
56 static unsigned int ib_ipath_max_pds = 0xFFFF;
57 module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
58 MODULE_PARM_DESC(max_pds,
59 "Maximum number of protection domains to support");
60
61 static unsigned int ib_ipath_max_ahs = 0xFFFF;
62 module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
63 MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
64
65 unsigned int ib_ipath_max_cqes = 0x2FFFF;
66 module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
67 MODULE_PARM_DESC(max_cqes,
68 "Maximum number of completion queue entries to support");
69
70 unsigned int ib_ipath_max_cqs = 0x1FFFF;
71 module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
72 MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
73
74 unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
75 module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
76 S_IWUSR | S_IRUGO);
77 MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
78
79 unsigned int ib_ipath_max_qps = 16384;
80 module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
81 MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
82
83 unsigned int ib_ipath_max_sges = 0x60;
84 module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
85 MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
86
87 unsigned int ib_ipath_max_mcast_grps = 16384;
88 module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
89 S_IWUSR | S_IRUGO);
90 MODULE_PARM_DESC(max_mcast_grps,
91 "Maximum number of multicast groups to support");
92
93 unsigned int ib_ipath_max_mcast_qp_attached = 16;
94 module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
95 uint, S_IWUSR | S_IRUGO);
96 MODULE_PARM_DESC(max_mcast_qp_attached,
97 "Maximum number of attached QPs to support");
98
99 unsigned int ib_ipath_max_srqs = 1024;
100 module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
101 MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
102
103 unsigned int ib_ipath_max_srq_sges = 128;
104 module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
105 uint, S_IWUSR | S_IRUGO);
106 MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
107
108 unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
109 module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
110 uint, S_IWUSR | S_IRUGO);
111 MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");
112
113 static unsigned int ib_ipath_disable_sma;
114 module_param_named(disable_sma, ib_ipath_disable_sma, uint, S_IWUSR | S_IRUGO);
115 MODULE_PARM_DESC(disable_sma, "Disable the SMA");
116
117 /*
118 * Note that it is OK to post send work requests in the SQE and ERR
119 * states; ipath_do_send() will process them and generate error
120 * completions as per IB 1.2 C10-96.
121 */
122 const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
123 [IB_QPS_RESET] = 0,
124 [IB_QPS_INIT] = IPATH_POST_RECV_OK,
125 [IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
126 [IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
127 IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
128 IPATH_PROCESS_NEXT_SEND_OK,
129 [IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
130 IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
131 [IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
132 IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
133 [IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
134 IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
135 };
136
137 struct ipath_ucontext {
138 struct ib_ucontext ibucontext;
139 };
140
141 static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
142 *ibucontext)
143 {
144 return container_of(ibucontext, struct ipath_ucontext, ibucontext);
145 }
146
147 /*
148 * Translate ib_wr_opcode into ib_wc_opcode.
149 */
150 const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
151 [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
152 [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
153 [IB_WR_SEND] = IB_WC_SEND,
154 [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
155 [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
156 [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
157 [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
158 };
159
160 /*
161 * System image GUID.
162 */
163 static __be64 sys_image_guid;
164
165 /**
166 * ipath_copy_sge - copy data to SGE memory
167 * @ss: the SGE state
168 * @data: the data to copy
169 * @length: the length of the data
170 */
171 void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
172 {
173 struct ipath_sge *sge = &ss->sge;
174
175 while (length) {
176 u32 len = sge->length;
177
178 if (len > length)
179 len = length;
180 if (len > sge->sge_length)
181 len = sge->sge_length;
182 BUG_ON(len == 0);
183 memcpy(sge->vaddr, data, len);
184 sge->vaddr += len;
185 sge->length -= len;
186 sge->sge_length -= len;
187 if (sge->sge_length == 0) {
188 if (--ss->num_sge)
189 *sge = *ss->sg_list++;
190 } else if (sge->length == 0 && sge->mr != NULL) {
191 if (++sge->n >= IPATH_SEGSZ) {
192 if (++sge->m >= sge->mr->mapsz)
193 break;
194 sge->n = 0;
195 }
196 sge->vaddr =
197 sge->mr->map[sge->m]->segs[sge->n].vaddr;
198 sge->length =
199 sge->mr->map[sge->m]->segs[sge->n].length;
200 }
201 data += len;
202 length -= len;
203 }
204 }
205
206 /**
207 * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
208 * @ss: the SGE state
209 * @length: the number of bytes to skip
210 */
211 void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
212 {
213 struct ipath_sge *sge = &ss->sge;
214
215 while (length) {
216 u32 len = sge->length;
217
218 if (len > length)
219 len = length;
220 if (len > sge->sge_length)
221 len = sge->sge_length;
222 BUG_ON(len == 0);
223 sge->vaddr += len;
224 sge->length -= len;
225 sge->sge_length -= len;
226 if (sge->sge_length == 0) {
227 if (--ss->num_sge)
228 *sge = *ss->sg_list++;
229 } else if (sge->length == 0 && sge->mr != NULL) {
230 if (++sge->n >= IPATH_SEGSZ) {
231 if (++sge->m >= sge->mr->mapsz)
232 break;
233 sge->n = 0;
234 }
235 sge->vaddr =
236 sge->mr->map[sge->m]->segs[sge->n].vaddr;
237 sge->length =
238 sge->mr->map[sge->m]->segs[sge->n].length;
239 }
240 length -= len;
241 }
242 }
243
244 /*
245 * Count the number of DMA descriptors needed to send length bytes of data.
246 * Don't modify the ipath_sge_state to get the count.
247 * Return zero if any of the segments is not aligned.
248 */
249 static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
250 {
251 struct ipath_sge *sg_list = ss->sg_list;
252 struct ipath_sge sge = ss->sge;
253 u8 num_sge = ss->num_sge;
254 u32 ndesc = 1; /* count the header */
255
256 while (length) {
257 u32 len = sge.length;
258
259 if (len > length)
260 len = length;
261 if (len > sge.sge_length)
262 len = sge.sge_length;
263 BUG_ON(len == 0);
264 if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
265 (len != length && (len & (sizeof(u32) - 1)))) {
266 ndesc = 0;
267 break;
268 }
269 ndesc++;
270 sge.vaddr += len;
271 sge.length -= len;
272 sge.sge_length -= len;
273 if (sge.sge_length == 0) {
274 if (--num_sge)
275 sge = *sg_list++;
276 } else if (sge.length == 0 && sge.mr != NULL) {
277 if (++sge.n >= IPATH_SEGSZ) {
278 if (++sge.m >= sge.mr->mapsz)
279 break;
280 sge.n = 0;
281 }
282 sge.vaddr =
283 sge.mr->map[sge.m]->segs[sge.n].vaddr;
284 sge.length =
285 sge.mr->map[sge.m]->segs[sge.n].length;
286 }
287 length -= len;
288 }
289 return ndesc;
290 }
291
292 /*
293 * Copy from the SGEs to the data buffer.
294 */
295 static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
296 u32 length)
297 {
298 struct ipath_sge *sge = &ss->sge;
299
300 while (length) {
301 u32 len = sge->length;
302
303 if (len > length)
304 len = length;
305 if (len > sge->sge_length)
306 len = sge->sge_length;
307 BUG_ON(len == 0);
308 memcpy(data, sge->vaddr, len);
309 sge->vaddr += len;
310 sge->length -= len;
311 sge->sge_length -= len;
312 if (sge->sge_length == 0) {
313 if (--ss->num_sge)
314 *sge = *ss->sg_list++;
315 } else if (sge->length == 0 && sge->mr != NULL) {
316 if (++sge->n >= IPATH_SEGSZ) {
317 if (++sge->m >= sge->mr->mapsz)
318 break;
319 sge->n = 0;
320 }
321 sge->vaddr =
322 sge->mr->map[sge->m]->segs[sge->n].vaddr;
323 sge->length =
324 sge->mr->map[sge->m]->segs[sge->n].length;
325 }
326 data += len;
327 length -= len;
328 }
329 }
330
331 /**
332 * ipath_post_one_send - post one RC, UC, or UD send work request
333 * @qp: the QP to post on
334 * @wr: the work request to send
335 */
336 static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
337 {
338 struct ipath_swqe *wqe;
339 u32 next;
340 int i;
341 int j;
342 int acc;
343 int ret;
344 unsigned long flags;
345 struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
346
347 spin_lock_irqsave(&qp->s_lock, flags);
348
349 if (qp->ibqp.qp_type != IB_QPT_SMI &&
350 !(dd->ipath_flags & IPATH_LINKACTIVE)) {
351 ret = -ENETDOWN;
352 goto bail;
353 }
354
355 /* Check that state is OK to post send. */
356 if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
357 goto bail_inval;
358
359 /* IB spec says that num_sge == 0 is OK. */
360 if (wr->num_sge > qp->s_max_sge)
361 goto bail_inval;
362
363 /*
364 * Don't allow RDMA reads or atomic operations on UC, and
365 * reject undefined opcodes.
366 * Make sure buffer is large enough to hold the result for atomics.
367 */
368 if (qp->ibqp.qp_type == IB_QPT_UC) {
369 if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
370 goto bail_inval;
371 } else if (qp->ibqp.qp_type == IB_QPT_UD) {
372 /* Check UD opcode */
373 if (wr->opcode != IB_WR_SEND &&
374 wr->opcode != IB_WR_SEND_WITH_IMM)
375 goto bail_inval;
376 /* Check UD destination address PD */
377 if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
378 goto bail_inval;
379 } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
380 goto bail_inval;
381 else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
382 (wr->num_sge == 0 ||
383 wr->sg_list[0].length < sizeof(u64) ||
384 wr->sg_list[0].addr & (sizeof(u64) - 1)))
385 goto bail_inval;
386 else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
387 goto bail_inval;
388
389 next = qp->s_head + 1;
390 if (next >= qp->s_size)
391 next = 0;
392 if (next == qp->s_last) {
393 ret = -ENOMEM;
394 goto bail;
395 }
396
397 wqe = get_swqe_ptr(qp, qp->s_head);
398
399 if (qp->ibqp.qp_type != IB_QPT_UC &&
400 qp->ibqp.qp_type != IB_QPT_RC)
401 memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
402 else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
403 wr->opcode == IB_WR_RDMA_WRITE ||
404 wr->opcode == IB_WR_RDMA_READ)
405 memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
406 else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
407 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
408 memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
409 else
410 memcpy(&wqe->wr, wr, sizeof(wqe->wr));
411
412 wqe->length = 0;
413 if (wr->num_sge) {
414 acc = wr->opcode >= IB_WR_RDMA_READ ?
415 IB_ACCESS_LOCAL_WRITE : 0;
416 for (i = 0, j = 0; i < wr->num_sge; i++) {
417 u32 length = wr->sg_list[i].length;
418 int ok;
419
420 if (length == 0)
421 continue;
422 ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
423 &wr->sg_list[i], acc);
424 if (!ok)
425 goto bail_inval;
426 wqe->length += length;
427 j++;
428 }
429 wqe->wr.num_sge = j;
430 }
431 if (qp->ibqp.qp_type == IB_QPT_UC ||
432 qp->ibqp.qp_type == IB_QPT_RC) {
433 if (wqe->length > 0x80000000U)
434 goto bail_inval;
435 } else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
436 goto bail_inval;
437 wqe->ssn = qp->s_ssn++;
438 qp->s_head = next;
439
440 ret = 0;
441 goto bail;
442
443 bail_inval:
444 ret = -EINVAL;
445 bail:
446 spin_unlock_irqrestore(&qp->s_lock, flags);
447 return ret;
448 }
449
450 /**
451 * ipath_post_send - post a send on a QP
452 * @ibqp: the QP to post the send on
453 * @wr: the list of work requests to post
454 * @bad_wr: the first bad WR is put here
455 *
456 * This may be called from interrupt context.
457 */
458 static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
459 struct ib_send_wr **bad_wr)
460 {
461 struct ipath_qp *qp = to_iqp(ibqp);
462 int err = 0;
463
464 for (; wr; wr = wr->next) {
465 err = ipath_post_one_send(qp, wr);
466 if (err) {
467 *bad_wr = wr;
468 goto bail;
469 }
470 }
471
472 /* Try to do the send work in the caller's context. */
473 ipath_do_send((unsigned long) qp);
474
475 bail:
476 return err;
477 }
478
479 /**
480 * ipath_post_receive - post a receive on a QP
481 * @ibqp: the QP to post the receive on
482 * @wr: the WR to post
483 * @bad_wr: the first bad WR is put here
484 *
485 * This may be called from interrupt context.
486 */
487 static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
488 struct ib_recv_wr **bad_wr)
489 {
490 struct ipath_qp *qp = to_iqp(ibqp);
491 struct ipath_rwq *wq = qp->r_rq.wq;
492 unsigned long flags;
493 int ret;
494
495 /* Check that state is OK to post receive. */
496 if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
497 *bad_wr = wr;
498 ret = -EINVAL;
499 goto bail;
500 }
501
502 for (; wr; wr = wr->next) {
503 struct ipath_rwqe *wqe;
504 u32 next;
505 int i;
506
507 if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
508 *bad_wr = wr;
509 ret = -EINVAL;
510 goto bail;
511 }
512
513 spin_lock_irqsave(&qp->r_rq.lock, flags);
514 next = wq->head + 1;
515 if (next >= qp->r_rq.size)
516 next = 0;
517 if (next == wq->tail) {
518 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
519 *bad_wr = wr;
520 ret = -ENOMEM;
521 goto bail;
522 }
523
524 wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
525 wqe->wr_id = wr->wr_id;
526 wqe->num_sge = wr->num_sge;
527 for (i = 0; i < wr->num_sge; i++)
528 wqe->sg_list[i] = wr->sg_list[i];
529 /* Make sure queue entry is written before the head index. */
530 smp_wmb();
531 wq->head = next;
532 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
533 }
534 ret = 0;
535
536 bail:
537 return ret;
538 }
539
540 /**
541 * ipath_qp_rcv - process an incoming packet on a QP
542 * @dev: the device the packet came on
543 * @hdr: the packet header
544 * @has_grh: true if the packet has a GRH
545 * @data: the packet data
546 * @tlen: the packet length
547 * @qp: the QP the packet came on
548 *
549 * This is called from ipath_ib_rcv() to process an incoming packet
550 * for the given QP.
551 * Called at interrupt level.
552 */
553 static void ipath_qp_rcv(struct ipath_ibdev *dev,
554 struct ipath_ib_header *hdr, int has_grh,
555 void *data, u32 tlen, struct ipath_qp *qp)
556 {
557 /* Check for valid receive state. */
558 if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
559 dev->n_pkt_drops++;
560 return;
561 }
562
563 switch (qp->ibqp.qp_type) {
564 case IB_QPT_SMI:
565 case IB_QPT_GSI:
566 if (ib_ipath_disable_sma)
567 break;
568 /* FALLTHROUGH */
569 case IB_QPT_UD:
570 ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
571 break;
572
573 case IB_QPT_RC:
574 ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
575 break;
576
577 case IB_QPT_UC:
578 ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
579 break;
580
581 default:
582 break;
583 }
584 }
585
586 /**
587 * ipath_ib_rcv - process an incoming packet
588 * @arg: the device pointer
589 * @rhdr: the header of the packet
590 * @data: the packet data
591 * @tlen: the packet length
592 *
593 * This is called from ipath_kreceive() to process an incoming packet at
594 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
595 */
596 void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
597 u32 tlen)
598 {
599 struct ipath_ib_header *hdr = rhdr;
600 struct ipath_other_headers *ohdr;
601 struct ipath_qp *qp;
602 u32 qp_num;
603 int lnh;
604 u8 opcode;
605 u16 lid;
606
607 if (unlikely(dev == NULL))
608 goto bail;
609
610 if (unlikely(tlen < 24)) { /* LRH+BTH+CRC */
611 dev->rcv_errors++;
612 goto bail;
613 }
614
615 /* Check for a valid destination LID (see ch. 7.11.1). */
616 lid = be16_to_cpu(hdr->lrh[1]);
617 if (lid < IPATH_MULTICAST_LID_BASE) {
618 lid &= ~((1 << dev->dd->ipath_lmc) - 1);
619 if (unlikely(lid != dev->dd->ipath_lid)) {
620 dev->rcv_errors++;
621 goto bail;
622 }
623 }
624
625 /* Check for GRH */
626 lnh = be16_to_cpu(hdr->lrh[0]) & 3;
627 if (lnh == IPATH_LRH_BTH)
628 ohdr = &hdr->u.oth;
629 else if (lnh == IPATH_LRH_GRH)
630 ohdr = &hdr->u.l.oth;
631 else {
632 dev->rcv_errors++;
633 goto bail;
634 }
635
636 opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
637 dev->opstats[opcode].n_bytes += tlen;
638 dev->opstats[opcode].n_packets++;
639
640 /* Get the destination QP number. */
641 qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
642 if (qp_num == IPATH_MULTICAST_QPN) {
643 struct ipath_mcast *mcast;
644 struct ipath_mcast_qp *p;
645
646 if (lnh != IPATH_LRH_GRH) {
647 dev->n_pkt_drops++;
648 goto bail;
649 }
650 mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
651 if (mcast == NULL) {
652 dev->n_pkt_drops++;
653 goto bail;
654 }
655 dev->n_multicast_rcv++;
656 list_for_each_entry_rcu(p, &mcast->qp_list, list)
657 ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
658 /*
659 * Notify ipath_multicast_detach() if it is waiting for us
660 * to finish.
661 */
662 if (atomic_dec_return(&mcast->refcount) <= 1)
663 wake_up(&mcast->wait);
664 } else {
665 qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
666 if (qp) {
667 dev->n_unicast_rcv++;
668 ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
669 tlen, qp);
670 /*
671 * Notify ipath_destroy_qp() if it is waiting
672 * for us to finish.
673 */
674 if (atomic_dec_and_test(&qp->refcount))
675 wake_up(&qp->wait);
676 } else
677 dev->n_pkt_drops++;
678 }
679
680 bail:;
681 }
682
683 /**
684 * ipath_ib_timer - verbs timer
685 * @arg: the device pointer
686 *
687 * This is called from ipath_do_rcv_timer() at interrupt level to check for
688 * QPs which need retransmits and to collect performance numbers.
689 */
690 static void ipath_ib_timer(struct ipath_ibdev *dev)
691 {
692 struct ipath_qp *resend = NULL;
693 struct ipath_qp *rnr = NULL;
694 struct list_head *last;
695 struct ipath_qp *qp;
696 unsigned long flags;
697
698 if (dev == NULL)
699 return;
700
701 spin_lock_irqsave(&dev->pending_lock, flags);
702 /* Start filling the next pending queue. */
703 if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
704 dev->pending_index = 0;
705 /* Save any requests still in the new queue; they have timed out. */
706 last = &dev->pending[dev->pending_index];
707 while (!list_empty(last)) {
708 qp = list_entry(last->next, struct ipath_qp, timerwait);
709 list_del_init(&qp->timerwait);
710 qp->timer_next = resend;
711 resend = qp;
712 atomic_inc(&qp->refcount);
713 }
714 last = &dev->rnrwait;
715 if (!list_empty(last)) {
716 qp = list_entry(last->next, struct ipath_qp, timerwait);
717 if (--qp->s_rnr_timeout == 0) {
718 do {
719 list_del_init(&qp->timerwait);
720 qp->timer_next = rnr;
721 rnr = qp;
722 atomic_inc(&qp->refcount);
723 if (list_empty(last))
724 break;
725 qp = list_entry(last->next, struct ipath_qp,
726 timerwait);
727 } while (qp->s_rnr_timeout == 0);
728 }
729 }
730 /*
731 * We should only be in the started state if pma_sample_start != 0
732 */
733 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
734 --dev->pma_sample_start == 0) {
735 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
736 ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
737 &dev->ipath_rword,
738 &dev->ipath_spkts,
739 &dev->ipath_rpkts,
740 &dev->ipath_xmit_wait);
741 }
742 if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
743 if (dev->pma_sample_interval == 0) {
744 u64 ta, tb, tc, td, te;
745
746 dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
747 ipath_snapshot_counters(dev->dd, &ta, &tb,
748 &tc, &td, &te);
749
750 dev->ipath_sword = ta - dev->ipath_sword;
751 dev->ipath_rword = tb - dev->ipath_rword;
752 dev->ipath_spkts = tc - dev->ipath_spkts;
753 dev->ipath_rpkts = td - dev->ipath_rpkts;
754 dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
755 } else {
756 dev->pma_sample_interval--;
757 }
758 }
759 spin_unlock_irqrestore(&dev->pending_lock, flags);
760
761 /* XXX What if timer fires again while this is running? */
762 while (resend != NULL) {
763 qp = resend;
764 resend = qp->timer_next;
765
766 spin_lock_irqsave(&qp->s_lock, flags);
767 if (qp->s_last != qp->s_tail &&
768 ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
769 dev->n_timeouts++;
770 ipath_restart_rc(qp, qp->s_last_psn + 1);
771 }
772 spin_unlock_irqrestore(&qp->s_lock, flags);
773
774 /* Notify ipath_destroy_qp() if it is waiting. */
775 if (atomic_dec_and_test(&qp->refcount))
776 wake_up(&qp->wait);
777 }
778 while (rnr != NULL) {
779 qp = rnr;
780 rnr = qp->timer_next;
781
782 spin_lock_irqsave(&qp->s_lock, flags);
783 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
784 ipath_schedule_send(qp);
785 spin_unlock_irqrestore(&qp->s_lock, flags);
786
787 /* Notify ipath_destroy_qp() if it is waiting. */
788 if (atomic_dec_and_test(&qp->refcount))
789 wake_up(&qp->wait);
790 }
791 }
792
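/*
 * update_sge - advance the SGE state by @length bytes
 *
 * Called after @length bytes of the current SGE have been consumed.
 * Moves on to the next SGE in the list when the current one is
 * exhausted, or to the next memory-region segment when the current
 * map segment runs out.
 */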
793 static void update_sge(struct ipath_sge_state *ss, u32 length)
794 {
795 struct ipath_sge *sge = &ss->sge;
796
797 sge->vaddr += length;
798 sge->length -= length;
799 sge->sge_length -= length;
800 if (sge->sge_length == 0) {
801 if (--ss->num_sge)
802 *sge = *ss->sg_list++;
803 } else if (sge->length == 0 && sge->mr != NULL) {
804 if (++sge->n >= IPATH_SEGSZ) {
805 if (++sge->m >= sge->mr->mapsz)
806 return;
807 sge->n = 0;
808 }
809 sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
810 sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
811 }
812 }
813
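/*
 * Helpers for packing unaligned source bytes into aligned 32-bit PIO
 * writes.  In memory-byte order (on either endianness):
 * get_upper_bits() returns the bytes of a word at or beyond the given
 * bit offset, shifted down to the start of the word; set_upper_bits()
 * does the inverse, positioning a value so it begins at the given bit
 * offset; clear_upper_bytes() keeps only the first @n memory bytes of
 * @data, repositioned to start at byte offset @off, clearing the rest.
 * E.g. on a little-endian build, clear_upper_bytes(0xaabbccdd, 2, 1)
 * yields 0x00ccdd00.
 */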
814 #ifdef __LITTLE_ENDIAN
815 static inline u32 get_upper_bits(u32 data, u32 shift)
816 {
817 return data >> shift;
818 }
819
820 static inline u32 set_upper_bits(u32 data, u32 shift)
821 {
822 return data << shift;
823 }
824
825 static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
826 {
827 data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
828 data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
829 return data;
830 }
831 #else
832 static inline u32 get_upper_bits(u32 data, u32 shift)
833 {
834 return data << shift;
835 }
836
837 static inline u32 set_upper_bits(u32 data, u32 shift)
838 {
839 return data >> shift;
840 }
841
842 static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
843 {
844 data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
845 data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
846 return data;
847 }
848 #endif
849
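/*
 * copy_io - copy @length bytes from the SGE list @ss into a
 * write-combining PIO buffer using only aligned 32-bit writes.
 * Unaligned source addresses and partial trailing words are packed
 * with the helpers above; the final dword is held back in 'last' and
 * written separately so it can serve as the trigger word.
 */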
850 static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
851 u32 length, unsigned flush_wc)
852 {
853 u32 extra = 0;
854 u32 data = 0;
855 u32 last;
856
857 while (1) {
858 u32 len = ss->sge.length;
859 u32 off;
860
861 if (len > length)
862 len = length;
863 if (len > ss->sge.sge_length)
864 len = ss->sge.sge_length;
865 BUG_ON(len == 0);
866 /* If the source address is not aligned, try to align it. */
867 off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
868 if (off) {
869 u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
870 ~(sizeof(u32) - 1));
871 u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
872 u32 y;
873
874 y = sizeof(u32) - off;
875 if (len > y)
876 len = y;
877 if (len + extra >= sizeof(u32)) {
878 data |= set_upper_bits(v, extra *
879 BITS_PER_BYTE);
880 len = sizeof(u32) - extra;
881 if (len == length) {
882 last = data;
883 break;
884 }
885 __raw_writel(data, piobuf);
886 piobuf++;
887 extra = 0;
888 data = 0;
889 } else {
890 /* Clear unused upper bytes */
891 data |= clear_upper_bytes(v, len, extra);
892 if (len == length) {
893 last = data;
894 break;
895 }
896 extra += len;
897 }
898 } else if (extra) {
899 /* Source address is aligned. */
900 u32 *addr = (u32 *) ss->sge.vaddr;
901 int shift = extra * BITS_PER_BYTE;
902 int ushift = 32 - shift;
903 u32 l = len;
904
905 while (l >= sizeof(u32)) {
906 u32 v = *addr;
907
908 data |= set_upper_bits(v, shift);
909 __raw_writel(data, piobuf);
910 data = get_upper_bits(v, ushift);
911 piobuf++;
912 addr++;
913 l -= sizeof(u32);
914 }
915 /*
916 * We still have 'extra' bytes of data left over.
917 */
918 if (l) {
919 u32 v = *addr;
920
921 if (l + extra >= sizeof(u32)) {
922 data |= set_upper_bits(v, shift);
923 len -= l + extra - sizeof(u32);
924 if (len == length) {
925 last = data;
926 break;
927 }
928 __raw_writel(data, piobuf);
929 piobuf++;
930 extra = 0;
931 data = 0;
932 } else {
933 /* Clear unused upper bytes */
934 data |= clear_upper_bytes(v, l,
935 extra);
936 if (len == length) {
937 last = data;
938 break;
939 }
940 extra += l;
941 }
942 } else if (len == length) {
943 last = data;
944 break;
945 }
946 } else if (len == length) {
947 u32 w;
948
949 /*
950 * Need to round up for the last dword in the
951 * packet.
952 */
953 w = (len + 3) >> 2;
954 __iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
955 piobuf += w - 1;
956 last = ((u32 *) ss->sge.vaddr)[w - 1];
957 break;
958 } else {
959 u32 w = len >> 2;
960
961 __iowrite32_copy(piobuf, ss->sge.vaddr, w);
962 piobuf += w;
963
964 extra = len & (sizeof(u32) - 1);
965 if (extra) {
966 u32 v = ((u32 *) ss->sge.vaddr)[w];
967
968 /* Clear unused upper bytes */
969 data = clear_upper_bytes(v, extra, 0);
970 }
971 }
972 update_sge(ss, len);
973 length -= len;
974 }
975 /* Update address before sending packet. */
976 update_sge(ss, length);
977 if (flush_wc) {
978 /* must flush everything before the trigger word */
979 ipath_flush_wc();
980 __raw_writel(last, piobuf);
981 /* be sure trigger word is written */
982 ipath_flush_wc();
983 } else
984 __raw_writel(last, piobuf);
985 }
986
987 /*
988 * Convert IB rate to delay multiplier.
989 */
990 unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
991 {
992 switch (rate) {
993 case IB_RATE_2_5_GBPS: return 8;
994 case IB_RATE_5_GBPS: return 4;
995 case IB_RATE_10_GBPS: return 2;
996 case IB_RATE_20_GBPS: return 1;
997 default: return 0;
998 }
999 }
1000
1001 /*
1002 * Convert delay multiplier to IB rate
1003 */
1004 static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
1005 {
1006 switch (mult) {
1007 case 8: return IB_RATE_2_5_GBPS;
1008 case 4: return IB_RATE_5_GBPS;
1009 case 2: return IB_RATE_10_GBPS;
1010 case 1: return IB_RATE_20_GBPS;
1011 default: return IB_RATE_PORT_CURRENT;
1012 }
1013 }
1014
1015 static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
1016 {
1017 struct ipath_verbs_txreq *tx = NULL;
1018 unsigned long flags;
1019
1020 spin_lock_irqsave(&dev->pending_lock, flags);
1021 if (!list_empty(&dev->txreq_free)) {
1022 struct list_head *l = dev->txreq_free.next;
1023
1024 list_del(l);
1025 tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
1026 }
1027 spin_unlock_irqrestore(&dev->pending_lock, flags);
1028 return tx;
1029 }
1030
1031 static inline void put_txreq(struct ipath_ibdev *dev,
1032 struct ipath_verbs_txreq *tx)
1033 {
1034 unsigned long flags;
1035
1036 spin_lock_irqsave(&dev->pending_lock, flags);
1037 list_add(&tx->txreq.list, &dev->txreq_free);
1038 spin_unlock_irqrestore(&dev->pending_lock, flags);
1039 }
1040
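/*
 * sdma_complete - SDMA completion callback
 *
 * Called when the send DMA engine has finished (or flushed) a verbs
 * tx request: generate the send completion, restart the QP if it was
 * waiting on DMA, free any bounce buffer, and drop the references
 * taken when the request was queued.
 */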
1041 static void sdma_complete(void *cookie, int status)
1042 {
1043 struct ipath_verbs_txreq *tx = cookie;
1044 struct ipath_qp *qp = tx->qp;
1045 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1046 unsigned long flags;
1047 enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
1048 IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;
1049
1050 if (atomic_dec_and_test(&qp->s_dma_busy)) {
1051 spin_lock_irqsave(&qp->s_lock, flags);
1052 if (tx->wqe)
1053 ipath_send_complete(qp, tx->wqe, ibs);
1054 if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1055 qp->s_last != qp->s_head) ||
1056 (qp->s_flags & IPATH_S_WAIT_DMA))
1057 ipath_schedule_send(qp);
1058 spin_unlock_irqrestore(&qp->s_lock, flags);
1059 wake_up(&qp->wait_dma);
1060 } else if (tx->wqe) {
1061 spin_lock_irqsave(&qp->s_lock, flags);
1062 ipath_send_complete(qp, tx->wqe, ibs);
1063 spin_unlock_irqrestore(&qp->s_lock, flags);
1064 }
1065
1066 if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
1067 kfree(tx->txreq.map_addr);
1068 put_txreq(dev, tx);
1069
1070 if (atomic_dec_and_test(&qp->refcount))
1071 wake_up(&qp->wait);
1072 }
1073
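/*
 * Drop the QP's SDMA busy count; when the last outstanding descriptor
 * completes, restart the send engine if the QP is waiting on DMA and
 * wake any waiter on qp->wait_dma.
 */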
1074 static void decrement_dma_busy(struct ipath_qp *qp)
1075 {
1076 unsigned long flags;
1077
1078 if (atomic_dec_and_test(&qp->s_dma_busy)) {
1079 spin_lock_irqsave(&qp->s_lock, flags);
1080 if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
1081 qp->s_last != qp->s_head) ||
1082 (qp->s_flags & IPATH_S_WAIT_DMA))
1083 ipath_schedule_send(qp);
1084 spin_unlock_irqrestore(&qp->s_lock, flags);
1085 wake_up(&qp->wait_dma);
1086 }
1087 }
1088
1089 /*
1090 * Compute the number of clock cycles of delay before sending the next packet.
1091 * The multipliers reflect the number of clocks for the fastest rate so
1092 * one tick at 4xDDR is 8 ticks at 1xSDR.
1093 * If the destination port will take longer to receive a packet than
1094 * the outgoing link can send it, we need to delay sending the next packet
1095 * by the difference in time it takes the receiver to receive and the sender
1096 * to send this packet.
1097 * Note that this delay is always correct for UC and RC but not always
1098 * optimal for UD. For UD, the destination HCA can be different for each
1099 * packet, in which case, we could send packets to a different destination
1100 * while "waiting" for the delay. The overhead for doing this without
1101 * HW support is more than just paying the cost of delaying some packets
1102 * unnecessarily.
1103 */
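/*
 * For example, with plen = 100 (dwords), snd_mult = 1 (4X DDR) and
 * rcv_mult = 8 (1X SDR), the formula below gives
 * (100 * (8 - 1) + 1) >> 1 = 350 ticks of delay; when the receiver is
 * at least as fast as the sender it gives 0.
 */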
1104 static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
1105 {
1106 return (rcv_mult > snd_mult) ?
1107 (plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
1108 }
1109
1110 static int ipath_verbs_send_dma(struct ipath_qp *qp,
1111 struct ipath_ib_header *hdr, u32 hdrwords,
1112 struct ipath_sge_state *ss, u32 len,
1113 u32 plen, u32 dwords)
1114 {
1115 struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
1116 struct ipath_devdata *dd = dev->dd;
1117 struct ipath_verbs_txreq *tx;
1118 u32 *piobuf;
1119 u32 control;
1120 u32 ndesc;
1121 int ret;
1122
1123 tx = qp->s_tx;
1124 if (tx) {
1125 qp->s_tx = NULL;
1126 /* resend previously constructed packet */
1127 atomic_inc(&qp->s_dma_busy);
1128 ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
1129 if (ret) {
1130 qp->s_tx = tx;
1131 decrement_dma_busy(qp);
1132 }
1133 goto bail;
1134 }
1135
1136 tx = get_txreq(dev);
1137 if (!tx) {
1138 ret = -EBUSY;
1139 goto bail;
1140 }
1141
1142 /*
1143 * Get the saved delay count we computed for the previous packet
1144 * and save the delay count for this packet to be used next time
1145 * we get here.
1146 */
1147 control = qp->s_pkt_delay;
1148 qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
1149
1150 tx->qp = qp;
1151 atomic_inc(&qp->refcount);
1152 tx->wqe = qp->s_wqe;
1153 tx->txreq.callback = sdma_complete;
1154 tx->txreq.callback_cookie = tx;
1155 tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
1156 IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
1157 if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
1158 tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;
1159
1160 /* VL15 packets bypass credit check */
1161 if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
1162 control |= 1ULL << 31;
1163 tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
1164 }
1165
1166 if (len) {
1167 /*
1168 * Don't try to DMA if it takes more descriptors than
1169 * the queue holds.
1170 */
1171 ndesc = ipath_count_sge(ss, len);
1172 if (ndesc >= dd->ipath_sdma_descq_cnt)
1173 ndesc = 0;
1174 } else
1175 ndesc = 1;
1176 if (ndesc) {
1177 tx->hdr.pbc[0] = cpu_to_le32(plen);
1178 tx->hdr.pbc[1] = cpu_to_le32(control);
1179 memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
1180 tx->txreq.sg_count = ndesc;
1181 tx->map_len = (hdrwords + 2) << 2;
1182 tx->txreq.map_addr = &tx->hdr;
1183 atomic_inc(&qp->s_dma_busy);
1184 ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
1185 if (ret) {
1186 /* save ss and length in dwords */
1187 tx->ss = ss;
1188 tx->len = dwords;
1189 qp->s_tx = tx;
1190 decrement_dma_busy(qp);
1191 }
1192 goto bail;
1193 }
1194
1195 /* Allocate a buffer and copy the header and payload to it. */
1196 tx->map_len = (plen + 1) << 2;
1197 piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
1198 if (unlikely(piobuf == NULL)) {
1199 ret = -EBUSY;
1200 goto err_tx;
1201 }
1202 tx->txreq.map_addr = piobuf;
1203 tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
1204 tx->txreq.sg_count = 1;
1205
1206 *piobuf++ = (__force u32) cpu_to_le32(plen);
1207 *piobuf++ = (__force u32) cpu_to_le32(control);
1208 memcpy(piobuf, hdr, hdrwords << 2);
1209 ipath_copy_from_sge(piobuf + hdrwords, ss, len);
1210
1211 atomic_inc(&qp->s_dma_busy);
1212 ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
1213 /*
1214 * If we couldn't queue the DMA request, save the info
1215 * and try again later rather than destroying the
1216 * buffer and undoing the side effects of the copy.
1217 */
1218 if (ret) {
1219 tx->ss = NULL;
1220 tx->len = 0;
1221 qp->s_tx = tx;
1222 decrement_dma_busy(qp);
1223 }
1224 dev->n_unaligned++;
1225 goto bail;
1226
1227 err_tx:
1228 if (atomic_dec_and_test(&qp->refcount))
1229 wake_up(&qp->wait);
1230 put_txreq(dev, tx);
1231 bail:
1232 return ret;
1233 }
1234
1235 static int ipath_verbs_send_pio(struct ipath_qp *qp,
1236 struct ipath_ib_header *ibhdr, u32 hdrwords,
1237 struct ipath_sge_state *ss, u32 len,
1238 u32 plen, u32 dwords)
1239 {
1240 struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
1241 u32 *hdr = (u32 *) ibhdr;
1242 u32 __iomem *piobuf;
1243 unsigned flush_wc;
1244 u32 control;
1245 int ret;
1246 unsigned long flags;
1247
1248 piobuf = ipath_getpiobuf(dd, plen, NULL);
1249 if (unlikely(piobuf == NULL)) {
1250 ret = -EBUSY;
1251 goto bail;
1252 }
1253
1254 /*
1255 * Get the saved delay count we computed for the previous packet
1256 * and save the delay count for this packet to be used next time
1257 * we get here.
1258 */
1259 control = qp->s_pkt_delay;
1260 qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);
1261
1262 /* VL15 packets bypass credit check */
1263 if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
1264 control |= 1ULL << 31;
1265
1266 /*
1267 * Write the length to the control qword plus any needed flags.
1268 * We have to flush after the PBC for correctness on some cpus
1269 * or WC buffer can be written out of order.
1270 */
1271 writeq(((u64) control << 32) | plen, piobuf);
1272 piobuf += 2;
1273
1274 flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
1275 if (len == 0) {
1276 /*
1277 * If there is just the header portion, must flush before
1278 * writing last word of header for correctness, and after
1279 * the last header word (trigger word).
1280 */
1281 if (flush_wc) {
1282 ipath_flush_wc();
1283 __iowrite32_copy(piobuf, hdr, hdrwords - 1);
1284 ipath_flush_wc();
1285 __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
1286 ipath_flush_wc();
1287 } else
1288 __iowrite32_copy(piobuf, hdr, hdrwords);
1289 goto done;
1290 }
1291
1292 if (flush_wc)
1293 ipath_flush_wc();
1294 __iowrite32_copy(piobuf, hdr, hdrwords);
1295 piobuf += hdrwords;
1296
1297 /* The common case is aligned and contained in one segment. */
1298 if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
1299 !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
1300 u32 *addr = (u32 *) ss->sge.vaddr;
1301
1302 /* Update address before sending packet. */
1303 update_sge(ss, len);
1304 if (flush_wc) {
1305 __iowrite32_copy(piobuf, addr, dwords - 1);
1306 /* must flush everything before the trigger word */
1307 ipath_flush_wc();
1308 __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
1309 /* be sure trigger word is written */
1310 ipath_flush_wc();
1311 } else
1312 __iowrite32_copy(piobuf, addr, dwords);
1313 goto done;
1314 }
1315 copy_io(piobuf, ss, len, flush_wc);
1316 done:
1317 if (qp->s_wqe) {
1318 spin_lock_irqsave(&qp->s_lock, flags);
1319 ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
1320 spin_unlock_irqrestore(&qp->s_lock, flags);
1321 }
1322 ret = 0;
1323 bail:
1324 return ret;
1325 }
1326
1327 /**
1328 * ipath_verbs_send - send a packet
1329 * @qp: the QP to send on
1330 * @hdr: the packet header
1331 * @hdrwords: the number of 32-bit words in the header
1332 * @ss: the SGE to send
1333 * @len: the length of the packet in bytes
1334 */
1335 int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
1336 u32 hdrwords, struct ipath_sge_state *ss, u32 len)
1337 {
1338 struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
1339 u32 plen;
1340 int ret;
1341 u32 dwords = (len + 3) >> 2;
1342
1343 /*
1344 * Calculate the send buffer trigger address.
1345 * The +1 accounts for the pbc control dword following the pbc length.
1346 */
1347 plen = hdrwords + dwords + 1;
1348
1349 /*
1350 * VL15 packets (IB_QPT_SMI) will always use PIO, so we
1351 * can defer SDMA restart until link goes ACTIVE without
1352 * worrying about just how we got there.
1353 */
1354 if (qp->ibqp.qp_type == IB_QPT_SMI ||
1355 !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
1356 ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
1357 plen, dwords);
1358 else
1359 ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
1360 plen, dwords);
1361
1362 return ret;
1363 }
1364
1365 int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
1366 u64 *rwords, u64 *spkts, u64 *rpkts,
1367 u64 *xmit_wait)
1368 {
1369 int ret;
1370
1371 if (!(dd->ipath_flags & IPATH_INITTED)) {
1372 /* no hardware, freeze, etc. */
1373 ret = -EINVAL;
1374 goto bail;
1375 }
1376 *swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
1377 *rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
1378 *spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
1379 *rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
1380 *xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);
1381
1382 ret = 0;
1383
1384 bail:
1385 return ret;
1386 }
1387
1388 /**
1389 * ipath_get_counters - get various chip counters
1390 * @dd: the infinipath device
1391 * @cntrs: counters are placed here
1392 *
1393 * Return the counters needed by recv_pma_get_portcounters().
1394 */
1395 int ipath_get_counters(struct ipath_devdata *dd,
1396 struct ipath_verbs_counters *cntrs)
1397 {
1398 struct ipath_cregs const *crp = dd->ipath_cregs;
1399 int ret;
1400
1401 if (!(dd->ipath_flags & IPATH_INITTED)) {
1402 /* no hardware, freeze, etc. */
1403 ret = -EINVAL;
1404 goto bail;
1405 }
1406 cntrs->symbol_error_counter =
1407 ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
1408 cntrs->link_error_recovery_counter =
1409 ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
1410 /*
1411 * The link downed counter counts when the other side downs the
1412 * connection. We add in the number of times we downed the link
1413 * due to local link integrity errors to compensate.
1414 */
1415 cntrs->link_downed_counter =
1416 ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
1417 cntrs->port_rcv_errors =
1418 ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
1419 ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
1420 ipath_snap_cntr(dd, crp->cr_portovflcnt) +
1421 ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
1422 ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
1423 ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
1424 ipath_snap_cntr(dd, crp->cr_erricrccnt) +
1425 ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
1426 ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
1427 ipath_snap_cntr(dd, crp->cr_badformatcnt) +
1428 dd->ipath_rxfc_unsupvl_errs;
1429 if (crp->cr_rxotherlocalphyerrcnt)
1430 cntrs->port_rcv_errors +=
1431 ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
1432 if (crp->cr_rxvlerrcnt)
1433 cntrs->port_rcv_errors +=
1434 ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
1435 cntrs->port_rcv_remphys_errors =
1436 ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
1437 cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
1438 cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
1439 cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
1440 cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
1441 cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
1442 cntrs->local_link_integrity_errors =
1443 crp->cr_locallinkintegrityerrcnt ?
1444 ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
1445 ((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
1446 dd->ipath_lli_errs : dd->ipath_lli_errors);
1447 cntrs->excessive_buffer_overrun_errors =
1448 crp->cr_excessbufferovflcnt ?
1449 ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
1450 dd->ipath_overrun_thresh_errs;
1451 cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
1452 ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;
1453
1454 ret = 0;
1455
1456 bail:
1457 return ret;
1458 }
1459
1460 /**
1461 * ipath_ib_piobufavail - callback when a PIO buffer is available
1462 * @arg: the device pointer
1463 *
1464 * This is called from ipath_intr() at interrupt level when a PIO buffer is
1465 * available after ipath_verbs_send() returned an error that no buffers were
1466 * available. Return 1 if we consumed all the PIO buffers and we still have
1467 * QPs waiting for buffers (for now, just restart the send tasklet and
1468 * return zero).
1469 */
1470 int ipath_ib_piobufavail(struct ipath_ibdev *dev)
1471 {
1472 struct list_head *list;
1473 struct ipath_qp *qplist;
1474 struct ipath_qp *qp;
1475 unsigned long flags;
1476
1477 if (dev == NULL)
1478 goto bail;
1479
1480 list = &dev->piowait;
1481 qplist = NULL;
1482
1483 spin_lock_irqsave(&dev->pending_lock, flags);
1484 while (!list_empty(list)) {
1485 qp = list_entry(list->next, struct ipath_qp, piowait);
1486 list_del_init(&qp->piowait);
1487 qp->pio_next = qplist;
1488 qplist = qp;
1489 atomic_inc(&qp->refcount);
1490 }
1491 spin_unlock_irqrestore(&dev->pending_lock, flags);
1492
1493 while (qplist != NULL) {
1494 qp = qplist;
1495 qplist = qp->pio_next;
1496
1497 spin_lock_irqsave(&qp->s_lock, flags);
1498 if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
1499 ipath_schedule_send(qp);
1500 spin_unlock_irqrestore(&qp->s_lock, flags);
1501
1502 /* Notify ipath_destroy_qp() if it is waiting. */
1503 if (atomic_dec_and_test(&qp->refcount))
1504 wake_up(&qp->wait);
1505 }
1506
1507 bail:
1508 return 0;
1509 }
1510
1511 static int ipath_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
1512 struct ib_udata *uhw)
1513 {
1514 struct ipath_ibdev *dev = to_idev(ibdev);
1515
1516 if (uhw->inlen || uhw->outlen)
1517 return -EINVAL;
1518
1519 memset(props, 0, sizeof(*props));
1520
1521 props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
1522 IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
1523 IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
1524 IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
1525 props->page_size_cap = PAGE_SIZE;
1526 props->vendor_id =
1527 IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
1528 props->vendor_part_id = dev->dd->ipath_deviceid;
1529 props->hw_ver = dev->dd->ipath_pcirev;
1530
1531 props->sys_image_guid = dev->sys_image_guid;
1532
1533 props->max_mr_size = ~0ull;
1534 props->max_qp = ib_ipath_max_qps;
1535 props->max_qp_wr = ib_ipath_max_qp_wrs;
1536 props->max_sge = ib_ipath_max_sges;
1537 props->max_sge_rd = ib_ipath_max_sges;
1538 props->max_cq = ib_ipath_max_cqs;
1539 props->max_ah = ib_ipath_max_ahs;
1540 props->max_cqe = ib_ipath_max_cqes;
1541 props->max_mr = dev->lk_table.max;
1542 props->max_fmr = dev->lk_table.max;
1543 props->max_map_per_fmr = 32767;
1544 props->max_pd = ib_ipath_max_pds;
1545 props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
1546 props->max_qp_init_rd_atom = 255;
1547 /* props->max_res_rd_atom */
1548 props->max_srq = ib_ipath_max_srqs;
1549 props->max_srq_wr = ib_ipath_max_srq_wrs;
1550 props->max_srq_sge = ib_ipath_max_srq_sges;
1551 /* props->local_ca_ack_delay */
1552 props->atomic_cap = IB_ATOMIC_GLOB;
1553 props->max_pkeys = ipath_get_npkeys(dev->dd);
1554 props->max_mcast_grp = ib_ipath_max_mcast_grps;
1555 props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
1556 props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
1557 props->max_mcast_grp;
1558
1559 return 0;
1560 }
1561
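/* Map InfiniPath IBC link-training state to IB PortPhysicalState values. */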
1562 const u8 ipath_cvt_physportstate[32] = {
1563 [INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
1564 [INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
1565 [INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
1566 [INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
1567 [INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
1568 [INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
1569 [INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
1570 IB_PHYSPORTSTATE_CFG_TRAIN,
1571 [INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
1572 IB_PHYSPORTSTATE_CFG_TRAIN,
1573 [INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
1574 IB_PHYSPORTSTATE_CFG_TRAIN,
1575 [INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
1576 [INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
1577 IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1578 [INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
1579 IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1580 [INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
1581 IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
1582 [0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
1583 [0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
1584 [0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
1585 [0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
1586 [0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
1587 [0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
1588 [0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
1589 [0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
1590 };
1591
1592 u32 ipath_get_cr_errpkey(struct ipath_devdata *dd)
1593 {
1594 return ipath_read_creg32(dd, dd->ipath_cregs->cr_errpkey);
1595 }
1596
1597 static int ipath_query_port(struct ib_device *ibdev,
1598 u8 port, struct ib_port_attr *props)
1599 {
1600 struct ipath_ibdev *dev = to_idev(ibdev);
1601 struct ipath_devdata *dd = dev->dd;
1602 enum ib_mtu mtu;
1603 u16 lid = dd->ipath_lid;
1604 u64 ibcstat;
1605
1606 memset(props, 0, sizeof(*props));
1607 props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
1608 props->lmc = dd->ipath_lmc;
1609 props->sm_lid = dev->sm_lid;
1610 props->sm_sl = dev->sm_sl;
1611 ibcstat = dd->ipath_lastibcstat;
1612 /* map LinkState to IB portinfo values. */
1613 props->state = ipath_ib_linkstate(dd, ibcstat) + 1;
1614
1615 /* See phys_state_show() */
1616 props->phys_state = /* MEA: assumes shift == 0 */
1617 ipath_cvt_physportstate[dd->ipath_lastibcstat &
1618 dd->ibcs_lts_mask];
1619 props->port_cap_flags = dev->port_cap_flags;
1620 props->gid_tbl_len = 1;
1621 props->max_msg_sz = 0x80000000;
1622 props->pkey_tbl_len = ipath_get_npkeys(dd);
1623 props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
1624 dev->z_pkey_violations;
1625 props->qkey_viol_cntr = dev->qkey_violations;
1626 props->active_width = dd->ipath_link_width_active;
1627 /* See rate_show() */
1628 props->active_speed = dd->ipath_link_speed_active;
1629 props->max_vl_num = 1; /* VLCap = VL0 */
1630 props->init_type_reply = 0;
1631
1632 props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
1633 switch (dd->ipath_ibmtu) {
1634 case 4096:
1635 mtu = IB_MTU_4096;
1636 break;
1637 case 2048:
1638 mtu = IB_MTU_2048;
1639 break;
1640 case 1024:
1641 mtu = IB_MTU_1024;
1642 break;
1643 case 512:
1644 mtu = IB_MTU_512;
1645 break;
1646 case 256:
1647 mtu = IB_MTU_256;
1648 break;
1649 default:
1650 mtu = IB_MTU_2048;
1651 }
1652 props->active_mtu = mtu;
1653 props->subnet_timeout = dev->subnet_timeout;
1654
1655 return 0;
1656 }
1657
1658 static int ipath_modify_device(struct ib_device *device,
1659 int device_modify_mask,
1660 struct ib_device_modify *device_modify)
1661 {
1662 int ret;
1663
1664 if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
1665 IB_DEVICE_MODIFY_NODE_DESC)) {
1666 ret = -EOPNOTSUPP;
1667 goto bail;
1668 }
1669
1670 if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
1671 memcpy(device->node_desc, device_modify->node_desc, 64);
1672
1673 if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
1674 to_idev(device)->sys_image_guid =
1675 cpu_to_be64(device_modify->sys_image_guid);
1676
1677 ret = 0;
1678
1679 bail:
1680 return ret;
1681 }
1682
1683 static int ipath_modify_port(struct ib_device *ibdev,
1684 u8 port, int port_modify_mask,
1685 struct ib_port_modify *props)
1686 {
1687 struct ipath_ibdev *dev = to_idev(ibdev);
1688
1689 dev->port_cap_flags |= props->set_port_cap_mask;
1690 dev->port_cap_flags &= ~props->clr_port_cap_mask;
1691 if (port_modify_mask & IB_PORT_SHUTDOWN)
1692 ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
1693 if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
1694 dev->qkey_violations = 0;
1695 return 0;
1696 }
1697
1698 static int ipath_query_gid(struct ib_device *ibdev, u8 port,
1699 int index, union ib_gid *gid)
1700 {
1701 struct ipath_ibdev *dev = to_idev(ibdev);
1702 int ret;
1703
1704 if (index >= 1) {
1705 ret = -EINVAL;
1706 goto bail;
1707 }
1708 gid->global.subnet_prefix = dev->gid_prefix;
1709 gid->global.interface_id = dev->dd->ipath_guid;
1710
1711 ret = 0;
1712
1713 bail:
1714 return ret;
1715 }
1716
1717 static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
1718 struct ib_ucontext *context,
1719 struct ib_udata *udata)
1720 {
1721 struct ipath_ibdev *dev = to_idev(ibdev);
1722 struct ipath_pd *pd;
1723 struct ib_pd *ret;
1724
1725 /*
1726 * This is actually totally arbitrary. Some correctness tests
1727 * assume there's a maximum number of PDs that can be allocated.
1728 * We don't actually have this limit, but the test fails if we
1729 * allow more allocations than the value we report.
1730 */
1731
1732 pd = kmalloc(sizeof *pd, GFP_KERNEL);
1733 if (!pd) {
1734 ret = ERR_PTR(-ENOMEM);
1735 goto bail;
1736 }
1737
1738 spin_lock(&dev->n_pds_lock);
1739 if (dev->n_pds_allocated == ib_ipath_max_pds) {
1740 spin_unlock(&dev->n_pds_lock);
1741 kfree(pd);
1742 ret = ERR_PTR(-ENOMEM);
1743 goto bail;
1744 }
1745
1746 dev->n_pds_allocated++;
1747 spin_unlock(&dev->n_pds_lock);
1748
1749 /* ib_alloc_pd() will initialize pd->ibpd. */
1750 pd->user = udata != NULL;
1751
1752 ret = &pd->ibpd;
1753
1754 bail:
1755 return ret;
1756 }
1757
1758 static int ipath_dealloc_pd(struct ib_pd *ibpd)
1759 {
1760 struct ipath_pd *pd = to_ipd(ibpd);
1761 struct ipath_ibdev *dev = to_idev(ibpd->device);
1762
1763 spin_lock(&dev->n_pds_lock);
1764 dev->n_pds_allocated--;
1765 spin_unlock(&dev->n_pds_lock);
1766
1767 kfree(pd);
1768
1769 return 0;
1770 }
1771
1772 /**
1773 * ipath_create_ah - create an address handle
1774 * @pd: the protection domain
1775 * @ah_attr: the attributes of the AH
1776 *
1777 * This may be called from interrupt context.
1778 */
1779 static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
1780 struct ib_ah_attr *ah_attr)
1781 {
1782 struct ipath_ah *ah;
1783 struct ib_ah *ret;
1784 struct ipath_ibdev *dev = to_idev(pd->device);
1785 unsigned long flags;
1786
1787 /* A multicast address requires a GRH (see ch. 8.4.1). */
1788 if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
1789 ah_attr->dlid != IPATH_PERMISSIVE_LID &&
1790 !(ah_attr->ah_flags & IB_AH_GRH)) {
1791 ret = ERR_PTR(-EINVAL);
1792 goto bail;
1793 }
1794
1795 if (ah_attr->dlid == 0) {
1796 ret = ERR_PTR(-EINVAL);
1797 goto bail;
1798 }
1799
1800 if (ah_attr->port_num < 1 ||
1801 ah_attr->port_num > pd->device->phys_port_cnt) {
1802 ret = ERR_PTR(-EINVAL);
1803 goto bail;
1804 }
1805
1806 ah = kmalloc(sizeof *ah, GFP_ATOMIC);
1807 if (!ah) {
1808 ret = ERR_PTR(-ENOMEM);
1809 goto bail;
1810 }
1811
1812 spin_lock_irqsave(&dev->n_ahs_lock, flags);
1813 if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
1814 spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1815 kfree(ah);
1816 ret = ERR_PTR(-ENOMEM);
1817 goto bail;
1818 }
1819
1820 dev->n_ahs_allocated++;
1821 spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1822
1823 /* ib_create_ah() will initialize ah->ibah. */
1824 ah->attr = *ah_attr;
1825 ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);
1826
1827 ret = &ah->ibah;
1828
1829 bail:
1830 return ret;
1831 }
1832
1833 /**
1834 * ipath_destroy_ah - destroy an address handle
1835 * @ibah: the AH to destroy
1836 *
1837 * This may be called from interrupt context.
1838 */
1839 static int ipath_destroy_ah(struct ib_ah *ibah)
1840 {
1841 struct ipath_ibdev *dev = to_idev(ibah->device);
1842 struct ipath_ah *ah = to_iah(ibah);
1843 unsigned long flags;
1844
1845 spin_lock_irqsave(&dev->n_ahs_lock, flags);
1846 dev->n_ahs_allocated--;
1847 spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
1848
1849 kfree(ah);
1850
1851 return 0;
1852 }
1853
1854 static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
1855 {
1856 struct ipath_ah *ah = to_iah(ibah);
1857
1858 *ah_attr = ah->attr;
1859 ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);
1860
1861 return 0;
1862 }
1863
1864 /**
1865 * ipath_get_npkeys - return the size of the PKEY table for port 0
1866 * @dd: the infinipath device
1867 */
1868 unsigned ipath_get_npkeys(struct ipath_devdata *dd)
1869 {
1870 return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
1871 }
1872
1873 /**
1874 * ipath_get_pkey - return the indexed PKEY from the port PKEY table
1875 * @dd: the infinipath device
1876 * @index: the PKEY index
1877 */
1878 unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
1879 {
1880 unsigned ret;
1881
1882 /* always a kernel port, no locking needed */
1883 if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
1884 ret = 0;
1885 else
1886 ret = dd->ipath_pd[0]->port_pkeys[index];
1887
1888 return ret;
1889 }
1890
1891 static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
1892 u16 *pkey)
1893 {
1894 struct ipath_ibdev *dev = to_idev(ibdev);
1895 int ret;
1896
1897 if (index >= ipath_get_npkeys(dev->dd)) {
1898 ret = -EINVAL;
1899 goto bail;
1900 }
1901
1902 *pkey = ipath_get_pkey(dev->dd, index);
1903 ret = 0;
1904
1905 bail:
1906 return ret;
1907 }
1908
1909 /**
1910 * ipath_alloc_ucontext - allocate a ucontext
1911 * @ibdev: the infiniband device
1912 * @udata: not used by the InfiniPath driver
1913 */
1914
1915 static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
1916 struct ib_udata *udata)
1917 {
1918 struct ipath_ucontext *context;
1919 struct ib_ucontext *ret;
1920
1921 context = kmalloc(sizeof *context, GFP_KERNEL);
1922 if (!context) {
1923 ret = ERR_PTR(-ENOMEM);
1924 goto bail;
1925 }
1926
1927 ret = &context->ibucontext;
1928
1929 bail:
1930 return ret;
1931 }
1932
1933 static int ipath_dealloc_ucontext(struct ib_ucontext *context)
1934 {
1935 kfree(to_iucontext(context));
1936 return 0;
1937 }
1938
1939 static int ipath_verbs_register_sysfs(struct ib_device *dev);
1940
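/*
 * Per-device timer callback: run the verbs-layer housekeeping in
 * ipath_ib_timer() and re-arm the timer for the next jiffy.
 */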
1941 static void __verbs_timer(unsigned long arg)
1942 {
1943 struct ipath_devdata *dd = (struct ipath_devdata *) arg;
1944
1945 /* Handle verbs layer timeouts. */
1946 ipath_ib_timer(dd->verbs_dev);
1947
1948 mod_timer(&dd->verbs_timer, jiffies + 1);
1949 }
1950
1951 static int enable_timer(struct ipath_devdata *dd)
1952 {
1953 /*
1954 * Early chips had a design flaw where the chip and kernel idea
1955 * of the tail register don't always agree, and therefore we won't
1956 * get an interrupt on the next packet received.
1957 * If the board supports per packet receive interrupts, use it.
1958 * Otherwise, the timer function periodically checks for packets
1959 * to cover this case.
1960 * Either way, the timer is needed for verbs layer related
1961 * processing.
1962 */
1963 if (dd->ipath_flags & IPATH_GPIO_INTR) {
1964 ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
1965 0x2074076542310ULL);
1966 /* Enable GPIO bit 2 interrupt */
1967 dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
1968 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1969 dd->ipath_gpio_mask);
1970 }
1971
1972 setup_timer(&dd->verbs_timer, __verbs_timer, (unsigned long)dd);
1973
1974 dd->verbs_timer.expires = jiffies + 1;
1975 add_timer(&dd->verbs_timer);
1976
1977 return 0;
1978 }
1979
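/*
 * Undo enable_timer(): mask the GPIO receive interrupt if it was
 * enabled there, then stop the verbs timer.
 */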
1980 static int disable_timer(struct ipath_devdata *dd)
1981 {
1983 if (dd->ipath_flags & IPATH_GPIO_INTR) {
1984 /* Disable GPIO bit 2 interrupt */
1985 dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
1986 ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
1987 dd->ipath_gpio_mask);
1988 /*
1989 * We might want to undo changes to debugportselect,
1990 * but how?
1991 */
1992 }
1993
1994 del_timer_sync(&dd->verbs_timer);
1995
1996 return 0;
1997 }
1998
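/*
 * Report the immutable attributes of the (single) port: PKEY/GID table
 * sizes, core capability flags and maximum MAD size.
 */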
1999 static int ipath_port_immutable(struct ib_device *ibdev, u8 port_num,
2000 struct ib_port_immutable *immutable)
2001 {
2002 struct ib_port_attr attr;
2003 int err;
2004
2005 err = ipath_query_port(ibdev, port_num, &attr);
2006 if (err)
2007 return err;
2008
2009 immutable->pkey_tbl_len = attr.pkey_tbl_len;
2010 immutable->gid_tbl_len = attr.gid_tbl_len;
2011 immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
2012 immutable->max_mad_size = IB_MGMT_MAD_SIZE;
2013
2014 return 0;
2015 }
2016
2017 /**
2018 * ipath_register_ib_device - register our device with the infiniband core
2019 * @dd: the device data structure
2020  * Return 0 on success or a negative errno; dd->verbs_dev is set to the
2020  * allocated ipath_ibdev, or to NULL on error.
2021 */
2022 int ipath_register_ib_device(struct ipath_devdata *dd)
2023 {
2024 struct ipath_verbs_counters cntrs;
2025 struct ipath_ibdev *idev;
2026 struct ib_device *dev;
2027 struct ipath_verbs_txreq *tx;
2028 unsigned i;
2029 int ret;
2030
2031 idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
2032 if (idev == NULL) {
2033 ret = -ENOMEM;
2034 goto bail;
2035 }
2036
2037 dev = &idev->ibdev;
2038
2039 if (dd->ipath_sdma_descq_cnt) {
2040 tx = kmalloc_array(dd->ipath_sdma_descq_cnt, sizeof *tx,
2041 GFP_KERNEL);
2042 if (tx == NULL) {
2043 ret = -ENOMEM;
2044 goto err_tx;
2045 }
2046 } else
2047 tx = NULL;
2048 idev->txreq_bufs = tx;
2049
2050 /* Only need to initialize non-zero fields. */
2051 spin_lock_init(&idev->n_pds_lock);
2052 spin_lock_init(&idev->n_ahs_lock);
2053 spin_lock_init(&idev->n_cqs_lock);
2054 spin_lock_init(&idev->n_qps_lock);
2055 spin_lock_init(&idev->n_srqs_lock);
2056 spin_lock_init(&idev->n_mcast_grps_lock);
2057
2058 spin_lock_init(&idev->qp_table.lock);
2059 spin_lock_init(&idev->lk_table.lock);
2060 idev->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
2061 	/* Set the GID prefix to the default value (see IB spec ch. 4.1.1) */
2062 idev->gid_prefix = cpu_to_be64(0xfe80000000000000ULL);
2063
2064 ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
2065 if (ret)
2066 goto err_qp;
2067
2068 /*
2069 * The top ib_ipath_lkey_table_size bits are used to index the
2070 * table. The lower 8 bits can be owned by the user (copied from
2071 * the LKEY). The remaining bits act as a generation number or tag.
2072 */
2073 idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
2074 idev->lk_table.table = kcalloc(idev->lk_table.max,
2075 sizeof(*idev->lk_table.table),
2076 GFP_KERNEL);
2077 if (idev->lk_table.table == NULL) {
2078 ret = -ENOMEM;
2079 goto err_lk;
2080 }
2081 INIT_LIST_HEAD(&idev->pending_mmaps);
2082 spin_lock_init(&idev->pending_lock);
2083 idev->mmap_offset = PAGE_SIZE;
2084 spin_lock_init(&idev->mmap_offset_lock);
2085 INIT_LIST_HEAD(&idev->pending[0]);
2086 INIT_LIST_HEAD(&idev->pending[1]);
2087 INIT_LIST_HEAD(&idev->pending[2]);
2088 INIT_LIST_HEAD(&idev->piowait);
2089 INIT_LIST_HEAD(&idev->rnrwait);
2090 INIT_LIST_HEAD(&idev->txreq_free);
2091 idev->pending_index = 0;
2092 idev->port_cap_flags =
2093 IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
2094 if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
2095 idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
2096 idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
2097 idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
2098 idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
2099 idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
2100 idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
2101
2102 /* Snapshot current HW counters to "clear" them. */
2103 ipath_get_counters(dd, &cntrs);
2104 idev->z_symbol_error_counter = cntrs.symbol_error_counter;
2105 idev->z_link_error_recovery_counter =
2106 cntrs.link_error_recovery_counter;
2107 idev->z_link_downed_counter = cntrs.link_downed_counter;
2108 idev->z_port_rcv_errors = cntrs.port_rcv_errors;
2109 idev->z_port_rcv_remphys_errors =
2110 cntrs.port_rcv_remphys_errors;
2111 idev->z_port_xmit_discards = cntrs.port_xmit_discards;
2112 idev->z_port_xmit_data = cntrs.port_xmit_data;
2113 idev->z_port_rcv_data = cntrs.port_rcv_data;
2114 idev->z_port_xmit_packets = cntrs.port_xmit_packets;
2115 idev->z_port_rcv_packets = cntrs.port_rcv_packets;
2116 idev->z_local_link_integrity_errors =
2117 cntrs.local_link_integrity_errors;
2118 idev->z_excessive_buffer_overrun_errors =
2119 cntrs.excessive_buffer_overrun_errors;
2120 idev->z_vl15_dropped = cntrs.vl15_dropped;
2121
2122 for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
2123 list_add(&tx->txreq.list, &idev->txreq_free);
2124
2125 /*
2126 * The system image GUID is supposed to be the same for all
2127 * IB HCAs in a single system but since there can be other
2128 * device types in the system, we can't be sure this is unique.
2129 */
2130 if (!sys_image_guid)
2131 sys_image_guid = dd->ipath_guid;
2132 idev->sys_image_guid = sys_image_guid;
2133 idev->ib_unit = dd->ipath_unit;
2134 idev->dd = dd;
2135
2136 strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
2137 dev->owner = THIS_MODULE;
2138 dev->node_guid = dd->ipath_guid;
2139 dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
2140 dev->uverbs_cmd_mask =
2141 (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
2142 (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
2143 (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
2144 (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
2145 (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
2146 (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
2147 (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
2148 (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
2149 (1ull << IB_USER_VERBS_CMD_REG_MR) |
2150 (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
2151 (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2152 (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
2153 (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
2154 (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
2155 (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
2156 (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
2157 (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
2158 (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
2159 (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
2160 (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
2161 (1ull << IB_USER_VERBS_CMD_POST_SEND) |
2162 (1ull << IB_USER_VERBS_CMD_POST_RECV) |
2163 (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
2164 (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
2165 (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
2166 (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
2167 (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
2168 (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
2169 (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
2170 dev->node_type = RDMA_NODE_IB_CA;
2171 dev->phys_port_cnt = 1;
2172 dev->num_comp_vectors = 1;
2173 dev->dma_device = &dd->pcidev->dev;
2174 dev->query_device = ipath_query_device;
2175 dev->modify_device = ipath_modify_device;
2176 dev->query_port = ipath_query_port;
2177 dev->modify_port = ipath_modify_port;
2178 dev->query_pkey = ipath_query_pkey;
2179 dev->query_gid = ipath_query_gid;
2180 dev->alloc_ucontext = ipath_alloc_ucontext;
2181 dev->dealloc_ucontext = ipath_dealloc_ucontext;
2182 dev->alloc_pd = ipath_alloc_pd;
2183 dev->dealloc_pd = ipath_dealloc_pd;
2184 dev->create_ah = ipath_create_ah;
2185 dev->destroy_ah = ipath_destroy_ah;
2186 dev->query_ah = ipath_query_ah;
2187 dev->create_srq = ipath_create_srq;
2188 dev->modify_srq = ipath_modify_srq;
2189 dev->query_srq = ipath_query_srq;
2190 dev->destroy_srq = ipath_destroy_srq;
2191 dev->create_qp = ipath_create_qp;
2192 dev->modify_qp = ipath_modify_qp;
2193 dev->query_qp = ipath_query_qp;
2194 dev->destroy_qp = ipath_destroy_qp;
2195 dev->post_send = ipath_post_send;
2196 dev->post_recv = ipath_post_receive;
2197 dev->post_srq_recv = ipath_post_srq_receive;
2198 dev->create_cq = ipath_create_cq;
2199 dev->destroy_cq = ipath_destroy_cq;
2200 dev->resize_cq = ipath_resize_cq;
2201 dev->poll_cq = ipath_poll_cq;
2202 dev->req_notify_cq = ipath_req_notify_cq;
2203 dev->get_dma_mr = ipath_get_dma_mr;
2204 dev->reg_user_mr = ipath_reg_user_mr;
2205 dev->dereg_mr = ipath_dereg_mr;
2206 dev->alloc_fmr = ipath_alloc_fmr;
2207 dev->map_phys_fmr = ipath_map_phys_fmr;
2208 dev->unmap_fmr = ipath_unmap_fmr;
2209 dev->dealloc_fmr = ipath_dealloc_fmr;
2210 dev->attach_mcast = ipath_multicast_attach;
2211 dev->detach_mcast = ipath_multicast_detach;
2212 dev->process_mad = ipath_process_mad;
2213 dev->mmap = ipath_mmap;
2214 dev->dma_ops = &ipath_dma_mapping_ops;
2215 dev->get_port_immutable = ipath_port_immutable;
2216
2217 snprintf(dev->node_desc, sizeof(dev->node_desc),
2218 IPATH_IDSTR " %s", init_utsname()->nodename);
2219
2220 ret = ib_register_device(dev, NULL);
2221 if (ret)
2222 goto err_reg;
2223
2224 ret = ipath_verbs_register_sysfs(dev);
2225 if (ret)
2226 goto err_class;
2227
2228 enable_timer(dd);
2229
2230 goto bail;
2231
2232 err_class:
2233 ib_unregister_device(dev);
2234 err_reg:
2235 kfree(idev->lk_table.table);
2236 err_lk:
2237 kfree(idev->qp_table.table);
2238 err_qp:
2239 kfree(idev->txreq_bufs);
2240 err_tx:
2241 ib_dealloc_device(dev);
2242 ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
2243 idev = NULL;
2244
2245 bail:
2246 dd->verbs_dev = idev;
2247 return ret;
2248 }
2249
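/**
 * ipath_unregister_ib_device - unregister from the infiniband core
 * @dev: the verbs device to tear down
 *
 * Counterpart to ipath_register_ib_device(): stops the verbs timer,
 * warns about any work still queued, and frees the QP, LKEY and txreq
 * tables.
 */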
2250 void ipath_unregister_ib_device(struct ipath_ibdev *dev)
2251 {
2252 struct ib_device *ibdev = &dev->ibdev;
2253 u32 qps_inuse;
2254
2255 ib_unregister_device(ibdev);
2256
2257 disable_timer(dev->dd);
2258
2259 if (!list_empty(&dev->pending[0]) ||
2260 !list_empty(&dev->pending[1]) ||
2261 !list_empty(&dev->pending[2]))
2262 ipath_dev_err(dev->dd, "pending list not empty!\n");
2263 if (!list_empty(&dev->piowait))
2264 ipath_dev_err(dev->dd, "piowait list not empty!\n");
2265 if (!list_empty(&dev->rnrwait))
2266 ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
2267 if (!ipath_mcast_tree_empty())
2268 ipath_dev_err(dev->dd, "multicast table memory leak!\n");
2269 /*
2270 * Note that ipath_unregister_ib_device() can be called before all
2271 * the QPs are destroyed!
2272 */
2273 qps_inuse = ipath_free_all_qps(&dev->qp_table);
2274 if (qps_inuse)
2275 ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
2276 qps_inuse);
2277 kfree(dev->qp_table.table);
2278 kfree(dev->lk_table.table);
2279 kfree(dev->txreq_bufs);
2280 ib_dealloc_device(ibdev);
2281 }
2282
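/* sysfs "hw_rev" attribute: report the chip's PCI revision. */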
2283 static ssize_t show_rev(struct device *device, struct device_attribute *attr,
2284 char *buf)
2285 {
2286 struct ipath_ibdev *dev =
2287 container_of(device, struct ipath_ibdev, ibdev.dev);
2288
2289 return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
2290 }
2291
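/* sysfs "hca_type" and "board_id" attributes: report the board name. */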
2292 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
2293 char *buf)
2294 {
2295 struct ipath_ibdev *dev =
2296 container_of(device, struct ipath_ibdev, ibdev.dev);
2297 int ret;
2298
2299 ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
2300 if (ret < 0)
2301 goto bail;
2302 strcat(buf, "\n");
2303 ret = strlen(buf);
2304
2305 bail:
2306 return ret;
2307 }
2308
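/*
 * sysfs "stats" attribute: dump the verbs-layer counters followed by
 * per-opcode packet and byte totals.
 */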
2309 static ssize_t show_stats(struct device *device, struct device_attribute *attr,
2310 char *buf)
2311 {
2312 struct ipath_ibdev *dev =
2313 container_of(device, struct ipath_ibdev, ibdev.dev);
2314 int i;
2315 int len;
2316
2317 len = sprintf(buf,
2318 "RC resends %d\n"
2319 "RC no QACK %d\n"
2320 "RC ACKs %d\n"
2321 "RC SEQ NAKs %d\n"
2322 "RC RDMA seq %d\n"
2323 "RC RNR NAKs %d\n"
2324 "RC OTH NAKs %d\n"
2325 "RC timeouts %d\n"
2326 "RC RDMA dup %d\n"
2327 "piobuf wait %d\n"
2328 "unaligned %d\n"
2329 "PKT drops %d\n"
2330 "WQE errs %d\n",
2331 dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
2332 dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
2333 dev->n_other_naks, dev->n_timeouts,
2334 dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
2335 dev->n_pkt_drops, dev->n_wqe_errs);
2336 for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
2337 const struct ipath_opcode_stats *si = &dev->opstats[i];
2338
2339 if (!si->n_packets && !si->n_bytes)
2340 continue;
2341 len += sprintf(buf + len, "%02x %llu/%llu\n", i,
2342 (unsigned long long) si->n_packets,
2343 (unsigned long long) si->n_bytes);
2344 }
2345 return len;
2346 }
2347
2348 static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
2349 static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
2350 static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
2351 static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);
2352
2353 static struct device_attribute *ipath_class_attributes[] = {
2354 &dev_attr_hw_rev,
2355 &dev_attr_hca_type,
2356 &dev_attr_board_id,
2357 &dev_attr_stats
2358 };
2359
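/*
 * Create the device attribute files listed above; on failure, remove
 * the attribute files again and return the error.
 */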
2360 static int ipath_verbs_register_sysfs(struct ib_device *dev)
2361 {
2362 int i;
2363 int ret;
2364
2365 for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
2366 ret = device_create_file(&dev->dev,
2367 ipath_class_attributes[i]);
2368 if (ret)
2369 goto bail;
2370 }
2371 return 0;
2372 bail:
2373 for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
2374 device_remove_file(&dev->dev, ipath_class_attributes[i]);
2375 return ret;
2376 }