/*
 * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
 * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <rdma/ib_mad.h>
#include <rdma/ib_user_verbs.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/utsname.h>
#include <linux/rculist.h>

#include "ipath_kernel.h"
#include "ipath_verbs.h"
#include "ipath_common.h"
static unsigned int ib_ipath_qp_table_size = 251;
module_param_named(qp_table_size, ib_ipath_qp_table_size, uint, S_IRUGO);
MODULE_PARM_DESC(qp_table_size, "QP table size");

unsigned int ib_ipath_lkey_table_size = 12;
module_param_named(lkey_table_size, ib_ipath_lkey_table_size, uint,
		   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
		 "LKEY table size in bits (2^n, 1 <= n <= 23)");
static unsigned int ib_ipath_max_pds = 0xFFFF;
module_param_named(max_pds, ib_ipath_max_pds, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_pds,
		 "Maximum number of protection domains to support");

static unsigned int ib_ipath_max_ahs = 0xFFFF;
module_param_named(max_ahs, ib_ipath_max_ahs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int ib_ipath_max_cqes = 0x2FFFF;
module_param_named(max_cqes, ib_ipath_max_cqes, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_cqes,
		 "Maximum number of completion queue entries to support");

unsigned int ib_ipath_max_cqs = 0x1FFFF;
module_param_named(max_cqs, ib_ipath_max_cqs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int ib_ipath_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, ib_ipath_max_qp_wrs, uint,
		   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int ib_ipath_max_qps = 16384;
module_param_named(max_qps, ib_ipath_max_qps, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int ib_ipath_max_sges = 0x60;
module_param_named(max_sges, ib_ipath_max_sges, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");

unsigned int ib_ipath_max_mcast_grps = 16384;
module_param_named(max_mcast_grps, ib_ipath_max_mcast_grps, uint,
		   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_mcast_grps,
		 "Maximum number of multicast groups to support");

unsigned int ib_ipath_max_mcast_qp_attached = 16;
module_param_named(max_mcast_qp_attached, ib_ipath_max_mcast_qp_attached,
		   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_mcast_qp_attached,
		 "Maximum number of attached QPs to support");

unsigned int ib_ipath_max_srqs = 1024;
module_param_named(max_srqs, ib_ipath_max_srqs, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");

unsigned int ib_ipath_max_srq_sges = 128;
module_param_named(max_srq_sges, ib_ipath_max_srq_sges,
		   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
unsigned int ib_ipath_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, ib_ipath_max_srq_wrs,
		   uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs to support");

static unsigned int ib_ipath_disable_sma;
module_param_named(disable_sma, ib_ipath_disable_sma, uint,
		   S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(disable_sma, "Disable the SMA");
/*
 * Note that it is OK to post send work requests in the SQE and ERR
 * states; ipath_do_send() will process them and generate error
 * completions as per IB 1.2 C10-96.
 */
const int ib_ipath_state_ops[IB_QPS_ERR + 1] = {
	[IB_QPS_INIT] = IPATH_POST_RECV_OK,
	[IB_QPS_RTR] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK,
	[IB_QPS_RTS] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK |
	    IPATH_PROCESS_NEXT_SEND_OK,
	[IB_QPS_SQD] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
	    IPATH_POST_SEND_OK | IPATH_PROCESS_SEND_OK,
	[IB_QPS_SQE] = IPATH_POST_RECV_OK | IPATH_PROCESS_RECV_OK |
	    IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
	[IB_QPS_ERR] = IPATH_POST_RECV_OK | IPATH_FLUSH_RECV |
	    IPATH_POST_SEND_OK | IPATH_FLUSH_SEND,
};
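/*
 * Example of how this table is consulted: ipath_post_one_send() tests
 * ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK, so a post in the
 * SQE or ERR state is accepted and later flushed (IPATH_FLUSH_SEND)
 * rather than being rejected outright.
 */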
struct ipath_ucontext {
	struct ib_ucontext ibucontext;
};

static inline struct ipath_ucontext *to_iucontext(struct ib_ucontext
						  *ibucontext)
{
	return container_of(ibucontext, struct ipath_ucontext, ibucontext);
}
/*
 * Translate ib_wr_opcode into ib_wc_opcode.
 */
const enum ib_wc_opcode ib_ipath_wc_opcode[] = {
	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
	[IB_WR_SEND] = IB_WC_SEND,
	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
};

static __be64 sys_image_guid;
/**
 * ipath_copy_sge - copy data to SGE memory
 * @ss: the SGE state
 * @data: the data to copy
 * @length: the length of the data
 */
void ipath_copy_sge(struct ipath_sge_state *ss, void *data, u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		memcpy(sge->vaddr, data, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		data += len;
		length -= len;
	}
}
/**
 * ipath_skip_sge - skip over SGE memory - XXX almost dup of prev func
 * @ss: the SGE state
 * @length: the number of bytes to skip
 */
void ipath_skip_sge(struct ipath_sge_state *ss, u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		length -= len;
	}
}
/*
 * Count the number of DMA descriptors needed to send length bytes of data.
 * Don't modify the ipath_sge_state to get the count.
 * Return zero if any of the segments is not aligned.
 */
static u32 ipath_count_sge(struct ipath_sge_state *ss, u32 length)
{
	struct ipath_sge *sg_list = ss->sg_list;
	struct ipath_sge sge = ss->sge;
	u8 num_sge = ss->num_sge;
	u32 ndesc = 1;	/* count the header */

	while (length) {
		u32 len = sge.length;

		if (len > length)
			len = length;
		if (len > sge.sge_length)
			len = sge.sge_length;
		BUG_ON(len == 0);
		if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
		    (len != length && (len & (sizeof(u32) - 1)))) {
			ndesc = 0;
			break;
		}
		ndesc++;
		sge.vaddr += len;
		sge.length -= len;
		sge.sge_length -= len;
		if (sge.sge_length == 0) {
			if (--num_sge)
				sge = *sg_list++;
		} else if (sge.length == 0 && sge.mr != NULL) {
			if (++sge.n >= IPATH_SEGSZ) {
				if (++sge.m >= sge.mr->mapsz)
					break;
				sge.n = 0;
			}
			sge.vaddr =
				sge.mr->map[sge.m]->segs[sge.n].vaddr;
			sge.length =
				sge.mr->map[sge.m]->segs[sge.n].length;
		}
		length -= len;
	}
	return ndesc;
}
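/*
 * A zero return from ipath_count_sge() signals ipath_verbs_send_dma()
 * below to fall back to copying the payload into a kmalloc'd bounce
 * buffer: the SDMA engine can only describe dword-aligned addresses
 * and lengths, so e.g. a segment starting at an odd byte offset cannot
 * be mapped directly.
 */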
/*
 * Copy from the SGEs to the data buffer.
 */
static void ipath_copy_from_sge(void *data, struct ipath_sge_state *ss,
				u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	while (length) {
		u32 len = sge->length;

		if (len > length)
			len = length;
		if (len > sge->sge_length)
			len = sge->sge_length;
		BUG_ON(len == 0);
		memcpy(data, sge->vaddr, len);
		sge->vaddr += len;
		sge->length -= len;
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			if (--ss->num_sge)
				*sge = *ss->sg_list++;
		} else if (sge->length == 0 && sge->mr != NULL) {
			if (++sge->n >= IPATH_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
					break;
				sge->n = 0;
			}
			sge->vaddr =
				sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->length =
				sge->mr->map[sge->m]->segs[sge->n].length;
		}
		data += len;
		length -= len;
	}
}
/**
 * ipath_post_one_send - post one RC, UC, or UD send work request
 * @qp: the QP to post on
 * @wr: the work request to send
 */
static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
{
	struct ipath_swqe *wqe;
	u32 next;
	int i;
	int j;
	int acc;
	int ret;
	unsigned long flags;
	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;

	spin_lock_irqsave(&qp->s_lock, flags);

	if (qp->ibqp.qp_type != IB_QPT_SMI &&
	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
		ret = -ENETDOWN;
		goto bail;
	}

	/* Check that state is OK to post send. */
	if (unlikely(!(ib_ipath_state_ops[qp->state] & IPATH_POST_SEND_OK)))
		goto bail_inval;

	/* IB spec says that num_sge == 0 is OK. */
	if (wr->num_sge > qp->s_max_sge)
		goto bail_inval;

	/*
	 * Don't allow RDMA reads or atomic operations on UC or
	 * undefined operations.
	 * Make sure buffer is large enough to hold the result for atomics.
	 */
	if (qp->ibqp.qp_type == IB_QPT_UC) {
		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
			goto bail_inval;
	} else if (qp->ibqp.qp_type == IB_QPT_UD) {
		/* Check UD opcode */
		if (wr->opcode != IB_WR_SEND &&
		    wr->opcode != IB_WR_SEND_WITH_IMM)
			goto bail_inval;
		/* Check UD destination address PD */
		if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
			goto bail_inval;
	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
		goto bail_inval;
	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
		 (wr->num_sge == 0 ||
		  wr->sg_list[0].length < sizeof(u64) ||
		  wr->sg_list[0].addr & (sizeof(u64) - 1)))
		goto bail_inval;
	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
		goto bail_inval;

	next = qp->s_head + 1;
	if (next >= qp->s_size)
		next = 0;
	if (next == qp->s_last) {
		ret = -ENOMEM;
		goto bail;
	}

	wqe = get_swqe_ptr(qp, qp->s_head);

	if (qp->ibqp.qp_type != IB_QPT_UC &&
	    qp->ibqp.qp_type != IB_QPT_RC)
		memcpy(&wqe->ud_wr, ud_wr(wr), sizeof(wqe->ud_wr));
	else if (wr->opcode == IB_WR_RDMA_WRITE_WITH_IMM ||
		 wr->opcode == IB_WR_RDMA_WRITE ||
		 wr->opcode == IB_WR_RDMA_READ)
		memcpy(&wqe->rdma_wr, rdma_wr(wr), sizeof(wqe->rdma_wr));
	else if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
		 wr->opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
		memcpy(&wqe->atomic_wr, atomic_wr(wr), sizeof(wqe->atomic_wr));
	else
		memcpy(&wqe->wr, wr, sizeof(wqe->wr));

	wqe->length = 0;
	if (wr->num_sge) {
		acc = wr->opcode >= IB_WR_RDMA_READ ?
			IB_ACCESS_LOCAL_WRITE : 0;
		for (i = 0, j = 0; i < wr->num_sge; i++) {
			u32 length = wr->sg_list[i].length;
			int ok;

			if (length == 0)
				continue;
			ok = ipath_lkey_ok(qp, &wqe->sg_list[j],
					   &wr->sg_list[i], acc);
			if (!ok)
				goto bail_inval;
			wqe->length += length;
			j++;
		}
		wqe->wr.num_sge = j;
	}
	if (qp->ibqp.qp_type == IB_QPT_UC ||
	    qp->ibqp.qp_type == IB_QPT_RC) {
		if (wqe->length > 0x80000000U)
			goto bail_inval;
	} else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
		goto bail_inval;
	wqe->ssn = qp->s_ssn++;
	qp->s_head = next;

	ret = 0;
	goto bail;

bail_inval:
	ret = -EINVAL;
bail:
	spin_unlock_irqrestore(&qp->s_lock, flags);
	return ret;
}
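/*
 * Note on the ring-full test above: s_head/s_last index a circular
 * send queue in which one slot is deliberately left unused, so that
 * next == qp->s_last unambiguously means "full" while
 * s_head == s_last means "empty".
 */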
/**
 * ipath_post_send - post a send on a QP
 * @ibqp: the QP to post the send on
 * @wr: the list of work requests to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int ipath_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			   struct ib_send_wr **bad_wr)
{
	struct ipath_qp *qp = to_iqp(ibqp);
	int err = 0;

	for (; wr; wr = wr->next) {
		err = ipath_post_one_send(qp, wr);
		if (err) {
			*bad_wr = wr;
			goto bail;
		}
	}

	/* Try to do the send work in the caller's context. */
	ipath_do_send((unsigned long) qp);

bail:
	return err;
}
/**
 * ipath_post_receive - post a receive on a QP
 * @ibqp: the QP to post the receive on
 * @wr: the WR to post
 * @bad_wr: the first bad WR is put here
 *
 * This may be called from interrupt context.
 */
static int ipath_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			      struct ib_recv_wr **bad_wr)
{
	struct ipath_qp *qp = to_iqp(ibqp);
	struct ipath_rwq *wq = qp->r_rq.wq;
	unsigned long flags;
	int ret;

	/* Check that state is OK to post receive. */
	if (!(ib_ipath_state_ops[qp->state] & IPATH_POST_RECV_OK) || !wq) {
		*bad_wr = wr;
		ret = -EINVAL;
		goto bail;
	}

	for (; wr; wr = wr->next) {
		struct ipath_rwqe *wqe;
		u32 next;
		int i;

		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
			*bad_wr = wr;
			ret = -EINVAL;
			goto bail;
		}

		spin_lock_irqsave(&qp->r_rq.lock, flags);
		next = wq->head + 1;
		if (next >= qp->r_rq.size)
			next = 0;
		if (next == wq->tail) {
			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
			*bad_wr = wr;
			ret = -ENOMEM;
			goto bail;
		}

		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
		wqe->wr_id = wr->wr_id;
		wqe->num_sge = wr->num_sge;
		for (i = 0; i < wr->num_sge; i++)
			wqe->sg_list[i] = wr->sg_list[i];
		/* Make sure queue entry is written before the head index. */
		smp_wmb();
		wq->head = next;
		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
	}
	ret = 0;

bail:
	return ret;
}
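/*
 * The smp_wmb() in ipath_post_receive() orders the WQE stores before
 * the wq->head update; the consumer side (ipath_get_rwqe()) is
 * expected to issue the matching read barrier after loading the head
 * index and before reading the entry.
 */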
/**
 * ipath_qp_rcv - process an incoming packet on a QP
 * @dev: the device the packet came on
 * @hdr: the packet header
 * @has_grh: true if the packet has a GRH
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP the packet came on
 *
 * This is called from ipath_ib_rcv() to process an incoming packet
 * for the given QP.
 * Called at interrupt level.
 */
static void ipath_qp_rcv(struct ipath_ibdev *dev,
			 struct ipath_ib_header *hdr, int has_grh,
			 void *data, u32 tlen, struct ipath_qp *qp)
{
	/* Check for valid receive state. */
	if (!(ib_ipath_state_ops[qp->state] & IPATH_PROCESS_RECV_OK)) {
		dev->n_pkt_drops++;
		return;
	}

	switch (qp->ibqp.qp_type) {
	case IB_QPT_SMI:
	case IB_QPT_GSI:
		if (ib_ipath_disable_sma)
			break;
		/* FALLTHROUGH */
	case IB_QPT_UD:
		ipath_ud_rcv(dev, hdr, has_grh, data, tlen, qp);
		break;

	case IB_QPT_RC:
		ipath_rc_rcv(dev, hdr, has_grh, data, tlen, qp);
		break;

	case IB_QPT_UC:
		ipath_uc_rcv(dev, hdr, has_grh, data, tlen, qp);
		break;

	default:
		break;
	}
}
/**
 * ipath_ib_rcv - process an incoming packet
 * @dev: the device pointer
 * @rhdr: the header of the packet
 * @data: the packet data
 * @tlen: the packet length
 *
 * This is called from ipath_kreceive() to process an incoming packet at
 * interrupt level. Tlen is the length of the header + data + CRC in bytes.
 */
void ipath_ib_rcv(struct ipath_ibdev *dev, void *rhdr, void *data,
		  u32 tlen)
{
	struct ipath_ib_header *hdr = rhdr;
	struct ipath_other_headers *ohdr;
	struct ipath_qp *qp;
	u32 qp_num;
	int lnh;
	u8 opcode;
	u16 lid;

	if (unlikely(dev == NULL))
		goto bail;

	if (unlikely(tlen < 24)) {	/* LRH+BTH+CRC */
		dev->rcv_errors++;
		goto bail;
	}

	/* Check for a valid destination LID (see ch. 7.11.1). */
	lid = be16_to_cpu(hdr->lrh[1]);
	if (lid < IPATH_MULTICAST_LID_BASE) {
		lid &= ~((1 << dev->dd->ipath_lmc) - 1);
		if (unlikely(lid != dev->dd->ipath_lid)) {
			dev->rcv_errors++;
			goto bail;
		}
	}

	/* Check for GRH */
	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
	if (lnh == IPATH_LRH_BTH)
		ohdr = &hdr->u.oth;
	else if (lnh == IPATH_LRH_GRH)
		ohdr = &hdr->u.l.oth;
	else {
		dev->rcv_errors++;
		goto bail;
	}

	opcode = (be32_to_cpu(ohdr->bth[0]) >> 24) & 0x7f;
	dev->opstats[opcode].n_bytes += tlen;
	dev->opstats[opcode].n_packets++;

	/* Get the destination QP number. */
	qp_num = be32_to_cpu(ohdr->bth[1]) & IPATH_QPN_MASK;
	if (qp_num == IPATH_MULTICAST_QPN) {
		struct ipath_mcast *mcast;
		struct ipath_mcast_qp *p;

		if (lnh != IPATH_LRH_GRH) {
			dev->n_pkt_drops++;
			goto bail;
		}
		mcast = ipath_mcast_find(&hdr->u.l.grh.dgid);
		if (mcast == NULL) {
			dev->n_pkt_drops++;
			goto bail;
		}
		dev->n_multicast_rcv++;
		list_for_each_entry_rcu(p, &mcast->qp_list, list)
			ipath_qp_rcv(dev, hdr, 1, data, tlen, p->qp);
		/*
		 * Notify ipath_multicast_detach() if it is waiting for us
		 * to finish.
		 */
		if (atomic_dec_return(&mcast->refcount) <= 1)
			wake_up(&mcast->wait);
	} else {
		qp = ipath_lookup_qpn(&dev->qp_table, qp_num);
		if (qp) {
			dev->n_unicast_rcv++;
			ipath_qp_rcv(dev, hdr, lnh == IPATH_LRH_GRH, data,
				     tlen, qp);
			/*
			 * Notify ipath_destroy_qp() if it is waiting
			 * for us to finish.
			 */
			if (atomic_dec_and_test(&qp->refcount))
				wake_up(&qp->wait);
		} else
			dev->n_pkt_drops++;
	}

bail:;
}
/**
 * ipath_ib_timer - verbs timer
 * @dev: the device pointer
 *
 * This is called from ipath_do_rcv_timer() at interrupt level to check for
 * QPs which need retransmits and to collect performance numbers.
 */
static void ipath_ib_timer(struct ipath_ibdev *dev)
{
	struct ipath_qp *resend = NULL;
	struct ipath_qp *rnr = NULL;
	struct list_head *last;
	struct ipath_qp *qp;
	unsigned long flags;

	if (dev == NULL)
		return;

	spin_lock_irqsave(&dev->pending_lock, flags);
	/* Start filling the next pending queue. */
	if (++dev->pending_index >= ARRAY_SIZE(dev->pending))
		dev->pending_index = 0;
	/* Save any requests still in the new queue, they have timed out. */
	last = &dev->pending[dev->pending_index];
	while (!list_empty(last)) {
		qp = list_entry(last->next, struct ipath_qp, timerwait);
		list_del_init(&qp->timerwait);
		qp->timer_next = resend;
		resend = qp;
		atomic_inc(&qp->refcount);
	}
	last = &dev->rnrwait;
	if (!list_empty(last)) {
		qp = list_entry(last->next, struct ipath_qp, timerwait);
		if (--qp->s_rnr_timeout == 0) {
			do {
				list_del_init(&qp->timerwait);
				qp->timer_next = rnr;
				rnr = qp;
				atomic_inc(&qp->refcount);
				if (list_empty(last))
					break;
				qp = list_entry(last->next, struct ipath_qp,
						timerwait);
			} while (qp->s_rnr_timeout == 0);
		}
	}
	/*
	 * We should only be in the started state if pma_sample_start != 0
	 */
	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_STARTED &&
	    --dev->pma_sample_start == 0) {
		dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_RUNNING;
		ipath_snapshot_counters(dev->dd, &dev->ipath_sword,
					&dev->ipath_rword,
					&dev->ipath_spkts,
					&dev->ipath_rpkts,
					&dev->ipath_xmit_wait);
	}
	if (dev->pma_sample_status == IB_PMA_SAMPLE_STATUS_RUNNING) {
		if (dev->pma_sample_interval == 0) {
			u64 ta, tb, tc, td, te;

			dev->pma_sample_status = IB_PMA_SAMPLE_STATUS_DONE;
			ipath_snapshot_counters(dev->dd, &ta, &tb,
						&tc, &td, &te);

			dev->ipath_sword = ta - dev->ipath_sword;
			dev->ipath_rword = tb - dev->ipath_rword;
			dev->ipath_spkts = tc - dev->ipath_spkts;
			dev->ipath_rpkts = td - dev->ipath_rpkts;
			dev->ipath_xmit_wait = te - dev->ipath_xmit_wait;
		} else
			dev->pma_sample_interval--;
	}
	spin_unlock_irqrestore(&dev->pending_lock, flags);

	/* XXX What if timer fires again while this is running? */
	while (resend != NULL) {
		qp = resend;
		resend = qp->timer_next;

		spin_lock_irqsave(&qp->s_lock, flags);
		if (qp->s_last != qp->s_tail &&
		    ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK) {
			dev->n_timeouts++;
			ipath_restart_rc(qp, qp->s_last_psn + 1);
		}
		spin_unlock_irqrestore(&qp->s_lock, flags);

		/* Notify ipath_destroy_qp() if it is waiting. */
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}
	while (rnr != NULL) {
		qp = rnr;
		rnr = qp->timer_next;

		spin_lock_irqsave(&qp->s_lock, flags);
		if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
			ipath_schedule_send(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);

		/* Notify ipath_destroy_qp() if it is waiting. */
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}
}
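/*
 * The dev->pending[] lists form a simple timer wheel: pending_index
 * advances one slot per tick, and anything still queued on the slot
 * the index wraps back onto has necessarily waited at least one full
 * revolution, so ipath_ib_timer() treats it as timed out and queues it
 * for retransmission.
 */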
static void update_sge(struct ipath_sge_state *ss, u32 length)
{
	struct ipath_sge *sge = &ss->sge;

	sge->vaddr += length;
	sge->length -= length;
	sge->sge_length -= length;
	if (sge->sge_length == 0) {
		if (--ss->num_sge)
			*sge = *ss->sg_list++;
	} else if (sge->length == 0 && sge->mr != NULL) {
		if (++sge->n >= IPATH_SEGSZ) {
			if (++sge->m >= sge->mr->mapsz)
				return;
			sge->n = 0;
		}
		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
	}
}
#ifdef __LITTLE_ENDIAN
static inline u32 get_upper_bits(u32 data, u32 shift)
{
	return data >> shift;
}

static inline u32 set_upper_bits(u32 data, u32 shift)
{
	return data << shift;
}

static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
	data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
	data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
	return data;
}
#else
static inline u32 get_upper_bits(u32 data, u32 shift)
{
	return data << shift;
}

static inline u32 set_upper_bits(u32 data, u32 shift)
{
	return data >> shift;
}

static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
{
	data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
	data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
	return data;
}
#endif
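/*
 * Worked example (little-endian): get_upper_bits(0xAABBCCDD, 8) is
 * 0x00AABBCC and set_upper_bits(0x000000EE, 24) is 0xEE000000, so the
 * bytes shifted out of one source dword can be merged into the top of
 * the next output dword; copy_io() below uses these helpers to re-pack
 * unaligned source data into whole dwords for the PIO buffer.
 */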
static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
		    u32 length, unsigned flush_wc)
{
	u32 extra = 0;
	u32 data = 0;
	u32 last;

	while (1) {
		u32 len = ss->sge.length;
		u32 off;

		if (len > length)
			len = length;
		if (len > ss->sge.sge_length)
			len = ss->sge.sge_length;
		BUG_ON(len == 0);
		/* If the source address is not aligned, try to align it. */
		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
		if (off) {
			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
					    ~(sizeof(u32) - 1));
			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
			u32 y;

			y = sizeof(u32) - off;
			if (len > y)
				len = y;
			if (len + extra >= sizeof(u32)) {
				data |= set_upper_bits(v, extra *
						       BITS_PER_BYTE);
				len = sizeof(u32) - extra;
				if (len == length) {
					last = data;
					break;
				}
				__raw_writel(data, piobuf);
				piobuf++;
				extra = 0;
				data = 0;
			} else {
				/* Clear unused upper bytes */
				data |= clear_upper_bytes(v, len, extra);
				if (len == length) {
					last = data;
					break;
				}
				extra += len;
			}
		} else if (extra) {
			/* Source address is aligned. */
			u32 *addr = (u32 *) ss->sge.vaddr;
			int shift = extra * BITS_PER_BYTE;
			int ushift = 32 - shift;
			u32 l = len;

			while (l >= sizeof(u32)) {
				u32 v = *addr;

				data |= set_upper_bits(v, shift);
				__raw_writel(data, piobuf);
				data = get_upper_bits(v, ushift);
				piobuf++;
				addr++;
				l -= sizeof(u32);
			}
			/*
			 * We still have 'extra' number of bytes leftover.
			 */
			if (l) {
				u32 v = *addr;

				if (l + extra >= sizeof(u32)) {
					data |= set_upper_bits(v, shift);
					len -= l + extra - sizeof(u32);
					if (len == length) {
						last = data;
						break;
					}
					__raw_writel(data, piobuf);
					piobuf++;
					extra = 0;
					data = 0;
				} else {
					/* Clear unused upper bytes */
					data |= clear_upper_bytes(v, l,
								  extra);
					if (len == length) {
						last = data;
						break;
					}
					extra += l;
				}
			} else if (len == length) {
				last = data;
				break;
			}
		} else if (len == length) {
			u32 w;

			/*
			 * Need to round up for the last dword in the
			 * packet.
			 */
			w = (len + 3) >> 2;
			__iowrite32_copy(piobuf, ss->sge.vaddr, w - 1);
			piobuf += w - 1;
			last = ((u32 *) ss->sge.vaddr)[w - 1];
			break;
		} else {
			u32 w = len >> 2;

			__iowrite32_copy(piobuf, ss->sge.vaddr, w);
			piobuf += w;

			extra = len & (sizeof(u32) - 1);
			if (extra) {
				u32 v = ((u32 *) ss->sge.vaddr)[w];

				/* Clear unused upper bytes */
				data = clear_upper_bytes(v, extra, 0);
			}
		}
		update_sge(ss, len);
		length -= len;
	}
	/* Update address before sending packet. */
	update_sge(ss, length);
	if (flush_wc) {
		/* must flush early everything before trigger word */
		ipath_flush_wc();
		__raw_writel(last, piobuf);
		/* be sure trigger word is written */
		ipath_flush_wc();
	} else
		__raw_writel(last, piobuf);
}
/*
 * Convert IB rate to delay multiplier.
 */
unsigned ipath_ib_rate_to_mult(enum ib_rate rate)
{
	switch (rate) {
	case IB_RATE_2_5_GBPS: return 8;
	case IB_RATE_5_GBPS:   return 4;
	case IB_RATE_10_GBPS:  return 2;
	case IB_RATE_20_GBPS:  return 1;
	default:	       return 0;
	}
}

/*
 * Convert delay multiplier to IB rate.
 */
static enum ib_rate ipath_mult_to_ib_rate(unsigned mult)
{
	switch (mult) {
	case 8:  return IB_RATE_2_5_GBPS;
	case 4:  return IB_RATE_5_GBPS;
	case 2:  return IB_RATE_10_GBPS;
	case 1:  return IB_RATE_20_GBPS;
	default: return IB_RATE_PORT_CURRENT;
	}
}
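/*
 * The multiplier is the ratio of the fastest supported rate (4X DDR,
 * 20 Gbps) to the given rate; e.g. 2.5 Gbps is 1/8 of 20 Gbps, hence a
 * multiplier of 8.  ipath_pkt_delay() below paces transmission using
 * the difference between the send and receive multipliers.
 */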
static inline struct ipath_verbs_txreq *get_txreq(struct ipath_ibdev *dev)
{
	struct ipath_verbs_txreq *tx = NULL;
	unsigned long flags;

	spin_lock_irqsave(&dev->pending_lock, flags);
	if (!list_empty(&dev->txreq_free)) {
		struct list_head *l = dev->txreq_free.next;

		list_del(l);
		tx = list_entry(l, struct ipath_verbs_txreq, txreq.list);
	}
	spin_unlock_irqrestore(&dev->pending_lock, flags);
	return tx;
}

static inline void put_txreq(struct ipath_ibdev *dev,
			     struct ipath_verbs_txreq *tx)
{
	unsigned long flags;

	spin_lock_irqsave(&dev->pending_lock, flags);
	list_add(&tx->txreq.list, &dev->txreq_free);
	spin_unlock_irqrestore(&dev->pending_lock, flags);
}
static void sdma_complete(void *cookie, int status)
{
	struct ipath_verbs_txreq *tx = cookie;
	struct ipath_qp *qp = tx->qp;
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	unsigned long flags;
	enum ib_wc_status ibs = status == IPATH_SDMA_TXREQ_S_OK ?
		IB_WC_SUCCESS : IB_WC_WR_FLUSH_ERR;

	if (atomic_dec_and_test(&qp->s_dma_busy)) {
		spin_lock_irqsave(&qp->s_lock, flags);
		if (tx->wqe)
			ipath_send_complete(qp, tx->wqe, ibs);
		if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
		     qp->s_last != qp->s_head) ||
		    (qp->s_flags & IPATH_S_WAIT_DMA))
			ipath_schedule_send(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		wake_up(&qp->wait_dma);
	} else if (tx->wqe) {
		spin_lock_irqsave(&qp->s_lock, flags);
		ipath_send_complete(qp, tx->wqe, ibs);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	}

	if (tx->txreq.flags & IPATH_SDMA_TXREQ_F_FREEBUF)
		kfree(tx->txreq.map_addr);
	put_txreq(dev, tx);

	if (atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);
}
static void decrement_dma_busy(struct ipath_qp *qp)
{
	unsigned long flags;

	if (atomic_dec_and_test(&qp->s_dma_busy)) {
		spin_lock_irqsave(&qp->s_lock, flags);
		if ((ib_ipath_state_ops[qp->state] & IPATH_FLUSH_SEND &&
		     qp->s_last != qp->s_head) ||
		    (qp->s_flags & IPATH_S_WAIT_DMA))
			ipath_schedule_send(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);
		wake_up(&qp->wait_dma);
	}
}
/*
 * Compute the number of clock cycles of delay before sending the next packet.
 * The multipliers reflect the number of clocks for the fastest rate so
 * one tick at 4xDDR is 8 ticks at 1xSDR.
 * If the destination port will take longer to receive a packet than
 * the outgoing link can send it, we need to delay sending the next packet
 * by the difference in time it takes the receiver to receive and the sender
 * to send this packet.
 * Note that this delay is always correct for UC and RC but not always
 * optimal for UD. For UD, the destination HCA can be different for each
 * packet, in which case, we could send packets to a different destination
 * while "waiting" for the delay. The overhead for doing this without
 * HW support is more than just paying the cost of delaying some packets
 * unnecessarily.
 */
static inline unsigned ipath_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult)
{
	return (rcv_mult > snd_mult) ?
		(plen * (rcv_mult - snd_mult) + 1) >> 1 : 0;
}
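/*
 * Worked example: plen = 512 dwords sent from a 20 Gbps port
 * (snd_mult 1) to a 2.5 Gbps receiver (rcv_mult 8) delays the next
 * packet by (512 * (8 - 1) + 1) >> 1 = 1792 ticks; if the receiver is
 * at least as fast as the sender, the delay is zero.
 */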
static int ipath_verbs_send_dma(struct ipath_qp *qp,
				struct ipath_ib_header *hdr, u32 hdrwords,
				struct ipath_sge_state *ss, u32 len,
				u32 plen, u32 dwords)
{
	struct ipath_ibdev *dev = to_idev(qp->ibqp.device);
	struct ipath_devdata *dd = dev->dd;
	struct ipath_verbs_txreq *tx;
	u32 *piobuf;
	u32 control;
	u32 ndesc;
	int ret;

	tx = qp->s_tx;
	if (tx) {
		qp->s_tx = NULL;
		/* resend previously constructed packet */
		atomic_inc(&qp->s_dma_busy);
		ret = ipath_sdma_verbs_send(dd, tx->ss, tx->len, tx);
		if (ret) {
			qp->s_tx = tx;
			decrement_dma_busy(qp);
		}
		goto bail;
	}

	tx = get_txreq(dev);
	if (!tx) {
		ret = -EBUSY;
		goto bail;
	}

	/*
	 * Get the saved delay count we computed for the previous packet
	 * and save the delay count for this packet to be used next time
	 * we get here.
	 */
	control = qp->s_pkt_delay;
	qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);

	tx->qp = qp;
	atomic_inc(&qp->refcount);
	tx->wqe = qp->s_wqe;
	tx->txreq.callback = sdma_complete;
	tx->txreq.callback_cookie = tx;
	tx->txreq.flags = IPATH_SDMA_TXREQ_F_HEADTOHOST |
		IPATH_SDMA_TXREQ_F_INTREQ | IPATH_SDMA_TXREQ_F_FREEDESC;
	if (plen + 1 >= IPATH_SMALLBUF_DWORDS)
		tx->txreq.flags |= IPATH_SDMA_TXREQ_F_USELARGEBUF;

	/* VL15 packets bypass credit check */
	if ((be16_to_cpu(hdr->lrh[0]) >> 12) == 15) {
		control |= 1ULL << 31;
		tx->txreq.flags |= IPATH_SDMA_TXREQ_F_VL15;
	}

	if (len) {
		/*
		 * Don't try to DMA if it takes more descriptors than
		 * the queue holds.
		 */
		ndesc = ipath_count_sge(ss, len);
		if (ndesc >= dd->ipath_sdma_descq_cnt)
			ndesc = 0;
	} else
		ndesc = 1;
	if (ndesc) {
		tx->hdr.pbc[0] = cpu_to_le32(plen);
		tx->hdr.pbc[1] = cpu_to_le32(control);
		memcpy(&tx->hdr.hdr, hdr, hdrwords << 2);
		tx->txreq.sg_count = ndesc;
		tx->map_len = (hdrwords + 2) << 2;
		tx->txreq.map_addr = &tx->hdr;
		atomic_inc(&qp->s_dma_busy);
		ret = ipath_sdma_verbs_send(dd, ss, dwords, tx);
		if (ret) {
			/* save ss and length in dwords */
			tx->ss = ss;
			tx->len = dwords;
			qp->s_tx = tx;
			decrement_dma_busy(qp);
		}
		goto bail;
	}

	/* Allocate a buffer and copy the header and payload to it. */
	tx->map_len = (plen + 1) << 2;
	piobuf = kmalloc(tx->map_len, GFP_ATOMIC);
	if (unlikely(piobuf == NULL)) {
		ret = -EBUSY;
		goto err_tx;
	}
	tx->txreq.map_addr = piobuf;
	tx->txreq.flags |= IPATH_SDMA_TXREQ_F_FREEBUF;
	tx->txreq.sg_count = 1;

	*piobuf++ = (__force u32) cpu_to_le32(plen);
	*piobuf++ = (__force u32) cpu_to_le32(control);
	memcpy(piobuf, hdr, hdrwords << 2);
	ipath_copy_from_sge(piobuf + hdrwords, ss, len);

	atomic_inc(&qp->s_dma_busy);
	ret = ipath_sdma_verbs_send(dd, NULL, 0, tx);
	/*
	 * If we couldn't queue the DMA request, save the info
	 * and try again later rather than destroying the
	 * buffer and undoing the side effects of the copy.
	 */
	if (ret) {
		tx->ss = ss;
		tx->len = len;
		qp->s_tx = tx;
		decrement_dma_busy(qp);
	}
	goto bail;

err_tx:
	if (atomic_dec_and_test(&qp->refcount))
		wake_up(&qp->wait);
	put_txreq(dev, tx);
bail:
	return ret;
}
static int ipath_verbs_send_pio(struct ipath_qp *qp,
				struct ipath_ib_header *ibhdr, u32 hdrwords,
				struct ipath_sge_state *ss, u32 len,
				u32 plen, u32 dwords)
{
	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
	u32 *hdr = (u32 *) ibhdr;
	u32 __iomem *piobuf;
	unsigned flush_wc;
	u32 control;
	int ret;
	unsigned long flags;

	piobuf = ipath_getpiobuf(dd, plen, NULL);
	if (unlikely(piobuf == NULL)) {
		ret = -EBUSY;
		goto bail;
	}

	/*
	 * Get the saved delay count we computed for the previous packet
	 * and save the delay count for this packet to be used next time
	 * we get here.
	 */
	control = qp->s_pkt_delay;
	qp->s_pkt_delay = ipath_pkt_delay(plen, dd->delay_mult, qp->s_dmult);

	/* VL15 packets bypass credit check */
	if ((be16_to_cpu(ibhdr->lrh[0]) >> 12) == 15)
		control |= 1ULL << 31;

	/*
	 * Write the length to the control qword plus any needed flags.
	 * We have to flush after the PBC for correctness on some cpus
	 * or WC buffer can be written out of order.
	 */
	writeq(((u64) control << 32) | plen, piobuf);
	piobuf += 2;

	flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
	if (len == 0) {
		/*
		 * If there is just the header portion, must flush before
		 * writing last word of header for correctness, and after
		 * the last header word (trigger word).
		 */
		if (flush_wc) {
			ipath_flush_wc();
			__iowrite32_copy(piobuf, hdr, hdrwords - 1);
			ipath_flush_wc();
			__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
			ipath_flush_wc();
		} else
			__iowrite32_copy(piobuf, hdr, hdrwords);
		goto done;
	}

	if (flush_wc)
		ipath_flush_wc();
	__iowrite32_copy(piobuf, hdr, hdrwords);
	piobuf += hdrwords;

	/* The common case is aligned and contained in one segment. */
	if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
		   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
		u32 *addr = (u32 *) ss->sge.vaddr;

		/* Update address before sending packet. */
		update_sge(ss, len);
		if (flush_wc) {
			__iowrite32_copy(piobuf, addr, dwords - 1);
			/* must flush early everything before trigger word */
			ipath_flush_wc();
			__raw_writel(addr[dwords - 1], piobuf + dwords - 1);
			/* be sure trigger word is written */
			ipath_flush_wc();
		} else
			__iowrite32_copy(piobuf, addr, dwords);
		goto done;
	}
	copy_io(piobuf, ss, len, flush_wc);
done:
	if (qp->s_wqe) {
		spin_lock_irqsave(&qp->s_lock, flags);
		ipath_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	}
	ret = 0;
bail:
	return ret;
}
/**
 * ipath_verbs_send - send a packet
 * @qp: the QP to send on
 * @hdr: the packet header
 * @hdrwords: the number of 32-bit words in the header
 * @ss: the SGE to send
 * @len: the length of the packet in bytes
 */
int ipath_verbs_send(struct ipath_qp *qp, struct ipath_ib_header *hdr,
		     u32 hdrwords, struct ipath_sge_state *ss, u32 len)
{
	struct ipath_devdata *dd = to_idev(qp->ibqp.device)->dd;
	u32 plen;
	int ret;
	u32 dwords = (len + 3) >> 2;

	/*
	 * Calculate the send buffer trigger address.
	 * The +1 counts for the pbc control dword following the pbc length.
	 */
	plen = hdrwords + dwords + 1;

	/*
	 * VL15 packets (IB_QPT_SMI) will always use PIO, so we
	 * can defer SDMA restart until link goes ACTIVE without
	 * worrying about just how we got there.
	 */
	if (qp->ibqp.qp_type == IB_QPT_SMI ||
	    !(dd->ipath_flags & IPATH_HAS_SEND_DMA))
		ret = ipath_verbs_send_pio(qp, hdr, hdrwords, ss, len,
					   plen, dwords);
	else
		ret = ipath_verbs_send_dma(qp, hdr, hdrwords, ss, len,
					   plen, dwords);

	return ret;
}
int ipath_snapshot_counters(struct ipath_devdata *dd, u64 *swords,
			    u64 *rwords, u64 *spkts, u64 *rpkts,
			    u64 *xmit_wait)
{
	int ret;

	if (!(dd->ipath_flags & IPATH_INITTED)) {
		/* no hardware, freeze, etc. */
		ret = -EINVAL;
		goto bail;
	}
	*swords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordsendcnt);
	*rwords = ipath_snap_cntr(dd, dd->ipath_cregs->cr_wordrcvcnt);
	*spkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktsendcnt);
	*rpkts = ipath_snap_cntr(dd, dd->ipath_cregs->cr_pktrcvcnt);
	*xmit_wait = ipath_snap_cntr(dd, dd->ipath_cregs->cr_sendstallcnt);

	ret = 0;

bail:
	return ret;
}
/**
 * ipath_get_counters - get various chip counters
 * @dd: the infinipath device
 * @cntrs: counters are placed here
 *
 * Return the counters needed by recv_pma_get_portcounters().
 */
int ipath_get_counters(struct ipath_devdata *dd,
		       struct ipath_verbs_counters *cntrs)
{
	struct ipath_cregs const *crp = dd->ipath_cregs;
	int ret;

	if (!(dd->ipath_flags & IPATH_INITTED)) {
		/* no hardware, freeze, etc. */
		ret = -EINVAL;
		goto bail;
	}
	cntrs->symbol_error_counter =
		ipath_snap_cntr(dd, crp->cr_ibsymbolerrcnt);
	cntrs->link_error_recovery_counter =
		ipath_snap_cntr(dd, crp->cr_iblinkerrrecovcnt);
	/*
	 * The link downed counter counts when the other side downs the
	 * connection.  We add in the number of times we downed the link
	 * due to local link integrity errors to compensate.
	 */
	cntrs->link_downed_counter =
		ipath_snap_cntr(dd, crp->cr_iblinkdowncnt);
	cntrs->port_rcv_errors =
		ipath_snap_cntr(dd, crp->cr_rxdroppktcnt) +
		ipath_snap_cntr(dd, crp->cr_rcvovflcnt) +
		ipath_snap_cntr(dd, crp->cr_portovflcnt) +
		ipath_snap_cntr(dd, crp->cr_err_rlencnt) +
		ipath_snap_cntr(dd, crp->cr_invalidrlencnt) +
		ipath_snap_cntr(dd, crp->cr_errlinkcnt) +
		ipath_snap_cntr(dd, crp->cr_erricrccnt) +
		ipath_snap_cntr(dd, crp->cr_errvcrccnt) +
		ipath_snap_cntr(dd, crp->cr_errlpcrccnt) +
		ipath_snap_cntr(dd, crp->cr_badformatcnt) +
		dd->ipath_rxfc_unsupvl_errs;
	if (crp->cr_rxotherlocalphyerrcnt)
		cntrs->port_rcv_errors +=
			ipath_snap_cntr(dd, crp->cr_rxotherlocalphyerrcnt);
	if (crp->cr_rxvlerrcnt)
		cntrs->port_rcv_errors +=
			ipath_snap_cntr(dd, crp->cr_rxvlerrcnt);
	cntrs->port_rcv_remphys_errors =
		ipath_snap_cntr(dd, crp->cr_rcvebpcnt);
	cntrs->port_xmit_discards = ipath_snap_cntr(dd, crp->cr_unsupvlcnt);
	cntrs->port_xmit_data = ipath_snap_cntr(dd, crp->cr_wordsendcnt);
	cntrs->port_rcv_data = ipath_snap_cntr(dd, crp->cr_wordrcvcnt);
	cntrs->port_xmit_packets = ipath_snap_cntr(dd, crp->cr_pktsendcnt);
	cntrs->port_rcv_packets = ipath_snap_cntr(dd, crp->cr_pktrcvcnt);
	cntrs->local_link_integrity_errors =
		crp->cr_locallinkintegrityerrcnt ?
		ipath_snap_cntr(dd, crp->cr_locallinkintegrityerrcnt) :
		((dd->ipath_flags & IPATH_GPIO_ERRINTRS) ?
		 dd->ipath_lli_errs : dd->ipath_lli_errors);
	cntrs->excessive_buffer_overrun_errors =
		crp->cr_excessbufferovflcnt ?
		ipath_snap_cntr(dd, crp->cr_excessbufferovflcnt) :
		dd->ipath_overrun_thresh_errs;
	cntrs->vl15_dropped = crp->cr_vl15droppedpktcnt ?
		ipath_snap_cntr(dd, crp->cr_vl15droppedpktcnt) : 0;

	ret = 0;

bail:
	return ret;
}
/**
 * ipath_ib_piobufavail - callback when a PIO buffer is available
 * @dev: the device pointer
 *
 * This is called from ipath_intr() at interrupt level when a PIO buffer is
 * available after ipath_verbs_send() returned an error that no buffers were
 * available.  Return 1 if we consumed all the PIO buffers and we still have
 * QPs waiting for buffers (for now, just restart the send tasklet and
 * return zero).
 */
int ipath_ib_piobufavail(struct ipath_ibdev *dev)
{
	struct list_head *list;
	struct ipath_qp *qplist;
	struct ipath_qp *qp;
	unsigned long flags;

	if (dev == NULL)
		goto bail;

	list = &dev->piowait;
	qplist = NULL;

	spin_lock_irqsave(&dev->pending_lock, flags);
	while (!list_empty(list)) {
		qp = list_entry(list->next, struct ipath_qp, piowait);
		list_del_init(&qp->piowait);
		qp->pio_next = qplist;
		qplist = qp;
		atomic_inc(&qp->refcount);
	}
	spin_unlock_irqrestore(&dev->pending_lock, flags);

	while (qplist != NULL) {
		qp = qplist;
		qplist = qp->pio_next;

		spin_lock_irqsave(&qp->s_lock, flags);
		if (ib_ipath_state_ops[qp->state] & IPATH_PROCESS_SEND_OK)
			ipath_schedule_send(qp);
		spin_unlock_irqrestore(&qp->s_lock, flags);

		/* Notify ipath_destroy_qp() if it is waiting. */
		if (atomic_dec_and_test(&qp->refcount))
			wake_up(&qp->wait);
	}

bail:
	return 0;
}
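/*
 * ipath_ib_piobufavail() moves each waiting QP to a private singly
 * linked list (pio_next) with a reference held before dropping
 * dev->pending_lock, so ipath_destroy_qp() cannot free a QP between
 * the list walk and the ipath_schedule_send() call made outside the
 * lock.
 */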
static int ipath_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
			      struct ib_udata *uhw)
{
	struct ipath_ibdev *dev = to_idev(ibdev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	memset(props, 0, sizeof(*props));

	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
	props->page_size_cap = PAGE_SIZE;
	props->vendor_id =
		IPATH_SRC_OUI_1 << 16 | IPATH_SRC_OUI_2 << 8 | IPATH_SRC_OUI_3;
	props->vendor_part_id = dev->dd->ipath_deviceid;
	props->hw_ver = dev->dd->ipath_pcirev;

	props->sys_image_guid = dev->sys_image_guid;

	props->max_mr_size = ~0ull;
	props->max_qp = ib_ipath_max_qps;
	props->max_qp_wr = ib_ipath_max_qp_wrs;
	props->max_sge = ib_ipath_max_sges;
	props->max_sge_rd = ib_ipath_max_sges;
	props->max_cq = ib_ipath_max_cqs;
	props->max_ah = ib_ipath_max_ahs;
	props->max_cqe = ib_ipath_max_cqes;
	props->max_mr = dev->lk_table.max;
	props->max_fmr = dev->lk_table.max;
	props->max_map_per_fmr = 32767;
	props->max_pd = ib_ipath_max_pds;
	props->max_qp_rd_atom = IPATH_MAX_RDMA_ATOMIC;
	props->max_qp_init_rd_atom = 255;
	/* props->max_res_rd_atom */
	props->max_srq = ib_ipath_max_srqs;
	props->max_srq_wr = ib_ipath_max_srq_wrs;
	props->max_srq_sge = ib_ipath_max_srq_sges;
	/* props->local_ca_ack_delay */
	props->atomic_cap = IB_ATOMIC_GLOB;
	props->max_pkeys = ipath_get_npkeys(dev->dd);
	props->max_mcast_grp = ib_ipath_max_mcast_grps;
	props->max_mcast_qp_attach = ib_ipath_max_mcast_qp_attached;
	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
		props->max_mcast_grp;

	return 0;
}
const u8 ipath_cvt_physportstate[32] = {
	[INFINIPATH_IBCS_LT_STATE_DISABLED] = IB_PHYSPORTSTATE_DISABLED,
	[INFINIPATH_IBCS_LT_STATE_LINKUP] = IB_PHYSPORTSTATE_LINKUP,
	[INFINIPATH_IBCS_LT_STATE_POLLACTIVE] = IB_PHYSPORTSTATE_POLL,
	[INFINIPATH_IBCS_LT_STATE_POLLQUIET] = IB_PHYSPORTSTATE_POLL,
	[INFINIPATH_IBCS_LT_STATE_SLEEPDELAY] = IB_PHYSPORTSTATE_SLEEP,
	[INFINIPATH_IBCS_LT_STATE_SLEEPQUIET] = IB_PHYSPORTSTATE_SLEEP,
	[INFINIPATH_IBCS_LT_STATE_CFGDEBOUNCE] =
		IB_PHYSPORTSTATE_CFG_TRAIN,
	[INFINIPATH_IBCS_LT_STATE_CFGRCVFCFG] =
		IB_PHYSPORTSTATE_CFG_TRAIN,
	[INFINIPATH_IBCS_LT_STATE_CFGWAITRMT] =
		IB_PHYSPORTSTATE_CFG_TRAIN,
	[INFINIPATH_IBCS_LT_STATE_CFGIDLE] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[INFINIPATH_IBCS_LT_STATE_RECOVERRETRAIN] =
		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
	[INFINIPATH_IBCS_LT_STATE_RECOVERWAITRMT] =
		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
	[INFINIPATH_IBCS_LT_STATE_RECOVERIDLE] =
		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
	[0x10] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x11] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x13] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
	[0x17] = IB_PHYSPORTSTATE_CFG_TRAIN
};
ipath_get_cr_errpkey(struct ipath_devdata
*dd
)
1594 return ipath_read_creg32(dd
, dd
->ipath_cregs
->cr_errpkey
);
static int ipath_query_port(struct ib_device *ibdev,
			    u8 port, struct ib_port_attr *props)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	struct ipath_devdata *dd = dev->dd;
	enum ib_mtu mtu;
	u16 lid = dd->ipath_lid;
	u64 ibcstat;

	memset(props, 0, sizeof(*props));
	props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
	props->lmc = dd->ipath_lmc;
	props->sm_lid = dev->sm_lid;
	props->sm_sl = dev->sm_sl;
	ibcstat = dd->ipath_lastibcstat;
	/* map LinkState to IB portinfo values. */
	props->state = ipath_ib_linkstate(dd, ibcstat) + 1;

	/* See phys_state_show() */
	props->phys_state = /* MEA: assumes shift == 0 */
		ipath_cvt_physportstate[dd->ipath_lastibcstat &
					dd->ibcs_lts_mask];
	props->port_cap_flags = dev->port_cap_flags;
	props->gid_tbl_len = 1;
	props->max_msg_sz = 0x80000000;
	props->pkey_tbl_len = ipath_get_npkeys(dd);
	props->bad_pkey_cntr = ipath_get_cr_errpkey(dd) -
		dev->z_pkey_violations;
	props->qkey_viol_cntr = dev->qkey_violations;
	props->active_width = dd->ipath_link_width_active;
	/* See rate_show() */
	props->active_speed = dd->ipath_link_speed_active;
	props->max_vl_num = 1;		/* VLCap = VL0 */
	props->init_type_reply = 0;

	props->max_mtu = ipath_mtu4096 ? IB_MTU_4096 : IB_MTU_2048;
	switch (dd->ipath_ibmtu) {
	case 4096:
		mtu = IB_MTU_4096;
		break;
	case 2048:
		mtu = IB_MTU_2048;
		break;
	case 1024:
		mtu = IB_MTU_1024;
		break;
	case 512:
		mtu = IB_MTU_512;
		break;
	case 256:
		mtu = IB_MTU_256;
		break;
	default:
		mtu = IB_MTU_2048;
	}
	props->active_mtu = mtu;
	props->subnet_timeout = dev->subnet_timeout;

	return 0;
}
static int ipath_modify_device(struct ib_device *device,
			       int device_modify_mask,
			       struct ib_device_modify *device_modify)
{
	int ret;

	if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
				   IB_DEVICE_MODIFY_NODE_DESC)) {
		ret = -EOPNOTSUPP;
		goto bail;
	}

	if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC)
		memcpy(device->node_desc, device_modify->node_desc, 64);

	if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		to_idev(device)->sys_image_guid =
			cpu_to_be64(device_modify->sys_image_guid);

	ret = 0;

bail:
	return ret;
}
static int ipath_modify_port(struct ib_device *ibdev,
			     u8 port, int port_modify_mask,
			     struct ib_port_modify *props)
{
	struct ipath_ibdev *dev = to_idev(ibdev);

	dev->port_cap_flags |= props->set_port_cap_mask;
	dev->port_cap_flags &= ~props->clr_port_cap_mask;
	if (port_modify_mask & IB_PORT_SHUTDOWN)
		ipath_set_linkstate(dev->dd, IPATH_IB_LINKDOWN);
	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
		dev->qkey_violations = 0;

	return 0;
}
static int ipath_query_gid(struct ib_device *ibdev, u8 port,
			   int index, union ib_gid *gid)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	int ret;

	if (index >= 1) {
		ret = -EINVAL;
		goto bail;
	}
	gid->global.subnet_prefix = dev->gid_prefix;
	gid->global.interface_id = dev->dd->ipath_guid;

	ret = 0;

bail:
	return ret;
}
static struct ib_pd *ipath_alloc_pd(struct ib_device *ibdev,
				    struct ib_ucontext *context,
				    struct ib_udata *udata)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	struct ipath_pd *pd;
	struct ib_pd *ret;

	/*
	 * This is actually totally arbitrary.  Some correctness tests
	 * assume there's a maximum number of PDs that can be allocated.
	 * We don't actually have this limit, but we fail the test if
	 * we allow allocations of more than we report for this value.
	 */

	pd = kmalloc(sizeof *pd, GFP_KERNEL);
	if (!pd) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock(&dev->n_pds_lock);
	if (dev->n_pds_allocated == ib_ipath_max_pds) {
		spin_unlock(&dev->n_pds_lock);
		kfree(pd);
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dev->n_pds_allocated++;
	spin_unlock(&dev->n_pds_lock);

	/* ib_alloc_pd() will initialize pd->ibpd. */
	pd->user = udata != NULL;

	ret = &pd->ibpd;

bail:
	return ret;
}

static int ipath_dealloc_pd(struct ib_pd *ibpd)
{
	struct ipath_pd *pd = to_ipd(ibpd);
	struct ipath_ibdev *dev = to_idev(ibpd->device);

	spin_lock(&dev->n_pds_lock);
	dev->n_pds_allocated--;
	spin_unlock(&dev->n_pds_lock);

	kfree(pd);

	return 0;
}
/**
 * ipath_create_ah - create an address handle
 * @pd: the protection domain
 * @ah_attr: the attributes of the AH
 *
 * This may be called from interrupt context.
 */
static struct ib_ah *ipath_create_ah(struct ib_pd *pd,
				     struct ib_ah_attr *ah_attr)
{
	struct ipath_ah *ah;
	struct ib_ah *ret;
	struct ipath_ibdev *dev = to_idev(pd->device);
	unsigned long flags;

	/* A multicast address requires a GRH (see ch. 8.4.1). */
	if (ah_attr->dlid >= IPATH_MULTICAST_LID_BASE &&
	    ah_attr->dlid != IPATH_PERMISSIVE_LID &&
	    !(ah_attr->ah_flags & IB_AH_GRH)) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	if (ah_attr->dlid == 0) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	if (ah_attr->port_num < 1 ||
	    ah_attr->port_num > pd->device->phys_port_cnt) {
		ret = ERR_PTR(-EINVAL);
		goto bail;
	}

	ah = kmalloc(sizeof *ah, GFP_ATOMIC);
	if (ah == NULL) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	spin_lock_irqsave(&dev->n_ahs_lock, flags);
	if (dev->n_ahs_allocated == ib_ipath_max_ahs) {
		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
		kfree(ah);
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	dev->n_ahs_allocated++;
	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);

	/* ib_create_ah() will initialize ah->ibah. */
	ah->attr = *ah_attr;
	ah->attr.static_rate = ipath_ib_rate_to_mult(ah_attr->static_rate);

	ret = &ah->ibah;

bail:
	return ret;
}

/**
 * ipath_destroy_ah - destroy an address handle
 * @ibah: the AH to destroy
 *
 * This may be called from interrupt context.
 */
static int ipath_destroy_ah(struct ib_ah *ibah)
{
	struct ipath_ibdev *dev = to_idev(ibah->device);
	struct ipath_ah *ah = to_iah(ibah);
	unsigned long flags;

	spin_lock_irqsave(&dev->n_ahs_lock, flags);
	dev->n_ahs_allocated--;
	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);

	kfree(ah);

	return 0;
}

static int ipath_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
{
	struct ipath_ah *ah = to_iah(ibah);

	*ah_attr = ah->attr;
	ah_attr->static_rate = ipath_mult_to_ib_rate(ah->attr.static_rate);

	return 0;
}
/**
 * ipath_get_npkeys - return the size of the PKEY table for port 0
 * @dd: the infinipath device
 */
unsigned ipath_get_npkeys(struct ipath_devdata *dd)
{
	return ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys);
}

/**
 * ipath_get_pkey - return the indexed PKEY from the port PKEY table
 * @dd: the infinipath device
 * @index: the PKEY index
 */
unsigned ipath_get_pkey(struct ipath_devdata *dd, unsigned index)
{
	unsigned ret;

	/* always a kernel port, no locking needed */
	if (index >= ARRAY_SIZE(dd->ipath_pd[0]->port_pkeys))
		ret = 0;
	else
		ret = dd->ipath_pd[0]->port_pkeys[index];

	return ret;
}

static int ipath_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
			    u16 *pkey)
{
	struct ipath_ibdev *dev = to_idev(ibdev);
	int ret;

	if (index >= ipath_get_npkeys(dev->dd)) {
		ret = -EINVAL;
		goto bail;
	}
	*pkey = ipath_get_pkey(dev->dd, index);
	ret = 0;

bail:
	return ret;
}
/**
 * ipath_alloc_ucontext - allocate a ucontext
 * @ibdev: the infiniband device
 * @udata: not used by the InfiniPath driver
 */
static struct ib_ucontext *ipath_alloc_ucontext(struct ib_device *ibdev,
						struct ib_udata *udata)
{
	struct ipath_ucontext *context;
	struct ib_ucontext *ret;

	context = kmalloc(sizeof *context, GFP_KERNEL);
	if (!context) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	ret = &context->ibucontext;

bail:
	return ret;
}

static int ipath_dealloc_ucontext(struct ib_ucontext *context)
{
	kfree(to_iucontext(context));
	return 0;
}
static int ipath_verbs_register_sysfs(struct ib_device *dev);

static void __verbs_timer(unsigned long arg)
{
	struct ipath_devdata *dd = (struct ipath_devdata *) arg;

	/* Handle verbs layer timeouts. */
	ipath_ib_timer(dd->verbs_dev);

	mod_timer(&dd->verbs_timer, jiffies + 1);
}

static int enable_timer(struct ipath_devdata *dd)
{
	/*
	 * Early chips had a design flaw where the chip and kernel idea
	 * of the tail register don't always agree, and therefore we won't
	 * get an interrupt on the next packet received.
	 * If the board supports per packet receive interrupts, use it.
	 * Otherwise, the timer function periodically checks for packets
	 * to cover this case.
	 * Either way, the timer is needed for verbs layer related
	 * processing.
	 */
	if (dd->ipath_flags & IPATH_GPIO_INTR) {
		ipath_write_kreg(dd, dd->ipath_kregs->kr_debugportselect,
				 0x2074076542310ULL);
		/* Enable GPIO bit 2 interrupt */
		dd->ipath_gpio_mask |= (u64) (1 << IPATH_GPIO_PORT0_BIT);
		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
				 dd->ipath_gpio_mask);
	}

	setup_timer(&dd->verbs_timer, __verbs_timer, (unsigned long)dd);

	dd->verbs_timer.expires = jiffies + 1;
	add_timer(&dd->verbs_timer);

	return 0;
}

static int disable_timer(struct ipath_devdata *dd)
{
	if (dd->ipath_flags & IPATH_GPIO_INTR) {
		/* Disable GPIO bit 2 interrupt */
		dd->ipath_gpio_mask &= ~((u64) (1 << IPATH_GPIO_PORT0_BIT));
		ipath_write_kreg(dd, dd->ipath_kregs->kr_gpio_mask,
				 dd->ipath_gpio_mask);
		/*
		 * We might want to undo changes to debugportselect,
		 * but how?
		 */
	}

	del_timer_sync(&dd->verbs_timer);

	return 0;
}
static int ipath_port_immutable(struct ib_device *ibdev, u8 port_num,
				struct ib_port_immutable *immutable)
{
	struct ib_port_attr attr;
	int err;

	err = ipath_query_port(ibdev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}
/**
 * ipath_register_ib_device - register our device with the infiniband core
 * @dd: the device data structure
 * Return the allocated ipath_ibdev pointer or NULL on error.
 */
int ipath_register_ib_device(struct ipath_devdata *dd)
{
	struct ipath_verbs_counters cntrs;
	struct ipath_ibdev *idev;
	struct ib_device *dev;
	struct ipath_verbs_txreq *tx;
	unsigned i;
	int ret;

	idev = (struct ipath_ibdev *)ib_alloc_device(sizeof *idev);
	if (idev == NULL) {
		ret = -ENOMEM;
		goto bail;
	}

	dev = &idev->ibdev;

	if (dd->ipath_sdma_descq_cnt) {
		tx = kmalloc_array(dd->ipath_sdma_descq_cnt, sizeof *tx,
				   GFP_KERNEL);
		if (tx == NULL) {
			ret = -ENOMEM;
			goto err_tx;
		}
	} else
		tx = NULL;
	idev->txreq_bufs = tx;

	/* Only need to initialize non-zero fields. */
	spin_lock_init(&idev->n_pds_lock);
	spin_lock_init(&idev->n_ahs_lock);
	spin_lock_init(&idev->n_cqs_lock);
	spin_lock_init(&idev->n_qps_lock);
	spin_lock_init(&idev->n_srqs_lock);
	spin_lock_init(&idev->n_mcast_grps_lock);

	spin_lock_init(&idev->qp_table.lock);
	spin_lock_init(&idev->lk_table.lock);
	idev->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
	/* Set the prefix to the default value (see ch. 4.1.1) */
	idev->gid_prefix = cpu_to_be64(0xfe80000000000000ULL);

	ret = ipath_init_qp_table(idev, ib_ipath_qp_table_size);
	if (ret)
		goto err_qp;

	/*
	 * The top ib_ipath_lkey_table_size bits are used to index the
	 * table.  The lower 8 bits can be owned by the user (copied from
	 * the LKEY).  The remaining bits act as a generation number or tag.
	 */
	idev->lk_table.max = 1 << ib_ipath_lkey_table_size;
	idev->lk_table.table = kcalloc(idev->lk_table.max,
				       sizeof(*idev->lk_table.table),
				       GFP_KERNEL);
	if (idev->lk_table.table == NULL) {
		ret = -ENOMEM;
		goto err_lk;
	}
	INIT_LIST_HEAD(&idev->pending_mmaps);
	spin_lock_init(&idev->pending_lock);
	idev->mmap_offset = PAGE_SIZE;
	spin_lock_init(&idev->mmap_offset_lock);
	INIT_LIST_HEAD(&idev->pending[0]);
	INIT_LIST_HEAD(&idev->pending[1]);
	INIT_LIST_HEAD(&idev->pending[2]);
	INIT_LIST_HEAD(&idev->piowait);
	INIT_LIST_HEAD(&idev->rnrwait);
	INIT_LIST_HEAD(&idev->txreq_free);
	idev->pending_index = 0;
	idev->port_cap_flags =
		IB_PORT_SYS_IMAGE_GUID_SUP | IB_PORT_CLIENT_REG_SUP;
	if (dd->ipath_flags & IPATH_HAS_LINK_LATENCY)
		idev->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
	idev->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
	idev->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
	idev->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
	idev->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
	idev->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;

	/* Snapshot current HW counters to "clear" them. */
	ipath_get_counters(dd, &cntrs);
	idev->z_symbol_error_counter = cntrs.symbol_error_counter;
	idev->z_link_error_recovery_counter =
		cntrs.link_error_recovery_counter;
	idev->z_link_downed_counter = cntrs.link_downed_counter;
	idev->z_port_rcv_errors = cntrs.port_rcv_errors;
	idev->z_port_rcv_remphys_errors =
		cntrs.port_rcv_remphys_errors;
	idev->z_port_xmit_discards = cntrs.port_xmit_discards;
	idev->z_port_xmit_data = cntrs.port_xmit_data;
	idev->z_port_rcv_data = cntrs.port_rcv_data;
	idev->z_port_xmit_packets = cntrs.port_xmit_packets;
	idev->z_port_rcv_packets = cntrs.port_rcv_packets;
	idev->z_local_link_integrity_errors =
		cntrs.local_link_integrity_errors;
	idev->z_excessive_buffer_overrun_errors =
		cntrs.excessive_buffer_overrun_errors;
	idev->z_vl15_dropped = cntrs.vl15_dropped;

	for (i = 0; i < dd->ipath_sdma_descq_cnt; i++, tx++)
		list_add(&tx->txreq.list, &idev->txreq_free);

	/*
	 * The system image GUID is supposed to be the same for all
	 * IB HCAs in a single system but since there can be other
	 * device types in the system, we can't be sure this is unique.
	 */
	if (!sys_image_guid)
		sys_image_guid = dd->ipath_guid;
	idev->sys_image_guid = sys_image_guid;
	idev->ib_unit = dd->ipath_unit;
	idev->dd = dd;

	strlcpy(dev->name, "ipath%d", IB_DEVICE_NAME_MAX);
	dev->owner = THIS_MODULE;
	dev->node_guid = dd->ipath_guid;
	dev->uverbs_abi_ver = IPATH_UVERBS_ABI_VERSION;
	dev->uverbs_cmd_mask =
		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
		(1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
		(1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
		(1ull << IB_USER_VERBS_CMD_CREATE_AH) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
		(1ull << IB_USER_VERBS_CMD_QUERY_AH) |
		(1ull << IB_USER_VERBS_CMD_REG_MR) |
		(1ull << IB_USER_VERBS_CMD_DEREG_MR) |
		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
		(1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
		(1ull << IB_USER_VERBS_CMD_POLL_CQ) |
		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
		(1ull << IB_USER_VERBS_CMD_CREATE_QP) |
		(1ull << IB_USER_VERBS_CMD_QUERY_QP) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
		(1ull << IB_USER_VERBS_CMD_POST_SEND) |
		(1ull << IB_USER_VERBS_CMD_POST_RECV) |
		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = 1;
	dev->dma_device = &dd->pcidev->dev;
	dev->query_device = ipath_query_device;
	dev->modify_device = ipath_modify_device;
	dev->query_port = ipath_query_port;
	dev->modify_port = ipath_modify_port;
	dev->query_pkey = ipath_query_pkey;
	dev->query_gid = ipath_query_gid;
	dev->alloc_ucontext = ipath_alloc_ucontext;
	dev->dealloc_ucontext = ipath_dealloc_ucontext;
	dev->alloc_pd = ipath_alloc_pd;
	dev->dealloc_pd = ipath_dealloc_pd;
	dev->create_ah = ipath_create_ah;
	dev->destroy_ah = ipath_destroy_ah;
	dev->query_ah = ipath_query_ah;
	dev->create_srq = ipath_create_srq;
	dev->modify_srq = ipath_modify_srq;
	dev->query_srq = ipath_query_srq;
	dev->destroy_srq = ipath_destroy_srq;
	dev->create_qp = ipath_create_qp;
	dev->modify_qp = ipath_modify_qp;
	dev->query_qp = ipath_query_qp;
	dev->destroy_qp = ipath_destroy_qp;
	dev->post_send = ipath_post_send;
	dev->post_recv = ipath_post_receive;
	dev->post_srq_recv = ipath_post_srq_receive;
	dev->create_cq = ipath_create_cq;
	dev->destroy_cq = ipath_destroy_cq;
	dev->resize_cq = ipath_resize_cq;
	dev->poll_cq = ipath_poll_cq;
	dev->req_notify_cq = ipath_req_notify_cq;
	dev->get_dma_mr = ipath_get_dma_mr;
	dev->reg_user_mr = ipath_reg_user_mr;
	dev->dereg_mr = ipath_dereg_mr;
	dev->alloc_fmr = ipath_alloc_fmr;
	dev->map_phys_fmr = ipath_map_phys_fmr;
	dev->unmap_fmr = ipath_unmap_fmr;
	dev->dealloc_fmr = ipath_dealloc_fmr;
	dev->attach_mcast = ipath_multicast_attach;
	dev->detach_mcast = ipath_multicast_detach;
	dev->process_mad = ipath_process_mad;
	dev->mmap = ipath_mmap;
	dev->dma_ops = &ipath_dma_mapping_ops;
	dev->get_port_immutable = ipath_port_immutable;

	snprintf(dev->node_desc, sizeof(dev->node_desc),
		 IPATH_IDSTR " %s", init_utsname()->nodename);

	ret = ib_register_device(dev, NULL);
	if (ret)
		goto err_reg;

	ret = ipath_verbs_register_sysfs(dev);
	if (ret)
		goto err_class;

	enable_timer(dd);

	ret = 0;
	goto bail;

err_class:
	ib_unregister_device(dev);
err_reg:
	kfree(idev->lk_table.table);
err_lk:
	kfree(idev->qp_table.table);
err_qp:
	kfree(idev->txreq_bufs);
err_tx:
	ib_dealloc_device(dev);
	ipath_dev_err(dd, "cannot register verbs: %d!\n", -ret);
	idev = NULL;

bail:
	dd->verbs_dev = idev;
	return ret;
}
void ipath_unregister_ib_device(struct ipath_ibdev *dev)
{
	struct ib_device *ibdev = &dev->ibdev;
	u32 qps_inuse;

	ib_unregister_device(ibdev);

	disable_timer(dev->dd);

	if (!list_empty(&dev->pending[0]) ||
	    !list_empty(&dev->pending[1]) ||
	    !list_empty(&dev->pending[2]))
		ipath_dev_err(dev->dd, "pending list not empty!\n");
	if (!list_empty(&dev->piowait))
		ipath_dev_err(dev->dd, "piowait list not empty!\n");
	if (!list_empty(&dev->rnrwait))
		ipath_dev_err(dev->dd, "rnrwait list not empty!\n");
	if (!ipath_mcast_tree_empty())
		ipath_dev_err(dev->dd, "multicast table memory leak!\n");
	/*
	 * Note that ipath_unregister_ib_device() can be called before all
	 * the QPs are destroyed!
	 */
	qps_inuse = ipath_free_all_qps(&dev->qp_table);
	if (qps_inuse)
		ipath_dev_err(dev->dd, "QP memory leak! %u still in use\n",
			      qps_inuse);
	kfree(dev->qp_table.table);
	kfree(dev->lk_table.table);
	kfree(dev->txreq_bufs);
	ib_dealloc_device(ibdev);
}
static ssize_t show_rev(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct ipath_ibdev *dev =
		container_of(device, struct ipath_ibdev, ibdev.dev);

	return sprintf(buf, "%x\n", dev->dd->ipath_pcirev);
}

static ssize_t show_hca(struct device *device, struct device_attribute *attr,
			char *buf)
{
	struct ipath_ibdev *dev =
		container_of(device, struct ipath_ibdev, ibdev.dev);
	int ret;

	ret = dev->dd->ipath_f_get_boardname(dev->dd, buf, 128);
	if (ret < 0)
		goto bail;
	strcat(buf, "\n");
	ret = strlen(buf);

bail:
	return ret;
}

static ssize_t show_stats(struct device *device, struct device_attribute *attr,
			  char *buf)
{
	struct ipath_ibdev *dev =
		container_of(device, struct ipath_ibdev, ibdev.dev);
	int i;
	int len;

	len = sprintf(buf,
		      "RC resends  %d\n"
		      "RC no QACK  %d\n"
		      "RC ACKs     %d\n"
		      "RC SEQ NAKs %d\n"
		      "RC RDMA seq %d\n"
		      "RC RNR NAKs %d\n"
		      "RC OTH NAKs %d\n"
		      "RC timeouts %d\n"
		      "RC RDMA dup %d\n"
		      "piobuf wait %d\n"
		      "unaligned   %d\n"
		      "PKT drops   %d\n"
		      "WQE errs    %d\n",
		      dev->n_rc_resends, dev->n_rc_qacks, dev->n_rc_acks,
		      dev->n_seq_naks, dev->n_rdma_seq, dev->n_rnr_naks,
		      dev->n_other_naks, dev->n_timeouts,
		      dev->n_rdma_dup_busy, dev->n_piowait, dev->n_unaligned,
		      dev->n_pkt_drops, dev->n_wqe_errs);
	for (i = 0; i < ARRAY_SIZE(dev->opstats); i++) {
		const struct ipath_opcode_stats *si = &dev->opstats[i];

		if (!si->n_packets && !si->n_bytes)
			continue;
		len += sprintf(buf + len, "%02x %llu/%llu\n", i,
			       (unsigned long long) si->n_packets,
			       (unsigned long long) si->n_bytes);
	}
	return len;
}

static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
static DEVICE_ATTR(stats, S_IRUGO, show_stats, NULL);

static struct device_attribute *ipath_class_attributes[] = {
	&dev_attr_hw_rev,
	&dev_attr_hca_type,
	&dev_attr_board_id,
	&dev_attr_stats
};
static int ipath_verbs_register_sysfs(struct ib_device *dev)
{
	int i;
	int ret;

	for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i) {
		ret = device_create_file(&dev->dev,
					 ipath_class_attributes[i]);
		if (ret)
			goto bail;
	}
	return 0;
bail:
	for (i = 0; i < ARRAY_SIZE(ipath_class_attributes); ++i)
		device_remove_file(&dev->dev, ipath_class_attributes[i]);
	return ret;
}