/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in
 *   the documentation and/or other materials provided with the
 *   distribution.
 * - Neither the name of Intel Corporation nor the names of its
 *   contributors may be used to endorse or promote products derived
 *   from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
48 #include <linux/net.h>
49 #include <rdma/ib_smi.h>
53 #include "verbs_txreq.h"
/**
 * ud_loopback - handle send on loopback QPs
 * @sqp: the sending QP
 * @swqe: the send work request
 *
 * This is called from hfi1_make_ud_req() to forward a WQE addressed
 * to the same HFI port, delivering it directly to the destination QP's
 * receive queue without going out on the wire.
 *
 * Note that the receive interrupt handler may be calling hfi1_ud_rcv()
 * while this is being called.
 *
 * NOTE(review): this extraction is incomplete — the function's opening
 * brace, several local declarations (qp, pkey, slid, qkey, lid, length,
 * wc, flags, ret, sge, i), and a number of return/goto/closing-brace
 * lines are missing from the visible text.  Reconcile against the
 * original source before building.
 */
static void ud_loopback(struct rvt_qp *sqp, struct rvt_swqe *swqe)
	struct hfi1_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
	struct hfi1_pportdata *ppd;
	struct ib_ah_attr *ah_attr;
	struct rvt_sge_state ssge;
	enum ib_qp_type sqptype, dqptype;

	/* Look up the destination QP by the QPN carried in the send WQE. */
	qp = rvt_lookup_qpn(ib_to_rvt(sqp->ibqp.device), &ibp->rvp,
			    swqe->ud_wr.remote_qpn);
	/* NOTE(review): lookup-failure branch appears truncated here. */
	ibp->rvp.n_pkt_drops++;

	/* GSI QPs behave as UD for the purposes of type matching. */
	sqptype = sqp->ibqp.qp_type == IB_QPT_GSI ?
		IB_QPT_UD : sqp->ibqp.qp_type;
	dqptype = qp->ibqp.qp_type == IB_QPT_GSI ?
		IB_QPT_UD : qp->ibqp.qp_type;

	/* Drop if QP types disagree or the destination cannot receive. */
	if (dqptype != sqptype ||
	    !(ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK)) {
		ibp->rvp.n_pkt_drops++;

	ah_attr = &ibah_to_rvtah(swqe->ud_wr.ah)->attr;
	ppd = ppd_from_ibp(ibp);

	/* P_Key check is skipped for QP0/QP1 management traffic. */
	if (qp->ibqp.qp_num > 1) {
		u8 sc5 = ibp->sl_to_sc[ah_attr->sl];

		pkey = hfi1_get_pkey(ibp, sqp->s_pkey_index);
		slid = ppd->lid | (ah_attr->src_path_bits &
				   ((1 << ppd->lmc) - 1));
		if (unlikely(ingress_pkey_check(ppd, pkey, sc5,
						qp->s_pkey_index, slid))) {
			hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY, pkey,
				       sqp->ibqp.qp_num, qp->ibqp.qp_num,
				       slid, ah_attr->dlid);

	/*
	 * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
	 * Qkeys with the high order bit set mean use the
	 * qkey from the QP context instead of the WR (see 10.2.5).
	 */
	if (qp->ibqp.qp_num) {
		qkey = (int)swqe->ud_wr.remote_qkey < 0 ?
			sqp->qkey : swqe->ud_wr.remote_qkey;
		if (unlikely(qkey != qp->qkey)) {
			lid = ppd->lid | (ah_attr->src_path_bits &
					  ((1 << ppd->lmc) - 1));
			hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
				       sqp->ibqp.qp_num, qp->ibqp.qp_num,

	/*
	 * A GRH is expected to precede the data even if not
	 * present on the wire.
	 */
	length = swqe->length;
	memset(&wc, 0, sizeof(wc));
	wc.byte_len = length + sizeof(struct ib_grh);

	if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
		wc.wc_flags = IB_WC_WITH_IMM;
		wc.ex.imm_data = swqe->wr.ex.imm_data;

	spin_lock_irqsave(&qp->r_lock, flags);

	/*
	 * Get the next work request entry to find where to put the data.
	 */
	if (qp->r_flags & RVT_R_REUSE_SGE) {
		qp->r_flags &= ~RVT_R_REUSE_SGE;
		ret = hfi1_rvt_get_rwqe(qp, 0);
		/* NOTE(review): error-check branch for ret appears truncated. */
			hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
		if (qp->ibqp.qp_num == 0)
			ibp->rvp.n_vl15_dropped++;

	/* Silently drop packets which are too big. */
	if (unlikely(wc.byte_len > qp->r_len)) {
		qp->r_flags |= RVT_R_REUSE_SGE;
		ibp->rvp.n_pkt_drops++;

	/* Synthesize the GRH in the receive buffer, or skip over its slot. */
	if (ah_attr->ah_flags & IB_AH_GRH) {
		hfi1_copy_sge(&qp->r_sge, &ah_attr->grh,
			      sizeof(struct ib_grh), 1, 0);
		wc.wc_flags |= IB_WC_GRH;
		hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);

	/* Walk the sender's SG list, copying into the receiver's SGE state. */
	ssge.sg_list = swqe->sg_list + 1;
	ssge.sge = *swqe->sg_list;
	ssge.num_sge = swqe->wr.num_sge;
	/* NOTE(review): the copy loop header and sge declaration are missing. */
		u32 len = sge->length;

		if (len > sge->sge_length)
			len = sge->sge_length;
		WARN_ON_ONCE(len == 0);
		hfi1_copy_sge(&qp->r_sge, sge->vaddr, len, 1, 0);
		sge->sge_length -= len;
		if (sge->sge_length == 0) {
			*sge = *ssge.sg_list++;
		} else if (sge->length == 0 && sge->mr->lkey) {
			if (++sge->n >= RVT_SEGSZ) {
				if (++sge->m >= sge->mr->mapsz)
			/* NOTE(review): assignment LHS lines appear missing
			 * before these two map-segment accesses. */
			sge->mr->map[sge->m]->segs[sge->n].vaddr;
			sge->mr->map[sge->m]->segs[sge->n].length;

	rvt_put_ss(&qp->r_sge);
	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
	/* Fill in the completion for the destination QP's receive CQ. */
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.opcode = IB_WC_RECV;
	wc.src_qp = sqp->ibqp.qp_num;
	if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI) {
		if (sqp->ibqp.qp_type == IB_QPT_GSI ||
		    sqp->ibqp.qp_type == IB_QPT_SMI)
			wc.pkey_index = swqe->ud_wr.pkey_index;
		/* NOTE(review): else branch delimiters appear missing here. */
		wc.pkey_index = sqp->s_pkey_index;
	wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
	/* Check for loopback when the port lid is not set */
	if (wc.slid == 0 && sqp->ibqp.qp_type == IB_QPT_GSI)
		wc.slid = be16_to_cpu(IB_LID_PERMISSIVE);
	wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
	wc.port_num = qp->port_num;
	/* Signal completion event if the solicited bit is set. */
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
		     swqe->wr.send_flags & IB_SEND_SOLICITED);
	ibp->rvp.n_loop_pkts++;
	spin_unlock_irqrestore(&qp->r_lock, flags);
/**
 * hfi1_make_ud_req - construct a UD request packet
 * @qp: the QP to build the request for
 * @ps: per-packet send state (txreq, device, saved IRQ flags)
 *
 * Assume s_lock is held.
 *
 * Return 1 if constructed; otherwise, return 0.
 *
 * NOTE(review): this extraction is incomplete — the opening brace,
 * several local declarations (wqe index/next_cur, lid, extra_bytes,
 * nwords, bth0, lrh0, sc5), goto labels (bail/done/bail_no_tx style)
 * and the final return statements are missing from the visible text.
 * Reconcile against the original source before building.
 */
int hfi1_make_ud_req(struct rvt_qp *qp, struct hfi1_pkt_state *ps)
	struct hfi1_qp_priv *priv = qp->priv;
	struct hfi1_other_headers *ohdr;
	struct ib_ah_attr *ah_attr;
	struct hfi1_pportdata *ppd;
	struct hfi1_ibport *ibp;
	struct rvt_swqe *wqe;

	/* Allocate the verbs txreq that will carry this packet. */
	ps->s_txreq = get_txreq(ps->dev, qp);
	if (IS_ERR(ps->s_txreq))

	if (!(ib_rvt_state_ops[qp->state] & RVT_PROCESS_NEXT_SEND_OK)) {
		if (!(ib_rvt_state_ops[qp->state] & RVT_FLUSH_SEND))
		/* We are in the error state, flush the work request. */
		smp_read_barrier_depends(); /* see post_one_send */
		if (qp->s_last == ACCESS_ONCE(qp->s_head))
		/* If DMAs are in progress, we can't flush immediately. */
		if (iowait_sdma_pending(&priv->s_iowait)) {
			qp->s_flags |= RVT_S_WAIT_DMA;
		wqe = rvt_get_swqe_ptr(qp, qp->s_last);
		hfi1_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);

	/* see post_one_send() */
	smp_read_barrier_depends();
	if (qp->s_cur == ACCESS_ONCE(qp->s_head))

	wqe = rvt_get_swqe_ptr(qp, qp->s_cur);
	next_cur = qp->s_cur + 1;
	if (next_cur >= qp->s_size)

	/* Construct the header. */
	ibp = to_iport(qp->ibqp.device, qp->port_num);
	ppd = ppd_from_ibp(ibp);
	ah_attr = &ibah_to_rvtah(wqe->ud_wr.ah)->attr;
	if (ah_attr->dlid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
	    ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
		lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
		/* NOTE(review): part of this loopback condition is missing;
		 * the trailing extra ')' suggests a dropped sub-expression. */
		if (unlikely(!loopback &&
			     (lid == be16_to_cpu(IB_LID_PERMISSIVE) &&
			      qp->ibqp.qp_type == IB_QPT_GSI)))) {
			unsigned long tflags = ps->flags;
			/*
			 * If DMAs are in progress, we can't generate
			 * a completion for the loopback packet since
			 * it would be out of order.
			 * Instead of waiting, we could queue a
			 * zero length descriptor so we get a callback.
			 */
			if (iowait_sdma_pending(&priv->s_iowait)) {
				qp->s_flags |= RVT_S_WAIT_DMA;
			qp->s_cur = next_cur;
			/* Drop s_lock around the local-delivery path. */
			spin_unlock_irqrestore(&qp->s_lock, tflags);
			ud_loopback(qp, wqe);
			spin_lock_irqsave(&qp->s_lock, tflags);
			hfi1_send_complete(qp, wqe, IB_WC_SUCCESS);

	qp->s_cur = next_cur;
	/* Pad the payload out to a 4-byte boundary. */
	extra_bytes = -wqe->length & 3;
	nwords = (wqe->length + extra_bytes) >> 2;

	/* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
	qp->s_cur_size = wqe->length;
	qp->s_cur_sge = &qp->s_sge;
	qp->s_srate = ah_attr->static_rate;
	qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
	qp->s_sge.sge = wqe->sg_list[0];
	qp->s_sge.sg_list = wqe->sg_list + 1;
	qp->s_sge.num_sge = wqe->wr.num_sge;
	qp->s_sge.total_len = wqe->length;

	if (ah_attr->ah_flags & IB_AH_GRH) {
		/* Header size in 32-bit words. */
		qp->s_hdrwords += hfi1_make_grh(ibp,
						&ps->s_txreq->phdr.hdr.u.l.grh,
						qp->s_hdrwords, nwords);
		ohdr = &ps->s_txreq->phdr.hdr.u.l.oth;
		/*
		 * Don't worry about sending to locally attached multicast
		 * QPs.  It is unspecified by the spec. what happens.
		 */
		/* NOTE(review): else branch delimiters appear missing here. */
		/* Header size in 32-bit words. */
		ohdr = &ps->s_txreq->phdr.hdr.u.oth;
	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
		ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
		bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
		/* NOTE(review): else branch delimiters appear missing here. */
		bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
	sc5 = ibp->sl_to_sc[ah_attr->sl];
	lrh0 |= (ah_attr->sl & 0xf) << 4;
	if (qp->ibqp.qp_type == IB_QPT_SMI) {
		lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
		/* NOTE(review): else branch delimiters appear missing here. */
		lrh0 |= (sc5 & 0xf) << 12;
	priv->s_sde = qp_to_sdma_engine(qp, priv->s_sc);
	ps->s_txreq->sde = priv->s_sde;
	priv->s_sendcontext = qp_to_send_context(qp, priv->s_sc);
	ps->s_txreq->psc = priv->s_sendcontext;
	ps->s_txreq->phdr.hdr.lrh[0] = cpu_to_be16(lrh0);
	ps->s_txreq->phdr.hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);
	ps->s_txreq->phdr.hdr.lrh[2] =
		cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
	if (ah_attr->dlid == be16_to_cpu(IB_LID_PERMISSIVE)) {
		ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
		lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
		ps->s_txreq->phdr.hdr.lrh[3] = cpu_to_be16(lid);
		ps->s_txreq->phdr.hdr.lrh[3] = IB_LID_PERMISSIVE;
	if (wqe->wr.send_flags & IB_SEND_SOLICITED)
		bth0 |= IB_BTH_SOLICITED;
	bth0 |= extra_bytes << 20;
	if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_SMI)
		bth0 |= hfi1_get_pkey(ibp, wqe->ud_wr.pkey_index);
	/* NOTE(review): else line appears missing before this statement. */
		bth0 |= hfi1_get_pkey(ibp, qp->s_pkey_index);
	ohdr->bth[0] = cpu_to_be32(bth0);
	ohdr->bth[1] = cpu_to_be32(wqe->ud_wr.remote_qpn);
	ohdr->bth[2] = cpu_to_be32(mask_psn(wqe->psn));
	/*
	 * Qkeys with the high order bit set mean use the
	 * qkey from the QP context instead of the WR (see 10.2.5).
	 */
	ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->ud_wr.remote_qkey < 0 ?
					 qp->qkey : wqe->ud_wr.remote_qkey);
	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
	/* Disable header suppression for this request. */
	priv->s_hdr->ahgcount = 0;
	priv->s_hdr->ahgidx = 0;
	priv->s_hdr->tx_flags = 0;
	priv->s_hdr->sde = NULL;
	/* pbc */
	ps->s_txreq->hdr_dwords = qp->s_hdrwords + 2;

	/* NOTE(review): the labels for these two cleanup paths are missing. */
	hfi1_put_txreq(ps->s_txreq);

	hfi1_put_txreq(ps->s_txreq);
	qp->s_flags &= ~RVT_S_BUSY;
/*
 * Hardware can't check this so we do it here.
 *
 * This is a slightly different algorithm than the standard pkey check.  It
 * special cases the management keys and allows for 0x7fff and 0xffff to be in
 * the table at the same time.
 *
 * @returns the index found or -1 if not found
 *
 * NOTE(review): this extraction is incomplete — the opening brace, the
 * loop-index declaration, the early-return statements inside the loops,
 * and the final return are missing from the visible text.  Reconcile
 * against the original source before building.
 */
int hfi1_lookup_pkey_idx(struct hfi1_ibport *ibp, u16 pkey)
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

	/* Management keys get the exact-match special case. */
	if (pkey == FULL_MGMT_P_KEY || pkey == LIM_MGMT_P_KEY) {
		unsigned lim_idx = -1;

		for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i) {
			/* here we look for an exact match */
			if (ppd->pkeys[i] == pkey)
			if (ppd->pkeys[i] == LIM_MGMT_P_KEY)

		/* did not find 0xffff return 0x7fff idx if found */
		if (pkey == FULL_MGMT_P_KEY)

	pkey &= 0x7fff; /* remove limited/full membership bit */

	for (i = 0; i < ARRAY_SIZE(ppd->pkeys); ++i)
		if ((ppd->pkeys[i] & 0x7fff) == pkey)

	/*
	 * Should not get here, this means hardware failed to validate pkeys.
	 */
/*
 * return_cnp - build and send a CNP (congestion notification packet)
 * back toward the sender identified by @remote_qpn/@dlid, echoing the
 * GRH (with source/destination GIDs swapped) when @old_grh is present.
 *
 * NOTE(review): this extraction is incomplete — the opening brace, the
 * lrh0 initialization, the has_grh branch structure, the ohdr selection,
 * the pbuf NULL check, and the closing brace are missing from the
 * visible text.  Reconcile against the original source before building.
 */
void return_cnp(struct hfi1_ibport *ibp, struct rvt_qp *qp, u32 remote_qpn,
		u32 pkey, u32 slid, u32 dlid, u8 sc5,
		const struct ib_grh *old_grh)
	u64 pbc, pbc_flags = 0;
	u32 bth0, plen, vl, hwords = 5;
	u8 sl = ibp->sc_to_sl[sc5];
	struct hfi1_ib_header hdr;
	struct hfi1_other_headers *ohdr;
	struct pio_buf *pbuf;
	struct send_context *ctxt = qp_to_send_context(qp, sc5);
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

	/* Mirror the incoming GRH back at the sender. */
		struct ib_grh *grh = &hdr.u.l.grh;

		grh->version_tclass_flow = old_grh->version_tclass_flow;
		grh->paylen = cpu_to_be16((hwords - 2 + SIZE_OF_CRC) << 2);
		grh->hop_limit = 0xff;
		grh->sgid = old_grh->dgid;
		grh->dgid = old_grh->sgid;
		hwords += sizeof(struct ib_grh) / sizeof(u32);

	lrh0 |= (sc5 & 0xf) << 12 | sl << 4;

	bth0 = pkey | (IB_OPCODE_CNP << 24);
	ohdr->bth[0] = cpu_to_be32(bth0);

	/* Mark the packet as a backward congestion notification. */
	ohdr->bth[1] = cpu_to_be32(remote_qpn | (1 << HFI1_BECN_SHIFT));
	ohdr->bth[2] = 0; /* PSN 0 */

	hdr.lrh[0] = cpu_to_be16(lrh0);
	hdr.lrh[1] = cpu_to_be16(dlid);
	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
	hdr.lrh[3] = cpu_to_be16(slid);

	plen = 2 /* PBC */ + hwords;
	pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
	vl = sc_to_vlt(ppd->dd, sc5);
	pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);

	pbuf = sc_buffer_alloc(ctxt, plen, NULL, NULL);
	/* NOTE(review): the pbuf NULL check appears missing before this. */
		ppd->dd->pio_inline_send(ppd->dd, pbuf, pbc,
/*
 * opa_smp_check() - Do the regular pkey checking, and the additional
 * checks for SMPs specified in OPAv1 rev 0.90, section 9.10.26
 * ("SMA Packet Checks").
 *
 * Note:
 *   - Checks are done using the pkey directly from the packet's BTH,
 *     and specifically _not_ the pkey that we attach to the completion,
 *     which may be different.
 *   - These checks are specifically for "non-local" SMPs (i.e., SMPs
 *     which originated on another node). SMPs which are sent from, and
 *     destined to this node are checked in opa_local_smp_check().
 *
 * At the point where opa_smp_check() is called, we know:
 *   - destination QP is QP0
 *
 * opa_smp_check() returns 0 if all checks succeed, 1 otherwise.
 *
 * NOTE(review): this extraction is incomplete — the opening brace, the
 * sc5 check mentioned in the first comment, return statements, break
 * statements between switch cases, the default case, and the closing
 * brace are missing from the visible text.  Reconcile against the
 * original source before building.
 */
static int opa_smp_check(struct hfi1_ibport *ibp, u16 pkey, u8 sc5,
			 struct rvt_qp *qp, u16 slid, struct opa_smp *smp)
	struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

	/*
	 * I don't think it's possible for us to get here with sc != 0xf,
	 * but check it to be certain.
	 */

	if (rcv_pkey_check(ppd, pkey, sc5, slid))

	/*
	 * At this point we know (and so don't need to check again) that
	 * the pkey is either LIM_MGMT_P_KEY, or FULL_MGMT_P_KEY
	 * (see ingress_pkey_check).
	 */
	if (smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE &&
	    smp->mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED) {
		ingress_pkey_table_fail(ppd, pkey, slid);

	/*
	 * SMPs fall into one of four (disjoint) categories:
	 * SMA request, SMA response, trap, or trap repress.
	 * Our response depends, in part, on which type of
	 * SMP we're processing.
	 *
	 * If this is not an SMA request, or trap repress:
	 *   - accept MAD if the port is running an SM
	 *   - pkey == FULL_MGMT_P_KEY =>
	 *       reply with unsupported method (i.e., just mark
	 *       the smp's status field here, and let it be
	 *       processed normally)
	 *   - pkey != LIM_MGMT_P_KEY =>
	 *       increment port recv constraint errors, drop MAD
	 * If this is an SMA request or trap repress:
	 *   - pkey != FULL_MGMT_P_KEY =>
	 *       increment port recv constraint errors, drop MAD
	 */
	switch (smp->method) {
	case IB_MGMT_METHOD_GET:
	case IB_MGMT_METHOD_SET:
	case IB_MGMT_METHOD_REPORT:
	case IB_MGMT_METHOD_TRAP_REPRESS:
		if (pkey != FULL_MGMT_P_KEY) {
			ingress_pkey_table_fail(ppd, pkey, slid);
	case IB_MGMT_METHOD_SEND:
	case IB_MGMT_METHOD_TRAP:
	case IB_MGMT_METHOD_GET_RESP:
	case IB_MGMT_METHOD_REPORT_RESP:
		if (ibp->rvp.port_cap_flags & IB_PORT_SM)
		if (pkey == FULL_MGMT_P_KEY) {
			smp->status |= IB_SMP_UNSUP_METHOD;
		if (pkey != LIM_MGMT_P_KEY) {
			ingress_pkey_table_fail(ppd, pkey, slid);
/**
 * hfi1_ud_rcv - receive an incoming UD packet
 * @packet: the received packet context (header, payload, flags, QP)
 *
 * (Legacy doc listed the unpacked fields individually:
 * @ibp: the port the packet came in on
 * @hdr: the packet header
 * @rcv_flags: flags relevant to rcv processing
 * @data: the packet data
 * @tlen: the packet length
 * @qp: the QP the packet came on)
 *
 * This is called from qp_rcv() to process an incoming UD packet.
 *
 * Called at interrupt level.
 *
 * NOTE(review): this extraction is incomplete — the opening brace,
 * several local declarations (qkey, src_qp, dlid, slid, pkey, opcode,
 * pad, sl, bth1, is_mcast, wc, ret), drop labels / goto statements,
 * branch delimiters, and the closing brace are missing from the
 * visible text.  Reconcile against the original source before building.
 */
void hfi1_ud_rcv(struct hfi1_packet *packet)
	struct hfi1_other_headers *ohdr = packet->ohdr;
	u32 hdrsize = packet->hlen;
	int mgmt_pkey_idx = -1;
	struct hfi1_ibport *ibp = &packet->rcd->ppd->ibport_data;
	struct hfi1_ib_header *hdr = packet->hdr;
	u32 rcv_flags = packet->rcv_flags;
	void *data = packet->ebuf;
	u32 tlen = packet->tlen;
	struct rvt_qp *qp = packet->qp;
	bool has_grh = rcv_flags & HFI1_HAS_GRH;
	u8 sc5 = hdr2sc((struct hfi1_message_header *)hdr, packet->rhf);
	struct ib_grh *grh = NULL;

	qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & RVT_QPN_MASK;
	dlid = be16_to_cpu(hdr->lrh[1]);
	is_mcast = (dlid > be16_to_cpu(IB_MULTICAST_LID_BASE)) &&
		   (dlid != be16_to_cpu(IB_LID_PERMISSIVE));
	bth1 = be32_to_cpu(ohdr->bth[1]);
	if (unlikely(bth1 & HFI1_BECN_SMASK)) {
		/*
		 * In pre-B0 h/w the CNP_OPCODE is handled via an
		 * (exception path; remainder of this comment is missing)
		 */
		struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
		u32 lqpn = be32_to_cpu(ohdr->bth[1]) & RVT_QPN_MASK;

		sl = ibp->sc_to_sl[sc5];
		process_becn(ppd, sl, 0, lqpn, 0, IB_CC_SVCTYPE_UD);

	/*
	 * The opcode is in the low byte when its in network order
	 * (top byte when in host order).
	 */
	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
	pkey = (u16)be32_to_cpu(ohdr->bth[0]);

	/* Answer an FECN with a CNP back at the sender. */
	if (!is_mcast && (opcode != IB_OPCODE_CNP) && bth1 & HFI1_FECN_SMASK) {
		u16 slid = be16_to_cpu(hdr->lrh[3]);

		return_cnp(ibp, qp, src_qp, pkey, dlid, slid, sc5, grh);

	/*
	 * Get the number of bytes the message was padded by
	 * and drop incomplete packets.
	 */
	pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
	if (unlikely(tlen < (hdrsize + pad + 4)))

	tlen -= hdrsize + pad + 4;

	/*
	 * Check that the permissive LID is only used on QP0
	 * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
	 */
	if (qp->ibqp.qp_num) {
		if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
			     hdr->lrh[3] == IB_LID_PERMISSIVE))
		if (qp->ibqp.qp_num > 1) {
			struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);

			slid = be16_to_cpu(hdr->lrh[3]);
			if (unlikely(rcv_pkey_check(ppd, pkey, sc5, slid))) {
				/*
				 * Traps will not be sent for packets dropped
				 * by the HW. This is fine, as sending trap
				 * for invalid pkeys is optional according to
				 * IB spec (release 1.3, section 10.9.4)
				 */
				hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_P_KEY,
					       (be16_to_cpu(hdr->lrh[0]) >> 4) &
					       src_qp, qp->ibqp.qp_num,
					       be16_to_cpu(hdr->lrh[3]),
					       be16_to_cpu(hdr->lrh[1]));

			mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
			if (mgmt_pkey_idx < 0)
		if (unlikely(qkey != qp->qkey)) {
			hfi1_bad_pqkey(ibp, OPA_TRAP_BAD_Q_KEY, qkey,
				       (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
				       src_qp, qp->ibqp.qp_num,
				       be16_to_cpu(hdr->lrh[3]),
				       be16_to_cpu(hdr->lrh[1]));
		/* Drop invalid MAD packets (see 13.5.3.1). */
		if (unlikely(qp->ibqp.qp_num == 1 &&
			     (be16_to_cpu(hdr->lrh[0]) >> 12) == 15)))
		/* NOTE(review): else branch delimiters appear missing here. */
		/* Received on QP0, and so by definition, this is an SMP */
		struct opa_smp *smp = (struct opa_smp *)data;
		u16 slid = be16_to_cpu(hdr->lrh[3]);

		if (opa_smp_check(ibp, pkey, sc5, qp, slid, smp))

		if ((hdr->lrh[1] == IB_LID_PERMISSIVE ||
		     hdr->lrh[3] == IB_LID_PERMISSIVE) &&
		    smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)

		/* look up SMI pkey */
		mgmt_pkey_idx = hfi1_lookup_pkey_idx(ibp, pkey);
		if (mgmt_pkey_idx < 0)

	if (qp->ibqp.qp_num > 1 &&
	    opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
		wc.ex.imm_data = ohdr->u.ud.imm_data;
		wc.wc_flags = IB_WC_WITH_IMM;
	} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {

	/*
	 * A GRH is expected to precede the data even if not
	 * present on the wire.
	 */
	wc.byte_len = tlen + sizeof(struct ib_grh);

	/*
	 * Get the next work request entry to find where to put the data.
	 */
	if (qp->r_flags & RVT_R_REUSE_SGE) {
		qp->r_flags &= ~RVT_R_REUSE_SGE;
		ret = hfi1_rvt_get_rwqe(qp, 0);
		/* NOTE(review): error-check branch for ret appears truncated. */
			hfi1_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
		if (qp->ibqp.qp_num == 0)
			ibp->rvp.n_vl15_dropped++;

	/* Silently drop packets which are too big. */
	if (unlikely(wc.byte_len > qp->r_len)) {
		qp->r_flags |= RVT_R_REUSE_SGE;
	/* NOTE(review): the has_grh branch header appears missing here. */
		hfi1_copy_sge(&qp->r_sge, &hdr->u.l.grh,
			      sizeof(struct ib_grh), 1, 0);
		wc.wc_flags |= IB_WC_GRH;
		hfi1_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
	hfi1_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh),
	rvt_put_ss(&qp->r_sge);
	if (!test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags))
	wc.wr_id = qp->r_wr_id;
	wc.status = IB_WC_SUCCESS;
	wc.opcode = IB_WC_RECV;
	if (qp->ibqp.qp_type == IB_QPT_GSI ||
	    qp->ibqp.qp_type == IB_QPT_SMI) {
		if (mgmt_pkey_idx < 0) {
			if (net_ratelimit()) {
				struct hfi1_pportdata *ppd = ppd_from_ibp(ibp);
				struct hfi1_devdata *dd = ppd->dd;

				dd_dev_err(dd, "QP type %d mgmt_pkey_idx < 0 and packet not dropped???\n",
		wc.pkey_index = (unsigned)mgmt_pkey_idx;
	wc.slid = be16_to_cpu(hdr->lrh[3]);
	wc.sl = ibp->sc_to_sl[sc5];
	/*
	 * Save the LMC lower bits if the destination LID is a unicast LID.
	 */
	wc.dlid_path_bits = dlid >= be16_to_cpu(IB_MULTICAST_LID_BASE) ? 0 :
		dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
	wc.port_num = qp->port_num;
	/* Signal completion event if the solicited bit is set. */
	rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc,
		     cpu_to_be32(IB_BTH_SOLICITED)) != 0);
	ibp->rvp.n_pkt_drops++;