/*
 * Copyright(c) 2015, 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
49 #include "verbs_txreq.h"
52 /* cut down ridiculously long IB macro names */
53 #define OP(x) IB_OPCODE_UC_##x
55 /* only opcode mask for adaptive pio */
56 const u32 uc_only_opcode
=
57 BIT(OP(SEND_ONLY
) & 0x1f) |
58 BIT(OP(SEND_ONLY_WITH_IMMEDIATE
& 0x1f)) |
59 BIT(OP(RDMA_WRITE_ONLY
& 0x1f)) |
60 BIT(OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE
& 0x1f));
63 * hfi1_make_uc_req - construct a request packet (SEND, RDMA write)
64 * @qp: a pointer to the QP
66 * Assume s_lock is held.
68 * Return 1 if constructed; otherwise, return 0.
70 int hfi1_make_uc_req(struct rvt_qp
*qp
, struct hfi1_pkt_state
*ps
)
72 struct hfi1_qp_priv
*priv
= qp
->priv
;
73 struct hfi1_other_headers
*ohdr
;
81 ps
->s_txreq
= get_txreq(ps
->dev
, qp
);
82 if (IS_ERR(ps
->s_txreq
))
85 if (!(ib_rvt_state_ops
[qp
->state
] & RVT_PROCESS_SEND_OK
)) {
86 if (!(ib_rvt_state_ops
[qp
->state
] & RVT_FLUSH_SEND
))
88 /* We are in the error state, flush the work request. */
89 smp_read_barrier_depends(); /* see post_one_send() */
90 if (qp
->s_last
== ACCESS_ONCE(qp
->s_head
))
92 /* If DMAs are in progress, we can't flush immediately. */
93 if (iowait_sdma_pending(&priv
->s_iowait
)) {
94 qp
->s_flags
|= RVT_S_WAIT_DMA
;
98 wqe
= rvt_get_swqe_ptr(qp
, qp
->s_last
);
99 hfi1_send_complete(qp
, wqe
, IB_WC_WR_FLUSH_ERR
);
103 ohdr
= &ps
->s_txreq
->phdr
.hdr
.u
.oth
;
104 if (qp
->remote_ah_attr
.ah_flags
& IB_AH_GRH
)
105 ohdr
= &ps
->s_txreq
->phdr
.hdr
.u
.l
.oth
;
107 /* Get the next send request. */
108 wqe
= rvt_get_swqe_ptr(qp
, qp
->s_cur
);
110 switch (qp
->s_state
) {
112 if (!(ib_rvt_state_ops
[qp
->state
] &
113 RVT_PROCESS_NEXT_SEND_OK
))
115 /* Check if send work queue is empty. */
116 smp_read_barrier_depends(); /* see post_one_send() */
117 if (qp
->s_cur
== ACCESS_ONCE(qp
->s_head
)) {
122 * Start a new request.
124 qp
->s_psn
= wqe
->psn
;
125 qp
->s_sge
.sge
= wqe
->sg_list
[0];
126 qp
->s_sge
.sg_list
= wqe
->sg_list
+ 1;
127 qp
->s_sge
.num_sge
= wqe
->wr
.num_sge
;
128 qp
->s_sge
.total_len
= wqe
->length
;
131 switch (wqe
->wr
.opcode
) {
133 case IB_WR_SEND_WITH_IMM
:
135 qp
->s_state
= OP(SEND_FIRST
);
139 if (wqe
->wr
.opcode
== IB_WR_SEND
) {
140 qp
->s_state
= OP(SEND_ONLY
);
143 OP(SEND_ONLY_WITH_IMMEDIATE
);
144 /* Immediate data comes after the BTH */
145 ohdr
->u
.imm_data
= wqe
->wr
.ex
.imm_data
;
148 if (wqe
->wr
.send_flags
& IB_SEND_SOLICITED
)
149 bth0
|= IB_BTH_SOLICITED
;
151 if (++qp
->s_cur
>= qp
->s_size
)
155 case IB_WR_RDMA_WRITE
:
156 case IB_WR_RDMA_WRITE_WITH_IMM
:
157 ohdr
->u
.rc
.reth
.vaddr
=
158 cpu_to_be64(wqe
->rdma_wr
.remote_addr
);
159 ohdr
->u
.rc
.reth
.rkey
=
160 cpu_to_be32(wqe
->rdma_wr
.rkey
);
161 ohdr
->u
.rc
.reth
.length
= cpu_to_be32(len
);
162 hwords
+= sizeof(struct ib_reth
) / 4;
164 qp
->s_state
= OP(RDMA_WRITE_FIRST
);
168 if (wqe
->wr
.opcode
== IB_WR_RDMA_WRITE
) {
169 qp
->s_state
= OP(RDMA_WRITE_ONLY
);
172 OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE
);
173 /* Immediate data comes after the RETH */
174 ohdr
->u
.rc
.imm_data
= wqe
->wr
.ex
.imm_data
;
176 if (wqe
->wr
.send_flags
& IB_SEND_SOLICITED
)
177 bth0
|= IB_BTH_SOLICITED
;
180 if (++qp
->s_cur
>= qp
->s_size
)
190 qp
->s_state
= OP(SEND_MIDDLE
);
192 case OP(SEND_MIDDLE
):
196 middle
= HFI1_CAP_IS_KSET(SDMA_AHG
);
199 if (wqe
->wr
.opcode
== IB_WR_SEND
) {
200 qp
->s_state
= OP(SEND_LAST
);
202 qp
->s_state
= OP(SEND_LAST_WITH_IMMEDIATE
);
203 /* Immediate data comes after the BTH */
204 ohdr
->u
.imm_data
= wqe
->wr
.ex
.imm_data
;
207 if (wqe
->wr
.send_flags
& IB_SEND_SOLICITED
)
208 bth0
|= IB_BTH_SOLICITED
;
210 if (++qp
->s_cur
>= qp
->s_size
)
214 case OP(RDMA_WRITE_FIRST
):
215 qp
->s_state
= OP(RDMA_WRITE_MIDDLE
);
217 case OP(RDMA_WRITE_MIDDLE
):
221 middle
= HFI1_CAP_IS_KSET(SDMA_AHG
);
224 if (wqe
->wr
.opcode
== IB_WR_RDMA_WRITE
) {
225 qp
->s_state
= OP(RDMA_WRITE_LAST
);
228 OP(RDMA_WRITE_LAST_WITH_IMMEDIATE
);
229 /* Immediate data comes after the BTH */
230 ohdr
->u
.imm_data
= wqe
->wr
.ex
.imm_data
;
232 if (wqe
->wr
.send_flags
& IB_SEND_SOLICITED
)
233 bth0
|= IB_BTH_SOLICITED
;
236 if (++qp
->s_cur
>= qp
->s_size
)
241 qp
->s_hdrwords
= hwords
;
242 ps
->s_txreq
->sde
= priv
->s_sde
;
243 qp
->s_cur_sge
= &qp
->s_sge
;
244 qp
->s_cur_size
= len
;
245 hfi1_make_ruc_header(qp
, ohdr
, bth0
| (qp
->s_state
<< 24),
246 mask_psn(qp
->s_psn
++), middle
, ps
);
248 ps
->s_txreq
->hdr_dwords
= qp
->s_hdrwords
+ 2;
252 hfi1_put_txreq(ps
->s_txreq
);
257 hfi1_put_txreq(ps
->s_txreq
);
261 qp
->s_flags
&= ~RVT_S_BUSY
;
267 * hfi1_uc_rcv - handle an incoming UC packet
268 * @ibp: the port the packet came in on
269 * @hdr: the header of the packet
270 * @rcv_flags: flags relevant to rcv processing
271 * @data: the packet data
272 * @tlen: the length of the packet
273 * @qp: the QP for this packet.
275 * This is called from qp_rcv() to process an incoming UC packet
277 * Called at interrupt level.
279 void hfi1_uc_rcv(struct hfi1_packet
*packet
)
281 struct hfi1_ibport
*ibp
= &packet
->rcd
->ppd
->ibport_data
;
282 struct hfi1_ib_header
*hdr
= packet
->hdr
;
283 u32 rcv_flags
= packet
->rcv_flags
;
284 void *data
= packet
->ebuf
;
285 u32 tlen
= packet
->tlen
;
286 struct rvt_qp
*qp
= packet
->qp
;
287 struct hfi1_other_headers
*ohdr
= packet
->ohdr
;
289 u32 hdrsize
= packet
->hlen
;
294 struct ib_reth
*reth
;
295 int has_grh
= rcv_flags
& HFI1_HAS_GRH
;
299 bth0
= be32_to_cpu(ohdr
->bth
[0]);
300 if (hfi1_ruc_check_hdr(ibp
, hdr
, has_grh
, qp
, bth0
))
303 bth1
= be32_to_cpu(ohdr
->bth
[1]);
304 if (unlikely(bth1
& (HFI1_BECN_SMASK
| HFI1_FECN_SMASK
))) {
305 if (bth1
& HFI1_BECN_SMASK
) {
306 struct hfi1_pportdata
*ppd
= ppd_from_ibp(ibp
);
308 u16 rlid
= be16_to_cpu(hdr
->lrh
[3]);
311 lqpn
= bth1
& RVT_QPN_MASK
;
312 rqpn
= qp
->remote_qpn
;
314 sc5
= ibp
->sl_to_sc
[qp
->remote_ah_attr
.sl
];
315 sl
= ibp
->sc_to_sl
[sc5
];
317 process_becn(ppd
, sl
, rlid
, lqpn
, rqpn
,
321 if (bth1
& HFI1_FECN_SMASK
) {
322 struct ib_grh
*grh
= NULL
;
323 u16 pkey
= (u16
)be32_to_cpu(ohdr
->bth
[0]);
324 u16 slid
= be16_to_cpu(hdr
->lrh
[3]);
325 u16 dlid
= be16_to_cpu(hdr
->lrh
[1]);
326 u32 src_qp
= qp
->remote_qpn
;
329 sc5
= ibp
->sl_to_sc
[qp
->remote_ah_attr
.sl
];
333 return_cnp(ibp
, qp
, src_qp
, pkey
, dlid
, slid
, sc5
,
338 psn
= be32_to_cpu(ohdr
->bth
[2]);
339 opcode
= (bth0
>> 24) & 0xff;
341 /* Compare the PSN verses the expected PSN. */
342 if (unlikely(cmp_psn(psn
, qp
->r_psn
) != 0)) {
344 * Handle a sequence error.
345 * Silently drop any current message.
349 if (qp
->r_state
== OP(SEND_FIRST
) ||
350 qp
->r_state
== OP(SEND_MIDDLE
)) {
351 set_bit(RVT_R_REWIND_SGE
, &qp
->r_aflags
);
352 qp
->r_sge
.num_sge
= 0;
354 rvt_put_ss(&qp
->r_sge
);
356 qp
->r_state
= OP(SEND_LAST
);
360 case OP(SEND_ONLY_WITH_IMMEDIATE
):
363 case OP(RDMA_WRITE_FIRST
):
364 case OP(RDMA_WRITE_ONLY
):
365 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE
):
373 /* Check for opcode sequence errors. */
374 switch (qp
->r_state
) {
376 case OP(SEND_MIDDLE
):
377 if (opcode
== OP(SEND_MIDDLE
) ||
378 opcode
== OP(SEND_LAST
) ||
379 opcode
== OP(SEND_LAST_WITH_IMMEDIATE
))
383 case OP(RDMA_WRITE_FIRST
):
384 case OP(RDMA_WRITE_MIDDLE
):
385 if (opcode
== OP(RDMA_WRITE_MIDDLE
) ||
386 opcode
== OP(RDMA_WRITE_LAST
) ||
387 opcode
== OP(RDMA_WRITE_LAST_WITH_IMMEDIATE
))
392 if (opcode
== OP(SEND_FIRST
) ||
393 opcode
== OP(SEND_ONLY
) ||
394 opcode
== OP(SEND_ONLY_WITH_IMMEDIATE
) ||
395 opcode
== OP(RDMA_WRITE_FIRST
) ||
396 opcode
== OP(RDMA_WRITE_ONLY
) ||
397 opcode
== OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE
))
402 if (qp
->state
== IB_QPS_RTR
&& !(qp
->r_flags
& RVT_R_COMM_EST
))
405 /* OK, process the packet. */
409 case OP(SEND_ONLY_WITH_IMMEDIATE
):
411 if (test_and_clear_bit(RVT_R_REWIND_SGE
, &qp
->r_aflags
)) {
412 qp
->r_sge
= qp
->s_rdma_read_sge
;
414 ret
= hfi1_rvt_get_rwqe(qp
, 0);
420 * qp->s_rdma_read_sge will be the owner
421 * of the mr references.
423 qp
->s_rdma_read_sge
= qp
->r_sge
;
426 if (opcode
== OP(SEND_ONLY
))
427 goto no_immediate_data
;
428 else if (opcode
== OP(SEND_ONLY_WITH_IMMEDIATE
))
431 case OP(SEND_MIDDLE
):
432 /* Check for invalid length PMTU or posted rwqe len. */
433 if (unlikely(tlen
!= (hdrsize
+ pmtu
+ 4)))
435 qp
->r_rcv_len
+= pmtu
;
436 if (unlikely(qp
->r_rcv_len
> qp
->r_len
))
438 hfi1_copy_sge(&qp
->r_sge
, data
, pmtu
, 0, 0);
441 case OP(SEND_LAST_WITH_IMMEDIATE
):
443 wc
.ex
.imm_data
= ohdr
->u
.imm_data
;
444 wc
.wc_flags
= IB_WC_WITH_IMM
;
451 /* Get the number of bytes the message was padded by. */
452 pad
= (be32_to_cpu(ohdr
->bth
[0]) >> 20) & 3;
453 /* Check for invalid length. */
454 /* LAST len should be >= 1 */
455 if (unlikely(tlen
< (hdrsize
+ pad
+ 4)))
457 /* Don't count the CRC. */
458 tlen
-= (hdrsize
+ pad
+ 4);
459 wc
.byte_len
= tlen
+ qp
->r_rcv_len
;
460 if (unlikely(wc
.byte_len
> qp
->r_len
))
462 wc
.opcode
= IB_WC_RECV
;
463 hfi1_copy_sge(&qp
->r_sge
, data
, tlen
, 0, 0);
464 rvt_put_ss(&qp
->s_rdma_read_sge
);
466 wc
.wr_id
= qp
->r_wr_id
;
467 wc
.status
= IB_WC_SUCCESS
;
469 wc
.src_qp
= qp
->remote_qpn
;
470 wc
.slid
= qp
->remote_ah_attr
.dlid
;
472 * It seems that IB mandates the presence of an SL in a
473 * work completion only for the UD transport (see section
474 * 11.4.2 of IBTA Vol. 1).
476 * However, the way the SL is chosen below is consistent
477 * with the way that IB/qib works and is trying avoid
478 * introducing incompatibilities.
480 * See also OPA Vol. 1, section 9.7.6, and table 9-17.
482 wc
.sl
= qp
->remote_ah_attr
.sl
;
483 /* zero fields that are N/A */
486 wc
.dlid_path_bits
= 0;
488 /* Signal completion event if the solicited bit is set. */
489 rvt_cq_enter(ibcq_to_rvtcq(qp
->ibqp
.recv_cq
), &wc
,
491 cpu_to_be32(IB_BTH_SOLICITED
)) != 0);
494 case OP(RDMA_WRITE_FIRST
):
495 case OP(RDMA_WRITE_ONLY
):
496 case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE
): /* consume RWQE */
498 if (unlikely(!(qp
->qp_access_flags
&
499 IB_ACCESS_REMOTE_WRITE
))) {
502 reth
= &ohdr
->u
.rc
.reth
;
503 qp
->r_len
= be32_to_cpu(reth
->length
);
505 qp
->r_sge
.sg_list
= NULL
;
506 if (qp
->r_len
!= 0) {
507 u32 rkey
= be32_to_cpu(reth
->rkey
);
508 u64 vaddr
= be64_to_cpu(reth
->vaddr
);
512 ok
= rvt_rkey_ok(qp
, &qp
->r_sge
.sge
, qp
->r_len
,
513 vaddr
, rkey
, IB_ACCESS_REMOTE_WRITE
);
516 qp
->r_sge
.num_sge
= 1;
518 qp
->r_sge
.num_sge
= 0;
519 qp
->r_sge
.sge
.mr
= NULL
;
520 qp
->r_sge
.sge
.vaddr
= NULL
;
521 qp
->r_sge
.sge
.length
= 0;
522 qp
->r_sge
.sge
.sge_length
= 0;
524 if (opcode
== OP(RDMA_WRITE_ONLY
)) {
526 } else if (opcode
== OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE
)) {
527 wc
.ex
.imm_data
= ohdr
->u
.rc
.imm_data
;
531 case OP(RDMA_WRITE_MIDDLE
):
532 /* Check for invalid length PMTU or posted rwqe len. */
533 if (unlikely(tlen
!= (hdrsize
+ pmtu
+ 4)))
535 qp
->r_rcv_len
+= pmtu
;
536 if (unlikely(qp
->r_rcv_len
> qp
->r_len
))
538 hfi1_copy_sge(&qp
->r_sge
, data
, pmtu
, 1, 0);
541 case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE
):
542 wc
.ex
.imm_data
= ohdr
->u
.imm_data
;
544 wc
.wc_flags
= IB_WC_WITH_IMM
;
546 /* Get the number of bytes the message was padded by. */
547 pad
= (be32_to_cpu(ohdr
->bth
[0]) >> 20) & 3;
548 /* Check for invalid length. */
549 /* LAST len should be >= 1 */
550 if (unlikely(tlen
< (hdrsize
+ pad
+ 4)))
552 /* Don't count the CRC. */
553 tlen
-= (hdrsize
+ pad
+ 4);
554 if (unlikely(tlen
+ qp
->r_rcv_len
!= qp
->r_len
))
556 if (test_and_clear_bit(RVT_R_REWIND_SGE
, &qp
->r_aflags
)) {
557 rvt_put_ss(&qp
->s_rdma_read_sge
);
559 ret
= hfi1_rvt_get_rwqe(qp
, 1);
565 wc
.byte_len
= qp
->r_len
;
566 wc
.opcode
= IB_WC_RECV_RDMA_WITH_IMM
;
567 hfi1_copy_sge(&qp
->r_sge
, data
, tlen
, 1, 0);
568 rvt_put_ss(&qp
->r_sge
);
571 case OP(RDMA_WRITE_LAST
):
573 /* Get the number of bytes the message was padded by. */
574 pad
= (be32_to_cpu(ohdr
->bth
[0]) >> 20) & 3;
575 /* Check for invalid length. */
576 /* LAST len should be >= 1 */
577 if (unlikely(tlen
< (hdrsize
+ pad
+ 4)))
579 /* Don't count the CRC. */
580 tlen
-= (hdrsize
+ pad
+ 4);
581 if (unlikely(tlen
+ qp
->r_rcv_len
!= qp
->r_len
))
583 hfi1_copy_sge(&qp
->r_sge
, data
, tlen
, 1, 0);
584 rvt_put_ss(&qp
->r_sge
);
588 /* Drop packet for unknown opcodes. */
592 qp
->r_state
= opcode
;
596 set_bit(RVT_R_REWIND_SGE
, &qp
->r_aflags
);
597 qp
->r_sge
.num_sge
= 0;
599 ibp
->rvp
.n_pkt_drops
++;
603 hfi1_rc_error(qp
, IB_WC_LOC_QP_OP_ERR
);