2 * Copyright(c) 2015 - 2017 Intel Corporation.
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
24 * - Redistributions of source code must retain the above copyright
25 * notice, this list of conditions and the following disclaimer.
26 * - Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in
28 * the documentation and/or other materials provided with the
30 * - Neither the name of Intel Corporation nor the names of its
31 * contributors may be used to endorse or promote products derived
32 * from this software without specific prior written permission.
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
48 #include <linux/spinlock.h>
53 #include "verbs_txreq.h"
57 * Validate a RWQE and fill in the SGE state.
/*
 * init_sge - validate a receive WQE and fill in the QP's receive SGE state.
 * @qp: the QP the receive WQE belongs to
 * @wqe: the receive WQE to validate
 *
 * The LKEY table comes from the device and the PD from the SRQ when one is
 * attached, otherwise from the QP.  Each entry of wqe->sg_list is checked
 * with rvt_lkey_ok() for IB_ACCESS_LOCAL_WRITE and its length is added to
 * qp->r_len; ss->total_len is then set to qp->r_len.  The final fragment
 * drops SGE entries from the end of the list and posts an
 * IB_WC_LOC_PROT_ERR / IB_WC_RECV completion for wqe->wr_id on the
 * receive CQ.
 *
 * NOTE(review): this listing is a lossy extraction.  Braces, several
 * declarations, the handling of zero-length SGEs, the labels and every
 * return statement are missing, so the return contract cannot be stated
 * from here -- confirm against the complete source.
 */
60 static int init_sge(struct rvt_qp
*qp
, struct rvt_rwqe
*wqe
)
64 struct rvt_lkey_table
*rkt
;
66 struct rvt_sge_state
*ss
;
68 rkt
= &to_idev(qp
->ibqp
.device
)->rdi
.lkey_table
;
/* Use the SRQ's protection domain when the QP is attached to an SRQ. */
69 pd
= ibpd_to_rvtpd(qp
->ibqp
.srq
? qp
->ibqp
.srq
->pd
: qp
->ibqp
.pd
);
71 ss
->sg_list
= qp
->r_sg_list
;
73 for (i
= j
= 0; i
< wqe
->num_sge
; i
++) {
74 if (wqe
->sg_list
[i
].length
== 0)
/* The first accepted entry lands in ss->sge, later ones in ss->sg_list[]. */
77 ret
= rvt_lkey_ok(rkt
, pd
, j
? &ss
->sg_list
[j
- 1] : &ss
->sge
,
78 NULL
, &wqe
->sg_list
[i
],
79 IB_ACCESS_LOCAL_WRITE
);
80 if (unlikely(ret
<= 0))
82 qp
->r_len
+= wqe
->sg_list
[i
].length
;
86 ss
->total_len
= qp
->r_len
;
92 struct rvt_sge
*sge
= --j
? &ss
->sg_list
[j
- 1] : &ss
->sge
;
/* Build a local protection error completion for this receive WQE. */
97 memset(&wc
, 0, sizeof(wc
));
98 wc
.wr_id
= wqe
->wr_id
;
99 wc
.status
= IB_WC_LOC_PROT_ERR
;
100 wc
.opcode
= IB_WC_RECV
;
102 /* Signal solicited completion event. */
103 rvt_cq_enter(ibcq_to_rvtcq(qp
->ibqp
.recv_cq
), &wc
, 1);
110 * hfi1_rvt_get_rwqe - copy the next RWQE into the QP's RWQE
112 * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
114 * Return -1 if there is a local error, 0 if no RWQE is available,
115 * otherwise return 1.
117 * Can be called from interrupt level.
/*
 * hfi1_rvt_get_rwqe - fetch the next receive WQE for @qp.
 * @qp: the QP
 * @wr_id_only: when non-zero, init_sge() is not called for the WQE
 *
 * Visible steps: the receive queue lock is taken with interrupts saved,
 * the QP state is checked for RVT_PROCESS_RECV_OK, the tail index is
 * range-checked against rq->size and compared with wq->head, the entry is
 * fetched with rvt_get_rwqe_ptr() and the tail is advanced with a wrap
 * check.  qp->r_wr_id is loaded from the WQE and RVT_R_WRID_VALID is set.
 * When the number of remaining entries n drops below srq->limit the lock
 * is released and the SRQ event handler is called with
 * IB_EVENT_SRQ_LIMIT_REACHED.
 *
 * NOTE(review): lossy extraction -- declarations, the SRQ/non-SRQ
 * selection of rq/wq, the bodies of most conditionals and the return
 * statements are missing; confirm against the complete source.
 */
119 int hfi1_rvt_get_rwqe(struct rvt_qp
*qp
, int wr_id_only
)
125 struct rvt_rwqe
*wqe
;
126 void (*handler
)(struct ib_event
*, void *);
131 srq
= ibsrq_to_rvtsrq(qp
->ibqp
.srq
);
132 handler
= srq
->ibsrq
.event_handler
;
140 spin_lock_irqsave(&rq
->lock
, flags
);
141 if (!(ib_rvt_state_ops
[qp
->state
] & RVT_PROCESS_RECV_OK
)) {
148 /* Validate tail before using it since it is user writable. */
149 if (tail
>= rq
->size
)
151 if (unlikely(tail
== wq
->head
)) {
155 /* Make sure entry is read after head index is read. */
157 wqe
= rvt_get_rwqe_ptr(rq
, tail
);
159 * Even though we update the tail index in memory, the verbs
160 * consumer is not supposed to post more entries until a
161 * completion is generated.
163 if (++tail
>= rq
->size
)
/* init_sge() is only evaluated when the caller wants the SGE state too. */
166 if (!wr_id_only
&& !init_sge(qp
, wqe
)) {
170 qp
->r_wr_id
= wqe
->wr_id
;
173 set_bit(RVT_R_WRID_VALID
, &qp
->r_aflags
);
178 * Validate head pointer value and compute
179 * the number of remaining WQEs.
185 n
+= rq
->size
- tail
;
188 if (n
< srq
->limit
) {
/* The lock is dropped before the event handler is called. */
192 spin_unlock_irqrestore(&rq
->lock
, flags
);
193 ev
.device
= qp
->ibqp
.device
;
194 ev
.element
.srq
= qp
->ibqp
.srq
;
195 ev
.event
= IB_EVENT_SRQ_LIMIT_REACHED
;
196 handler(&ev
, srq
->ibsrq
.srq_context
);
201 spin_unlock_irqrestore(&rq
->lock
, flags
);
206 static int gid_ok(union ib_gid
*gid
, __be64 gid_prefix
, __be64 id
)
208 return (gid
->global
.interface_id
== id
&&
209 (gid
->global
.subnet_prefix
== gid_prefix
||
210 gid
->global
.subnet_prefix
== IB_DEFAULT_GID_PREFIX
));
215 * This should be called with the QP r_lock held.
217 * The s_lock will be acquired around the hfi1_migrate_qp() call.
/*
 * hfi1_ruc_check_hdr - check a received packet's header against the QP's
 * address information.
 * @ibp: the port the packet arrived on
 * @packet: the received packet (supplies qp, dlid, slid, ohdr, grh, etype)
 *
 * The pkey and the migration-request bit are taken from the 16B header
 * for RHF_RCV_TYPE_BYPASS packets and from the BTH otherwise.  When the QP
 * is in IB_MIG_ARMED and the packet has the migration bit set, the checks
 * (GRH presence, gid_ok(), rcv_pkey_check()/hfi1_bad_pkey(), SLID and
 * port) run against qp->alt_ah_attr and qp->s_lock is taken afterwards.
 * Otherwise the same checks run against qp->remote_ah_attr, and a QP in
 * IB_MIG_REARM that sees a packet without the migration bit is moved to
 * IB_MIG_ARMED.
 *
 * NOTE(review): lossy extraction -- the AH flag operands, the error
 * exits, the call made under s_lock and the return statements are
 * missing; the return contract cannot be stated from here.
 */
219 int hfi1_ruc_check_hdr(struct hfi1_ibport
*ibp
, struct hfi1_packet
*packet
)
223 struct rvt_qp
*qp
= packet
->qp
;
224 u8 sc5
= ibp
->sl_to_sc
[rdma_ah_get_sl(&qp
->remote_ah_attr
)];
225 u32 dlid
= packet
->dlid
;
226 u32 slid
= packet
->slid
;
232 bth0
= be32_to_cpu(packet
->ohdr
->bth
[0]);
233 bth1
= be32_to_cpu(packet
->ohdr
->bth
[1]);
/* 16B (bypass) and 9B packets carry the pkey and migration bit differently. */
234 if (packet
->etype
== RHF_RCV_TYPE_BYPASS
) {
235 pkey
= hfi1_16B_get_pkey(packet
->hdr
);
236 migrated
= bth1
& OPA_BTH_MIG_REQ
;
238 pkey
= ib_bth_get_pkey(packet
->ohdr
);
239 migrated
= bth0
& IB_BTH_MIG_REQ
;
/* Armed QP plus migration bit: validate against the alternate path. */
242 if (qp
->s_mig_state
== IB_MIG_ARMED
&& migrated
) {
244 if ((rdma_ah_get_ah_flags(&qp
->alt_ah_attr
) &
246 (packet
->etype
!= RHF_RCV_TYPE_BYPASS
))
249 const struct ib_global_route
*grh
;
251 if (!(rdma_ah_get_ah_flags(&qp
->alt_ah_attr
) &
254 grh
= rdma_ah_read_grh(&qp
->alt_ah_attr
);
255 guid
= get_sguid(ibp
, grh
->sgid_index
);
256 if (!gid_ok(&packet
->grh
->dgid
, ibp
->rvp
.gid_prefix
,
261 grh
->dgid
.global
.subnet_prefix
,
262 grh
->dgid
.global
.interface_id
))
265 if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp
), pkey
,
267 hfi1_bad_pkey(ibp
, pkey
, sl
, 0, qp
->ibqp
.qp_num
,
271 /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
272 if (slid
!= rdma_ah_get_dlid(&qp
->alt_ah_attr
) ||
273 ppd_from_ibp(ibp
)->port
!=
274 rdma_ah_get_port_num(&qp
->alt_ah_attr
))
276 spin_lock_irqsave(&qp
->s_lock
, flags
);
278 spin_unlock_irqrestore(&qp
->s_lock
, flags
);
/* The fragments below validate against the primary path (remote_ah_attr). */
281 if ((rdma_ah_get_ah_flags(&qp
->remote_ah_attr
) &
283 (packet
->etype
!= RHF_RCV_TYPE_BYPASS
))
286 const struct ib_global_route
*grh
;
288 if (!(rdma_ah_get_ah_flags(&qp
->remote_ah_attr
) &
291 grh
= rdma_ah_read_grh(&qp
->remote_ah_attr
);
292 guid
= get_sguid(ibp
, grh
->sgid_index
);
293 if (!gid_ok(&packet
->grh
->dgid
, ibp
->rvp
.gid_prefix
,
298 grh
->dgid
.global
.subnet_prefix
,
299 grh
->dgid
.global
.interface_id
))
302 if (unlikely(rcv_pkey_check(ppd_from_ibp(ibp
), pkey
,
304 hfi1_bad_pkey(ibp
, pkey
, sl
, 0, qp
->ibqp
.qp_num
,
308 /* Validate the SLID. See Ch. 9.6.1.5 */
309 if ((slid
!= rdma_ah_get_dlid(&qp
->remote_ah_attr
)) ||
310 ppd_from_ibp(ibp
)->port
!= qp
->port_num
)
312 if (qp
->s_mig_state
== IB_MIG_REARM
&& !migrated
)
313 qp
->s_mig_state
= IB_MIG_ARMED
;
320 * ruc_loopback - handle UC and RC loopback requests
321 * @sqp: the sending QP
323 * This is called from hfi1_do_send() to
324 * forward a WQE addressed to the same HFI.
325 * Note that although we are single threaded due to the send engine, we still
326 * have to protect against post_send(). We don't have to worry about
327 * receive interrupts since this is a connected protocol and all packets
328 * will pass through here.
/*
 * ruc_loopback - deliver a send WQE to a QP on the same HFI.
 * @sqp: the sending QP
 *
 * Visible steps: the destination QP is looked up with rvt_lookup_qpn().
 * Under sqp->s_lock the sender is checked for being busy/waiting and for
 * a state that allows processing or flushing sends, then marked
 * RVT_S_BUSY, and the WQE at s_last is selected.  After the lock is
 * dropped the responder QP is validated, the WQE opcode is dispatched
 * (local invalidate, send variants, RDMA write/read, atomics), the
 * payload is copied into the responder's r_sge with hfi1_copy_sge(), a
 * receive completion is queued with rvt_cq_enter() and the send side is
 * completed with hfi1_send_complete().  The trailing fragments cover RNR
 * NAK accounting and retry, and the error statuses that are passed to
 * rvt_rc_error() for the responder.
 *
 * NOTE(review): lossy extraction -- declarations, labels, gotos, several
 * case labels, loop headers and many statements are missing.  The control
 * flow between the visible fragments must be confirmed against the
 * complete source.
 */
330 static void ruc_loopback(struct rvt_qp
*sqp
)
332 struct hfi1_ibport
*ibp
= to_iport(sqp
->ibqp
.device
, sqp
->port_num
);
334 struct rvt_swqe
*wqe
;
340 enum ib_wc_status send_status
;
343 bool copy_last
= false;
349 * Note that we check the responder QP state after
350 * checking the requester's state.
352 qp
= rvt_lookup_qpn(ib_to_rvt(sqp
->ibqp
.device
), &ibp
->rvp
,
355 spin_lock_irqsave(&sqp
->s_lock
, flags
);
357 /* Return if we are already busy processing a work request. */
358 if ((sqp
->s_flags
& (RVT_S_BUSY
| RVT_S_ANY_WAIT
)) ||
359 !(ib_rvt_state_ops
[sqp
->state
] & RVT_PROCESS_OR_FLUSH_SEND
))
362 sqp
->s_flags
|= RVT_S_BUSY
;
365 smp_read_barrier_depends(); /* see post_one_send() */
366 if (sqp
->s_last
== READ_ONCE(sqp
->s_head
))
368 wqe
= rvt_get_swqe_ptr(sqp
, sqp
->s_last
);
370 /* Return if it is not OK to start a new work request. */
371 if (!(ib_rvt_state_ops
[sqp
->state
] & RVT_PROCESS_NEXT_SEND_OK
)) {
372 if (!(ib_rvt_state_ops
[sqp
->state
] & RVT_FLUSH_SEND
))
374 /* We are in the error state, flush the work request. */
375 send_status
= IB_WC_WR_FLUSH_ERR
;
380 * We can rely on the entry not changing without the s_lock
381 * being held until we update s_last.
382 * We increment s_cur to indicate s_last is in progress.
384 if (sqp
->s_last
== sqp
->s_cur
) {
385 if (++sqp
->s_cur
>= sqp
->s_size
)
388 spin_unlock_irqrestore(&sqp
->s_lock
, flags
);
/* Responder must exist, accept receives and be of the same QP type. */
390 if (!qp
|| !(ib_rvt_state_ops
[qp
->state
] & RVT_PROCESS_RECV_OK
) ||
391 qp
->ibqp
.qp_type
!= sqp
->ibqp
.qp_type
) {
392 ibp
->rvp
.n_pkt_drops
++;
394 * For RC, the requester would timeout and retry so
395 * shortcut the timeouts and just signal too many retries.
397 if (sqp
->ibqp
.qp_type
== IB_QPT_RC
)
398 send_status
= IB_WC_RETRY_EXC_ERR
;
400 send_status
= IB_WC_SUCCESS
;
404 memset(&wc
, 0, sizeof(wc
));
405 send_status
= IB_WC_SUCCESS
;
/* Load the sender's SGE state from the WQE before dispatching on opcode. */
408 sqp
->s_sge
.sge
= wqe
->sg_list
[0];
409 sqp
->s_sge
.sg_list
= wqe
->sg_list
+ 1;
410 sqp
->s_sge
.num_sge
= wqe
->wr
.num_sge
;
411 sqp
->s_len
= wqe
->length
;
412 switch (wqe
->wr
.opcode
) {
416 case IB_WR_LOCAL_INV
:
417 if (!(wqe
->wr
.send_flags
& RVT_SEND_COMPLETION_ONLY
)) {
418 if (rvt_invalidate_rkey(sqp
,
419 wqe
->wr
.ex
.invalidate_rkey
))
420 send_status
= IB_WC_LOC_PROT_ERR
;
425 case IB_WR_SEND_WITH_INV
:
426 if (!rvt_invalidate_rkey(qp
, wqe
->wr
.ex
.invalidate_rkey
)) {
427 wc
.wc_flags
= IB_WC_WITH_INVALIDATE
;
428 wc
.ex
.invalidate_rkey
= wqe
->wr
.ex
.invalidate_rkey
;
432 case IB_WR_SEND_WITH_IMM
:
433 wc
.wc_flags
= IB_WC_WITH_IMM
;
434 wc
.ex
.imm_data
= wqe
->wr
.ex
.imm_data
;
438 ret
= hfi1_rvt_get_rwqe(qp
, 0);
445 case IB_WR_RDMA_WRITE_WITH_IMM
:
446 if (unlikely(!(qp
->qp_access_flags
& IB_ACCESS_REMOTE_WRITE
)))
448 wc
.wc_flags
= IB_WC_WITH_IMM
;
449 wc
.ex
.imm_data
= wqe
->wr
.ex
.imm_data
;
450 ret
= hfi1_rvt_get_rwqe(qp
, 1);
455 /* skip copy_last set and qp_access_flags recheck */
457 case IB_WR_RDMA_WRITE
:
458 copy_last
= rvt_is_user_qp(qp
);
459 if (unlikely(!(qp
->qp_access_flags
& IB_ACCESS_REMOTE_WRITE
)))
462 if (wqe
->length
== 0)
464 if (unlikely(!rvt_rkey_ok(qp
, &qp
->r_sge
.sge
, wqe
->length
,
465 wqe
->rdma_wr
.remote_addr
,
467 IB_ACCESS_REMOTE_WRITE
)))
469 qp
->r_sge
.sg_list
= NULL
;
470 qp
->r_sge
.num_sge
= 1;
471 qp
->r_sge
.total_len
= wqe
->length
;
474 case IB_WR_RDMA_READ
:
475 if (unlikely(!(qp
->qp_access_flags
& IB_ACCESS_REMOTE_READ
)))
477 if (unlikely(!rvt_rkey_ok(qp
, &sqp
->s_sge
.sge
, wqe
->length
,
478 wqe
->rdma_wr
.remote_addr
,
480 IB_ACCESS_REMOTE_READ
)))
/* For a read the roles swap: the WQE's SGEs become the copy destination. */
483 sqp
->s_sge
.sg_list
= NULL
;
484 sqp
->s_sge
.num_sge
= 1;
485 qp
->r_sge
.sge
= wqe
->sg_list
[0];
486 qp
->r_sge
.sg_list
= wqe
->sg_list
+ 1;
487 qp
->r_sge
.num_sge
= wqe
->wr
.num_sge
;
488 qp
->r_sge
.total_len
= wqe
->length
;
491 case IB_WR_ATOMIC_CMP_AND_SWP
:
492 case IB_WR_ATOMIC_FETCH_AND_ADD
:
493 if (unlikely(!(qp
->qp_access_flags
& IB_ACCESS_REMOTE_ATOMIC
)))
495 if (unlikely(!rvt_rkey_ok(qp
, &qp
->r_sge
.sge
, sizeof(u64
),
496 wqe
->atomic_wr
.remote_addr
,
498 IB_ACCESS_REMOTE_ATOMIC
)))
500 /* Perform atomic OP and save result. */
501 maddr
= (atomic64_t
*)qp
->r_sge
.sge
.vaddr
;
502 sdata
= wqe
->atomic_wr
.compare_add
;
503 *(u64
*)sqp
->s_sge
.sge
.vaddr
=
504 (wqe
->wr
.opcode
== IB_WR_ATOMIC_FETCH_AND_ADD
) ?
505 (u64
)atomic64_add_return(sdata
, maddr
) - sdata
:
506 (u64
)cmpxchg((u64
*)qp
->r_sge
.sge
.vaddr
,
507 sdata
, wqe
->atomic_wr
.swap
);
508 rvt_put_mr(qp
->r_sge
.sge
.mr
);
509 qp
->r_sge
.num_sge
= 0;
513 send_status
= IB_WC_LOC_QP_OP_ERR
;
/* Copy fragments: the chunk length is limited by the current sender SGE. */
517 sge
= &sqp
->s_sge
.sge
;
519 u32 len
= sqp
->s_len
;
521 if (len
> sge
->length
)
523 if (len
> sge
->sge_length
)
524 len
= sge
->sge_length
;
525 WARN_ON_ONCE(len
== 0);
526 hfi1_copy_sge(&qp
->r_sge
, sge
->vaddr
, len
, release
, copy_last
);
529 sge
->sge_length
-= len
;
530 if (sge
->sge_length
== 0) {
533 if (--sqp
->s_sge
.num_sge
)
534 *sge
= *sqp
->s_sge
.sg_list
++;
535 } else if (sge
->length
== 0 && sge
->mr
->lkey
) {
536 if (++sge
->n
>= RVT_SEGSZ
) {
537 if (++sge
->m
>= sge
->mr
->mapsz
)
542 sge
->mr
->map
[sge
->m
]->segs
[sge
->n
].vaddr
;
544 sge
->mr
->map
[sge
->m
]->segs
[sge
->n
].length
;
549 rvt_put_ss(&qp
->r_sge
);
551 if (!test_and_clear_bit(RVT_R_WRID_VALID
, &qp
->r_aflags
))
/* Fill in the receive-side completion for the responder QP. */
554 if (wqe
->wr
.opcode
== IB_WR_RDMA_WRITE_WITH_IMM
)
555 wc
.opcode
= IB_WC_RECV_RDMA_WITH_IMM
;
557 wc
.opcode
= IB_WC_RECV
;
558 wc
.wr_id
= qp
->r_wr_id
;
559 wc
.status
= IB_WC_SUCCESS
;
560 wc
.byte_len
= wqe
->length
;
562 wc
.src_qp
= qp
->remote_qpn
;
563 wc
.slid
= rdma_ah_get_dlid(&qp
->remote_ah_attr
) & U16_MAX
;
564 wc
.sl
= rdma_ah_get_sl(&qp
->remote_ah_attr
);
566 /* Signal completion event if the solicited bit is set. */
567 rvt_cq_enter(ibcq_to_rvtcq(qp
->ibqp
.recv_cq
), &wc
,
568 wqe
->wr
.send_flags
& IB_SEND_SOLICITED
);
571 spin_lock_irqsave(&sqp
->s_lock
, flags
);
572 ibp
->rvp
.n_loop_pkts
++;
574 sqp
->s_rnr_retry
= sqp
->s_rnr_retry_cnt
;
575 hfi1_send_complete(sqp
, wqe
, send_status
);
577 atomic_dec(&sqp
->local_ops_pending
);
584 if (qp
->ibqp
.qp_type
== IB_QPT_UC
)
586 ibp
->rvp
.n_rnr_naks
++;
588 * Note: we don't need the s_lock held since the BUSY flag
589 * makes this single threaded.
591 if (sqp
->s_rnr_retry
== 0) {
592 send_status
= IB_WC_RNR_RETRY_EXC_ERR
;
595 if (sqp
->s_rnr_retry_cnt
< 7)
597 spin_lock_irqsave(&sqp
->s_lock
, flags
);
598 if (!(ib_rvt_state_ops
[sqp
->state
] & RVT_PROCESS_RECV_OK
))
600 rvt_add_rnr_timer(sqp
, qp
->r_min_rnr_timer
<<
601 IB_AETH_CREDIT_SHIFT
);
/* Error fragments: each pairs a sender status with a responder status. */
605 send_status
= IB_WC_REM_OP_ERR
;
606 wc
.status
= IB_WC_LOC_QP_OP_ERR
;
610 send_status
= IB_WC_REM_INV_REQ_ERR
;
611 wc
.status
= IB_WC_LOC_QP_OP_ERR
;
615 send_status
= IB_WC_REM_ACCESS_ERR
;
616 wc
.status
= IB_WC_LOC_PROT_ERR
;
618 /* responder goes to error state */
619 rvt_rc_error(qp
, wc
.status
);
622 spin_lock_irqsave(&sqp
->s_lock
, flags
);
623 hfi1_send_complete(sqp
, wqe
, send_status
);
624 if (sqp
->ibqp
.qp_type
== IB_QPT_RC
) {
625 int lastwqe
= rvt_error_qp(sqp
, IB_WC_WR_FLUSH_ERR
);
627 sqp
->s_flags
&= ~RVT_S_BUSY
;
628 spin_unlock_irqrestore(&sqp
->s_lock
, flags
);
632 ev
.device
= sqp
->ibqp
.device
;
633 ev
.element
.qp
= &sqp
->ibqp
;
634 ev
.event
= IB_EVENT_QP_LAST_WQE_REACHED
;
635 sqp
->ibqp
.event_handler(&ev
, sqp
->ibqp
.qp_context
);
640 sqp
->s_flags
&= ~RVT_S_BUSY
;
642 spin_unlock_irqrestore(&sqp
->s_lock
, flags
);
648 * hfi1_make_grh - construct a GRH header
649 * @ibp: a pointer to the IB port
650 * @hdr: a pointer to the GRH header being constructed
651 * @grh: the global route address to send to
652 * @hwords: size of header after grh being sent in dwords
653 * @nwords: the number of 32 bit words of data being sent
655 * Return the size of the header in 32 bit words.
657 u32
hfi1_make_grh(struct hfi1_ibport
*ibp
, struct ib_grh
*hdr
,
658 const struct ib_global_route
*grh
, u32 hwords
, u32 nwords
)
660 hdr
->version_tclass_flow
=
661 cpu_to_be32((IB_GRH_VERSION
<< IB_GRH_VERSION_SHIFT
) |
662 (grh
->traffic_class
<< IB_GRH_TCLASS_SHIFT
) |
663 (grh
->flow_label
<< IB_GRH_FLOW_SHIFT
));
664 hdr
->paylen
= cpu_to_be16((hwords
+ nwords
) << 2);
665 /* next_hdr is defined by C8-7 in ch. 8.4.1 */
666 hdr
->next_hdr
= IB_GRH_NEXT_HDR
;
667 hdr
->hop_limit
= grh
->hop_limit
;
668 /* The SGID is 32-bit aligned. */
669 hdr
->sgid
.global
.subnet_prefix
= ibp
->rvp
.gid_prefix
;
670 hdr
->sgid
.global
.interface_id
=
671 grh
->sgid_index
< HFI1_GUIDS_PER_PORT
?
672 get_sguid(ibp
, grh
->sgid_index
) :
673 get_sguid(ibp
, HFI1_PORT_GUID_INDEX
);
674 hdr
->dgid
= grh
->dgid
;
676 /* GRH header size in 32-bit words. */
677 return sizeof(struct ib_grh
) / sizeof(u32
);
/* Offset of bth[2] inside struct hfi1_sdma_header, in 32-bit words. */
#define BTH2_OFFSET (offsetof(struct hfi1_sdma_header, \
			      hdr.ibh.u.oth.bth[2]) / 4)
684 * build_ahg - create ahg in s_ahg
685 * @qp: a pointer to QP
686 * @npsn: the next PSN for the request/response
688 * This routine handles the AHG by allocating an ahg entry and causing the
689 * copy of the first middle.
691 * Subsequent middles use the copied entry, editing the
692 * PSN with 1 or 2 edits.
/*
 * build_ahg - set up the AHG state in priv->s_ahg for the next packet.
 * @qp: a pointer to QP
 * @npsn: the next PSN for the request/response
 *
 * While RVT_S_AHG_VALID is not set, an AHG index is allocated with
 * sdma_ahg_alloc() if qp->s_ahgidx is negative; when an index is
 * available the request is flagged SDMA_TXREQ_F_AHG_COPY, the index is
 * saved in s_ahg and RVT_S_AHG_VALID is set.  The later fragment (marked
 * "subsequent middle after valid") flags SDMA_TXREQ_F_USE_AHG and builds
 * a descriptor from the low 16 bits of @npsn, plus a second descriptor
 * when the upper 16 bits of @npsn differ from those of qp->s_ahgpsn.
 *
 * NOTE(review): lossy extraction -- the action for RVT_S_AHG_CLEAR, the
 * remaining descriptor arguments and the closing statements are missing.
 */
694 static inline void build_ahg(struct rvt_qp
*qp
, u32 npsn
)
696 struct hfi1_qp_priv
*priv
= qp
->priv
;
698 if (unlikely(qp
->s_flags
& RVT_S_AHG_CLEAR
))
700 if (!(qp
->s_flags
& RVT_S_AHG_VALID
)) {
701 /* first middle that needs copy */
702 if (qp
->s_ahgidx
< 0)
703 qp
->s_ahgidx
= sdma_ahg_alloc(priv
->s_sde
);
704 if (qp
->s_ahgidx
>= 0) {
706 priv
->s_ahg
->tx_flags
|= SDMA_TXREQ_F_AHG_COPY
;
707 /* save to protect a change in another thread */
708 priv
->s_ahg
->ahgidx
= qp
->s_ahgidx
;
709 qp
->s_flags
|= RVT_S_AHG_VALID
;
712 /* subsequent middle after valid */
713 if (qp
->s_ahgidx
>= 0) {
714 priv
->s_ahg
->tx_flags
|= SDMA_TXREQ_F_USE_AHG
;
715 priv
->s_ahg
->ahgidx
= qp
->s_ahgidx
;
716 priv
->s_ahg
->ahgcount
++;
717 priv
->s_ahg
->ahgdesc
[0] =
718 sdma_build_ahg_descriptor(
719 (__force u16
)cpu_to_be16((u16
)npsn
),
/* A second edit is only needed when the upper PSN half changed. */
723 if ((npsn
& 0xffff0000) !=
724 (qp
->s_ahgpsn
& 0xffff0000)) {
725 priv
->s_ahg
->ahgcount
++;
726 priv
->s_ahg
->ahgdesc
[1] =
727 sdma_build_ahg_descriptor(
728 (__force u16
)cpu_to_be16(
738 static inline void hfi1_make_ruc_bth(struct rvt_qp
*qp
,
739 struct ib_other_headers
*ohdr
,
740 u32 bth0
, u32 bth1
, u32 bth2
)
742 bth1
|= qp
->remote_qpn
;
743 ohdr
->bth
[0] = cpu_to_be32(bth0
);
744 ohdr
->bth
[1] = cpu_to_be32(bth1
);
745 ohdr
->bth
[2] = cpu_to_be32(bth2
);
/*
 * hfi1_make_ruc_header_16B - build the 16B packet header for @qp.
 * @qp: the QP being sent on
 * @ohdr: the other-headers area that receives the BTH
 * @bth0: first BTH word; the padding count is OR-ed in at bit 20
 * @bth2: third BTH word
 * @middle: middle-packet indication (its use is not visible in this
 *          listing)
 * @ps: packet state; supplies the port and the txreq being built
 *
 * Visible steps: the pkey, the 16B padding and the payload word count are
 * computed up front.  When the AH has IB_AH_GRH set and
 * hfi1_check_mcast() accepts the DLID, a GRH is built with hfi1_make_grh()
 * and l4 becomes OPA_16B_L4_IB_GLOBAL.  OPA_BTH_MIG_REQ is set for a
 * migrated QP, RVT_S_AHG_VALID is cleared in one fragment, a pending
 * RVT_S_ECN is consumed, and the BTH and 16B header are written with
 * hfi1_make_ruc_bth() and hfi1_make_16b_hdr().
 *
 * NOTE(review): lossy extraction -- declarations (bth1, becn, slid, grh,
 * hdrwords), the middle/AHG branch and several call arguments are
 * missing; confirm against the complete source.
 */
748 static inline void hfi1_make_ruc_header_16B(struct rvt_qp
*qp
,
749 struct ib_other_headers
*ohdr
,
750 u32 bth0
, u32 bth2
, int middle
,
751 struct hfi1_pkt_state
*ps
)
753 struct hfi1_qp_priv
*priv
= qp
->priv
;
754 struct hfi1_ibport
*ibp
= ps
->ibp
;
755 struct hfi1_pportdata
*ppd
= ppd_from_ibp(ibp
);
758 u16 pkey
= hfi1_get_pkey(ibp
, qp
->s_pkey_index
);
759 u8 l4
= OPA_16B_L4_IB_LOCAL
;
760 u8 extra_bytes
= hfi1_get_16b_padding((qp
->s_hdrwords
<< 2),
761 ps
->s_txreq
->s_cur_size
);
762 u32 nwords
= SIZE_OF_CRC
+ ((ps
->s_txreq
->s_cur_size
+
763 extra_bytes
+ SIZE_OF_LT
) >> 2);
766 if (unlikely(rdma_ah_get_ah_flags(&qp
->remote_ah_attr
) & IB_AH_GRH
) &&
767 hfi1_check_mcast(rdma_ah_get_dlid(&qp
->remote_ah_attr
))) {
769 struct ib_global_route
*grd
=
770 rdma_ah_retrieve_grh(&qp
->remote_ah_attr
);
774 * Ensure OPA GIDs are transformed to IB gids
775 * before creating the GRH.
777 if (grd
->sgid_index
== OPA_GID_INDEX
)
779 grh
= &ps
->s_txreq
->phdr
.hdr
.opah
.u
.l
.grh
;
780 l4
= OPA_16B_L4_IB_GLOBAL
;
781 hdrwords
= qp
->s_hdrwords
- 4;
782 qp
->s_hdrwords
+= hfi1_make_grh(ibp
, grh
, grd
,
787 if (qp
->s_mig_state
== IB_MIG_MIGRATED
)
788 bth1
|= OPA_BTH_MIG_REQ
;
795 qp
->s_flags
&= ~RVT_S_AHG_VALID
;
/* The padding byte count travels in bth0 starting at bit 20. */
798 bth0
|= extra_bytes
<< 20;
799 if (qp
->s_flags
& RVT_S_ECN
) {
800 qp
->s_flags
&= ~RVT_S_ECN
;
801 /* we recently received a FECN, so return a BECN */
804 hfi1_make_ruc_bth(qp
, ohdr
, bth0
, bth1
, bth2
);
807 slid
= be32_to_cpu(OPA_LID_PERMISSIVE
);
810 (rdma_ah_get_path_bits(&qp
->remote_ah_attr
) &
811 ((1 << ppd
->lmc
) - 1));
813 hfi1_make_16b_hdr(&ps
->s_txreq
->phdr
.hdr
.opah
,
815 opa_get_lid(rdma_ah_get_dlid(&qp
->remote_ah_attr
),
817 (qp
->s_hdrwords
+ nwords
) >> 1,
818 pkey
, becn
, 0, l4
, priv
->s_sc
);
/*
 * hfi1_make_ruc_header_9B - build the 9B packet header for @qp.
 * @qp: the QP being sent on
 * @ohdr: the other-headers area that receives the BTH
 * @bth0: first BTH word; the pad count is OR-ed in at bit 20
 * @bth2: third BTH word
 * @middle: middle-packet indication (its use is not visible in this
 *          listing)
 * @ps: packet state; supplies the port and the txreq being built
 *
 * Visible steps: the pkey, the pad count (-s_cur_size & 3) and the
 * payload word count are computed up front.  When the AH has IB_AH_GRH
 * set a GRH is built with hfi1_make_grh().  The SC and SL nibbles are
 * merged into lrh0, IB_BTH_MIG_REQ is set for a migrated QP,
 * RVT_S_AHG_VALID is cleared in one fragment, a pending RVT_S_ECN is
 * turned into a BECN bit in bth1, and the BTH and LRH are written with
 * hfi1_make_ruc_bth() and hfi1_make_ib_hdr().
 *
 * NOTE(review): lossy extraction -- the tail of the nwords initializer,
 * the bth1 declaration, the middle/AHG branch and some call arguments
 * are missing; confirm against the complete source.
 */
821 static inline void hfi1_make_ruc_header_9B(struct rvt_qp
*qp
,
822 struct ib_other_headers
*ohdr
,
823 u32 bth0
, u32 bth2
, int middle
,
824 struct hfi1_pkt_state
*ps
)
826 struct hfi1_qp_priv
*priv
= qp
->priv
;
827 struct hfi1_ibport
*ibp
= ps
->ibp
;
829 u16 pkey
= hfi1_get_pkey(ibp
, qp
->s_pkey_index
);
830 u16 lrh0
= HFI1_LRH_BTH
;
/* Number of pad bytes needed to round the payload up to 4 bytes. */
831 u8 extra_bytes
= -ps
->s_txreq
->s_cur_size
& 3;
832 u32 nwords
= SIZE_OF_CRC
+ ((ps
->s_txreq
->s_cur_size
+
835 if (unlikely(rdma_ah_get_ah_flags(&qp
->remote_ah_attr
) & IB_AH_GRH
)) {
836 struct ib_grh
*grh
= &ps
->s_txreq
->phdr
.hdr
.ibh
.u
.l
.grh
;
837 int hdrwords
= qp
->s_hdrwords
- 2;
841 hfi1_make_grh(ibp
, grh
,
842 rdma_ah_read_grh(&qp
->remote_ah_attr
),
/* SC goes in bits 15:12 of lrh0 and SL in bits 7:4. */
846 lrh0
|= (priv
->s_sc
& 0xf) << 12 |
847 (rdma_ah_get_sl(&qp
->remote_ah_attr
) & 0xf) << 4;
849 if (qp
->s_mig_state
== IB_MIG_MIGRATED
)
850 bth0
|= IB_BTH_MIG_REQ
;
857 qp
->s_flags
&= ~RVT_S_AHG_VALID
;
860 bth0
|= extra_bytes
<< 20;
861 if (qp
->s_flags
& RVT_S_ECN
) {
862 qp
->s_flags
&= ~RVT_S_ECN
;
863 /* we recently received a FECN, so return a BECN */
864 bth1
|= (IB_BECN_MASK
<< IB_BECN_SHIFT
);
866 hfi1_make_ruc_bth(qp
, ohdr
, bth0
, bth1
, bth2
);
867 hfi1_make_ib_hdr(&ps
->s_txreq
->phdr
.hdr
.ibh
,
869 qp
->s_hdrwords
+ nwords
,
870 opa_get_lid(rdma_ah_get_dlid(&qp
->remote_ah_attr
), 9B
),
871 ppd_from_ibp(ibp
)->lid
|
872 rdma_ah_get_path_bits(&qp
->remote_ah_attr
));
875 typedef void (*hfi1_make_ruc_hdr
)(struct rvt_qp
*qp
,
876 struct ib_other_headers
*ohdr
,
877 u32 bth0
, u32 bth2
, int middle
,
878 struct hfi1_pkt_state
*ps
);
880 /* We support only two types - 9B and 16B for now */
881 static const hfi1_make_ruc_hdr hfi1_ruc_header_tbl
[2] = {
882 [HFI1_PKT_TYPE_9B
] = &hfi1_make_ruc_header_9B
,
883 [HFI1_PKT_TYPE_16B
] = &hfi1_make_ruc_header_16B
886 void hfi1_make_ruc_header(struct rvt_qp
*qp
, struct ib_other_headers
*ohdr
,
887 u32 bth0
, u32 bth2
, int middle
,
888 struct hfi1_pkt_state
*ps
)
890 struct hfi1_qp_priv
*priv
= qp
->priv
;
893 * reset s_ahg/AHG fields
895 * This insures that the ahgentry/ahgcount
896 * are at a non-AHG default to protect
897 * build_verbs_tx_desc() from using
900 * build_ahg() will modify as appropriate
901 * to use the AHG feature.
903 priv
->s_ahg
->tx_flags
= 0;
904 priv
->s_ahg
->ahgcount
= 0;
905 priv
->s_ahg
->ahgidx
= 0;
907 /* Make the appropriate header */
908 hfi1_ruc_header_tbl
[priv
->hdr_type
](qp
, ohdr
, bth0
, bth2
, middle
, ps
);
/* when sending, force a reschedule every one of these periods */
#define SEND_RESCHED_TIMEOUT (5 * HZ)  /* 5s in jiffies */
915 * schedule_send_yield - test for a yield required for QP send engine
916 * @timeout: Final time for timeout slice for jiffies
917 * @qp: a pointer to QP
918 * @ps: a pointer to a structure with commonly looked-up values for
919 * the send engine progress
921 * This routine checks if the time slice for the QP has expired
922 * for RC QPs, if so an additional work entry is queued. At this
923 * point, other QPs have an opportunity to be scheduled. It
924 * returns true if a yield is required, otherwise, false
/*
 * schedule_send_yield - decide whether the send engine should yield.
 * @qp: a pointer to QP
 * @ps: send engine state (timeout, timeout_int, in_thread, cpu, ppd, flags)
 *
 * Always records ps->pkts_sent = true.  Once jiffies has passed
 * ps->timeout: when not running in a thread, or when the port workqueue
 * is congested on ps->cpu, RVT_S_BUSY is cleared and the QP is
 * rescheduled with hfi1_schedule_send() under qp->s_lock.  The other
 * visible fragment bumps the send_schedule counter and re-arms
 * ps->timeout by ps->timeout_int.  Both outcomes are traced with
 * trace_hfi1_rc_expired_time_slice().
 *
 * NOTE(review): lossy extraction -- the return statements and at least
 * one statement before the timeout re-arm are missing; per the header
 * text the function returns true if a yield is required, otherwise false.
 */
927 static bool schedule_send_yield(struct rvt_qp
*qp
,
928 struct hfi1_pkt_state
*ps
)
930 ps
->pkts_sent
= true;
932 if (unlikely(time_after(jiffies
, ps
->timeout
))) {
933 if (!ps
->in_thread
||
934 workqueue_congested(ps
->cpu
, ps
->ppd
->hfi1_wq
)) {
/* Give up the BUSY flag and queue the QP again under s_lock. */
935 spin_lock_irqsave(&qp
->s_lock
, ps
->flags
);
936 qp
->s_flags
&= ~RVT_S_BUSY
;
937 hfi1_schedule_send(qp
);
938 spin_unlock_irqrestore(&qp
->s_lock
, ps
->flags
);
939 this_cpu_inc(*ps
->ppd
->dd
->send_schedule
);
940 trace_hfi1_rc_expired_time_slice(qp
, true);
945 this_cpu_inc(*ps
->ppd
->dd
->send_schedule
);
/* Start a new time slice. */
946 ps
->timeout
= jiffies
+ ps
->timeout_int
;
949 trace_hfi1_rc_expired_time_slice(qp
, false);
953 void hfi1_do_send_from_rvt(struct rvt_qp
*qp
)
955 hfi1_do_send(qp
, false);
958 void _hfi1_do_send(struct work_struct
*work
)
960 struct iowait
*wait
= container_of(work
, struct iowait
, iowork
);
961 struct rvt_qp
*qp
= iowait_to_qp(wait
);
963 hfi1_do_send(qp
, true);
967 * hfi1_do_send - perform a send on a QP
968 * @qp: a pointer to the QP
969 * @in_thread: true if in a workqueue thread
971 * Process entries in the send work queue until credit or queue is
972 * exhausted. Only allow one CPU to send a packet per QP.
973 * Otherwise, two threads could send packets out of order.
/*
 * hfi1_do_send - run the send engine for @qp.
 * @qp: the QP to send on
 * @in_thread: true if in a workqueue thread
 *
 * Visible steps: the packet state is filled in (device, port, port data,
 * in_thread).  Depending on the QP type a request builder is chosen
 * (hfi1_make_rc_req, hfi1_make_uc_req or hfi1_make_ud_req) together with
 * a base time slice: qp->timeout_jiffies for the RC builder,
 * SEND_RESCHED_TIMEOUT for the other two.  Under qp->s_lock the function
 * gives up when hfi1_send_ok() fails, otherwise sets RVT_S_BUSY, divides
 * the slice by 8, arms ps.timeout and picks the CPU.  The loop then sends
 * any constructed packet with hfi1_verbs_send() with the lock dropped,
 * checks schedule_send_yield(), re-takes the lock and repeats while
 * make_req() returns non-zero.  iowait_starve_clear() is called before
 * the lock is finally released.
 *
 * NOTE(review): lossy extraction -- the case labels, the loopback
 * handling behind the DLID comparisons, the do { header and the return
 * statements are missing; confirm against the complete source.
 */
975 void hfi1_do_send(struct rvt_qp
*qp
, bool in_thread
)
977 struct hfi1_pkt_state ps
;
978 struct hfi1_qp_priv
*priv
= qp
->priv
;
979 int (*make_req
)(struct rvt_qp
*qp
, struct hfi1_pkt_state
*ps
);
981 ps
.dev
= to_idev(qp
->ibqp
.device
);
982 ps
.ibp
= to_iport(qp
->ibqp
.device
, qp
->port_num
);
983 ps
.ppd
= ppd_from_ibp(ps
.ibp
);
984 ps
.in_thread
= in_thread
;
986 trace_hfi1_rc_do_send(qp
, in_thread
);
988 switch (qp
->ibqp
.qp_type
) {
/* The DLID is compared with its LMC bits masked off. */
990 if (!loopback
&& ((rdma_ah_get_dlid(&qp
->remote_ah_attr
) &
991 ~((1 << ps
.ppd
->lmc
) - 1)) ==
996 make_req
= hfi1_make_rc_req
;
997 ps
.timeout_int
= qp
->timeout_jiffies
;
1000 if (!loopback
&& ((rdma_ah_get_dlid(&qp
->remote_ah_attr
) &
1001 ~((1 << ps
.ppd
->lmc
) - 1)) ==
1006 make_req
= hfi1_make_uc_req
;
1007 ps
.timeout_int
= SEND_RESCHED_TIMEOUT
;
1010 make_req
= hfi1_make_ud_req
;
1011 ps
.timeout_int
= SEND_RESCHED_TIMEOUT
;
1014 spin_lock_irqsave(&qp
->s_lock
, ps
.flags
);
1016 /* Return if we are already busy processing a work request. */
1017 if (!hfi1_send_ok(qp
)) {
1018 spin_unlock_irqrestore(&qp
->s_lock
, ps
.flags
);
1022 qp
->s_flags
|= RVT_S_BUSY
;
/* The slice actually used is one eighth of the per-type base value. */
1024 ps
.timeout_int
= ps
.timeout_int
/ 8;
1025 ps
.timeout
= jiffies
+ ps
.timeout_int
;
1026 ps
.cpu
= priv
->s_sde
? priv
->s_sde
->cpu
:
1027 cpumask_first(cpumask_of_node(ps
.ppd
->dd
->node
));
1028 ps
.pkts_sent
= false;
1030 /* ensure a pre-built packet is handled */
1031 ps
.s_txreq
= get_waiting_verbs_txreq(qp
);
1033 /* Check for a constructed packet to be sent. */
1034 if (qp
->s_hdrwords
!= 0) {
1035 spin_unlock_irqrestore(&qp
->s_lock
, ps
.flags
);
1037 * If the packet cannot be sent now, return and
1038 * the send engine will be woken up later.
1040 if (hfi1_verbs_send(qp
, &ps
))
1042 /* Record that s_ahg is empty. */
1044 /* allow other tasks to run */
1045 if (schedule_send_yield(qp
, &ps
))
1048 spin_lock_irqsave(&qp
->s_lock
, ps
.flags
);
1050 } while (make_req(qp
, &ps
));
1051 iowait_starve_clear(ps
.pkts_sent
, &priv
->s_iowait
);
1052 spin_unlock_irqrestore(&qp
->s_lock
, ps
.flags
);
1056 * This should be called with s_lock held.
1058 void hfi1_send_complete(struct rvt_qp
*qp
, struct rvt_swqe
*wqe
,
1059 enum ib_wc_status status
)
1063 if (!(ib_rvt_state_ops
[qp
->state
] & RVT_PROCESS_OR_FLUSH_SEND
))
1068 trace_hfi1_qp_send_completion(qp
, wqe
, last
);
1069 if (++last
>= qp
->s_size
)
1071 trace_hfi1_qp_send_completion(qp
, wqe
, last
);
1073 /* See post_send() */
1076 if (qp
->ibqp
.qp_type
== IB_QPT_UD
||
1077 qp
->ibqp
.qp_type
== IB_QPT_SMI
||
1078 qp
->ibqp
.qp_type
== IB_QPT_GSI
)
1079 atomic_dec(&ibah_to_rvtah(wqe
->ud_wr
.ah
)->refcount
);
1081 rvt_qp_swqe_complete(qp
,
1083 ib_hfi1_wc_opcode
[wqe
->wr
.opcode
],
1086 if (qp
->s_acked
== old_last
)
1088 if (qp
->s_cur
== old_last
)
1090 if (qp
->s_tail
== old_last
)
1092 if (qp
->state
== IB_QPS_SQD
&& last
== qp
->s_cur
)