2 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
3 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <linux/skbuff.h>
38 #include "rxe_queue.h"
54 RESPST_DUPLICATE_REQUEST
,
55 RESPST_ERR_MALFORMED_WQE
,
56 RESPST_ERR_UNSUPPORTED_OPCODE
,
57 RESPST_ERR_MISALIGNED_ATOMIC
,
58 RESPST_ERR_PSN_OUT_OF_SEQ
,
59 RESPST_ERR_MISSING_OPCODE_FIRST
,
60 RESPST_ERR_MISSING_OPCODE_LAST_C
,
61 RESPST_ERR_MISSING_OPCODE_LAST_D1E
,
62 RESPST_ERR_TOO_MANY_RDMA_ATM_REQ
,
64 RESPST_ERR_RKEY_VIOLATION
,
66 RESPST_ERR_CQ_OVERFLOW
,
73 static char *resp_state_name
[] = {
74 [RESPST_NONE
] = "NONE",
75 [RESPST_GET_REQ
] = "GET_REQ",
76 [RESPST_CHK_PSN
] = "CHK_PSN",
77 [RESPST_CHK_OP_SEQ
] = "CHK_OP_SEQ",
78 [RESPST_CHK_OP_VALID
] = "CHK_OP_VALID",
79 [RESPST_CHK_RESOURCE
] = "CHK_RESOURCE",
80 [RESPST_CHK_LENGTH
] = "CHK_LENGTH",
81 [RESPST_CHK_RKEY
] = "CHK_RKEY",
82 [RESPST_EXECUTE
] = "EXECUTE",
83 [RESPST_READ_REPLY
] = "READ_REPLY",
84 [RESPST_COMPLETE
] = "COMPLETE",
85 [RESPST_ACKNOWLEDGE
] = "ACKNOWLEDGE",
86 [RESPST_CLEANUP
] = "CLEANUP",
87 [RESPST_DUPLICATE_REQUEST
] = "DUPLICATE_REQUEST",
88 [RESPST_ERR_MALFORMED_WQE
] = "ERR_MALFORMED_WQE",
89 [RESPST_ERR_UNSUPPORTED_OPCODE
] = "ERR_UNSUPPORTED_OPCODE",
90 [RESPST_ERR_MISALIGNED_ATOMIC
] = "ERR_MISALIGNED_ATOMIC",
91 [RESPST_ERR_PSN_OUT_OF_SEQ
] = "ERR_PSN_OUT_OF_SEQ",
92 [RESPST_ERR_MISSING_OPCODE_FIRST
] = "ERR_MISSING_OPCODE_FIRST",
93 [RESPST_ERR_MISSING_OPCODE_LAST_C
] = "ERR_MISSING_OPCODE_LAST_C",
94 [RESPST_ERR_MISSING_OPCODE_LAST_D1E
] = "ERR_MISSING_OPCODE_LAST_D1E",
95 [RESPST_ERR_TOO_MANY_RDMA_ATM_REQ
] = "ERR_TOO_MANY_RDMA_ATM_REQ",
96 [RESPST_ERR_RNR
] = "ERR_RNR",
97 [RESPST_ERR_RKEY_VIOLATION
] = "ERR_RKEY_VIOLATION",
98 [RESPST_ERR_LENGTH
] = "ERR_LENGTH",
99 [RESPST_ERR_CQ_OVERFLOW
] = "ERR_CQ_OVERFLOW",
100 [RESPST_ERROR
] = "ERROR",
101 [RESPST_RESET
] = "RESET",
102 [RESPST_DONE
] = "DONE",
103 [RESPST_EXIT
] = "EXIT",
106 /* rxe_recv calls here to add a request packet to the input queue */
107 void rxe_resp_queue_pkt(struct rxe_qp
*qp
, struct sk_buff
*skb
)
110 struct rxe_pkt_info
*pkt
= SKB_TO_PKT(skb
);
112 skb_queue_tail(&qp
->req_pkts
, skb
);
114 must_sched
= (pkt
->opcode
== IB_OPCODE_RC_RDMA_READ_REQUEST
) ||
115 (skb_queue_len(&qp
->req_pkts
) > 1);
117 rxe_run_task(&qp
->resp
.task
, must_sched
);
120 static inline enum resp_states
get_req(struct rxe_qp
*qp
,
121 struct rxe_pkt_info
**pkt_p
)
125 if (qp
->resp
.state
== QP_STATE_ERROR
) {
126 while ((skb
= skb_dequeue(&qp
->req_pkts
))) {
131 /* go drain recv wr queue */
132 return RESPST_CHK_RESOURCE
;
135 skb
= skb_peek(&qp
->req_pkts
);
139 *pkt_p
= SKB_TO_PKT(skb
);
141 return (qp
->resp
.res
) ? RESPST_READ_REPLY
: RESPST_CHK_PSN
;
144 static enum resp_states
check_psn(struct rxe_qp
*qp
,
145 struct rxe_pkt_info
*pkt
)
147 int diff
= psn_compare(pkt
->psn
, qp
->resp
.psn
);
148 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
150 switch (qp_type(qp
)) {
153 if (qp
->resp
.sent_psn_nak
)
154 return RESPST_CLEANUP
;
156 qp
->resp
.sent_psn_nak
= 1;
157 rxe_counter_inc(rxe
, RXE_CNT_OUT_OF_SEQ_REQ
);
158 return RESPST_ERR_PSN_OUT_OF_SEQ
;
160 } else if (diff
< 0) {
161 rxe_counter_inc(rxe
, RXE_CNT_DUP_REQ
);
162 return RESPST_DUPLICATE_REQUEST
;
165 if (qp
->resp
.sent_psn_nak
)
166 qp
->resp
.sent_psn_nak
= 0;
171 if (qp
->resp
.drop_msg
|| diff
!= 0) {
172 if (pkt
->mask
& RXE_START_MASK
) {
173 qp
->resp
.drop_msg
= 0;
174 return RESPST_CHK_OP_SEQ
;
177 qp
->resp
.drop_msg
= 1;
178 return RESPST_CLEANUP
;
185 return RESPST_CHK_OP_SEQ
;
188 static enum resp_states
check_op_seq(struct rxe_qp
*qp
,
189 struct rxe_pkt_info
*pkt
)
191 switch (qp_type(qp
)) {
193 switch (qp
->resp
.opcode
) {
194 case IB_OPCODE_RC_SEND_FIRST
:
195 case IB_OPCODE_RC_SEND_MIDDLE
:
196 switch (pkt
->opcode
) {
197 case IB_OPCODE_RC_SEND_MIDDLE
:
198 case IB_OPCODE_RC_SEND_LAST
:
199 case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE
:
200 case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE
:
201 return RESPST_CHK_OP_VALID
;
203 return RESPST_ERR_MISSING_OPCODE_LAST_C
;
206 case IB_OPCODE_RC_RDMA_WRITE_FIRST
:
207 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE
:
208 switch (pkt
->opcode
) {
209 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE
:
210 case IB_OPCODE_RC_RDMA_WRITE_LAST
:
211 case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE
:
212 return RESPST_CHK_OP_VALID
;
214 return RESPST_ERR_MISSING_OPCODE_LAST_C
;
218 switch (pkt
->opcode
) {
219 case IB_OPCODE_RC_SEND_MIDDLE
:
220 case IB_OPCODE_RC_SEND_LAST
:
221 case IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE
:
222 case IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE
:
223 case IB_OPCODE_RC_RDMA_WRITE_MIDDLE
:
224 case IB_OPCODE_RC_RDMA_WRITE_LAST
:
225 case IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE
:
226 return RESPST_ERR_MISSING_OPCODE_FIRST
;
228 return RESPST_CHK_OP_VALID
;
234 switch (qp
->resp
.opcode
) {
235 case IB_OPCODE_UC_SEND_FIRST
:
236 case IB_OPCODE_UC_SEND_MIDDLE
:
237 switch (pkt
->opcode
) {
238 case IB_OPCODE_UC_SEND_MIDDLE
:
239 case IB_OPCODE_UC_SEND_LAST
:
240 case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE
:
241 return RESPST_CHK_OP_VALID
;
243 return RESPST_ERR_MISSING_OPCODE_LAST_D1E
;
246 case IB_OPCODE_UC_RDMA_WRITE_FIRST
:
247 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE
:
248 switch (pkt
->opcode
) {
249 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE
:
250 case IB_OPCODE_UC_RDMA_WRITE_LAST
:
251 case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE
:
252 return RESPST_CHK_OP_VALID
;
254 return RESPST_ERR_MISSING_OPCODE_LAST_D1E
;
258 switch (pkt
->opcode
) {
259 case IB_OPCODE_UC_SEND_MIDDLE
:
260 case IB_OPCODE_UC_SEND_LAST
:
261 case IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE
:
262 case IB_OPCODE_UC_RDMA_WRITE_MIDDLE
:
263 case IB_OPCODE_UC_RDMA_WRITE_LAST
:
264 case IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE
:
265 qp
->resp
.drop_msg
= 1;
266 return RESPST_CLEANUP
;
268 return RESPST_CHK_OP_VALID
;
274 return RESPST_CHK_OP_VALID
;
278 static enum resp_states
check_op_valid(struct rxe_qp
*qp
,
279 struct rxe_pkt_info
*pkt
)
281 switch (qp_type(qp
)) {
283 if (((pkt
->mask
& RXE_READ_MASK
) &&
284 !(qp
->attr
.qp_access_flags
& IB_ACCESS_REMOTE_READ
)) ||
285 ((pkt
->mask
& RXE_WRITE_MASK
) &&
286 !(qp
->attr
.qp_access_flags
& IB_ACCESS_REMOTE_WRITE
)) ||
287 ((pkt
->mask
& RXE_ATOMIC_MASK
) &&
288 !(qp
->attr
.qp_access_flags
& IB_ACCESS_REMOTE_ATOMIC
))) {
289 return RESPST_ERR_UNSUPPORTED_OPCODE
;
295 if ((pkt
->mask
& RXE_WRITE_MASK
) &&
296 !(qp
->attr
.qp_access_flags
& IB_ACCESS_REMOTE_WRITE
)) {
297 qp
->resp
.drop_msg
= 1;
298 return RESPST_CLEANUP
;
313 return RESPST_CHK_RESOURCE
;
316 static enum resp_states
get_srq_wqe(struct rxe_qp
*qp
)
318 struct rxe_srq
*srq
= qp
->srq
;
319 struct rxe_queue
*q
= srq
->rq
.queue
;
320 struct rxe_recv_wqe
*wqe
;
324 return RESPST_ERR_RNR
;
326 spin_lock_bh(&srq
->rq
.consumer_lock
);
330 spin_unlock_bh(&srq
->rq
.consumer_lock
);
331 return RESPST_ERR_RNR
;
334 /* note kernel and user space recv wqes have same size */
335 memcpy(&qp
->resp
.srq_wqe
, wqe
, sizeof(qp
->resp
.srq_wqe
));
337 qp
->resp
.wqe
= &qp
->resp
.srq_wqe
.wqe
;
340 if (srq
->limit
&& srq
->ibsrq
.event_handler
&&
341 (queue_count(q
) < srq
->limit
)) {
346 spin_unlock_bh(&srq
->rq
.consumer_lock
);
347 return RESPST_CHK_LENGTH
;
350 spin_unlock_bh(&srq
->rq
.consumer_lock
);
351 ev
.device
= qp
->ibqp
.device
;
352 ev
.element
.srq
= qp
->ibqp
.srq
;
353 ev
.event
= IB_EVENT_SRQ_LIMIT_REACHED
;
354 srq
->ibsrq
.event_handler(&ev
, srq
->ibsrq
.srq_context
);
355 return RESPST_CHK_LENGTH
;
358 static enum resp_states
check_resource(struct rxe_qp
*qp
,
359 struct rxe_pkt_info
*pkt
)
361 struct rxe_srq
*srq
= qp
->srq
;
363 if (qp
->resp
.state
== QP_STATE_ERROR
) {
365 qp
->resp
.status
= IB_WC_WR_FLUSH_ERR
;
366 return RESPST_COMPLETE
;
368 qp
->resp
.wqe
= queue_head(qp
->rq
.queue
);
370 qp
->resp
.status
= IB_WC_WR_FLUSH_ERR
;
371 return RESPST_COMPLETE
;
380 if (pkt
->mask
& RXE_READ_OR_ATOMIC
) {
381 /* it is the requesters job to not send
382 * too many read/atomic ops, we just
383 * recycle the responder resource queue
385 if (likely(qp
->attr
.max_dest_rd_atomic
> 0))
386 return RESPST_CHK_LENGTH
;
388 return RESPST_ERR_TOO_MANY_RDMA_ATM_REQ
;
391 if (pkt
->mask
& RXE_RWR_MASK
) {
393 return get_srq_wqe(qp
);
395 qp
->resp
.wqe
= queue_head(qp
->rq
.queue
);
396 return (qp
->resp
.wqe
) ? RESPST_CHK_LENGTH
: RESPST_ERR_RNR
;
399 return RESPST_CHK_LENGTH
;
402 static enum resp_states
check_length(struct rxe_qp
*qp
,
403 struct rxe_pkt_info
*pkt
)
405 switch (qp_type(qp
)) {
407 return RESPST_CHK_RKEY
;
410 return RESPST_CHK_RKEY
;
413 return RESPST_CHK_RKEY
;
417 static enum resp_states
check_rkey(struct rxe_qp
*qp
,
418 struct rxe_pkt_info
*pkt
)
420 struct rxe_mem
*mem
= NULL
;
426 enum resp_states state
;
429 if (pkt
->mask
& (RXE_READ_MASK
| RXE_WRITE_MASK
)) {
430 if (pkt
->mask
& RXE_RETH_MASK
) {
431 qp
->resp
.va
= reth_va(pkt
);
432 qp
->resp
.rkey
= reth_rkey(pkt
);
433 qp
->resp
.resid
= reth_len(pkt
);
435 access
= (pkt
->mask
& RXE_READ_MASK
) ? IB_ACCESS_REMOTE_READ
436 : IB_ACCESS_REMOTE_WRITE
;
437 } else if (pkt
->mask
& RXE_ATOMIC_MASK
) {
438 qp
->resp
.va
= atmeth_va(pkt
);
439 qp
->resp
.rkey
= atmeth_rkey(pkt
);
440 qp
->resp
.resid
= sizeof(u64
);
441 access
= IB_ACCESS_REMOTE_ATOMIC
;
443 return RESPST_EXECUTE
;
446 /* A zero-byte op is not required to set an addr or rkey. */
447 if ((pkt
->mask
& (RXE_READ_MASK
| RXE_WRITE_OR_SEND
)) &&
448 (pkt
->mask
& RXE_RETH_MASK
) &&
449 reth_len(pkt
) == 0) {
450 return RESPST_EXECUTE
;
454 rkey
= qp
->resp
.rkey
;
455 resid
= qp
->resp
.resid
;
456 pktlen
= payload_size(pkt
);
458 mem
= lookup_mem(qp
->pd
, access
, rkey
, lookup_remote
);
460 state
= RESPST_ERR_RKEY_VIOLATION
;
464 if (unlikely(mem
->state
== RXE_MEM_STATE_FREE
)) {
465 state
= RESPST_ERR_RKEY_VIOLATION
;
469 if (mem_check_range(mem
, va
, resid
)) {
470 state
= RESPST_ERR_RKEY_VIOLATION
;
474 if (pkt
->mask
& RXE_WRITE_MASK
) {
476 if (pktlen
!= mtu
|| bth_pad(pkt
)) {
477 state
= RESPST_ERR_LENGTH
;
481 if (pktlen
!= resid
) {
482 state
= RESPST_ERR_LENGTH
;
485 if ((bth_pad(pkt
) != (0x3 & (-resid
)))) {
486 /* This case may not be exactly that
487 * but nothing else fits.
489 state
= RESPST_ERR_LENGTH
;
495 WARN_ON_ONCE(qp
->resp
.mr
);
498 return RESPST_EXECUTE
;
506 static enum resp_states
send_data_in(struct rxe_qp
*qp
, void *data_addr
,
511 err
= copy_data(qp
->pd
, IB_ACCESS_LOCAL_WRITE
, &qp
->resp
.wqe
->dma
,
512 data_addr
, data_len
, to_mem_obj
, NULL
);
514 return (err
== -ENOSPC
) ? RESPST_ERR_LENGTH
515 : RESPST_ERR_MALFORMED_WQE
;
520 static enum resp_states
write_data_in(struct rxe_qp
*qp
,
521 struct rxe_pkt_info
*pkt
)
523 enum resp_states rc
= RESPST_NONE
;
525 int data_len
= payload_size(pkt
);
527 err
= rxe_mem_copy(qp
->resp
.mr
, qp
->resp
.va
, payload_addr(pkt
),
528 data_len
, to_mem_obj
, NULL
);
530 rc
= RESPST_ERR_RKEY_VIOLATION
;
534 qp
->resp
.va
+= data_len
;
535 qp
->resp
.resid
-= data_len
;
541 /* Guarantee atomicity of atomic operations at the machine level. */
542 static DEFINE_SPINLOCK(atomic_ops_lock
);
544 static enum resp_states
process_atomic(struct rxe_qp
*qp
,
545 struct rxe_pkt_info
*pkt
)
547 u64 iova
= atmeth_va(pkt
);
549 enum resp_states ret
;
550 struct rxe_mem
*mr
= qp
->resp
.mr
;
552 if (mr
->state
!= RXE_MEM_STATE_VALID
) {
553 ret
= RESPST_ERR_RKEY_VIOLATION
;
557 vaddr
= iova_to_vaddr(mr
, iova
, sizeof(u64
));
559 /* check vaddr is 8 bytes aligned. */
560 if (!vaddr
|| (uintptr_t)vaddr
& 7) {
561 ret
= RESPST_ERR_MISALIGNED_ATOMIC
;
565 spin_lock_bh(&atomic_ops_lock
);
567 qp
->resp
.atomic_orig
= *vaddr
;
569 if (pkt
->opcode
== IB_OPCODE_RC_COMPARE_SWAP
||
570 pkt
->opcode
== IB_OPCODE_RD_COMPARE_SWAP
) {
571 if (*vaddr
== atmeth_comp(pkt
))
572 *vaddr
= atmeth_swap_add(pkt
);
574 *vaddr
+= atmeth_swap_add(pkt
);
577 spin_unlock_bh(&atomic_ops_lock
);
584 static struct sk_buff
*prepare_ack_packet(struct rxe_qp
*qp
,
585 struct rxe_pkt_info
*pkt
,
586 struct rxe_pkt_info
*ack
,
593 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
604 pad
= (-payload
) & 0x3;
605 paylen
= rxe_opcode
[opcode
].length
+ payload
+ pad
+ RXE_ICRC_SIZE
;
607 skb
= rxe_init_packet(rxe
, &qp
->pri_av
, paylen
, ack
);
612 ack
->opcode
= opcode
;
613 ack
->mask
= rxe_opcode
[opcode
].mask
;
614 ack
->offset
= pkt
->offset
;
615 ack
->paylen
= paylen
;
617 /* fill in bth using the request packet headers */
618 memcpy(ack
->hdr
, pkt
->hdr
, pkt
->offset
+ RXE_BTH_BYTES
);
620 bth_set_opcode(ack
, opcode
);
621 bth_set_qpn(ack
, qp
->attr
.dest_qp_num
);
622 bth_set_pad(ack
, pad
);
624 bth_set_psn(ack
, psn
);
628 if (ack
->mask
& RXE_AETH_MASK
) {
629 aeth_set_syn(ack
, syndrome
);
630 aeth_set_msn(ack
, qp
->resp
.msn
);
633 if (ack
->mask
& RXE_ATMACK_MASK
)
634 atmack_set_orig(ack
, qp
->resp
.atomic_orig
);
636 err
= rxe_prepare(ack
, skb
, &crc
);
643 /* CRC computation will be continued by the caller */
646 p
= payload_addr(ack
) + payload
+ bth_pad(ack
);
653 /* RDMA read response. If res is not NULL, then we have a current RDMA request
654 * being processed or replayed.
656 static enum resp_states
read_reply(struct rxe_qp
*qp
,
657 struct rxe_pkt_info
*req_pkt
)
659 struct rxe_pkt_info ack_pkt
;
662 enum resp_states state
;
666 struct resp_res
*res
= qp
->resp
.res
;
671 /* This is the first time we process that request. Get a
674 res
= &qp
->resp
.resources
[qp
->resp
.res_head
];
676 free_rd_atomic_resource(qp
, res
);
677 rxe_advance_resp_resource(qp
);
679 res
->type
= RXE_READ_MASK
;
682 res
->read
.va
= qp
->resp
.va
;
683 res
->read
.va_org
= qp
->resp
.va
;
685 res
->first_psn
= req_pkt
->psn
;
687 if (reth_len(req_pkt
)) {
688 res
->last_psn
= (req_pkt
->psn
+
689 (reth_len(req_pkt
) + mtu
- 1) /
690 mtu
- 1) & BTH_PSN_MASK
;
692 res
->last_psn
= res
->first_psn
;
694 res
->cur_psn
= req_pkt
->psn
;
696 res
->read
.resid
= qp
->resp
.resid
;
697 res
->read
.length
= qp
->resp
.resid
;
698 res
->read
.rkey
= qp
->resp
.rkey
;
700 /* note res inherits the reference to mr from qp */
701 res
->read
.mr
= qp
->resp
.mr
;
705 res
->state
= rdatm_res_state_new
;
708 if (res
->state
== rdatm_res_state_new
) {
709 if (res
->read
.resid
<= mtu
)
710 opcode
= IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY
;
712 opcode
= IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST
;
714 if (res
->read
.resid
> mtu
)
715 opcode
= IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE
;
717 opcode
= IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST
;
720 res
->state
= rdatm_res_state_next
;
722 payload
= min_t(int, res
->read
.resid
, mtu
);
724 skb
= prepare_ack_packet(qp
, req_pkt
, &ack_pkt
, opcode
, payload
,
725 res
->cur_psn
, AETH_ACK_UNLIMITED
, &icrc
);
727 return RESPST_ERR_RNR
;
729 err
= rxe_mem_copy(res
->read
.mr
, res
->read
.va
, payload_addr(&ack_pkt
),
730 payload
, from_mem_obj
, &icrc
);
732 pr_err("Failed copying memory\n");
734 p
= payload_addr(&ack_pkt
) + payload
+ bth_pad(&ack_pkt
);
737 err
= rxe_xmit_packet(qp
, &ack_pkt
, skb
);
739 pr_err("Failed sending RDMA reply.\n");
740 return RESPST_ERR_RNR
;
743 res
->read
.va
+= payload
;
744 res
->read
.resid
-= payload
;
745 res
->cur_psn
= (res
->cur_psn
+ 1) & BTH_PSN_MASK
;
747 if (res
->read
.resid
> 0) {
752 qp
->resp
.opcode
= -1;
753 if (psn_compare(res
->cur_psn
, qp
->resp
.psn
) >= 0)
754 qp
->resp
.psn
= res
->cur_psn
;
755 state
= RESPST_CLEANUP
;
761 static void build_rdma_network_hdr(union rdma_network_hdr
*hdr
,
762 struct rxe_pkt_info
*pkt
)
764 struct sk_buff
*skb
= PKT_TO_SKB(pkt
);
766 memset(hdr
, 0, sizeof(*hdr
));
767 if (skb
->protocol
== htons(ETH_P_IP
))
768 memcpy(&hdr
->roce4grh
, ip_hdr(skb
), sizeof(hdr
->roce4grh
));
769 else if (skb
->protocol
== htons(ETH_P_IPV6
))
770 memcpy(&hdr
->ibgrh
, ipv6_hdr(skb
), sizeof(hdr
->ibgrh
));
773 /* Executes a new request. A retried request never reach that function (send
774 * and writes are discarded, and reads and atomics are retried elsewhere.
776 static enum resp_states
execute(struct rxe_qp
*qp
, struct rxe_pkt_info
*pkt
)
778 enum resp_states err
;
780 if (pkt
->mask
& RXE_SEND_MASK
) {
781 if (qp_type(qp
) == IB_QPT_UD
||
782 qp_type(qp
) == IB_QPT_SMI
||
783 qp_type(qp
) == IB_QPT_GSI
) {
784 union rdma_network_hdr hdr
;
786 build_rdma_network_hdr(&hdr
, pkt
);
788 err
= send_data_in(qp
, &hdr
, sizeof(hdr
));
792 err
= send_data_in(qp
, payload_addr(pkt
), payload_size(pkt
));
795 } else if (pkt
->mask
& RXE_WRITE_MASK
) {
796 err
= write_data_in(qp
, pkt
);
799 } else if (pkt
->mask
& RXE_READ_MASK
) {
800 /* For RDMA Read we can increment the msn now. See C9-148. */
802 return RESPST_READ_REPLY
;
803 } else if (pkt
->mask
& RXE_ATOMIC_MASK
) {
804 err
= process_atomic(qp
, pkt
);
812 /* next expected psn, read handles this separately */
813 qp
->resp
.psn
= (pkt
->psn
+ 1) & BTH_PSN_MASK
;
814 qp
->resp
.ack_psn
= qp
->resp
.psn
;
816 qp
->resp
.opcode
= pkt
->opcode
;
817 qp
->resp
.status
= IB_WC_SUCCESS
;
819 if (pkt
->mask
& RXE_COMP_MASK
) {
820 /* We successfully processed this new request. */
822 return RESPST_COMPLETE
;
823 } else if (qp_type(qp
) == IB_QPT_RC
)
824 return RESPST_ACKNOWLEDGE
;
826 return RESPST_CLEANUP
;
829 static enum resp_states
do_complete(struct rxe_qp
*qp
,
830 struct rxe_pkt_info
*pkt
)
833 struct ib_wc
*wc
= &cqe
.ibwc
;
834 struct ib_uverbs_wc
*uwc
= &cqe
.uibwc
;
835 struct rxe_recv_wqe
*wqe
= qp
->resp
.wqe
;
836 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
839 return RESPST_CLEANUP
;
841 memset(&cqe
, 0, sizeof(cqe
));
843 if (qp
->rcq
->is_user
) {
844 uwc
->status
= qp
->resp
.status
;
845 uwc
->qp_num
= qp
->ibqp
.qp_num
;
846 uwc
->wr_id
= wqe
->wr_id
;
848 wc
->status
= qp
->resp
.status
;
850 wc
->wr_id
= wqe
->wr_id
;
853 if (wc
->status
== IB_WC_SUCCESS
) {
854 rxe_counter_inc(rxe
, RXE_CNT_RDMA_RECV
);
855 wc
->opcode
= (pkt
->mask
& RXE_IMMDT_MASK
&&
856 pkt
->mask
& RXE_WRITE_MASK
) ?
857 IB_WC_RECV_RDMA_WITH_IMM
: IB_WC_RECV
;
859 wc
->byte_len
= wqe
->dma
.length
- wqe
->dma
.resid
;
861 /* fields after byte_len are different between kernel and user
864 if (qp
->rcq
->is_user
) {
865 uwc
->wc_flags
= IB_WC_GRH
;
867 if (pkt
->mask
& RXE_IMMDT_MASK
) {
868 uwc
->wc_flags
|= IB_WC_WITH_IMM
;
869 uwc
->ex
.imm_data
= immdt_imm(pkt
);
872 if (pkt
->mask
& RXE_IETH_MASK
) {
873 uwc
->wc_flags
|= IB_WC_WITH_INVALIDATE
;
874 uwc
->ex
.invalidate_rkey
= ieth_rkey(pkt
);
877 uwc
->qp_num
= qp
->ibqp
.qp_num
;
879 if (pkt
->mask
& RXE_DETH_MASK
)
880 uwc
->src_qp
= deth_sqp(pkt
);
882 uwc
->port_num
= qp
->attr
.port_num
;
884 struct sk_buff
*skb
= PKT_TO_SKB(pkt
);
886 wc
->wc_flags
= IB_WC_GRH
| IB_WC_WITH_NETWORK_HDR_TYPE
;
887 if (skb
->protocol
== htons(ETH_P_IP
))
888 wc
->network_hdr_type
= RDMA_NETWORK_IPV4
;
890 wc
->network_hdr_type
= RDMA_NETWORK_IPV6
;
892 if (is_vlan_dev(skb
->dev
)) {
893 wc
->wc_flags
|= IB_WC_WITH_VLAN
;
894 wc
->vlan_id
= vlan_dev_vlan_id(skb
->dev
);
897 if (pkt
->mask
& RXE_IMMDT_MASK
) {
898 wc
->wc_flags
|= IB_WC_WITH_IMM
;
899 wc
->ex
.imm_data
= immdt_imm(pkt
);
902 if (pkt
->mask
& RXE_IETH_MASK
) {
905 wc
->wc_flags
|= IB_WC_WITH_INVALIDATE
;
906 wc
->ex
.invalidate_rkey
= ieth_rkey(pkt
);
908 rmr
= rxe_pool_get_index(&rxe
->mr_pool
,
909 wc
->ex
.invalidate_rkey
>> 8);
910 if (unlikely(!rmr
)) {
911 pr_err("Bad rkey %#x invalidation\n",
912 wc
->ex
.invalidate_rkey
);
915 rmr
->state
= RXE_MEM_STATE_FREE
;
921 if (pkt
->mask
& RXE_DETH_MASK
)
922 wc
->src_qp
= deth_sqp(pkt
);
924 wc
->port_num
= qp
->attr
.port_num
;
928 /* have copy for srq and reference for !srq */
930 advance_consumer(qp
->rq
.queue
);
934 if (rxe_cq_post(qp
->rcq
, &cqe
, pkt
? bth_se(pkt
) : 1))
935 return RESPST_ERR_CQ_OVERFLOW
;
937 if (qp
->resp
.state
== QP_STATE_ERROR
)
938 return RESPST_CHK_RESOURCE
;
942 else if (qp_type(qp
) == IB_QPT_RC
)
943 return RESPST_ACKNOWLEDGE
;
945 return RESPST_CLEANUP
;
948 static int send_ack(struct rxe_qp
*qp
, struct rxe_pkt_info
*pkt
,
949 u8 syndrome
, u32 psn
)
952 struct rxe_pkt_info ack_pkt
;
955 skb
= prepare_ack_packet(qp
, pkt
, &ack_pkt
, IB_OPCODE_RC_ACKNOWLEDGE
,
956 0, psn
, syndrome
, NULL
);
962 err
= rxe_xmit_packet(qp
, &ack_pkt
, skb
);
964 pr_err_ratelimited("Failed sending ack\n");
970 static int send_atomic_ack(struct rxe_qp
*qp
, struct rxe_pkt_info
*pkt
,
974 struct rxe_pkt_info ack_pkt
;
976 struct resp_res
*res
;
978 skb
= prepare_ack_packet(qp
, pkt
, &ack_pkt
,
979 IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE
, 0, pkt
->psn
,
988 res
= &qp
->resp
.resources
[qp
->resp
.res_head
];
989 free_rd_atomic_resource(qp
, res
);
990 rxe_advance_resp_resource(qp
);
992 memcpy(SKB_TO_PKT(skb
), &ack_pkt
, sizeof(ack_pkt
));
993 memset((unsigned char *)SKB_TO_PKT(skb
) + sizeof(ack_pkt
), 0,
994 sizeof(skb
->cb
) - sizeof(ack_pkt
));
997 res
->type
= RXE_ATOMIC_MASK
;
998 res
->atomic
.skb
= skb
;
999 res
->first_psn
= ack_pkt
.psn
;
1000 res
->last_psn
= ack_pkt
.psn
;
1001 res
->cur_psn
= ack_pkt
.psn
;
1003 rc
= rxe_xmit_packet(qp
, &ack_pkt
, skb
);
1005 pr_err_ratelimited("Failed sending ack\n");
1012 static enum resp_states
acknowledge(struct rxe_qp
*qp
,
1013 struct rxe_pkt_info
*pkt
)
1015 if (qp_type(qp
) != IB_QPT_RC
)
1016 return RESPST_CLEANUP
;
1018 if (qp
->resp
.aeth_syndrome
!= AETH_ACK_UNLIMITED
)
1019 send_ack(qp
, pkt
, qp
->resp
.aeth_syndrome
, pkt
->psn
);
1020 else if (pkt
->mask
& RXE_ATOMIC_MASK
)
1021 send_atomic_ack(qp
, pkt
, AETH_ACK_UNLIMITED
);
1022 else if (bth_ack(pkt
))
1023 send_ack(qp
, pkt
, AETH_ACK_UNLIMITED
, pkt
->psn
);
1025 return RESPST_CLEANUP
;
1028 static enum resp_states
cleanup(struct rxe_qp
*qp
,
1029 struct rxe_pkt_info
*pkt
)
1031 struct sk_buff
*skb
;
1034 skb
= skb_dequeue(&qp
->req_pkts
);
1040 rxe_drop_ref(qp
->resp
.mr
);
1047 static struct resp_res
*find_resource(struct rxe_qp
*qp
, u32 psn
)
1051 for (i
= 0; i
< qp
->attr
.max_dest_rd_atomic
; i
++) {
1052 struct resp_res
*res
= &qp
->resp
.resources
[i
];
1057 if (psn_compare(psn
, res
->first_psn
) >= 0 &&
1058 psn_compare(psn
, res
->last_psn
) <= 0) {
1066 static enum resp_states
duplicate_request(struct rxe_qp
*qp
,
1067 struct rxe_pkt_info
*pkt
)
1069 enum resp_states rc
;
1070 u32 prev_psn
= (qp
->resp
.ack_psn
- 1) & BTH_PSN_MASK
;
1072 if (pkt
->mask
& RXE_SEND_MASK
||
1073 pkt
->mask
& RXE_WRITE_MASK
) {
1074 /* SEND. Ack again and cleanup. C9-105. */
1076 send_ack(qp
, pkt
, AETH_ACK_UNLIMITED
, prev_psn
);
1077 rc
= RESPST_CLEANUP
;
1079 } else if (pkt
->mask
& RXE_READ_MASK
) {
1080 struct resp_res
*res
;
1082 res
= find_resource(qp
, pkt
->psn
);
1084 /* Resource not found. Class D error. Drop the
1087 rc
= RESPST_CLEANUP
;
1090 /* Ensure this new request is the same as the previous
1091 * one or a subset of it.
1093 u64 iova
= reth_va(pkt
);
1094 u32 resid
= reth_len(pkt
);
1096 if (iova
< res
->read
.va_org
||
1097 resid
> res
->read
.length
||
1098 (iova
+ resid
) > (res
->read
.va_org
+
1099 res
->read
.length
)) {
1100 rc
= RESPST_CLEANUP
;
1104 if (reth_rkey(pkt
) != res
->read
.rkey
) {
1105 rc
= RESPST_CLEANUP
;
1109 res
->cur_psn
= pkt
->psn
;
1110 res
->state
= (pkt
->psn
== res
->first_psn
) ?
1111 rdatm_res_state_new
:
1112 rdatm_res_state_replay
;
1115 /* Reset the resource, except length. */
1116 res
->read
.va_org
= iova
;
1117 res
->read
.va
= iova
;
1118 res
->read
.resid
= resid
;
1120 /* Replay the RDMA read reply. */
1122 rc
= RESPST_READ_REPLY
;
1126 struct resp_res
*res
;
1128 /* Find the operation in our list of responder resources. */
1129 res
= find_resource(qp
, pkt
->psn
);
1131 skb_get(res
->atomic
.skb
);
1132 /* Resend the result. */
1133 rc
= rxe_xmit_packet(qp
, pkt
, res
->atomic
.skb
);
1135 pr_err("Failed resending result. This flow is not handled - skb ignored\n");
1136 rc
= RESPST_CLEANUP
;
1141 /* Resource not found. Class D error. Drop the request. */
1142 rc
= RESPST_CLEANUP
;
1149 /* Process a class A or C. Both are treated the same in this implementation. */
1150 static void do_class_ac_error(struct rxe_qp
*qp
, u8 syndrome
,
1151 enum ib_wc_status status
)
1153 qp
->resp
.aeth_syndrome
= syndrome
;
1154 qp
->resp
.status
= status
;
1156 /* indicate that we should go through the ERROR state */
1157 qp
->resp
.goto_error
= 1;
1160 static enum resp_states
do_class_d1e_error(struct rxe_qp
*qp
)
1165 qp
->resp
.drop_msg
= 1;
1167 qp
->resp
.status
= IB_WC_REM_INV_REQ_ERR
;
1168 return RESPST_COMPLETE
;
1170 return RESPST_CLEANUP
;
1173 /* Class D1. This packet may be the start of a
1174 * new message and could be valid. The previous
1175 * message is invalid and ignored. reset the
1176 * recv wr to its original state
1179 qp
->resp
.wqe
->dma
.resid
= qp
->resp
.wqe
->dma
.length
;
1180 qp
->resp
.wqe
->dma
.cur_sge
= 0;
1181 qp
->resp
.wqe
->dma
.sge_offset
= 0;
1182 qp
->resp
.opcode
= -1;
1186 rxe_drop_ref(qp
->resp
.mr
);
1190 return RESPST_CLEANUP
;
1194 static void rxe_drain_req_pkts(struct rxe_qp
*qp
, bool notify
)
1196 struct sk_buff
*skb
;
1198 while ((skb
= skb_dequeue(&qp
->req_pkts
))) {
1206 while (!qp
->srq
&& qp
->rq
.queue
&& queue_head(qp
->rq
.queue
))
1207 advance_consumer(qp
->rq
.queue
);
1210 int rxe_responder(void *arg
)
1212 struct rxe_qp
*qp
= (struct rxe_qp
*)arg
;
1213 struct rxe_dev
*rxe
= to_rdev(qp
->ibqp
.device
);
1214 enum resp_states state
;
1215 struct rxe_pkt_info
*pkt
= NULL
;
1220 qp
->resp
.aeth_syndrome
= AETH_ACK_UNLIMITED
;
1227 switch (qp
->resp
.state
) {
1228 case QP_STATE_RESET
:
1229 state
= RESPST_RESET
;
1233 state
= RESPST_GET_REQ
;
1238 pr_debug("qp#%d state = %s\n", qp_num(qp
),
1239 resp_state_name
[state
]);
1241 case RESPST_GET_REQ
:
1242 state
= get_req(qp
, &pkt
);
1244 case RESPST_CHK_PSN
:
1245 state
= check_psn(qp
, pkt
);
1247 case RESPST_CHK_OP_SEQ
:
1248 state
= check_op_seq(qp
, pkt
);
1250 case RESPST_CHK_OP_VALID
:
1251 state
= check_op_valid(qp
, pkt
);
1253 case RESPST_CHK_RESOURCE
:
1254 state
= check_resource(qp
, pkt
);
1256 case RESPST_CHK_LENGTH
:
1257 state
= check_length(qp
, pkt
);
1259 case RESPST_CHK_RKEY
:
1260 state
= check_rkey(qp
, pkt
);
1262 case RESPST_EXECUTE
:
1263 state
= execute(qp
, pkt
);
1265 case RESPST_COMPLETE
:
1266 state
= do_complete(qp
, pkt
);
1268 case RESPST_READ_REPLY
:
1269 state
= read_reply(qp
, pkt
);
1271 case RESPST_ACKNOWLEDGE
:
1272 state
= acknowledge(qp
, pkt
);
1274 case RESPST_CLEANUP
:
1275 state
= cleanup(qp
, pkt
);
1277 case RESPST_DUPLICATE_REQUEST
:
1278 state
= duplicate_request(qp
, pkt
);
1280 case RESPST_ERR_PSN_OUT_OF_SEQ
:
1281 /* RC only - Class B. Drop packet. */
1282 send_ack(qp
, pkt
, AETH_NAK_PSN_SEQ_ERROR
, qp
->resp
.psn
);
1283 state
= RESPST_CLEANUP
;
1286 case RESPST_ERR_TOO_MANY_RDMA_ATM_REQ
:
1287 case RESPST_ERR_MISSING_OPCODE_FIRST
:
1288 case RESPST_ERR_MISSING_OPCODE_LAST_C
:
1289 case RESPST_ERR_UNSUPPORTED_OPCODE
:
1290 case RESPST_ERR_MISALIGNED_ATOMIC
:
1291 /* RC Only - Class C. */
1292 do_class_ac_error(qp
, AETH_NAK_INVALID_REQ
,
1293 IB_WC_REM_INV_REQ_ERR
);
1294 state
= RESPST_COMPLETE
;
1297 case RESPST_ERR_MISSING_OPCODE_LAST_D1E
:
1298 state
= do_class_d1e_error(qp
);
1300 case RESPST_ERR_RNR
:
1301 if (qp_type(qp
) == IB_QPT_RC
) {
1302 rxe_counter_inc(rxe
, RXE_CNT_SND_RNR
);
1304 send_ack(qp
, pkt
, AETH_RNR_NAK
|
1306 qp
->attr
.min_rnr_timer
),
1309 /* UD/UC - class D */
1310 qp
->resp
.drop_msg
= 1;
1312 state
= RESPST_CLEANUP
;
1315 case RESPST_ERR_RKEY_VIOLATION
:
1316 if (qp_type(qp
) == IB_QPT_RC
) {
1318 do_class_ac_error(qp
, AETH_NAK_REM_ACC_ERR
,
1319 IB_WC_REM_ACCESS_ERR
);
1320 state
= RESPST_COMPLETE
;
1322 qp
->resp
.drop_msg
= 1;
1324 /* UC/SRQ Class D */
1325 qp
->resp
.status
= IB_WC_REM_ACCESS_ERR
;
1326 state
= RESPST_COMPLETE
;
1328 /* UC/non-SRQ Class E. */
1329 state
= RESPST_CLEANUP
;
1334 case RESPST_ERR_LENGTH
:
1335 if (qp_type(qp
) == IB_QPT_RC
) {
1337 do_class_ac_error(qp
, AETH_NAK_INVALID_REQ
,
1338 IB_WC_REM_INV_REQ_ERR
);
1339 state
= RESPST_COMPLETE
;
1340 } else if (qp
->srq
) {
1341 /* UC/UD - class E */
1342 qp
->resp
.status
= IB_WC_REM_INV_REQ_ERR
;
1343 state
= RESPST_COMPLETE
;
1345 /* UC/UD - class D */
1346 qp
->resp
.drop_msg
= 1;
1347 state
= RESPST_CLEANUP
;
1351 case RESPST_ERR_MALFORMED_WQE
:
1353 do_class_ac_error(qp
, AETH_NAK_REM_OP_ERR
,
1354 IB_WC_LOC_QP_OP_ERR
);
1355 state
= RESPST_COMPLETE
;
1358 case RESPST_ERR_CQ_OVERFLOW
:
1360 state
= RESPST_ERROR
;
1364 if (qp
->resp
.goto_error
) {
1365 state
= RESPST_ERROR
;
1372 if (qp
->resp
.goto_error
) {
1373 state
= RESPST_ERROR
;
1380 rxe_drain_req_pkts(qp
, false);
1381 qp
->resp
.wqe
= NULL
;
1385 qp
->resp
.goto_error
= 0;
1386 pr_warn("qp#%d moved to error state\n", qp_num(qp
));