/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/kref.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_user_verbs.h>
#include <rdma/ib_cache.h>
#include "mlx5_ib.h"
#include "user.h"
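/*
 * Note: "mlx5_ib.h" and "user.h" supply the driver-local types used
 * below (to_mdev()/to_mcq()/to_mibcq(), the uverbs ABI structs); they
 * are restored here since the helpers in this file cannot build
 * without them.
 */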
static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq)
{
	struct ib_cq *ibcq = &to_mibcq(cq)->ibcq;

	ibcq->comp_handler(ibcq, ibcq->cq_context);
}
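/*
 * Asynchronous event callback from mlx5_core. The only event expected
 * on a CQ is a CQ error; anything else is logged and dropped, and a
 * genuine error is forwarded to the consumer's event handler as
 * IB_EVENT_CQ_ERR.
 */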
static void mlx5_ib_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
{
	struct mlx5_ib_cq *cq = container_of(mcq, struct mlx5_ib_cq, mcq);
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct ib_cq *ibcq = &cq->ibcq;
	struct ib_event event;

	if (type != MLX5_EVENT_TYPE_CQ_ERROR) {
		mlx5_ib_warn(dev, "Unexpected event type %d on CQ %06x\n",
			     type, mcq->cqn);
		return;
	}

	if (ibcq->event_handler) {
		event.device     = &dev->ib_dev;
		event.event      = IB_EVENT_CQ_ERR;
		event.element.cq = ibcq;
		ibcq->event_handler(&event, ibcq->cq_context);
	}
}
static void *get_cqe_from_buf(struct mlx5_ib_cq_buf *buf, int n, int size)
{
	return mlx5_buf_offset(&buf->buf, n * size);
}
static void *get_cqe(struct mlx5_ib_cq *cq, int n)
{
	return get_cqe_from_buf(&cq->buf, n, cq->mcq.cqe_sz);
}
static u8 sw_ownership_bit(int n, int nent)
{
	return (n & nent) ? 1 : 0;
}
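/*
 * A CQE is in software ownership when its ownership bit matches the
 * parity of the current pass over the ring (sw_ownership_bit() above).
 * With 128-byte CQEs the mlx5_cqe64 payload lives in the second 64
 * bytes, hence the "cqe + 64" adjustment used throughout this file.
 */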
static void *get_sw_cqe(struct mlx5_ib_cq *cq, int n)
{
	void *cqe = get_cqe(cq, n & cq->ibcq.cqe);
	struct mlx5_cqe64 *cqe64;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	if (likely((cqe64->op_own) >> 4 != MLX5_CQE_INVALID) &&
	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ibcq.cqe + 1)))) {
		return cqe;
	} else {
		return NULL;
	}
}
static void *next_cqe_sw(struct mlx5_ib_cq *cq)
{
	return get_sw_cqe(cq, cq->mcq.cons_index);
}
static enum ib_wc_opcode get_umr_comp(struct mlx5_ib_wq *wq, int idx)
{
	switch (wq->wr_data[idx]) {
	case MLX5_IB_WR_UMR:
		return 0;

	case IB_WR_LOCAL_INV:
		return IB_WC_LOCAL_INV;

	case IB_WR_REG_MR:
		return IB_WC_REG_MR;

	default:
		pr_warn("unknown completion status\n");
		return 0;
	}
}
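/*
 * Translate a requester (send-side) completion into an ib_wc. The IMM
 * cases below deliberately fall through to their non-IMM counterparts
 * so the opcode assignment is shared.
 */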
static void handle_good_req(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			    struct mlx5_ib_wq *wq, int idx)
{
	wc->wc_flags = 0;
	switch (be32_to_cpu(cqe->sop_drop_qpn) >> 24) {
	case MLX5_OPCODE_RDMA_WRITE_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
		/* fall through */
	case MLX5_OPCODE_RDMA_WRITE:
		wc->opcode    = IB_WC_RDMA_WRITE;
		break;
	case MLX5_OPCODE_SEND_IMM:
		wc->wc_flags |= IB_WC_WITH_IMM;
		/* fall through */
	case MLX5_OPCODE_SEND:
	case MLX5_OPCODE_SEND_INVAL:
		wc->opcode    = IB_WC_SEND;
		break;
	case MLX5_OPCODE_RDMA_READ:
		wc->opcode    = IB_WC_RDMA_READ;
		wc->byte_len  = be32_to_cpu(cqe->byte_cnt);
		break;
	case MLX5_OPCODE_ATOMIC_CS:
		wc->opcode    = IB_WC_COMP_SWAP;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_FA:
		wc->opcode    = IB_WC_FETCH_ADD;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_CS:
		wc->opcode    = IB_WC_MASKED_COMP_SWAP;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_ATOMIC_MASKED_FA:
		wc->opcode    = IB_WC_MASKED_FETCH_ADD;
		wc->byte_len  = 8;
		break;
	case MLX5_OPCODE_UMR:
		wc->opcode = get_umr_comp(wq, idx);
		break;
	}
}
enum {
	MLX5_GRH_IN_BUFFER = 1,
	MLX5_GRH_IN_CQE	   = 2,
};
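/*
 * Translate a responder (receive-side) completion into an ib_wc:
 * recover the WR id from the SRQ/XRC SRQ or the receive queue, then
 * fill in opcode, flags and, on an Ethernet link layer, the RoCE
 * network header type.
 */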
static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
			     struct mlx5_ib_qp *qp)
{
	enum rdma_link_layer ll = rdma_port_get_link_layer(qp->ibqp.device, 1);
	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.device);
	struct mlx5_ib_srq *srq;
	struct mlx5_ib_wq *wq;
	u16 wqe_ctr;
	u8 g;

	if (qp->ibqp.srq || qp->ibqp.xrcd) {
		struct mlx5_core_srq *msrq = NULL;

		if (qp->ibqp.xrcd) {
			msrq = mlx5_core_get_srq(dev->mdev,
						 be32_to_cpu(cqe->srqn));
			srq = to_mibsrq(msrq);
		} else {
			srq = to_msrq(qp->ibqp.srq);
		}
		if (srq) {
			wqe_ctr = be16_to_cpu(cqe->wqe_counter);
			wc->wr_id = srq->wrid[wqe_ctr];
			mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			if (msrq && atomic_dec_and_test(&msrq->refcount))
				complete(&msrq->free);
		}
	} else {
		wq	  = &qp->rq;
		wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
		++wq->tail;
	}
	wc->byte_len = be32_to_cpu(cqe->byte_cnt);

	switch (cqe->op_own >> 4) {
	case MLX5_CQE_RESP_WR_IMM:
		wc->opcode	= IB_WC_RECV_RDMA_WITH_IMM;
		wc->wc_flags	= IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND:
		wc->opcode   = IB_WC_RECV;
		wc->wc_flags = IB_WC_IP_CSUM_OK;
		break;
	case MLX5_CQE_RESP_SEND_IMM:
		wc->opcode	= IB_WC_RECV;
		wc->wc_flags	= IB_WC_WITH_IMM;
		wc->ex.imm_data = cqe->imm_inval_pkey;
		break;
	case MLX5_CQE_RESP_SEND_INV:
		wc->opcode	= IB_WC_RECV;
		wc->wc_flags	= IB_WC_WITH_INVALIDATE;
		wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
		break;
	}
	wc->slid	   = be16_to_cpu(cqe->slid);
	wc->sl		   = (be32_to_cpu(cqe->flags_rqpn) >> 24) & 0xf;
	wc->src_qp	   = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
	wc->dlid_path_bits = cqe->ml_path;
	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
	wc->wc_flags |= g ? IB_WC_GRH : 0;
	if (unlikely(is_qp1(qp->ibqp.qp_type))) {
		u16 pkey = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;

		ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey,
				    &wc->pkey_index);
	} else {
		wc->pkey_index = 0;
	}

	if (ll != IB_LINK_LAYER_ETHERNET)
		return;

	switch (wc->sl & 0x3) {
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_GRH:
		wc->network_hdr_type = RDMA_NETWORK_IB;
		break;
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV6:
		wc->network_hdr_type = RDMA_NETWORK_IPV6;
		break;
	case MLX5_CQE_ROCE_L3_HEADER_TYPE_IPV4:
		wc->network_hdr_type = RDMA_NETWORK_IPV4;
		break;
	}
	wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
}
static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
{
	__be32 *p = (__be32 *)cqe;
	int i;

	mlx5_ib_warn(dev, "dump error cqe\n");
	for (i = 0; i < sizeof(*cqe) / 16; i++, p += 4)
		pr_info("%08x %08x %08x %08x\n", be32_to_cpu(p[0]),
			be32_to_cpu(p[1]), be32_to_cpu(p[2]),
			be32_to_cpu(p[3]));
}
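/*
 * Map a hardware error syndrome onto an ib_wc_status. Flush and
 * retry-exceeded errors are expected during normal teardown, so they
 * skip the CQE dump that the other syndromes get.
 */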
static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
				  struct mlx5_err_cqe *cqe,
				  struct ib_wc *wc)
{
	int dump = 1;

	switch (cqe->syndrome) {
	case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
		wc->status = IB_WC_LOC_LEN_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_QP_OP_ERR:
		wc->status = IB_WC_LOC_QP_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
		wc->status = IB_WC_LOC_PROT_ERR;
		break;
	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
		dump = 0;
		wc->status = IB_WC_WR_FLUSH_ERR;
		break;
	case MLX5_CQE_SYNDROME_MW_BIND_ERR:
		wc->status = IB_WC_MW_BIND_ERR;
		break;
	case MLX5_CQE_SYNDROME_BAD_RESP_ERR:
		wc->status = IB_WC_BAD_RESP_ERR;
		break;
	case MLX5_CQE_SYNDROME_LOCAL_ACCESS_ERR:
		wc->status = IB_WC_LOC_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_INVAL_REQ_ERR:
		wc->status = IB_WC_REM_INV_REQ_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
		wc->status = IB_WC_REM_ACCESS_ERR;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
		wc->status = IB_WC_REM_OP_ERR;
		break;
	case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
		wc->status = IB_WC_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
		wc->status = IB_WC_RNR_RETRY_EXC_ERR;
		dump = 0;
		break;
	case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
		wc->status = IB_WC_REM_ABORT_ERR;
		break;
	default:
		wc->status = IB_WC_GENERAL_ERR;
		break;
	}

	wc->vendor_err = cqe->vendor_err_synd;
	if (dump)
		dump_cqe(dev, cqe);
}
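/*
 * Atomic responses arrive big-endian and must be byte-swapped in place
 * in the send WQE buffer. The helpers below gate the conversion
 * (currently always off, see the TBD), locate the local address from
 * the WQE segments, and swap 4- or 8-byte chunks.
 */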
static int is_atomic_response(struct mlx5_ib_qp *qp, uint16_t idx)
{
	/* TBD: waiting decision
	 */
	return 0;
}
static void *mlx5_get_atomic_laddr(struct mlx5_ib_qp *qp, uint16_t idx)
{
	struct mlx5_wqe_data_seg *dpseg;
	void *addr;

	dpseg = mlx5_get_send_wqe(qp, idx) + sizeof(struct mlx5_wqe_ctrl_seg) +
		sizeof(struct mlx5_wqe_raddr_seg) +
		sizeof(struct mlx5_wqe_atomic_seg);
	addr = (void *)(unsigned long)be64_to_cpu(dpseg->addr);

	return addr;
}
static void handle_atomic(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			  uint16_t idx)
{
	void *addr;
	int byte_count;
	int i;

	if (!is_atomic_response(qp, idx))
		return;

	byte_count = be32_to_cpu(cqe64->byte_cnt);
	addr = mlx5_get_atomic_laddr(qp, idx);

	if (byte_count == 4) {
		*(uint32_t *)addr = be32_to_cpu(*((__be32 *)addr));
	} else {
		for (i = 0; i < byte_count; i += 8) {
			*(uint64_t *)addr = be64_to_cpu(*((__be64 *)addr));
			addr += 8;
		}
	}
}
static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
			   u16 tail, u16 head)
{
	u16 idx;

	do {
		idx = tail & (qp->sq.wqe_cnt - 1);
		handle_atomic(qp, cqe64, idx);
		if (idx == head)
			break;

		tail = qp->sq.w_list[idx].next;
	} while (1);
	tail = qp->sq.w_list[idx].next;
	qp->sq.last_poll = tail;
}
static void free_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf)
{
	mlx5_buf_free(dev->mdev, &buf->buf);
}
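/*
 * Decode a signature (T10-DIF) error CQE into the ib_sig_err that
 * consumers later retrieve through ib_check_mr_status(): guard, reftag
 * and apptag mismatches carry their expected/actual values in
 * different CQE fields.
 */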
static void get_sig_err_item(struct mlx5_sig_err_cqe *cqe,
			     struct ib_sig_err *item)
{
	u16 syndrome = be16_to_cpu(cqe->syndrome);

#define GUARD_ERR   (1 << 13)
#define APPTAG_ERR  (1 << 12)
#define REFTAG_ERR  (1 << 11)

	if (syndrome & GUARD_ERR) {
		item->err_type = IB_SIG_BAD_GUARD;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) >> 16;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) >> 16;
	} else
	if (syndrome & REFTAG_ERR) {
		item->err_type = IB_SIG_BAD_REFTAG;
		item->expected = be32_to_cpu(cqe->expected_reftag);
		item->actual = be32_to_cpu(cqe->actual_reftag);
	} else
	if (syndrome & APPTAG_ERR) {
		item->err_type = IB_SIG_BAD_APPTAG;
		item->expected = be32_to_cpu(cqe->expected_trans_sig) & 0xffff;
		item->actual = be32_to_cpu(cqe->actual_trans_sig) & 0xffff;
	} else {
		pr_err("Got signature completion error with bad syndrome %04x\n",
		       syndrome);
	}

	item->sig_err_offset = be64_to_cpu(cqe->err_offset);
	item->key = be32_to_cpu(cqe->mkey);
}
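/*
 * Poll a single CQE and fill *wc. *cur_qp caches the QP of the
 * previous CQE so that back-to-back completions on the same QP skip
 * the QP table lookup. Returns 0 on success, -EAGAIN when the CQ is
 * empty, or a negative error for a CQE referencing an unknown QP/MR.
 */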
static int mlx5_poll_one(struct mlx5_ib_cq *cq,
			 struct mlx5_ib_qp **cur_qp,
			 struct ib_wc *wc)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_err_cqe *err_cqe;
	struct mlx5_cqe64 *cqe64;
	struct mlx5_core_qp *mqp;
	struct mlx5_ib_wq *wq;
	struct mlx5_sig_err_cqe *sig_err_cqe;
	struct mlx5_core_mr *mmr;
	struct mlx5_ib_mr *mr;
	uint8_t opcode;
	uint32_t qpn;
	u16 wqe_ctr;
	void *cqe;
	int idx;

repoll:
	cqe = next_cqe_sw(cq);
	if (!cqe)
		return -EAGAIN;

	cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;

	++cq->mcq.cons_index;

	/* Make sure we read CQ entry contents after we've checked the
	 * ownership bit.
	 */
	rmb();

	opcode = cqe64->op_own >> 4;
	if (unlikely(opcode == MLX5_CQE_RESIZE_CQ)) {
		if (likely(cq->resize_buf)) {
			free_cq_buf(dev, &cq->buf);
			cq->buf = *cq->resize_buf;
			kfree(cq->resize_buf);
			cq->resize_buf = NULL;
			goto repoll;
		} else {
			mlx5_ib_warn(dev, "unexpected resize cqe\n");
		}
	}

	qpn = ntohl(cqe64->sop_drop_qpn) & 0xffffff;
	if (!*cur_qp || (qpn != (*cur_qp)->ibqp.qp_num)) {
		/* We do not have to take the QP table lock here,
		 * because CQs will be locked while QPs are removed
		 * from the table.
		 */
		mqp = __mlx5_qp_lookup(dev->mdev, qpn);
		if (unlikely(!mqp)) {
			mlx5_ib_warn(dev, "CQE@CQ %06x for unknown QPN %6x\n",
				     cq->mcq.cqn, qpn);
			return -EINVAL;
		}

		*cur_qp = to_mibqp(mqp);
	}

	wc->qp  = &(*cur_qp)->ibqp;
	switch (opcode) {
	case MLX5_CQE_REQ:
		wq = &(*cur_qp)->sq;
		wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
		idx = wqe_ctr & (wq->wqe_cnt - 1);
		handle_good_req(wc, cqe64, wq, idx);
		handle_atomics(*cur_qp, cqe64, wq->last_poll, idx);
		wc->wr_id = wq->wrid[idx];
		wq->tail = wq->wqe_head[idx] + 1;
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESP_WR_IMM:
	case MLX5_CQE_RESP_SEND:
	case MLX5_CQE_RESP_SEND_IMM:
	case MLX5_CQE_RESP_SEND_INV:
		handle_responder(wc, cqe64, *cur_qp);
		wc->status = IB_WC_SUCCESS;
		break;
	case MLX5_CQE_RESIZE_CQ:
		break;
	case MLX5_CQE_REQ_ERR:
	case MLX5_CQE_RESP_ERR:
		err_cqe = (struct mlx5_err_cqe *)cqe64;
		mlx5_handle_error_cqe(dev, err_cqe, wc);
		mlx5_ib_dbg(dev, "%s error cqe on cqn 0x%x:\n",
			    opcode == MLX5_CQE_REQ_ERR ?
			    "Requestor" : "Responder", cq->mcq.cqn);
		mlx5_ib_dbg(dev, "syndrome 0x%x, vendor syndrome 0x%x\n",
			    err_cqe->syndrome, err_cqe->vendor_err_synd);
		if (opcode == MLX5_CQE_REQ_ERR) {
			wq = &(*cur_qp)->sq;
			wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
			idx = wqe_ctr & (wq->wqe_cnt - 1);
			wc->wr_id = wq->wrid[idx];
			wq->tail = wq->wqe_head[idx] + 1;
		} else {
			struct mlx5_ib_srq *srq;

			if ((*cur_qp)->ibqp.srq) {
				srq = to_msrq((*cur_qp)->ibqp.srq);
				wqe_ctr = be16_to_cpu(cqe64->wqe_counter);
				wc->wr_id = srq->wrid[wqe_ctr];
				mlx5_ib_free_srq_wqe(srq, wqe_ctr);
			} else {
				wq = &(*cur_qp)->rq;
				wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)];
				++wq->tail;
			}
		}
		break;
	case MLX5_CQE_SIG_ERR:
		sig_err_cqe = (struct mlx5_sig_err_cqe *)cqe64;

		read_lock(&dev->mdev->priv.mr_table.lock);
		mmr = __mlx5_mr_lookup(dev->mdev,
				       mlx5_base_mkey(be32_to_cpu(sig_err_cqe->mkey)));
		if (unlikely(!mmr)) {
			read_unlock(&dev->mdev->priv.mr_table.lock);
			mlx5_ib_warn(dev, "CQE@CQ %06x for unknown MR %6x\n",
				     cq->mcq.cqn, be32_to_cpu(sig_err_cqe->mkey));
			return -EINVAL;
		}

		mr = to_mibmr(mmr);
		get_sig_err_item(sig_err_cqe, &mr->sig->err_item);
		mr->sig->sig_err_exists = true;
		mr->sig->sigerr_count++;

		mlx5_ib_warn(dev, "CQN: 0x%x Got SIGERR on key: 0x%x err_type %x err_offset %llx expected %x actual %x\n",
			     cq->mcq.cqn, mr->sig->err_item.key,
			     mr->sig->err_item.err_type,
			     mr->sig->err_item.sig_err_offset,
			     mr->sig->err_item.expected,
			     mr->sig->err_item.actual);

		read_unlock(&dev->mdev->priv.mr_table.lock);
		goto repoll;
	}

	return 0;
}
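/* Standard ib_poll_cq verb: drain up to num_entries completions under
 * the CQ lock and update the consumer index doorbell record once at
 * the end.
 */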
int mlx5_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	struct mlx5_ib_qp *cur_qp = NULL;
	unsigned long flags;
	int npolled;
	int err = 0;

	spin_lock_irqsave(&cq->lock, flags);

	for (npolled = 0; npolled < num_entries; npolled++) {
		err = mlx5_poll_one(cq, &cur_qp, wc + npolled);
		if (err)
			break;
	}

	if (npolled)
		mlx5_cq_set_ci(&cq->mcq);

	spin_unlock_irqrestore(&cq->lock, flags);

	if (err == 0 || err == -EAGAIN)
		return npolled;
	else
		return err;
}
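/* Request a completion notification; the solicited-only request maps
 * onto the MLX5_CQ_DB_REQ_NOT_SOL doorbell variant.
 */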
int mlx5_ib_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct mlx5_core_dev *mdev = to_mdev(ibcq->device)->mdev;
	void __iomem *uar_page = mdev->priv.uuari.uars[0].map;

	mlx5_cq_arm(&to_mcq(ibcq)->mcq,
		    (flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED ?
		    MLX5_CQ_DB_REQ_NOT_SOL : MLX5_CQ_DB_REQ_NOT,
		    uar_page,
		    MLX5_GET_DOORBELL_LOCK(&mdev->priv.cq_uar_lock),
		    to_mcq(ibcq)->mcq.cons_index);

	return 0;
}
static int alloc_cq_buf(struct mlx5_ib_dev *dev, struct mlx5_ib_cq_buf *buf,
			int nent, int cqe_size)
{
	int err;

	err = mlx5_buf_alloc(dev->mdev, nent * cqe_size, &buf->buf);
	if (err)
		return err;

	buf->cqe_size = cqe_size;
	buf->nent = nent;

	return 0;
}
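/*
 * User-space CQ creation: copy in the user command, pin the CQ buffer
 * and doorbell record supplied by the application, and build the
 * firmware create mailbox with the resulting page list.
 */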
static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata,
			  struct ib_ucontext *context, struct mlx5_ib_cq *cq,
			  int entries, struct mlx5_create_cq_mbox_in **cqb,
			  int *cqe_size, int *index, int *inlen)
{
	struct mlx5_ib_create_cq ucmd;
	size_t ucmdlen;
	int page_shift;
	int npages;
	int ncont;
	int err;

	ucmdlen =
		(udata->inlen - sizeof(struct ib_uverbs_cmd_hdr) <
		 sizeof(ucmd)) ? (sizeof(ucmd) -
				  sizeof(ucmd.reserved)) : sizeof(ucmd);

	if (ib_copy_from_udata(&ucmd, udata, ucmdlen))
		return -EFAULT;

	if (ucmdlen == sizeof(ucmd) &&
	    ucmd.reserved != 0)
		return -EINVAL;

	if (ucmd.cqe_size != 64 && ucmd.cqe_size != 128)
		return -EINVAL;

	*cqe_size = ucmd.cqe_size;

	cq->buf.umem = ib_umem_get(context, ucmd.buf_addr,
				   entries * ucmd.cqe_size,
				   IB_ACCESS_LOCAL_WRITE, 1);
	if (IS_ERR(cq->buf.umem)) {
		err = PTR_ERR(cq->buf.umem);
		return err;
	}

	err = mlx5_ib_db_map_user(to_mucontext(context), ucmd.db_addr,
				  &cq->db);
	if (err)
		goto err_umem;

	mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift,
			   &ncont, NULL);
	mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n",
		    ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont);

	*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * ncont;
	*cqb = mlx5_vzalloc(*inlen);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_db;
	}

	mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0);
	(*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;

	*index = to_mucontext(context)->uuari.uars[0].index;

	return 0;

err_db:
	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);

err_umem:
	ib_umem_release(cq->buf.umem);
	return err;
}
static void destroy_cq_user(struct mlx5_ib_cq *cq, struct ib_ucontext *context)
{
	mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db);
	ib_umem_release(cq->buf.umem);
}
static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf)
{
	int i;
	void *cqe;
	struct mlx5_cqe64 *cqe64;

	for (i = 0; i < buf->nent; i++) {
		cqe = get_cqe_from_buf(buf, i, buf->cqe_size);
		cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
		cqe64->op_own = MLX5_CQE_INVALID << 4;
	}
}
static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			    int entries, int cqe_size,
			    struct mlx5_create_cq_mbox_in **cqb,
			    int *index, int *inlen)
{
	int err;

	err = mlx5_db_alloc(dev->mdev, &cq->db);
	if (err)
		return err;

	cq->mcq.set_ci_db  = cq->db.db;
	cq->mcq.arm_db     = cq->db.db + 1;
	cq->mcq.cqe_sz = cqe_size;

	err = alloc_cq_buf(dev, &cq->buf, entries, cqe_size);
	if (err)
		goto err_db;

	init_cq_buf(cq, &cq->buf);

	*inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages;
	*cqb = mlx5_vzalloc(*inlen);
	if (!*cqb) {
		err = -ENOMEM;
		goto err_buf;
	}
	mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas);

	(*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
	*index = dev->mdev->priv.uuari.uars[0].index;

	return 0;

err_buf:
	free_cq_buf(dev, &cq->buf);

err_db:
	mlx5_db_free(dev->mdev, &cq->db);
	return err;
}
static void destroy_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, &cq->buf);
	mlx5_db_free(dev->mdev, &cq->db);
}
struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev,
				const struct ib_cq_init_attr *attr,
				struct ib_ucontext *context,
				struct ib_udata *udata)
{
	int entries = attr->cqe;
	int vector = attr->comp_vector;
	struct mlx5_create_cq_mbox_in *cqb = NULL;
	struct mlx5_ib_dev *dev = to_mdev(ibdev);
	struct mlx5_ib_cq *cq;
	int uninitialized_var(index);
	int uninitialized_var(inlen);
	int cqe_size;
	unsigned int irqn;
	int eqn;
	int err;

	if (entries < 0)
		return ERR_PTR(-EINVAL);

	if (check_cq_create_flags(attr->flags))
		return ERR_PTR(-EOPNOTSUPP);

	entries = roundup_pow_of_two(entries + 1);
	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)))
		return ERR_PTR(-EINVAL);

	cq = kzalloc(sizeof(*cq), GFP_KERNEL);
	if (!cq)
		return ERR_PTR(-ENOMEM);

	cq->ibcq.cqe = entries - 1;
	mutex_init(&cq->resize_mutex);
	spin_lock_init(&cq->lock);
	cq->resize_buf = NULL;
	cq->resize_umem = NULL;
	cq->create_flags = attr->flags;

	if (context) {
		err = create_cq_user(dev, udata, context, cq, entries,
				     &cqb, &cqe_size, &index, &inlen);
		if (err)
			goto err_create;
	} else {
		/* for now choose 64 bytes till we have a proper interface */
		cqe_size = 64;
		err = create_cq_kernel(dev, cq, entries, cqe_size, &cqb,
				       &index, &inlen);
		if (err)
			goto err_create;
	}

	cq->cqe_size = cqe_size;
	cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;

	if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN)
		cqb->ctx.cqe_sz_flags |= (1 << 1);

	cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index);
	err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn);
	if (err)
		goto err_cqb;

	cqb->ctx.c_eqn = cpu_to_be16(eqn);
	cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma);

	err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen);
	if (err)
		goto err_cqb;

	mlx5_ib_dbg(dev, "cqn 0x%x\n", cq->mcq.cqn);
	cq->mcq.irqn = irqn;
	cq->mcq.comp  = mlx5_ib_cq_comp;
	cq->mcq.event = mlx5_ib_cq_event;

	if (context)
		if (ib_copy_to_udata(udata, &cq->mcq.cqn, sizeof(__u32))) {
			err = -EFAULT;
			goto err_cmd;
		}

	kvfree(cqb);
	return &cq->ibcq;

err_cmd:
	mlx5_core_destroy_cq(dev->mdev, &cq->mcq);

err_cqb:
	kvfree(cqb);
	if (context)
		destroy_cq_user(cq, context);
	else
		destroy_cq_kernel(dev, cq);

err_create:
	kfree(cq);

	return ERR_PTR(err);
}
int mlx5_ib_destroy_cq(struct ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	struct ib_ucontext *context = NULL;

	if (cq->uobject)
		context = cq->uobject->context;

	mlx5_core_destroy_cq(dev->mdev, &mcq->mcq);
	if (context)
		destroy_cq_user(mcq, context);
	else
		destroy_cq_kernel(dev, mcq);

	kfree(mcq);

	return 0;
}
static int is_equal_rsn(struct mlx5_cqe64 *cqe64, u32 rsn)
{
	return rsn == (ntohl(cqe64->sop_drop_qpn) & 0xffffff);
}
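/*
 * Remove all CQEs belonging to "rsn" (a QP or SRQ number) by copying
 * the surviving older entries over them. Callers must hold cq->lock;
 * mlx5_ib_cq_clean() below is the locked wrapper.
 */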
void __mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 rsn, struct mlx5_ib_srq *srq)
{
	struct mlx5_cqe64 *cqe64, *dest64;
	void *cqe, *dest;
	u32 prod_index;
	int nfreed = 0;
	u8 owner_bit;

	if (!cq)
		return;

	/* First we need to find the current producer index, so we
	 * know where to start cleaning from. It doesn't matter if HW
	 * adds new entries after this loop -- the QP we're worried
	 * about is already in RESET, so the new entries won't come
	 * from our QP and therefore don't need to be checked.
	 */
	for (prod_index = cq->mcq.cons_index; get_sw_cqe(cq, prod_index); prod_index++)
		if (prod_index == cq->mcq.cons_index + cq->ibcq.cqe)
			break;

	/* Now sweep backwards through the CQ, removing CQ entries
	 * that match our QP by copying older entries on top of them.
	 */
	while ((int) --prod_index - (int) cq->mcq.cons_index >= 0) {
		cqe = get_cqe(cq, prod_index & cq->ibcq.cqe);
		cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
		if (is_equal_rsn(cqe64, rsn)) {
			if (srq && (ntohl(cqe64->srqn) & 0xffffff))
				mlx5_ib_free_srq_wqe(srq, be16_to_cpu(cqe64->wqe_counter));
			++nfreed;
		} else if (nfreed) {
			dest = get_cqe(cq, (prod_index + nfreed) & cq->ibcq.cqe);
			dest64 = (cq->mcq.cqe_sz == 64) ? dest : dest + 64;
			owner_bit = dest64->op_own & MLX5_CQE_OWNER_MASK;
			memcpy(dest, cqe, cq->mcq.cqe_sz);
			dest64->op_own = owner_bit |
				(dest64->op_own & ~MLX5_CQE_OWNER_MASK);
		}
	}

	if (nfreed) {
		cq->mcq.cons_index += nfreed;
		/* Make sure update of buffer contents is done before
		 * updating consumer index.
		 */
		wmb();
		mlx5_cq_set_ci(&cq->mcq);
	}
}
void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq)
{
	if (!cq)
		return;

	spin_lock_irq(&cq->lock);
	__mlx5_ib_cq_clean(cq, qpn, srq);
	spin_unlock_irq(&cq->lock);
}
int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
	struct mlx5_modify_cq_mbox_in *in;
	struct mlx5_ib_dev *dev = to_mdev(cq->device);
	struct mlx5_ib_cq *mcq = to_mcq(cq);
	int err;
	u32 fsel;

	if (!MLX5_CAP_GEN(dev->mdev, cq_moderation))
		return -ENOSYS;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	in->cqn = cpu_to_be32(mcq->mcq.cqn);
	fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT);
	in->ctx.cq_period = cpu_to_be16(cq_period);
	in->ctx.cq_max_count = cpu_to_be16(cq_count);
	in->field_select = cpu_to_be32(fsel);
	err = mlx5_core_modify_cq(dev->mdev, &mcq->mcq, in, sizeof(*in));
	kfree(in);

	if (err)
		mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn);

	return err;
}
static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
		       int entries, struct ib_udata *udata, int *npas,
		       int *page_shift, int *cqe_size)
{
	struct mlx5_ib_resize_cq ucmd;
	struct ib_umem *umem;
	int err;
	int npages;
	struct ib_ucontext *context = cq->buf.umem->context;

	err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd));
	if (err)
		return err;

	if (ucmd.reserved0 || ucmd.reserved1)
		return -EINVAL;

	umem = ib_umem_get(context, ucmd.buf_addr, entries * ucmd.cqe_size,
			   IB_ACCESS_LOCAL_WRITE, 1);
	if (IS_ERR(umem)) {
		err = PTR_ERR(umem);
		return err;
	}

	mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift,
			   npas, NULL);

	cq->resize_umem = umem;
	*cqe_size = ucmd.cqe_size;

	return 0;
}
static void un_resize_user(struct mlx5_ib_cq *cq)
{
	ib_umem_release(cq->resize_umem);
}
static int resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq,
			 int entries, int cqe_size)
{
	int err;

	cq->resize_buf = kzalloc(sizeof(*cq->resize_buf), GFP_KERNEL);
	if (!cq->resize_buf)
		return -ENOMEM;

	err = alloc_cq_buf(dev, cq->resize_buf, entries, cqe_size);
	if (err)
		goto ex;

	init_cq_buf(cq, cq->resize_buf);

	return 0;

ex:
	kfree(cq->resize_buf);
	return err;
}
static void un_resize_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq)
{
	free_cq_buf(dev, cq->resize_buf);
	cq->resize_buf = NULL;
}
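/*
 * Kernel-side resize: copy still-valid CQEs from the old buffer into
 * the new one, fixing up each ownership bit, until the hardware's
 * MLX5_CQE_RESIZE_CQ marker is reached.
 */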
static int copy_resize_cqes(struct mlx5_ib_cq *cq)
{
	struct mlx5_ib_dev *dev = to_mdev(cq->ibcq.device);
	struct mlx5_cqe64 *scqe64;
	struct mlx5_cqe64 *dcqe64;
	void *start_cqe;
	void *scqe;
	void *dcqe;
	int ssize;
	int dsize;
	int i;
	u8 sw_own;

	ssize = cq->buf.cqe_size;
	dsize = cq->resize_buf->cqe_size;
	if (ssize != dsize) {
		mlx5_ib_warn(dev, "resize from different cqe size is not supported\n");
		return -EINVAL;
	}

	i = cq->mcq.cons_index;
	scqe = get_sw_cqe(cq, i);
	scqe64 = ssize == 64 ? scqe : scqe + 64;
	start_cqe = scqe;
	if (!scqe) {
		mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
		return -EINVAL;
	}

	while ((scqe64->op_own >> 4) != MLX5_CQE_RESIZE_CQ) {
		dcqe = get_cqe_from_buf(cq->resize_buf,
					(i + 1) & (cq->resize_buf->nent),
					dsize);
		dcqe64 = dsize == 64 ? dcqe : dcqe + 64;
		sw_own = sw_ownership_bit(i + 1, cq->resize_buf->nent);
		memcpy(dcqe, scqe, dsize);
		dcqe64->op_own = (dcqe64->op_own & ~MLX5_CQE_OWNER_MASK) | sw_own;

		++i;
		scqe = get_sw_cqe(cq, i);
		scqe64 = ssize == 64 ? scqe : scqe + 64;
		if (!scqe) {
			mlx5_ib_warn(dev, "expected cqe in sw ownership\n");
			return -EINVAL;
		}

		if (scqe == start_cqe) {
			pr_warn("resize CQ failed to get resize CQE, CQN 0x%x\n",
				cq->mcq.cqn);
			return -ENOMEM;
		}
	}
	++cq->mcq.cons_index;
	return 0;
}
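/*
 * Resize verb: user CQs swap in a newly pinned umem after the firmware
 * RESIZE command; kernel CQs additionally migrate unpolled CQEs via
 * copy_resize_cqes() under the CQ lock before freeing the old buffer.
 */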
int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ibcq->device);
	struct mlx5_ib_cq *cq = to_mcq(ibcq);
	struct mlx5_modify_cq_mbox_in *in;
	int err;
	int npas;
	int page_shift;
	int inlen;
	int uninitialized_var(cqe_size);
	unsigned long flags;

	if (!MLX5_CAP_GEN(dev->mdev, cq_resize)) {
		pr_info("Firmware does not support resize CQ\n");
		return -ENOSYS;
	}

	if (entries < 1)
		return -EINVAL;

	entries = roundup_pow_of_two(entries + 1);
	if (entries > (1 << MLX5_CAP_GEN(dev->mdev, log_max_cq_sz)) + 1)
		return -EINVAL;

	if (entries == ibcq->cqe + 1)
		return 0;

	mutex_lock(&cq->resize_mutex);
	if (udata) {
		err = resize_user(dev, cq, entries, udata, &npas, &page_shift,
				  &cqe_size);
	} else {
		cqe_size = 64;
		err = resize_kernel(dev, cq, entries, cqe_size);
		if (!err) {
			npas = cq->resize_buf->buf.npages;
			page_shift = cq->resize_buf->buf.page_shift;
		}
	}

	if (err)
		goto ex;

	inlen = sizeof(*in) + npas * sizeof(in->pas[0]);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto ex_resize;
	}

	if (udata)
		mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift,
				     in->pas, 0);
	else
		mlx5_fill_page_array(&cq->resize_buf->buf, in->pas);

	in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE  |
				       MLX5_MODIFY_CQ_MASK_PG_OFFSET |
				       MLX5_MODIFY_CQ_MASK_PG_SIZE);
	in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
	in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5;
	in->ctx.page_offset = 0;
	in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24);
	in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE);
	in->cqn = cpu_to_be32(cq->mcq.cqn);

	err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen);
	if (err)
		goto ex_alloc;

	if (udata) {
		cq->ibcq.cqe = entries - 1;
		ib_umem_release(cq->buf.umem);
		cq->buf.umem = cq->resize_umem;
		cq->resize_umem = NULL;
	} else {
		struct mlx5_ib_cq_buf tbuf;
		int resized = 0;

		spin_lock_irqsave(&cq->lock, flags);
		if (cq->resize_buf) {
			err = copy_resize_cqes(cq);
			if (!err) {
				tbuf = cq->buf;
				cq->buf = *cq->resize_buf;
				kfree(cq->resize_buf);
				cq->resize_buf = NULL;
				resized = 1;
			}
		}
		cq->ibcq.cqe = entries - 1;
		spin_unlock_irqrestore(&cq->lock, flags);
		if (resized)
			free_cq_buf(dev, &tbuf);
	}

	mutex_unlock(&cq->resize_mutex);

	kvfree(in);
	return 0;

ex_alloc:
	kvfree(in);

ex_resize:
	if (udata)
		un_resize_user(cq);
	else
		un_resize_kernel(dev, cq);

ex:
	mutex_unlock(&cq->resize_mutex);
	return err;
}
int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq)
{
	struct mlx5_ib_cq *cq;

	if (!ibcq)
		return 128;

	cq = to_mcq(ibcq);
	return cq->cqe_size;
}