/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *	- Redistributions of source code must retain the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer.
 *
 *	- Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following
 *	  disclaimer in the documentation and/or other materials
 *	  provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/dma-mapping.h>
#include <net/addrconf.h>
#include "rxe.h"
#include "rxe_loc.h"
#include "rxe_queue.h"
#include "rxe_hw_counters.h"

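/* rxe_verbs.c wires the soft-RoCE (rxe) driver into the kernel RDMA verbs
 * layer: it implements the ib_device callbacks (query/modify device and
 * port, PD/AH/SRQ/QP/CQ/MR verbs, multicast attach/detach) and registers
 * the device with the IB core in rxe_register_device() below.
 */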
static int rxe_query_device(struct ib_device *dev,
			    struct ib_device_attr *attr,
			    struct ib_udata *uhw)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (uhw->inlen || uhw->outlen)
		return -EINVAL;

	*attr = rxe->attr;
	return 0;
}

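/* Map an ethtool link speed (in Mb/s) to the nearest InfiniBand
 * width/speed pair reported through ib_port_attr. The per-lane IB data
 * rates are roughly SDR 2.5, DDR 5, QDR 10, FDR10 10.3125 and
 * EDR 25.78125 Gb/s, so e.g. a 40GbE link is advertised as 4X FDR10
 * (~41.25 Gb/s aggregate).
 */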
static void rxe_eth_speed_to_ib_speed(int speed, u8 *active_speed,
				      u8 *active_width)
{
	if (speed <= 1000) {
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_SDR;
	} else if (speed <= 10000) {
		*active_width = IB_WIDTH_1X;
		*active_speed = IB_SPEED_FDR10;
	} else if (speed <= 20000) {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_DDR;
	} else if (speed <= 30000) {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_QDR;
	} else if (speed <= 40000) {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_FDR10;
	} else {
		*active_width = IB_WIDTH_4X;
		*active_speed = IB_SPEED_EDR;
	}
}

static int rxe_query_port(struct ib_device *dev,
			  u8 port_num, struct ib_port_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;
	u32 speed;

	if (unlikely(port_num != 1)) {
		pr_warn("invalid port_number %d\n", port_num);
		goto err1;
	}

	port = &rxe->port;

	/* *attr is zeroed by the caller; avoid zeroing it again here */
	*attr = port->attr;

	mutex_lock(&rxe->usdev_lock);
	if (rxe->ndev->ethtool_ops->get_link_ksettings) {
		struct ethtool_link_ksettings ks;

		rxe->ndev->ethtool_ops->get_link_ksettings(rxe->ndev, &ks);
		speed = ks.base.speed;
	} else if (rxe->ndev->ethtool_ops->get_settings) {
		struct ethtool_cmd cmd;

		rxe->ndev->ethtool_ops->get_settings(rxe->ndev, &cmd);
		speed = cmd.speed;
	} else {
		pr_warn("%s speed is unknown, defaulting to 1000\n",
			rxe->ndev->name);
		speed = 1000;
	}
	rxe_eth_speed_to_ib_speed(speed, &attr->active_speed,
				  &attr->active_width);
	mutex_unlock(&rxe->usdev_lock);

	return 0;

err1:
	return -EINVAL;
}

static int rxe_query_gid(struct ib_device *device,
			 u8 port_num, int index, union ib_gid *gid)
{
	int ret;

	if (index > RXE_PORT_GID_TBL_LEN)
		return -EINVAL;

	ret = ib_get_cached_gid(device, port_num, index, gid, NULL);
	if (ret == -EAGAIN) {
		memcpy(gid, &zgid, sizeof(*gid));
		return 0;
	}

	return ret;
}

static int rxe_add_gid(struct ib_device *device, u8 port_num, unsigned int
		       index, const union ib_gid *gid,
		       const struct ib_gid_attr *attr, void **context)
{
	if (index >= RXE_PORT_GID_TBL_LEN)
		return -EINVAL;
	return 0;
}

static int rxe_del_gid(struct ib_device *device, u8 port_num, unsigned int
		       index, void **context)
{
	if (index >= RXE_PORT_GID_TBL_LEN)
		return -EINVAL;
	return 0;
}

static struct net_device *rxe_get_netdev(struct ib_device *device,
					 u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(device);

	if (rxe->ndev) {
		dev_hold(rxe->ndev);
		return rxe->ndev;
	}

	return NULL;
}

static int rxe_query_pkey(struct ib_device *device,
			  u8 port_num, u16 index, u16 *pkey)
{
	struct rxe_dev *rxe = to_rdev(device);
	struct rxe_port *port;

	if (unlikely(port_num != 1)) {
		dev_warn(device->dev.parent, "invalid port_num = %d\n",
			 port_num);
		goto err1;
	}

	port = &rxe->port;

	if (unlikely(index >= port->attr.pkey_tbl_len)) {
		dev_warn(device->dev.parent, "invalid index = %d\n",
			 index);
		goto err1;
	}

	*pkey = port->pkey_tbl[index];
	return 0;

err1:
	return -EINVAL;
}

static int rxe_modify_device(struct ib_device *dev,
			     int mask, struct ib_device_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);

	if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID)
		rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid);

	if (mask & IB_DEVICE_MODIFY_NODE_DESC) {
		memcpy(rxe->ib_dev.node_desc,
		       attr->node_desc, sizeof(rxe->ib_dev.node_desc));
	}

	return 0;
}

static int rxe_modify_port(struct ib_device *dev,
			   u8 port_num, int mask, struct ib_port_modify *attr)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_port *port;

	if (unlikely(port_num != 1)) {
		pr_warn("invalid port_num = %d\n", port_num);
		goto err1;
	}

	port = &rxe->port;

	port->attr.port_cap_flags |= attr->set_port_cap_mask;
	port->attr.port_cap_flags &= ~attr->clr_port_cap_mask;

	if (mask & IB_PORT_RESET_QKEY_CNTR)
		port->attr.qkey_viol_cntr = 0;

	return 0;

err1:
	return -EINVAL;
}

static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev,
					       u8 port_num)
{
	struct rxe_dev *rxe = to_rdev(dev);

	return rxe_link_layer(rxe, port_num);
}

static struct ib_ucontext *rxe_alloc_ucontext(struct ib_device *dev,
					      struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_ucontext *uc;

	uc = rxe_alloc(&rxe->uc_pool);
	return uc ? &uc->ibuc : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_ucontext(struct ib_ucontext *ibuc)
{
	struct rxe_ucontext *uc = to_ruc(ibuc);

	rxe_drop_ref(uc);
	return 0;
}

static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
			      struct ib_port_immutable *immutable)
{
	int err;
	struct ib_port_attr attr;

	immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;

	err = ib_query_port(dev, port_num, &attr);
	if (err)
		return err;

	immutable->pkey_tbl_len = attr.pkey_tbl_len;
	immutable->gid_tbl_len = attr.gid_tbl_len;
	immutable->max_mad_size = IB_MGMT_MAD_SIZE;

	return 0;
}

static struct ib_pd *rxe_alloc_pd(struct ib_device *dev,
				  struct ib_ucontext *context,
				  struct ib_udata *udata)
{
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_pd *pd;

	pd = rxe_alloc(&rxe->pd_pool);
	return pd ? &pd->ibpd : ERR_PTR(-ENOMEM);
}

static int rxe_dealloc_pd(struct ib_pd *ibpd)
{
	struct rxe_pd *pd = to_rpd(ibpd);

	rxe_drop_ref(pd);
	return 0;
}

static int rxe_init_av(struct rxe_dev *rxe, struct rdma_ah_attr *attr,
		       struct rxe_av *av)
{
	int err;
	union ib_gid sgid;
	struct ib_gid_attr sgid_attr;

	err = ib_get_cached_gid(&rxe->ib_dev, rdma_ah_get_port_num(attr),
				rdma_ah_read_grh(attr)->sgid_index, &sgid,
				&sgid_attr);
	if (err) {
		pr_err("Failed to query sgid. err = %d\n", err);
		return err;
	}

	err = rxe_av_from_attr(rxe, rdma_ah_get_port_num(attr), av, attr);
	if (!err)
		err = rxe_av_fill_ip_info(rxe, av, attr, &sgid_attr, &sgid);

	if (sgid_attr.ndev)
		dev_put(sgid_attr.ndev);
	return err;
}

static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd,
				   struct rdma_ah_attr *attr,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_ah *ah;

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		goto err1;

	ah = rxe_alloc(&rxe->ah_pool);
	if (!ah) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_ref(pd);
	ah->pd = pd;

	err = rxe_init_av(rxe, attr, &ah->av);
	if (err)
		goto err2;

	return &ah->ibah;

err2:
	rxe_drop_ref(pd);
	rxe_drop_ref(ah);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	err = rxe_av_chk_attr(rxe, attr);
	if (err)
		return err;

	err = rxe_init_av(rxe, attr, &ah->av);
	if (err)
		return err;

	return 0;
}

static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
{
	struct rxe_dev *rxe = to_rdev(ibah->device);
	struct rxe_ah *ah = to_rah(ibah);

	memset(attr, 0, sizeof(*attr));
	attr->type = ibah->type;
	rxe_av_to_attr(rxe, &ah->av, attr);
	return 0;
}

static int rxe_destroy_ah(struct ib_ah *ibah)
{
	struct rxe_ah *ah = to_rah(ibah);

	rxe_drop_ref(ah->pd);
	rxe_drop_ref(ah);
	return 0;
}

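/* Copy one ib_recv_wr into the next free slot of a receive queue ring.
 * The caller holds the queue's producer_lock; the responder side consumes
 * entries from the same ring, so the producer index is only advanced once
 * the WQE contents are visible (see the barrier below).
 */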
static int post_one_recv(struct rxe_rq *rq, struct ib_recv_wr *ibwr)
{
	int err;
	int i;
	u32 length;
	struct rxe_recv_wqe *recv_wqe;
	int num_sge = ibwr->num_sge;

	if (unlikely(queue_full(rq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	if (unlikely(num_sge > rq->max_sge)) {
		err = -EINVAL;
		goto err1;
	}

	length = 0;
	for (i = 0; i < num_sge; i++)
		length += ibwr->sg_list[i].length;

	recv_wqe = producer_addr(rq->queue);
	recv_wqe->wr_id = ibwr->wr_id;
	recv_wqe->num_sge = num_sge;

	memcpy(recv_wqe->dma.sge, ibwr->sg_list,
	       num_sge * sizeof(struct ib_sge));

	recv_wqe->dma.length = length;
	recv_wqe->dma.resid = length;
	recv_wqe->dma.num_sge = num_sge;
	recv_wqe->dma.cur_sge = 0;
	recv_wqe->dma.sge_offset = 0;

	/* make sure all changes to the work queue are written before we
	 * update the producer pointer
	 */
	smp_wmb();

	advance_producer(rq->queue);
	return 0;

err1:
	return err;
}

static struct ib_srq *rxe_create_srq(struct ib_pd *ibpd,
				     struct ib_srq_init_attr *init,
				     struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_srq *srq;
	struct ib_ucontext *context = udata ? ibpd->uobject->context : NULL;

	err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK);
	if (err)
		goto err1;

	srq = rxe_alloc(&rxe->srq_pool);
	if (!srq) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(srq);
	rxe_add_ref(pd);
	srq->pd = pd;

	err = rxe_srq_from_init(rxe, srq, init, context, udata);
	if (err)
		goto err2;

	return &srq->ibsrq;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(srq);
	rxe_drop_ref(srq);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
			  enum ib_srq_attr_mask mask,
			  struct ib_udata *udata)
{
	int err;
	struct rxe_srq *srq = to_rsrq(ibsrq);
	struct rxe_dev *rxe = to_rdev(ibsrq->device);

	err = rxe_srq_chk_attr(rxe, srq, attr, mask);
	if (err)
		goto err1;

	err = rxe_srq_from_attr(rxe, srq, attr, mask, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (!srq->rq.queue)
		return -EINVAL;

	attr->max_wr = srq->rq.queue->buf->index_mask;
	attr->max_sge = srq->rq.max_sge;
	attr->srq_limit = srq->limit;
	return 0;
}

static int rxe_destroy_srq(struct ib_srq *ibsrq)
{
	struct rxe_srq *srq = to_rsrq(ibsrq);

	if (srq->rq.queue)
		rxe_queue_cleanup(srq->rq.queue);

	rxe_drop_ref(srq->pd);
	rxe_drop_index(srq);
	rxe_drop_ref(srq);

	return 0;
}

static int rxe_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
			     struct ib_recv_wr **bad_wr)
{
	int err = 0;
	unsigned long flags;
	struct rxe_srq *srq = to_rsrq(ibsrq);

	spin_lock_irqsave(&srq->rq.producer_lock, flags);

	while (wr) {
		err = post_one_recv(&srq->rq, wr);
		if (unlikely(err))
			break;
		wr = wr->next;
	}

	spin_unlock_irqrestore(&srq->rq.producer_lock, flags);

	if (err)
		*bad_wr = wr;

	return err;
}

static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd,
				   struct ib_qp_init_attr *init,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_qp *qp;

	err = rxe_qp_chk_init(rxe, init);
	if (err)
		goto err1;

	qp = rxe_alloc(&rxe->qp_pool);
	if (!qp) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(qp);

	if (udata) {
		if (udata->inlen) {
			err = -EINVAL;
			goto err2;
		}
		qp->is_user = 1;
	}

	err = rxe_qp_from_init(rxe, qp, pd, init, udata, ibpd);
	if (err)
		goto err3;

	return &qp->ibqp;

err3:
	rxe_drop_index(qp);
err2:
	rxe_drop_ref(qp);
err1:
	return ERR_PTR(err);
}

static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			 int mask, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	err = rxe_qp_chk_attr(rxe, qp, attr, mask);
	if (err)
		goto err1;

	err = rxe_qp_from_attr(qp, attr, mask, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
			int mask, struct ib_qp_init_attr *init)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_to_init(qp, init);
	rxe_qp_to_attr(qp, attr, mask);

	return 0;
}

static int rxe_destroy_qp(struct ib_qp *ibqp)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	rxe_qp_destroy(qp);
	rxe_drop_index(qp);
	rxe_drop_ref(qp);
	return 0;
}

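/* Sanity-check a send WR against the queue limits before it is copied in:
 * SGE count, 8-byte alignment of the remote address for atomics, and the
 * inline data cap.
 */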
static int validate_send_wr(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			    unsigned int mask, unsigned int length)
{
	int num_sge = ibwr->num_sge;
	struct rxe_sq *sq = &qp->sq;

	if (unlikely(num_sge > sq->max_sge))
		goto err1;

	if (unlikely(mask & WR_ATOMIC_MASK)) {
		if (length < 8)
			goto err1;

		if (atomic_wr(ibwr)->remote_addr & 0x7)
			goto err1;
	}

	if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
		     (length > sq->max_inline)))
		goto err1;

	return 0;

err1:
	return -EINVAL;
}

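/* Translate the opcode-specific fields of an ib_send_wr into the driver's
 * internal rxe_send_wr layout. For UD-style QPs (UD/SMI/GSI) the remote
 * QPN and Q_Key travel with the WQE; for connected QPs the fields depend
 * on the opcode, and RDMA_WRITE_WITH_IMM deliberately falls through to
 * pick up the RDMA address and rkey as well.
 */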
static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
			 struct ib_send_wr *ibwr)
{
	wr->wr_id = ibwr->wr_id;
	wr->num_sge = ibwr->num_sge;
	wr->opcode = ibwr->opcode;
	wr->send_flags = ibwr->send_flags;

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI) {
		wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn;
		wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey;
		if (qp_type(qp) == IB_QPT_GSI)
			wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index;
		if (wr->opcode == IB_WR_SEND_WITH_IMM)
			wr->ex.imm_data = ibwr->ex.imm_data;
	} else {
		switch (wr->opcode) {
		case IB_WR_RDMA_WRITE_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			/* fall through */
		case IB_WR_RDMA_READ:
		case IB_WR_RDMA_WRITE:
			wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr;
			wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey;
			break;
		case IB_WR_SEND_WITH_IMM:
			wr->ex.imm_data = ibwr->ex.imm_data;
			break;
		case IB_WR_SEND_WITH_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_ATOMIC_CMP_AND_SWP:
		case IB_WR_ATOMIC_FETCH_AND_ADD:
			wr->wr.atomic.remote_addr =
				atomic_wr(ibwr)->remote_addr;
			wr->wr.atomic.compare_add =
				atomic_wr(ibwr)->compare_add;
			wr->wr.atomic.swap = atomic_wr(ibwr)->swap;
			wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey;
			break;
		case IB_WR_LOCAL_INV:
			wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey;
			break;
		case IB_WR_REG_MR:
			wr->wr.reg.mr = reg_wr(ibwr)->mr;
			wr->wr.reg.key = reg_wr(ibwr)->key;
			wr->wr.reg.access = reg_wr(ibwr)->access;
			break;
		default:
			break;
		}
	}
}

static int init_send_wqe(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			 unsigned int mask, unsigned int length,
			 struct rxe_send_wqe *wqe)
{
	int num_sge = ibwr->num_sge;
	struct ib_sge *sge;
	int i;
	u8 *p;

	init_send_wr(qp, &wqe->wr, ibwr);

	if (qp_type(qp) == IB_QPT_UD ||
	    qp_type(qp) == IB_QPT_SMI ||
	    qp_type(qp) == IB_QPT_GSI)
		memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av));

	if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) {
		p = wqe->dma.inline_data;

		sge = ibwr->sg_list;
		for (i = 0; i < num_sge; i++, sge++) {
			memcpy(p, (void *)(uintptr_t)sge->addr,
			       sge->length);

			p += sge->length;
		}
	} else if (mask & WR_REG_MASK) {
		wqe->mask = mask;
		wqe->state = wqe_state_posted;
		return 0;
	} else
		memcpy(wqe->dma.sge, ibwr->sg_list,
		       num_sge * sizeof(struct ib_sge));

	wqe->iova = (mask & WR_ATOMIC_MASK) ?
		atomic_wr(ibwr)->remote_addr :
		rdma_wr(ibwr)->remote_addr;
	wqe->mask = mask;
	wqe->dma.length = length;
	wqe->dma.resid = length;
	wqe->dma.num_sge = num_sge;
	wqe->dma.cur_sge = 0;
	wqe->dma.sge_offset = 0;
	wqe->state = wqe_state_posted;
	wqe->ssn = atomic_add_return(1, &qp->ssn);

	return 0;
}

static int post_one_send(struct rxe_qp *qp, struct ib_send_wr *ibwr,
			 unsigned int mask, u32 length)
{
	int err;
	struct rxe_sq *sq = &qp->sq;
	struct rxe_send_wqe *send_wqe;
	unsigned long flags;

	err = validate_send_wr(qp, ibwr, mask, length);
	if (err)
		return err;

	spin_lock_irqsave(&qp->sq.sq_lock, flags);

	if (unlikely(queue_full(sq->queue))) {
		err = -ENOMEM;
		goto err1;
	}

	send_wqe = producer_addr(sq->queue);

	err = init_send_wqe(qp, ibwr, mask, length, send_wqe);
	if (unlikely(err))
		goto err1;

	/*
	 * make sure all changes to the work queue are
	 * written before we update the producer pointer
	 */
	smp_wmb();

	advance_producer(sq->queue);
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);

	return 0;

err1:
	spin_unlock_irqrestore(&qp->sq.sq_lock, flags);
	return err;
}

static int rxe_post_send_kernel(struct rxe_qp *qp, struct ib_send_wr *wr,
				struct ib_send_wr **bad_wr)
{
	int err = 0;
	unsigned int mask;
	unsigned int length = 0;
	int i;
	int must_sched;

	while (wr) {
		mask = wr_opcode_mask(wr->opcode, qp);
		if (unlikely(!mask)) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		if (unlikely((wr->send_flags & IB_SEND_INLINE) &&
			     !(mask & WR_INLINE_MASK))) {
			err = -EINVAL;
			*bad_wr = wr;
			break;
		}

		length = 0;
		for (i = 0; i < wr->num_sge; i++)
			length += wr->sg_list[i].length;

		err = post_one_send(qp, wr, mask, length);
		if (err) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	/*
	 * Must schedule in the GSI QP case because ib_send_mad() holds an
	 * irq lock and the requester calls ip_local_out_sk(), which takes
	 * spin_lock_bh.
	 */
	must_sched = (qp_type(qp) == IB_QPT_GSI) ||
			(queue_count(qp->sq.queue) > 1);

	rxe_run_task(&qp->req.task, must_sched);

	return err;
}

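/* ib_post_send entry point. For user QPs the WQEs are assumed to have been
 * written into the mmap'ed queue by the userspace provider already, so only
 * the requester task needs a kick; kernel QPs go through
 * rxe_post_send_kernel() above.
 */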
static int rxe_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
			 struct ib_send_wr **bad_wr)
{
	struct rxe_qp *qp = to_rqp(ibqp);

	if (unlikely(!qp->valid)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (unlikely(qp->req.state < QP_STATE_READY)) {
		*bad_wr = wr;
		return -EINVAL;
	}

	if (qp->is_user) {
		/* Utilize process context to do protocol processing */
		rxe_run_task(&qp->req.task, 0);
		return 0;
	} else
		return rxe_post_send_kernel(qp, wr, bad_wr);
}

static int rxe_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
			 struct ib_recv_wr **bad_wr)
{
	int err = 0;
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_rq *rq = &qp->rq;
	unsigned long flags;

	if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	if (unlikely(qp->srq)) {
		*bad_wr = wr;
		err = -EINVAL;
		goto err1;
	}

	spin_lock_irqsave(&rq->producer_lock, flags);

	while (wr) {
		err = post_one_recv(rq, wr);
		if (unlikely(err)) {
			*bad_wr = wr;
			break;
		}
		wr = wr->next;
	}

	spin_unlock_irqrestore(&rq->producer_lock, flags);

err1:
	return err;
}

static struct ib_cq *rxe_create_cq(struct ib_device *dev,
				   const struct ib_cq_init_attr *attr,
				   struct ib_ucontext *context,
				   struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(dev);
	struct rxe_cq *cq;

	if (attr->flags)
		return ERR_PTR(-EINVAL);

	err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector, udata);
	if (err)
		goto err1;

	cq = rxe_alloc(&rxe->cq_pool);
	if (!cq) {
		err = -ENOMEM;
		goto err1;
	}

	err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector,
			       context, udata);
	if (err)
		goto err2;

	return &cq->ibcq;

err2:
	rxe_drop_ref(cq);
err1:
	return ERR_PTR(err);
}

static int rxe_destroy_cq(struct ib_cq *ibcq)
{
	struct rxe_cq *cq = to_rcq(ibcq);

	rxe_drop_ref(cq);
	return 0;
}

static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
{
	int err;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_dev *rxe = to_rdev(ibcq->device);

	err = rxe_cq_chk_attr(rxe, cq, cqe, 0, udata);
	if (err)
		goto err1;

	err = rxe_cq_resize_queue(cq, cqe, udata);
	if (err)
		goto err1;

	return 0;

err1:
	return err;
}

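/* Drain up to num_entries completions from the CQ ring under cq_lock and
 * return how many were copied out to the caller's ib_wc array.
 */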
static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
{
	int i;
	struct rxe_cq *cq = to_rcq(ibcq);
	struct rxe_cqe *cqe;
	unsigned long flags;

	spin_lock_irqsave(&cq->cq_lock, flags);
	for (i = 0; i < num_entries; i++) {
		cqe = queue_head(cq->queue);
		if (!cqe)
			break;

		memcpy(wc++, &cqe->ibwc, sizeof(*wc));
		advance_consumer(cq->queue);
	}
	spin_unlock_irqrestore(&cq->cq_lock, flags);

	return i;
}

static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	int count = queue_count(cq->queue);

	return (count > wc_cnt) ? wc_cnt : count;
}

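/* Arm the CQ for the requested notification type. With
 * IB_CQ_REPORT_MISSED_EVENTS the verbs contract is to return 1 when
 * completions are already queued, so the caller knows to poll again
 * rather than wait for an event.
 */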
static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags)
{
	struct rxe_cq *cq = to_rcq(ibcq);
	unsigned long irq_flags;
	int ret = 0;

	spin_lock_irqsave(&cq->cq_lock, irq_flags);
	if (cq->notify != IB_CQ_NEXT_COMP)
		cq->notify = flags & IB_CQ_SOLICITED_MASK;

	if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue))
		ret = 1;

	spin_unlock_irqrestore(&cq->cq_lock, irq_flags);

	return ret;
}

static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_dma(rxe, pd, access, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
				     u64 start,
				     u64 length,
				     u64 iova,
				     int access, struct ib_udata *udata)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err2;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_user(rxe, pd, start, length, iova,
				access, udata, mr);
	if (err)
		goto err3;

	return &mr->ibmr;

err3:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err2:
	return ERR_PTR(err);
}

static int rxe_dereg_mr(struct ib_mr *ibmr)
{
	struct rxe_mem *mr = to_rmr(ibmr);

	mr->state = RXE_MEM_STATE_ZOMBIE;
	rxe_drop_ref(mr->pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
	return 0;
}

static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd,
				  enum ib_mr_type mr_type,
				  u32 max_num_sg)
{
	struct rxe_dev *rxe = to_rdev(ibpd->device);
	struct rxe_pd *pd = to_rpd(ibpd);
	struct rxe_mem *mr;
	int err;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = rxe_alloc(&rxe->mr_pool);
	if (!mr) {
		err = -ENOMEM;
		goto err1;
	}

	rxe_add_index(mr);

	rxe_add_ref(pd);

	err = rxe_mem_init_fast(rxe, pd, max_num_sg, mr);
	if (err)
		goto err2;

	return &mr->ibmr;

err2:
	rxe_drop_ref(pd);
	rxe_drop_index(mr);
	rxe_drop_ref(mr);
err1:
	return ERR_PTR(err);
}

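/* Callback handed to ib_sg_to_pages() from rxe_map_mr_sg(): stores one page
 * address into the MR's two-level page map (RXE_BUF_PER_MAP entries per map
 * block).
 */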
static int rxe_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	struct rxe_map *map;
	struct rxe_phys_buf *buf;

	if (unlikely(mr->nbuf == mr->num_buf))
		return -ENOMEM;

	map = mr->map[mr->nbuf / RXE_BUF_PER_MAP];
	buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP];

	buf->addr = addr;
	buf->size = ibmr->page_size;
	mr->nbuf++;

	return 0;
}

static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
			 int sg_nents, unsigned int *sg_offset)
{
	struct rxe_mem *mr = to_rmr(ibmr);
	int n;

	mr->nbuf = 0;

	n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page);

	mr->va = ibmr->iova;
	mr->iova = ibmr->iova;
	mr->length = ibmr->length;
	mr->page_shift = ilog2(ibmr->page_size);
	mr->page_mask = ibmr->page_size - 1;
	mr->offset = mr->iova & mr->page_mask;

	return n;
}

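/* Multicast attach: look up (or create) the group for this MGID, then link
 * the QP into it. The reference taken by rxe_mcast_get_grp() is dropped
 * once the QP has been added.
 */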
static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	int err;
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);
	struct rxe_mc_grp *grp;

	/* takes a ref on grp if successful */
	err = rxe_mcast_get_grp(rxe, mgid, &grp);
	if (err)
		return err;

	err = rxe_mcast_add_grp_elem(rxe, qp, grp);

	rxe_drop_ref(grp);
	return err;
}

static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
{
	struct rxe_dev *rxe = to_rdev(ibqp->device);
	struct rxe_qp *qp = to_rqp(ibqp);

	return rxe_mcast_drop_grp_elem(rxe, qp, mgid);
}

static ssize_t rxe_show_parent(struct device *device,
			       struct device_attribute *attr, char *buf)
{
	struct rxe_dev *rxe = container_of(device, struct rxe_dev,
					   ib_dev.dev);

	return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1));
}

static DEVICE_ATTR(parent, S_IRUGO, rxe_show_parent, NULL);

static struct device_attribute *rxe_dev_attributes[] = {
	&dev_attr_parent,
};

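/* Registration proceeds in four steps: fill in the generic ib_device fields
 * and the supported uverbs command mask, plug in the verbs callbacks
 * defined above, allocate the crc32 shash used for ICRC generation, and
 * finally register with the IB core and create the "parent" sysfs attribute
 * that names the underlying netdev.
 */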
int rxe_register_device(struct rxe_dev *rxe)
{
	int err;
	int i;
	struct ib_device *dev = &rxe->ib_dev;

	strlcpy(dev->name, "rxe%d", IB_DEVICE_NAME_MAX);
	strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));

	dev->owner = THIS_MODULE;
	dev->node_type = RDMA_NODE_IB_CA;
	dev->phys_port_cnt = 1;
	dev->num_comp_vectors = num_possible_cpus();
	dev->dev.parent = rxe_dma_device(rxe);
	dev->local_dma_lkey = 0;
	addrconf_addr_eui48((unsigned char *)&dev->node_guid,
			    rxe->ndev->dev_addr);
	dev->dev.dma_ops = &dma_virt_ops;

	dev->uverbs_abi_ver = RXE_UVERBS_ABI_VERSION;
	dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT)
	    | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND)
	    | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)
	    | BIT_ULL(IB_USER_VERBS_CMD_REG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR)
	    | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH)
	    | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST)
	    | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST)
	    ;

	dev->query_device = rxe_query_device;
	dev->modify_device = rxe_modify_device;
	dev->query_port = rxe_query_port;
	dev->modify_port = rxe_modify_port;
	dev->get_link_layer = rxe_get_link_layer;
	dev->query_gid = rxe_query_gid;
	dev->get_netdev = rxe_get_netdev;
	dev->add_gid = rxe_add_gid;
	dev->del_gid = rxe_del_gid;
	dev->query_pkey = rxe_query_pkey;
	dev->alloc_ucontext = rxe_alloc_ucontext;
	dev->dealloc_ucontext = rxe_dealloc_ucontext;
	dev->mmap = rxe_mmap;
	dev->get_port_immutable = rxe_port_immutable;
	dev->alloc_pd = rxe_alloc_pd;
	dev->dealloc_pd = rxe_dealloc_pd;
	dev->create_ah = rxe_create_ah;
	dev->modify_ah = rxe_modify_ah;
	dev->query_ah = rxe_query_ah;
	dev->destroy_ah = rxe_destroy_ah;
	dev->create_srq = rxe_create_srq;
	dev->modify_srq = rxe_modify_srq;
	dev->query_srq = rxe_query_srq;
	dev->destroy_srq = rxe_destroy_srq;
	dev->post_srq_recv = rxe_post_srq_recv;
	dev->create_qp = rxe_create_qp;
	dev->modify_qp = rxe_modify_qp;
	dev->query_qp = rxe_query_qp;
	dev->destroy_qp = rxe_destroy_qp;
	dev->post_send = rxe_post_send;
	dev->post_recv = rxe_post_recv;
	dev->create_cq = rxe_create_cq;
	dev->destroy_cq = rxe_destroy_cq;
	dev->resize_cq = rxe_resize_cq;
	dev->poll_cq = rxe_poll_cq;
	dev->peek_cq = rxe_peek_cq;
	dev->req_notify_cq = rxe_req_notify_cq;
	dev->get_dma_mr = rxe_get_dma_mr;
	dev->reg_user_mr = rxe_reg_user_mr;
	dev->dereg_mr = rxe_dereg_mr;
	dev->alloc_mr = rxe_alloc_mr;
	dev->map_mr_sg = rxe_map_mr_sg;
	dev->attach_mcast = rxe_attach_mcast;
	dev->detach_mcast = rxe_detach_mcast;
	dev->get_hw_stats = rxe_ib_get_hw_stats;
	dev->alloc_hw_stats = rxe_ib_alloc_hw_stats;

	rxe->tfm = crypto_alloc_shash("crc32", 0, 0);
	if (IS_ERR(rxe->tfm)) {
		pr_err("failed to allocate crc algorithm err:%ld\n",
		       PTR_ERR(rxe->tfm));
		return PTR_ERR(rxe->tfm);
	}

	err = ib_register_device(dev, NULL);
	if (err) {
		pr_warn("rxe_register_device failed, err = %d\n", err);
		goto err1;
	}

	for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i) {
		err = device_create_file(&dev->dev, rxe_dev_attributes[i]);
		if (err) {
			pr_warn("device_create_file failed, i = %d, err = %d\n",
				i, err);
			goto err2;
		}
	}

	return 0;

err2:
	ib_unregister_device(dev);
err1:
	crypto_free_shash(rxe->tfm);

	return err;
}

int rxe_unregister_device(struct rxe_dev *rxe)
{
	int i;
	struct ib_device *dev = &rxe->ib_dev;

	for (i = 0; i < ARRAY_SIZE(rxe_dev_attributes); ++i)
		device_remove_file(&dev->dev, rxe_dev_attributes[i]);

	ib_unregister_device(dev);

	return 0;
}