1 /*******************************************************************
2 * This file is part of the Emulex RoCE Device Driver for *
3 * RoCE (RDMA over Converged Ethernet) adapters. *
4 * Copyright (C) 2008-2012 Emulex. All rights reserved. *
5 * EMULEX and SLI are trademarks of Emulex. *
8 * This program is free software; you can redistribute it and/or *
9 * modify it under the terms of version 2 of the GNU General *
10 * Public License as published by the Free Software Foundation. *
11 * This program is distributed in the hope that it will be useful. *
12 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
13 * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
14 * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
15 * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
16 * TO BE LEGALLY INVALID. See the GNU General Public License for *
17 * more details, a copy of which can be found in the file COPYING *
18 * included with this package. *
20 * Contact Information:
21 * linux-drivers@emulex.com
25 * Costa Mesa, CA 92626
26 *******************************************************************/
28 #include <linux/dma-mapping.h>
29 #include <rdma/ib_verbs.h>
30 #include <rdma/ib_user_verbs.h>
31 #include <rdma/iw_cm.h>
32 #include <rdma/ib_umem.h>
33 #include <rdma/ib_addr.h>
36 #include "ocrdma_hw.h"
37 #include "ocrdma_verbs.h"
38 #include "ocrdma_abi.h"
40 int ocrdma_query_pkey(struct ib_device
*ibdev
, u8 port
, u16 index
, u16
*pkey
)
49 int ocrdma_query_gid(struct ib_device
*ibdev
, u8 port
,
50 int index
, union ib_gid
*sgid
)
52 struct ocrdma_dev
*dev
;
54 dev
= get_ocrdma_dev(ibdev
);
55 memset(sgid
, 0, sizeof(*sgid
));
56 if (index
>= OCRDMA_MAX_SGID
)
59 memcpy(sgid
, &dev
->sgid_tbl
[index
], sizeof(*sgid
));
64 int ocrdma_query_device(struct ib_device
*ibdev
, struct ib_device_attr
*attr
)
66 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibdev
);
68 memset(attr
, 0, sizeof *attr
);
69 memcpy(&attr
->fw_ver
, &dev
->attr
.fw_ver
[0],
70 min(sizeof(dev
->attr
.fw_ver
), sizeof(attr
->fw_ver
)));
71 ocrdma_get_guid(dev
, (u8
*)&attr
->sys_image_guid
);
72 attr
->max_mr_size
= ~0ull;
73 attr
->page_size_cap
= 0xffff000;
74 attr
->vendor_id
= dev
->nic_info
.pdev
->vendor
;
75 attr
->vendor_part_id
= dev
->nic_info
.pdev
->device
;
77 attr
->max_qp
= dev
->attr
.max_qp
;
78 attr
->max_ah
= OCRDMA_MAX_AH
;
79 attr
->max_qp_wr
= dev
->attr
.max_wqe
;
81 attr
->device_cap_flags
= IB_DEVICE_CURR_QP_STATE_MOD
|
82 IB_DEVICE_RC_RNR_NAK_GEN
|
83 IB_DEVICE_SHUTDOWN_PORT
|
84 IB_DEVICE_SYS_IMAGE_GUID
|
85 IB_DEVICE_LOCAL_DMA_LKEY
|
86 IB_DEVICE_MEM_MGT_EXTENSIONS
;
87 attr
->max_sge
= min(dev
->attr
.max_send_sge
, dev
->attr
.max_srq_sge
);
89 attr
->max_cq
= dev
->attr
.max_cq
;
90 attr
->max_cqe
= dev
->attr
.max_cqe
;
91 attr
->max_mr
= dev
->attr
.max_mr
;
93 attr
->max_pd
= dev
->attr
.max_pd
;
96 attr
->max_map_per_fmr
= 0;
97 attr
->max_qp_rd_atom
=
98 min(dev
->attr
.max_ord_per_qp
, dev
->attr
.max_ird_per_qp
);
99 attr
->max_qp_init_rd_atom
= dev
->attr
.max_ord_per_qp
;
100 attr
->max_srq
= dev
->attr
.max_srq
;
101 attr
->max_srq_sge
= dev
->attr
.max_srq_sge
;
102 attr
->max_srq_wr
= dev
->attr
.max_rqe
;
103 attr
->local_ca_ack_delay
= dev
->attr
.local_ca_ack_delay
;
104 attr
->max_fast_reg_page_list_len
= 0;
109 static inline void get_link_speed_and_width(struct ocrdma_dev
*dev
,
110 u8
*ib_speed
, u8
*ib_width
)
115 status
= ocrdma_mbx_get_link_speed(dev
, &speed
);
117 speed
= OCRDMA_PHYS_LINK_SPEED_ZERO
;
120 case OCRDMA_PHYS_LINK_SPEED_1GBPS
:
121 *ib_speed
= IB_SPEED_SDR
;
122 *ib_width
= IB_WIDTH_1X
;
125 case OCRDMA_PHYS_LINK_SPEED_10GBPS
:
126 *ib_speed
= IB_SPEED_QDR
;
127 *ib_width
= IB_WIDTH_1X
;
130 case OCRDMA_PHYS_LINK_SPEED_20GBPS
:
131 *ib_speed
= IB_SPEED_DDR
;
132 *ib_width
= IB_WIDTH_4X
;
135 case OCRDMA_PHYS_LINK_SPEED_40GBPS
:
136 *ib_speed
= IB_SPEED_QDR
;
137 *ib_width
= IB_WIDTH_4X
;
142 *ib_speed
= IB_SPEED_SDR
;
143 *ib_width
= IB_WIDTH_1X
;
148 int ocrdma_query_port(struct ib_device
*ibdev
,
149 u8 port
, struct ib_port_attr
*props
)
151 enum ib_port_state port_state
;
152 struct ocrdma_dev
*dev
;
153 struct net_device
*netdev
;
155 dev
= get_ocrdma_dev(ibdev
);
157 pr_err("%s(%d) invalid_port=0x%x\n", __func__
,
161 netdev
= dev
->nic_info
.netdev
;
162 if (netif_running(netdev
) && netif_oper_up(netdev
)) {
163 port_state
= IB_PORT_ACTIVE
;
164 props
->phys_state
= 5;
166 port_state
= IB_PORT_DOWN
;
167 props
->phys_state
= 3;
169 props
->max_mtu
= IB_MTU_4096
;
170 props
->active_mtu
= iboe_get_mtu(netdev
->mtu
);
175 props
->state
= port_state
;
176 props
->port_cap_flags
=
179 IB_PORT_DEVICE_MGMT_SUP
| IB_PORT_VENDOR_CLASS_SUP
;
180 props
->gid_tbl_len
= OCRDMA_MAX_SGID
;
181 props
->pkey_tbl_len
= 1;
182 props
->bad_pkey_cntr
= 0;
183 props
->qkey_viol_cntr
= 0;
184 get_link_speed_and_width(dev
, &props
->active_speed
,
185 &props
->active_width
);
186 props
->max_msg_sz
= 0x80000000;
187 props
->max_vl_num
= 4;
191 int ocrdma_modify_port(struct ib_device
*ibdev
, u8 port
, int mask
,
192 struct ib_port_modify
*props
)
194 struct ocrdma_dev
*dev
;
196 dev
= get_ocrdma_dev(ibdev
);
198 pr_err("%s(%d) invalid_port=0x%x\n", __func__
, dev
->id
, port
);
204 static int ocrdma_add_mmap(struct ocrdma_ucontext
*uctx
, u64 phy_addr
,
207 struct ocrdma_mm
*mm
;
209 mm
= kzalloc(sizeof(*mm
), GFP_KERNEL
);
212 mm
->key
.phy_addr
= phy_addr
;
214 INIT_LIST_HEAD(&mm
->entry
);
216 mutex_lock(&uctx
->mm_list_lock
);
217 list_add_tail(&mm
->entry
, &uctx
->mm_head
);
218 mutex_unlock(&uctx
->mm_list_lock
);
222 static void ocrdma_del_mmap(struct ocrdma_ucontext
*uctx
, u64 phy_addr
,
225 struct ocrdma_mm
*mm
, *tmp
;
227 mutex_lock(&uctx
->mm_list_lock
);
228 list_for_each_entry_safe(mm
, tmp
, &uctx
->mm_head
, entry
) {
229 if (len
!= mm
->key
.len
&& phy_addr
!= mm
->key
.phy_addr
)
232 list_del(&mm
->entry
);
236 mutex_unlock(&uctx
->mm_list_lock
);
239 static bool ocrdma_search_mmap(struct ocrdma_ucontext
*uctx
, u64 phy_addr
,
243 struct ocrdma_mm
*mm
;
245 mutex_lock(&uctx
->mm_list_lock
);
246 list_for_each_entry(mm
, &uctx
->mm_head
, entry
) {
247 if (len
!= mm
->key
.len
&& phy_addr
!= mm
->key
.phy_addr
)
253 mutex_unlock(&uctx
->mm_list_lock
);
257 static struct ocrdma_pd
*_ocrdma_alloc_pd(struct ocrdma_dev
*dev
,
258 struct ocrdma_ucontext
*uctx
,
259 struct ib_udata
*udata
)
261 struct ocrdma_pd
*pd
= NULL
;
264 pd
= kzalloc(sizeof(*pd
), GFP_KERNEL
);
266 return ERR_PTR(-ENOMEM
);
270 dev
->nic_info
.dev_family
== OCRDMA_GEN2_FAMILY
;
272 pd
->dpp_enabled
? OCRDMA_PD_MAX_DPP_ENABLED_QP
: 0;
276 status
= ocrdma_mbx_alloc_pd(dev
, pd
);
278 if (pd
->dpp_enabled
) {
279 pd
->dpp_enabled
= false;
284 return ERR_PTR(status
);
291 static inline int is_ucontext_pd(struct ocrdma_ucontext
*uctx
,
292 struct ocrdma_pd
*pd
)
294 return (uctx
->cntxt_pd
== pd
? true : false);
297 static int _ocrdma_dealloc_pd(struct ocrdma_dev
*dev
,
298 struct ocrdma_pd
*pd
)
302 status
= ocrdma_mbx_dealloc_pd(dev
, pd
);
307 static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev
*dev
,
308 struct ocrdma_ucontext
*uctx
,
309 struct ib_udata
*udata
)
313 uctx
->cntxt_pd
= _ocrdma_alloc_pd(dev
, uctx
, udata
);
314 if (IS_ERR(uctx
->cntxt_pd
)) {
315 status
= PTR_ERR(uctx
->cntxt_pd
);
316 uctx
->cntxt_pd
= NULL
;
320 uctx
->cntxt_pd
->uctx
= uctx
;
321 uctx
->cntxt_pd
->ibpd
.device
= &dev
->ibdev
;
326 static int ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext
*uctx
)
329 struct ocrdma_pd
*pd
= uctx
->cntxt_pd
;
330 struct ocrdma_dev
*dev
= get_ocrdma_dev(pd
->ibpd
.device
);
332 BUG_ON(uctx
->pd_in_use
);
333 uctx
->cntxt_pd
= NULL
;
334 status
= _ocrdma_dealloc_pd(dev
, pd
);
338 static struct ocrdma_pd
*ocrdma_get_ucontext_pd(struct ocrdma_ucontext
*uctx
)
340 struct ocrdma_pd
*pd
= NULL
;
342 mutex_lock(&uctx
->mm_list_lock
);
343 if (!uctx
->pd_in_use
) {
344 uctx
->pd_in_use
= true;
347 mutex_unlock(&uctx
->mm_list_lock
);
352 static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext
*uctx
)
354 mutex_lock(&uctx
->mm_list_lock
);
355 uctx
->pd_in_use
= false;
356 mutex_unlock(&uctx
->mm_list_lock
);
359 struct ib_ucontext
*ocrdma_alloc_ucontext(struct ib_device
*ibdev
,
360 struct ib_udata
*udata
)
363 struct ocrdma_ucontext
*ctx
;
364 struct ocrdma_alloc_ucontext_resp resp
;
365 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibdev
);
366 struct pci_dev
*pdev
= dev
->nic_info
.pdev
;
367 u32 map_len
= roundup(sizeof(u32
) * 2048, PAGE_SIZE
);
370 return ERR_PTR(-EFAULT
);
371 ctx
= kzalloc(sizeof(*ctx
), GFP_KERNEL
);
373 return ERR_PTR(-ENOMEM
);
374 INIT_LIST_HEAD(&ctx
->mm_head
);
375 mutex_init(&ctx
->mm_list_lock
);
377 ctx
->ah_tbl
.va
= dma_alloc_coherent(&pdev
->dev
, map_len
,
378 &ctx
->ah_tbl
.pa
, GFP_KERNEL
);
379 if (!ctx
->ah_tbl
.va
) {
381 return ERR_PTR(-ENOMEM
);
383 memset(ctx
->ah_tbl
.va
, 0, map_len
);
384 ctx
->ah_tbl
.len
= map_len
;
386 memset(&resp
, 0, sizeof(resp
));
387 resp
.ah_tbl_len
= ctx
->ah_tbl
.len
;
388 resp
.ah_tbl_page
= ctx
->ah_tbl
.pa
;
390 status
= ocrdma_add_mmap(ctx
, resp
.ah_tbl_page
, resp
.ah_tbl_len
);
394 status
= ocrdma_alloc_ucontext_pd(dev
, ctx
, udata
);
398 resp
.dev_id
= dev
->id
;
399 resp
.max_inline_data
= dev
->attr
.max_inline_data
;
400 resp
.wqe_size
= dev
->attr
.wqe_size
;
401 resp
.rqe_size
= dev
->attr
.rqe_size
;
402 resp
.dpp_wqe_size
= dev
->attr
.wqe_size
;
404 memcpy(resp
.fw_ver
, dev
->attr
.fw_ver
, sizeof(resp
.fw_ver
));
405 status
= ib_copy_to_udata(udata
, &resp
, sizeof(resp
));
408 return &ctx
->ibucontext
;
412 ocrdma_del_mmap(ctx
, ctx
->ah_tbl
.pa
, ctx
->ah_tbl
.len
);
414 dma_free_coherent(&pdev
->dev
, ctx
->ah_tbl
.len
, ctx
->ah_tbl
.va
,
417 return ERR_PTR(status
);
420 int ocrdma_dealloc_ucontext(struct ib_ucontext
*ibctx
)
423 struct ocrdma_mm
*mm
, *tmp
;
424 struct ocrdma_ucontext
*uctx
= get_ocrdma_ucontext(ibctx
);
425 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibctx
->device
);
426 struct pci_dev
*pdev
= dev
->nic_info
.pdev
;
428 status
= ocrdma_dealloc_ucontext_pd(uctx
);
430 ocrdma_del_mmap(uctx
, uctx
->ah_tbl
.pa
, uctx
->ah_tbl
.len
);
431 dma_free_coherent(&pdev
->dev
, uctx
->ah_tbl
.len
, uctx
->ah_tbl
.va
,
434 list_for_each_entry_safe(mm
, tmp
, &uctx
->mm_head
, entry
) {
435 list_del(&mm
->entry
);
442 int ocrdma_mmap(struct ib_ucontext
*context
, struct vm_area_struct
*vma
)
444 struct ocrdma_ucontext
*ucontext
= get_ocrdma_ucontext(context
);
445 struct ocrdma_dev
*dev
= get_ocrdma_dev(context
->device
);
446 unsigned long vm_page
= vma
->vm_pgoff
<< PAGE_SHIFT
;
447 u64 unmapped_db
= (u64
) dev
->nic_info
.unmapped_db
;
448 unsigned long len
= (vma
->vm_end
- vma
->vm_start
);
452 if (vma
->vm_start
& (PAGE_SIZE
- 1))
454 found
= ocrdma_search_mmap(ucontext
, vma
->vm_pgoff
<< PAGE_SHIFT
, len
);
458 if ((vm_page
>= unmapped_db
) && (vm_page
<= (unmapped_db
+
459 dev
->nic_info
.db_total_size
)) &&
460 (len
<= dev
->nic_info
.db_page_size
)) {
461 if (vma
->vm_flags
& VM_READ
)
464 vma
->vm_page_prot
= pgprot_noncached(vma
->vm_page_prot
);
465 status
= io_remap_pfn_range(vma
, vma
->vm_start
, vma
->vm_pgoff
,
466 len
, vma
->vm_page_prot
);
467 } else if (dev
->nic_info
.dpp_unmapped_len
&&
468 (vm_page
>= (u64
) dev
->nic_info
.dpp_unmapped_addr
) &&
469 (vm_page
<= (u64
) (dev
->nic_info
.dpp_unmapped_addr
+
470 dev
->nic_info
.dpp_unmapped_len
)) &&
471 (len
<= dev
->nic_info
.dpp_unmapped_len
)) {
472 if (vma
->vm_flags
& VM_READ
)
475 vma
->vm_page_prot
= pgprot_writecombine(vma
->vm_page_prot
);
476 status
= io_remap_pfn_range(vma
, vma
->vm_start
, vma
->vm_pgoff
,
477 len
, vma
->vm_page_prot
);
479 status
= remap_pfn_range(vma
, vma
->vm_start
,
480 vma
->vm_pgoff
, len
, vma
->vm_page_prot
);
485 static int ocrdma_copy_pd_uresp(struct ocrdma_dev
*dev
, struct ocrdma_pd
*pd
,
486 struct ib_ucontext
*ib_ctx
,
487 struct ib_udata
*udata
)
491 u64 dpp_page_addr
= 0;
493 struct ocrdma_alloc_pd_uresp rsp
;
494 struct ocrdma_ucontext
*uctx
= get_ocrdma_ucontext(ib_ctx
);
496 memset(&rsp
, 0, sizeof(rsp
));
498 rsp
.dpp_enabled
= pd
->dpp_enabled
;
499 db_page_addr
= ocrdma_get_db_addr(dev
, pd
->id
);
500 db_page_size
= dev
->nic_info
.db_page_size
;
502 status
= ocrdma_add_mmap(uctx
, db_page_addr
, db_page_size
);
506 if (pd
->dpp_enabled
) {
507 dpp_page_addr
= dev
->nic_info
.dpp_unmapped_addr
+
508 (pd
->id
* PAGE_SIZE
);
509 status
= ocrdma_add_mmap(uctx
, dpp_page_addr
,
513 rsp
.dpp_page_addr_hi
= upper_32_bits(dpp_page_addr
);
514 rsp
.dpp_page_addr_lo
= dpp_page_addr
;
517 status
= ib_copy_to_udata(udata
, &rsp
, sizeof(rsp
));
526 ocrdma_del_mmap(pd
->uctx
, dpp_page_addr
, PAGE_SIZE
);
528 ocrdma_del_mmap(pd
->uctx
, db_page_addr
, db_page_size
);
532 struct ib_pd
*ocrdma_alloc_pd(struct ib_device
*ibdev
,
533 struct ib_ucontext
*context
,
534 struct ib_udata
*udata
)
536 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibdev
);
537 struct ocrdma_pd
*pd
;
538 struct ocrdma_ucontext
*uctx
= NULL
;
540 u8 is_uctx_pd
= false;
542 if (udata
&& context
) {
543 uctx
= get_ocrdma_ucontext(context
);
544 pd
= ocrdma_get_ucontext_pd(uctx
);
551 pd
= _ocrdma_alloc_pd(dev
, uctx
, udata
);
553 status
= PTR_ERR(pd
);
558 if (udata
&& context
) {
559 status
= ocrdma_copy_pd_uresp(dev
, pd
, context
, udata
);
567 ocrdma_release_ucontext_pd(uctx
);
569 status
= ocrdma_mbx_dealloc_pd(dev
, pd
);
573 return ERR_PTR(status
);
576 int ocrdma_dealloc_pd(struct ib_pd
*ibpd
)
578 struct ocrdma_pd
*pd
= get_ocrdma_pd(ibpd
);
579 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibpd
->device
);
580 struct ocrdma_ucontext
*uctx
= NULL
;
586 u64 dpp_db
= dev
->nic_info
.dpp_unmapped_addr
+
587 (pd
->id
* PAGE_SIZE
);
589 ocrdma_del_mmap(pd
->uctx
, dpp_db
, PAGE_SIZE
);
590 usr_db
= ocrdma_get_db_addr(dev
, pd
->id
);
591 ocrdma_del_mmap(pd
->uctx
, usr_db
, dev
->nic_info
.db_page_size
);
593 if (is_ucontext_pd(uctx
, pd
)) {
594 ocrdma_release_ucontext_pd(uctx
);
598 status
= _ocrdma_dealloc_pd(dev
, pd
);
602 static int ocrdma_alloc_lkey(struct ocrdma_dev
*dev
, struct ocrdma_mr
*mr
,
603 u32 pdid
, int acc
, u32 num_pbls
, u32 addr_check
)
608 mr
->hwmr
.local_rd
= 1;
609 mr
->hwmr
.remote_rd
= (acc
& IB_ACCESS_REMOTE_READ
) ? 1 : 0;
610 mr
->hwmr
.remote_wr
= (acc
& IB_ACCESS_REMOTE_WRITE
) ? 1 : 0;
611 mr
->hwmr
.local_wr
= (acc
& IB_ACCESS_LOCAL_WRITE
) ? 1 : 0;
612 mr
->hwmr
.mw_bind
= (acc
& IB_ACCESS_MW_BIND
) ? 1 : 0;
613 mr
->hwmr
.remote_atomic
= (acc
& IB_ACCESS_REMOTE_ATOMIC
) ? 1 : 0;
614 mr
->hwmr
.num_pbls
= num_pbls
;
616 status
= ocrdma_mbx_alloc_lkey(dev
, &mr
->hwmr
, pdid
, addr_check
);
620 mr
->ibmr
.lkey
= mr
->hwmr
.lkey
;
621 if (mr
->hwmr
.remote_wr
|| mr
->hwmr
.remote_rd
)
622 mr
->ibmr
.rkey
= mr
->hwmr
.lkey
;
626 struct ib_mr
*ocrdma_get_dma_mr(struct ib_pd
*ibpd
, int acc
)
629 struct ocrdma_mr
*mr
;
630 struct ocrdma_pd
*pd
= get_ocrdma_pd(ibpd
);
631 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibpd
->device
);
633 if (acc
& IB_ACCESS_REMOTE_WRITE
&& !(acc
& IB_ACCESS_LOCAL_WRITE
)) {
634 pr_err("%s err, invalid access rights\n", __func__
);
635 return ERR_PTR(-EINVAL
);
638 mr
= kzalloc(sizeof(*mr
), GFP_KERNEL
);
640 return ERR_PTR(-ENOMEM
);
642 status
= ocrdma_alloc_lkey(dev
, mr
, pd
->id
, acc
, 0,
643 OCRDMA_ADDR_CHECK_DISABLE
);
646 return ERR_PTR(status
);
652 static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev
*dev
,
653 struct ocrdma_hw_mr
*mr
)
655 struct pci_dev
*pdev
= dev
->nic_info
.pdev
;
659 for (i
= 0; i
< mr
->num_pbls
; i
++) {
660 if (!mr
->pbl_table
[i
].va
)
662 dma_free_coherent(&pdev
->dev
, mr
->pbl_size
,
664 mr
->pbl_table
[i
].pa
);
666 kfree(mr
->pbl_table
);
667 mr
->pbl_table
= NULL
;
671 static int ocrdma_get_pbl_info(struct ocrdma_dev
*dev
, struct ocrdma_mr
*mr
,
680 pbl_size
= OCRDMA_MIN_HPAGE_SIZE
* (1 << idx
);
681 if (pbl_size
> MAX_OCRDMA_PBL_SIZE
) {
685 num_pbls
= roundup(num_pbes
, (pbl_size
/ sizeof(u64
)));
686 num_pbls
= num_pbls
/ (pbl_size
/ sizeof(u64
));
688 } while (num_pbls
>= dev
->attr
.max_num_mr_pbl
);
690 mr
->hwmr
.num_pbes
= num_pbes
;
691 mr
->hwmr
.num_pbls
= num_pbls
;
692 mr
->hwmr
.pbl_size
= pbl_size
;
696 static int ocrdma_build_pbl_tbl(struct ocrdma_dev
*dev
, struct ocrdma_hw_mr
*mr
)
700 u32 dma_len
= mr
->pbl_size
;
701 struct pci_dev
*pdev
= dev
->nic_info
.pdev
;
705 mr
->pbl_table
= kzalloc(sizeof(struct ocrdma_pbl
) *
706 mr
->num_pbls
, GFP_KERNEL
);
711 for (i
= 0; i
< mr
->num_pbls
; i
++) {
712 va
= dma_alloc_coherent(&pdev
->dev
, dma_len
, &pa
, GFP_KERNEL
);
714 ocrdma_free_mr_pbl_tbl(dev
, mr
);
718 memset(va
, 0, dma_len
);
719 mr
->pbl_table
[i
].va
= va
;
720 mr
->pbl_table
[i
].pa
= pa
;
725 static void build_user_pbes(struct ocrdma_dev
*dev
, struct ocrdma_mr
*mr
,
728 struct ocrdma_pbe
*pbe
;
729 struct ib_umem_chunk
*chunk
;
730 struct ocrdma_pbl
*pbl_tbl
= mr
->hwmr
.pbl_table
;
731 struct ib_umem
*umem
= mr
->umem
;
732 int i
, shift
, pg_cnt
, pages
, pbe_cnt
, total_num_pbes
= 0;
734 if (!mr
->hwmr
.num_pbes
)
737 pbe
= (struct ocrdma_pbe
*)pbl_tbl
->va
;
740 shift
= ilog2(umem
->page_size
);
742 list_for_each_entry(chunk
, &umem
->chunk_list
, list
) {
743 /* get all the dma regions from the chunk. */
744 for (i
= 0; i
< chunk
->nmap
; i
++) {
745 pages
= sg_dma_len(&chunk
->page_list
[i
]) >> shift
;
746 for (pg_cnt
= 0; pg_cnt
< pages
; pg_cnt
++) {
747 /* store the page address in pbe */
749 cpu_to_le32(sg_dma_address
750 (&chunk
->page_list
[i
]) +
751 (umem
->page_size
* pg_cnt
));
753 cpu_to_le32(upper_32_bits
755 (&chunk
->page_list
[i
]) +
756 umem
->page_size
* pg_cnt
)));
761 /* if done building pbes, issue the mbx cmd. */
762 if (total_num_pbes
== num_pbes
)
765 /* if the given pbl is full storing the pbes,
769 (mr
->hwmr
.pbl_size
/ sizeof(u64
))) {
771 pbe
= (struct ocrdma_pbe
*)pbl_tbl
->va
;
779 struct ib_mr
*ocrdma_reg_user_mr(struct ib_pd
*ibpd
, u64 start
, u64 len
,
780 u64 usr_addr
, int acc
, struct ib_udata
*udata
)
782 int status
= -ENOMEM
;
783 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibpd
->device
);
784 struct ocrdma_mr
*mr
;
785 struct ocrdma_pd
*pd
;
788 pd
= get_ocrdma_pd(ibpd
);
790 if (acc
& IB_ACCESS_REMOTE_WRITE
&& !(acc
& IB_ACCESS_LOCAL_WRITE
))
791 return ERR_PTR(-EINVAL
);
793 mr
= kzalloc(sizeof(*mr
), GFP_KERNEL
);
795 return ERR_PTR(status
);
796 mr
->umem
= ib_umem_get(ibpd
->uobject
->context
, start
, len
, acc
, 0);
797 if (IS_ERR(mr
->umem
)) {
801 num_pbes
= ib_umem_page_count(mr
->umem
);
802 status
= ocrdma_get_pbl_info(dev
, mr
, num_pbes
);
806 mr
->hwmr
.pbe_size
= mr
->umem
->page_size
;
807 mr
->hwmr
.fbo
= mr
->umem
->offset
;
808 mr
->hwmr
.va
= usr_addr
;
810 mr
->hwmr
.remote_wr
= (acc
& IB_ACCESS_REMOTE_WRITE
) ? 1 : 0;
811 mr
->hwmr
.remote_rd
= (acc
& IB_ACCESS_REMOTE_READ
) ? 1 : 0;
812 mr
->hwmr
.local_wr
= (acc
& IB_ACCESS_LOCAL_WRITE
) ? 1 : 0;
813 mr
->hwmr
.local_rd
= 1;
814 mr
->hwmr
.remote_atomic
= (acc
& IB_ACCESS_REMOTE_ATOMIC
) ? 1 : 0;
815 status
= ocrdma_build_pbl_tbl(dev
, &mr
->hwmr
);
818 build_user_pbes(dev
, mr
, num_pbes
);
819 status
= ocrdma_reg_mr(dev
, &mr
->hwmr
, pd
->id
, acc
);
822 mr
->ibmr
.lkey
= mr
->hwmr
.lkey
;
823 if (mr
->hwmr
.remote_wr
|| mr
->hwmr
.remote_rd
)
824 mr
->ibmr
.rkey
= mr
->hwmr
.lkey
;
829 ocrdma_free_mr_pbl_tbl(dev
, &mr
->hwmr
);
832 return ERR_PTR(status
);
835 int ocrdma_dereg_mr(struct ib_mr
*ib_mr
)
837 struct ocrdma_mr
*mr
= get_ocrdma_mr(ib_mr
);
838 struct ocrdma_dev
*dev
= get_ocrdma_dev(ib_mr
->device
);
841 status
= ocrdma_mbx_dealloc_lkey(dev
, mr
->hwmr
.fr_mr
, mr
->hwmr
.lkey
);
843 if (mr
->hwmr
.fr_mr
== 0)
844 ocrdma_free_mr_pbl_tbl(dev
, &mr
->hwmr
);
846 /* it could be user registered memory. */
848 ib_umem_release(mr
->umem
);
853 static int ocrdma_copy_cq_uresp(struct ocrdma_dev
*dev
, struct ocrdma_cq
*cq
,
854 struct ib_udata
*udata
,
855 struct ib_ucontext
*ib_ctx
)
858 struct ocrdma_ucontext
*uctx
= get_ocrdma_ucontext(ib_ctx
);
859 struct ocrdma_create_cq_uresp uresp
;
861 memset(&uresp
, 0, sizeof(uresp
));
862 uresp
.cq_id
= cq
->id
;
863 uresp
.page_size
= PAGE_ALIGN(cq
->len
);
865 uresp
.max_hw_cqe
= cq
->max_hw_cqe
;
866 uresp
.page_addr
[0] = cq
->pa
;
867 uresp
.db_page_addr
= ocrdma_get_db_addr(dev
, uctx
->cntxt_pd
->id
);
868 uresp
.db_page_size
= dev
->nic_info
.db_page_size
;
869 uresp
.phase_change
= cq
->phase_change
? 1 : 0;
870 status
= ib_copy_to_udata(udata
, &uresp
, sizeof(uresp
));
872 pr_err("%s(%d) copy error cqid=0x%x.\n",
873 __func__
, dev
->id
, cq
->id
);
876 status
= ocrdma_add_mmap(uctx
, uresp
.db_page_addr
, uresp
.db_page_size
);
879 status
= ocrdma_add_mmap(uctx
, uresp
.page_addr
[0], uresp
.page_size
);
881 ocrdma_del_mmap(uctx
, uresp
.db_page_addr
, uresp
.db_page_size
);
889 struct ib_cq
*ocrdma_create_cq(struct ib_device
*ibdev
, int entries
, int vector
,
890 struct ib_ucontext
*ib_ctx
,
891 struct ib_udata
*udata
)
893 struct ocrdma_cq
*cq
;
894 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibdev
);
895 struct ocrdma_ucontext
*uctx
= NULL
;
898 struct ocrdma_create_cq_ureq ureq
;
901 if (ib_copy_from_udata(&ureq
, udata
, sizeof(ureq
)))
902 return ERR_PTR(-EFAULT
);
905 cq
= kzalloc(sizeof(*cq
), GFP_KERNEL
);
907 return ERR_PTR(-ENOMEM
);
909 spin_lock_init(&cq
->cq_lock
);
910 spin_lock_init(&cq
->comp_handler_lock
);
911 INIT_LIST_HEAD(&cq
->sq_head
);
912 INIT_LIST_HEAD(&cq
->rq_head
);
915 uctx
= get_ocrdma_ucontext(ib_ctx
);
916 pd_id
= uctx
->cntxt_pd
->id
;
919 status
= ocrdma_mbx_create_cq(dev
, cq
, entries
, ureq
.dpp_cq
, pd_id
);
922 return ERR_PTR(status
);
925 status
= ocrdma_copy_cq_uresp(dev
, cq
, udata
, ib_ctx
);
929 cq
->phase
= OCRDMA_CQE_VALID
;
930 cq
->arm_needed
= true;
931 dev
->cq_tbl
[cq
->id
] = cq
;
936 ocrdma_mbx_destroy_cq(dev
, cq
);
938 return ERR_PTR(status
);
941 int ocrdma_resize_cq(struct ib_cq
*ibcq
, int new_cnt
,
942 struct ib_udata
*udata
)
945 struct ocrdma_cq
*cq
= get_ocrdma_cq(ibcq
);
947 if (new_cnt
< 1 || new_cnt
> cq
->max_hw_cqe
) {
955 int ocrdma_destroy_cq(struct ib_cq
*ibcq
)
958 struct ocrdma_cq
*cq
= get_ocrdma_cq(ibcq
);
959 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibcq
->device
);
962 status
= ocrdma_mbx_destroy_cq(dev
, cq
);
965 pdid
= cq
->ucontext
->cntxt_pd
->id
;
966 ocrdma_del_mmap(cq
->ucontext
, (u64
) cq
->pa
,
967 PAGE_ALIGN(cq
->len
));
968 ocrdma_del_mmap(cq
->ucontext
,
969 ocrdma_get_db_addr(dev
, pdid
),
970 dev
->nic_info
.db_page_size
);
972 dev
->cq_tbl
[cq
->id
] = NULL
;
978 static int ocrdma_add_qpn_map(struct ocrdma_dev
*dev
, struct ocrdma_qp
*qp
)
980 int status
= -EINVAL
;
982 if (qp
->id
< OCRDMA_MAX_QP
&& dev
->qp_tbl
[qp
->id
] == NULL
) {
983 dev
->qp_tbl
[qp
->id
] = qp
;
989 static void ocrdma_del_qpn_map(struct ocrdma_dev
*dev
, struct ocrdma_qp
*qp
)
991 dev
->qp_tbl
[qp
->id
] = NULL
;
994 static int ocrdma_check_qp_params(struct ib_pd
*ibpd
, struct ocrdma_dev
*dev
,
995 struct ib_qp_init_attr
*attrs
)
997 if ((attrs
->qp_type
!= IB_QPT_GSI
) &&
998 (attrs
->qp_type
!= IB_QPT_RC
) &&
999 (attrs
->qp_type
!= IB_QPT_UC
) &&
1000 (attrs
->qp_type
!= IB_QPT_UD
)) {
1001 pr_err("%s(%d) unsupported qp type=0x%x requested\n",
1002 __func__
, dev
->id
, attrs
->qp_type
);
1005 /* Skip the check for QP1 to support CM size of 128 */
1006 if ((attrs
->qp_type
!= IB_QPT_GSI
) &&
1007 (attrs
->cap
.max_send_wr
> dev
->attr
.max_wqe
)) {
1008 pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
1009 __func__
, dev
->id
, attrs
->cap
.max_send_wr
);
1010 pr_err("%s(%d) supported send_wr=0x%x\n",
1011 __func__
, dev
->id
, dev
->attr
.max_wqe
);
1014 if (!attrs
->srq
&& (attrs
->cap
.max_recv_wr
> dev
->attr
.max_rqe
)) {
1015 pr_err("%s(%d) unsupported recv_wr=0x%x requested\n",
1016 __func__
, dev
->id
, attrs
->cap
.max_recv_wr
);
1017 pr_err("%s(%d) supported recv_wr=0x%x\n",
1018 __func__
, dev
->id
, dev
->attr
.max_rqe
);
1021 if (attrs
->cap
.max_inline_data
> dev
->attr
.max_inline_data
) {
1022 pr_err("%s(%d) unsupported inline data size=0x%x requested\n",
1023 __func__
, dev
->id
, attrs
->cap
.max_inline_data
);
1024 pr_err("%s(%d) supported inline data size=0x%x\n",
1025 __func__
, dev
->id
, dev
->attr
.max_inline_data
);
1028 if (attrs
->cap
.max_send_sge
> dev
->attr
.max_send_sge
) {
1029 pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
1030 __func__
, dev
->id
, attrs
->cap
.max_send_sge
);
1031 pr_err("%s(%d) supported send_sge=0x%x\n",
1032 __func__
, dev
->id
, dev
->attr
.max_send_sge
);
1035 if (attrs
->cap
.max_recv_sge
> dev
->attr
.max_recv_sge
) {
1036 pr_err("%s(%d) unsupported recv_sge=0x%x requested\n",
1037 __func__
, dev
->id
, attrs
->cap
.max_recv_sge
);
1038 pr_err("%s(%d) supported recv_sge=0x%x\n",
1039 __func__
, dev
->id
, dev
->attr
.max_recv_sge
);
1042 /* unprivileged user space cannot create special QP */
1043 if (ibpd
->uobject
&& attrs
->qp_type
== IB_QPT_GSI
) {
1045 ("%s(%d) Userspace can't create special QPs of type=0x%x\n",
1046 __func__
, dev
->id
, attrs
->qp_type
);
1049 /* allow creating only one GSI type of QP */
1050 if (attrs
->qp_type
== IB_QPT_GSI
&& dev
->gsi_qp_created
) {
1051 pr_err("%s(%d) GSI special QPs already created.\n",
1055 /* verify consumer QPs are not trying to use GSI QP's CQ */
1056 if ((attrs
->qp_type
!= IB_QPT_GSI
) && (dev
->gsi_qp_created
)) {
1057 if ((dev
->gsi_sqcq
== get_ocrdma_cq(attrs
->send_cq
)) ||
1058 (dev
->gsi_rqcq
== get_ocrdma_cq(attrs
->recv_cq
))) {
1059 pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
1067 static int ocrdma_copy_qp_uresp(struct ocrdma_qp
*qp
,
1068 struct ib_udata
*udata
, int dpp_offset
,
1069 int dpp_credit_lmt
, int srq
)
1073 struct ocrdma_create_qp_uresp uresp
;
1074 struct ocrdma_dev
*dev
= qp
->dev
;
1075 struct ocrdma_pd
*pd
= qp
->pd
;
1077 memset(&uresp
, 0, sizeof(uresp
));
1078 usr_db
= dev
->nic_info
.unmapped_db
+
1079 (pd
->id
* dev
->nic_info
.db_page_size
);
1080 uresp
.qp_id
= qp
->id
;
1081 uresp
.sq_dbid
= qp
->sq
.dbid
;
1082 uresp
.num_sq_pages
= 1;
1083 uresp
.sq_page_size
= PAGE_ALIGN(qp
->sq
.len
);
1084 uresp
.sq_page_addr
[0] = qp
->sq
.pa
;
1085 uresp
.num_wqe_allocated
= qp
->sq
.max_cnt
;
1087 uresp
.rq_dbid
= qp
->rq
.dbid
;
1088 uresp
.num_rq_pages
= 1;
1089 uresp
.rq_page_size
= PAGE_ALIGN(qp
->rq
.len
);
1090 uresp
.rq_page_addr
[0] = qp
->rq
.pa
;
1091 uresp
.num_rqe_allocated
= qp
->rq
.max_cnt
;
1093 uresp
.db_page_addr
= usr_db
;
1094 uresp
.db_page_size
= dev
->nic_info
.db_page_size
;
1095 if (dev
->nic_info
.dev_family
== OCRDMA_GEN2_FAMILY
) {
1096 uresp
.db_sq_offset
= OCRDMA_DB_GEN2_SQ_OFFSET
;
1097 uresp
.db_rq_offset
= OCRDMA_DB_GEN2_RQ_OFFSET
;
1098 uresp
.db_shift
= 24;
1100 uresp
.db_sq_offset
= OCRDMA_DB_SQ_OFFSET
;
1101 uresp
.db_rq_offset
= OCRDMA_DB_RQ_OFFSET
;
1102 uresp
.db_shift
= 16;
1105 if (qp
->dpp_enabled
) {
1106 uresp
.dpp_credit
= dpp_credit_lmt
;
1107 uresp
.dpp_offset
= dpp_offset
;
1109 status
= ib_copy_to_udata(udata
, &uresp
, sizeof(uresp
));
1111 pr_err("%s(%d) user copy error.\n", __func__
, dev
->id
);
1114 status
= ocrdma_add_mmap(pd
->uctx
, uresp
.sq_page_addr
[0],
1115 uresp
.sq_page_size
);
1120 status
= ocrdma_add_mmap(pd
->uctx
, uresp
.rq_page_addr
[0],
1121 uresp
.rq_page_size
);
1127 ocrdma_del_mmap(pd
->uctx
, uresp
.sq_page_addr
[0], uresp
.sq_page_size
);
1132 static void ocrdma_set_qp_db(struct ocrdma_dev
*dev
, struct ocrdma_qp
*qp
,
1133 struct ocrdma_pd
*pd
)
1135 if (dev
->nic_info
.dev_family
== OCRDMA_GEN2_FAMILY
) {
1136 qp
->sq_db
= dev
->nic_info
.db
+
1137 (pd
->id
* dev
->nic_info
.db_page_size
) +
1138 OCRDMA_DB_GEN2_SQ_OFFSET
;
1139 qp
->rq_db
= dev
->nic_info
.db
+
1140 (pd
->id
* dev
->nic_info
.db_page_size
) +
1141 OCRDMA_DB_GEN2_RQ_OFFSET
;
1143 qp
->sq_db
= dev
->nic_info
.db
+
1144 (pd
->id
* dev
->nic_info
.db_page_size
) +
1145 OCRDMA_DB_SQ_OFFSET
;
1146 qp
->rq_db
= dev
->nic_info
.db
+
1147 (pd
->id
* dev
->nic_info
.db_page_size
) +
1148 OCRDMA_DB_RQ_OFFSET
;
1152 static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp
*qp
)
1155 kzalloc(sizeof(*(qp
->wqe_wr_id_tbl
)) * qp
->sq
.max_cnt
,
1157 if (qp
->wqe_wr_id_tbl
== NULL
)
1160 kzalloc(sizeof(u64
) * qp
->rq
.max_cnt
, GFP_KERNEL
);
1161 if (qp
->rqe_wr_id_tbl
== NULL
)
1167 static void ocrdma_set_qp_init_params(struct ocrdma_qp
*qp
,
1168 struct ocrdma_pd
*pd
,
1169 struct ib_qp_init_attr
*attrs
)
1172 spin_lock_init(&qp
->q_lock
);
1173 INIT_LIST_HEAD(&qp
->sq_entry
);
1174 INIT_LIST_HEAD(&qp
->rq_entry
);
1176 qp
->qp_type
= attrs
->qp_type
;
1177 qp
->cap_flags
= OCRDMA_QP_INB_RD
| OCRDMA_QP_INB_WR
;
1178 qp
->max_inline_data
= attrs
->cap
.max_inline_data
;
1179 qp
->sq
.max_sges
= attrs
->cap
.max_send_sge
;
1180 qp
->rq
.max_sges
= attrs
->cap
.max_recv_sge
;
1181 qp
->state
= OCRDMA_QPS_RST
;
1182 qp
->signaled
= (attrs
->sq_sig_type
== IB_SIGNAL_ALL_WR
) ? true : false;
1186 static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev
*dev
,
1187 struct ib_qp_init_attr
*attrs
)
1189 if (attrs
->qp_type
== IB_QPT_GSI
) {
1190 dev
->gsi_qp_created
= 1;
1191 dev
->gsi_sqcq
= get_ocrdma_cq(attrs
->send_cq
);
1192 dev
->gsi_rqcq
= get_ocrdma_cq(attrs
->recv_cq
);
1196 struct ib_qp
*ocrdma_create_qp(struct ib_pd
*ibpd
,
1197 struct ib_qp_init_attr
*attrs
,
1198 struct ib_udata
*udata
)
1201 struct ocrdma_pd
*pd
= get_ocrdma_pd(ibpd
);
1202 struct ocrdma_qp
*qp
;
1203 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibpd
->device
);
1204 struct ocrdma_create_qp_ureq ureq
;
1205 u16 dpp_credit_lmt
, dpp_offset
;
1207 status
= ocrdma_check_qp_params(ibpd
, dev
, attrs
);
1211 memset(&ureq
, 0, sizeof(ureq
));
1213 if (ib_copy_from_udata(&ureq
, udata
, sizeof(ureq
)))
1214 return ERR_PTR(-EFAULT
);
1216 qp
= kzalloc(sizeof(*qp
), GFP_KERNEL
);
1222 ocrdma_set_qp_init_params(qp
, pd
, attrs
);
1224 qp
->cap_flags
|= (OCRDMA_QP_MW_BIND
| OCRDMA_QP_LKEY0
|
1225 OCRDMA_QP_FAST_REG
);
1227 mutex_lock(&dev
->dev_lock
);
1228 status
= ocrdma_mbx_create_qp(qp
, attrs
, ureq
.enable_dpp_cq
,
1230 &dpp_offset
, &dpp_credit_lmt
);
1234 /* a user space QP's wr_id tables are managed in the library */
1235 if (udata
== NULL
) {
1236 status
= ocrdma_alloc_wr_id_tbl(qp
);
1241 status
= ocrdma_add_qpn_map(dev
, qp
);
1244 ocrdma_set_qp_db(dev
, qp
, pd
);
1246 status
= ocrdma_copy_qp_uresp(qp
, udata
, dpp_offset
,
1248 (attrs
->srq
!= NULL
));
1252 ocrdma_store_gsi_qp_cq(dev
, attrs
);
1253 qp
->ibqp
.qp_num
= qp
->id
;
1254 mutex_unlock(&dev
->dev_lock
);
1258 ocrdma_del_qpn_map(dev
, qp
);
1260 ocrdma_mbx_destroy_qp(dev
, qp
);
1262 mutex_unlock(&dev
->dev_lock
);
1263 kfree(qp
->wqe_wr_id_tbl
);
1264 kfree(qp
->rqe_wr_id_tbl
);
1266 pr_err("%s(%d) error=%d\n", __func__
, dev
->id
, status
);
1268 return ERR_PTR(status
);
1272 static void ocrdma_flush_rq_db(struct ocrdma_qp
*qp
)
1275 u32 val
= qp
->rq
.dbid
| (qp
->db_cache
<<
1276 ocrdma_get_num_posted_shift(qp
));
1277 iowrite32(val
, qp
->rq_db
);
1282 int _ocrdma_modify_qp(struct ib_qp
*ibqp
, struct ib_qp_attr
*attr
,
1286 struct ocrdma_qp
*qp
;
1287 struct ocrdma_dev
*dev
;
1288 enum ib_qp_state old_qps
;
1290 qp
= get_ocrdma_qp(ibqp
);
1292 if (attr_mask
& IB_QP_STATE
)
1293 status
= ocrdma_qp_state_change(qp
, attr
->qp_state
, &old_qps
);
1294 /* if new and previous states are same hw doesn't need to
1299 status
= ocrdma_mbx_modify_qp(dev
, qp
, attr
, attr_mask
, old_qps
);
1300 if (!status
&& attr_mask
& IB_QP_STATE
&& attr
->qp_state
== IB_QPS_RTR
)
1301 ocrdma_flush_rq_db(qp
);
1306 int ocrdma_modify_qp(struct ib_qp
*ibqp
, struct ib_qp_attr
*attr
,
1307 int attr_mask
, struct ib_udata
*udata
)
1309 unsigned long flags
;
1310 int status
= -EINVAL
;
1311 struct ocrdma_qp
*qp
;
1312 struct ocrdma_dev
*dev
;
1313 enum ib_qp_state old_qps
, new_qps
;
1315 qp
= get_ocrdma_qp(ibqp
);
1318 /* synchronize with multiple contexts trying to change or retrieve qps */
1319 mutex_lock(&dev
->dev_lock
);
1320 /* synchronize with wqe, rqe posting and cqe processing contexts */
1321 spin_lock_irqsave(&qp
->q_lock
, flags
);
1322 old_qps
= get_ibqp_state(qp
->state
);
1323 if (attr_mask
& IB_QP_STATE
)
1324 new_qps
= attr
->qp_state
;
1327 spin_unlock_irqrestore(&qp
->q_lock
, flags
);
1329 if (!ib_modify_qp_is_ok(old_qps
, new_qps
, ibqp
->qp_type
, attr_mask
)) {
1330 pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
1331 "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
1332 __func__
, dev
->id
, attr_mask
, qp
->id
, ibqp
->qp_type
,
1337 status
= _ocrdma_modify_qp(ibqp
, attr
, attr_mask
);
1341 mutex_unlock(&dev
->dev_lock
);
/*
 * ocrdma_mtu_int_to_enum() - takes a 16-bit MTU value and returns an
 * enum ib_mtu.
 * NOTE(review): only the signature is visible in this listing; the mapping
 * itself must be checked against the full source.
 */
1345 static enum ib_mtu
ocrdma_mtu_int_to_enum(u16 mtu
)
/*
 * ocrdma_to_ib_qp_acc_flags() - translate ocrdma QP capability bits into
 * IB access flags.
 * @qp_cap_flags: OCRDMA_QP_INB_* capability bits.
 *
 * Returns a mask built as follows:
 *   OCRDMA_QP_INB_WR -> IB_ACCESS_REMOTE_WRITE
 *   OCRDMA_QP_INB_RD -> IB_ACCESS_LOCAL_WRITE
 * NOTE(review): mapping the inbound-read capability to IB_ACCESS_LOCAL_WRITE
 * looks suspicious (a remote-read flag would be the expected counterpart of
 * REMOTE_WRITE) -- confirm the intent before relying on it.
 */
1363 static int ocrdma_to_ib_qp_acc_flags(int qp_cap_flags
)
1365 int ib_qp_acc_flags
= 0;
1367 if (qp_cap_flags
& OCRDMA_QP_INB_WR
)
1368 ib_qp_acc_flags
|= IB_ACCESS_REMOTE_WRITE
;
1369 if (qp_cap_flags
& OCRDMA_QP_INB_RD
)
1370 ib_qp_acc_flags
|= IB_ACCESS_LOCAL_WRITE
;
1371 return ib_qp_acc_flags
;
/*
 * ocrdma_query_qp() - verbs entry point that reports the attributes of a QP.
 * @ibqp:         verbs QP to query.
 * @qp_attr:      output; filled with the current attributes.
 * @attr_mask:    not referenced in the visible body.
 * @qp_init_attr: output; its cap field is copied from @qp_attr->cap.
 *
 * The adapter is queried with ocrdma_mbx_query_qp() under dev->dev_lock and
 * the packed words of the returned ocrdma_qp_params are unpacked with the
 * OCRDMA_QP_PARAMS_* masks and shifts.  Capabilities, inline size and the
 * sgid index come from the driver's own qp structure.  Fields set to fixed
 * values here: port numbers (1), path_mig_state (IB_MIG_MIGRATED), ah_flags
 * (IB_AH_GRH), and the pkey/alternate-path/min_rnr_timer fields (0).
 *
 * NOTE(review): qp_state and cur_qp_state are derived from the constant
 * IB_QPS_INIT rather than from the queried state -- confirm this is intended.
 * NOTE(review): the PATH_MTU shift is applied to the result of
 * ocrdma_mtu_int_to_enum() instead of to its argument -- check parenthesis
 * placement.
 * NOTE(review): traffic_class is masked with OCRDMA_QP_PARAMS_SQ_PSN_MASK but
 * shifted by OCRDMA_QP_PARAMS_TCLASS_SHIFT -- the mask looks mismatched.
 */
1374 int ocrdma_query_qp(struct ib_qp
*ibqp
,
1375 struct ib_qp_attr
*qp_attr
,
1376 int attr_mask
, struct ib_qp_init_attr
*qp_init_attr
)
1380 struct ocrdma_qp_params params
;
1381 struct ocrdma_qp
*qp
= get_ocrdma_qp(ibqp
);
1382 struct ocrdma_dev
*dev
= qp
->dev
;
1384 memset(¶ms
, 0, sizeof(params
));
1385 mutex_lock(&dev
->dev_lock
);
1386 status
= ocrdma_mbx_query_qp(dev
, qp
, ¶ms
);
1387 mutex_unlock(&dev
->dev_lock
);
1390 qp_attr
->qp_state
= get_ibqp_state(IB_QPS_INIT
);
1391 qp_attr
->cur_qp_state
= get_ibqp_state(IB_QPS_INIT
);
1393 ocrdma_mtu_int_to_enum(params
.path_mtu_pkey_indx
&
1394 OCRDMA_QP_PARAMS_PATH_MTU_MASK
) >>
1395 OCRDMA_QP_PARAMS_PATH_MTU_SHIFT
;
1396 qp_attr
->path_mig_state
= IB_MIG_MIGRATED
;
1397 qp_attr
->rq_psn
= params
.hop_lmt_rq_psn
& OCRDMA_QP_PARAMS_RQ_PSN_MASK
;
1398 qp_attr
->sq_psn
= params
.tclass_sq_psn
& OCRDMA_QP_PARAMS_SQ_PSN_MASK
;
1399 qp_attr
->dest_qp_num
=
1400 params
.ack_to_rnr_rtc_dest_qpn
& OCRDMA_QP_PARAMS_DEST_QPN_MASK
;
1402 qp_attr
->qp_access_flags
= ocrdma_to_ib_qp_acc_flags(qp
->cap_flags
);
1403 qp_attr
->cap
.max_send_wr
= qp
->sq
.max_cnt
- 1;
1404 qp_attr
->cap
.max_recv_wr
= qp
->rq
.max_cnt
- 1;
1405 qp_attr
->cap
.max_send_sge
= qp
->sq
.max_sges
;
1406 qp_attr
->cap
.max_recv_sge
= qp
->rq
.max_sges
;
1407 qp_attr
->cap
.max_inline_data
= qp
->max_inline_data
;
1408 qp_init_attr
->cap
= qp_attr
->cap
;
1409 memcpy(&qp_attr
->ah_attr
.grh
.dgid
, ¶ms
.dgid
[0],
1410 sizeof(params
.dgid
));
1411 qp_attr
->ah_attr
.grh
.flow_label
= params
.rnt_rc_sl_fl
&
1412 OCRDMA_QP_PARAMS_FLOW_LABEL_MASK
;
1413 qp_attr
->ah_attr
.grh
.sgid_index
= qp
->sgid_idx
;
1414 qp_attr
->ah_attr
.grh
.hop_limit
= (params
.hop_lmt_rq_psn
&
1415 OCRDMA_QP_PARAMS_HOP_LMT_MASK
) >>
1416 OCRDMA_QP_PARAMS_HOP_LMT_SHIFT
;
1417 qp_attr
->ah_attr
.grh
.traffic_class
= (params
.tclass_sq_psn
&
1418 OCRDMA_QP_PARAMS_SQ_PSN_MASK
) >>
1419 OCRDMA_QP_PARAMS_TCLASS_SHIFT
;
1421 qp_attr
->ah_attr
.ah_flags
= IB_AH_GRH
;
1422 qp_attr
->ah_attr
.port_num
= 1;
1423 qp_attr
->ah_attr
.sl
= (params
.rnt_rc_sl_fl
&
1424 OCRDMA_QP_PARAMS_SL_MASK
) >>
1425 OCRDMA_QP_PARAMS_SL_SHIFT
;
1426 qp_attr
->timeout
= (params
.ack_to_rnr_rtc_dest_qpn
&
1427 OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK
) >>
1428 OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT
;
1429 qp_attr
->rnr_retry
= (params
.ack_to_rnr_rtc_dest_qpn
&
1430 OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK
) >>
1431 OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT
;
1432 qp_attr
->retry_cnt
=
1433 (params
.rnt_rc_sl_fl
& OCRDMA_QP_PARAMS_RETRY_CNT_MASK
) >>
1434 OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT
;
1435 qp_attr
->min_rnr_timer
= 0;
1436 qp_attr
->pkey_index
= 0;
1437 qp_attr
->port_num
= 1;
1438 qp_attr
->ah_attr
.src_path_bits
= 0;
1439 qp_attr
->ah_attr
.static_rate
= 0;
1440 qp_attr
->alt_pkey_index
= 0;
1441 qp_attr
->alt_port_num
= 0;
1442 qp_attr
->alt_timeout
= 0;
1443 memset(&qp_attr
->alt_ah_attr
, 0, sizeof(qp_attr
->alt_ah_attr
));
1444 qp_state
= (params
.max_sge_recv_flags
& OCRDMA_QP_PARAMS_STATE_MASK
) >>
1445 OCRDMA_QP_PARAMS_STATE_SHIFT
;
1446 qp_attr
->sq_draining
= (qp_state
== OCRDMA_QPS_SQ_DRAINING
) ? 1 : 0;
1447 qp_attr
->max_dest_rd_atomic
=
1448 params
.max_ord_ird
>> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT
;
1449 qp_attr
->max_rd_atomic
=
1450 params
.max_ord_ird
& OCRDMA_QP_PARAMS_MAX_IRD_MASK
;
1451 qp_attr
->en_sqd_async_notify
= (params
.max_sge_recv_flags
&
1452 OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC
) ? 1 : 0;
/*
 * ocrdma_srq_toggle_bit() - flip one bit of the SRQ index bitmap.
 * @srq: shared receive queue owning the idx_bit_fields bitmap.
 * @idx: bit number; the bit inside a 32-bit word is idx % 32.
 *
 * If the selected bit is set it is cleared, otherwise it is set.
 * NOTE(review): the declaration of the word index i is not visible in this
 * listing; presumably it is idx / 32 -- confirm.
 */
1457 static void ocrdma_srq_toggle_bit(struct ocrdma_srq
*srq
, int idx
)
1460 unsigned int mask
= (1 << (idx
% 32));
1462 if (srq
->idx_bit_fields
[i
] & mask
)
1463 srq
->idx_bit_fields
[i
] &= ~mask
;
1465 srq
->idx_bit_fields
[i
] |= mask
;
/*
 * ocrdma_hwq_free_cnt() - number of free entries in a hardware work queue.
 * @q: queue bookkeeping (head, tail, max_wqe_idx, max_cnt).
 *
 * Returns ((max_wqe_idx - head) + tail) modulo max_cnt.  The posting paths
 * treat a result of 0 as "queue full".
 */
1468 static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info
*q
)
1470 return ((q
->max_wqe_idx
- q
->head
) + q
->tail
) % q
->max_cnt
;
/*
 * is_hw_sq_empty() - returns non-zero when the send queue's tail index
 * equals its head index.
 */
1473 static int is_hw_sq_empty(struct ocrdma_qp
*qp
)
1475 return (qp
->sq
.tail
== qp
->sq
.head
);
/*
 * is_hw_rq_empty() - returns non-zero when the receive queue's tail index
 * equals its head index.
 */
1478 static int is_hw_rq_empty(struct ocrdma_qp
*qp
)
1480 return (qp
->rq
.tail
== qp
->rq
.head
);
/*
 * ocrdma_hwq_head() - address of the queue entry at the current head.
 * Computed as q->va + head * entry_size.
 */
1483 static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info
*q
)
1485 return q
->va
+ (q
->head
* q
->entry_size
);
/*
 * ocrdma_hwq_head_from_idx() - address of the queue entry at a given index.
 * Computed as q->va + idx * entry_size.
 * (the declaration of the idx parameter is not part of this listing)
 */
1488 static void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info
*q
,
1491 return q
->va
+ (idx
* q
->entry_size
);
/*
 * ocrdma_hwq_inc_head() - advance the producer index by one entry.
 * The index wraps by masking with q->max_wqe_idx.
 * NOTE(review): masking only wraps correctly if max_wqe_idx is of the form
 * 2^n - 1 -- confirm where the queue is sized.
 */
1494 static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info
*q
)
1496 q
->head
= (q
->head
+ 1) & q
->max_wqe_idx
;
/*
 * ocrdma_hwq_inc_tail() - advance the consumer index by one entry.
 * The index wraps by masking with q->max_wqe_idx.
 * NOTE(review): masking only wraps correctly if max_wqe_idx is of the form
 * 2^n - 1 -- confirm where the queue is sized.
 */
1499 static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info
*q
)
1501 q
->tail
= (q
->tail
+ 1) & q
->max_wqe_idx
;
/*
 * ocrdma_discard_cqes() - drop the pending completions of one QP from a CQ.
 * @qp: queue pair whose CQEs are to be discarded.
 * @cq: completion queue to scan.
 *
 * Runs with cq->cq_lock held (irqsave).  Starting at cq->getp, the hardware
 * CQ is walked one entry at a time, wrapping modulo cq->max_hw_cqe, until
 * the start position is reached again.  Entries whose qpn is 0 or belongs to
 * another QP are tested for separately from matching ones.  For a matching
 * entry the consumer index of the queue it completes is advanced: the SQ
 * tail for send CQEs, otherwise either the SRQ tail (under srq->q_lock,
 * together with an index-bitmap toggle) or the RQ tail.
 * Per the in-body comment, matching entries are marked discarded by clearing
 * their qpn; that statement is not part of this listing.
 */
1504 /* discard the cqe for a given QP */
1505 static void ocrdma_discard_cqes(struct ocrdma_qp
*qp
, struct ocrdma_cq
*cq
)
1507 unsigned long cq_flags
;
1508 unsigned long flags
;
1509 int discard_cnt
= 0;
1510 u32 cur_getp
, stop_getp
;
1511 struct ocrdma_cqe
*cqe
;
1514 spin_lock_irqsave(&cq
->cq_lock
, cq_flags
);
1516 /* traverse through the CQEs in the hw CQ,
1517 * find the matching CQE for a given qp,
1518 * mark the matching one discarded by clearing qpn.
1519 * ring the doorbell in the poll_cq() as
1520 * we don't complete out of order cqe.
1523 cur_getp
= cq
->getp
;
1524 /* find upto when do we reap the cq. */
1525 stop_getp
= cur_getp
;
1527 if (is_hw_sq_empty(qp
) && (!qp
->srq
&& is_hw_rq_empty(qp
)))
1530 cqe
= cq
->va
+ cur_getp
;
1531 /* if (a) done reaping whole hw cq, or
1532 * (b) qp_xq becomes empty.
1535 qpn
= cqe
->cmn
.qpn
& OCRDMA_CQE_QPN_MASK
;
1536 /* if previously discarded cqe found, skip that too. */
1537 /* check for matching qp */
1538 if (qpn
== 0 || qpn
!= qp
->id
)
1541 /* mark cqe discarded so that it is not picked up later
1546 if (is_cqe_for_sq(cqe
)) {
1547 ocrdma_hwq_inc_tail(&qp
->sq
);
1550 spin_lock_irqsave(&qp
->srq
->q_lock
, flags
);
1551 ocrdma_hwq_inc_tail(&qp
->srq
->rq
);
1552 ocrdma_srq_toggle_bit(qp
->srq
, cur_getp
);
1553 spin_unlock_irqrestore(&qp
->srq
->q_lock
, flags
);
1556 ocrdma_hwq_inc_tail(&qp
->rq
);
1560 cur_getp
= (cur_getp
+ 1) % cq
->max_hw_cqe
;
1561 } while (cur_getp
!= stop_getp
);
1562 spin_unlock_irqrestore(&cq
->cq_lock
, cq_flags
);
/*
 * ocrdma_del_flush_qp() - unlink a QP from the SQ and RQ flush lists.
 * @qp: queue pair being removed.
 *
 * Under dev->flush_q_lock (irqsave), membership on the send CQ's flush list
 * is looked up with ocrdma_is_qp_in_sq_flushlist() and qp->sq_entry is
 * unlinked; the same is done for the receive side with
 * ocrdma_is_qp_in_rq_flushlist() and qp->rq_entry.
 * NOTE(review): the conditions guarding the two list_del() calls are not
 * visible in this listing; presumably each depends on `found` -- confirm.
 */
1565 void ocrdma_del_flush_qp(struct ocrdma_qp
*qp
)
1568 unsigned long flags
;
1569 struct ocrdma_dev
*dev
= qp
->dev
;
1570 /* sync with any active CQ poll */
1572 spin_lock_irqsave(&dev
->flush_q_lock
, flags
);
1573 found
= ocrdma_is_qp_in_sq_flushlist(qp
->sq_cq
, qp
);
1575 list_del(&qp
->sq_entry
);
1577 found
= ocrdma_is_qp_in_rq_flushlist(qp
->rq_cq
, qp
);
1579 list_del(&qp
->rq_entry
);
1581 spin_unlock_irqrestore(&dev
->flush_q_lock
, flags
);
/*
 * ocrdma_destroy_qp() - verbs entry point that tears down a QP.
 * @ibqp: verbs QP to destroy.
 *
 * Visible sequence:
 *  1. move the QP to IB_QPS_ERR through _ocrdma_modify_qp();
 *  2. under dev->dev_lock, destroy it in the adapter with
 *     ocrdma_mbx_destroy_qp();
 *  3. with the send CQ lock held (and the receive CQ lock too when that is a
 *     different CQ), remove the QP from the qpn map;
 *  4. discard its outstanding CQEs on both CQs, then drop dev_lock;
 *  5. remove the SQ/RQ mmap entries registered on pd->uctx;
 *  6. take the QP off the flush lists and free both wr_id shadow tables.
 * NOTE(review): conditions around steps 4-5 (e.g. user vs kernel QP, SRQ
 * attached) are not visible in this listing -- confirm in the full source.
 */
1584 int ocrdma_destroy_qp(struct ib_qp
*ibqp
)
1587 struct ocrdma_pd
*pd
;
1588 struct ocrdma_qp
*qp
;
1589 struct ocrdma_dev
*dev
;
1590 struct ib_qp_attr attrs
;
1591 int attr_mask
= IB_QP_STATE
;
1592 unsigned long flags
;
1594 qp
= get_ocrdma_qp(ibqp
);
1597 attrs
.qp_state
= IB_QPS_ERR
;
1600 /* change the QP state to ERROR */
1601 _ocrdma_modify_qp(ibqp
, &attrs
, attr_mask
);
1603 /* ensure that CQEs for newly created QP (whose id may be same with
1604 * one which just getting destroyed are same), dont get
1605 * discarded until the old CQEs are discarded.
1607 mutex_lock(&dev
->dev_lock
);
1608 status
= ocrdma_mbx_destroy_qp(dev
, qp
);
1611 * acquire CQ lock while destroy is in progress, in order to
1612 * protect against proessing in-flight CQEs for this QP.
1614 spin_lock_irqsave(&qp
->sq_cq
->cq_lock
, flags
);
1615 if (qp
->rq_cq
&& (qp
->rq_cq
!= qp
->sq_cq
))
1616 spin_lock(&qp
->rq_cq
->cq_lock
);
1618 ocrdma_del_qpn_map(dev
, qp
);
1620 if (qp
->rq_cq
&& (qp
->rq_cq
!= qp
->sq_cq
))
1621 spin_unlock(&qp
->rq_cq
->cq_lock
);
1622 spin_unlock_irqrestore(&qp
->sq_cq
->cq_lock
, flags
);
1625 ocrdma_discard_cqes(qp
, qp
->sq_cq
);
1626 ocrdma_discard_cqes(qp
, qp
->rq_cq
);
1628 mutex_unlock(&dev
->dev_lock
);
1631 ocrdma_del_mmap(pd
->uctx
, (u64
) qp
->sq
.pa
,
1632 PAGE_ALIGN(qp
->sq
.len
));
1634 ocrdma_del_mmap(pd
->uctx
, (u64
) qp
->rq
.pa
,
1635 PAGE_ALIGN(qp
->rq
.len
));
1638 ocrdma_del_flush_qp(qp
);
1640 kfree(qp
->wqe_wr_id_tbl
);
1641 kfree(qp
->rqe_wr_id_tbl
);
/*
 * ocrdma_copy_srq_uresp() - return SRQ creation results to user space.
 * @dev:   device owning the SRQ.
 * @srq:   SRQ that was just created.
 * @udata: user-space buffer descriptor for the response.
 *
 * Fills a zeroed ocrdma_create_srq_uresp with the RQ doorbell id, a single
 * RQ page (address srq->rq.pa, size srq->rq.len), the doorbell page for the
 * SRQ's PD (unmapped_db + pd->id * db_page_size), the allocated RQE count
 * and the doorbell offset/shift (shift 24 for OCRDMA_GEN2_FAMILY, 16
 * otherwise).  The structure is copied out with ib_copy_to_udata() and the
 * RQ page is registered with ocrdma_add_mmap() on the PD's user context.
 */
1646 static int ocrdma_copy_srq_uresp(struct ocrdma_dev
*dev
, struct ocrdma_srq
*srq
,
1647 struct ib_udata
*udata
)
1650 struct ocrdma_create_srq_uresp uresp
;
1652 memset(&uresp
, 0, sizeof(uresp
));
1653 uresp
.rq_dbid
= srq
->rq
.dbid
;
1654 uresp
.num_rq_pages
= 1;
1655 uresp
.rq_page_addr
[0] = srq
->rq
.pa
;
1656 uresp
.rq_page_size
= srq
->rq
.len
;
1657 uresp
.db_page_addr
= dev
->nic_info
.unmapped_db
+
1658 (srq
->pd
->id
* dev
->nic_info
.db_page_size
);
1659 uresp
.db_page_size
= dev
->nic_info
.db_page_size
;
1660 uresp
.num_rqe_allocated
= srq
->rq
.max_cnt
;
1661 if (dev
->nic_info
.dev_family
== OCRDMA_GEN2_FAMILY
) {
1662 uresp
.db_rq_offset
= OCRDMA_DB_GEN2_RQ_OFFSET
;
1663 uresp
.db_shift
= 24;
1665 uresp
.db_rq_offset
= OCRDMA_DB_RQ_OFFSET
;
1666 uresp
.db_shift
= 16;
1669 status
= ib_copy_to_udata(udata
, &uresp
, sizeof(uresp
));
1672 status
= ocrdma_add_mmap(srq
->pd
->uctx
, uresp
.rq_page_addr
[0],
1673 uresp
.rq_page_size
);
/*
 * ocrdma_create_srq() - verbs entry point that creates a shared receive queue.
 * @ibpd:      protection domain the SRQ belongs to.
 * @init_attr: requested limits (max_sge, max_wr, srq_limit).
 * @udata:     NULL for a kernel consumer, otherwise the user-space context.
 *
 * Requests exceeding the device's max_recv_sge or max_rqe are rejected with
 * ERR_PTR(-EINVAL).  The SRQ object is zero-allocated, its lock initialised
 * and its doorbell address derived from the PD id, then the queue is created
 * in the adapter with ocrdma_mbx_create_srq().
 *
 * For kernel consumers (udata == NULL) two shadow structures are allocated:
 * rqe_wr_id_tbl (one u64 per RQE) and idx_bit_fields, a bitmap of
 * ceil(max_cnt / 32) words initialised to all ones.
 * A non-zero srq_limit is applied with ocrdma_mbx_modify_srq(), and
 * ocrdma_copy_srq_uresp() reports the result to user space.
 *
 * The visible error path destroys the adapter SRQ, frees both shadow tables
 * and returns ERR_PTR(status); status starts out as -ENOMEM.
 * NOTE(review): the guards on the uresp copy and the success return are not
 * visible in this listing -- confirm in the full source.
 */
1679 struct ib_srq
*ocrdma_create_srq(struct ib_pd
*ibpd
,
1680 struct ib_srq_init_attr
*init_attr
,
1681 struct ib_udata
*udata
)
1683 int status
= -ENOMEM
;
1684 struct ocrdma_pd
*pd
= get_ocrdma_pd(ibpd
);
1685 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibpd
->device
);
1686 struct ocrdma_srq
*srq
;
1688 if (init_attr
->attr
.max_sge
> dev
->attr
.max_recv_sge
)
1689 return ERR_PTR(-EINVAL
);
1690 if (init_attr
->attr
.max_wr
> dev
->attr
.max_rqe
)
1691 return ERR_PTR(-EINVAL
);
1693 srq
= kzalloc(sizeof(*srq
), GFP_KERNEL
);
1695 return ERR_PTR(status
);
1697 spin_lock_init(&srq
->q_lock
);
1699 srq
->db
= dev
->nic_info
.db
+ (pd
->id
* dev
->nic_info
.db_page_size
);
1700 status
= ocrdma_mbx_create_srq(dev
, srq
, init_attr
, pd
);
1704 if (udata
== NULL
) {
1705 srq
->rqe_wr_id_tbl
= kzalloc(sizeof(u64
) * srq
->rq
.max_cnt
,
1707 if (srq
->rqe_wr_id_tbl
== NULL
)
1710 srq
->bit_fields_len
= (srq
->rq
.max_cnt
/ 32) +
1711 (srq
->rq
.max_cnt
% 32 ? 1 : 0);
1712 srq
->idx_bit_fields
=
1713 kmalloc(srq
->bit_fields_len
* sizeof(u32
), GFP_KERNEL
);
1714 if (srq
->idx_bit_fields
== NULL
)
1716 memset(srq
->idx_bit_fields
, 0xff,
1717 srq
->bit_fields_len
* sizeof(u32
));
1720 if (init_attr
->attr
.srq_limit
) {
1721 status
= ocrdma_mbx_modify_srq(srq
, &init_attr
->attr
);
1727 status
= ocrdma_copy_srq_uresp(dev
, srq
, udata
);
1735 ocrdma_mbx_destroy_srq(dev
, srq
);
1737 kfree(srq
->rqe_wr_id_tbl
);
1738 kfree(srq
->idx_bit_fields
);
1740 return ERR_PTR(status
);
/*
 * ocrdma_modify_srq() - verbs entry point for changing SRQ attributes.
 * @ibsrq:         verbs SRQ to modify.
 * @srq_attr:      new attribute values.
 * @srq_attr_mask: IB_SRQ_* bits selecting the attributes to change.
 * @udata:         not referenced in the visible body.
 *
 * IB_SRQ_MAX_WR is tested separately; other requests are forwarded to
 * ocrdma_mbx_modify_srq().
 * NOTE(review): the statement taken for IB_SRQ_MAX_WR is not visible in this
 * listing; presumably resizing is rejected -- confirm.
 */
1743 int ocrdma_modify_srq(struct ib_srq
*ibsrq
,
1744 struct ib_srq_attr
*srq_attr
,
1745 enum ib_srq_attr_mask srq_attr_mask
,
1746 struct ib_udata
*udata
)
1749 struct ocrdma_srq
*srq
;
1751 srq
= get_ocrdma_srq(ibsrq
);
1752 if (srq_attr_mask
& IB_SRQ_MAX_WR
)
1755 status
= ocrdma_mbx_modify_srq(srq
, srq_attr
);
/*
 * ocrdma_query_srq() - verbs entry point that reads SRQ attributes.
 * @ibsrq:    verbs SRQ to query.
 * @srq_attr: output; filled by ocrdma_mbx_query_srq().
 */
1759 int ocrdma_query_srq(struct ib_srq
*ibsrq
, struct ib_srq_attr
*srq_attr
)
1762 struct ocrdma_srq
*srq
;
1764 srq
= get_ocrdma_srq(ibsrq
);
1765 status
= ocrdma_mbx_query_srq(srq
, srq_attr
);
/*
 * ocrdma_destroy_srq() - verbs entry point that tears down an SRQ.
 * @ibsrq: verbs SRQ to destroy.
 *
 * Destroys the queue in the adapter with ocrdma_mbx_destroy_srq(), removes
 * the RQ page mapping registered on the PD's user context, and frees the
 * index bitmap and the wr_id shadow table.
 * NOTE(review): the condition guarding ocrdma_del_mmap() is not visible in
 * this listing; presumably it applies only to user-space SRQs -- confirm.
 */
1769 int ocrdma_destroy_srq(struct ib_srq
*ibsrq
)
1772 struct ocrdma_srq
*srq
;
1773 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibsrq
->device
);
1775 srq
= get_ocrdma_srq(ibsrq
);
1777 status
= ocrdma_mbx_destroy_srq(dev
, srq
);
1780 ocrdma_del_mmap(srq
->pd
->uctx
, (u64
) srq
->rq
.pa
,
1781 PAGE_ALIGN(srq
->rq
.len
));
1783 kfree(srq
->idx_bit_fields
);
1784 kfree(srq
->rqe_wr_id_tbl
);
1789 /* unprivileged verbs and their support functions. */
/*
 * ocrdma_build_ud_hdr() - fill the UD extension header of a send WQE.
 * @qp:  sending queue pair.
 * @hdr: WQE header; the UD header is located directly after it (hdr + 1).
 * @wr:  work request carrying the UD addressing (ah, remote_qpn, remote_qkey).
 *
 * Stores the destination QPN and the address-handle id taken from the work
 * request.  The Q_Key comes from qp->qkey for GSI QPs and from the work
 * request otherwise.
 */
1790 static void ocrdma_build_ud_hdr(struct ocrdma_qp
*qp
,
1791 struct ocrdma_hdr_wqe
*hdr
,
1792 struct ib_send_wr
*wr
)
1794 struct ocrdma_ewqe_ud_hdr
*ud_hdr
=
1795 (struct ocrdma_ewqe_ud_hdr
*)(hdr
+ 1);
1796 struct ocrdma_ah
*ah
= get_ocrdma_ah(wr
->wr
.ud
.ah
);
1798 ud_hdr
->rsvd_dest_qpn
= wr
->wr
.ud
.remote_qpn
;
1799 if (qp
->qp_type
== IB_QPT_GSI
)
1800 ud_hdr
->qkey
= qp
->qkey
;
1802 ud_hdr
->qkey
= wr
->wr
.ud
.remote_qkey
;
1803 ud_hdr
->rsvd_ahid
= ah
->id
;
/*
 * ocrdma_build_sges() - translate a verbs scatter/gather list into HW SGEs.
 * @hdr:     WQE header; total_len is increased by the length of every entry.
 * @sge:     destination array of hardware SGEs.
 * @num_sge: number of entries in @sg_list.
 * @sg_list: verbs scatter/gather entries.
 *
 * Each entry's lkey, length and address are copied; the upper half of the
 * address goes to addr_hi via upper_32_bits() and the plain address is
 * assigned to addr_lo.
 * A trailing memset() zeroes one SGE; its guarding condition is not visible
 * in this listing (presumably the num_sge == 0 case -- confirm).
 */
1806 static void ocrdma_build_sges(struct ocrdma_hdr_wqe
*hdr
,
1807 struct ocrdma_sge
*sge
, int num_sge
,
1808 struct ib_sge
*sg_list
)
1812 for (i
= 0; i
< num_sge
; i
++) {
1813 sge
[i
].lrkey
= sg_list
[i
].lkey
;
1814 sge
[i
].addr_lo
= sg_list
[i
].addr
;
1815 sge
[i
].addr_hi
= upper_32_bits(sg_list
[i
].addr
);
1816 sge
[i
].len
= sg_list
[i
].length
;
1817 hdr
->total_len
+= sg_list
[i
].length
;
1820 memset(sge
, 0, sizeof(*sge
));
/*
 * ocrdma_sglist_len() - sum the length fields of a scatter/gather list.
 * @sg_list: verbs scatter/gather entries.
 * @num_sge: number of entries in @sg_list.
 *
 * The sum is accumulated in a uint32_t, so it wraps on overflow.
 */
1823 static inline uint32_t ocrdma_sglist_len(struct ib_sge
*sg_list
, int num_sge
)
1825 uint32_t total_len
= 0, i
;
1827 for (i
= 0; i
< num_sge
; i
++)
1828 total_len
+= sg_list
[i
].length
;
/*
 * ocrdma_build_inline_sges() - write the payload part of a send/write WQE.
 * @qp:       sending queue pair.
 * @hdr:      WQE header; total_len and the control word cw are updated.
 * @sge:      first payload slot following the (extended) header.
 * @wr:       work request describing the data.
 * @wqe_size: size in bytes of the header part already built.
 *
 * Inline path (IB_SEND_INLINE set and QP type is not UD): the total length
 * is computed with ocrdma_sglist_len() and checked against
 * qp->max_inline_data (an oversize request is logged).  The buffers are
 * placed back to back starting at @sge, the size grows by the length rounded
 * up to OCRDMA_WQE_ALIGN_BYTES (plus one SGE when the length is 0), and the
 * WQE type is OCRDMA_TYPE_INLINE.
 *
 * Otherwise the list is translated with ocrdma_build_sges(), the size grows
 * by num_sge SGEs or by a single SGE, and the type is OCRDMA_TYPE_LKEY.
 *
 * Finally the size, in units of OCRDMA_WQE_STRIDE, is stored in cw.
 * NOTE(review): the copy call and the error return of the oversize check
 * are not visible in this listing -- confirm in the full source.
 */
1833 static int ocrdma_build_inline_sges(struct ocrdma_qp
*qp
,
1834 struct ocrdma_hdr_wqe
*hdr
,
1835 struct ocrdma_sge
*sge
,
1836 struct ib_send_wr
*wr
, u32 wqe_size
)
1841 if (wr
->send_flags
& IB_SEND_INLINE
&& qp
->qp_type
!= IB_QPT_UD
) {
1842 hdr
->total_len
= ocrdma_sglist_len(wr
->sg_list
, wr
->num_sge
);
1843 if (unlikely(hdr
->total_len
> qp
->max_inline_data
)) {
1844 pr_err("%s() supported_len=0x%x,\n"
1845 " unspported len req=0x%x\n", __func__
,
1846 qp
->max_inline_data
, hdr
->total_len
);
1849 dpp_addr
= (char *)sge
;
1850 for (i
= 0; i
< wr
->num_sge
; i
++) {
1852 (void *)(unsigned long)wr
->sg_list
[i
].addr
,
1853 wr
->sg_list
[i
].length
);
1854 dpp_addr
+= wr
->sg_list
[i
].length
;
1857 wqe_size
+= roundup(hdr
->total_len
, OCRDMA_WQE_ALIGN_BYTES
);
1858 if (0 == hdr
->total_len
)
1859 wqe_size
+= sizeof(struct ocrdma_sge
);
1860 hdr
->cw
|= (OCRDMA_TYPE_INLINE
<< OCRDMA_WQE_TYPE_SHIFT
);
1862 ocrdma_build_sges(hdr
, sge
, wr
->num_sge
, wr
->sg_list
);
1864 wqe_size
+= (wr
->num_sge
* sizeof(struct ocrdma_sge
));
1866 wqe_size
+= sizeof(struct ocrdma_sge
);
1867 hdr
->cw
|= (OCRDMA_TYPE_LKEY
<< OCRDMA_WQE_TYPE_SHIFT
);
1869 hdr
->cw
|= ((wqe_size
/ OCRDMA_WQE_STRIDE
) << OCRDMA_WQE_SIZE_SHIFT
);
/*
 * ocrdma_build_send() - build the body of a SEND work queue entry.
 * @qp:  sending queue pair.
 * @hdr: WQE header at the head of the send queue.
 * @wr:  work request to encode.
 *
 * For UD and GSI QPs a UD header is written after @hdr and the payload
 * starts at hdr + 2; for other QP types the payload starts at hdr + 1.
 * The payload itself is produced by ocrdma_build_inline_sges(), whose result
 * is stored in status.
 */
1873 static int ocrdma_build_send(struct ocrdma_qp
*qp
, struct ocrdma_hdr_wqe
*hdr
,
1874 struct ib_send_wr
*wr
)
1877 struct ocrdma_sge
*sge
;
1878 u32 wqe_size
= sizeof(*hdr
);
1880 if (qp
->qp_type
== IB_QPT_UD
|| qp
->qp_type
== IB_QPT_GSI
) {
1881 ocrdma_build_ud_hdr(qp
, hdr
, wr
);
1882 sge
= (struct ocrdma_sge
*)(hdr
+ 2);
1883 wqe_size
+= sizeof(struct ocrdma_ewqe_ud_hdr
);
1885 sge
= (struct ocrdma_sge
*)(hdr
+ 1);
1888 status
= ocrdma_build_inline_sges(qp
, hdr
, sge
, wr
, wqe_size
);
/*
 * ocrdma_build_write() - build the body of an RDMA WRITE work queue entry.
 * @qp:  sending queue pair.
 * @hdr: WQE header at the head of the send queue.
 * @wr:  work request to encode.
 *
 * Layout: header, one extended SGE (ext_rw) describing the remote buffer,
 * then the local payload.  The payload is produced first by
 * ocrdma_build_inline_sges(); ext_rw is then filled with the remote address,
 * the rkey and hdr->total_len.
 */
1892 static int ocrdma_build_write(struct ocrdma_qp
*qp
, struct ocrdma_hdr_wqe
*hdr
,
1893 struct ib_send_wr
*wr
)
1896 struct ocrdma_sge
*ext_rw
= (struct ocrdma_sge
*)(hdr
+ 1);
1897 struct ocrdma_sge
*sge
= ext_rw
+ 1;
1898 u32 wqe_size
= sizeof(*hdr
) + sizeof(*ext_rw
);
1900 status
= ocrdma_build_inline_sges(qp
, hdr
, sge
, wr
, wqe_size
);
1903 ext_rw
->addr_lo
= wr
->wr
.rdma
.remote_addr
;
1904 ext_rw
->addr_hi
= upper_32_bits(wr
->wr
.rdma
.remote_addr
);
1905 ext_rw
->lrkey
= wr
->wr
.rdma
.rkey
;
1906 ext_rw
->len
= hdr
->total_len
;
/*
 * ocrdma_build_read() - build an RDMA READ work queue entry.
 * @qp:  sending queue pair (not referenced in the visible body).
 * @hdr: WQE header at the head of the send queue.
 * @wr:  work request to encode.
 *
 * Layout: header, one extended SGE (ext_rw) describing the remote buffer,
 * then num_sge local SGEs built by ocrdma_build_sges().  The control word
 * receives the WQE size (in OCRDMA_WQE_STRIDE units), the OCRDMA_READ opcode
 * and the OCRDMA_TYPE_LKEY type.  ext_rw is filled with the remote address,
 * the rkey and hdr->total_len.
 */
1910 static void ocrdma_build_read(struct ocrdma_qp
*qp
, struct ocrdma_hdr_wqe
*hdr
,
1911 struct ib_send_wr
*wr
)
1913 struct ocrdma_sge
*ext_rw
= (struct ocrdma_sge
*)(hdr
+ 1);
1914 struct ocrdma_sge
*sge
= ext_rw
+ 1;
1915 u32 wqe_size
= ((wr
->num_sge
+ 1) * sizeof(struct ocrdma_sge
)) +
1916 sizeof(struct ocrdma_hdr_wqe
);
1918 ocrdma_build_sges(hdr
, sge
, wr
->num_sge
, wr
->sg_list
);
1919 hdr
->cw
|= ((wqe_size
/ OCRDMA_WQE_STRIDE
) << OCRDMA_WQE_SIZE_SHIFT
);
1920 hdr
->cw
|= (OCRDMA_READ
<< OCRDMA_WQE_OPCODE_SHIFT
);
1921 hdr
->cw
|= (OCRDMA_TYPE_LKEY
<< OCRDMA_WQE_TYPE_SHIFT
);
1923 ext_rw
->addr_lo
= wr
->wr
.rdma
.remote_addr
;
1924 ext_rw
->addr_hi
= upper_32_bits(wr
->wr
.rdma
.remote_addr
);
1925 ext_rw
->lrkey
= wr
->wr
.rdma
.rkey
;
1926 ext_rw
->len
= hdr
->total_len
;
/*
 * build_frmr_pbes() - fill page buffer entries for a fast-register request.
 * @wr:      fast-register work request carrying the page list.
 * @pbl_tbl: page buffer list table to fill.
 * @hwmr:    hardware MR; pbl_size gives the byte size of one PBL.
 *
 * For every page in the work request's page list, the page-aligned low
 * 32 bits and the high 32 bits of the address are stored little-endian in a
 * PBE.  When the number of entries written reaches pbl_size / sizeof(u64),
 * writing continues from the start (va) of a PBL.
 * NOTE(review): the statements that advance pbe, num_pbes and pbl_tbl are
 * not visible in this listing -- confirm in the full source.
 */
1929 static void build_frmr_pbes(struct ib_send_wr
*wr
, struct ocrdma_pbl
*pbl_tbl
,
1930 struct ocrdma_hw_mr
*hwmr
)
1935 struct ocrdma_pbe
*pbe
;
1937 pbe
= (struct ocrdma_pbe
*)pbl_tbl
->va
;
1940 /* go through the OS phy regions & fill hw pbe entries into pbls. */
1941 for (i
= 0; i
< wr
->wr
.fast_reg
.page_list_len
; i
++) {
1942 /* number of pbes can be more for one OS buf, when
1943 * buffers are of different sizes.
1944 * split the ib_buf to one or more pbes.
1946 buf_addr
= wr
->wr
.fast_reg
.page_list
->page_list
[i
];
1947 pbe
->pa_lo
= cpu_to_le32((u32
) (buf_addr
& PAGE_MASK
));
1948 pbe
->pa_hi
= cpu_to_le32((u32
) upper_32_bits(buf_addr
));
1952 /* if the pbl is full storing the pbes,
1955 if (num_pbes
== (hwmr
->pbl_size
/sizeof(u64
))) {
1957 pbe
= (struct ocrdma_pbe
*)pbl_tbl
->va
;
/*
 * get_encoded_page_size() - encode a page size for the hardware.
 * @pg_sz: page size in bytes.
 *
 * The visible test compares @pg_sz with 4096 << i, so the encoding is
 * presumably the shift count i relative to 4 KiB (the loop and the return
 * statement are not part of this listing -- confirm).
 */
1963 static int get_encoded_page_size(int pg_sz
)
1965 /* Max size is 256M 4096 << 16 */
1968 if (pg_sz
== (4096 << i
))
/*
 * ocrdma_build_fr() - build a fast-register-MR work queue entry.
 * @qp:  sending queue pair.
 * @hdr: WQE header at the head of the send queue.
 * @wr:  fast-register work request.
 *
 * The extended fast-register section follows the header (hdr + 1); the WQE
 * size is rounded up to OCRDMA_WQE_ALIGN_BYTES.  The page-list length is
 * checked against the device's max_pages_per_frmr and against 0.  The header
 * receives the OCRDMA_FR_MR opcode, the size, the access flags translated to
 * OCRDMA_LKEY_FLAG_* bits, the rkey (stored in hdr->lkey) and the region
 * length.  The first-byte offset fbo is iova_start minus the page-aligned
 * address of the first page.  va, fbo, page count and encoded page size are
 * written to the extended section.
 *
 * The MR is looked up in qp->dev->stag_arr using bits of the key
 * ((lkey >> 8) & (OCRDMA_MAX_STAG - 1)) and its PBL table is filled with
 * build_frmr_pbes().
 * NOTE(review): the return statements of the two length checks are not
 * visible in this listing -- confirm in the full source.
 */
1974 static int ocrdma_build_fr(struct ocrdma_qp
*qp
, struct ocrdma_hdr_wqe
*hdr
,
1975 struct ib_send_wr
*wr
)
1978 struct ocrdma_ewqe_fr
*fast_reg
= (struct ocrdma_ewqe_fr
*)(hdr
+ 1);
1979 struct ocrdma_mr
*mr
;
1980 u32 wqe_size
= sizeof(*fast_reg
) + sizeof(*hdr
);
1982 wqe_size
= roundup(wqe_size
, OCRDMA_WQE_ALIGN_BYTES
);
1984 if (wr
->wr
.fast_reg
.page_list_len
> qp
->dev
->attr
.max_pages_per_frmr
)
1987 hdr
->cw
|= (OCRDMA_FR_MR
<< OCRDMA_WQE_OPCODE_SHIFT
);
1988 hdr
->cw
|= ((wqe_size
/ OCRDMA_WQE_STRIDE
) << OCRDMA_WQE_SIZE_SHIFT
);
1990 if (wr
->wr
.fast_reg
.page_list_len
== 0)
1992 if (wr
->wr
.fast_reg
.access_flags
& IB_ACCESS_LOCAL_WRITE
)
1993 hdr
->rsvd_lkey_flags
|= OCRDMA_LKEY_FLAG_LOCAL_WR
;
1994 if (wr
->wr
.fast_reg
.access_flags
& IB_ACCESS_REMOTE_WRITE
)
1995 hdr
->rsvd_lkey_flags
|= OCRDMA_LKEY_FLAG_REMOTE_WR
;
1996 if (wr
->wr
.fast_reg
.access_flags
& IB_ACCESS_REMOTE_READ
)
1997 hdr
->rsvd_lkey_flags
|= OCRDMA_LKEY_FLAG_REMOTE_RD
;
1998 hdr
->lkey
= wr
->wr
.fast_reg
.rkey
;
1999 hdr
->total_len
= wr
->wr
.fast_reg
.length
;
2001 fbo
= wr
->wr
.fast_reg
.iova_start
-
2002 (wr
->wr
.fast_reg
.page_list
->page_list
[0] & PAGE_MASK
);
2004 fast_reg
->va_hi
= upper_32_bits(wr
->wr
.fast_reg
.iova_start
);
2005 fast_reg
->va_lo
= (u32
) (wr
->wr
.fast_reg
.iova_start
& 0xffffffff);
2006 fast_reg
->fbo_hi
= upper_32_bits(fbo
);
2007 fast_reg
->fbo_lo
= (u32
) fbo
& 0xffffffff;
2008 fast_reg
->num_sges
= wr
->wr
.fast_reg
.page_list_len
;
2009 fast_reg
->size_sge
=
2010 get_encoded_page_size(1 << wr
->wr
.fast_reg
.page_shift
);
2011 mr
= (struct ocrdma_mr
*) (unsigned long) qp
->dev
->stag_arr
[(hdr
->lkey
>> 8) &
2012 (OCRDMA_MAX_STAG
- 1)];
2013 build_frmr_pbes(wr
, mr
->hwmr
.pbl_table
, &mr
->hwmr
);
/*
 * ocrdma_ring_sq_db() - ring the send-queue doorbell for one new WQE.
 * Writes qp->sq.dbid OR'ed with (1 << 16) to qp->sq_db.
 * NOTE(review): bit 16 presumably starts the "number posted" field, i.e. a
 * count of one -- confirm against the doorbell layout.
 */
2017 static void ocrdma_ring_sq_db(struct ocrdma_qp
*qp
)
2019 u32 val
= qp
->sq
.dbid
| (1 << 16);
2021 iowrite32(val
, qp
->sq_db
);
/*
 * ocrdma_post_send() - verbs entry point that posts send work requests.
 * @ibqp:   verbs QP to post on.
 * @wr:     work request(s) to post.
 * @bad_wr: output for the request that failed (its assignment is not part of
 *          this listing).
 *
 * Runs under qp->q_lock (irqsave).  Posting is only allowed in the RTS and
 * SQD states.  A request is refused when the send queue has no free entry
 * or when it carries more SGEs than sq.max_sges.
 *
 * For an accepted request the WQE at the SQ head is built: the signaled,
 * fence and solicited flags are translated into the control word, then the
 * opcode-specific builder is called (send, write, read, local invalidate,
 * fast register).  The wr_id and signaled flag are recorded in
 * wqe_wr_id_tbl[sq.head], the WQE is converted with ocrdma_cpu_to_le32()
 * over its encoded size, the SQ doorbell is rung and the head is advanced.
 *
 * NOTE(review): in the IB_WR_SEND_WITH_IMM path the result of
 * ocrdma_build_send() is not stored in status, unlike the
 * IB_WR_SEND_WITH_INV case -- a build failure there would go unnoticed.
 * NOTE(review): the barrier announced by the "make sure wqe is written"
 * comment is not visible in this listing -- confirm it is present.
 */
2024 int ocrdma_post_send(struct ib_qp
*ibqp
, struct ib_send_wr
*wr
,
2025 struct ib_send_wr
**bad_wr
)
2028 struct ocrdma_qp
*qp
= get_ocrdma_qp(ibqp
);
2029 struct ocrdma_hdr_wqe
*hdr
;
2030 unsigned long flags
;
2032 spin_lock_irqsave(&qp
->q_lock
, flags
);
2033 if (qp
->state
!= OCRDMA_QPS_RTS
&& qp
->state
!= OCRDMA_QPS_SQD
) {
2034 spin_unlock_irqrestore(&qp
->q_lock
, flags
);
2040 if (ocrdma_hwq_free_cnt(&qp
->sq
) == 0 ||
2041 wr
->num_sge
> qp
->sq
.max_sges
) {
2046 hdr
= ocrdma_hwq_head(&qp
->sq
);
2048 if (wr
->send_flags
& IB_SEND_SIGNALED
|| qp
->signaled
)
2049 hdr
->cw
|= (OCRDMA_FLAG_SIG
<< OCRDMA_WQE_FLAGS_SHIFT
);
2050 if (wr
->send_flags
& IB_SEND_FENCE
)
2052 (OCRDMA_FLAG_FENCE_L
<< OCRDMA_WQE_FLAGS_SHIFT
);
2053 if (wr
->send_flags
& IB_SEND_SOLICITED
)
2055 (OCRDMA_FLAG_SOLICIT
<< OCRDMA_WQE_FLAGS_SHIFT
);
2057 switch (wr
->opcode
) {
2058 case IB_WR_SEND_WITH_IMM
:
2059 hdr
->cw
|= (OCRDMA_FLAG_IMM
<< OCRDMA_WQE_FLAGS_SHIFT
);
2060 hdr
->immdt
= ntohl(wr
->ex
.imm_data
);
2062 hdr
->cw
|= (OCRDMA_SEND
<< OCRDMA_WQE_OPCODE_SHIFT
);
2063 ocrdma_build_send(qp
, hdr
, wr
);
2065 case IB_WR_SEND_WITH_INV
:
2066 hdr
->cw
|= (OCRDMA_FLAG_INV
<< OCRDMA_WQE_FLAGS_SHIFT
);
2067 hdr
->cw
|= (OCRDMA_SEND
<< OCRDMA_WQE_OPCODE_SHIFT
);
2068 hdr
->lkey
= wr
->ex
.invalidate_rkey
;
2069 status
= ocrdma_build_send(qp
, hdr
, wr
);
2071 case IB_WR_RDMA_WRITE_WITH_IMM
:
2072 hdr
->cw
|= (OCRDMA_FLAG_IMM
<< OCRDMA_WQE_FLAGS_SHIFT
);
2073 hdr
->immdt
= ntohl(wr
->ex
.imm_data
);
2074 case IB_WR_RDMA_WRITE
:
2075 hdr
->cw
|= (OCRDMA_WRITE
<< OCRDMA_WQE_OPCODE_SHIFT
);
2076 status
= ocrdma_build_write(qp
, hdr
, wr
);
2078 case IB_WR_RDMA_READ_WITH_INV
:
2079 hdr
->cw
|= (OCRDMA_FLAG_INV
<< OCRDMA_WQE_FLAGS_SHIFT
);
2080 case IB_WR_RDMA_READ
:
2081 ocrdma_build_read(qp
, hdr
, wr
);
2083 case IB_WR_LOCAL_INV
:
2085 (OCRDMA_LKEY_INV
<< OCRDMA_WQE_OPCODE_SHIFT
);
2086 hdr
->cw
|= ((sizeof(struct ocrdma_hdr_wqe
) +
2087 sizeof(struct ocrdma_sge
)) /
2088 OCRDMA_WQE_STRIDE
) << OCRDMA_WQE_SIZE_SHIFT
;
2089 hdr
->lkey
= wr
->ex
.invalidate_rkey
;
2091 case IB_WR_FAST_REG_MR
:
2092 status
= ocrdma_build_fr(qp
, hdr
, wr
);
2102 if (wr
->send_flags
& IB_SEND_SIGNALED
|| qp
->signaled
)
2103 qp
->wqe_wr_id_tbl
[qp
->sq
.head
].signaled
= 1;
2105 qp
->wqe_wr_id_tbl
[qp
->sq
.head
].signaled
= 0;
2106 qp
->wqe_wr_id_tbl
[qp
->sq
.head
].wrid
= wr
->wr_id
;
2107 ocrdma_cpu_to_le32(hdr
, ((hdr
->cw
>> OCRDMA_WQE_SIZE_SHIFT
) &
2108 OCRDMA_WQE_SIZE_MASK
) * OCRDMA_WQE_STRIDE
);
2109 /* make sure wqe is written before adapter can access it */
2111 /* inform hw to start processing it */
2112 ocrdma_ring_sq_db(qp
);
2114 /* update pointer, counter for next wr */
2115 ocrdma_hwq_inc_head(&qp
->sq
);
2118 spin_unlock_irqrestore(&qp
->q_lock
, flags
);
/*
 * ocrdma_ring_rq_db() - ring the receive-queue doorbell for one new RQE.
 * The doorbell word is qp->rq.dbid OR'ed with 1 shifted by
 * ocrdma_get_num_posted_shift(qp).  It is written to qp->rq_db only when the
 * QP is not in OCRDMA_QPS_INIT.
 * NOTE(review): what happens in the INIT state is not visible in this
 * listing; presumably the post is accumulated in qp->db_cache for
 * ocrdma_flush_rq_db() -- confirm.
 */
2122 static void ocrdma_ring_rq_db(struct ocrdma_qp
*qp
)
2124 u32 val
= qp
->rq
.dbid
| (1 << ocrdma_get_num_posted_shift(qp
));
2126 if (qp
->state
!= OCRDMA_QPS_INIT
)
2127 iowrite32(val
, qp
->rq_db
);
/*
 * ocrdma_build_rqe() - build one receive queue entry.
 * @rqe: entry to fill (header followed by SGEs).
 * @wr:  receive work request.
 * (a third parameter, tag, is stored in rqe->rsvd_tag; its declaration line
 * is not part of this listing)
 *
 * The entry size is the header plus either num_sge SGEs or a single SGE.
 * The control word receives the size in OCRDMA_WQE_STRIDE units, the
 * OCRDMA_FLAG_SIG flag and the OCRDMA_TYPE_LKEY type.  The SGEs are built
 * with ocrdma_build_sges() and the whole entry is converted with
 * ocrdma_cpu_to_le32().
 * NOTE(review): the condition choosing between the two size formulas is not
 * visible; presumably it is num_sge != 0 -- confirm.
 */
2132 static void ocrdma_build_rqe(struct ocrdma_hdr_wqe
*rqe
, struct ib_recv_wr
*wr
,
2136 struct ocrdma_sge
*sge
;
2138 wqe_size
= (wr
->num_sge
* sizeof(*sge
)) + sizeof(*rqe
);
2140 wqe_size
= sizeof(*sge
) + sizeof(*rqe
);
2142 rqe
->cw
= ((wqe_size
/ OCRDMA_WQE_STRIDE
) <<
2143 OCRDMA_WQE_SIZE_SHIFT
);
2144 rqe
->cw
|= (OCRDMA_FLAG_SIG
<< OCRDMA_WQE_FLAGS_SHIFT
);
2145 rqe
->cw
|= (OCRDMA_TYPE_LKEY
<< OCRDMA_WQE_TYPE_SHIFT
);
2147 rqe
->rsvd_tag
= tag
;
2148 sge
= (struct ocrdma_sge
*)(rqe
+ 1);
2149 ocrdma_build_sges(rqe
, sge
, wr
->num_sge
, wr
->sg_list
);
2150 ocrdma_cpu_to_le32(rqe
, wqe_size
);
/*
 * ocrdma_post_recv() - verbs entry point that posts receive work requests.
 * @ibqp:   verbs QP to post on.
 * @wr:     receive work request(s).
 * @bad_wr: output for the request that failed (its assignment is not part of
 *          this listing).
 *
 * Runs under qp->q_lock (irqsave).  Posting is refused in the RST and ERR
 * states, when the receive queue has no free entry, or when the request
 * carries more SGEs than rq.max_sges.  Otherwise the RQE at the RQ head is
 * built with tag 0, the wr_id is recorded in rqe_wr_id_tbl[rq.head], the RQ
 * doorbell is rung and the head is advanced.
 * NOTE(review): the barrier announced by the "make sure rqe is written"
 * comment is not visible in this listing -- confirm it is present.
 */
2153 int ocrdma_post_recv(struct ib_qp
*ibqp
, struct ib_recv_wr
*wr
,
2154 struct ib_recv_wr
**bad_wr
)
2157 unsigned long flags
;
2158 struct ocrdma_qp
*qp
= get_ocrdma_qp(ibqp
);
2159 struct ocrdma_hdr_wqe
*rqe
;
2161 spin_lock_irqsave(&qp
->q_lock
, flags
);
2162 if (qp
->state
== OCRDMA_QPS_RST
|| qp
->state
== OCRDMA_QPS_ERR
) {
2163 spin_unlock_irqrestore(&qp
->q_lock
, flags
);
2168 if (ocrdma_hwq_free_cnt(&qp
->rq
) == 0 ||
2169 wr
->num_sge
> qp
->rq
.max_sges
) {
2174 rqe
= ocrdma_hwq_head(&qp
->rq
);
2175 ocrdma_build_rqe(rqe
, wr
, 0);
2177 qp
->rqe_wr_id_tbl
[qp
->rq
.head
] = wr
->wr_id
;
2178 /* make sure rqe is written before adapter can access it */
2181 /* inform hw to start processing it */
2182 ocrdma_ring_rq_db(qp
);
2184 /* update pointer, counter for next wr */
2185 ocrdma_hwq_inc_head(&qp
->rq
);
2188 spin_unlock_irqrestore(&qp
->q_lock
, flags
);
/*
 * ocrdma_srq_get_idx() - pick an index from the SRQ index bitmap.
 * @srq: shared receive queue owning idx_bit_fields.
 *
 * Scans the bitmap words in order; in the first non-zero word the lowest
 * set bit is located with ffs() and converted to an index
 * (row * 32 + bit).  The index is compared against rq.max_cnt and the bit
 * is flipped with ocrdma_srq_toggle_bit().  Running off the end of the
 * bitmap (row == bit_fields_len) is tested for separately.
 * NOTE(review): the actions taken on the out-of-range and bitmap-exhausted
 * tests, and the return value, are not visible in this listing -- confirm.
 */
2192 /* cqe for srq's rqe can potentially arrive out of order.
2193 * index gives the entry in the shadow table where to store
2194 * the wr_id. tag/index is returned in cqe to reference back
2197 static int ocrdma_srq_get_idx(struct ocrdma_srq
*srq
)
2202 for (row
= 0; row
< srq
->bit_fields_len
; row
++) {
2203 if (srq
->idx_bit_fields
[row
]) {
2204 indx
= ffs(srq
->idx_bit_fields
[row
]);
2205 indx
= (row
* 32) + (indx
- 1);
2206 if (indx
>= srq
->rq
.max_cnt
)
2208 ocrdma_srq_toggle_bit(srq
, indx
);
2213 if (row
== srq
->bit_fields_len
)
/*
 * ocrdma_ring_srq_db() - ring the SRQ doorbell for one new RQE.
 * Writes srq->rq.dbid OR'ed with (1 << 16) to the doorbell located at
 * srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET.
 */
2218 static void ocrdma_ring_srq_db(struct ocrdma_srq
*srq
)
2220 u32 val
= srq
->rq
.dbid
| (1 << 16);
2222 iowrite32(val
, srq
->db
+ OCRDMA_DB_GEN2_SRQ_OFFSET
);
/*
 * ocrdma_post_srq_recv() - verbs entry point that posts receives to an SRQ.
 * @ibsrq:  verbs SRQ to post on.
 * @wr:     receive work request(s).
 * @bad_wr: output for the request that failed (its assignment is not part of
 *          this listing).
 *
 * Runs under srq->q_lock (irqsave).  A request is refused when the queue has
 * no free entry or carries more SGEs than rq.max_sges.  Otherwise a tag is
 * obtained from ocrdma_srq_get_idx(), the RQE at the head is built with that
 * tag, the wr_id is stored in rqe_wr_id_tbl[tag], the SRQ doorbell is rung
 * and the head is advanced.
 * NOTE(review): the barrier announced by the "make sure rqe is written"
 * comment is not visible in this listing -- confirm it is present.
 */
2225 int ocrdma_post_srq_recv(struct ib_srq
*ibsrq
, struct ib_recv_wr
*wr
,
2226 struct ib_recv_wr
**bad_wr
)
2229 unsigned long flags
;
2230 struct ocrdma_srq
*srq
;
2231 struct ocrdma_hdr_wqe
*rqe
;
2234 srq
= get_ocrdma_srq(ibsrq
);
2236 spin_lock_irqsave(&srq
->q_lock
, flags
);
2238 if (ocrdma_hwq_free_cnt(&srq
->rq
) == 0 ||
2239 wr
->num_sge
> srq
->rq
.max_sges
) {
2244 tag
= ocrdma_srq_get_idx(srq
);
2245 rqe
= ocrdma_hwq_head(&srq
->rq
);
2246 ocrdma_build_rqe(rqe
, wr
, tag
);
2248 srq
->rqe_wr_id_tbl
[tag
] = wr
->wr_id
;
2249 /* make sure rqe is written before adapter can perform DMA */
2251 /* inform hw to start processing it */
2252 ocrdma_ring_srq_db(srq
);
2253 /* update pointer, counter for next wr */
2254 ocrdma_hwq_inc_head(&srq
->rq
);
2257 spin_unlock_irqrestore(&srq
->q_lock
, flags
);
/*
 * ocrdma_to_ibwc_err() - map an adapter CQE status to an IB work-completion
 * status.
 * @status: OCRDMA_CQE_* status code taken from a CQE.
 *
 * Each listed OCRDMA_CQE_<X> code maps to the IB_WC_<X> value of the same
 * name.  The final assignment of IB_WC_GENERAL_ERR has no visible case
 * label; presumably it is the default branch -- confirm.
 */
2261 static enum ib_wc_status
ocrdma_to_ibwc_err(u16 status
)
2263 enum ib_wc_status ibwc_status
;
2266 case OCRDMA_CQE_GENERAL_ERR
:
2267 ibwc_status
= IB_WC_GENERAL_ERR
;
2269 case OCRDMA_CQE_LOC_LEN_ERR
:
2270 ibwc_status
= IB_WC_LOC_LEN_ERR
;
2272 case OCRDMA_CQE_LOC_QP_OP_ERR
:
2273 ibwc_status
= IB_WC_LOC_QP_OP_ERR
;
2275 case OCRDMA_CQE_LOC_EEC_OP_ERR
:
2276 ibwc_status
= IB_WC_LOC_EEC_OP_ERR
;
2278 case OCRDMA_CQE_LOC_PROT_ERR
:
2279 ibwc_status
= IB_WC_LOC_PROT_ERR
;
2281 case OCRDMA_CQE_WR_FLUSH_ERR
:
2282 ibwc_status
= IB_WC_WR_FLUSH_ERR
;
2284 case OCRDMA_CQE_MW_BIND_ERR
:
2285 ibwc_status
= IB_WC_MW_BIND_ERR
;
2287 case OCRDMA_CQE_BAD_RESP_ERR
:
2288 ibwc_status
= IB_WC_BAD_RESP_ERR
;
2290 case OCRDMA_CQE_LOC_ACCESS_ERR
:
2291 ibwc_status
= IB_WC_LOC_ACCESS_ERR
;
2293 case OCRDMA_CQE_REM_INV_REQ_ERR
:
2294 ibwc_status
= IB_WC_REM_INV_REQ_ERR
;
2296 case OCRDMA_CQE_REM_ACCESS_ERR
:
2297 ibwc_status
= IB_WC_REM_ACCESS_ERR
;
2299 case OCRDMA_CQE_REM_OP_ERR
:
2300 ibwc_status
= IB_WC_REM_OP_ERR
;
2302 case OCRDMA_CQE_RETRY_EXC_ERR
:
2303 ibwc_status
= IB_WC_RETRY_EXC_ERR
;
2305 case OCRDMA_CQE_RNR_RETRY_EXC_ERR
:
2306 ibwc_status
= IB_WC_RNR_RETRY_EXC_ERR
;
2308 case OCRDMA_CQE_LOC_RDD_VIOL_ERR
:
2309 ibwc_status
= IB_WC_LOC_RDD_VIOL_ERR
;
2311 case OCRDMA_CQE_REM_INV_RD_REQ_ERR
:
2312 ibwc_status
= IB_WC_REM_INV_RD_REQ_ERR
;
2314 case OCRDMA_CQE_REM_ABORT_ERR
:
2315 ibwc_status
= IB_WC_REM_ABORT_ERR
;
2317 case OCRDMA_CQE_INV_EECN_ERR
:
2318 ibwc_status
= IB_WC_INV_EECN_ERR
;
2320 case OCRDMA_CQE_INV_EEC_STATE_ERR
:
2321 ibwc_status
= IB_WC_INV_EEC_STATE_ERR
;
2323 case OCRDMA_CQE_FATAL_ERR
:
2324 ibwc_status
= IB_WC_FATAL_ERR
;
2326 case OCRDMA_CQE_RESP_TIMEOUT_ERR
:
2327 ibwc_status
= IB_WC_RESP_TIMEOUT_ERR
;
2330 ibwc_status
= IB_WC_GENERAL_ERR
;
/*
 * ocrdma_update_wc() - fill wr_id and opcode of a send-side work completion.
 * @qp:   queue pair the completion belongs to.
 * @ibwc: work completion to fill.
 * (a third parameter, wqe_idx, selects the WQE; its declaration line is not
 * part of this listing)
 *
 * The wr_id is read from wqe_wr_id_tbl[wqe_idx].  The opcode is recovered
 * from the posted WQE itself: its control word was stored little-endian, so
 * it is converted back with le32_to_cpu() before masking with
 * OCRDMA_WQE_OPCODE_MASK.  The opcode selects the IB_WC_* value; for reads
 * byte_len is taken from the extended SGE following the header.  An unknown
 * opcode sets IB_WC_GENERAL_ERR and is logged.
 * NOTE(review): the pr_err() argument masks hdr->cw without le32_to_cpu(),
 * unlike the opcode computation above -- the logged value may be wrong on
 * big-endian hosts.
 */
2336 static void ocrdma_update_wc(struct ocrdma_qp
*qp
, struct ib_wc
*ibwc
,
2339 struct ocrdma_hdr_wqe
*hdr
;
2340 struct ocrdma_sge
*rw
;
2343 hdr
= ocrdma_hwq_head_from_idx(&qp
->sq
, wqe_idx
);
2345 ibwc
->wr_id
= qp
->wqe_wr_id_tbl
[wqe_idx
].wrid
;
2346 /* Undo the hdr->cw swap */
2347 opcode
= le32_to_cpu(hdr
->cw
) & OCRDMA_WQE_OPCODE_MASK
;
2350 ibwc
->opcode
= IB_WC_RDMA_WRITE
;
2353 rw
= (struct ocrdma_sge
*)(hdr
+ 1);
2354 ibwc
->opcode
= IB_WC_RDMA_READ
;
2355 ibwc
->byte_len
= rw
->len
;
2358 ibwc
->opcode
= IB_WC_SEND
;
2361 ibwc
->opcode
= IB_WC_FAST_REG_MR
;
2363 case OCRDMA_LKEY_INV
:
2364 ibwc
->opcode
= IB_WC_LOCAL_INV
;
2367 ibwc
->status
= IB_WC_GENERAL_ERR
;
2368 pr_err("%s() invalid opcode received = 0x%x\n",
2369 __func__
, hdr
->cw
& OCRDMA_WQE_OPCODE_MASK
);
/*
 * ocrdma_set_cqe_status_flushed() - rewrite a CQE's status to "flushed".
 * @qp:  queue pair the CQE belongs to.
 * @cqe: completion entry to modify in place.
 *
 * The status field inside the little-endian flags_status_srcqpn word is
 * cleared and then set to OCRDMA_CQE_WR_FLUSH_ERR.  Send CQEs use the
 * OCRDMA_CQE_STATUS_* mask/shift.  Receive CQEs of UD and GSI QPs use the
 * OCRDMA_CQE_UD_STATUS_* mask/shift; other receive CQEs use the regular
 * ones.
 */
2374 static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp
*qp
,
2375 struct ocrdma_cqe
*cqe
)
2377 if (is_cqe_for_sq(cqe
)) {
2378 cqe
->flags_status_srcqpn
= cpu_to_le32(le32_to_cpu(
2379 cqe
->flags_status_srcqpn
) &
2380 ~OCRDMA_CQE_STATUS_MASK
);
2381 cqe
->flags_status_srcqpn
= cpu_to_le32(le32_to_cpu(
2382 cqe
->flags_status_srcqpn
) |
2383 (OCRDMA_CQE_WR_FLUSH_ERR
<<
2384 OCRDMA_CQE_STATUS_SHIFT
));
2386 if (qp
->qp_type
== IB_QPT_UD
|| qp
->qp_type
== IB_QPT_GSI
) {
2387 cqe
->flags_status_srcqpn
= cpu_to_le32(le32_to_cpu(
2388 cqe
->flags_status_srcqpn
) &
2389 ~OCRDMA_CQE_UD_STATUS_MASK
);
2390 cqe
->flags_status_srcqpn
= cpu_to_le32(le32_to_cpu(
2391 cqe
->flags_status_srcqpn
) |
2392 (OCRDMA_CQE_WR_FLUSH_ERR
<<
2393 OCRDMA_CQE_UD_STATUS_SHIFT
));
2395 cqe
->flags_status_srcqpn
= cpu_to_le32(le32_to_cpu(
2396 cqe
->flags_status_srcqpn
) &
2397 ~OCRDMA_CQE_STATUS_MASK
);
2398 cqe
->flags_status_srcqpn
= cpu_to_le32(le32_to_cpu(
2399 cqe
->flags_status_srcqpn
) |
2400 (OCRDMA_CQE_WR_FLUSH_ERR
<<
2401 OCRDMA_CQE_STATUS_SHIFT
));
/*
 * ocrdma_update_err_cqe() - common handling of an error completion.
 * @ibwc:   work completion to fill (qp and status).
 * @cqe:    hardware CQE that reported the error.
 * @qp:     queue pair the CQE belongs to.
 * @status: OCRDMA_CQE_* status, translated with ocrdma_to_ibwc_err().
 *
 * The QP is put on the flush lists with ocrdma_flush_qp() and moved to
 * IB_QPS_ERR.  If work is still outstanding on either the RQ or the SQ, the
 * CQE's status is rewritten to "flushed" so the same CQE can be reported
 * again for the remaining entries.
 * `expand` starts as false; NOTE(review): the statement that sets it and
 * the return are not visible in this listing -- presumably true is returned
 * when the CQE must be re-processed.
 */
2406 static bool ocrdma_update_err_cqe(struct ib_wc
*ibwc
, struct ocrdma_cqe
*cqe
,
2407 struct ocrdma_qp
*qp
, int status
)
2409 bool expand
= false;
2412 ibwc
->qp
= &qp
->ibqp
;
2413 ibwc
->status
= ocrdma_to_ibwc_err(status
);
2415 ocrdma_flush_qp(qp
);
2416 ocrdma_qp_state_change(qp
, IB_QPS_ERR
, NULL
);
2418 /* if wqe/rqe pending for which cqe needs to be returned,
2419 * trigger inflating it.
2421 if (!is_hw_rq_empty(qp
) || !is_hw_sq_empty(qp
)) {
2423 ocrdma_set_cqe_status_flushed(qp
, cqe
);
/*
 * ocrdma_update_err_rcqe() - report an error completion for the oldest RQE.
 * @ibwc:   work completion to fill.
 * @cqe:    hardware CQE that reported the error.
 * @qp:     queue pair the CQE belongs to.
 * @status: OCRDMA_CQE_* status code.
 *
 * Sets the opcode to IB_WC_RECV, takes the wr_id from
 * rqe_wr_id_tbl[rq.tail], consumes that RQE by advancing the RQ tail, and
 * returns the result of ocrdma_update_err_cqe().
 */
2428 static int ocrdma_update_err_rcqe(struct ib_wc
*ibwc
, struct ocrdma_cqe
*cqe
,
2429 struct ocrdma_qp
*qp
, int status
)
2431 ibwc
->opcode
= IB_WC_RECV
;
2432 ibwc
->wr_id
= qp
->rqe_wr_id_tbl
[qp
->rq
.tail
];
2433 ocrdma_hwq_inc_tail(&qp
->rq
);
2435 return ocrdma_update_err_cqe(ibwc
, cqe
, qp
, status
);
/*
 * ocrdma_update_err_scqe() - report an error completion for the oldest WQE.
 * @ibwc:   work completion to fill.
 * @cqe:    hardware CQE that reported the error.
 * @qp:     queue pair the CQE belongs to.
 * @status: OCRDMA_CQE_* status code.
 *
 * Fills wr_id/opcode from the WQE at sq.tail via ocrdma_update_wc(),
 * consumes that WQE by advancing the SQ tail, and returns the result of
 * ocrdma_update_err_cqe().
 */
2438 static int ocrdma_update_err_scqe(struct ib_wc
*ibwc
, struct ocrdma_cqe
*cqe
,
2439 struct ocrdma_qp
*qp
, int status
)
2441 ocrdma_update_wc(qp
, ibwc
, qp
->sq
.tail
);
2442 ocrdma_hwq_inc_tail(&qp
->sq
);
2444 return ocrdma_update_err_cqe(ibwc
, cqe
, qp
, status
);
/*
 * ocrdma_poll_err_scqe() - process a send CQE that carries an error status.
 * @qp:     queue pair the CQE belongs to.
 * @cqe:    hardware CQE being polled.
 * @ibwc:   work completion to fill.
 * @polled: output flag (its assignments are not part of this listing).
 * @stop:   output flag (its assignments are not part of this listing).
 *
 * The status is extracted from flags_status_srcqpn.  When the SQ is already
 * empty but the RQ is not, the CQE is reused for the receive side: if the
 * QP has no SRQ and shares one CQ for SQ and RQ, a flush-error completion is
 * generated for the oldest RQE.  Otherwise, per the in-body comment,
 * processing stops and the CQE is kept to trigger the event on the RQ's CQ.
 * In the remaining case an error completion is generated for the oldest WQE
 * with ocrdma_update_err_scqe().
 */
2448 static bool ocrdma_poll_err_scqe(struct ocrdma_qp
*qp
,
2449 struct ocrdma_cqe
*cqe
, struct ib_wc
*ibwc
,
2450 bool *polled
, bool *stop
)
2453 int status
= (le32_to_cpu(cqe
->flags_status_srcqpn
) &
2454 OCRDMA_CQE_STATUS_MASK
) >> OCRDMA_CQE_STATUS_SHIFT
;
2456 /* when hw sq is empty, but rq is not empty, so we continue
2457 * to keep the cqe in order to get the cq event again.
2459 if (is_hw_sq_empty(qp
) && !is_hw_rq_empty(qp
)) {
2460 /* when cq for rq and sq is same, it is safe to return
2461 * flush cqe for RQEs.
2463 if (!qp
->srq
&& (qp
->sq_cq
== qp
->rq_cq
)) {
2465 status
= OCRDMA_CQE_WR_FLUSH_ERR
;
2466 expand
= ocrdma_update_err_rcqe(ibwc
, cqe
, qp
, status
);
2468 /* stop processing further cqe as this cqe is used for
2469 * triggering cq event on buddy cq of RQ.
2470 * When QP is destroyed, this cqe will be removed
2471 * from the cq's hardware q.
2479 expand
= ocrdma_update_err_scqe(ibwc
, cqe
, qp
, status
);
/*
 * ocrdma_poll_success_scqe() - process a successful send CQE.
 * @qp:     queue pair the CQE belongs to.
 * @cqe:    hardware CQE being polled.
 * @ibwc:   work completion to fill.
 * @polled: set to false when the WQE at the tail was posted unsignaled, so
 *          no work completion is produced for it.
 *
 * For a signaled WQE the completion is filled with IB_WC_SUCCESS, the QP and
 * the wr_id/opcode from ocrdma_update_wc().  The WQE index reported in the
 * CQE is compared with the SQ tail: when they differ, the CQE covers further
 * WQEs and `expand` is set so the same CQE is processed again.  The SQ tail
 * is then advanced by one.
 */
2484 static bool ocrdma_poll_success_scqe(struct ocrdma_qp
*qp
,
2485 struct ocrdma_cqe
*cqe
,
2486 struct ib_wc
*ibwc
, bool *polled
)
2488 bool expand
= false;
2489 int tail
= qp
->sq
.tail
;
2492 if (!qp
->wqe_wr_id_tbl
[tail
].signaled
) {
2493 *polled
= false; /* WC cannot be consumed yet */
2495 ibwc
->status
= IB_WC_SUCCESS
;
2497 ibwc
->qp
= &qp
->ibqp
;
2498 ocrdma_update_wc(qp
, ibwc
, tail
);
2501 wqe_idx
= (le32_to_cpu(cqe
->wq
.wqeidx
) &
2502 OCRDMA_CQE_WQEIDX_MASK
) & qp
->sq
.max_wqe_idx
;
2503 if (tail
!= wqe_idx
)
2504 expand
= true; /* Coalesced CQE can't be consumed yet */
2506 ocrdma_hwq_inc_tail(&qp
->sq
);
/*
 * ocrdma_poll_scqe() - dispatch a send CQE by its status.
 * @qp:     queue pair the CQE belongs to.
 * @cqe:    hardware CQE being polled.
 * @ibwc:   work completion to fill.
 * @polled: forwarded to the success/error handler.
 * @stop:   forwarded to the error handler.
 *
 * The status is extracted from flags_status_srcqpn; OCRDMA_CQE_SUCCESS goes
 * to ocrdma_poll_success_scqe(), anything else to ocrdma_poll_err_scqe().
 * The handler's result is stored in `expand`.
 */
2510 static bool ocrdma_poll_scqe(struct ocrdma_qp
*qp
, struct ocrdma_cqe
*cqe
,
2511 struct ib_wc
*ibwc
, bool *polled
, bool *stop
)
2516 status
= (le32_to_cpu(cqe
->flags_status_srcqpn
) &
2517 OCRDMA_CQE_STATUS_MASK
) >> OCRDMA_CQE_STATUS_SHIFT
;
2519 if (status
== OCRDMA_CQE_SUCCESS
)
2520 expand
= ocrdma_poll_success_scqe(qp
, cqe
, ibwc
, polled
);
2522 expand
= ocrdma_poll_err_scqe(qp
, cqe
, ibwc
, polled
, stop
);
/*
 * ocrdma_update_ud_rcqe() - decode the UD-specific fields of a receive CQE.
 * @ibwc: work completion to fill.
 * @cqe:  hardware receive CQE of a UD/GSI QP.
 *
 * Extracts the UD status (OCRDMA_CQE_UD_STATUS_* mask/shift) and the source
 * QPN from flags_status_srcqpn, and the pkey index and transfer length from
 * ud.rxlen_pkey.  wc_flags is set to IB_WC_GRH.
 */
2526 static int ocrdma_update_ud_rcqe(struct ib_wc
*ibwc
, struct ocrdma_cqe
*cqe
)
2530 status
= (le32_to_cpu(cqe
->flags_status_srcqpn
) &
2531 OCRDMA_CQE_UD_STATUS_MASK
) >> OCRDMA_CQE_UD_STATUS_SHIFT
;
2532 ibwc
->src_qp
= le32_to_cpu(cqe
->flags_status_srcqpn
) &
2533 OCRDMA_CQE_SRCQP_MASK
;
2534 ibwc
->pkey_index
= le32_to_cpu(cqe
->ud
.rxlen_pkey
) &
2535 OCRDMA_CQE_PKEY_MASK
;
2536 ibwc
->wc_flags
= IB_WC_GRH
;
2537 ibwc
->byte_len
= (le32_to_cpu(cqe
->ud
.rxlen_pkey
) >>
2538 OCRDMA_CQE_UD_XFER_LEN_SHIFT
);
2542 static void ocrdma_update_free_srq_cqe(struct ib_wc
*ibwc
,
2543 struct ocrdma_cqe
*cqe
,
2544 struct ocrdma_qp
*qp
)
2546 unsigned long flags
;
2547 struct ocrdma_srq
*srq
;
2550 srq
= get_ocrdma_srq(qp
->ibqp
.srq
);
2551 wqe_idx
= (le32_to_cpu(cqe
->rq
.buftag_qpn
) >>
2552 OCRDMA_CQE_BUFTAG_SHIFT
) & srq
->rq
.max_wqe_idx
;
2553 ibwc
->wr_id
= srq
->rqe_wr_id_tbl
[wqe_idx
];
2554 spin_lock_irqsave(&srq
->q_lock
, flags
);
2555 ocrdma_srq_toggle_bit(srq
, wqe_idx
);
2556 spin_unlock_irqrestore(&srq
->q_lock
, flags
);
2557 ocrdma_hwq_inc_tail(&srq
->rq
);
2560 static bool ocrdma_poll_err_rcqe(struct ocrdma_qp
*qp
, struct ocrdma_cqe
*cqe
,
2561 struct ib_wc
*ibwc
, bool *polled
, bool *stop
,
2566 /* when hw_rq is empty, but wq is not empty, so continue
2567 * to keep the cqe to get the cq event again.
2569 if (is_hw_rq_empty(qp
) && !is_hw_sq_empty(qp
)) {
2570 if (!qp
->srq
&& (qp
->sq_cq
== qp
->rq_cq
)) {
2572 status
= OCRDMA_CQE_WR_FLUSH_ERR
;
2573 expand
= ocrdma_update_err_scqe(ibwc
, cqe
, qp
, status
);
2581 expand
= ocrdma_update_err_rcqe(ibwc
, cqe
, qp
, status
);
2586 static void ocrdma_poll_success_rcqe(struct ocrdma_qp
*qp
,
2587 struct ocrdma_cqe
*cqe
, struct ib_wc
*ibwc
)
2589 ibwc
->opcode
= IB_WC_RECV
;
2590 ibwc
->qp
= &qp
->ibqp
;
2591 ibwc
->status
= IB_WC_SUCCESS
;
2593 if (qp
->qp_type
== IB_QPT_UD
|| qp
->qp_type
== IB_QPT_GSI
)
2594 ocrdma_update_ud_rcqe(ibwc
, cqe
);
2596 ibwc
->byte_len
= le32_to_cpu(cqe
->rq
.rxlen
);
2598 if (is_cqe_imm(cqe
)) {
2599 ibwc
->ex
.imm_data
= htonl(le32_to_cpu(cqe
->rq
.lkey_immdt
));
2600 ibwc
->wc_flags
|= IB_WC_WITH_IMM
;
2601 } else if (is_cqe_wr_imm(cqe
)) {
2602 ibwc
->opcode
= IB_WC_RECV_RDMA_WITH_IMM
;
2603 ibwc
->ex
.imm_data
= htonl(le32_to_cpu(cqe
->rq
.lkey_immdt
));
2604 ibwc
->wc_flags
|= IB_WC_WITH_IMM
;
2605 } else if (is_cqe_invalidated(cqe
)) {
2606 ibwc
->ex
.invalidate_rkey
= le32_to_cpu(cqe
->rq
.lkey_immdt
);
2607 ibwc
->wc_flags
|= IB_WC_WITH_INVALIDATE
;
2610 ocrdma_update_free_srq_cqe(ibwc
, cqe
, qp
);
2612 ibwc
->wr_id
= qp
->rqe_wr_id_tbl
[qp
->rq
.tail
];
2613 ocrdma_hwq_inc_tail(&qp
->rq
);
2617 static bool ocrdma_poll_rcqe(struct ocrdma_qp
*qp
, struct ocrdma_cqe
*cqe
,
2618 struct ib_wc
*ibwc
, bool *polled
, bool *stop
)
2621 bool expand
= false;
2624 if (qp
->qp_type
== IB_QPT_UD
|| qp
->qp_type
== IB_QPT_GSI
) {
2625 status
= (le32_to_cpu(cqe
->flags_status_srcqpn
) &
2626 OCRDMA_CQE_UD_STATUS_MASK
) >>
2627 OCRDMA_CQE_UD_STATUS_SHIFT
;
2629 status
= (le32_to_cpu(cqe
->flags_status_srcqpn
) &
2630 OCRDMA_CQE_STATUS_MASK
) >> OCRDMA_CQE_STATUS_SHIFT
;
2633 if (status
== OCRDMA_CQE_SUCCESS
) {
2635 ocrdma_poll_success_rcqe(qp
, cqe
, ibwc
);
2637 expand
= ocrdma_poll_err_rcqe(qp
, cqe
, ibwc
, polled
, stop
,
2643 static void ocrdma_change_cq_phase(struct ocrdma_cq
*cq
, struct ocrdma_cqe
*cqe
,
2646 if (cq
->phase_change
) {
2648 cq
->phase
= (~cq
->phase
& OCRDMA_CQE_VALID
);
2650 /* clear valid bit */
2651 cqe
->flags_status_srcqpn
= 0;
2655 static int ocrdma_poll_hwcq(struct ocrdma_cq
*cq
, int num_entries
,
2660 bool expand
= false;
2661 int polled_hw_cqes
= 0;
2662 struct ocrdma_qp
*qp
= NULL
;
2663 struct ocrdma_dev
*dev
= get_ocrdma_dev(cq
->ibcq
.device
);
2664 struct ocrdma_cqe
*cqe
;
2665 u16 cur_getp
; bool polled
= false; bool stop
= false;
2667 cur_getp
= cq
->getp
;
2668 while (num_entries
) {
2669 cqe
= cq
->va
+ cur_getp
;
2670 /* check whether valid cqe or not */
2671 if (!is_cqe_valid(cq
, cqe
))
2673 qpn
= (le32_to_cpu(cqe
->cmn
.qpn
) & OCRDMA_CQE_QPN_MASK
);
2674 /* ignore discarded cqe */
2677 qp
= dev
->qp_tbl
[qpn
];
2680 if (is_cqe_for_sq(cqe
)) {
2681 expand
= ocrdma_poll_scqe(qp
, cqe
, ibwc
, &polled
,
2684 expand
= ocrdma_poll_rcqe(qp
, cqe
, ibwc
, &polled
,
2691 /* clear qpn to avoid duplicate processing by discard_cqe() */
2694 polled_hw_cqes
+= 1;
2695 cur_getp
= (cur_getp
+ 1) % cq
->max_hw_cqe
;
2696 ocrdma_change_cq_phase(cq
, cqe
, cur_getp
);
2706 cq
->getp
= cur_getp
;
2707 if (polled_hw_cqes
|| expand
|| stop
) {
2708 ocrdma_ring_cq_db(dev
, cq
->id
, cq
->armed
, cq
->solicited
,
2714 /* insert error cqe if the QP's SQ or RQ's CQ matches the CQ under poll. */
2715 static int ocrdma_add_err_cqe(struct ocrdma_cq
*cq
, int num_entries
,
2716 struct ocrdma_qp
*qp
, struct ib_wc
*ibwc
)
2720 while (num_entries
) {
2721 if (is_hw_sq_empty(qp
) && is_hw_rq_empty(qp
))
2723 if (!is_hw_sq_empty(qp
) && qp
->sq_cq
== cq
) {
2724 ocrdma_update_wc(qp
, ibwc
, qp
->sq
.tail
);
2725 ocrdma_hwq_inc_tail(&qp
->sq
);
2726 } else if (!is_hw_rq_empty(qp
) && qp
->rq_cq
== cq
) {
2727 ibwc
->wr_id
= qp
->rqe_wr_id_tbl
[qp
->rq
.tail
];
2728 ocrdma_hwq_inc_tail(&qp
->rq
);
2733 ibwc
->status
= IB_WC_WR_FLUSH_ERR
;
2741 int ocrdma_poll_cq(struct ib_cq
*ibcq
, int num_entries
, struct ib_wc
*wc
)
2743 int cqes_to_poll
= num_entries
;
2744 struct ocrdma_cq
*cq
= get_ocrdma_cq(ibcq
);
2745 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibcq
->device
);
2746 int num_os_cqe
= 0, err_cqes
= 0;
2747 struct ocrdma_qp
*qp
;
2748 unsigned long flags
;
2750 /* poll cqes from adapter CQ */
2751 spin_lock_irqsave(&cq
->cq_lock
, flags
);
2752 num_os_cqe
= ocrdma_poll_hwcq(cq
, cqes_to_poll
, wc
);
2753 spin_unlock_irqrestore(&cq
->cq_lock
, flags
);
2754 cqes_to_poll
-= num_os_cqe
;
2757 wc
= wc
+ num_os_cqe
;
2758 /* adapter returns single error cqe when qp moves to
2759 * error state. So insert error cqes with wc_status as
2760 * FLUSHED for pending WQEs and RQEs of QP's SQ and RQ
2761 * respectively which uses this CQ.
2763 spin_lock_irqsave(&dev
->flush_q_lock
, flags
);
2764 list_for_each_entry(qp
, &cq
->sq_head
, sq_entry
) {
2765 if (cqes_to_poll
== 0)
2767 err_cqes
= ocrdma_add_err_cqe(cq
, cqes_to_poll
, qp
, wc
);
2768 cqes_to_poll
-= err_cqes
;
2769 num_os_cqe
+= err_cqes
;
2772 spin_unlock_irqrestore(&dev
->flush_q_lock
, flags
);
2777 int ocrdma_arm_cq(struct ib_cq
*ibcq
, enum ib_cq_notify_flags cq_flags
)
2779 struct ocrdma_cq
*cq
= get_ocrdma_cq(ibcq
);
2780 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibcq
->device
);
2783 struct ocrdma_cqe
*cqe
;
2784 unsigned long flags
;
2788 spin_lock_irqsave(&cq
->cq_lock
, flags
);
2789 if (cq_flags
& IB_CQ_NEXT_COMP
|| cq_flags
& IB_CQ_SOLICITED
)
2791 if (cq_flags
& IB_CQ_SOLICITED
)
2792 cq
->solicited
= true;
2794 cur_getp
= cq
->getp
;
2795 cqe
= cq
->va
+ cur_getp
;
2797 /* check whether any valid cqe exist or not, if not then safe to
2798 * arm. If cqe is not yet consumed, then let it get consumed and then
2799 * we arm it to avoid false interrupts.
2801 if (!is_cqe_valid(cq
, cqe
) || cq
->arm_needed
) {
2802 cq
->arm_needed
= false;
2803 ocrdma_ring_cq_db(dev
, cq_id
, cq
->armed
, cq
->solicited
, 0);
2805 spin_unlock_irqrestore(&cq
->cq_lock
, flags
);
2809 struct ib_mr
*ocrdma_alloc_frmr(struct ib_pd
*ibpd
, int max_page_list_len
)
2812 struct ocrdma_mr
*mr
;
2813 struct ocrdma_pd
*pd
= get_ocrdma_pd(ibpd
);
2814 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibpd
->device
);
2816 if (max_page_list_len
> dev
->attr
.max_pages_per_frmr
)
2817 return ERR_PTR(-EINVAL
);
2819 mr
= kzalloc(sizeof(*mr
), GFP_KERNEL
);
2821 return ERR_PTR(-ENOMEM
);
2823 status
= ocrdma_get_pbl_info(dev
, mr
, max_page_list_len
);
2827 mr
->hwmr
.remote_rd
= 0;
2828 mr
->hwmr
.remote_wr
= 0;
2829 mr
->hwmr
.local_rd
= 0;
2830 mr
->hwmr
.local_wr
= 0;
2831 mr
->hwmr
.mw_bind
= 0;
2832 status
= ocrdma_build_pbl_tbl(dev
, &mr
->hwmr
);
2835 status
= ocrdma_reg_mr(dev
, &mr
->hwmr
, pd
->id
, 0);
2838 mr
->ibmr
.rkey
= mr
->hwmr
.lkey
;
2839 mr
->ibmr
.lkey
= mr
->hwmr
.lkey
;
2840 dev
->stag_arr
[(mr
->hwmr
.lkey
>> 8) & (OCRDMA_MAX_STAG
- 1)] = mr
;
2843 ocrdma_free_mr_pbl_tbl(dev
, &mr
->hwmr
);
2846 return ERR_PTR(-ENOMEM
);
2849 struct ib_fast_reg_page_list
*ocrdma_alloc_frmr_page_list(struct ib_device
2853 struct ib_fast_reg_page_list
*frmr_list
;
2856 size
= sizeof(*frmr_list
) + (page_list_len
* sizeof(u64
));
2857 frmr_list
= kzalloc(size
, GFP_KERNEL
);
2859 return ERR_PTR(-ENOMEM
);
2860 frmr_list
->page_list
= (u64
*)(frmr_list
+ 1);
/*
 * Release a fast-register page list.
 *
 * NOTE(review): the body was not present in the text under review;
 * kfree() is restored on the assumption that the list came from the
 * single kzalloc() in ocrdma_alloc_frmr_page_list() - confirm.
 */
void ocrdma_free_frmr_page_list(struct ib_fast_reg_page_list *page_list)
{
	kfree(page_list);
}
2869 #define MAX_KERNEL_PBE_SIZE 65536
2870 static inline int count_kernel_pbes(struct ib_phys_buf
*buf_list
,
2871 int buf_cnt
, u32
*pbe_size
)
2876 *pbe_size
= roundup(buf_list
[0].size
, PAGE_SIZE
);
2877 *pbe_size
= roundup_pow_of_two(*pbe_size
);
2879 /* find the smallest PBE size that we can have */
2880 for (i
= 0; i
< buf_cnt
; i
++) {
2881 /* first addr may not be page aligned, so ignore checking */
2882 if ((i
!= 0) && ((buf_list
[i
].addr
& ~PAGE_MASK
) ||
2883 (buf_list
[i
].size
& ~PAGE_MASK
))) {
2887 /* if configured PBE size is greater then the chosen one,
2888 * reduce the PBE size.
2890 buf_size
= roundup(buf_list
[i
].size
, PAGE_SIZE
);
2891 /* pbe_size has to be even multiple of 4K 1,2,4,8...*/
2892 buf_size
= roundup_pow_of_two(buf_size
);
2893 if (*pbe_size
> buf_size
)
2894 *pbe_size
= buf_size
;
2896 total_size
+= buf_size
;
2898 *pbe_size
= *pbe_size
> MAX_KERNEL_PBE_SIZE
?
2899 (MAX_KERNEL_PBE_SIZE
) : (*pbe_size
);
2901 /* num_pbes = total_size / (*pbe_size); this is implemented below. */
2903 return total_size
>> ilog2(*pbe_size
);
2906 static void build_kernel_pbes(struct ib_phys_buf
*buf_list
, int ib_buf_cnt
,
2907 u32 pbe_size
, struct ocrdma_pbl
*pbl_tbl
,
2908 struct ocrdma_hw_mr
*hwmr
)
2912 int pbes_per_buf
= 0;
2915 struct ocrdma_pbe
*pbe
;
2916 int total_num_pbes
= 0;
2918 if (!hwmr
->num_pbes
)
2921 pbe
= (struct ocrdma_pbe
*)pbl_tbl
->va
;
2924 /* go through the OS phy regions & fill hw pbe entries into pbls. */
2925 for (i
= 0; i
< ib_buf_cnt
; i
++) {
2926 buf_addr
= buf_list
[i
].addr
;
2928 roundup_pow_of_two(roundup(buf_list
[i
].size
, PAGE_SIZE
)) /
2930 hwmr
->len
+= buf_list
[i
].size
;
2931 /* number of pbes can be more for one OS buf, when
2932 * buffers are of different sizes.
2933 * split the ib_buf to one or more pbes.
2935 for (idx
= 0; idx
< pbes_per_buf
; idx
++) {
2936 /* we program always page aligned addresses,
2937 * first unaligned address is taken care by fbo.
2940 /* for non zero fbo, assign the
2941 * start of the page.
2944 cpu_to_le32((u32
) (buf_addr
& PAGE_MASK
));
2946 cpu_to_le32((u32
) upper_32_bits(buf_addr
));
2949 cpu_to_le32((u32
) (buf_addr
& 0xffffffff));
2951 cpu_to_le32((u32
) upper_32_bits(buf_addr
));
2953 buf_addr
+= pbe_size
;
2955 total_num_pbes
+= 1;
2958 if (total_num_pbes
== hwmr
->num_pbes
)
2960 /* if the pbl is full storing the pbes,
2963 if (num_pbes
== (hwmr
->pbl_size
/sizeof(u64
))) {
2965 pbe
= (struct ocrdma_pbe
*)pbl_tbl
->va
;
2974 struct ib_mr
*ocrdma_reg_kernel_mr(struct ib_pd
*ibpd
,
2975 struct ib_phys_buf
*buf_list
,
2976 int buf_cnt
, int acc
, u64
*iova_start
)
2978 int status
= -ENOMEM
;
2979 struct ocrdma_mr
*mr
;
2980 struct ocrdma_pd
*pd
= get_ocrdma_pd(ibpd
);
2981 struct ocrdma_dev
*dev
= get_ocrdma_dev(ibpd
->device
);
2985 if ((acc
& IB_ACCESS_REMOTE_WRITE
) && !(acc
& IB_ACCESS_LOCAL_WRITE
))
2986 return ERR_PTR(-EINVAL
);
2988 mr
= kzalloc(sizeof(*mr
), GFP_KERNEL
);
2990 return ERR_PTR(status
);
2992 num_pbes
= count_kernel_pbes(buf_list
, buf_cnt
, &pbe_size
);
2993 if (num_pbes
== 0) {
2997 status
= ocrdma_get_pbl_info(dev
, mr
, num_pbes
);
3001 mr
->hwmr
.pbe_size
= pbe_size
;
3002 mr
->hwmr
.fbo
= *iova_start
- (buf_list
[0].addr
& PAGE_MASK
);
3003 mr
->hwmr
.va
= *iova_start
;
3004 mr
->hwmr
.local_rd
= 1;
3005 mr
->hwmr
.remote_wr
= (acc
& IB_ACCESS_REMOTE_WRITE
) ? 1 : 0;
3006 mr
->hwmr
.remote_rd
= (acc
& IB_ACCESS_REMOTE_READ
) ? 1 : 0;
3007 mr
->hwmr
.local_wr
= (acc
& IB_ACCESS_LOCAL_WRITE
) ? 1 : 0;
3008 mr
->hwmr
.remote_atomic
= (acc
& IB_ACCESS_REMOTE_ATOMIC
) ? 1 : 0;
3009 mr
->hwmr
.mw_bind
= (acc
& IB_ACCESS_MW_BIND
) ? 1 : 0;
3011 status
= ocrdma_build_pbl_tbl(dev
, &mr
->hwmr
);
3014 build_kernel_pbes(buf_list
, buf_cnt
, pbe_size
, mr
->hwmr
.pbl_table
,
3016 status
= ocrdma_reg_mr(dev
, &mr
->hwmr
, pd
->id
, acc
);
3020 mr
->ibmr
.lkey
= mr
->hwmr
.lkey
;
3021 if (mr
->hwmr
.remote_wr
|| mr
->hwmr
.remote_rd
)
3022 mr
->ibmr
.rkey
= mr
->hwmr
.lkey
;
3026 ocrdma_free_mr_pbl_tbl(dev
, &mr
->hwmr
);
3029 return ERR_PTR(status
);