]>
Commit | Line | Data |
---|---|---|
77241056 | 1 | /* |
a74d5307 | 2 | * Copyright(c) 2015 - 2018 Intel Corporation. |
77241056 MM |
3 | * |
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
5 | * redistributing this file, you may do so under either license. | |
6 | * | |
7 | * GPL LICENSE SUMMARY | |
8 | * | |
77241056 MM |
9 | * This program is free software; you can redistribute it and/or modify |
10 | * it under the terms of version 2 of the GNU General Public License as | |
11 | * published by the Free Software Foundation. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, but | |
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * General Public License for more details. | |
17 | * | |
18 | * BSD LICENSE | |
19 | * | |
77241056 MM |
20 | * Redistribution and use in source and binary forms, with or without |
21 | * modification, are permitted provided that the following conditions | |
22 | * are met: | |
23 | * | |
24 | * - Redistributions of source code must retain the above copyright | |
25 | * notice, this list of conditions and the following disclaimer. | |
26 | * - Redistributions in binary form must reproduce the above copyright | |
27 | * notice, this list of conditions and the following disclaimer in | |
28 | * the documentation and/or other materials provided with the | |
29 | * distribution. | |
30 | * - Neither the name of Intel Corporation nor the names of its | |
31 | * contributors may be used to endorse or promote products derived | |
32 | * from this software without specific prior written permission. | |
33 | * | |
34 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
35 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
36 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
37 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
38 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
39 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
40 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
41 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
42 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
43 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
44 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
45 | * | |
46 | */ | |
47 | ||
48 | #include <rdma/ib_mad.h> | |
49 | #include <rdma/ib_user_verbs.h> | |
50 | #include <linux/io.h> | |
51 | #include <linux/module.h> | |
52 | #include <linux/utsname.h> | |
53 | #include <linux/rculist.h> | |
54 | #include <linux/mm.h> | |
77241056 | 55 | #include <linux/vmalloc.h> |
13c19222 | 56 | #include <rdma/opa_addr.h> |
77241056 MM |
57 | |
58 | #include "hfi.h" | |
59 | #include "common.h" | |
60 | #include "device.h" | |
61 | #include "trace.h" | |
62 | #include "qp.h" | |
45842abb | 63 | #include "verbs_txreq.h" |
0181ce31 | 64 | #include "debugfs.h" |
2280740f | 65 | #include "vnic.h" |
a74d5307 | 66 | #include "fault.h" |
5d18ee67 | 67 | #include "affinity.h" |
77241056 | 68 | |
895420dd | 69 | static unsigned int hfi1_lkey_table_size = 16; |
77241056 MM |
70 | module_param_named(lkey_table_size, hfi1_lkey_table_size, uint, |
71 | S_IRUGO); | |
72 | MODULE_PARM_DESC(lkey_table_size, | |
73 | "LKEY table size in bits (2^n, 1 <= n <= 23)"); | |
74 | ||
75 | static unsigned int hfi1_max_pds = 0xFFFF; | |
76 | module_param_named(max_pds, hfi1_max_pds, uint, S_IRUGO); | |
77 | MODULE_PARM_DESC(max_pds, | |
78 | "Maximum number of protection domains to support"); | |
79 | ||
80 | static unsigned int hfi1_max_ahs = 0xFFFF; | |
81 | module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO); | |
82 | MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support"); | |
83 | ||
f6aa7835 | 84 | unsigned int hfi1_max_cqes = 0x2FFFFF; |
77241056 MM |
85 | module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO); |
86 | MODULE_PARM_DESC(max_cqes, | |
87 | "Maximum number of completion queue entries to support"); | |
88 | ||
89 | unsigned int hfi1_max_cqs = 0x1FFFF; | |
90 | module_param_named(max_cqs, hfi1_max_cqs, uint, S_IRUGO); | |
91 | MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support"); | |
92 | ||
93 | unsigned int hfi1_max_qp_wrs = 0x3FFF; | |
94 | module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO); | |
95 | MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support"); | |
96 | ||
f6aa7835 | 97 | unsigned int hfi1_max_qps = 32768; |
77241056 MM |
98 | module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO); |
99 | MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support"); | |
100 | ||
101 | unsigned int hfi1_max_sges = 0x60; | |
102 | module_param_named(max_sges, hfi1_max_sges, uint, S_IRUGO); | |
103 | MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support"); | |
104 | ||
105 | unsigned int hfi1_max_mcast_grps = 16384; | |
106 | module_param_named(max_mcast_grps, hfi1_max_mcast_grps, uint, S_IRUGO); | |
107 | MODULE_PARM_DESC(max_mcast_grps, | |
108 | "Maximum number of multicast groups to support"); | |
109 | ||
110 | unsigned int hfi1_max_mcast_qp_attached = 16; | |
111 | module_param_named(max_mcast_qp_attached, hfi1_max_mcast_qp_attached, | |
112 | uint, S_IRUGO); | |
113 | MODULE_PARM_DESC(max_mcast_qp_attached, | |
114 | "Maximum number of attached QPs to support"); | |
115 | ||
116 | unsigned int hfi1_max_srqs = 1024; | |
117 | module_param_named(max_srqs, hfi1_max_srqs, uint, S_IRUGO); | |
118 | MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support"); | |
119 | ||
120 | unsigned int hfi1_max_srq_sges = 128; | |
121 | module_param_named(max_srq_sges, hfi1_max_srq_sges, uint, S_IRUGO); | |
122 | MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support"); | |
123 | ||
124 | unsigned int hfi1_max_srq_wrs = 0x1FFFF; | |
125 | module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO); | |
126 | MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support"); | |
127 | ||
d0e859c3 | 128 | unsigned short piothreshold = 256; |
14553ca1 MM |
129 | module_param(piothreshold, ushort, S_IRUGO); |
130 | MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio"); | |
131 | ||
528ee9fb DL |
132 | static unsigned int sge_copy_mode; |
133 | module_param(sge_copy_mode, uint, S_IRUGO); | |
134 | MODULE_PARM_DESC(sge_copy_mode, | |
135 | "Verbs copy mode: 0 use memcpy, 1 use cacheless copy, 2 adapt based on WSS"); | |
136 | ||
77241056 MM |
137 | static void verbs_sdma_complete( |
138 | struct sdma_txreq *cookie, | |
a545f530 | 139 | int status); |
77241056 | 140 | |
14553ca1 MM |
141 | static int pio_wait(struct rvt_qp *qp, |
142 | struct send_context *sc, | |
143 | struct hfi1_pkt_state *ps, | |
144 | u32 flag); | |
145 | ||
64ffd86c JJ |
146 | /* Length of buffer to create verbs txreq cache name */ |
147 | #define TXREQ_NAME_LEN 24 | |
148 | ||
f8195f3b DH |
149 | /* 16B trailing buffer */ |
150 | static const u8 trail_buf[MAX_16B_PADDING]; | |
151 | ||
019f118b | 152 | static uint wss_threshold = 80; |
528ee9fb DL |
153 | module_param(wss_threshold, uint, S_IRUGO); |
154 | MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy"); | |
155 | static uint wss_clean_period = 256; | |
156 | module_param(wss_clean_period, uint, S_IRUGO); | |
157 | MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned"); | |
158 | ||
43a474aa MM |
159 | /* |
160 | * Translate ib_wr_opcode into ib_wc_opcode. | |
161 | */ | |
162 | const enum ib_wc_opcode ib_hfi1_wc_opcode[] = { | |
163 | [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE, | |
3c6cb20a | 164 | [IB_WR_TID_RDMA_WRITE] = IB_WC_RDMA_WRITE, |
43a474aa MM |
165 | [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE, |
166 | [IB_WR_SEND] = IB_WC_SEND, | |
167 | [IB_WR_SEND_WITH_IMM] = IB_WC_SEND, | |
168 | [IB_WR_RDMA_READ] = IB_WC_RDMA_READ, | |
24b11923 | 169 | [IB_WR_TID_RDMA_READ] = IB_WC_RDMA_READ, |
43a474aa MM |
170 | [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP, |
171 | [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD, | |
172 | [IB_WR_SEND_WITH_INV] = IB_WC_SEND, | |
173 | [IB_WR_LOCAL_INV] = IB_WC_LOCAL_INV, | |
174 | [IB_WR_REG_MR] = IB_WC_REG_MR | |
175 | }; | |
176 | ||
77241056 MM |
177 | /* |
178 | * Length of header by opcode, 0 --> not supported | |
179 | */ | |
180 | const u8 hdr_len_by_opcode[256] = { | |
181 | /* RC */ | |
182 | [IB_OPCODE_RC_SEND_FIRST] = 12 + 8, | |
183 | [IB_OPCODE_RC_SEND_MIDDLE] = 12 + 8, | |
184 | [IB_OPCODE_RC_SEND_LAST] = 12 + 8, | |
185 | [IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = 12 + 8 + 4, | |
186 | [IB_OPCODE_RC_SEND_ONLY] = 12 + 8, | |
187 | [IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 4, | |
188 | [IB_OPCODE_RC_RDMA_WRITE_FIRST] = 12 + 8 + 16, | |
189 | [IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = 12 + 8, | |
190 | [IB_OPCODE_RC_RDMA_WRITE_LAST] = 12 + 8, | |
191 | [IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4, | |
192 | [IB_OPCODE_RC_RDMA_WRITE_ONLY] = 12 + 8 + 16, | |
193 | [IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20, | |
194 | [IB_OPCODE_RC_RDMA_READ_REQUEST] = 12 + 8 + 16, | |
195 | [IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = 12 + 8 + 4, | |
196 | [IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = 12 + 8, | |
197 | [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4, | |
198 | [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4, | |
199 | [IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4, | |
37aab620 | 200 | [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4 + 8, |
77241056 MM |
201 | [IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28, |
202 | [IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28, | |
bdd8a98c JX |
203 | [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4, |
204 | [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4, | |
22d136d7 KW |
205 | [IB_OPCODE_TID_RDMA_READ_REQ] = 12 + 8 + 36, |
206 | [IB_OPCODE_TID_RDMA_READ_RESP] = 12 + 8 + 36, | |
3c6cb20a KW |
207 | [IB_OPCODE_TID_RDMA_WRITE_REQ] = 12 + 8 + 36, |
208 | [IB_OPCODE_TID_RDMA_WRITE_RESP] = 12 + 8 + 36, | |
209 | [IB_OPCODE_TID_RDMA_WRITE_DATA] = 12 + 8 + 36, | |
210 | [IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = 12 + 8 + 36, | |
211 | [IB_OPCODE_TID_RDMA_ACK] = 12 + 8 + 36, | |
212 | [IB_OPCODE_TID_RDMA_RESYNC] = 12 + 8 + 36, | |
77241056 MM |
213 | /* UC */ |
214 | [IB_OPCODE_UC_SEND_FIRST] = 12 + 8, | |
215 | [IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8, | |
216 | [IB_OPCODE_UC_SEND_LAST] = 12 + 8, | |
217 | [IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = 12 + 8 + 4, | |
218 | [IB_OPCODE_UC_SEND_ONLY] = 12 + 8, | |
219 | [IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 4, | |
220 | [IB_OPCODE_UC_RDMA_WRITE_FIRST] = 12 + 8 + 16, | |
221 | [IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = 12 + 8, | |
222 | [IB_OPCODE_UC_RDMA_WRITE_LAST] = 12 + 8, | |
223 | [IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4, | |
224 | [IB_OPCODE_UC_RDMA_WRITE_ONLY] = 12 + 8 + 16, | |
225 | [IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20, | |
226 | /* UD */ | |
227 | [IB_OPCODE_UD_SEND_ONLY] = 12 + 8 + 8, | |
228 | [IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 12 | |
229 | }; | |
230 | ||
231 | static const opcode_handler opcode_handler_tbl[256] = { | |
232 | /* RC */ | |
233 | [IB_OPCODE_RC_SEND_FIRST] = &hfi1_rc_rcv, | |
234 | [IB_OPCODE_RC_SEND_MIDDLE] = &hfi1_rc_rcv, | |
235 | [IB_OPCODE_RC_SEND_LAST] = &hfi1_rc_rcv, | |
236 | [IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = &hfi1_rc_rcv, | |
237 | [IB_OPCODE_RC_SEND_ONLY] = &hfi1_rc_rcv, | |
238 | [IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_rc_rcv, | |
239 | [IB_OPCODE_RC_RDMA_WRITE_FIRST] = &hfi1_rc_rcv, | |
240 | [IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = &hfi1_rc_rcv, | |
241 | [IB_OPCODE_RC_RDMA_WRITE_LAST] = &hfi1_rc_rcv, | |
242 | [IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_rc_rcv, | |
243 | [IB_OPCODE_RC_RDMA_WRITE_ONLY] = &hfi1_rc_rcv, | |
244 | [IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_rc_rcv, | |
245 | [IB_OPCODE_RC_RDMA_READ_REQUEST] = &hfi1_rc_rcv, | |
246 | [IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = &hfi1_rc_rcv, | |
247 | [IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = &hfi1_rc_rcv, | |
248 | [IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = &hfi1_rc_rcv, | |
249 | [IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = &hfi1_rc_rcv, | |
250 | [IB_OPCODE_RC_ACKNOWLEDGE] = &hfi1_rc_rcv, | |
251 | [IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = &hfi1_rc_rcv, | |
252 | [IB_OPCODE_RC_COMPARE_SWAP] = &hfi1_rc_rcv, | |
253 | [IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv, | |
a2df0c83 JX |
254 | [IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv, |
255 | [IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv, | |
22d136d7 KW |
256 | |
257 | /* TID RDMA has separate handlers for different opcodes.*/ | |
3c6cb20a KW |
258 | [IB_OPCODE_TID_RDMA_WRITE_REQ] = &hfi1_rc_rcv_tid_rdma_write_req, |
259 | [IB_OPCODE_TID_RDMA_WRITE_RESP] = &hfi1_rc_rcv_tid_rdma_write_resp, | |
260 | [IB_OPCODE_TID_RDMA_WRITE_DATA] = &hfi1_rc_rcv_tid_rdma_write_data, | |
261 | [IB_OPCODE_TID_RDMA_WRITE_DATA_LAST] = &hfi1_rc_rcv_tid_rdma_write_data, | |
22d136d7 KW |
262 | [IB_OPCODE_TID_RDMA_READ_REQ] = &hfi1_rc_rcv_tid_rdma_read_req, |
263 | [IB_OPCODE_TID_RDMA_READ_RESP] = &hfi1_rc_rcv_tid_rdma_read_resp, | |
3c6cb20a KW |
264 | [IB_OPCODE_TID_RDMA_RESYNC] = &hfi1_rc_rcv_tid_rdma_resync, |
265 | [IB_OPCODE_TID_RDMA_ACK] = &hfi1_rc_rcv_tid_rdma_ack, | |
22d136d7 | 266 | |
77241056 MM |
267 | /* UC */ |
268 | [IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv, | |
269 | [IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv, | |
270 | [IB_OPCODE_UC_SEND_LAST] = &hfi1_uc_rcv, | |
271 | [IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = &hfi1_uc_rcv, | |
272 | [IB_OPCODE_UC_SEND_ONLY] = &hfi1_uc_rcv, | |
273 | [IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_uc_rcv, | |
274 | [IB_OPCODE_UC_RDMA_WRITE_FIRST] = &hfi1_uc_rcv, | |
275 | [IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = &hfi1_uc_rcv, | |
276 | [IB_OPCODE_UC_RDMA_WRITE_LAST] = &hfi1_uc_rcv, | |
277 | [IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_uc_rcv, | |
278 | [IB_OPCODE_UC_RDMA_WRITE_ONLY] = &hfi1_uc_rcv, | |
279 | [IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_uc_rcv, | |
280 | /* UD */ | |
281 | [IB_OPCODE_UD_SEND_ONLY] = &hfi1_ud_rcv, | |
282 | [IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_ud_rcv, | |
283 | /* CNP */ | |
284 | [IB_OPCODE_CNP] = &hfi1_cnp_rcv | |
285 | }; | |
286 | ||
b374e060 MM |
287 | #define OPMASK 0x1f |
288 | ||
289 | static const u32 pio_opmask[BIT(3)] = { | |
290 | /* RC */ | |
291 | [IB_OPCODE_RC >> 5] = | |
292 | BIT(RC_OP(SEND_ONLY) & OPMASK) | | |
293 | BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) | | |
294 | BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) | | |
295 | BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) | | |
296 | BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) | | |
297 | BIT(RC_OP(ACKNOWLEDGE) & OPMASK) | | |
298 | BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) | | |
299 | BIT(RC_OP(COMPARE_SWAP) & OPMASK) | | |
300 | BIT(RC_OP(FETCH_ADD) & OPMASK), | |
301 | /* UC */ | |
302 | [IB_OPCODE_UC >> 5] = | |
303 | BIT(UC_OP(SEND_ONLY) & OPMASK) | | |
304 | BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) | | |
305 | BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) | | |
306 | BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK), | |
307 | }; | |
308 | ||
77241056 MM |
309 | /* |
310 | * System image GUID. | |
311 | */ | |
312 | __be64 ib_hfi1_sys_image_guid; | |
313 | ||
77241056 MM |
314 | /* |
315 | * Make sure the QP is ready and able to accept the given opcode. | |
316 | */ | |
9039746c | 317 | static inline opcode_handler qp_ok(struct hfi1_packet *packet) |
77241056 | 318 | { |
83693bd1 | 319 | if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) |
71e68e3d | 320 | return NULL; |
9039746c DH |
321 | if (((packet->opcode & RVT_OPCODE_QP_MASK) == |
322 | packet->qp->allowed_ops) || | |
323 | (packet->opcode == IB_OPCODE_CNP)) | |
324 | return opcode_handler_tbl[packet->opcode]; | |
71e68e3d JP |
325 | |
326 | return NULL; | |
77241056 MM |
327 | } |
328 | ||
243d9f43 DH |
329 | static u64 hfi1_fault_tx(struct rvt_qp *qp, u8 opcode, u64 pbc) |
330 | { | |
331 | #ifdef CONFIG_FAULT_INJECTION | |
6b6cf935 | 332 | if ((opcode & IB_OPCODE_MSP) == IB_OPCODE_MSP) { |
243d9f43 DH |
333 | /* |
334 | * In order to drop non-IB traffic we | |
335 | * set PbcInsertHrc to NONE (0x2). | |
336 | * The packet will still be delivered | |
337 | * to the receiving node but a | |
338 | * KHdrHCRCErr (KDETH packet with a bad | |
339 | * HCRC) will be triggered and the | |
340 | * packet will not be delivered to the | |
341 | * correct context. | |
342 | */ | |
6b6cf935 | 343 | pbc &= ~PBC_INSERT_HCRC_SMASK; |
243d9f43 | 344 | pbc |= (u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT; |
6b6cf935 | 345 | } else { |
243d9f43 DH |
346 | /* |
347 | * In order to drop regular verbs | |
348 | * traffic we set the PbcTestEbp | |
349 | * flag. The packet will still be | |
350 | * delivered to the receiving node but | |
351 | * a 'late ebp error' will be | |
352 | * triggered and will be dropped. | |
353 | */ | |
354 | pbc |= PBC_TEST_EBP; | |
6b6cf935 | 355 | } |
243d9f43 DH |
356 | #endif |
357 | return pbc; | |
358 | } | |
359 | ||
22d136d7 KW |
360 | static opcode_handler tid_qp_ok(int opcode, struct hfi1_packet *packet) |
361 | { | |
362 | if (packet->qp->ibqp.qp_type != IB_QPT_RC || | |
363 | !(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) | |
364 | return NULL; | |
365 | if ((opcode & RVT_OPCODE_QP_MASK) == IB_OPCODE_TID_RDMA) | |
366 | return opcode_handler_tbl[opcode]; | |
367 | return NULL; | |
368 | } | |
369 | ||
370 | void hfi1_kdeth_eager_rcv(struct hfi1_packet *packet) | |
371 | { | |
372 | struct hfi1_ctxtdata *rcd = packet->rcd; | |
373 | struct ib_header *hdr = packet->hdr; | |
374 | u32 tlen = packet->tlen; | |
375 | struct hfi1_pportdata *ppd = rcd->ppd; | |
376 | struct hfi1_ibport *ibp = &ppd->ibport_data; | |
377 | struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; | |
378 | opcode_handler opcode_handler; | |
379 | unsigned long flags; | |
380 | u32 qp_num; | |
381 | int lnh; | |
382 | u8 opcode; | |
383 | ||
384 | /* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */ | |
385 | if (unlikely(tlen < 15 * sizeof(u32))) | |
386 | goto drop; | |
387 | ||
388 | lnh = be16_to_cpu(hdr->lrh[0]) & 3; | |
389 | if (lnh != HFI1_LRH_BTH) | |
390 | goto drop; | |
391 | ||
392 | packet->ohdr = &hdr->u.oth; | |
393 | trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf))); | |
394 | ||
395 | opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24); | |
396 | inc_opstats(tlen, &rcd->opstats->stats[opcode]); | |
397 | ||
398 | /* verbs_qp can be picked up from any tid_rdma header struct */ | |
399 | qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_req.verbs_qp) & | |
400 | RVT_QPN_MASK; | |
401 | ||
402 | rcu_read_lock(); | |
403 | packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); | |
404 | if (!packet->qp) | |
405 | goto drop_rcu; | |
406 | spin_lock_irqsave(&packet->qp->r_lock, flags); | |
407 | opcode_handler = tid_qp_ok(opcode, packet); | |
408 | if (likely(opcode_handler)) | |
409 | opcode_handler(packet); | |
410 | else | |
411 | goto drop_unlock; | |
412 | spin_unlock_irqrestore(&packet->qp->r_lock, flags); | |
413 | rcu_read_unlock(); | |
414 | ||
415 | return; | |
416 | drop_unlock: | |
417 | spin_unlock_irqrestore(&packet->qp->r_lock, flags); | |
418 | drop_rcu: | |
419 | rcu_read_unlock(); | |
420 | drop: | |
421 | ibp->rvp.n_pkt_drops++; | |
422 | } | |
423 | ||
424 | void hfi1_kdeth_expected_rcv(struct hfi1_packet *packet) | |
425 | { | |
426 | struct hfi1_ctxtdata *rcd = packet->rcd; | |
427 | struct ib_header *hdr = packet->hdr; | |
428 | u32 tlen = packet->tlen; | |
429 | struct hfi1_pportdata *ppd = rcd->ppd; | |
430 | struct hfi1_ibport *ibp = &ppd->ibport_data; | |
431 | struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; | |
432 | opcode_handler opcode_handler; | |
433 | unsigned long flags; | |
434 | u32 qp_num; | |
435 | int lnh; | |
436 | u8 opcode; | |
437 | ||
438 | /* DW == LRH (2) + BTH (3) + KDETH (9) + CRC (1) */ | |
439 | if (unlikely(tlen < 15 * sizeof(u32))) | |
440 | goto drop; | |
441 | ||
442 | lnh = be16_to_cpu(hdr->lrh[0]) & 3; | |
443 | if (lnh != HFI1_LRH_BTH) | |
444 | goto drop; | |
445 | ||
446 | packet->ohdr = &hdr->u.oth; | |
447 | trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf))); | |
448 | ||
449 | opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24); | |
450 | inc_opstats(tlen, &rcd->opstats->stats[opcode]); | |
451 | ||
452 | /* verbs_qp can be picked up from any tid_rdma header struct */ | |
453 | qp_num = be32_to_cpu(packet->ohdr->u.tid_rdma.r_rsp.verbs_qp) & | |
454 | RVT_QPN_MASK; | |
455 | ||
456 | rcu_read_lock(); | |
457 | packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); | |
458 | if (!packet->qp) | |
459 | goto drop_rcu; | |
460 | spin_lock_irqsave(&packet->qp->r_lock, flags); | |
461 | opcode_handler = tid_qp_ok(opcode, packet); | |
462 | if (likely(opcode_handler)) | |
463 | opcode_handler(packet); | |
464 | else | |
465 | goto drop_unlock; | |
466 | spin_unlock_irqrestore(&packet->qp->r_lock, flags); | |
467 | rcu_read_unlock(); | |
468 | ||
469 | return; | |
470 | drop_unlock: | |
471 | spin_unlock_irqrestore(&packet->qp->r_lock, flags); | |
472 | drop_rcu: | |
473 | rcu_read_unlock(); | |
474 | drop: | |
475 | ibp->rvp.n_pkt_drops++; | |
476 | } | |
477 | ||
5786adf3 DH |
478 | static int hfi1_do_pkey_check(struct hfi1_packet *packet) |
479 | { | |
480 | struct hfi1_ctxtdata *rcd = packet->rcd; | |
481 | struct hfi1_pportdata *ppd = rcd->ppd; | |
482 | struct hfi1_16b_header *hdr = packet->hdr; | |
483 | u16 pkey; | |
484 | ||
485 | /* Pkey check needed only for bypass packets */ | |
486 | if (packet->etype != RHF_RCV_TYPE_BYPASS) | |
487 | return 0; | |
488 | ||
489 | /* Perform pkey check */ | |
490 | pkey = hfi1_16B_get_pkey(hdr); | |
491 | return ingress_pkey_check(ppd, pkey, packet->sc, | |
492 | packet->qp->s_pkey_index, | |
493 | packet->slid, true); | |
494 | } | |
495 | ||
9039746c DH |
496 | static inline void hfi1_handle_packet(struct hfi1_packet *packet, |
497 | bool is_mcast) | |
77241056 | 498 | { |
9039746c | 499 | u32 qp_num; |
77241056 | 500 | struct hfi1_ctxtdata *rcd = packet->rcd; |
77241056 | 501 | struct hfi1_pportdata *ppd = rcd->ppd; |
f3e862cb | 502 | struct hfi1_ibport *ibp = rcd_to_iport(rcd); |
ec4274f1 | 503 | struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; |
71e68e3d | 504 | opcode_handler packet_handler; |
b77d713a | 505 | unsigned long flags; |
77241056 | 506 | |
9039746c | 507 | inc_opstats(packet->tlen, &rcd->opstats->stats[packet->opcode]); |
77241056 | 508 | |
9039746c | 509 | if (unlikely(is_mcast)) { |
0facc5a1 DD |
510 | struct rvt_mcast *mcast; |
511 | struct rvt_mcast_qp *p; | |
77241056 | 512 | |
9039746c | 513 | if (!packet->grh) |
77241056 | 514 | goto drop; |
9039746c DH |
515 | mcast = rvt_mcast_find(&ibp->rvp, |
516 | &packet->grh->dgid, | |
72c07e2b | 517 | opa_get_lid(packet->dlid, 9B)); |
d125a6c6 | 518 | if (!mcast) |
77241056 MM |
519 | goto drop; |
520 | list_for_each_entry_rcu(p, &mcast->qp_list, list) { | |
521 | packet->qp = p->qp; | |
5786adf3 DH |
522 | if (hfi1_do_pkey_check(packet)) |
523 | goto drop; | |
b77d713a | 524 | spin_lock_irqsave(&packet->qp->r_lock, flags); |
9039746c | 525 | packet_handler = qp_ok(packet); |
71e68e3d JP |
526 | if (likely(packet_handler)) |
527 | packet_handler(packet); | |
528 | else | |
529 | ibp->rvp.n_pkt_drops++; | |
b77d713a | 530 | spin_unlock_irqrestore(&packet->qp->r_lock, flags); |
77241056 MM |
531 | } |
532 | /* | |
0facc5a1 | 533 | * Notify rvt_multicast_detach() if it is waiting for us |
77241056 MM |
534 | * to finish. |
535 | */ | |
536 | if (atomic_dec_return(&mcast->refcount) <= 1) | |
537 | wake_up(&mcast->wait); | |
538 | } else { | |
9039746c | 539 | /* Get the destination QP number. */ |
81cd3891 DH |
540 | if (packet->etype == RHF_RCV_TYPE_BYPASS && |
541 | hfi1_16B_get_l4(packet->hdr) == OPA_16B_L4_FM) | |
542 | qp_num = hfi1_16B_get_dest_qpn(packet->mgmt); | |
543 | else | |
544 | qp_num = ib_bth_get_qpn(packet->ohdr); | |
545 | ||
77241056 | 546 | rcu_read_lock(); |
ec4274f1 | 547 | packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); |
5786adf3 DH |
548 | if (!packet->qp) |
549 | goto unlock_drop; | |
550 | ||
551 | if (hfi1_do_pkey_check(packet)) | |
552 | goto unlock_drop; | |
553 | ||
b77d713a | 554 | spin_lock_irqsave(&packet->qp->r_lock, flags); |
9039746c | 555 | packet_handler = qp_ok(packet); |
71e68e3d JP |
556 | if (likely(packet_handler)) |
557 | packet_handler(packet); | |
558 | else | |
559 | ibp->rvp.n_pkt_drops++; | |
b77d713a | 560 | spin_unlock_irqrestore(&packet->qp->r_lock, flags); |
77241056 MM |
561 | rcu_read_unlock(); |
562 | } | |
563 | return; | |
5786adf3 DH |
564 | unlock_drop: |
565 | rcu_read_unlock(); | |
77241056 | 566 | drop: |
4eb06882 | 567 | ibp->rvp.n_pkt_drops++; |
77241056 MM |
568 | } |
569 | ||
9039746c DH |
570 | /** |
571 | * hfi1_ib_rcv - process an incoming packet | |
572 | * @packet: data packet information | |
573 | * | |
574 | * This is called to process an incoming packet at interrupt level. | |
575 | */ | |
576 | void hfi1_ib_rcv(struct hfi1_packet *packet) | |
577 | { | |
578 | struct hfi1_ctxtdata *rcd = packet->rcd; | |
9039746c | 579 | |
72c07e2b DH |
580 | trace_input_ibhdr(rcd->dd, packet, !!(rhf_dc_info(packet->rhf))); |
581 | hfi1_handle_packet(packet, hfi1_check_mcast(packet->dlid)); | |
582 | } | |
583 | ||
584 | void hfi1_16B_rcv(struct hfi1_packet *packet) | |
585 | { | |
586 | struct hfi1_ctxtdata *rcd = packet->rcd; | |
9039746c | 587 | |
72c07e2b DH |
588 | trace_input_ibhdr(rcd->dd, packet, false); |
589 | hfi1_handle_packet(packet, hfi1_check_mcast(packet->dlid)); | |
9039746c DH |
590 | } |
591 | ||
77241056 MM |
592 | /* |
593 | * This is called from a timer to check for QPs | |
594 | * which need kernel memory in order to send a packet. | |
595 | */ | |
8064135e | 596 | static void mem_timer(struct timer_list *t) |
77241056 | 597 | { |
8064135e | 598 | struct hfi1_ibdev *dev = from_timer(dev, t, mem_timer); |
77241056 | 599 | struct list_head *list = &dev->memwait; |
895420dd | 600 | struct rvt_qp *qp = NULL; |
77241056 MM |
601 | struct iowait *wait; |
602 | unsigned long flags; | |
4c6829c5 | 603 | struct hfi1_qp_priv *priv; |
77241056 MM |
604 | |
605 | write_seqlock_irqsave(&dev->iowait_lock, flags); | |
606 | if (!list_empty(list)) { | |
607 | wait = list_first_entry(list, struct iowait, list); | |
4c6829c5 DD |
608 | qp = iowait_to_qp(wait); |
609 | priv = qp->priv; | |
610 | list_del_init(&priv->s_iowait.list); | |
4e045572 | 611 | priv->s_iowait.lock = NULL; |
77241056 MM |
612 | /* refcount held until actual wake up */ |
613 | if (!list_empty(list)) | |
614 | mod_timer(&dev->mem_timer, jiffies + 1); | |
615 | } | |
616 | write_sequnlock_irqrestore(&dev->iowait_lock, flags); | |
617 | ||
618 | if (qp) | |
54d10c1e | 619 | hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM); |
77241056 MM |
620 | } |
621 | ||
77241056 MM |
622 | /* |
623 | * This is called with progress side lock held. | |
624 | */ | |
625 | /* New API */ | |
626 | static void verbs_sdma_complete( | |
627 | struct sdma_txreq *cookie, | |
a545f530 | 628 | int status) |
77241056 MM |
629 | { |
630 | struct verbs_txreq *tx = | |
631 | container_of(cookie, struct verbs_txreq, txreq); | |
895420dd | 632 | struct rvt_qp *qp = tx->qp; |
77241056 MM |
633 | |
634 | spin_lock(&qp->s_lock); | |
e490974e | 635 | if (tx->wqe) { |
116aa033 | 636 | rvt_send_complete(qp, tx->wqe, IB_WC_SUCCESS); |
e490974e | 637 | } else if (qp->ibqp.qp_type == IB_QPT_RC) { |
30e07416 | 638 | struct hfi1_opa_header *hdr; |
77241056 MM |
639 | |
640 | hdr = &tx->phdr.hdr; | |
641 | hfi1_rc_send_complete(qp, hdr); | |
642 | } | |
77241056 MM |
643 | spin_unlock(&qp->s_lock); |
644 | ||
645 | hfi1_put_txreq(tx); | |
646 | } | |
647 | ||
838b6fd2 KW |
648 | void hfi1_wait_kmem(struct rvt_qp *qp) |
649 | { | |
650 | struct hfi1_qp_priv *priv = qp->priv; | |
651 | struct ib_qp *ibqp = &qp->ibqp; | |
652 | struct ib_device *ibdev = ibqp->device; | |
653 | struct hfi1_ibdev *dev = to_idev(ibdev); | |
654 | ||
655 | if (list_empty(&priv->s_iowait.list)) { | |
656 | if (list_empty(&dev->memwait)) | |
657 | mod_timer(&dev->mem_timer, jiffies + 1); | |
658 | qp->s_flags |= RVT_S_WAIT_KMEM; | |
659 | list_add_tail(&priv->s_iowait.list, &dev->memwait); | |
660 | priv->s_iowait.lock = &dev->iowait_lock; | |
661 | trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); | |
662 | rvt_get_qp(qp); | |
663 | } | |
664 | } | |
665 | ||
711e104d MM |
666 | static int wait_kmem(struct hfi1_ibdev *dev, |
667 | struct rvt_qp *qp, | |
668 | struct hfi1_pkt_state *ps) | |
77241056 MM |
669 | { |
670 | unsigned long flags; | |
671 | int ret = 0; | |
672 | ||
673 | spin_lock_irqsave(&qp->s_lock, flags); | |
83693bd1 | 674 | if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { |
77241056 | 675 | write_seqlock(&dev->iowait_lock); |
711e104d | 676 | list_add_tail(&ps->s_txreq->txreq.list, |
5da0fc9d | 677 | &ps->wait->tx_head); |
838b6fd2 | 678 | hfi1_wait_kmem(qp); |
77241056 | 679 | write_sequnlock(&dev->iowait_lock); |
5da0fc9d | 680 | hfi1_qp_unbusy(qp, ps->wait); |
77241056 MM |
681 | ret = -EBUSY; |
682 | } | |
683 | spin_unlock_irqrestore(&qp->s_lock, flags); | |
684 | ||
685 | return ret; | |
686 | } | |
687 | ||
688 | /* | |
689 | * This routine calls txadds for each sg entry. | |
690 | * | |
691 | * Add failures will revert the sge cursor | |
692 | */ | |
711e104d | 693 | static noinline int build_verbs_ulp_payload( |
77241056 | 694 | struct sdma_engine *sde, |
77241056 MM |
695 | u32 length, |
696 | struct verbs_txreq *tx) | |
697 | { | |
b777f154 | 698 | struct rvt_sge_state *ss = tx->ss; |
895420dd DD |
699 | struct rvt_sge *sg_list = ss->sg_list; |
700 | struct rvt_sge sge = ss->sge; | |
77241056 MM |
701 | u8 num_sge = ss->num_sge; |
702 | u32 len; | |
703 | int ret = 0; | |
704 | ||
705 | while (length) { | |
87fc34b5 | 706 | len = rvt_get_sge_length(&ss->sge, length); |
77241056 MM |
707 | WARN_ON_ONCE(len == 0); |
708 | ret = sdma_txadd_kvaddr( | |
709 | sde->dd, | |
710 | &tx->txreq, | |
711 | ss->sge.vaddr, | |
712 | len); | |
713 | if (ret) | |
714 | goto bail_txadd; | |
1198fcea | 715 | rvt_update_sge(ss, len, false); |
77241056 MM |
716 | length -= len; |
717 | } | |
718 | return ret; | |
719 | bail_txadd: | |
720 | /* unwind cursor */ | |
721 | ss->sge = sge; | |
722 | ss->num_sge = num_sge; | |
723 | ss->sg_list = sg_list; | |
724 | return ret; | |
725 | } | |
726 | ||
1b311f89 MM |
727 | /** |
728 | * update_tx_opstats - record stats by opcode | |
729 | * @qp; the qp | |
730 | * @ps: transmit packet state | |
731 | * @plen: the plen in dwords | |
732 | * | |
733 | * This is a routine to record the tx opstats after a | |
734 | * packet has been presented to the egress mechanism. | |
735 | */ | |
736 | static void update_tx_opstats(struct rvt_qp *qp, struct hfi1_pkt_state *ps, | |
737 | u32 plen) | |
738 | { | |
739 | #ifdef CONFIG_DEBUG_FS | |
740 | struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); | |
741 | struct hfi1_opcode_stats_perctx *s = get_cpu_ptr(dd->tx_opstats); | |
742 | ||
743 | inc_opstats(plen * 4, &s->stats[ps->opcode]); | |
744 | put_cpu_ptr(s); | |
745 | #endif | |
746 | } | |
747 | ||
77241056 MM |
748 | /* |
749 | * Build the number of DMA descriptors needed to send length bytes of data. | |
750 | * | |
751 | * NOTE: DMA mapping is held in the tx until completed in the ring or | |
752 | * the tx desc is freed without having been submitted to the ring | |
753 | * | |
bb5df5f9 | 754 | * This routine ensures all the helper routine calls succeed. |
77241056 MM |
755 | */ |
756 | /* New API */ | |
757 | static int build_verbs_tx_desc( | |
758 | struct sdma_engine *sde, | |
77241056 MM |
759 | u32 length, |
760 | struct verbs_txreq *tx, | |
a9b6b3bc | 761 | struct hfi1_ahg_info *ahg_info, |
77241056 MM |
762 | u64 pbc) |
763 | { | |
764 | int ret = 0; | |
d4d602e9 | 765 | struct hfi1_sdma_header *phdr = &tx->phdr; |
9636258f | 766 | u16 hdrbytes = (tx->hdr_dwords + sizeof(pbc) / 4) << 2; |
566d53a8 | 767 | u8 extra_bytes = 0; |
77241056 | 768 | |
566d53a8 DH |
769 | if (tx->phdr.hdr.hdr_type) { |
770 | /* | |
771 | * hdrbytes accounts for PBC. Need to subtract 8 bytes | |
772 | * before calculating padding. | |
773 | */ | |
774 | extra_bytes = hfi1_get_16b_padding(hdrbytes - 8, length) + | |
775 | (SIZE_OF_CRC << 2) + SIZE_OF_LT; | |
566d53a8 | 776 | } |
a9b6b3bc | 777 | if (!ahg_info->ahgcount) { |
77241056 MM |
778 | ret = sdma_txinit_ahg( |
779 | &tx->txreq, | |
a9b6b3bc | 780 | ahg_info->tx_flags, |
566d53a8 DH |
781 | hdrbytes + length + |
782 | extra_bytes, | |
a9b6b3bc | 783 | ahg_info->ahgidx, |
77241056 MM |
784 | 0, |
785 | NULL, | |
786 | 0, | |
787 | verbs_sdma_complete); | |
788 | if (ret) | |
789 | goto bail_txadd; | |
790 | phdr->pbc = cpu_to_le64(pbc); | |
77241056 MM |
791 | ret = sdma_txadd_kvaddr( |
792 | sde->dd, | |
793 | &tx->txreq, | |
bb5df5f9 DD |
794 | phdr, |
795 | hdrbytes); | |
77241056 MM |
796 | if (ret) |
797 | goto bail_txadd; | |
798 | } else { | |
77241056 MM |
799 | ret = sdma_txinit_ahg( |
800 | &tx->txreq, | |
a9b6b3bc | 801 | ahg_info->tx_flags, |
77241056 | 802 | length, |
a9b6b3bc DC |
803 | ahg_info->ahgidx, |
804 | ahg_info->ahgcount, | |
805 | ahg_info->ahgdesc, | |
77241056 MM |
806 | hdrbytes, |
807 | verbs_sdma_complete); | |
808 | if (ret) | |
809 | goto bail_txadd; | |
810 | } | |
b777f154 | 811 | /* add the ulp payload - if any. tx->ss can be NULL for acks */ |
566d53a8 | 812 | if (tx->ss) { |
b777f154 | 813 | ret = build_verbs_ulp_payload(sde, length, tx); |
566d53a8 DH |
814 | if (ret) |
815 | goto bail_txadd; | |
816 | } | |
817 | ||
818 | /* add icrc, lt byte, and padding to flit */ | |
f8195f3b | 819 | if (extra_bytes) |
566d53a8 | 820 | ret = sdma_txadd_kvaddr(sde->dd, &tx->txreq, |
f8195f3b | 821 | (void *)trail_buf, extra_bytes); |
566d53a8 | 822 | |
77241056 MM |
823 | bail_txadd: |
824 | return ret; | |
825 | } | |
826 | ||
6b6cf935 KW |
827 | static u64 update_hcrc(u8 opcode, u64 pbc) |
828 | { | |
829 | if ((opcode & IB_OPCODE_TID_RDMA) == IB_OPCODE_TID_RDMA) { | |
830 | pbc &= ~PBC_INSERT_HCRC_SMASK; | |
831 | pbc |= (u64)PBC_IHCRC_LKDETH << PBC_INSERT_HCRC_SHIFT; | |
832 | } | |
833 | return pbc; | |
834 | } | |
835 | ||
895420dd | 836 | int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, |
d46e5144 | 837 | u64 pbc) |
77241056 | 838 | { |
4c6829c5 | 839 | struct hfi1_qp_priv *priv = qp->priv; |
a9b6b3bc | 840 | struct hfi1_ahg_info *ahg_info = priv->s_ahg; |
9636258f | 841 | u32 hdrwords = ps->s_txreq->hdr_dwords; |
e922ae06 | 842 | u32 len = ps->s_txreq->s_cur_size; |
566d53a8 | 843 | u32 plen; |
d46e5144 DD |
844 | struct hfi1_ibdev *dev = ps->dev; |
845 | struct hfi1_pportdata *ppd = ps->ppd; | |
77241056 | 846 | struct verbs_txreq *tx; |
4c6829c5 | 847 | u8 sc5 = priv->s_sc; |
77241056 | 848 | int ret; |
566d53a8 | 849 | u32 dwords; |
566d53a8 DH |
850 | |
851 | if (ps->s_txreq->phdr.hdr.hdr_type) { | |
852 | u8 extra_bytes = hfi1_get_16b_padding((hdrwords << 2), len); | |
853 | ||
854 | dwords = (len + extra_bytes + (SIZE_OF_CRC << 2) + | |
855 | SIZE_OF_LT) >> 2; | |
566d53a8 DH |
856 | } else { |
857 | dwords = (len + 3) >> 2; | |
858 | } | |
9636258f | 859 | plen = hdrwords + dwords + sizeof(pbc) / 4; |
77241056 | 860 | |
bb5df5f9 | 861 | tx = ps->s_txreq; |
711e104d MM |
862 | if (!sdma_txreq_built(&tx->txreq)) { |
863 | if (likely(pbc == 0)) { | |
864 | u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); | |
243d9f43 | 865 | |
711e104d | 866 | /* No vl15 here */ |
566d53a8 DH |
867 | /* set PBC_DC_INFO bit (aka SC[4]) in pbc */ |
868 | if (ps->s_txreq->phdr.hdr.hdr_type) | |
869 | pbc |= PBC_PACKET_BYPASS | | |
870 | PBC_INSERT_BYPASS_ICRC; | |
871 | else | |
872 | pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); | |
711e104d | 873 | |
a74d5307 | 874 | if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) |
566d53a8 | 875 | pbc = hfi1_fault_tx(qp, ps->opcode, pbc); |
711e104d | 876 | pbc = create_pbc(ppd, |
243d9f43 | 877 | pbc, |
711e104d MM |
878 | qp->srate_mbps, |
879 | vl, | |
880 | plen); | |
6b6cf935 KW |
881 | |
882 | /* Update HCRC based on packet opcode */ | |
883 | pbc = update_hcrc(ps->opcode, pbc); | |
711e104d MM |
884 | } |
885 | tx->wqe = qp->s_wqe; | |
b777f154 | 886 | ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc); |
711e104d MM |
887 | if (unlikely(ret)) |
888 | goto bail_build; | |
77241056 | 889 | } |
5da0fc9d | 890 | ret = sdma_send_txreq(tx->sde, ps->wait, &tx->txreq, ps->pkts_sent); |
5326dfbf MM |
891 | if (unlikely(ret < 0)) { |
892 | if (ret == -ECOMM) | |
893 | goto bail_ecomm; | |
894 | return ret; | |
895 | } | |
1b311f89 MM |
896 | |
897 | update_tx_opstats(qp, ps, plen); | |
1db78eee | 898 | trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device), |
228d2af1 | 899 | &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); |
77241056 MM |
900 | return ret; |
901 | ||
77241056 MM |
902 | bail_ecomm: |
903 | /* The current one got "sent" */ | |
904 | return 0; | |
905 | bail_build: | |
711e104d MM |
906 | ret = wait_kmem(dev, qp, ps); |
907 | if (!ret) { | |
908 | /* free txreq - bad state */ | |
909 | hfi1_put_txreq(ps->s_txreq); | |
910 | ps->s_txreq = NULL; | |
911 | } | |
912 | return ret; | |
77241056 MM |
913 | } |
914 | ||
915 | /* | |
916 | * If we are now in the error state, return zero to flush the | |
917 | * send work request. | |
918 | */ | |
14553ca1 MM |
919 | static int pio_wait(struct rvt_qp *qp, |
920 | struct send_context *sc, | |
921 | struct hfi1_pkt_state *ps, | |
922 | u32 flag) | |
77241056 | 923 | { |
4c6829c5 | 924 | struct hfi1_qp_priv *priv = qp->priv; |
77241056 | 925 | struct hfi1_devdata *dd = sc->dd; |
77241056 MM |
926 | unsigned long flags; |
927 | int ret = 0; | |
928 | ||
929 | /* | |
930 | * Note that as soon as want_buffer() is called and | |
931 | * possibly before it returns, sc_piobufavail() | |
932 | * could be called. Therefore, put QP on the I/O wait list before | |
933 | * enabling the PIO avail interrupt. | |
934 | */ | |
935 | spin_lock_irqsave(&qp->s_lock, flags); | |
83693bd1 | 936 | if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { |
9aefcabe | 937 | write_seqlock(&sc->waitlock); |
711e104d | 938 | list_add_tail(&ps->s_txreq->txreq.list, |
5da0fc9d | 939 | &ps->wait->tx_head); |
4c6829c5 | 940 | if (list_empty(&priv->s_iowait.list)) { |
77241056 MM |
941 | struct hfi1_ibdev *dev = &dd->verbs_dev; |
942 | int was_empty; | |
943 | ||
14553ca1 | 944 | dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); |
2e2ba09e | 945 | dev->n_piodrain += !!(flag & HFI1_S_WAIT_PIO_DRAIN); |
14553ca1 | 946 | qp->s_flags |= flag; |
77241056 | 947 | was_empty = list_empty(&sc->piowait); |
34025fb0 | 948 | iowait_get_priority(&priv->s_iowait); |
bcad2913 KW |
949 | iowait_queue(ps->pkts_sent, &priv->s_iowait, |
950 | &sc->piowait); | |
9aefcabe | 951 | priv->s_iowait.lock = &sc->waitlock; |
54d10c1e | 952 | trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); |
4d6f85c3 | 953 | rvt_get_qp(qp); |
77241056 MM |
954 | /* counting: only call wantpiobuf_intr if first user */ |
955 | if (was_empty) | |
956 | hfi1_sc_wantpiobuf_intr(sc, 1); | |
957 | } | |
9aefcabe | 958 | write_sequnlock(&sc->waitlock); |
5da0fc9d | 959 | hfi1_qp_unbusy(qp, ps->wait); |
77241056 MM |
960 | ret = -EBUSY; |
961 | } | |
962 | spin_unlock_irqrestore(&qp->s_lock, flags); | |
963 | return ret; | |
964 | } | |
965 | ||
14553ca1 MM |
966 | static void verbs_pio_complete(void *arg, int code) |
967 | { | |
968 | struct rvt_qp *qp = (struct rvt_qp *)arg; | |
969 | struct hfi1_qp_priv *priv = qp->priv; | |
970 | ||
971 | if (iowait_pio_dec(&priv->s_iowait)) | |
972 | iowait_drain_wakeup(&priv->s_iowait); | |
973 | } | |
974 | ||
895420dd | 975 | int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps, |
d46e5144 | 976 | u64 pbc) |
77241056 | 977 | { |
4c6829c5 | 978 | struct hfi1_qp_priv *priv = qp->priv; |
9636258f | 979 | u32 hdrwords = ps->s_txreq->hdr_dwords; |
b777f154 | 980 | struct rvt_sge_state *ss = ps->s_txreq->ss; |
e922ae06 | 981 | u32 len = ps->s_txreq->s_cur_size; |
566d53a8 DH |
982 | u32 dwords; |
983 | u32 plen; | |
d46e5144 | 984 | struct hfi1_pportdata *ppd = ps->ppd; |
566d53a8 | 985 | u32 *hdr; |
4f8cc5c0 | 986 | u8 sc5; |
77241056 MM |
987 | unsigned long flags = 0; |
988 | struct send_context *sc; | |
989 | struct pio_buf *pbuf; | |
990 | int wc_status = IB_WC_SUCCESS; | |
bb5df5f9 | 991 | int ret = 0; |
14553ca1 | 992 | pio_release_cb cb = NULL; |
566d53a8 DH |
993 | u8 extra_bytes = 0; |
994 | ||
995 | if (ps->s_txreq->phdr.hdr.hdr_type) { | |
996 | u8 pad_size = hfi1_get_16b_padding((hdrwords << 2), len); | |
997 | ||
998 | extra_bytes = pad_size + (SIZE_OF_CRC << 2) + SIZE_OF_LT; | |
999 | dwords = (len + extra_bytes) >> 2; | |
1000 | hdr = (u32 *)&ps->s_txreq->phdr.hdr.opah; | |
566d53a8 DH |
1001 | } else { |
1002 | dwords = (len + 3) >> 2; | |
1003 | hdr = (u32 *)&ps->s_txreq->phdr.hdr.ibh; | |
1004 | } | |
9636258f | 1005 | plen = hdrwords + dwords + sizeof(pbc) / 4; |
14553ca1 MM |
1006 | |
1007 | /* only RC/UC use complete */ | |
1008 | switch (qp->ibqp.qp_type) { | |
1009 | case IB_QPT_RC: | |
1010 | case IB_QPT_UC: | |
1011 | cb = verbs_pio_complete; | |
1012 | break; | |
1013 | default: | |
1014 | break; | |
1015 | } | |
77241056 MM |
1016 | |
1017 | /* vl15 special case taken care of in ud.c */ | |
4c6829c5 | 1018 | sc5 = priv->s_sc; |
cef504c5 | 1019 | sc = ps->s_txreq->psc; |
77241056 | 1020 | |
77241056 | 1021 | if (likely(pbc == 0)) { |
4f8cc5c0 | 1022 | u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); |
243d9f43 | 1023 | |
566d53a8 DH |
1024 | /* set PBC_DC_INFO bit (aka SC[4]) in pbc */ |
1025 | if (ps->s_txreq->phdr.hdr.hdr_type) | |
1026 | pbc |= PBC_PACKET_BYPASS | PBC_INSERT_BYPASS_ICRC; | |
1027 | else | |
1028 | pbc |= (ib_is_sc5(sc5) << PBC_DC_INFO_SHIFT); | |
a74d5307 MH |
1029 | |
1030 | if (unlikely(hfi1_dbg_should_fault_tx(qp, ps->opcode))) | |
566d53a8 | 1031 | pbc = hfi1_fault_tx(qp, ps->opcode, pbc); |
243d9f43 | 1032 | pbc = create_pbc(ppd, pbc, qp->srate_mbps, vl, plen); |
6b6cf935 KW |
1033 | |
1034 | /* Update HCRC based on packet opcode */ | |
1035 | pbc = update_hcrc(ps->opcode, pbc); | |
77241056 | 1036 | } |
14553ca1 MM |
1037 | if (cb) |
1038 | iowait_pio_inc(&priv->s_iowait); | |
1039 | pbuf = sc_buffer_alloc(sc, plen, cb, qp); | |
d125a6c6 | 1040 | if (unlikely(!pbuf)) { |
14553ca1 MM |
1041 | if (cb) |
1042 | verbs_pio_complete(qp, 0); | |
77241056 MM |
1043 | if (ppd->host_link_state != HLS_UP_ACTIVE) { |
1044 | /* | |
1045 | * If we have filled the PIO buffers to capacity and are | |
1046 | * not in an active state this request is not going to | |
1047 | * go out to so just complete it with an error or else a | |
1048 | * ULP or the core may be stuck waiting. | |
1049 | */ | |
1050 | hfi1_cdbg( | |
1051 | PIO, | |
1052 | "alloc failed. state not active, completing"); | |
1053 | wc_status = IB_WC_GENERAL_ERR; | |
1054 | goto pio_bail; | |
1055 | } else { | |
1056 | /* | |
1057 | * This is a normal occurrence. The PIO buffs are full | |
1058 | * up but we are still happily sending, well we could be | |
1059 | * so lets continue to queue the request. | |
1060 | */ | |
1061 | hfi1_cdbg(PIO, "alloc failed. state active, queuing"); | |
14553ca1 | 1062 | ret = pio_wait(qp, sc, ps, RVT_S_WAIT_PIO); |
711e104d | 1063 | if (!ret) |
14553ca1 | 1064 | /* txreq not queued - free */ |
711e104d MM |
1065 | goto bail; |
1066 | /* tx consumed in wait */ | |
1067 | return ret; | |
77241056 MM |
1068 | } |
1069 | } | |
1070 | ||
566d53a8 | 1071 | if (dwords == 0) { |
77241056 MM |
1072 | pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords); |
1073 | } else { | |
566d53a8 DH |
1074 | seg_pio_copy_start(pbuf, pbc, |
1075 | hdr, hdrwords * 4); | |
77241056 | 1076 | if (ss) { |
77241056 MM |
1077 | while (len) { |
1078 | void *addr = ss->sge.vaddr; | |
87fc34b5 | 1079 | u32 slen = rvt_get_sge_length(&ss->sge, len); |
77241056 | 1080 | |
1198fcea | 1081 | rvt_update_sge(ss, slen, false); |
77241056 MM |
1082 | seg_pio_copy_mid(pbuf, addr, slen); |
1083 | len -= slen; | |
1084 | } | |
77241056 | 1085 | } |
f8195f3b DH |
1086 | /* add icrc, lt byte, and padding to flit */ |
1087 | if (extra_bytes) | |
1088 | seg_pio_copy_mid(pbuf, trail_buf, extra_bytes); | |
566d53a8 | 1089 | |
566d53a8 | 1090 | seg_pio_copy_end(pbuf); |
77241056 MM |
1091 | } |
1092 | ||
1b311f89 | 1093 | update_tx_opstats(qp, ps, plen); |
1db78eee | 1094 | trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device), |
228d2af1 | 1095 | &ps->s_txreq->phdr.hdr, ib_is_sc5(sc5)); |
77241056 | 1096 | |
77241056 MM |
1097 | pio_bail: |
1098 | if (qp->s_wqe) { | |
1099 | spin_lock_irqsave(&qp->s_lock, flags); | |
116aa033 | 1100 | rvt_send_complete(qp, qp->s_wqe, wc_status); |
77241056 MM |
1101 | spin_unlock_irqrestore(&qp->s_lock, flags); |
1102 | } else if (qp->ibqp.qp_type == IB_QPT_RC) { | |
1103 | spin_lock_irqsave(&qp->s_lock, flags); | |
bb5df5f9 | 1104 | hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr); |
77241056 MM |
1105 | spin_unlock_irqrestore(&qp->s_lock, flags); |
1106 | } | |
bb5df5f9 DD |
1107 | |
1108 | ret = 0; | |
1109 | ||
1110 | bail: | |
1111 | hfi1_put_txreq(ps->s_txreq); | |
1112 | return ret; | |
77241056 | 1113 | } |
b91cc573 | 1114 | |
77241056 MM |
1115 | /* |
1116 | * egress_pkey_matches_entry - return 1 if the pkey matches ent (ent | |
e38d1e4f | 1117 | * being an entry from the partition key table), return 0 |
77241056 MM |
1118 | * otherwise. Use the matching criteria for egress partition keys |
1119 | * specified in the OPAv1 spec., section 9.1l.7. | |
1120 | */ | |
1121 | static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) | |
1122 | { | |
1123 | u16 mkey = pkey & PKEY_LOW_15_MASK; | |
e38d1e4f | 1124 | u16 mentry = ent & PKEY_LOW_15_MASK; |
77241056 | 1125 | |
e38d1e4f | 1126 | if (mkey == mentry) { |
77241056 MM |
1127 | /* |
1128 | * If pkey[15] is set (full partition member), | |
1129 | * is bit 15 in the corresponding table element | |
1130 | * clear (limited member)? | |
1131 | */ | |
1132 | if (pkey & PKEY_MEMBER_MASK) | |
1133 | return !!(ent & PKEY_MEMBER_MASK); | |
1134 | return 1; | |
1135 | } | |
1136 | return 0; | |
1137 | } | |
1138 | ||
e38d1e4f SS |
1139 | /** |
1140 | * egress_pkey_check - check P_KEY of a packet | |
566d53a8 DH |
1141 | * @ppd: Physical IB port data |
1142 | * @slid: SLID for packet | |
1143 | * @bkey: PKEY for header | |
1144 | * @sc5: SC for packet | |
e38d1e4f SS |
1145 | * @s_pkey_index: It will be used for look up optimization for kernel contexts |
1146 | * only. If it is negative value, then it means user contexts is calling this | |
1147 | * function. | |
1148 | * | |
1149 | * It checks if hdr's pkey is valid. | |
1150 | * | |
1151 | * Return: 0 on success, otherwise, 1 | |
77241056 | 1152 | */ |
566d53a8 | 1153 | int egress_pkey_check(struct hfi1_pportdata *ppd, u32 slid, u16 pkey, |
e38d1e4f | 1154 | u8 sc5, int8_t s_pkey_index) |
77241056 | 1155 | { |
77241056 | 1156 | struct hfi1_devdata *dd; |
e38d1e4f | 1157 | int i; |
e38d1e4f | 1158 | int is_user_ctxt_mechanism = (s_pkey_index < 0); |
77241056 MM |
1159 | |
1160 | if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT)) | |
1161 | return 0; | |
1162 | ||
77241056 MM |
1163 | /* If SC15, pkey[0:14] must be 0x7fff */ |
1164 | if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK)) | |
1165 | goto bad; | |
1166 | ||
77241056 MM |
1167 | /* Is the pkey = 0x0, or 0x8000? */ |
1168 | if ((pkey & PKEY_LOW_15_MASK) == 0) | |
1169 | goto bad; | |
1170 | ||
e38d1e4f SS |
1171 | /* |
1172 | * For the kernel contexts only, if a qp is passed into the function, | |
1173 | * the most likely matching pkey has index qp->s_pkey_index | |
1174 | */ | |
1175 | if (!is_user_ctxt_mechanism && | |
1176 | egress_pkey_matches_entry(pkey, ppd->pkeys[s_pkey_index])) { | |
1177 | return 0; | |
77241056 MM |
1178 | } |
1179 | ||
e38d1e4f SS |
1180 | for (i = 0; i < MAX_PKEY_VALUES; i++) { |
1181 | if (egress_pkey_matches_entry(pkey, ppd->pkeys[i])) | |
1182 | return 0; | |
1183 | } | |
77241056 | 1184 | bad: |
e38d1e4f SS |
1185 | /* |
1186 | * For the user-context mechanism, the P_KEY check would only happen | |
1187 | * once per SDMA request, not once per packet. Therefore, there's no | |
1188 | * need to increment the counter for the user-context mechanism. | |
1189 | */ | |
1190 | if (!is_user_ctxt_mechanism) { | |
1191 | incr_cntr64(&ppd->port_xmit_constraint_errors); | |
1192 | dd = ppd->dd; | |
1193 | if (!(dd->err_info_xmit_constraint.status & | |
1194 | OPA_EI_STATUS_SMASK)) { | |
e38d1e4f SS |
1195 | dd->err_info_xmit_constraint.status |= |
1196 | OPA_EI_STATUS_SMASK; | |
1197 | dd->err_info_xmit_constraint.slid = slid; | |
1198 | dd->err_info_xmit_constraint.pkey = pkey; | |
1199 | } | |
77241056 MM |
1200 | } |
1201 | return 1; | |
1202 | } | |
1203 | ||
14553ca1 MM |
1204 | /** |
1205 | * get_send_routine - choose an egress routine | |
1206 | * | |
1207 | * Choose an egress routine based on QP type | |
1208 | * and size | |
1209 | */ | |
1210 | static inline send_routine get_send_routine(struct rvt_qp *qp, | |
566d53a8 | 1211 | struct hfi1_pkt_state *ps) |
14553ca1 MM |
1212 | { |
1213 | struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); | |
1214 | struct hfi1_qp_priv *priv = qp->priv; | |
566d53a8 | 1215 | struct verbs_txreq *tx = ps->s_txreq; |
14553ca1 MM |
1216 | |
1217 | if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA))) | |
1218 | return dd->process_pio_send; | |
1219 | switch (qp->ibqp.qp_type) { | |
1220 | case IB_QPT_SMI: | |
1221 | return dd->process_pio_send; | |
1222 | case IB_QPT_GSI: | |
1223 | case IB_QPT_UD: | |
14553ca1 | 1224 | break; |
14553ca1 | 1225 | case IB_QPT_UC: |
270a9833 MM |
1226 | case IB_QPT_RC: |
1227 | priv->s_running_pkt_size = | |
1228 | (tx->s_cur_size + priv->s_running_pkt_size) / 2; | |
14553ca1 | 1229 | if (piothreshold && |
270a9833 | 1230 | priv->s_running_pkt_size <= min(piothreshold, qp->pmtu) && |
566d53a8 | 1231 | (BIT(ps->opcode & OPMASK) & pio_opmask[ps->opcode >> 5]) && |
47177f1b MM |
1232 | iowait_sdma_pending(&priv->s_iowait) == 0 && |
1233 | !sdma_txreq_built(&tx->txreq)) | |
14553ca1 MM |
1234 | return dd->process_pio_send; |
1235 | break; | |
1236 | default: | |
1237 | break; | |
1238 | } | |
1239 | return dd->process_dma_send; | |
1240 | } | |
1241 | ||
77241056 MM |
1242 | /** |
1243 | * hfi1_verbs_send - send a packet | |
1244 | * @qp: the QP to send on | |
d46e5144 | 1245 | * @ps: the state of the packet to send |
77241056 MM |
1246 | * |
1247 | * Return zero if packet is sent or queued OK. | |
54d10c1e | 1248 | * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise. |
77241056 | 1249 | */ |
895420dd | 1250 | int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) |
77241056 MM |
1251 | { |
1252 | struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); | |
47177f1b | 1253 | struct hfi1_qp_priv *priv = qp->priv; |
81cd3891 | 1254 | struct ib_other_headers *ohdr = NULL; |
14553ca1 | 1255 | send_routine sr; |
77241056 | 1256 | int ret; |
566d53a8 DH |
1257 | u16 pkey; |
1258 | u32 slid; | |
81cd3891 | 1259 | u8 l4 = 0; |
e38d1e4f | 1260 | |
e38d1e4f | 1261 | /* locate the pkey within the headers */ |
566d53a8 DH |
1262 | if (ps->s_txreq->phdr.hdr.hdr_type) { |
1263 | struct hfi1_16b_header *hdr = &ps->s_txreq->phdr.hdr.opah; | |
566d53a8 | 1264 | |
81cd3891 DH |
1265 | l4 = hfi1_16B_get_l4(hdr); |
1266 | if (l4 == OPA_16B_L4_IB_LOCAL) | |
566d53a8 | 1267 | ohdr = &hdr->u.oth; |
81cd3891 DH |
1268 | else if (l4 == OPA_16B_L4_IB_GLOBAL) |
1269 | ohdr = &hdr->u.l.oth; | |
1270 | ||
566d53a8 DH |
1271 | slid = hfi1_16B_get_slid(hdr); |
1272 | pkey = hfi1_16B_get_pkey(hdr); | |
1273 | } else { | |
1274 | struct ib_header *hdr = &ps->s_txreq->phdr.hdr.ibh; | |
1275 | u8 lnh = ib_get_lnh(hdr); | |
1276 | ||
1277 | if (lnh == HFI1_LRH_GRH) | |
1278 | ohdr = &hdr->u.l.oth; | |
1279 | else | |
1280 | ohdr = &hdr->u.oth; | |
1281 | slid = ib_get_slid(hdr); | |
1282 | pkey = ib_bth_get_pkey(ohdr); | |
1283 | } | |
1284 | ||
81cd3891 DH |
1285 | if (likely(l4 != OPA_16B_L4_FM)) |
1286 | ps->opcode = ib_bth_get_opcode(ohdr); | |
1287 | else | |
1288 | ps->opcode = IB_OPCODE_UD_SEND_ONLY; | |
1289 | ||
566d53a8 DH |
1290 | sr = get_send_routine(qp, ps); |
1291 | ret = egress_pkey_check(dd->pport, slid, pkey, | |
1292 | priv->s_sc, qp->s_pkey_index); | |
77241056 MM |
1293 | if (unlikely(ret)) { |
1294 | /* | |
1295 | * The value we are returning here does not get propagated to | |
1296 | * the verbs caller. Thus we need to complete the request with | |
1297 | * error otherwise the caller could be sitting waiting on the | |
1298 | * completion event. Only do this for PIO. SDMA has its own | |
1299 | * mechanism for handling the errors. So for SDMA we can just | |
1300 | * return. | |
1301 | */ | |
14553ca1 MM |
1302 | if (sr == dd->process_pio_send) { |
1303 | unsigned long flags; | |
1304 | ||
77241056 MM |
1305 | hfi1_cdbg(PIO, "%s() Failed. Completing with err", |
1306 | __func__); | |
1307 | spin_lock_irqsave(&qp->s_lock, flags); | |
116aa033 | 1308 | rvt_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); |
77241056 MM |
1309 | spin_unlock_irqrestore(&qp->s_lock, flags); |
1310 | } | |
1311 | return -EINVAL; | |
1312 | } | |
47177f1b MM |
1313 | if (sr == dd->process_dma_send && iowait_pio_pending(&priv->s_iowait)) |
1314 | return pio_wait(qp, | |
1315 | ps->s_txreq->psc, | |
1316 | ps, | |
2e2ba09e | 1317 | HFI1_S_WAIT_PIO_DRAIN); |
14553ca1 | 1318 | return sr(qp, ps, 0); |
77241056 MM |
1319 | } |
1320 | ||
94d5171c HC |
1321 | /** |
1322 | * hfi1_fill_device_attr - Fill in rvt dev info device attributes. | |
1323 | * @dd: the device data structure | |
1324 | */ | |
1325 | static void hfi1_fill_device_attr(struct hfi1_devdata *dd) | |
77241056 | 1326 | { |
94d5171c | 1327 | struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; |
5e6e9424 | 1328 | u32 ver = dd->dc8051_ver; |
94d5171c HC |
1329 | |
1330 | memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props)); | |
1331 | ||
5e6e9424 MR |
1332 | rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 32) | |
1333 | ((u64)(dc8051_ver_min(ver)) << 16) | | |
1334 | (u64)dc8051_ver_patch(ver); | |
1335 | ||
94d5171c HC |
1336 | rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | |
1337 | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | | |
1338 | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | | |
c72cfe3e | 1339 | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | |
2280740f VN |
1340 | IB_DEVICE_MEM_MGT_EXTENSIONS | |
1341 | IB_DEVICE_RDMA_NETDEV_OPA_VNIC; | |
94d5171c HC |
1342 | rdi->dparms.props.page_size_cap = PAGE_SIZE; |
1343 | rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; | |
1344 | rdi->dparms.props.vendor_part_id = dd->pcidev->device; | |
1345 | rdi->dparms.props.hw_ver = dd->minrev; | |
1346 | rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid; | |
c72cfe3e JX |
1347 | rdi->dparms.props.max_mr_size = U64_MAX; |
1348 | rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX; | |
94d5171c | 1349 | rdi->dparms.props.max_qp = hfi1_max_qps; |
3c6cb20a KW |
1350 | rdi->dparms.props.max_qp_wr = |
1351 | (hfi1_max_qp_wrs >= HFI1_QP_WQE_INVALID ? | |
1352 | HFI1_QP_WQE_INVALID - 1 : hfi1_max_qp_wrs); | |
33023fb8 SW |
1353 | rdi->dparms.props.max_send_sge = hfi1_max_sges; |
1354 | rdi->dparms.props.max_recv_sge = hfi1_max_sges; | |
94d5171c HC |
1355 | rdi->dparms.props.max_sge_rd = hfi1_max_sges; |
1356 | rdi->dparms.props.max_cq = hfi1_max_cqs; | |
1357 | rdi->dparms.props.max_ah = hfi1_max_ahs; | |
1358 | rdi->dparms.props.max_cqe = hfi1_max_cqes; | |
94d5171c HC |
1359 | rdi->dparms.props.max_map_per_fmr = 32767; |
1360 | rdi->dparms.props.max_pd = hfi1_max_pds; | |
1361 | rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC; | |
1362 | rdi->dparms.props.max_qp_init_rd_atom = 255; | |
1363 | rdi->dparms.props.max_srq = hfi1_max_srqs; | |
1364 | rdi->dparms.props.max_srq_wr = hfi1_max_srq_wrs; | |
1365 | rdi->dparms.props.max_srq_sge = hfi1_max_srq_sges; | |
1366 | rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB; | |
1367 | rdi->dparms.props.max_pkeys = hfi1_get_npkeys(dd); | |
1368 | rdi->dparms.props.max_mcast_grp = hfi1_max_mcast_grps; | |
1369 | rdi->dparms.props.max_mcast_qp_attach = hfi1_max_mcast_qp_attached; | |
1370 | rdi->dparms.props.max_total_mcast_qp_attach = | |
1371 | rdi->dparms.props.max_mcast_qp_attach * | |
1372 | rdi->dparms.props.max_mcast_grp; | |
77241056 MM |
1373 | } |
1374 | ||
1375 | static inline u16 opa_speed_to_ib(u16 in) | |
1376 | { | |
1377 | u16 out = 0; | |
1378 | ||
1379 | if (in & OPA_LINK_SPEED_25G) | |
1380 | out |= IB_SPEED_EDR; | |
1381 | if (in & OPA_LINK_SPEED_12_5G) | |
1382 | out |= IB_SPEED_FDR; | |
1383 | ||
1384 | return out; | |
1385 | } | |
1386 | ||
1387 | /* | |
1388 | * Convert a single OPA link width (no multiple flags) to an IB value. | |
1389 | * A zero OPA link width means link down, which means the IB width value | |
1390 | * is a don't care. | |
1391 | */ | |
1392 | static inline u16 opa_width_to_ib(u16 in) | |
1393 | { | |
1394 | switch (in) { | |
1395 | case OPA_LINK_WIDTH_1X: | |
1396 | /* map 2x and 3x to 1x as they don't exist in IB */ | |
1397 | case OPA_LINK_WIDTH_2X: | |
1398 | case OPA_LINK_WIDTH_3X: | |
1399 | return IB_WIDTH_1X; | |
1400 | default: /* link down or unknown, return our largest width */ | |
1401 | case OPA_LINK_WIDTH_4X: | |
1402 | return IB_WIDTH_4X; | |
1403 | } | |
1404 | } | |
1405 | ||
45b59eef | 1406 | static int query_port(struct rvt_dev_info *rdi, u8 port_num, |
77241056 MM |
1407 | struct ib_port_attr *props) |
1408 | { | |
45b59eef HC |
1409 | struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); |
1410 | struct hfi1_devdata *dd = dd_from_dev(verbs_dev); | |
1411 | struct hfi1_pportdata *ppd = &dd->pport[port_num - 1]; | |
51e658f5 | 1412 | u32 lid = ppd->lid; |
77241056 | 1413 | |
c4550c63 | 1414 | /* props being zeroed by the caller, avoid zeroing it here */ |
77241056 MM |
1415 | props->lid = lid ? lid : 0; |
1416 | props->lmc = ppd->lmc; | |
77241056 MM |
1417 | /* OPA logical states match IB logical states */ |
1418 | props->state = driver_lstate(ppd); | |
bec7c79c | 1419 | props->phys_state = driver_pstate(ppd); |
77241056 | 1420 | props->gid_tbl_len = HFI1_GUIDS_PER_PORT; |
77241056 MM |
1421 | props->active_width = (u8)opa_width_to_ib(ppd->link_width_active); |
1422 | /* see rate_show() in ib core/sysfs.c */ | |
1423 | props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active); | |
1424 | props->max_vl_num = ppd->vls_supported; | |
77241056 MM |
1425 | |
1426 | /* Once we are a "first class" citizen and have added the OPA MTUs to | |
1427 | * the core we can advertise the larger MTU enum to the ULPs, for now | |
1428 | * advertise only 4K. | |
1429 | * | |
1430 | * Those applications which are either OPA aware or pass the MTU enum | |
1431 | * from the Path Records to us will get the new 8k MTU. Those that | |
1432 | * attempt to process the MTU enum may fail in various ways. | |
1433 | */ | |
1434 | props->max_mtu = mtu_to_enum((!valid_ib_mtu(hfi1_max_mtu) ? | |
1435 | 4096 : hfi1_max_mtu), IB_MTU_4096); | |
1436 | props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : | |
69a3ffaa | 1437 | mtu_to_enum(ppd->ibmtu, IB_MTU_4096); |
77241056 MM |
1438 | |
1439 | return 0; | |
1440 | } | |
1441 | ||
1442 | static int modify_device(struct ib_device *device, | |
1443 | int device_modify_mask, | |
1444 | struct ib_device_modify *device_modify) | |
1445 | { | |
1446 | struct hfi1_devdata *dd = dd_from_ibdev(device); | |
1447 | unsigned i; | |
1448 | int ret; | |
1449 | ||
1450 | if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | | |
1451 | IB_DEVICE_MODIFY_NODE_DESC)) { | |
1452 | ret = -EOPNOTSUPP; | |
1453 | goto bail; | |
1454 | } | |
1455 | ||
1456 | if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) { | |
bd99fdea YS |
1457 | memcpy(device->node_desc, device_modify->node_desc, |
1458 | IB_DEVICE_NODE_DESC_MAX); | |
77241056 MM |
1459 | for (i = 0; i < dd->num_pports; i++) { |
1460 | struct hfi1_ibport *ibp = &dd->pport[i].ibport_data; | |
1461 | ||
1462 | hfi1_node_desc_chg(ibp); | |
1463 | } | |
1464 | } | |
1465 | ||
1466 | if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) { | |
1467 | ib_hfi1_sys_image_guid = | |
1468 | cpu_to_be64(device_modify->sys_image_guid); | |
1469 | for (i = 0; i < dd->num_pports; i++) { | |
1470 | struct hfi1_ibport *ibp = &dd->pport[i].ibport_data; | |
1471 | ||
1472 | hfi1_sys_guid_chg(ibp); | |
1473 | } | |
1474 | } | |
1475 | ||
1476 | ret = 0; | |
1477 | ||
1478 | bail: | |
1479 | return ret; | |
1480 | } | |
1481 | ||
45b59eef | 1482 | static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num) |
77241056 | 1483 | { |
45b59eef HC |
1484 | struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); |
1485 | struct hfi1_devdata *dd = dd_from_dev(verbs_dev); | |
1486 | struct hfi1_pportdata *ppd = &dd->pport[port_num - 1]; | |
1487 | int ret; | |
77241056 | 1488 | |
45b59eef HC |
1489 | set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0, |
1490 | OPA_LINKDOWN_REASON_UNKNOWN); | |
1491 | ret = set_link_state(ppd, HLS_DN_DOWNDEF); | |
77241056 MM |
1492 | return ret; |
1493 | } | |
1494 | ||
25131463 DD |
1495 | static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, |
1496 | int guid_index, __be64 *guid) | |
77241056 | 1497 | { |
25131463 | 1498 | struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp); |
77241056 | 1499 | |
a6cd5f08 | 1500 | if (guid_index >= HFI1_GUIDS_PER_PORT) |
25131463 | 1501 | return -EINVAL; |
77241056 | 1502 | |
a6cd5f08 | 1503 | *guid = get_sguid(ibp, guid_index); |
25131463 | 1504 | return 0; |
77241056 MM |
1505 | } |
1506 | ||
77241056 MM |
1507 | /* |
1508 | * convert ah port,sl to sc | |
1509 | */ | |
90898850 | 1510 | u8 ah_to_sc(struct ib_device *ibdev, struct rdma_ah_attr *ah) |
77241056 | 1511 | { |
d8966fcd | 1512 | struct hfi1_ibport *ibp = to_iport(ibdev, rdma_ah_get_port_num(ah)); |
77241056 | 1513 | |
d8966fcd | 1514 | return ibp->sl_to_sc[rdma_ah_get_sl(ah)]; |
77241056 MM |
1515 | } |
1516 | ||
90898850 | 1517 | static int hfi1_check_ah(struct ib_device *ibdev, struct rdma_ah_attr *ah_attr) |
77241056 MM |
1518 | { |
1519 | struct hfi1_ibport *ibp; | |
1520 | struct hfi1_pportdata *ppd; | |
1521 | struct hfi1_devdata *dd; | |
1522 | u8 sc5; | |
0dbfaa9f | 1523 | u8 sl; |
77241056 | 1524 | |
13c19222 DH |
1525 | if (hfi1_check_mcast(rdma_ah_get_dlid(ah_attr)) && |
1526 | !(rdma_ah_get_ah_flags(ah_attr) & IB_AH_GRH)) | |
1527 | return -EINVAL; | |
1528 | ||
77241056 | 1529 | /* test the mapping for validity */ |
d8966fcd | 1530 | ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr)); |
77241056 | 1531 | ppd = ppd_from_ibp(ibp); |
77241056 | 1532 | dd = dd_from_ppd(ppd); |
0dbfaa9f IW |
1533 | |
1534 | sl = rdma_ah_get_sl(ah_attr); | |
1535 | if (sl >= ARRAY_SIZE(ibp->sl_to_sc)) | |
1536 | return -EINVAL; | |
1537 | ||
1538 | sc5 = ibp->sl_to_sc[sl]; | |
77241056 | 1539 | if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf) |
15723f06 | 1540 | return -EINVAL; |
77241056 | 1541 | return 0; |
77241056 MM |
1542 | } |
1543 | ||
/*
 * hfi1_notify_new_ah - rdmavt hook called while a new AH is being created;
 * derives and caches the VL and log_pmtu on the rvt_ah.
 *
 * NOTE(review): despite the warning comment below, the SL is read from
 * ah->attr rather than from the @ah_attr parameter - presumably rdmavt has
 * already copied ah_attr into ah->attr at this point; confirm against the
 * rvt_create_ah caller.
 */
static void hfi1_notify_new_ah(struct ib_device *ibdev,
			       struct rdma_ah_attr *ah_attr,
			       struct rvt_ah *ah)
{
	struct hfi1_ibport *ibp;
	struct hfi1_pportdata *ppd;
	struct hfi1_devdata *dd;
	u8 sc5;
	struct rdma_ah_attr *attr = &ah->attr;

	/*
	 * Do not trust reading anything from rvt_ah at this point as it is not
	 * done being setup. We can however modify things which we need to set.
	 */

	ibp = to_iport(ibdev, rdma_ah_get_port_num(ah_attr));
	ppd = ppd_from_ibp(ibp);
	/* Capture the SC before hfi1_update_ah_attr() may rewrite the attr. */
	sc5 = ibp->sl_to_sc[rdma_ah_get_sl(&ah->attr)];
	hfi1_update_ah_attr(ibdev, attr);
	hfi1_make_opa_lid(attr);
	dd = dd_from_ppd(ppd);
	/* Cache the VL; log_pmtu is only set for data VLs and VL15. */
	ah->vl = sc_to_vlt(dd, sc5);
	if (ah->vl < num_vls || ah->vl == 15)
		ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu);
}
1569 | ||
77241056 MM |
1570 | /** |
1571 | * hfi1_get_npkeys - return the size of the PKEY table for context 0 | |
1572 | * @dd: the hfi1_ib device | |
1573 | */ | |
1574 | unsigned hfi1_get_npkeys(struct hfi1_devdata *dd) | |
1575 | { | |
1576 | return ARRAY_SIZE(dd->pport[0].pkeys); | |
1577 | } | |
1578 | ||
77241056 MM |
1579 | static void init_ibport(struct hfi1_pportdata *ppd) |
1580 | { | |
1581 | struct hfi1_ibport *ibp = &ppd->ibport_data; | |
1582 | size_t sz = ARRAY_SIZE(ibp->sl_to_sc); | |
1583 | int i; | |
1584 | ||
1585 | for (i = 0; i < sz; i++) { | |
1586 | ibp->sl_to_sc[i] = i; | |
1587 | ibp->sc_to_sl[i] = i; | |
1588 | } | |
1589 | ||
bf90aadd MR |
1590 | for (i = 0; i < RVT_MAX_TRAP_LISTS ; i++) |
1591 | INIT_LIST_HEAD(&ibp->rvp.trap_lists[i].list); | |
8064135e | 1592 | timer_setup(&ibp->rvp.trap_timer, hfi1_handle_trap_timer, 0); |
bf90aadd | 1593 | |
4eb06882 | 1594 | spin_lock_init(&ibp->rvp.lock); |
77241056 | 1595 | /* Set the prefix to the default value (see ch. 4.1.1) */ |
4eb06882 DD |
1596 | ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX; |
1597 | ibp->rvp.sm_lid = 0; | |
cb49366f VN |
1598 | /* |
1599 | * Below should only set bits defined in OPA PortInfo.CapabilityMask | |
1600 | * and PortInfo.CapabilityMask3 | |
1601 | */ | |
4eb06882 | 1602 | ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP | |
77241056 | 1603 | IB_PORT_CAP_MASK_NOTICE_SUP; |
cb49366f | 1604 | ibp->rvp.port_cap3_flags = OPA_CAP_MASK3_IsSharedSpaceSupported; |
4eb06882 DD |
1605 | ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; |
1606 | ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; | |
1607 | ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; | |
1608 | ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; | |
1609 | ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; | |
1610 | ||
1611 | RCU_INIT_POINTER(ibp->rvp.qp[0], NULL); | |
1612 | RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); | |
77241056 MM |
1613 | } |
1614 | ||
9abb0d1b | 1615 | static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str) |
939b6ca8 IW |
1616 | { |
1617 | struct rvt_dev_info *rdi = ib_to_rvt(ibdev); | |
1618 | struct hfi1_ibdev *dev = dev_from_rdi(rdi); | |
5e6e9424 | 1619 | u32 ver = dd_from_dev(dev)->dc8051_ver; |
939b6ca8 | 1620 | |
9abb0d1b | 1621 | snprintf(str, IB_FW_VERSION_NAME_MAX, "%u.%u.%u", dc8051_ver_maj(ver), |
5e6e9424 | 1622 | dc8051_ver_min(ver), dc8051_ver_patch(ver)); |
939b6ca8 IW |
1623 | } |
1624 | ||
/*
 * Names of the driver-private counters appended after the hardware device
 * counters by alloc_hw_stats() and filled in by get_hw_stats().
 */
static const char * const driver_cntr_names[] = {
	/* must be element 0 - get_hw_stats() overwrites this slot with hfi1_sps_ints() */
	"DRIVER_KernIntr",
	"DRIVER_ErrorIntr",
	"DRIVER_Tx_Errs",
	"DRIVER_Rcv_Errs",
	"DRIVER_HW_Errs",
	"DRIVER_NoPIOBufs",
	"DRIVER_CtxtsOpen",
	"DRIVER_RcvLen_Errs",
	"DRIVER_EgrBufFull",
	"DRIVER_EgrHdrFull"
};
1638 | ||
static DEFINE_MUTEX(cntr_names_lock); /* protects the *_cntr_names buffers */
/* Lazily built by alloc_hw_stats(), freed by hfi1_unregister_ib_device(). */
static const char **dev_cntr_names;
static const char **port_cntr_names;
int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
static int num_dev_cntrs;
static int num_port_cntrs;
static int cntr_names_initialized; /* set once the buffers are built; guarded by cntr_names_lock */
1646 | ||
1647 | /* | |
1648 | * Convert a list of names separated by '\n' into an array of NULL terminated | |
1649 | * strings. Optionally some entries can be reserved in the array to hold extra | |
1650 | * external strings. | |
1651 | */ | |
1652 | static int init_cntr_names(const char *names_in, | |
64b2ae74 | 1653 | const size_t names_len, |
b7481944 JX |
1654 | int num_extra_names, |
1655 | int *num_cntrs, | |
1656 | const char ***cntr_names) | |
1657 | { | |
1658 | char *names_out, *p, **q; | |
1659 | int i, n; | |
1660 | ||
1661 | n = 0; | |
1662 | for (i = 0; i < names_len; i++) | |
1663 | if (names_in[i] == '\n') | |
1664 | n++; | |
1665 | ||
1666 | names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len, | |
1667 | GFP_KERNEL); | |
1668 | if (!names_out) { | |
1669 | *num_cntrs = 0; | |
1670 | *cntr_names = NULL; | |
1671 | return -ENOMEM; | |
1672 | } | |
1673 | ||
1674 | p = names_out + (n + num_extra_names) * sizeof(char *); | |
1675 | memcpy(p, names_in, names_len); | |
1676 | ||
1677 | q = (char **)names_out; | |
1678 | for (i = 0; i < n; i++) { | |
1679 | q[i] = p; | |
1680 | p = strchr(p, '\n'); | |
1681 | *p++ = '\0'; | |
1682 | } | |
1683 | ||
1684 | *num_cntrs = n; | |
1685 | *cntr_names = (const char **)names_out; | |
1686 | return 0; | |
1687 | } | |
1688 | ||
1689 | static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, | |
1690 | u8 port_num) | |
1691 | { | |
1692 | int i, err; | |
1693 | ||
62eed66e | 1694 | mutex_lock(&cntr_names_lock); |
b7481944 JX |
1695 | if (!cntr_names_initialized) { |
1696 | struct hfi1_devdata *dd = dd_from_ibdev(ibdev); | |
1697 | ||
1698 | err = init_cntr_names(dd->cntrnames, | |
1699 | dd->cntrnameslen, | |
1700 | num_driver_cntrs, | |
1701 | &num_dev_cntrs, | |
1702 | &dev_cntr_names); | |
62eed66e TS |
1703 | if (err) { |
1704 | mutex_unlock(&cntr_names_lock); | |
b7481944 | 1705 | return NULL; |
62eed66e | 1706 | } |
b7481944 JX |
1707 | |
1708 | for (i = 0; i < num_driver_cntrs; i++) | |
1709 | dev_cntr_names[num_dev_cntrs + i] = | |
1710 | driver_cntr_names[i]; | |
1711 | ||
1712 | err = init_cntr_names(dd->portcntrnames, | |
1713 | dd->portcntrnameslen, | |
1714 | 0, | |
1715 | &num_port_cntrs, | |
1716 | &port_cntr_names); | |
1717 | if (err) { | |
1718 | kfree(dev_cntr_names); | |
1719 | dev_cntr_names = NULL; | |
62eed66e | 1720 | mutex_unlock(&cntr_names_lock); |
b7481944 JX |
1721 | return NULL; |
1722 | } | |
1723 | cntr_names_initialized = 1; | |
1724 | } | |
62eed66e | 1725 | mutex_unlock(&cntr_names_lock); |
b7481944 JX |
1726 | |
1727 | if (!port_num) | |
1728 | return rdma_alloc_hw_stats_struct( | |
1729 | dev_cntr_names, | |
1730 | num_dev_cntrs + num_driver_cntrs, | |
1731 | RDMA_HW_STATS_DEFAULT_LIFESPAN); | |
1732 | else | |
1733 | return rdma_alloc_hw_stats_struct( | |
1734 | port_cntr_names, | |
1735 | num_port_cntrs, | |
1736 | RDMA_HW_STATS_DEFAULT_LIFESPAN); | |
1737 | } | |
1738 | ||
1739 | static u64 hfi1_sps_ints(void) | |
1740 | { | |
03b92789 | 1741 | unsigned long index, flags; |
b7481944 JX |
1742 | struct hfi1_devdata *dd; |
1743 | u64 sps_ints = 0; | |
1744 | ||
03b92789 MW |
1745 | xa_lock_irqsave(&hfi1_dev_table, flags); |
1746 | xa_for_each(&hfi1_dev_table, index, dd) { | |
b7481944 JX |
1747 | sps_ints += get_all_cpu_total(dd->int_counter); |
1748 | } | |
03b92789 | 1749 | xa_unlock_irqrestore(&hfi1_dev_table, flags); |
b7481944 JX |
1750 | return sps_ints; |
1751 | } | |
1752 | ||
1753 | static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, | |
1754 | u8 port, int index) | |
1755 | { | |
1756 | u64 *values; | |
1757 | int count; | |
1758 | ||
1759 | if (!port) { | |
1760 | u64 *stats = (u64 *)&hfi1_stats; | |
1761 | int i; | |
1762 | ||
1763 | hfi1_read_cntrs(dd_from_ibdev(ibdev), NULL, &values); | |
1764 | values[num_dev_cntrs] = hfi1_sps_ints(); | |
1765 | for (i = 1; i < num_driver_cntrs; i++) | |
1766 | values[num_dev_cntrs + i] = stats[i]; | |
1767 | count = num_dev_cntrs + num_driver_cntrs; | |
1768 | } else { | |
1769 | struct hfi1_ibport *ibp = to_iport(ibdev, port); | |
1770 | ||
1771 | hfi1_read_portcntrs(ppd_from_ibp(ibp), NULL, &values); | |
1772 | count = num_port_cntrs; | |
1773 | } | |
1774 | ||
1775 | memcpy(stats->value, values, count * sizeof(u64)); | |
1776 | return count; | |
1777 | } | |
1778 | ||
/* ib_device_ops that hfi1 implements itself; rdmavt supplies the rest. */
static const struct ib_device_ops hfi1_dev_ops = {
	.alloc_hw_stats = alloc_hw_stats,
	.alloc_rdma_netdev = hfi1_vnic_alloc_rn,
	.get_dev_fw_str = hfi1_get_dev_fw_str,
	.get_hw_stats = get_hw_stats,
	.init_port = hfi1_create_port_files,
	.modify_device = modify_device,
	/* keep process mad in the driver */
	.process_mad = hfi1_process_mad,
};
1789 | ||
77241056 MM |
1790 | /** |
1791 | * hfi1_register_ib_device - register our device with the infiniband core | |
1792 | * @dd: the device data structure | |
1793 | * Return 0 if successful, errno if unsuccessful. | |
1794 | */ | |
1795 | int hfi1_register_ib_device(struct hfi1_devdata *dd) | |
1796 | { | |
1797 | struct hfi1_ibdev *dev = &dd->verbs_dev; | |
ec3f2c12 | 1798 | struct ib_device *ibdev = &dev->rdi.ibdev; |
77241056 | 1799 | struct hfi1_pportdata *ppd = dd->pport; |
a6cd5f08 | 1800 | struct hfi1_ibport *ibp = &ppd->ibport_data; |
895420dd | 1801 | unsigned i; |
77241056 | 1802 | int ret; |
77241056 | 1803 | |
77241056 MM |
1804 | for (i = 0; i < dd->num_pports; i++) |
1805 | init_ibport(ppd + i); | |
1806 | ||
1807 | /* Only need to initialize non-zero fields. */ | |
4f87ccfc | 1808 | |
8064135e | 1809 | timer_setup(&dev->mem_timer, mem_timer, 0); |
77241056 | 1810 | |
77241056 | 1811 | seqlock_init(&dev->iowait_lock); |
4e045572 | 1812 | seqlock_init(&dev->txwait_lock); |
77241056 MM |
1813 | INIT_LIST_HEAD(&dev->txwait); |
1814 | INIT_LIST_HEAD(&dev->memwait); | |
1815 | ||
45842abb MM |
1816 | ret = verbs_txreq_init(dev); |
1817 | if (ret) | |
77241056 | 1818 | goto err_verbs_txreq; |
77241056 | 1819 | |
a6cd5f08 JP |
1820 | /* Use first-port GUID as node guid */ |
1821 | ibdev->node_guid = get_sguid(ibp, HFI1_PORT_GUID_INDEX); | |
1822 | ||
77241056 MM |
1823 | /* |
1824 | * The system image GUID is supposed to be the same for all | |
1825 | * HFIs in a single system but since there can be other | |
1826 | * device types in the system, we can't be sure this is unique. | |
1827 | */ | |
1828 | if (!ib_hfi1_sys_image_guid) | |
a6cd5f08 | 1829 | ib_hfi1_sys_image_guid = ibdev->node_guid; |
77241056 | 1830 | ibdev->owner = THIS_MODULE; |
77241056 | 1831 | ibdev->phys_port_cnt = dd->num_pports; |
3067771c | 1832 | ibdev->dev.parent = &dd->pcidev->dev; |
4331629f | 1833 | |
e3c320ca | 1834 | ib_set_device_ops(ibdev, &hfi1_dev_ops); |
77241056 | 1835 | |
522628ed | 1836 | strlcpy(ibdev->node_desc, init_utsname()->nodename, |
77241056 MM |
1837 | sizeof(ibdev->node_desc)); |
1838 | ||
ec3f2c12 DD |
1839 | /* |
1840 | * Fill in rvt info object. | |
1841 | */ | |
49dbb6cf | 1842 | dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev; |
15723f06 | 1843 | dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah; |
8f1764fa | 1844 | dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah; |
25131463 | 1845 | dd->verbs_dev.rdi.driver_f.get_guid_be = hfi1_get_guid_be; |
45b59eef HC |
1846 | dd->verbs_dev.rdi.driver_f.query_port_state = query_port; |
1847 | dd->verbs_dev.rdi.driver_f.shut_down_port = shut_down_port; | |
1848 | dd->verbs_dev.rdi.driver_f.cap_mask_chg = hfi1_cap_mask_chg; | |
94d5171c HC |
1849 | /* |
1850 | * Fill in rvt info device attributes. | |
1851 | */ | |
1852 | hfi1_fill_device_attr(dd); | |
a2c2d608 DD |
1853 | |
1854 | /* queue pair */ | |
a2c2d608 DD |
1855 | dd->verbs_dev.rdi.dparms.qp_table_size = hfi1_qp_table_size; |
1856 | dd->verbs_dev.rdi.dparms.qpn_start = 0; | |
1857 | dd->verbs_dev.rdi.dparms.qpn_inc = 1; | |
1858 | dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift; | |
1859 | dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16; | |
1860 | dd->verbs_dev.rdi.dparms.qpn_res_end = | |
abd712da | 1861 | dd->verbs_dev.rdi.dparms.qpn_res_start + 65535; |
ec4274f1 DD |
1862 | dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC; |
1863 | dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK; | |
1864 | dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT; | |
1865 | dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK; | |
7221403d DC |
1866 | dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA | |
1867 | RDMA_CORE_CAP_OPA_AH; | |
45b59eef HC |
1868 | dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE; |
1869 | ||
a2c2d608 | 1870 | dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; |
5190f052 | 1871 | dd->verbs_dev.rdi.driver_f.qp_priv_init = hfi1_qp_priv_init; |
a2c2d608 DD |
1872 | dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; |
1873 | dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; | |
1874 | dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; | |
b6eac931 | 1875 | dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send_from_rvt; |
83693bd1 | 1876 | dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send; |
46a80d62 | 1877 | dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send; |
ec4274f1 DD |
1878 | dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; |
1879 | dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; | |
1880 | dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters; | |
1881 | dd->verbs_dev.rdi.driver_f.stop_send_queue = stop_send_queue; | |
1882 | dd->verbs_dev.rdi.driver_f.quiesce_qp = quiesce_qp; | |
1883 | dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; | |
1884 | dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp; | |
1885 | dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu; | |
1886 | dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp; | |
1887 | dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; | |
56acbbfb | 1888 | dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc; |
d205a06a | 1889 | dd->verbs_dev.rdi.driver_f.setup_wqe = hfi1_setup_wqe; |
5d18ee67 SS |
1890 | dd->verbs_dev.rdi.driver_f.comp_vect_cpu_lookup = |
1891 | hfi1_comp_vect_mappings_lookup; | |
a2c2d608 | 1892 | |
abd712da | 1893 | /* completeion queue */ |
5d18ee67 | 1894 | dd->verbs_dev.rdi.ibdev.num_comp_vectors = dd->comp_vect_possible_cpus; |
27807392 | 1895 | dd->verbs_dev.rdi.dparms.node = dd->node; |
abd712da | 1896 | |
a2c2d608 | 1897 | /* misc settings */ |
abd712da | 1898 | dd->verbs_dev.rdi.flags = 0; /* Let rdmavt handle it all */ |
895420dd | 1899 | dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size; |
4eb06882 DD |
1900 | dd->verbs_dev.rdi.dparms.nports = dd->num_pports; |
1901 | dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd); | |
019f118b BW |
1902 | dd->verbs_dev.rdi.dparms.sge_copy_mode = sge_copy_mode; |
1903 | dd->verbs_dev.rdi.dparms.wss_threshold = wss_threshold; | |
1904 | dd->verbs_dev.rdi.dparms.wss_clean_period = wss_clean_period; | |
48a615dc | 1905 | dd->verbs_dev.rdi.dparms.reserved_operations = 1; |
f5a4a95f | 1906 | dd->verbs_dev.rdi.dparms.extra_rdma_atomic = HFI1_TID_RDMA_WRITE_CNT; |
4eb06882 | 1907 | |
1ac57c50 MM |
1908 | /* post send table */ |
1909 | dd->verbs_dev.rdi.post_parms = hfi1_post_parms; | |
1910 | ||
116aa033 VSD |
1911 | /* opcode translation table */ |
1912 | dd->verbs_dev.rdi.wc_opcode = ib_hfi1_wc_opcode; | |
1913 | ||
4eb06882 DD |
1914 | ppd = dd->pport; |
1915 | for (i = 0; i < dd->num_pports; i++, ppd++) | |
1916 | rvt_init_port(&dd->verbs_dev.rdi, | |
1917 | &ppd->ibport_data.rvp, | |
1918 | i, | |
1919 | ppd->pkeys); | |
ec3f2c12 | 1920 | |
508a523f PP |
1921 | rdma_set_device_sysfs_group(&dd->verbs_dev.rdi.ibdev, |
1922 | &ib_hfi1_attr_group); | |
1923 | ||
0ede73bc | 1924 | ret = rvt_register_device(&dd->verbs_dev.rdi, RDMA_DRIVER_HFI1); |
77241056 | 1925 | if (ret) |
9c4a311e | 1926 | goto err_verbs_txreq; |
77241056 MM |
1927 | |
1928 | ret = hfi1_verbs_register_sysfs(dd); | |
1929 | if (ret) | |
1930 | goto err_class; | |
1931 | ||
9c4a311e | 1932 | return ret; |
77241056 MM |
1933 | |
1934 | err_class: | |
ec3f2c12 | 1935 | rvt_unregister_device(&dd->verbs_dev.rdi); |
77241056 | 1936 | err_verbs_txreq: |
45842abb | 1937 | verbs_txreq_exit(dev); |
77241056 | 1938 | dd_dev_err(dd, "cannot register verbs: %d!\n", -ret); |
77241056 MM |
1939 | return ret; |
1940 | } | |
1941 | ||
/*
 * hfi1_unregister_ib_device - undo hfi1_register_ib_device()
 * @dd: the device data structure
 *
 * Teardown order matters: sysfs first, then the rdmavt/IB core
 * registration, and only then the driver-private resources.
 */
void hfi1_unregister_ib_device(struct hfi1_devdata *dd)
{
	struct hfi1_ibdev *dev = &dd->verbs_dev;

	hfi1_verbs_unregister_sysfs(dd);

	rvt_unregister_device(&dd->verbs_dev.rdi);

	/* After unregistering, nothing should still be waiting for resources. */
	if (!list_empty(&dev->txwait))
		dd_dev_err(dd, "txwait list not empty!\n");
	if (!list_empty(&dev->memwait))
		dd_dev_err(dd, "memwait list not empty!\n");

	del_timer_sync(&dev->mem_timer);
	verbs_txreq_exit(dev);

	/* Release the lazily built counter-name buffers (see alloc_hw_stats). */
	mutex_lock(&cntr_names_lock);
	kfree(dev_cntr_names);
	kfree(port_cntr_names);
	dev_cntr_names = NULL;
	port_cntr_names = NULL;
	cntr_names_initialized = 0;
	mutex_unlock(&cntr_names_lock);
}
1966 | ||
77241056 MM |
1967 | void hfi1_cnp_rcv(struct hfi1_packet *packet) |
1968 | { | |
f3e862cb | 1969 | struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); |
977940b8 | 1970 | struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); |
261a4351 | 1971 | struct ib_header *hdr = packet->hdr; |
895420dd | 1972 | struct rvt_qp *qp = packet->qp; |
977940b8 AK |
1973 | u32 lqpn, rqpn = 0; |
1974 | u16 rlid = 0; | |
b736a469 | 1975 | u8 sl, sc5, svc_type; |
977940b8 AK |
1976 | |
1977 | switch (packet->qp->ibqp.qp_type) { | |
1978 | case IB_QPT_UC: | |
d8966fcd | 1979 | rlid = rdma_ah_get_dlid(&qp->remote_ah_attr); |
977940b8 AK |
1980 | rqpn = qp->remote_qpn; |
1981 | svc_type = IB_CC_SVCTYPE_UC; | |
1982 | break; | |
1983 | case IB_QPT_RC: | |
d8966fcd | 1984 | rlid = rdma_ah_get_dlid(&qp->remote_ah_attr); |
977940b8 AK |
1985 | rqpn = qp->remote_qpn; |
1986 | svc_type = IB_CC_SVCTYPE_RC; | |
1987 | break; | |
1988 | case IB_QPT_SMI: | |
1989 | case IB_QPT_GSI: | |
1990 | case IB_QPT_UD: | |
1991 | svc_type = IB_CC_SVCTYPE_UD; | |
1992 | break; | |
1993 | default: | |
4eb06882 | 1994 | ibp->rvp.n_pkt_drops++; |
977940b8 AK |
1995 | return; |
1996 | } | |
1997 | ||
aad559c2 | 1998 | sc5 = hfi1_9B_get_sc5(hdr, packet->rhf); |
977940b8 AK |
1999 | sl = ibp->sc_to_sl[sc5]; |
2000 | lqpn = qp->ibqp.qp_num; | |
2001 | ||
2002 | process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type); | |
77241056 | 2003 | } |