]>
Commit | Line | Data |
---|---|---|
77241056 | 1 | /* |
05d6ac1d | 2 | * Copyright(c) 2015, 2016 Intel Corporation. |
77241056 MM |
3 | * |
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
5 | * redistributing this file, you may do so under either license. | |
6 | * | |
7 | * GPL LICENSE SUMMARY | |
8 | * | |
77241056 MM |
9 | * This program is free software; you can redistribute it and/or modify |
10 | * it under the terms of version 2 of the GNU General Public License as | |
11 | * published by the Free Software Foundation. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, but | |
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * General Public License for more details. | |
17 | * | |
18 | * BSD LICENSE | |
19 | * | |
77241056 MM |
20 | * Redistribution and use in source and binary forms, with or without |
21 | * modification, are permitted provided that the following conditions | |
22 | * are met: | |
23 | * | |
24 | * - Redistributions of source code must retain the above copyright | |
25 | * notice, this list of conditions and the following disclaimer. | |
26 | * - Redistributions in binary form must reproduce the above copyright | |
27 | * notice, this list of conditions and the following disclaimer in | |
28 | * the documentation and/or other materials provided with the | |
29 | * distribution. | |
30 | * - Neither the name of Intel Corporation nor the names of its | |
31 | * contributors may be used to endorse or promote products derived | |
32 | * from this software without specific prior written permission. | |
33 | * | |
34 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
35 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
36 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
37 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
38 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
39 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
40 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
41 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
42 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
43 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
44 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
45 | * | |
46 | */ | |
47 | ||
48 | #include <rdma/ib_mad.h> | |
49 | #include <rdma/ib_user_verbs.h> | |
50 | #include <linux/io.h> | |
51 | #include <linux/module.h> | |
52 | #include <linux/utsname.h> | |
53 | #include <linux/rculist.h> | |
54 | #include <linux/mm.h> | |
77241056 MM |
55 | #include <linux/vmalloc.h> |
56 | ||
57 | #include "hfi.h" | |
58 | #include "common.h" | |
59 | #include "device.h" | |
60 | #include "trace.h" | |
61 | #include "qp.h" | |
45842abb | 62 | #include "verbs_txreq.h" |
77241056 | 63 | |
/*
 * Module parameters.  Every parameter in this group is registered with
 * S_IRUGO, i.e. it is readable but not writable through sysfs, so the
 * value is fixed once the module has been loaded.  Parameters registered
 * with module_param_named() are exposed under the short name given as the
 * first argument rather than under the hfi1_-prefixed variable name.
 *
 * Only the first three variables are file-local (static); the rest have
 * external linkage.  How each limit is consumed is not visible in this
 * part of the file.
 */
static unsigned int hfi1_lkey_table_size = 16;
module_param_named(lkey_table_size, hfi1_lkey_table_size, uint,
		   S_IRUGO);
MODULE_PARM_DESC(lkey_table_size,
		 "LKEY table size in bits (2^n, 1 <= n <= 23)");

static unsigned int hfi1_max_pds = 0xFFFF;
module_param_named(max_pds, hfi1_max_pds, uint, S_IRUGO);
MODULE_PARM_DESC(max_pds,
		 "Maximum number of protection domains to support");

static unsigned int hfi1_max_ahs = 0xFFFF;
module_param_named(max_ahs, hfi1_max_ahs, uint, S_IRUGO);
MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");

unsigned int hfi1_max_cqes = 0x2FFFFF;
module_param_named(max_cqes, hfi1_max_cqes, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqes,
		 "Maximum number of completion queue entries to support");

unsigned int hfi1_max_cqs = 0x1FFFF;
module_param_named(max_cqs, hfi1_max_cqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");

unsigned int hfi1_max_qp_wrs = 0x3FFF;
module_param_named(max_qp_wrs, hfi1_max_qp_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");

unsigned int hfi1_max_qps = 32768;
module_param_named(max_qps, hfi1_max_qps, uint, S_IRUGO);
MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");

unsigned int hfi1_max_sges = 0x60;
module_param_named(max_sges, hfi1_max_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");

unsigned int hfi1_max_mcast_grps = 16384;
module_param_named(max_mcast_grps, hfi1_max_mcast_grps, uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_grps,
		 "Maximum number of multicast groups to support");

unsigned int hfi1_max_mcast_qp_attached = 16;
module_param_named(max_mcast_qp_attached, hfi1_max_mcast_qp_attached,
		   uint, S_IRUGO);
MODULE_PARM_DESC(max_mcast_qp_attached,
		 "Maximum number of attached QPs to support");

unsigned int hfi1_max_srqs = 1024;
module_param_named(max_srqs, hfi1_max_srqs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");

unsigned int hfi1_max_srq_sges = 128;
module_param_named(max_srq_sges, hfi1_max_srq_sges, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");

unsigned int hfi1_max_srq_wrs = 0x1FFFF;
module_param_named(max_srq_wrs, hfi1_max_srq_wrs, uint, S_IRUGO);
MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");

/* registered under its own variable name, unlike the parameters above */
unsigned short piothreshold = 256;
module_param(piothreshold, ushort, S_IRUGO);
MODULE_PARM_DESC(piothreshold, "size used to determine sdma vs. pio");
126 | ||
528ee9fb DL |
/*
 * Values of the sge_copy_mode parameter tested by hfi1_copy_sge().
 * Any other value (including the default of 0) selects plain memcpy().
 */
#define COPY_CACHELESS 1
#define COPY_ADAPTIVE 2
/* zero-initialized file-scope variable, so plain memcpy() is the default */
static unsigned int sge_copy_mode;
module_param(sge_copy_mode, uint, S_IRUGO);
MODULE_PARM_DESC(sge_copy_mode,
		 "Verbs copy mode: 0 use memcpy, 1 use cacheless copy, 2 adapt based on WSS");
133 | ||
77241056 MM |
/*
 * Forward declaration: the SDMA completion callback is defined further
 * down in this file but is handed to sdma_txinit_ahg() before that point.
 */
static void verbs_sdma_complete(
	struct sdma_txreq *cookie,
	int status);

/*
 * Forward declaration only; the definition is not in this part of the
 * file.
 */
static int pio_wait(struct rvt_qp *qp,
		    struct send_context *sc,
		    struct hfi1_pkt_state *ps,
		    u32 flag);
142 | ||
64ffd86c JJ |
/* Length of buffer to create verbs txreq cache name */
#define TXREQ_NAME_LEN 24

/*
 * Working set size (WSS) tuning knobs, both read-only after load.
 * hfi1_wss_init() sanitizes them: a threshold outside 1..100 becomes 80,
 * a clean period above 1000000 becomes 256 and a clean period of 0
 * becomes 1.
 */
static uint wss_threshold;
module_param(wss_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(wss_threshold, "Percentage (1-100) of LLC to use as a threshold for a cacheless copy");
static uint wss_clean_period = 256;
module_param(wss_clean_period, uint, S_IRUGO);
MODULE_PARM_DESC(wss_clean_period, "Count of verbs copies before an entry in the page copy table is cleaned");
152 | ||
/*
 * memory working set size
 *
 * Bookkeeping for the adaptive copy heuristic: a bitmap with one bit per
 * tracked page slot plus the counters needed to age bits out again.
 */
struct hfi1_wss {
	/* bitmap storage, num_entries unsigned longs, one bit per page slot */
	unsigned long *entries;
	/* bits currently set: bumped by wss_insert(), reduced by the cleaner */
	atomic_t total_count;
	/* counts down from wss_clean_period; hitting zero triggers a clean */
	atomic_t clean_counter;
	/* running cursor; its low bits select the next entry to clear */
	atomic_t clean_entry;

	/* total_count value at or above which the threshold is exceeded */
	int threshold;
	/* number of unsigned longs in entries; used as a power-of-2 mask */
	int num_entries;
	/* mask applied to a page number to obtain its bit index */
	long pages_mask;
};

/* single file-wide instance, set up by hfi1_wss_init() */
static struct hfi1_wss wss;
166 | ||
167 | int hfi1_wss_init(void) | |
168 | { | |
169 | long llc_size; | |
170 | long llc_bits; | |
171 | long table_size; | |
172 | long table_bits; | |
173 | ||
174 | /* check for a valid percent range - default to 80 if none or invalid */ | |
175 | if (wss_threshold < 1 || wss_threshold > 100) | |
176 | wss_threshold = 80; | |
177 | /* reject a wildly large period */ | |
178 | if (wss_clean_period > 1000000) | |
179 | wss_clean_period = 256; | |
180 | /* reject a zero period */ | |
181 | if (wss_clean_period == 0) | |
182 | wss_clean_period = 1; | |
183 | ||
184 | /* | |
185 | * Calculate the table size - the next power of 2 larger than the | |
186 | * LLC size. LLC size is in KiB. | |
187 | */ | |
188 | llc_size = wss_llc_size() * 1024; | |
189 | table_size = roundup_pow_of_two(llc_size); | |
190 | ||
191 | /* one bit per page in rounded up table */ | |
192 | llc_bits = llc_size / PAGE_SIZE; | |
193 | table_bits = table_size / PAGE_SIZE; | |
194 | wss.pages_mask = table_bits - 1; | |
195 | wss.num_entries = table_bits / BITS_PER_LONG; | |
196 | ||
197 | wss.threshold = (llc_bits * wss_threshold) / 100; | |
198 | if (wss.threshold == 0) | |
199 | wss.threshold = 1; | |
200 | ||
201 | atomic_set(&wss.clean_counter, wss_clean_period); | |
202 | ||
203 | wss.entries = kcalloc(wss.num_entries, sizeof(*wss.entries), | |
204 | GFP_KERNEL); | |
205 | if (!wss.entries) { | |
206 | hfi1_wss_exit(); | |
207 | return -ENOMEM; | |
208 | } | |
209 | ||
210 | return 0; | |
211 | } | |
212 | ||
213 | void hfi1_wss_exit(void) | |
214 | { | |
215 | /* coded to handle partially initialized and repeat callers */ | |
216 | kfree(wss.entries); | |
217 | wss.entries = NULL; | |
218 | } | |
219 | ||
220 | /* | |
221 | * Advance the clean counter. When the clean period has expired, | |
222 | * clean an entry. | |
223 | * | |
224 | * This is implemented in atomics to avoid locking. Because multiple | |
225 | * variables are involved, it can be racy which can lead to slightly | |
226 | * inaccurate information. Since this is only a heuristic, this is | |
227 | * OK. Any innaccuracies will clean themselves out as the counter | |
228 | * advances. That said, it is unlikely the entry clean operation will | |
229 | * race - the next possible racer will not start until the next clean | |
230 | * period. | |
231 | * | |
232 | * The clean counter is implemented as a decrement to zero. When zero | |
233 | * is reached an entry is cleaned. | |
234 | */ | |
235 | static void wss_advance_clean_counter(void) | |
236 | { | |
237 | int entry; | |
238 | int weight; | |
239 | unsigned long bits; | |
240 | ||
241 | /* become the cleaner if we decrement the counter to zero */ | |
242 | if (atomic_dec_and_test(&wss.clean_counter)) { | |
243 | /* | |
244 | * Set, not add, the clean period. This avoids an issue | |
245 | * where the counter could decrement below the clean period. | |
246 | * Doing a set can result in lost decrements, slowing the | |
247 | * clean advance. Since this a heuristic, this possible | |
248 | * slowdown is OK. | |
249 | * | |
250 | * An alternative is to loop, advancing the counter by a | |
251 | * clean period until the result is > 0. However, this could | |
252 | * lead to several threads keeping another in the clean loop. | |
253 | * This could be mitigated by limiting the number of times | |
254 | * we stay in the loop. | |
255 | */ | |
256 | atomic_set(&wss.clean_counter, wss_clean_period); | |
257 | ||
258 | /* | |
259 | * Uniquely grab the entry to clean and move to next. | |
260 | * The current entry is always the lower bits of | |
261 | * wss.clean_entry. The table size, wss.num_entries, | |
262 | * is always a power-of-2. | |
263 | */ | |
264 | entry = (atomic_inc_return(&wss.clean_entry) - 1) | |
265 | & (wss.num_entries - 1); | |
266 | ||
267 | /* clear the entry and count the bits */ | |
268 | bits = xchg(&wss.entries[entry], 0); | |
269 | weight = hweight64((u64)bits); | |
270 | /* only adjust the contended total count if needed */ | |
271 | if (weight) | |
272 | atomic_sub(weight, &wss.total_count); | |
273 | } | |
274 | } | |
275 | ||
276 | /* | |
277 | * Insert the given address into the working set array. | |
278 | */ | |
279 | static void wss_insert(void *address) | |
280 | { | |
281 | u32 page = ((unsigned long)address >> PAGE_SHIFT) & wss.pages_mask; | |
282 | u32 entry = page / BITS_PER_LONG; /* assumes this ends up a shift */ | |
283 | u32 nr = page & (BITS_PER_LONG - 1); | |
284 | ||
285 | if (!test_and_set_bit(nr, &wss.entries[entry])) | |
286 | atomic_inc(&wss.total_count); | |
287 | ||
288 | wss_advance_clean_counter(); | |
289 | } | |
290 | ||
291 | /* | |
292 | * Is the working set larger than the threshold? | |
293 | */ | |
0128fcea | 294 | static inline bool wss_exceeds_threshold(void) |
528ee9fb DL |
295 | { |
296 | return atomic_read(&wss.total_count) >= wss.threshold; | |
297 | } | |
298 | ||
77241056 MM |
/*
 * Length of header by opcode, 0 --> not supported
 *
 * Indexed by the full 8-bit opcode.  Opcodes without an initializer are
 * zero, which is the "not supported" value.
 *
 * NOTE(review): every entry is written as 12 + 8 plus an optional
 * opcode-specific amount; presumably these are the sizes of the base
 * transport header, the link header and the extended headers carried by
 * that opcode - confirm against the header structure definitions.
 */
const u8 hdr_len_by_opcode[256] = {
	/* RC */
	[IB_OPCODE_RC_SEND_FIRST] = 12 + 8,
	[IB_OPCODE_RC_SEND_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_SEND_LAST] = 12 + 8,
	[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_SEND_ONLY] = 12 + 8,
	[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_WRITE_FIRST] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_RDMA_WRITE_LAST] = 12 + 8,
	[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20,
	[IB_OPCODE_RC_RDMA_READ_REQUEST] = 12 + 8 + 16,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = 12 + 8,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = 12 + 8 + 4,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = 12 + 8 + 4,
	[IB_OPCODE_RC_ACKNOWLEDGE] = 12 + 8 + 4,
	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = 12 + 8 + 4 + 8,
	[IB_OPCODE_RC_COMPARE_SWAP] = 12 + 8 + 28,
	[IB_OPCODE_RC_FETCH_ADD] = 12 + 8 + 28,
	[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = 12 + 8 + 4,
	[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = 12 + 8 + 4,
	/* UC */
	[IB_OPCODE_UC_SEND_FIRST] = 12 + 8,
	[IB_OPCODE_UC_SEND_MIDDLE] = 12 + 8,
	[IB_OPCODE_UC_SEND_LAST] = 12 + 8,
	[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_SEND_ONLY] = 12 + 8,
	[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_RDMA_WRITE_FIRST] = 12 + 8 + 16,
	[IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = 12 + 8,
	[IB_OPCODE_UC_RDMA_WRITE_LAST] = 12 + 8,
	[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = 12 + 8 + 4,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY] = 12 + 8 + 16,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = 12 + 8 + 20,
	/* UD */
	[IB_OPCODE_UD_SEND_ONLY] = 12 + 8 + 8,
	[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = 12 + 8 + 12
};
344 | ||
/*
 * Receive handler by opcode, indexed by the full 8-bit opcode.
 *
 * qp_ok() returns the entry for a packet's opcode and hfi1_ib_rcv()
 * counts the packet as dropped when that entry is NULL, so every opcode
 * without an initializer here has no receive handler.
 */
static const opcode_handler opcode_handler_tbl[256] = {
	/* RC */
	[IB_OPCODE_RC_SEND_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_REQUEST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_ACKNOWLEDGE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_ATOMIC_ACKNOWLEDGE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_COMPARE_SWAP] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_FETCH_ADD] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_LAST_WITH_INVALIDATE] = &hfi1_rc_rcv,
	[IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE] = &hfi1_rc_rcv,
	/* UC */
	[IB_OPCODE_UC_SEND_FIRST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_MIDDLE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_LAST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_LAST_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_ONLY] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_FIRST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_MIDDLE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_LAST] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_LAST_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY] = &hfi1_uc_rcv,
	[IB_OPCODE_UC_RDMA_WRITE_ONLY_WITH_IMMEDIATE] = &hfi1_uc_rcv,
	/* UD */
	[IB_OPCODE_UD_SEND_ONLY] = &hfi1_ud_rcv,
	[IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE] = &hfi1_ud_rcv,
	/* CNP */
	[IB_OPCODE_CNP] = &hfi1_cnp_rcv
};
389 | ||
b374e060 MM |
/* low five bits of an opcode: the operation within its transport class */
#define OPMASK 0x1f

/*
 * One 32-bit mask per transport class, indexed by the top three bits of
 * the opcode (opcode >> 5).  Within a mask, bit (opcode & OPMASK) is set
 * for each operation listed below; classes without an initializer have
 * an all-zero mask.
 *
 * NOTE(review): the name suggests these are the operations eligible for
 * PIO rather than SDMA; the consumer is not in this part of the file -
 * confirm there.
 */
static const u32 pio_opmask[BIT(3)] = {
	/* RC */
	[IB_OPCODE_RC >> 5] =
		BIT(RC_OP(SEND_ONLY) & OPMASK) |
		BIT(RC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
		BIT(RC_OP(RDMA_WRITE_ONLY) & OPMASK) |
		BIT(RC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK) |
		BIT(RC_OP(RDMA_READ_REQUEST) & OPMASK) |
		BIT(RC_OP(ACKNOWLEDGE) & OPMASK) |
		BIT(RC_OP(ATOMIC_ACKNOWLEDGE) & OPMASK) |
		BIT(RC_OP(COMPARE_SWAP) & OPMASK) |
		BIT(RC_OP(FETCH_ADD) & OPMASK),
	/* UC */
	[IB_OPCODE_UC >> 5] =
		BIT(UC_OP(SEND_ONLY) & OPMASK) |
		BIT(UC_OP(SEND_ONLY_WITH_IMMEDIATE) & OPMASK) |
		BIT(UC_OP(RDMA_WRITE_ONLY) & OPMASK) |
		BIT(UC_OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE) & OPMASK),
};
411 | ||
77241056 MM |
/*
 * System image GUID.
 *
 * Stored big-endian (__be64) and defined with external linkage; nothing
 * in this part of the file assigns it, so it starts out as zero here.
 */
__be64 ib_hfi1_sys_image_guid;
416 | ||
417 | /** | |
418 | * hfi1_copy_sge - copy data to SGE memory | |
419 | * @ss: the SGE state | |
420 | * @data: the data to copy | |
421 | * @length: the length of the data | |
0128fcea | 422 | * @release: boolean to release MR |
7b0b01aa | 423 | * @copy_last: do a separate copy of the last 8 bytes |
77241056 MM |
424 | */ |
425 | void hfi1_copy_sge( | |
895420dd | 426 | struct rvt_sge_state *ss, |
77241056 | 427 | void *data, u32 length, |
0128fcea BW |
428 | bool release, |
429 | bool copy_last) | |
77241056 | 430 | { |
895420dd | 431 | struct rvt_sge *sge = &ss->sge; |
7b0b01aa | 432 | int i; |
0128fcea BW |
433 | bool in_last = false; |
434 | bool cacheless_copy = false; | |
528ee9fb DL |
435 | |
436 | if (sge_copy_mode == COPY_CACHELESS) { | |
437 | cacheless_copy = length >= PAGE_SIZE; | |
438 | } else if (sge_copy_mode == COPY_ADAPTIVE) { | |
439 | if (length >= PAGE_SIZE) { | |
440 | /* | |
441 | * NOTE: this *assumes*: | |
442 | * o The first vaddr is the dest. | |
443 | * o If multiple pages, then vaddr is sequential. | |
444 | */ | |
445 | wss_insert(sge->vaddr); | |
446 | if (length >= (2 * PAGE_SIZE)) | |
447 | wss_insert(sge->vaddr + PAGE_SIZE); | |
77241056 | 448 | |
528ee9fb DL |
449 | cacheless_copy = wss_exceeds_threshold(); |
450 | } else { | |
451 | wss_advance_clean_counter(); | |
452 | } | |
453 | } | |
7b0b01aa DL |
454 | if (copy_last) { |
455 | if (length > 8) { | |
456 | length -= 8; | |
457 | } else { | |
0128fcea BW |
458 | copy_last = false; |
459 | in_last = true; | |
7b0b01aa DL |
460 | } |
461 | } | |
462 | ||
463 | again: | |
77241056 | 464 | while (length) { |
1198fcea | 465 | u32 len = rvt_get_sge_length(sge, length); |
77241056 | 466 | |
77241056 | 467 | WARN_ON_ONCE(len == 0); |
528ee9fb DL |
468 | if (unlikely(in_last)) { |
469 | /* enforce byte transfer ordering */ | |
7b0b01aa DL |
470 | for (i = 0; i < len; i++) |
471 | ((u8 *)sge->vaddr)[i] = ((u8 *)data)[i]; | |
528ee9fb DL |
472 | } else if (cacheless_copy) { |
473 | cacheless_memcpy(sge->vaddr, data, len); | |
7b0b01aa DL |
474 | } else { |
475 | memcpy(sge->vaddr, data, len); | |
476 | } | |
1198fcea | 477 | rvt_update_sge(ss, len, release); |
77241056 MM |
478 | data += len; |
479 | length -= len; | |
480 | } | |
7b0b01aa DL |
481 | |
482 | if (copy_last) { | |
0128fcea BW |
483 | copy_last = false; |
484 | in_last = true; | |
7b0b01aa DL |
485 | length = 8; |
486 | goto again; | |
487 | } | |
77241056 MM |
488 | } |
489 | ||
77241056 MM |
490 | /* |
491 | * Make sure the QP is ready and able to accept the given opcode. | |
492 | */ | |
71e68e3d | 493 | static inline opcode_handler qp_ok(int opcode, struct hfi1_packet *packet) |
77241056 | 494 | { |
83693bd1 | 495 | if (!(ib_rvt_state_ops[packet->qp->state] & RVT_PROCESS_RECV_OK)) |
71e68e3d | 496 | return NULL; |
b218f786 | 497 | if (((opcode & RVT_OPCODE_QP_MASK) == packet->qp->allowed_ops) || |
77241056 | 498 | (opcode == IB_OPCODE_CNP)) |
71e68e3d JP |
499 | return opcode_handler_tbl[opcode]; |
500 | ||
501 | return NULL; | |
77241056 MM |
502 | } |
503 | ||
77241056 MM |
504 | /** |
505 | * hfi1_ib_rcv - process an incoming packet | |
506 | * @packet: data packet information | |
507 | * | |
508 | * This is called to process an incoming packet at interrupt level. | |
509 | * | |
510 | * Tlen is the length of the header + data + CRC in bytes. | |
511 | */ | |
512 | void hfi1_ib_rcv(struct hfi1_packet *packet) | |
513 | { | |
514 | struct hfi1_ctxtdata *rcd = packet->rcd; | |
261a4351 | 515 | struct ib_header *hdr = packet->hdr; |
77241056 MM |
516 | u32 tlen = packet->tlen; |
517 | struct hfi1_pportdata *ppd = rcd->ppd; | |
f3e862cb | 518 | struct hfi1_ibport *ibp = rcd_to_iport(rcd); |
ec4274f1 | 519 | struct rvt_dev_info *rdi = &ppd->dd->verbs_dev.rdi; |
71e68e3d | 520 | opcode_handler packet_handler; |
b77d713a | 521 | unsigned long flags; |
77241056 MM |
522 | u32 qp_num; |
523 | int lnh; | |
524 | u8 opcode; | |
525 | u16 lid; | |
526 | ||
527 | /* Check for GRH */ | |
528 | lnh = be16_to_cpu(hdr->lrh[0]) & 3; | |
e490974e | 529 | if (lnh == HFI1_LRH_BTH) { |
77241056 | 530 | packet->ohdr = &hdr->u.oth; |
e490974e | 531 | } else if (lnh == HFI1_LRH_GRH) { |
77241056 MM |
532 | u32 vtf; |
533 | ||
534 | packet->ohdr = &hdr->u.l.oth; | |
535 | if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR) | |
536 | goto drop; | |
537 | vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow); | |
538 | if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION) | |
539 | goto drop; | |
540 | packet->rcv_flags |= HFI1_HAS_GRH; | |
e490974e | 541 | } else { |
77241056 | 542 | goto drop; |
e490974e | 543 | } |
77241056 MM |
544 | |
545 | trace_input_ibhdr(rcd->dd, hdr); | |
546 | ||
547 | opcode = (be32_to_cpu(packet->ohdr->bth[0]) >> 24); | |
548 | inc_opstats(tlen, &rcd->opstats->stats[opcode]); | |
549 | ||
550 | /* Get the destination QP number. */ | |
ec4274f1 | 551 | qp_num = be32_to_cpu(packet->ohdr->bth[1]) & RVT_QPN_MASK; |
77241056 | 552 | lid = be16_to_cpu(hdr->lrh[1]); |
8859b4a6 DD |
553 | if (unlikely((lid >= be16_to_cpu(IB_MULTICAST_LID_BASE)) && |
554 | (lid != be16_to_cpu(IB_LID_PERMISSIVE)))) { | |
0facc5a1 DD |
555 | struct rvt_mcast *mcast; |
556 | struct rvt_mcast_qp *p; | |
77241056 MM |
557 | |
558 | if (lnh != HFI1_LRH_GRH) | |
559 | goto drop; | |
0facc5a1 | 560 | mcast = rvt_mcast_find(&ibp->rvp, &hdr->u.l.grh.dgid); |
d125a6c6 | 561 | if (!mcast) |
77241056 MM |
562 | goto drop; |
563 | list_for_each_entry_rcu(p, &mcast->qp_list, list) { | |
564 | packet->qp = p->qp; | |
b77d713a | 565 | spin_lock_irqsave(&packet->qp->r_lock, flags); |
71e68e3d JP |
566 | packet_handler = qp_ok(opcode, packet); |
567 | if (likely(packet_handler)) | |
568 | packet_handler(packet); | |
569 | else | |
570 | ibp->rvp.n_pkt_drops++; | |
b77d713a | 571 | spin_unlock_irqrestore(&packet->qp->r_lock, flags); |
77241056 MM |
572 | } |
573 | /* | |
0facc5a1 | 574 | * Notify rvt_multicast_detach() if it is waiting for us |
77241056 MM |
575 | * to finish. |
576 | */ | |
577 | if (atomic_dec_return(&mcast->refcount) <= 1) | |
578 | wake_up(&mcast->wait); | |
579 | } else { | |
580 | rcu_read_lock(); | |
ec4274f1 | 581 | packet->qp = rvt_lookup_qpn(rdi, &ibp->rvp, qp_num); |
77241056 MM |
582 | if (!packet->qp) { |
583 | rcu_read_unlock(); | |
584 | goto drop; | |
585 | } | |
b77d713a | 586 | spin_lock_irqsave(&packet->qp->r_lock, flags); |
71e68e3d JP |
587 | packet_handler = qp_ok(opcode, packet); |
588 | if (likely(packet_handler)) | |
589 | packet_handler(packet); | |
590 | else | |
591 | ibp->rvp.n_pkt_drops++; | |
b77d713a | 592 | spin_unlock_irqrestore(&packet->qp->r_lock, flags); |
77241056 MM |
593 | rcu_read_unlock(); |
594 | } | |
595 | return; | |
596 | ||
597 | drop: | |
4eb06882 | 598 | ibp->rvp.n_pkt_drops++; |
77241056 MM |
599 | } |
600 | ||
601 | /* | |
602 | * This is called from a timer to check for QPs | |
603 | * which need kernel memory in order to send a packet. | |
604 | */ | |
605 | static void mem_timer(unsigned long data) | |
606 | { | |
607 | struct hfi1_ibdev *dev = (struct hfi1_ibdev *)data; | |
608 | struct list_head *list = &dev->memwait; | |
895420dd | 609 | struct rvt_qp *qp = NULL; |
77241056 MM |
610 | struct iowait *wait; |
611 | unsigned long flags; | |
4c6829c5 | 612 | struct hfi1_qp_priv *priv; |
77241056 MM |
613 | |
614 | write_seqlock_irqsave(&dev->iowait_lock, flags); | |
615 | if (!list_empty(list)) { | |
616 | wait = list_first_entry(list, struct iowait, list); | |
4c6829c5 DD |
617 | qp = iowait_to_qp(wait); |
618 | priv = qp->priv; | |
619 | list_del_init(&priv->s_iowait.list); | |
4e045572 | 620 | priv->s_iowait.lock = NULL; |
77241056 MM |
621 | /* refcount held until actual wake up */ |
622 | if (!list_empty(list)) | |
623 | mod_timer(&dev->mem_timer, jiffies + 1); | |
624 | } | |
625 | write_sequnlock_irqrestore(&dev->iowait_lock, flags); | |
626 | ||
627 | if (qp) | |
54d10c1e | 628 | hfi1_qp_wakeup(qp, RVT_S_WAIT_KMEM); |
77241056 MM |
629 | } |
630 | ||
77241056 MM |
631 | /* |
632 | * This is called with progress side lock held. | |
633 | */ | |
634 | /* New API */ | |
635 | static void verbs_sdma_complete( | |
636 | struct sdma_txreq *cookie, | |
a545f530 | 637 | int status) |
77241056 MM |
638 | { |
639 | struct verbs_txreq *tx = | |
640 | container_of(cookie, struct verbs_txreq, txreq); | |
895420dd | 641 | struct rvt_qp *qp = tx->qp; |
77241056 MM |
642 | |
643 | spin_lock(&qp->s_lock); | |
e490974e | 644 | if (tx->wqe) { |
77241056 | 645 | hfi1_send_complete(qp, tx->wqe, IB_WC_SUCCESS); |
e490974e | 646 | } else if (qp->ibqp.qp_type == IB_QPT_RC) { |
261a4351 | 647 | struct ib_header *hdr; |
77241056 MM |
648 | |
649 | hdr = &tx->phdr.hdr; | |
650 | hfi1_rc_send_complete(qp, hdr); | |
651 | } | |
77241056 MM |
652 | spin_unlock(&qp->s_lock); |
653 | ||
654 | hfi1_put_txreq(tx); | |
655 | } | |
656 | ||
711e104d MM |
657 | static int wait_kmem(struct hfi1_ibdev *dev, |
658 | struct rvt_qp *qp, | |
659 | struct hfi1_pkt_state *ps) | |
77241056 | 660 | { |
4c6829c5 | 661 | struct hfi1_qp_priv *priv = qp->priv; |
77241056 MM |
662 | unsigned long flags; |
663 | int ret = 0; | |
664 | ||
665 | spin_lock_irqsave(&qp->s_lock, flags); | |
83693bd1 | 666 | if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { |
77241056 | 667 | write_seqlock(&dev->iowait_lock); |
711e104d MM |
668 | list_add_tail(&ps->s_txreq->txreq.list, |
669 | &priv->s_iowait.tx_head); | |
4c6829c5 | 670 | if (list_empty(&priv->s_iowait.list)) { |
77241056 MM |
671 | if (list_empty(&dev->memwait)) |
672 | mod_timer(&dev->mem_timer, jiffies + 1); | |
54d10c1e | 673 | qp->s_flags |= RVT_S_WAIT_KMEM; |
4c6829c5 | 674 | list_add_tail(&priv->s_iowait.list, &dev->memwait); |
4e045572 | 675 | priv->s_iowait.lock = &dev->iowait_lock; |
54d10c1e | 676 | trace_hfi1_qpsleep(qp, RVT_S_WAIT_KMEM); |
4d6f85c3 | 677 | rvt_get_qp(qp); |
77241056 MM |
678 | } |
679 | write_sequnlock(&dev->iowait_lock); | |
54d10c1e | 680 | qp->s_flags &= ~RVT_S_BUSY; |
77241056 MM |
681 | ret = -EBUSY; |
682 | } | |
683 | spin_unlock_irqrestore(&qp->s_lock, flags); | |
684 | ||
685 | return ret; | |
686 | } | |
687 | ||
688 | /* | |
689 | * This routine calls txadds for each sg entry. | |
690 | * | |
691 | * Add failures will revert the sge cursor | |
692 | */ | |
711e104d | 693 | static noinline int build_verbs_ulp_payload( |
77241056 | 694 | struct sdma_engine *sde, |
77241056 MM |
695 | u32 length, |
696 | struct verbs_txreq *tx) | |
697 | { | |
b777f154 | 698 | struct rvt_sge_state *ss = tx->ss; |
895420dd DD |
699 | struct rvt_sge *sg_list = ss->sg_list; |
700 | struct rvt_sge sge = ss->sge; | |
77241056 MM |
701 | u8 num_sge = ss->num_sge; |
702 | u32 len; | |
703 | int ret = 0; | |
704 | ||
705 | while (length) { | |
706 | len = ss->sge.length; | |
707 | if (len > length) | |
708 | len = length; | |
709 | if (len > ss->sge.sge_length) | |
710 | len = ss->sge.sge_length; | |
711 | WARN_ON_ONCE(len == 0); | |
712 | ret = sdma_txadd_kvaddr( | |
713 | sde->dd, | |
714 | &tx->txreq, | |
715 | ss->sge.vaddr, | |
716 | len); | |
717 | if (ret) | |
718 | goto bail_txadd; | |
1198fcea | 719 | rvt_update_sge(ss, len, false); |
77241056 MM |
720 | length -= len; |
721 | } | |
722 | return ret; | |
723 | bail_txadd: | |
724 | /* unwind cursor */ | |
725 | ss->sge = sge; | |
726 | ss->num_sge = num_sge; | |
727 | ss->sg_list = sg_list; | |
728 | return ret; | |
729 | } | |
730 | ||
731 | /* | |
732 | * Build the number of DMA descriptors needed to send length bytes of data. | |
733 | * | |
734 | * NOTE: DMA mapping is held in the tx until completed in the ring or | |
735 | * the tx desc is freed without having been submitted to the ring | |
736 | * | |
bb5df5f9 | 737 | * This routine ensures all the helper routine calls succeed. |
77241056 MM |
738 | */ |
739 | /* New API */ | |
740 | static int build_verbs_tx_desc( | |
741 | struct sdma_engine *sde, | |
77241056 MM |
742 | u32 length, |
743 | struct verbs_txreq *tx, | |
a9b6b3bc | 744 | struct hfi1_ahg_info *ahg_info, |
77241056 MM |
745 | u64 pbc) |
746 | { | |
747 | int ret = 0; | |
d4d602e9 | 748 | struct hfi1_sdma_header *phdr = &tx->phdr; |
77241056 MM |
749 | u16 hdrbytes = tx->hdr_dwords << 2; |
750 | ||
a9b6b3bc | 751 | if (!ahg_info->ahgcount) { |
77241056 MM |
752 | ret = sdma_txinit_ahg( |
753 | &tx->txreq, | |
a9b6b3bc | 754 | ahg_info->tx_flags, |
77241056 | 755 | hdrbytes + length, |
a9b6b3bc | 756 | ahg_info->ahgidx, |
77241056 MM |
757 | 0, |
758 | NULL, | |
759 | 0, | |
760 | verbs_sdma_complete); | |
761 | if (ret) | |
762 | goto bail_txadd; | |
763 | phdr->pbc = cpu_to_le64(pbc); | |
77241056 MM |
764 | ret = sdma_txadd_kvaddr( |
765 | sde->dd, | |
766 | &tx->txreq, | |
bb5df5f9 DD |
767 | phdr, |
768 | hdrbytes); | |
77241056 MM |
769 | if (ret) |
770 | goto bail_txadd; | |
771 | } else { | |
77241056 MM |
772 | ret = sdma_txinit_ahg( |
773 | &tx->txreq, | |
a9b6b3bc | 774 | ahg_info->tx_flags, |
77241056 | 775 | length, |
a9b6b3bc DC |
776 | ahg_info->ahgidx, |
777 | ahg_info->ahgcount, | |
778 | ahg_info->ahgdesc, | |
77241056 MM |
779 | hdrbytes, |
780 | verbs_sdma_complete); | |
781 | if (ret) | |
782 | goto bail_txadd; | |
783 | } | |
784 | ||
b777f154 MH |
785 | /* add the ulp payload - if any. tx->ss can be NULL for acks */ |
786 | if (tx->ss) | |
787 | ret = build_verbs_ulp_payload(sde, length, tx); | |
77241056 MM |
788 | bail_txadd: |
789 | return ret; | |
790 | } | |
791 | ||
895420dd | 792 | int hfi1_verbs_send_dma(struct rvt_qp *qp, struct hfi1_pkt_state *ps, |
d46e5144 | 793 | u64 pbc) |
77241056 | 794 | { |
4c6829c5 | 795 | struct hfi1_qp_priv *priv = qp->priv; |
a9b6b3bc | 796 | struct hfi1_ahg_info *ahg_info = priv->s_ahg; |
d46e5144 | 797 | u32 hdrwords = qp->s_hdrwords; |
e922ae06 | 798 | u32 len = ps->s_txreq->s_cur_size; |
d46e5144 DD |
799 | u32 plen = hdrwords + ((len + 3) >> 2) + 2; /* includes pbc */ |
800 | struct hfi1_ibdev *dev = ps->dev; | |
801 | struct hfi1_pportdata *ppd = ps->ppd; | |
77241056 | 802 | struct verbs_txreq *tx; |
77241056 | 803 | u64 pbc_flags = 0; |
4c6829c5 DD |
804 | u8 sc5 = priv->s_sc; |
805 | ||
77241056 | 806 | int ret; |
77241056 | 807 | |
bb5df5f9 | 808 | tx = ps->s_txreq; |
711e104d MM |
809 | if (!sdma_txreq_built(&tx->txreq)) { |
810 | if (likely(pbc == 0)) { | |
811 | u32 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5); | |
812 | /* No vl15 here */ | |
813 | /* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */ | |
814 | pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT; | |
815 | ||
816 | pbc = create_pbc(ppd, | |
817 | pbc_flags, | |
818 | qp->srate_mbps, | |
819 | vl, | |
820 | plen); | |
821 | } | |
822 | tx->wqe = qp->s_wqe; | |
b777f154 | 823 | ret = build_verbs_tx_desc(tx->sde, len, tx, ahg_info, pbc); |
711e104d MM |
824 | if (unlikely(ret)) |
825 | goto bail_build; | |
77241056 | 826 | } |
5326dfbf MM |
827 | ret = sdma_send_txreq(tx->sde, &priv->s_iowait, &tx->txreq); |
828 | if (unlikely(ret < 0)) { | |
829 | if (ret == -ECOMM) | |
830 | goto bail_ecomm; | |
831 | return ret; | |
832 | } | |
1db78eee MM |
833 | trace_sdma_output_ibhdr(dd_from_ibdev(qp->ibqp.device), |
834 | &ps->s_txreq->phdr.hdr); | |
77241056 MM |
835 | return ret; |
836 | ||
77241056 MM |
837 | bail_ecomm: |
838 | /* The current one got "sent" */ | |
839 | return 0; | |
840 | bail_build: | |
711e104d MM |
841 | ret = wait_kmem(dev, qp, ps); |
842 | if (!ret) { | |
843 | /* free txreq - bad state */ | |
844 | hfi1_put_txreq(ps->s_txreq); | |
845 | ps->s_txreq = NULL; | |
846 | } | |
847 | return ret; | |
77241056 MM |
848 | } |
849 | ||
850 | /* | |
851 | * If we are now in the error state, return zero to flush the | |
852 | * send work request. | |
853 | */ | |
14553ca1 MM |
854 | static int pio_wait(struct rvt_qp *qp, |
855 | struct send_context *sc, | |
856 | struct hfi1_pkt_state *ps, | |
857 | u32 flag) | |
77241056 | 858 | { |
4c6829c5 | 859 | struct hfi1_qp_priv *priv = qp->priv; |
77241056 MM |
860 | struct hfi1_devdata *dd = sc->dd; |
861 | struct hfi1_ibdev *dev = &dd->verbs_dev; | |
862 | unsigned long flags; | |
863 | int ret = 0; | |
864 | ||
865 | /* | |
866 | * Note that as soon as want_buffer() is called and | |
867 | * possibly before it returns, sc_piobufavail() | |
868 | * could be called. Therefore, put QP on the I/O wait list before | |
869 | * enabling the PIO avail interrupt. | |
870 | */ | |
871 | spin_lock_irqsave(&qp->s_lock, flags); | |
83693bd1 | 872 | if (ib_rvt_state_ops[qp->state] & RVT_PROCESS_RECV_OK) { |
77241056 | 873 | write_seqlock(&dev->iowait_lock); |
711e104d MM |
874 | list_add_tail(&ps->s_txreq->txreq.list, |
875 | &priv->s_iowait.tx_head); | |
4c6829c5 | 876 | if (list_empty(&priv->s_iowait.list)) { |
77241056 MM |
877 | struct hfi1_ibdev *dev = &dd->verbs_dev; |
878 | int was_empty; | |
879 | ||
14553ca1 MM |
880 | dev->n_piowait += !!(flag & RVT_S_WAIT_PIO); |
881 | dev->n_piodrain += !!(flag & RVT_S_WAIT_PIO_DRAIN); | |
14553ca1 | 882 | qp->s_flags |= flag; |
77241056 | 883 | was_empty = list_empty(&sc->piowait); |
4c6829c5 | 884 | list_add_tail(&priv->s_iowait.list, &sc->piowait); |
4e045572 | 885 | priv->s_iowait.lock = &dev->iowait_lock; |
54d10c1e | 886 | trace_hfi1_qpsleep(qp, RVT_S_WAIT_PIO); |
4d6f85c3 | 887 | rvt_get_qp(qp); |
77241056 MM |
888 | /* counting: only call wantpiobuf_intr if first user */ |
889 | if (was_empty) | |
890 | hfi1_sc_wantpiobuf_intr(sc, 1); | |
891 | } | |
892 | write_sequnlock(&dev->iowait_lock); | |
54d10c1e | 893 | qp->s_flags &= ~RVT_S_BUSY; |
77241056 MM |
894 | ret = -EBUSY; |
895 | } | |
896 | spin_unlock_irqrestore(&qp->s_lock, flags); | |
897 | return ret; | |
898 | } | |
899 | ||
14553ca1 MM |
900 | static void verbs_pio_complete(void *arg, int code) |
901 | { | |
902 | struct rvt_qp *qp = (struct rvt_qp *)arg; | |
903 | struct hfi1_qp_priv *priv = qp->priv; | |
904 | ||
905 | if (iowait_pio_dec(&priv->s_iowait)) | |
906 | iowait_drain_wakeup(&priv->s_iowait); | |
907 | } | |
908 | ||
/*
 * hfi1_verbs_send_pio - send a verbs packet by copying it to a PIO buffer
 * @qp: the QP sending the packet
 * @ps: packet state; ps->s_txreq holds the header, SGE state and size
 * @pbc: caller supplied PBC, or 0 to have one created here
 *
 * Allocates a PIO buffer from the txreq's send context and copies the
 * header plus any payload into it. If no buffer is available the request
 * is either completed with IB_WC_GENERAL_ERR (link not HLS_UP_ACTIVE) or
 * handed to pio_wait().
 *
 * The txreq is released with hfi1_put_txreq() on every path except when
 * pio_wait() returns non-zero.
 *
 * Return: 0, or the non-zero value returned by pio_wait().
 */
int hfi1_verbs_send_pio(struct rvt_qp *qp, struct hfi1_pkt_state *ps,
			u64 pbc)
{
	struct hfi1_qp_priv *priv = qp->priv;
	u32 hdrwords = qp->s_hdrwords;
	struct rvt_sge_state *ss = ps->s_txreq->ss;
	u32 len = ps->s_txreq->s_cur_size;
	u32 dwords = (len + 3) >> 2;
	u32 plen = hdrwords + dwords + 2; /* includes pbc */
	struct hfi1_pportdata *ppd = ps->ppd;
	u32 *hdr = (u32 *)&ps->s_txreq->phdr.hdr;
	u64 pbc_flags = 0;
	u8 sc5;
	unsigned long flags = 0;
	struct send_context *sc;
	struct pio_buf *pbuf;
	int wc_status = IB_WC_SUCCESS;
	int ret = 0;
	pio_release_cb cb = NULL;

	/* only RC/UC use complete */
	switch (qp->ibqp.qp_type) {
	case IB_QPT_RC:
	case IB_QPT_UC:
		cb = verbs_pio_complete;
		break;
	default:
		break;
	}

	/* vl15 special case taken care of in ud.c */
	sc5 = priv->s_sc;
	sc = ps->s_txreq->psc;

	if (likely(pbc == 0)) {
		u8 vl = sc_to_vlt(dd_from_ibdev(qp->ibqp.device), sc5);
		/* set PBC_DC_INFO bit (aka SC[4]) in pbc_flags */
		pbc_flags |= (!!(sc5 & 0x10)) << PBC_DC_INFO_SHIFT;
		pbc = create_pbc(ppd, pbc_flags, qp->srate_mbps, vl, plen);
	}
	/* count the pending PIO before the allocation that installs cb */
	if (cb)
		iowait_pio_inc(&priv->s_iowait);
	pbuf = sc_buffer_alloc(sc, plen, cb, qp);
	if (unlikely(!pbuf)) {
		/* no buffer, so undo the increment done above */
		if (cb)
			verbs_pio_complete(qp, 0);
		if (ppd->host_link_state != HLS_UP_ACTIVE) {
			/*
			 * If we have filled the PIO buffers to capacity and are
			 * not in an active state this request is not going to
			 * go out, so just complete it with an error or else a
			 * ULP or the core may be stuck waiting.
			 */
			hfi1_cdbg(
				PIO,
				"alloc failed. state not active, completing");
			wc_status = IB_WC_GENERAL_ERR;
			goto pio_bail;
		} else {
			/*
			 * This is a normal occurrence. The PIO buffs are full
			 * up but we are still happily sending, well we could be
			 * so lets continue to queue the request.
			 */
			hfi1_cdbg(PIO, "alloc failed. state active, queuing");
			ret = pio_wait(qp, sc, ps, RVT_S_WAIT_PIO);
			if (!ret)
				/* txreq not queued - free */
				goto bail;
			/* tx consumed in wait */
			return ret;
		}
	}

	if (len == 0) {
		/* header-only packet */
		pio_copy(ppd->dd, pbuf, pbc, hdr, hdrwords);
	} else {
		if (ss) {
			/* header first, then the payload one SGE at a time */
			seg_pio_copy_start(pbuf, pbc, hdr, hdrwords * 4);
			while (len) {
				void *addr = ss->sge.vaddr;
				u32 slen = ss->sge.length;

				if (slen > len)
					slen = len;
				rvt_update_sge(ss, slen, false);
				seg_pio_copy_mid(pbuf, addr, slen);
				len -= slen;
			}
			seg_pio_copy_end(pbuf);
		}
	}

	trace_pio_output_ibhdr(dd_from_ibdev(qp->ibqp.device),
			       &ps->s_txreq->phdr.hdr);

pio_bail:
	/* complete the request; wc_status carries any error set above */
	if (qp->s_wqe) {
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_send_complete(qp, qp->s_wqe, wc_status);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
		spin_lock_irqsave(&qp->s_lock, flags);
		hfi1_rc_send_complete(qp, &ps->s_txreq->phdr.hdr);
		spin_unlock_irqrestore(&qp->s_lock, flags);
	}

	ret = 0;

bail:
	hfi1_put_txreq(ps->s_txreq);
	return ret;
}
b91cc573 | 1022 | |
77241056 MM |
1023 | /* |
1024 | * egress_pkey_matches_entry - return 1 if the pkey matches ent (ent | |
e38d1e4f | 1025 | * being an entry from the partition key table), return 0 |
77241056 MM |
1026 | * otherwise. Use the matching criteria for egress partition keys |
1027 | * specified in the OPAv1 spec., section 9.1l.7. | |
1028 | */ | |
1029 | static inline int egress_pkey_matches_entry(u16 pkey, u16 ent) | |
1030 | { | |
1031 | u16 mkey = pkey & PKEY_LOW_15_MASK; | |
e38d1e4f | 1032 | u16 mentry = ent & PKEY_LOW_15_MASK; |
77241056 | 1033 | |
e38d1e4f | 1034 | if (mkey == mentry) { |
77241056 MM |
1035 | /* |
1036 | * If pkey[15] is set (full partition member), | |
1037 | * is bit 15 in the corresponding table element | |
1038 | * clear (limited member)? | |
1039 | */ | |
1040 | if (pkey & PKEY_MEMBER_MASK) | |
1041 | return !!(ent & PKEY_MEMBER_MASK); | |
1042 | return 1; | |
1043 | } | |
1044 | return 0; | |
1045 | } | |
1046 | ||
e38d1e4f SS |
1047 | /** |
1048 | * egress_pkey_check - check P_KEY of a packet | |
1049 | * @ppd: Physical IB port data | |
1050 | * @lrh: Local route header | |
1051 | * @bth: Base transport header | |
1052 | * @sc5: SC for packet | |
1053 | * @s_pkey_index: It will be used for look up optimization for kernel contexts | |
1054 | * only. If it is negative value, then it means user contexts is calling this | |
1055 | * function. | |
1056 | * | |
1057 | * It checks if hdr's pkey is valid. | |
1058 | * | |
1059 | * Return: 0 on success, otherwise, 1 | |
77241056 | 1060 | */ |
e38d1e4f SS |
1061 | int egress_pkey_check(struct hfi1_pportdata *ppd, __be16 *lrh, __be32 *bth, |
1062 | u8 sc5, int8_t s_pkey_index) | |
77241056 | 1063 | { |
77241056 | 1064 | struct hfi1_devdata *dd; |
e38d1e4f | 1065 | int i; |
77241056 | 1066 | u16 pkey; |
e38d1e4f | 1067 | int is_user_ctxt_mechanism = (s_pkey_index < 0); |
77241056 MM |
1068 | |
1069 | if (!(ppd->part_enforce & HFI1_PART_ENFORCE_OUT)) | |
1070 | return 0; | |
1071 | ||
e38d1e4f | 1072 | pkey = (u16)be32_to_cpu(bth[0]); |
77241056 MM |
1073 | |
1074 | /* If SC15, pkey[0:14] must be 0x7fff */ | |
1075 | if ((sc5 == 0xf) && ((pkey & PKEY_LOW_15_MASK) != PKEY_LOW_15_MASK)) | |
1076 | goto bad; | |
1077 | ||
77241056 MM |
1078 | /* Is the pkey = 0x0, or 0x8000? */ |
1079 | if ((pkey & PKEY_LOW_15_MASK) == 0) | |
1080 | goto bad; | |
1081 | ||
e38d1e4f SS |
1082 | /* |
1083 | * For the kernel contexts only, if a qp is passed into the function, | |
1084 | * the most likely matching pkey has index qp->s_pkey_index | |
1085 | */ | |
1086 | if (!is_user_ctxt_mechanism && | |
1087 | egress_pkey_matches_entry(pkey, ppd->pkeys[s_pkey_index])) { | |
1088 | return 0; | |
77241056 MM |
1089 | } |
1090 | ||
e38d1e4f SS |
1091 | for (i = 0; i < MAX_PKEY_VALUES; i++) { |
1092 | if (egress_pkey_matches_entry(pkey, ppd->pkeys[i])) | |
1093 | return 0; | |
1094 | } | |
77241056 | 1095 | bad: |
e38d1e4f SS |
1096 | /* |
1097 | * For the user-context mechanism, the P_KEY check would only happen | |
1098 | * once per SDMA request, not once per packet. Therefore, there's no | |
1099 | * need to increment the counter for the user-context mechanism. | |
1100 | */ | |
1101 | if (!is_user_ctxt_mechanism) { | |
1102 | incr_cntr64(&ppd->port_xmit_constraint_errors); | |
1103 | dd = ppd->dd; | |
1104 | if (!(dd->err_info_xmit_constraint.status & | |
1105 | OPA_EI_STATUS_SMASK)) { | |
1106 | u16 slid = be16_to_cpu(lrh[3]); | |
1107 | ||
1108 | dd->err_info_xmit_constraint.status |= | |
1109 | OPA_EI_STATUS_SMASK; | |
1110 | dd->err_info_xmit_constraint.slid = slid; | |
1111 | dd->err_info_xmit_constraint.pkey = pkey; | |
1112 | } | |
77241056 MM |
1113 | } |
1114 | return 1; | |
1115 | } | |
1116 | ||
14553ca1 MM |
1117 | /** |
1118 | * get_send_routine - choose an egress routine | |
1119 | * | |
1120 | * Choose an egress routine based on QP type | |
1121 | * and size | |
1122 | */ | |
1123 | static inline send_routine get_send_routine(struct rvt_qp *qp, | |
47177f1b | 1124 | struct verbs_txreq *tx) |
14553ca1 MM |
1125 | { |
1126 | struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); | |
1127 | struct hfi1_qp_priv *priv = qp->priv; | |
261a4351 | 1128 | struct ib_header *h = &tx->phdr.hdr; |
14553ca1 MM |
1129 | |
1130 | if (unlikely(!(dd->flags & HFI1_HAS_SEND_DMA))) | |
1131 | return dd->process_pio_send; | |
1132 | switch (qp->ibqp.qp_type) { | |
1133 | case IB_QPT_SMI: | |
1134 | return dd->process_pio_send; | |
1135 | case IB_QPT_GSI: | |
1136 | case IB_QPT_UD: | |
14553ca1 | 1137 | break; |
14553ca1 | 1138 | case IB_QPT_UC: |
b374e060 MM |
1139 | case IB_QPT_RC: { |
1140 | u8 op = get_opcode(h); | |
1141 | ||
14553ca1 | 1142 | if (piothreshold && |
e922ae06 | 1143 | tx->s_cur_size <= min(piothreshold, qp->pmtu) && |
b374e060 | 1144 | (BIT(op & OPMASK) & pio_opmask[op >> 5]) && |
47177f1b MM |
1145 | iowait_sdma_pending(&priv->s_iowait) == 0 && |
1146 | !sdma_txreq_built(&tx->txreq)) | |
14553ca1 MM |
1147 | return dd->process_pio_send; |
1148 | break; | |
b374e060 | 1149 | } |
14553ca1 MM |
1150 | default: |
1151 | break; | |
1152 | } | |
1153 | return dd->process_dma_send; | |
1154 | } | |
1155 | ||
77241056 MM |
1156 | /** |
1157 | * hfi1_verbs_send - send a packet | |
1158 | * @qp: the QP to send on | |
d46e5144 | 1159 | * @ps: the state of the packet to send |
77241056 MM |
1160 | * |
1161 | * Return zero if packet is sent or queued OK. | |
54d10c1e | 1162 | * Return non-zero and clear qp->s_flags RVT_S_BUSY otherwise. |
77241056 | 1163 | */ |
895420dd | 1164 | int hfi1_verbs_send(struct rvt_qp *qp, struct hfi1_pkt_state *ps) |
77241056 MM |
1165 | { |
1166 | struct hfi1_devdata *dd = dd_from_ibdev(qp->ibqp.device); | |
47177f1b | 1167 | struct hfi1_qp_priv *priv = qp->priv; |
261a4351 MM |
1168 | struct ib_other_headers *ohdr; |
1169 | struct ib_header *hdr; | |
14553ca1 | 1170 | send_routine sr; |
77241056 | 1171 | int ret; |
e38d1e4f SS |
1172 | u8 lnh; |
1173 | ||
1174 | hdr = &ps->s_txreq->phdr.hdr; | |
1175 | /* locate the pkey within the headers */ | |
1176 | lnh = be16_to_cpu(hdr->lrh[0]) & 3; | |
1177 | if (lnh == HFI1_LRH_GRH) | |
1178 | ohdr = &hdr->u.l.oth; | |
1179 | else | |
1180 | ohdr = &hdr->u.oth; | |
77241056 | 1181 | |
47177f1b | 1182 | sr = get_send_routine(qp, ps->s_txreq); |
e38d1e4f SS |
1183 | ret = egress_pkey_check(dd->pport, |
1184 | hdr->lrh, | |
1185 | ohdr->bth, | |
1186 | priv->s_sc, | |
1187 | qp->s_pkey_index); | |
77241056 MM |
1188 | if (unlikely(ret)) { |
1189 | /* | |
1190 | * The value we are returning here does not get propagated to | |
1191 | * the verbs caller. Thus we need to complete the request with | |
1192 | * error otherwise the caller could be sitting waiting on the | |
1193 | * completion event. Only do this for PIO. SDMA has its own | |
1194 | * mechanism for handling the errors. So for SDMA we can just | |
1195 | * return. | |
1196 | */ | |
14553ca1 MM |
1197 | if (sr == dd->process_pio_send) { |
1198 | unsigned long flags; | |
1199 | ||
77241056 MM |
1200 | hfi1_cdbg(PIO, "%s() Failed. Completing with err", |
1201 | __func__); | |
1202 | spin_lock_irqsave(&qp->s_lock, flags); | |
1203 | hfi1_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR); | |
1204 | spin_unlock_irqrestore(&qp->s_lock, flags); | |
1205 | } | |
1206 | return -EINVAL; | |
1207 | } | |
47177f1b MM |
1208 | if (sr == dd->process_dma_send && iowait_pio_pending(&priv->s_iowait)) |
1209 | return pio_wait(qp, | |
1210 | ps->s_txreq->psc, | |
1211 | ps, | |
1212 | RVT_S_WAIT_PIO_DRAIN); | |
14553ca1 | 1213 | return sr(qp, ps, 0); |
77241056 MM |
1214 | } |
1215 | ||
94d5171c HC |
1216 | /** |
1217 | * hfi1_fill_device_attr - Fill in rvt dev info device attributes. | |
1218 | * @dd: the device data structure | |
1219 | */ | |
1220 | static void hfi1_fill_device_attr(struct hfi1_devdata *dd) | |
77241056 | 1221 | { |
94d5171c | 1222 | struct rvt_dev_info *rdi = &dd->verbs_dev.rdi; |
939b6ca8 | 1223 | u16 ver = dd->dc8051_ver; |
94d5171c HC |
1224 | |
1225 | memset(&rdi->dparms.props, 0, sizeof(rdi->dparms.props)); | |
1226 | ||
939b6ca8 IW |
1227 | rdi->dparms.props.fw_ver = ((u64)(dc8051_ver_maj(ver)) << 16) | |
1228 | (u64)dc8051_ver_min(ver); | |
94d5171c HC |
1229 | rdi->dparms.props.device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR | |
1230 | IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT | | |
1231 | IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN | | |
c72cfe3e JX |
1232 | IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE | |
1233 | IB_DEVICE_MEM_MGT_EXTENSIONS; | |
94d5171c HC |
1234 | rdi->dparms.props.page_size_cap = PAGE_SIZE; |
1235 | rdi->dparms.props.vendor_id = dd->oui1 << 16 | dd->oui2 << 8 | dd->oui3; | |
1236 | rdi->dparms.props.vendor_part_id = dd->pcidev->device; | |
1237 | rdi->dparms.props.hw_ver = dd->minrev; | |
1238 | rdi->dparms.props.sys_image_guid = ib_hfi1_sys_image_guid; | |
c72cfe3e JX |
1239 | rdi->dparms.props.max_mr_size = U64_MAX; |
1240 | rdi->dparms.props.max_fast_reg_page_list_len = UINT_MAX; | |
94d5171c HC |
1241 | rdi->dparms.props.max_qp = hfi1_max_qps; |
1242 | rdi->dparms.props.max_qp_wr = hfi1_max_qp_wrs; | |
1243 | rdi->dparms.props.max_sge = hfi1_max_sges; | |
1244 | rdi->dparms.props.max_sge_rd = hfi1_max_sges; | |
1245 | rdi->dparms.props.max_cq = hfi1_max_cqs; | |
1246 | rdi->dparms.props.max_ah = hfi1_max_ahs; | |
1247 | rdi->dparms.props.max_cqe = hfi1_max_cqes; | |
1248 | rdi->dparms.props.max_mr = rdi->lkey_table.max; | |
1249 | rdi->dparms.props.max_fmr = rdi->lkey_table.max; | |
1250 | rdi->dparms.props.max_map_per_fmr = 32767; | |
1251 | rdi->dparms.props.max_pd = hfi1_max_pds; | |
1252 | rdi->dparms.props.max_qp_rd_atom = HFI1_MAX_RDMA_ATOMIC; | |
1253 | rdi->dparms.props.max_qp_init_rd_atom = 255; | |
1254 | rdi->dparms.props.max_srq = hfi1_max_srqs; | |
1255 | rdi->dparms.props.max_srq_wr = hfi1_max_srq_wrs; | |
1256 | rdi->dparms.props.max_srq_sge = hfi1_max_srq_sges; | |
1257 | rdi->dparms.props.atomic_cap = IB_ATOMIC_GLOB; | |
1258 | rdi->dparms.props.max_pkeys = hfi1_get_npkeys(dd); | |
1259 | rdi->dparms.props.max_mcast_grp = hfi1_max_mcast_grps; | |
1260 | rdi->dparms.props.max_mcast_qp_attach = hfi1_max_mcast_qp_attached; | |
1261 | rdi->dparms.props.max_total_mcast_qp_attach = | |
1262 | rdi->dparms.props.max_mcast_qp_attach * | |
1263 | rdi->dparms.props.max_mcast_grp; | |
77241056 MM |
1264 | } |
1265 | ||
1266 | static inline u16 opa_speed_to_ib(u16 in) | |
1267 | { | |
1268 | u16 out = 0; | |
1269 | ||
1270 | if (in & OPA_LINK_SPEED_25G) | |
1271 | out |= IB_SPEED_EDR; | |
1272 | if (in & OPA_LINK_SPEED_12_5G) | |
1273 | out |= IB_SPEED_FDR; | |
1274 | ||
1275 | return out; | |
1276 | } | |
1277 | ||
1278 | /* | |
1279 | * Convert a single OPA link width (no multiple flags) to an IB value. | |
1280 | * A zero OPA link width means link down, which means the IB width value | |
1281 | * is a don't care. | |
1282 | */ | |
1283 | static inline u16 opa_width_to_ib(u16 in) | |
1284 | { | |
1285 | switch (in) { | |
1286 | case OPA_LINK_WIDTH_1X: | |
1287 | /* map 2x and 3x to 1x as they don't exist in IB */ | |
1288 | case OPA_LINK_WIDTH_2X: | |
1289 | case OPA_LINK_WIDTH_3X: | |
1290 | return IB_WIDTH_1X; | |
1291 | default: /* link down or unknown, return our largest width */ | |
1292 | case OPA_LINK_WIDTH_4X: | |
1293 | return IB_WIDTH_4X; | |
1294 | } | |
1295 | } | |
1296 | ||
45b59eef | 1297 | static int query_port(struct rvt_dev_info *rdi, u8 port_num, |
77241056 MM |
1298 | struct ib_port_attr *props) |
1299 | { | |
45b59eef HC |
1300 | struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); |
1301 | struct hfi1_devdata *dd = dd_from_dev(verbs_dev); | |
1302 | struct hfi1_pportdata *ppd = &dd->pport[port_num - 1]; | |
77241056 MM |
1303 | u16 lid = ppd->lid; |
1304 | ||
c4550c63 | 1305 | /* props being zeroed by the caller, avoid zeroing it here */ |
77241056 MM |
1306 | props->lid = lid ? lid : 0; |
1307 | props->lmc = ppd->lmc; | |
77241056 MM |
1308 | /* OPA logical states match IB logical states */ |
1309 | props->state = driver_lstate(ppd); | |
1310 | props->phys_state = hfi1_ibphys_portstate(ppd); | |
77241056 | 1311 | props->gid_tbl_len = HFI1_GUIDS_PER_PORT; |
77241056 MM |
1312 | props->active_width = (u8)opa_width_to_ib(ppd->link_width_active); |
1313 | /* see rate_show() in ib core/sysfs.c */ | |
1314 | props->active_speed = (u8)opa_speed_to_ib(ppd->link_speed_active); | |
1315 | props->max_vl_num = ppd->vls_supported; | |
77241056 MM |
1316 | |
1317 | /* Once we are a "first class" citizen and have added the OPA MTUs to | |
1318 | * the core we can advertise the larger MTU enum to the ULPs, for now | |
1319 | * advertise only 4K. | |
1320 | * | |
1321 | * Those applications which are either OPA aware or pass the MTU enum | |
1322 | * from the Path Records to us will get the new 8k MTU. Those that | |
1323 | * attempt to process the MTU enum may fail in various ways. | |
1324 | */ | |
1325 | props->max_mtu = mtu_to_enum((!valid_ib_mtu(hfi1_max_mtu) ? | |
1326 | 4096 : hfi1_max_mtu), IB_MTU_4096); | |
1327 | props->active_mtu = !valid_ib_mtu(ppd->ibmtu) ? props->max_mtu : | |
1328 | mtu_to_enum(ppd->ibmtu, IB_MTU_2048); | |
77241056 MM |
1329 | |
1330 | return 0; | |
1331 | } | |
1332 | ||
1333 | static int modify_device(struct ib_device *device, | |
1334 | int device_modify_mask, | |
1335 | struct ib_device_modify *device_modify) | |
1336 | { | |
1337 | struct hfi1_devdata *dd = dd_from_ibdev(device); | |
1338 | unsigned i; | |
1339 | int ret; | |
1340 | ||
1341 | if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | | |
1342 | IB_DEVICE_MODIFY_NODE_DESC)) { | |
1343 | ret = -EOPNOTSUPP; | |
1344 | goto bail; | |
1345 | } | |
1346 | ||
1347 | if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) { | |
bd99fdea YS |
1348 | memcpy(device->node_desc, device_modify->node_desc, |
1349 | IB_DEVICE_NODE_DESC_MAX); | |
77241056 MM |
1350 | for (i = 0; i < dd->num_pports; i++) { |
1351 | struct hfi1_ibport *ibp = &dd->pport[i].ibport_data; | |
1352 | ||
1353 | hfi1_node_desc_chg(ibp); | |
1354 | } | |
1355 | } | |
1356 | ||
1357 | if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) { | |
1358 | ib_hfi1_sys_image_guid = | |
1359 | cpu_to_be64(device_modify->sys_image_guid); | |
1360 | for (i = 0; i < dd->num_pports; i++) { | |
1361 | struct hfi1_ibport *ibp = &dd->pport[i].ibport_data; | |
1362 | ||
1363 | hfi1_sys_guid_chg(ibp); | |
1364 | } | |
1365 | } | |
1366 | ||
1367 | ret = 0; | |
1368 | ||
1369 | bail: | |
1370 | return ret; | |
1371 | } | |
1372 | ||
45b59eef | 1373 | static int shut_down_port(struct rvt_dev_info *rdi, u8 port_num) |
77241056 | 1374 | { |
45b59eef HC |
1375 | struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); |
1376 | struct hfi1_devdata *dd = dd_from_dev(verbs_dev); | |
1377 | struct hfi1_pportdata *ppd = &dd->pport[port_num - 1]; | |
1378 | int ret; | |
77241056 | 1379 | |
45b59eef HC |
1380 | set_link_down_reason(ppd, OPA_LINKDOWN_REASON_UNKNOWN, 0, |
1381 | OPA_LINKDOWN_REASON_UNKNOWN); | |
1382 | ret = set_link_state(ppd, HLS_DN_DOWNDEF); | |
77241056 MM |
1383 | return ret; |
1384 | } | |
1385 | ||
25131463 DD |
1386 | static int hfi1_get_guid_be(struct rvt_dev_info *rdi, struct rvt_ibport *rvp, |
1387 | int guid_index, __be64 *guid) | |
77241056 | 1388 | { |
25131463 | 1389 | struct hfi1_ibport *ibp = container_of(rvp, struct hfi1_ibport, rvp); |
77241056 | 1390 | |
a6cd5f08 | 1391 | if (guid_index >= HFI1_GUIDS_PER_PORT) |
25131463 | 1392 | return -EINVAL; |
77241056 | 1393 | |
a6cd5f08 | 1394 | *guid = get_sguid(ibp, guid_index); |
25131463 | 1395 | return 0; |
77241056 MM |
1396 | } |
1397 | ||
77241056 MM |
1398 | /* |
1399 | * convert ah port,sl to sc | |
1400 | */ | |
1401 | u8 ah_to_sc(struct ib_device *ibdev, struct ib_ah_attr *ah) | |
1402 | { | |
1403 | struct hfi1_ibport *ibp = to_iport(ibdev, ah->port_num); | |
1404 | ||
1405 | return ibp->sl_to_sc[ah->sl]; | |
1406 | } | |
1407 | ||
15723f06 | 1408 | static int hfi1_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr) |
77241056 MM |
1409 | { |
1410 | struct hfi1_ibport *ibp; | |
1411 | struct hfi1_pportdata *ppd; | |
1412 | struct hfi1_devdata *dd; | |
1413 | u8 sc5; | |
1414 | ||
77241056 MM |
1415 | /* test the mapping for validity */ |
1416 | ibp = to_iport(ibdev, ah_attr->port_num); | |
1417 | ppd = ppd_from_ibp(ibp); | |
1418 | sc5 = ibp->sl_to_sc[ah_attr->sl]; | |
1419 | dd = dd_from_ppd(ppd); | |
1420 | if (sc_to_vlt(dd, sc5) > num_vls && sc_to_vlt(dd, sc5) != 0xf) | |
15723f06 | 1421 | return -EINVAL; |
77241056 | 1422 | return 0; |
77241056 MM |
1423 | } |
1424 | ||
8f1764fa DD |
1425 | static void hfi1_notify_new_ah(struct ib_device *ibdev, |
1426 | struct ib_ah_attr *ah_attr, | |
1427 | struct rvt_ah *ah) | |
1428 | { | |
1429 | struct hfi1_ibport *ibp; | |
1430 | struct hfi1_pportdata *ppd; | |
1431 | struct hfi1_devdata *dd; | |
1432 | u8 sc5; | |
1433 | ||
1434 | /* | |
1435 | * Do not trust reading anything from rvt_ah at this point as it is not | |
1436 | * done being setup. We can however modify things which we need to set. | |
1437 | */ | |
1438 | ||
1439 | ibp = to_iport(ibdev, ah_attr->port_num); | |
1440 | ppd = ppd_from_ibp(ibp); | |
1441 | sc5 = ibp->sl_to_sc[ah->attr.sl]; | |
1442 | dd = dd_from_ppd(ppd); | |
1443 | ah->vl = sc_to_vlt(dd, sc5); | |
1444 | if (ah->vl < num_vls || ah->vl == 15) | |
1445 | ah->log_pmtu = ilog2(dd->vld[ah->vl].mtu); | |
1446 | } | |
1447 | ||
77241056 MM |
1448 | struct ib_ah *hfi1_create_qp0_ah(struct hfi1_ibport *ibp, u16 dlid) |
1449 | { | |
1450 | struct ib_ah_attr attr; | |
1451 | struct ib_ah *ah = ERR_PTR(-EINVAL); | |
895420dd | 1452 | struct rvt_qp *qp0; |
77241056 MM |
1453 | |
1454 | memset(&attr, 0, sizeof(attr)); | |
1455 | attr.dlid = dlid; | |
1456 | attr.port_num = ppd_from_ibp(ibp)->port; | |
1457 | rcu_read_lock(); | |
4eb06882 | 1458 | qp0 = rcu_dereference(ibp->rvp.qp[0]); |
77241056 MM |
1459 | if (qp0) |
1460 | ah = ib_create_ah(qp0->ibqp.pd, &attr); | |
1461 | rcu_read_unlock(); | |
1462 | return ah; | |
1463 | } | |
1464 | ||
77241056 MM |
1465 | /** |
1466 | * hfi1_get_npkeys - return the size of the PKEY table for context 0 | |
1467 | * @dd: the hfi1_ib device | |
1468 | */ | |
1469 | unsigned hfi1_get_npkeys(struct hfi1_devdata *dd) | |
1470 | { | |
1471 | return ARRAY_SIZE(dd->pport[0].pkeys); | |
1472 | } | |
1473 | ||
77241056 MM |
1474 | static void init_ibport(struct hfi1_pportdata *ppd) |
1475 | { | |
1476 | struct hfi1_ibport *ibp = &ppd->ibport_data; | |
1477 | size_t sz = ARRAY_SIZE(ibp->sl_to_sc); | |
1478 | int i; | |
1479 | ||
1480 | for (i = 0; i < sz; i++) { | |
1481 | ibp->sl_to_sc[i] = i; | |
1482 | ibp->sc_to_sl[i] = i; | |
1483 | } | |
1484 | ||
4eb06882 | 1485 | spin_lock_init(&ibp->rvp.lock); |
77241056 | 1486 | /* Set the prefix to the default value (see ch. 4.1.1) */ |
4eb06882 DD |
1487 | ibp->rvp.gid_prefix = IB_DEFAULT_GID_PREFIX; |
1488 | ibp->rvp.sm_lid = 0; | |
77241056 | 1489 | /* Below should only set bits defined in OPA PortInfo.CapabilityMask */ |
4eb06882 | 1490 | ibp->rvp.port_cap_flags = IB_PORT_AUTO_MIGR_SUP | |
77241056 | 1491 | IB_PORT_CAP_MASK_NOTICE_SUP; |
4eb06882 DD |
1492 | ibp->rvp.pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA; |
1493 | ibp->rvp.pma_counter_select[1] = IB_PMA_PORT_RCV_DATA; | |
1494 | ibp->rvp.pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS; | |
1495 | ibp->rvp.pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS; | |
1496 | ibp->rvp.pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT; | |
1497 | ||
1498 | RCU_INIT_POINTER(ibp->rvp.qp[0], NULL); | |
1499 | RCU_INIT_POINTER(ibp->rvp.qp[1], NULL); | |
77241056 MM |
1500 | } |
1501 | ||
939b6ca8 IW |
1502 | static void hfi1_get_dev_fw_str(struct ib_device *ibdev, char *str, |
1503 | size_t str_len) | |
1504 | { | |
1505 | struct rvt_dev_info *rdi = ib_to_rvt(ibdev); | |
1506 | struct hfi1_ibdev *dev = dev_from_rdi(rdi); | |
1507 | u16 ver = dd_from_dev(dev)->dc8051_ver; | |
1508 | ||
1509 | snprintf(str, str_len, "%u.%u", dc8051_ver_maj(ver), | |
1510 | dc8051_ver_min(ver)); | |
1511 | } | |
1512 | ||
b7481944 JX |
/*
 * Names of the driver-maintained counters. alloc_hw_stats() appends them
 * after the device counter names. get_hw_stats() fills the values by the
 * same index: entry 0 from hfi1_sps_ints(), entries 1 and up from
 * hfi1_stats viewed as an array of u64.
 */
static const char * const driver_cntr_names[] = {
	/* must be element 0*/
	"DRIVER_KernIntr",
	"DRIVER_ErrorIntr",
	"DRIVER_Tx_Errs",
	"DRIVER_Rcv_Errs",
	"DRIVER_HW_Errs",
	"DRIVER_NoPIOBufs",
	"DRIVER_CtxtsOpen",
	"DRIVER_RcvLen_Errs",
	"DRIVER_EgrBufFull",
	"DRIVER_EgrHdrFull"
};

/* name tables built by alloc_hw_stats() on its first call */
static const char **dev_cntr_names;
static const char **port_cntr_names;
/* number of entries in driver_cntr_names[] */
static int num_driver_cntrs = ARRAY_SIZE(driver_cntr_names);
/* counts set by init_cntr_names(); num_dev_cntrs excludes driver counters */
static int num_dev_cntrs;
static int num_port_cntrs;
/* set to 1 once both name tables have been built */
static int cntr_names_initialized;
1533 | ||
1534 | /* | |
1535 | * Convert a list of names separated by '\n' into an array of NULL terminated | |
1536 | * strings. Optionally some entries can be reserved in the array to hold extra | |
1537 | * external strings. | |
1538 | */ | |
1539 | static int init_cntr_names(const char *names_in, | |
64b2ae74 | 1540 | const size_t names_len, |
b7481944 JX |
1541 | int num_extra_names, |
1542 | int *num_cntrs, | |
1543 | const char ***cntr_names) | |
1544 | { | |
1545 | char *names_out, *p, **q; | |
1546 | int i, n; | |
1547 | ||
1548 | n = 0; | |
1549 | for (i = 0; i < names_len; i++) | |
1550 | if (names_in[i] == '\n') | |
1551 | n++; | |
1552 | ||
1553 | names_out = kmalloc((n + num_extra_names) * sizeof(char *) + names_len, | |
1554 | GFP_KERNEL); | |
1555 | if (!names_out) { | |
1556 | *num_cntrs = 0; | |
1557 | *cntr_names = NULL; | |
1558 | return -ENOMEM; | |
1559 | } | |
1560 | ||
1561 | p = names_out + (n + num_extra_names) * sizeof(char *); | |
1562 | memcpy(p, names_in, names_len); | |
1563 | ||
1564 | q = (char **)names_out; | |
1565 | for (i = 0; i < n; i++) { | |
1566 | q[i] = p; | |
1567 | p = strchr(p, '\n'); | |
1568 | *p++ = '\0'; | |
1569 | } | |
1570 | ||
1571 | *num_cntrs = n; | |
1572 | *cntr_names = (const char **)names_out; | |
1573 | return 0; | |
1574 | } | |
1575 | ||
1576 | static struct rdma_hw_stats *alloc_hw_stats(struct ib_device *ibdev, | |
1577 | u8 port_num) | |
1578 | { | |
1579 | int i, err; | |
1580 | ||
1581 | if (!cntr_names_initialized) { | |
1582 | struct hfi1_devdata *dd = dd_from_ibdev(ibdev); | |
1583 | ||
1584 | err = init_cntr_names(dd->cntrnames, | |
1585 | dd->cntrnameslen, | |
1586 | num_driver_cntrs, | |
1587 | &num_dev_cntrs, | |
1588 | &dev_cntr_names); | |
1589 | if (err) | |
1590 | return NULL; | |
1591 | ||
1592 | for (i = 0; i < num_driver_cntrs; i++) | |
1593 | dev_cntr_names[num_dev_cntrs + i] = | |
1594 | driver_cntr_names[i]; | |
1595 | ||
1596 | err = init_cntr_names(dd->portcntrnames, | |
1597 | dd->portcntrnameslen, | |
1598 | 0, | |
1599 | &num_port_cntrs, | |
1600 | &port_cntr_names); | |
1601 | if (err) { | |
1602 | kfree(dev_cntr_names); | |
1603 | dev_cntr_names = NULL; | |
1604 | return NULL; | |
1605 | } | |
1606 | cntr_names_initialized = 1; | |
1607 | } | |
1608 | ||
1609 | if (!port_num) | |
1610 | return rdma_alloc_hw_stats_struct( | |
1611 | dev_cntr_names, | |
1612 | num_dev_cntrs + num_driver_cntrs, | |
1613 | RDMA_HW_STATS_DEFAULT_LIFESPAN); | |
1614 | else | |
1615 | return rdma_alloc_hw_stats_struct( | |
1616 | port_cntr_names, | |
1617 | num_port_cntrs, | |
1618 | RDMA_HW_STATS_DEFAULT_LIFESPAN); | |
1619 | } | |
1620 | ||
1621 | static u64 hfi1_sps_ints(void) | |
1622 | { | |
1623 | unsigned long flags; | |
1624 | struct hfi1_devdata *dd; | |
1625 | u64 sps_ints = 0; | |
1626 | ||
1627 | spin_lock_irqsave(&hfi1_devs_lock, flags); | |
1628 | list_for_each_entry(dd, &hfi1_dev_list, list) { | |
1629 | sps_ints += get_all_cpu_total(dd->int_counter); | |
1630 | } | |
1631 | spin_unlock_irqrestore(&hfi1_devs_lock, flags); | |
1632 | return sps_ints; | |
1633 | } | |
1634 | ||
1635 | static int get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats, | |
1636 | u8 port, int index) | |
1637 | { | |
1638 | u64 *values; | |
1639 | int count; | |
1640 | ||
1641 | if (!port) { | |
1642 | u64 *stats = (u64 *)&hfi1_stats; | |
1643 | int i; | |
1644 | ||
1645 | hfi1_read_cntrs(dd_from_ibdev(ibdev), NULL, &values); | |
1646 | values[num_dev_cntrs] = hfi1_sps_ints(); | |
1647 | for (i = 1; i < num_driver_cntrs; i++) | |
1648 | values[num_dev_cntrs + i] = stats[i]; | |
1649 | count = num_dev_cntrs + num_driver_cntrs; | |
1650 | } else { | |
1651 | struct hfi1_ibport *ibp = to_iport(ibdev, port); | |
1652 | ||
1653 | hfi1_read_portcntrs(ppd_from_ibp(ibp), NULL, &values); | |
1654 | count = num_port_cntrs; | |
1655 | } | |
1656 | ||
1657 | memcpy(stats->value, values, count * sizeof(u64)); | |
1658 | return count; | |
1659 | } | |
1660 | ||
77241056 MM |
1661 | /** |
1662 | * hfi1_register_ib_device - register our device with the infiniband core | |
1663 | * @dd: the device data structure | |
1664 | * Return 0 if successful, errno if unsuccessful. | |
1665 | */ | |
1666 | int hfi1_register_ib_device(struct hfi1_devdata *dd) | |
1667 | { | |
1668 | struct hfi1_ibdev *dev = &dd->verbs_dev; | |
ec3f2c12 | 1669 | struct ib_device *ibdev = &dev->rdi.ibdev; |
77241056 | 1670 | struct hfi1_pportdata *ppd = dd->pport; |
a6cd5f08 | 1671 | struct hfi1_ibport *ibp = &ppd->ibport_data; |
895420dd | 1672 | unsigned i; |
77241056 MM |
1673 | int ret; |
1674 | size_t lcpysz = IB_DEVICE_NAME_MAX; | |
77241056 | 1675 | |
77241056 MM |
1676 | for (i = 0; i < dd->num_pports; i++) |
1677 | init_ibport(ppd + i); | |
1678 | ||
1679 | /* Only need to initialize non-zero fields. */ | |
4f87ccfc | 1680 | |
045277cf | 1681 | setup_timer(&dev->mem_timer, mem_timer, (unsigned long)dev); |
77241056 | 1682 | |
77241056 | 1683 | seqlock_init(&dev->iowait_lock); |
4e045572 | 1684 | seqlock_init(&dev->txwait_lock); |
77241056 MM |
1685 | INIT_LIST_HEAD(&dev->txwait); |
1686 | INIT_LIST_HEAD(&dev->memwait); | |
1687 | ||
45842abb MM |
1688 | ret = verbs_txreq_init(dev); |
1689 | if (ret) | |
77241056 | 1690 | goto err_verbs_txreq; |
77241056 | 1691 | |
a6cd5f08 JP |
1692 | /* Use first-port GUID as node guid */ |
1693 | ibdev->node_guid = get_sguid(ibp, HFI1_PORT_GUID_INDEX); | |
1694 | ||
77241056 MM |
1695 | /* |
1696 | * The system image GUID is supposed to be the same for all | |
1697 | * HFIs in a single system but since there can be other | |
1698 | * device types in the system, we can't be sure this is unique. | |
1699 | */ | |
1700 | if (!ib_hfi1_sys_image_guid) | |
a6cd5f08 | 1701 | ib_hfi1_sys_image_guid = ibdev->node_guid; |
77241056 MM |
1702 | lcpysz = strlcpy(ibdev->name, class_name(), lcpysz); |
1703 | strlcpy(ibdev->name + lcpysz, "_%d", IB_DEVICE_NAME_MAX - lcpysz); | |
1704 | ibdev->owner = THIS_MODULE; | |
77241056 | 1705 | ibdev->phys_port_cnt = dd->num_pports; |
77241056 | 1706 | ibdev->dma_device = &dd->pcidev->dev; |
77241056 | 1707 | ibdev->modify_device = modify_device; |
b7481944 JX |
1708 | ibdev->alloc_hw_stats = alloc_hw_stats; |
1709 | ibdev->get_hw_stats = get_hw_stats; | |
4331629f DD |
1710 | |
1711 | /* keep process mad in the driver */ | |
77241056 | 1712 | ibdev->process_mad = hfi1_process_mad; |
939b6ca8 | 1713 | ibdev->get_dev_fw_str = hfi1_get_dev_fw_str; |
77241056 MM |
1714 | |
1715 | strncpy(ibdev->node_desc, init_utsname()->nodename, | |
1716 | sizeof(ibdev->node_desc)); | |
1717 | ||
ec3f2c12 DD |
1718 | /* |
1719 | * Fill in rvt info object. | |
1720 | */ | |
1721 | dd->verbs_dev.rdi.driver_f.port_callback = hfi1_create_port_files; | |
49dbb6cf DD |
1722 | dd->verbs_dev.rdi.driver_f.get_card_name = get_card_name; |
1723 | dd->verbs_dev.rdi.driver_f.get_pci_dev = get_pci_dev; | |
15723f06 | 1724 | dd->verbs_dev.rdi.driver_f.check_ah = hfi1_check_ah; |
8f1764fa | 1725 | dd->verbs_dev.rdi.driver_f.notify_new_ah = hfi1_notify_new_ah; |
25131463 | 1726 | dd->verbs_dev.rdi.driver_f.get_guid_be = hfi1_get_guid_be; |
45b59eef HC |
1727 | dd->verbs_dev.rdi.driver_f.query_port_state = query_port; |
1728 | dd->verbs_dev.rdi.driver_f.shut_down_port = shut_down_port; | |
1729 | dd->verbs_dev.rdi.driver_f.cap_mask_chg = hfi1_cap_mask_chg; | |
94d5171c HC |
1730 | /* |
1731 | * Fill in rvt info device attributes. | |
1732 | */ | |
1733 | hfi1_fill_device_attr(dd); | |
a2c2d608 DD |
1734 | |
1735 | /* queue pair */ | |
a2c2d608 DD |
1736 | dd->verbs_dev.rdi.dparms.qp_table_size = hfi1_qp_table_size; |
1737 | dd->verbs_dev.rdi.dparms.qpn_start = 0; | |
1738 | dd->verbs_dev.rdi.dparms.qpn_inc = 1; | |
1739 | dd->verbs_dev.rdi.dparms.qos_shift = dd->qos_shift; | |
1740 | dd->verbs_dev.rdi.dparms.qpn_res_start = kdeth_qp << 16; | |
1741 | dd->verbs_dev.rdi.dparms.qpn_res_end = | |
abd712da | 1742 | dd->verbs_dev.rdi.dparms.qpn_res_start + 65535; |
ec4274f1 DD |
1743 | dd->verbs_dev.rdi.dparms.max_rdma_atomic = HFI1_MAX_RDMA_ATOMIC; |
1744 | dd->verbs_dev.rdi.dparms.psn_mask = PSN_MASK; | |
1745 | dd->verbs_dev.rdi.dparms.psn_shift = PSN_SHIFT; | |
1746 | dd->verbs_dev.rdi.dparms.psn_modify_mask = PSN_MODIFY_MASK; | |
45b59eef HC |
1747 | dd->verbs_dev.rdi.dparms.core_cap_flags = RDMA_CORE_PORT_INTEL_OPA; |
1748 | dd->verbs_dev.rdi.dparms.max_mad_size = OPA_MGMT_MAD_SIZE; | |
1749 | ||
a2c2d608 DD |
1750 | dd->verbs_dev.rdi.driver_f.qp_priv_alloc = qp_priv_alloc; |
1751 | dd->verbs_dev.rdi.driver_f.qp_priv_free = qp_priv_free; | |
1752 | dd->verbs_dev.rdi.driver_f.free_all_qps = free_all_qps; | |
1753 | dd->verbs_dev.rdi.driver_f.notify_qp_reset = notify_qp_reset; | |
83693bd1 DD |
1754 | dd->verbs_dev.rdi.driver_f.do_send = hfi1_do_send; |
1755 | dd->verbs_dev.rdi.driver_f.schedule_send = hfi1_schedule_send; | |
46a80d62 | 1756 | dd->verbs_dev.rdi.driver_f.schedule_send_no_lock = _hfi1_schedule_send; |
ec4274f1 DD |
1757 | dd->verbs_dev.rdi.driver_f.get_pmtu_from_attr = get_pmtu_from_attr; |
1758 | dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; | |
1759 | dd->verbs_dev.rdi.driver_f.flush_qp_waiters = flush_qp_waiters; | |
1760 | dd->verbs_dev.rdi.driver_f.stop_send_queue = stop_send_queue; | |
1761 | dd->verbs_dev.rdi.driver_f.quiesce_qp = quiesce_qp; | |
1762 | dd->verbs_dev.rdi.driver_f.notify_error_qp = notify_error_qp; | |
1763 | dd->verbs_dev.rdi.driver_f.mtu_from_qp = mtu_from_qp; | |
1764 | dd->verbs_dev.rdi.driver_f.mtu_to_path_mtu = mtu_to_path_mtu; | |
1765 | dd->verbs_dev.rdi.driver_f.check_modify_qp = hfi1_check_modify_qp; | |
1766 | dd->verbs_dev.rdi.driver_f.modify_qp = hfi1_modify_qp; | |
56acbbfb | 1767 | dd->verbs_dev.rdi.driver_f.notify_restart_rc = hfi1_restart_rc; |
46a80d62 | 1768 | dd->verbs_dev.rdi.driver_f.check_send_wqe = hfi1_check_send_wqe; |
a2c2d608 | 1769 | |
abd712da DD |
1770 | /* completeion queue */ |
1771 | snprintf(dd->verbs_dev.rdi.dparms.cq_name, | |
1772 | sizeof(dd->verbs_dev.rdi.dparms.cq_name), | |
1773 | "hfi1_cq%d", dd->unit); | |
27807392 | 1774 | dd->verbs_dev.rdi.dparms.node = dd->node; |
abd712da | 1775 | |
a2c2d608 | 1776 | /* misc settings */ |
abd712da | 1777 | dd->verbs_dev.rdi.flags = 0; /* Let rdmavt handle it all */ |
895420dd | 1778 | dd->verbs_dev.rdi.dparms.lkey_table_size = hfi1_lkey_table_size; |
4eb06882 DD |
1779 | dd->verbs_dev.rdi.dparms.nports = dd->num_pports; |
1780 | dd->verbs_dev.rdi.dparms.npkeys = hfi1_get_npkeys(dd); | |
1781 | ||
1ac57c50 MM |
1782 | /* post send table */ |
1783 | dd->verbs_dev.rdi.post_parms = hfi1_post_parms; | |
1784 | ||
4eb06882 DD |
1785 | ppd = dd->pport; |
1786 | for (i = 0; i < dd->num_pports; i++, ppd++) | |
1787 | rvt_init_port(&dd->verbs_dev.rdi, | |
1788 | &ppd->ibport_data.rvp, | |
1789 | i, | |
1790 | ppd->pkeys); | |
ec3f2c12 DD |
1791 | |
1792 | ret = rvt_register_device(&dd->verbs_dev.rdi); | |
77241056 | 1793 | if (ret) |
9c4a311e | 1794 | goto err_verbs_txreq; |
77241056 MM |
1795 | |
1796 | ret = hfi1_verbs_register_sysfs(dd); | |
1797 | if (ret) | |
1798 | goto err_class; | |
1799 | ||
9c4a311e | 1800 | return ret; |
77241056 MM |
1801 | |
1802 | err_class: | |
ec3f2c12 | 1803 | rvt_unregister_device(&dd->verbs_dev.rdi); |
77241056 | 1804 | err_verbs_txreq: |
45842abb | 1805 | verbs_txreq_exit(dev); |
77241056 | 1806 | dd_dev_err(dd, "cannot register verbs: %d!\n", -ret); |
77241056 MM |
1807 | return ret; |
1808 | } | |
1809 | ||
1810 | void hfi1_unregister_ib_device(struct hfi1_devdata *dd) | |
1811 | { | |
1812 | struct hfi1_ibdev *dev = &dd->verbs_dev; | |
77241056 MM |
1813 | |
1814 | hfi1_verbs_unregister_sysfs(dd); | |
1815 | ||
ec3f2c12 | 1816 | rvt_unregister_device(&dd->verbs_dev.rdi); |
77241056 MM |
1817 | |
1818 | if (!list_empty(&dev->txwait)) | |
1819 | dd_dev_err(dd, "txwait list not empty!\n"); | |
1820 | if (!list_empty(&dev->memwait)) | |
1821 | dd_dev_err(dd, "memwait list not empty!\n"); | |
77241056 | 1822 | |
77241056 | 1823 | del_timer_sync(&dev->mem_timer); |
45842abb | 1824 | verbs_txreq_exit(dev); |
b7481944 JX |
1825 | |
1826 | kfree(dev_cntr_names); | |
1827 | kfree(port_cntr_names); | |
1828 | cntr_names_initialized = 0; | |
77241056 MM |
1829 | } |
1830 | ||
77241056 MM |
1831 | void hfi1_cnp_rcv(struct hfi1_packet *packet) |
1832 | { | |
f3e862cb | 1833 | struct hfi1_ibport *ibp = rcd_to_iport(packet->rcd); |
977940b8 | 1834 | struct hfi1_pportdata *ppd = ppd_from_ibp(ibp); |
261a4351 | 1835 | struct ib_header *hdr = packet->hdr; |
895420dd | 1836 | struct rvt_qp *qp = packet->qp; |
977940b8 AK |
1837 | u32 lqpn, rqpn = 0; |
1838 | u16 rlid = 0; | |
b736a469 | 1839 | u8 sl, sc5, svc_type; |
977940b8 AK |
1840 | |
1841 | switch (packet->qp->ibqp.qp_type) { | |
1842 | case IB_QPT_UC: | |
1843 | rlid = qp->remote_ah_attr.dlid; | |
1844 | rqpn = qp->remote_qpn; | |
1845 | svc_type = IB_CC_SVCTYPE_UC; | |
1846 | break; | |
1847 | case IB_QPT_RC: | |
1848 | rlid = qp->remote_ah_attr.dlid; | |
1849 | rqpn = qp->remote_qpn; | |
1850 | svc_type = IB_CC_SVCTYPE_RC; | |
1851 | break; | |
1852 | case IB_QPT_SMI: | |
1853 | case IB_QPT_GSI: | |
1854 | case IB_QPT_UD: | |
1855 | svc_type = IB_CC_SVCTYPE_UD; | |
1856 | break; | |
1857 | default: | |
4eb06882 | 1858 | ibp->rvp.n_pkt_drops++; |
977940b8 AK |
1859 | return; |
1860 | } | |
1861 | ||
261a4351 | 1862 | sc5 = hdr2sc(hdr, packet->rhf); |
977940b8 AK |
1863 | sl = ibp->sc_to_sl[sc5]; |
1864 | lqpn = qp->ibqp.qp_num; | |
1865 | ||
1866 | process_becn(ppd, sl, rlid, lqpn, rqpn, svc_type); | |
77241056 | 1867 | } |