drivers/infiniband/hw/mlx4/mad.c
1 /*
2 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <rdma/ib_mad.h>
34 #include <rdma/ib_smi.h>
35 #include <rdma/ib_sa.h>
36 #include <rdma/ib_cache.h>
37
38 #include <linux/random.h>
39 #include <linux/mlx4/cmd.h>
40 #include <linux/gfp.h>
41 #include <rdma/ib_pma.h>
42 #include <linux/ip.h>
43 #include <net/ipv6.h>
44
45 #include <linux/mlx4/driver.h>
46 #include "mlx4_ib.h"
47
48 enum {
49 MLX4_IB_VENDOR_CLASS1 = 0x9,
50 MLX4_IB_VENDOR_CLASS2 = 0xa
51 };
52
53 #define MLX4_TUN_SEND_WRID_SHIFT 34
54 #define MLX4_TUN_QPN_SHIFT 32
55 #define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT)
56 #define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT)
57
58 #define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1)
59 #define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3)
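/*
 * Tunnel WR IDs, per the macros above, encode the ring index in the low
 * 32 bits, the proxy QP type (0 = SMI, 1 = GSI) in bits 32-33, and a
 * receive flag in bit 34.
 */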
60
61 /* Port mgmt change event handling */
62
63 #define GET_BLK_PTR_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.block_ptr)
64 #define GET_MASK_FROM_EQE(eqe) be32_to_cpu(eqe->event.port_mgmt_change.params.tbl_change_info.tbl_entries_mask)
65 #define NUM_IDX_IN_PKEY_TBL_BLK 32
66 #define GUID_TBL_ENTRY_SIZE 8 /* size in bytes */
67 #define GUID_TBL_BLK_NUM_ENTRIES 8
68 #define GUID_TBL_BLK_SIZE (GUID_TBL_ENTRY_SIZE * GUID_TBL_BLK_NUM_ENTRIES)
69
70 struct mlx4_mad_rcv_buf {
71 struct ib_grh grh;
72 u8 payload[256];
73 } __packed;
74
75 struct mlx4_mad_snd_buf {
76 u8 payload[256];
77 } __packed;
78
79 struct mlx4_tunnel_mad {
80 struct ib_grh grh;
81 struct mlx4_ib_tunnel_header hdr;
82 struct ib_mad mad;
83 } __packed;
84
85 struct mlx4_rcv_tunnel_mad {
86 struct mlx4_rcv_tunnel_hdr hdr;
87 struct ib_grh grh;
88 struct ib_mad mad;
89 } __packed;
90
91 static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num);
92 static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num);
93 static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
94 int block, u32 change_bitmap);
95
96 __be64 mlx4_ib_gen_node_guid(void)
97 {
98 #define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
99 return cpu_to_be64(NODE_GUID_HI | prandom_u32());
100 }
101
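/*
 * Demux TIDs are generated with 0xff in the top byte; that byte is reused
 * as the slave id when responses are demultiplexed, and 255 marks the
 * master (dom0).
 */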
102 __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
103 {
104 return cpu_to_be64(atomic_inc_return(&ctx->tid)) |
105 cpu_to_be64(0xff00000000000000LL);
106 }
107
108 int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int mad_ifc_flags,
109 int port, const struct ib_wc *in_wc,
110 const struct ib_grh *in_grh,
111 const void *in_mad, void *response_mad)
112 {
113 struct mlx4_cmd_mailbox *inmailbox, *outmailbox;
114 void *inbox;
115 int err;
116 u32 in_modifier = port;
117 u8 op_modifier = 0;
118
119 inmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
120 if (IS_ERR(inmailbox))
121 return PTR_ERR(inmailbox);
122 inbox = inmailbox->buf;
123
124 outmailbox = mlx4_alloc_cmd_mailbox(dev->dev);
125 if (IS_ERR(outmailbox)) {
126 mlx4_free_cmd_mailbox(dev->dev, inmailbox);
127 return PTR_ERR(outmailbox);
128 }
129
130 memcpy(inbox, in_mad, 256);
131
132 /*
133 * Key check traps can't be generated unless we have in_wc to
134 * tell us where to send the trap.
135 */
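/*
 * op_modifier bits, as set below: 0x1 skips the M_Key check, 0x2 skips
 * the B_Key check, 0x4 marks the extended WC/GRH info in the second half
 * of the mailbox as valid, and 0x8 selects the native (network-view)
 * command path on multi-function devices.
 */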
136 if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_MKEY) || !in_wc)
137 op_modifier |= 0x1;
138 if ((mad_ifc_flags & MLX4_MAD_IFC_IGNORE_BKEY) || !in_wc)
139 op_modifier |= 0x2;
140 if (mlx4_is_mfunc(dev->dev) &&
141 (mad_ifc_flags & MLX4_MAD_IFC_NET_VIEW || in_wc))
142 op_modifier |= 0x8;
143
144 if (in_wc) {
145 struct {
146 __be32 my_qpn;
147 u32 reserved1;
148 __be32 rqpn;
149 u8 sl;
150 u8 g_path;
151 u16 reserved2[2];
152 __be16 pkey;
153 u32 reserved3[11];
154 u8 grh[40];
155 } *ext_info;
156
157 memset(inbox + 256, 0, 256);
158 ext_info = inbox + 256;
159
160 ext_info->my_qpn = cpu_to_be32(in_wc->qp->qp_num);
161 ext_info->rqpn = cpu_to_be32(in_wc->src_qp);
162 ext_info->sl = in_wc->sl << 4;
163 ext_info->g_path = in_wc->dlid_path_bits |
164 (in_wc->wc_flags & IB_WC_GRH ? 0x80 : 0);
165 ext_info->pkey = cpu_to_be16(in_wc->pkey_index);
166
167 if (in_grh)
168 memcpy(ext_info->grh, in_grh, 40);
169
170 op_modifier |= 0x4;
171
172 in_modifier |= in_wc->slid << 16;
173 }
174
175 err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma, in_modifier,
176 mlx4_is_master(dev->dev) ? (op_modifier & ~0x8) : op_modifier,
177 MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
178 (op_modifier & 0x8) ? MLX4_CMD_NATIVE : MLX4_CMD_WRAPPED);
179
180 if (!err)
181 memcpy(response_mad, outmailbox->buf, 256);
182
183 mlx4_free_cmd_mailbox(dev->dev, inmailbox);
184 mlx4_free_cmd_mailbox(dev->dev, outmailbox);
185
186 return err;
187 }
188
189 static void update_sm_ah(struct mlx4_ib_dev *dev, u8 port_num, u16 lid, u8 sl)
190 {
191 struct ib_ah *new_ah;
192 struct rdma_ah_attr ah_attr;
193 unsigned long flags;
194
195 if (!dev->send_agent[port_num - 1][0])
196 return;
197
198 memset(&ah_attr, 0, sizeof ah_attr);
199 rdma_ah_set_dlid(&ah_attr, lid);
200 rdma_ah_set_sl(&ah_attr, sl);
201 rdma_ah_set_port_num(&ah_attr, port_num);
202
203 new_ah = rdma_create_ah(dev->send_agent[port_num - 1][0]->qp->pd,
204 &ah_attr);
205 if (IS_ERR(new_ah))
206 return;
207
208 spin_lock_irqsave(&dev->sm_lock, flags);
209 if (dev->sm_ah[port_num - 1])
210 rdma_destroy_ah(dev->sm_ah[port_num - 1]);
211 dev->sm_ah[port_num - 1] = new_ah;
212 spin_unlock_irqrestore(&dev->sm_lock, flags);
213 }
214
215 /*
216 * Snoop SM MADs for port info, GUID info, and P_Key table sets, so we can
217 * synthesize LID change, Client-Rereg, GID change, and P_Key change events.
218 */
219 static void smp_snoop(struct ib_device *ibdev, u8 port_num, const struct ib_mad *mad,
220 u16 prev_lid)
221 {
222 struct ib_port_info *pinfo;
223 u16 lid;
224 __be16 *base;
225 u32 bn, pkey_change_bitmap;
226 int i;
227
228
229 struct mlx4_ib_dev *dev = to_mdev(ibdev);
230 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
231 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
232 mad->mad_hdr.method == IB_MGMT_METHOD_SET)
233 switch (mad->mad_hdr.attr_id) {
234 case IB_SMP_ATTR_PORT_INFO:
235 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
236 return;
237 pinfo = (struct ib_port_info *) ((struct ib_smp *) mad)->data;
238 lid = be16_to_cpu(pinfo->lid);
239
240 update_sm_ah(dev, port_num,
241 be16_to_cpu(pinfo->sm_lid),
242 pinfo->neighbormtu_mastersmsl & 0xf);
243
244 if (pinfo->clientrereg_resv_subnetto & 0x80)
245 handle_client_rereg_event(dev, port_num);
246
247 if (prev_lid != lid)
248 handle_lid_change_event(dev, port_num);
249 break;
250
251 case IB_SMP_ATTR_PKEY_TABLE:
252 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
253 return;
254 if (!mlx4_is_mfunc(dev->dev)) {
255 mlx4_ib_dispatch_event(dev, port_num,
256 IB_EVENT_PKEY_CHANGE);
257 break;
258 }
259
260 /* at this point, we are running in the master.
261 * Slaves do not receive SMPs.
262 */
263 bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF;
264 base = (__be16 *) &(((struct ib_smp *)mad)->data[0]);
265 pkey_change_bitmap = 0;
266 for (i = 0; i < 32; i++) {
267 pr_debug("PKEY[%d] = x%x\n",
268 i + bn*32, be16_to_cpu(base[i]));
269 if (be16_to_cpu(base[i]) !=
270 dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) {
271 pkey_change_bitmap |= (1 << i);
272 dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] =
273 be16_to_cpu(base[i]);
274 }
275 }
276 pr_debug("PKEY Change event: port=%d, "
277 "block=0x%x, change_bitmap=0x%x\n",
278 port_num, bn, pkey_change_bitmap);
279
280 if (pkey_change_bitmap) {
281 mlx4_ib_dispatch_event(dev, port_num,
282 IB_EVENT_PKEY_CHANGE);
283 if (!dev->sriov.is_going_down)
284 __propagate_pkey_ev(dev, port_num, bn,
285 pkey_change_bitmap);
286 }
287 break;
288
289 case IB_SMP_ATTR_GUID_INFO:
290 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV)
291 return;
292 /* paravirtualized master's guid is guid 0 -- does not change */
293 if (!mlx4_is_master(dev->dev))
294 mlx4_ib_dispatch_event(dev, port_num,
295 IB_EVENT_GID_CHANGE);
296 /*if master, notify relevant slaves*/
297 if (mlx4_is_master(dev->dev) &&
298 !dev->sriov.is_going_down) {
299 bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod);
300 mlx4_ib_update_cache_on_guid_change(dev, bn, port_num,
301 (u8 *)(&((struct ib_smp *)mad)->data));
302 mlx4_ib_notify_slaves_on_guid_change(dev, bn, port_num,
303 (u8 *)(&((struct ib_smp *)mad)->data));
304 }
305 break;
306
307 case IB_SMP_ATTR_SL_TO_VL_TABLE:
308 /* cache sl to vl mapping changes for use in
309 * filling QP1 LRH VL field when sending packets
310 */
311 if (dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV &&
312 dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)
313 return;
314 if (!mlx4_is_slave(dev->dev)) {
315 union sl2vl_tbl_to_u64 sl2vl64;
316 int jj;
317
318 for (jj = 0; jj < 8; jj++) {
319 sl2vl64.sl8[jj] = ((struct ib_smp *)mad)->data[jj];
320 pr_debug("port %u, sl2vl[%d] = %02x\n",
321 port_num, jj, sl2vl64.sl8[jj]);
322 }
323 atomic64_set(&dev->sl2vl[port_num - 1], sl2vl64.sl64);
324 }
325 break;
326
327 default:
328 break;
329 }
330 }
331
332 static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
333 int block, u32 change_bitmap)
334 {
335 int i, ix, slave, err;
336 int have_event = 0;
337
338 for (slave = 0; slave < dev->dev->caps.sqp_demux; slave++) {
339 if (slave == mlx4_master_func_num(dev->dev))
340 continue;
341 if (!mlx4_is_slave_active(dev->dev, slave))
342 continue;
343
344 have_event = 0;
345 for (i = 0; i < 32; i++) {
346 if (!(change_bitmap & (1 << i)))
347 continue;
348 for (ix = 0;
349 ix < dev->dev->caps.pkey_table_len[port_num]; ix++) {
350 if (dev->pkeys.virt2phys_pkey[slave][port_num - 1]
351 [ix] == i + 32 * block) {
352 err = mlx4_gen_pkey_eqe(dev->dev, slave, port_num);
353 pr_debug("propagate_pkey_ev: slave %d,"
354 " port %d, ix %d (%d)\n",
355 slave, port_num, ix, err);
356 have_event = 1;
357 break;
358 }
359 }
360 if (have_event)
361 break;
362 }
363 }
364 }
365
366 static void node_desc_override(struct ib_device *dev,
367 struct ib_mad *mad)
368 {
369 unsigned long flags;
370
371 if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
372 mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
373 mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP &&
374 mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) {
375 spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags);
376 memcpy(((struct ib_smp *) mad)->data, dev->node_desc,
377 IB_DEVICE_NODE_DESC_MAX);
378 spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags);
379 }
380 }
381
382 static void forward_trap(struct mlx4_ib_dev *dev, u8 port_num, const struct ib_mad *mad)
383 {
384 int qpn = mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED;
385 struct ib_mad_send_buf *send_buf;
386 struct ib_mad_agent *agent = dev->send_agent[port_num - 1][qpn];
387 int ret;
388 unsigned long flags;
389
390 if (agent) {
391 send_buf = ib_create_send_mad(agent, qpn, 0, 0, IB_MGMT_MAD_HDR,
392 IB_MGMT_MAD_DATA, GFP_ATOMIC,
393 IB_MGMT_BASE_VERSION);
394 if (IS_ERR(send_buf))
395 return;
396 /*
397 * We rely here on the fact that MLX QPs don't use the
398 * address handle after the send is posted (this is
399 * wrong following the IB spec strictly, but we know
400 * it's OK for our devices).
401 */
402 spin_lock_irqsave(&dev->sm_lock, flags);
403 memcpy(send_buf->mad, mad, sizeof *mad);
404 if ((send_buf->ah = dev->sm_ah[port_num - 1]))
405 ret = ib_post_send_mad(send_buf, NULL);
406 else
407 ret = -EINVAL;
408 spin_unlock_irqrestore(&dev->sm_lock, flags);
409
410 if (ret)
411 ib_free_send_mad(send_buf);
412 }
413 }
414
415 static int mlx4_ib_demux_sa_handler(struct ib_device *ibdev, int port, int slave,
416 struct ib_sa_mad *sa_mad)
417 {
418 int ret = 0;
419
420 /* dispatch to different sa handlers */
421 switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
422 case IB_SA_ATTR_MC_MEMBER_REC:
423 ret = mlx4_ib_mcg_demux_handler(ibdev, port, slave, sa_mad);
424 break;
425 default:
426 break;
427 }
428 return ret;
429 }
430
431 int mlx4_ib_find_real_gid(struct ib_device *ibdev, u8 port, __be64 guid)
432 {
433 struct mlx4_ib_dev *dev = to_mdev(ibdev);
434 int i;
435
436 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
437 if (dev->sriov.demux[port - 1].guid_cache[i] == guid)
438 return i;
439 }
440 return -1;
441 }
442
443
444 static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave,
445 u8 port, u16 pkey, u16 *ix)
446 {
447 int i, ret;
448 u8 unassigned_pkey_ix, pkey_ix, partial_ix = 0xFF;
449 u16 slot_pkey;
450
451 if (slave == mlx4_master_func_num(dev->dev))
452 return ib_find_cached_pkey(&dev->ib_dev, port, pkey, ix);
453
454 unassigned_pkey_ix = dev->dev->phys_caps.pkey_phys_table_len[port] - 1;
455
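/* Prefer an exact match on a full-membership P_Key (bit 15 set);
 * otherwise fall back to the first partial-membership match found.
 */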
456 for (i = 0; i < dev->dev->caps.pkey_table_len[port]; i++) {
457 if (dev->pkeys.virt2phys_pkey[slave][port - 1][i] == unassigned_pkey_ix)
458 continue;
459
460 pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][i];
461
462 ret = ib_get_cached_pkey(&dev->ib_dev, port, pkey_ix, &slot_pkey);
463 if (ret)
464 continue;
465 if ((slot_pkey & 0x7FFF) == (pkey & 0x7FFF)) {
466 if (slot_pkey & 0x8000) {
467 *ix = (u16) pkey_ix;
468 return 0;
469 } else {
470 /* take first partial pkey index found */
471 if (partial_ix == 0xFF)
472 partial_ix = pkey_ix;
473 }
474 }
475 }
476
477 if (partial_ix < 0xFF) {
478 *ix = (u16) partial_ix;
479 return 0;
480 }
481
482 return -EINVAL;
483 }
484
485 static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid,
486 union ib_gid *dgid)
487 {
488 int version = ib_get_rdma_header_version((const union rdma_network_hdr *)grh);
489 enum rdma_network_type net_type;
490
491 if (version == 4)
492 net_type = RDMA_NETWORK_IPV4;
493 else if (version == 6)
494 net_type = RDMA_NETWORK_IPV6;
495 else
496 return -EINVAL;
497
498 return ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type,
499 sgid, dgid);
500 }
501
502 int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port,
503 enum ib_qp_type dest_qpt, struct ib_wc *wc,
504 struct ib_grh *grh, struct ib_mad *mad)
505 {
506 struct ib_sge list;
507 struct ib_ud_wr wr;
508 struct ib_send_wr *bad_wr;
509 struct mlx4_ib_demux_pv_ctx *tun_ctx;
510 struct mlx4_ib_demux_pv_qp *tun_qp;
511 struct mlx4_rcv_tunnel_mad *tun_mad;
512 struct rdma_ah_attr attr;
513 struct ib_ah *ah;
514 struct ib_qp *src_qp = NULL;
515 unsigned tun_tx_ix = 0;
516 int dqpn;
517 int ret = 0;
518 u16 tun_pkey_ix;
519 u16 cached_pkey;
520 u8 is_eth = dev->dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH;
521
522 if (dest_qpt > IB_QPT_GSI)
523 return -EINVAL;
524
525 tun_ctx = dev->sriov.demux[port-1].tun[slave];
526
527 /* check if proxy qp created */
528 if (!tun_ctx || tun_ctx->state != DEMUX_PV_STATE_ACTIVE)
529 return -EAGAIN;
530
531 if (!dest_qpt)
532 tun_qp = &tun_ctx->qp[0];
533 else
534 tun_qp = &tun_ctx->qp[1];
535
536 /* compute P_Key index to put in tunnel header for slave */
537 if (dest_qpt) {
538 u16 pkey_ix;
539 ret = ib_get_cached_pkey(&dev->ib_dev, port, wc->pkey_index, &cached_pkey);
540 if (ret)
541 return -EINVAL;
542
543 ret = find_slave_port_pkey_ix(dev, slave, port, cached_pkey, &pkey_ix);
544 if (ret)
545 return -EINVAL;
546 tun_pkey_ix = pkey_ix;
547 } else
548 tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
549
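/* Each slave owns an 8-QP block starting at base_proxy_sqpn + 8 * slave;
 * its QP0 proxies for ports 1 and 2 come first, then the QP1 proxies,
 * which lets the destination proxy QPN be computed directly below.
 */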
550 dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1;
551
552 /* get tunnel tx data buf for slave */
553 src_qp = tun_qp->qp;
554
555 /* create ah. Just need an empty one with the port num for the post send.
556 * The driver will set the force loopback bit in post_send */
557 memset(&attr, 0, sizeof attr);
558
559 rdma_ah_set_port_num(&attr, port);
560 if (is_eth) {
561 union ib_gid sgid;
562 union ib_gid dgid;
563
564 if (get_gids_from_l3_hdr(grh, &sgid, &dgid))
565 return -EINVAL;
566 rdma_ah_set_grh(&attr, &dgid, 0, 0, 0, 0);
567 }
568 ah = rdma_create_ah(tun_ctx->pd, &attr);
569 if (IS_ERR(ah))
570 return -ENOMEM;
571
572 /* allocate tunnel tx buf after passing the failure-return checks above */
573 spin_lock(&tun_qp->tx_lock);
574 if (tun_qp->tx_ix_head - tun_qp->tx_ix_tail >=
575 (MLX4_NUM_TUNNEL_BUFS - 1))
576 ret = -EAGAIN;
577 else
578 tun_tx_ix = (++tun_qp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
579 spin_unlock(&tun_qp->tx_lock);
580 if (ret)
581 goto end;
582
583 tun_mad = (struct mlx4_rcv_tunnel_mad *) (tun_qp->tx_ring[tun_tx_ix].buf.addr);
584 if (tun_qp->tx_ring[tun_tx_ix].ah)
585 rdma_destroy_ah(tun_qp->tx_ring[tun_tx_ix].ah);
586 tun_qp->tx_ring[tun_tx_ix].ah = ah;
587 ib_dma_sync_single_for_cpu(&dev->ib_dev,
588 tun_qp->tx_ring[tun_tx_ix].buf.map,
589 sizeof (struct mlx4_rcv_tunnel_mad),
590 DMA_TO_DEVICE);
591
592 /* copy over to tunnel buffer */
593 if (grh)
594 memcpy(&tun_mad->grh, grh, sizeof *grh);
595 memcpy(&tun_mad->mad, mad, sizeof *mad);
596
597 /* adjust tunnel data */
598 tun_mad->hdr.pkey_index = cpu_to_be16(tun_pkey_ix);
599 tun_mad->hdr.flags_src_qp = cpu_to_be32(wc->src_qp & 0xFFFFFF);
600 tun_mad->hdr.g_ml_path = (grh && (wc->wc_flags & IB_WC_GRH)) ? 0x80 : 0;
601
602 if (is_eth) {
603 u16 vlan = 0;
604 if (mlx4_get_slave_default_vlan(dev->dev, port, slave, &vlan,
605 NULL)) {
606 /* VST mode */
607 if (vlan != wc->vlan_id)
608 /* Packet vlan is not the VST-assigned vlan.
609 * Drop the packet.
610 */
611 goto out;
612 else
613 /* Remove the vlan tag before forwarding
614 * the packet to the VF.
615 */
616 vlan = 0xffff;
617 } else {
618 vlan = wc->vlan_id;
619 }
620
621 tun_mad->hdr.sl_vid = cpu_to_be16(vlan);
622 memcpy((char *)&tun_mad->hdr.mac_31_0, &(wc->smac[0]), 4);
623 memcpy((char *)&tun_mad->hdr.slid_mac_47_32, &(wc->smac[4]), 2);
624 } else {
625 tun_mad->hdr.sl_vid = cpu_to_be16(((u16)(wc->sl)) << 12);
626 tun_mad->hdr.slid_mac_47_32 = cpu_to_be16(wc->slid);
627 }
628
629 ib_dma_sync_single_for_device(&dev->ib_dev,
630 tun_qp->tx_ring[tun_tx_ix].buf.map,
631 sizeof (struct mlx4_rcv_tunnel_mad),
632 DMA_TO_DEVICE);
633
634 list.addr = tun_qp->tx_ring[tun_tx_ix].buf.map;
635 list.length = sizeof (struct mlx4_rcv_tunnel_mad);
636 list.lkey = tun_ctx->pd->local_dma_lkey;
637
638 wr.ah = ah;
639 wr.port_num = port;
640 wr.remote_qkey = IB_QP_SET_QKEY;
641 wr.remote_qpn = dqpn;
642 wr.wr.next = NULL;
643 wr.wr.wr_id = ((u64) tun_tx_ix) | MLX4_TUN_SET_WRID_QPN(dest_qpt);
644 wr.wr.sg_list = &list;
645 wr.wr.num_sge = 1;
646 wr.wr.opcode = IB_WR_SEND;
647 wr.wr.send_flags = IB_SEND_SIGNALED;
648
649 ret = ib_post_send(src_qp, &wr.wr, &bad_wr);
650 if (!ret)
651 return 0;
652 out:
653 spin_lock(&tun_qp->tx_lock);
654 tun_qp->tx_ix_tail++;
655 spin_unlock(&tun_qp->tx_lock);
656 tun_qp->tx_ring[tun_tx_ix].ah = NULL;
657 end:
658 rdma_destroy_ah(ah);
659 return ret;
660 }
661
662 static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port,
663 struct ib_wc *wc, struct ib_grh *grh,
664 struct ib_mad *mad)
665 {
666 struct mlx4_ib_dev *dev = to_mdev(ibdev);
667 int err, other_port;
668 int slave = -1;
669 u8 *slave_id;
670 int is_eth = 0;
671
672 if (rdma_port_get_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND)
673 is_eth = 0;
674 else
675 is_eth = 1;
676
677 if (is_eth) {
678 union ib_gid dgid;
679 union ib_gid sgid;
680
681 if (get_gids_from_l3_hdr(grh, &sgid, &dgid))
682 return -EINVAL;
683 if (!(wc->wc_flags & IB_WC_GRH)) {
684 mlx4_ib_warn(ibdev, "RoCE grh not present.\n");
685 return -EINVAL;
686 }
687 if (mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_CM) {
688 mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n");
689 return -EINVAL;
690 }
691 err = mlx4_get_slave_from_roce_gid(dev->dev, port, dgid.raw, &slave);
692 if (err && mlx4_is_mf_bonded(dev->dev)) {
693 other_port = (port == 1) ? 2 : 1;
694 err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, dgid.raw, &slave);
695 if (!err) {
696 port = other_port;
697 pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n",
698 slave, grh->dgid.raw, port, other_port);
699 }
700 }
701 if (err) {
702 mlx4_ib_warn(ibdev, "failed matching grh\n");
703 return -ENOENT;
704 }
705 if (slave >= dev->dev->caps.sqp_demux) {
706 mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
707 slave, dev->dev->caps.sqp_demux);
708 return -ENOENT;
709 }
710
711 if (mlx4_ib_demux_cm_handler(ibdev, port, NULL, mad))
712 return 0;
713
714 err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
715 if (err)
716 pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
717 slave, err);
718 return 0;
719 }
720
721 /* Initially assume that this mad is for us */
722 slave = mlx4_master_func_num(dev->dev);
723
724 /* See if the slave id is encoded in a response mad */
725 if (mad->mad_hdr.method & 0x80) {
726 slave_id = (u8 *) &mad->mad_hdr.tid;
727 slave = *slave_id;
728 if (slave != 255) /*255 indicates the dom0*/
729 *slave_id = 0; /* remap tid */
730 }
731
732 /* If a grh is present, we demux according to it */
733 if (wc->wc_flags & IB_WC_GRH) {
734 if (grh->dgid.global.interface_id ==
735 cpu_to_be64(IB_SA_WELL_KNOWN_GUID) &&
736 grh->dgid.global.subnet_prefix == cpu_to_be64(
737 atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix))) {
738 slave = 0;
739 } else {
740 slave = mlx4_ib_find_real_gid(ibdev, port,
741 grh->dgid.global.interface_id);
742 if (slave < 0) {
743 mlx4_ib_warn(ibdev, "failed matching grh\n");
744 return -ENOENT;
745 }
746 }
747 }
748 /* Class-specific handling */
749 switch (mad->mad_hdr.mgmt_class) {
750 case IB_MGMT_CLASS_SUBN_LID_ROUTED:
751 case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
752 /* 255 indicates the dom0 */
753 if (slave != 255 && slave != mlx4_master_func_num(dev->dev)) {
754 if (!mlx4_vf_smi_enabled(dev->dev, slave, port))
755 return -EPERM;
756 /* for a VF, drop unsolicited MADs */
757 if (!(mad->mad_hdr.method & IB_MGMT_METHOD_RESP)) {
758 mlx4_ib_warn(ibdev, "demux QP0. rejecting unsolicited mad for slave %d class 0x%x, method 0x%x\n",
759 slave, mad->mad_hdr.mgmt_class,
760 mad->mad_hdr.method);
761 return -EINVAL;
762 }
763 }
764 break;
765 case IB_MGMT_CLASS_SUBN_ADM:
766 if (mlx4_ib_demux_sa_handler(ibdev, port, slave,
767 (struct ib_sa_mad *) mad))
768 return 0;
769 break;
770 case IB_MGMT_CLASS_CM:
771 if (mlx4_ib_demux_cm_handler(ibdev, port, &slave, mad))
772 return 0;
773 break;
774 case IB_MGMT_CLASS_DEVICE_MGMT:
775 if (mad->mad_hdr.method != IB_MGMT_METHOD_GET_RESP)
776 return 0;
777 break;
778 default:
779 /* Drop unsupported classes for slaves in tunnel mode */
780 if (slave != mlx4_master_func_num(dev->dev)) {
781 pr_debug("dropping unsupported ingress mad from class:%d "
782 "for slave:%d\n", mad->mad_hdr.mgmt_class, slave);
783 return 0;
784 }
785 }
786 /* make sure a slave==255 (dom0 marker) that was not handled above does not slip through */
787 if (slave >= dev->dev->caps.sqp_demux) {
788 mlx4_ib_warn(ibdev, "slave id: %d is bigger than allowed:%d\n",
789 slave, dev->dev->caps.sqp_demux);
790 return -ENOENT;
791 }
792
793 err = mlx4_ib_send_to_slave(dev, slave, port, wc->qp->qp_type, wc, grh, mad);
794 if (err)
795 pr_debug("failed sending to slave %d via tunnel qp (%d)\n",
796 slave, err);
797 return 0;
798 }
799
800 static int ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
801 const struct ib_wc *in_wc, const struct ib_grh *in_grh,
802 const struct ib_mad *in_mad, struct ib_mad *out_mad)
803 {
804 u16 slid, prev_lid = 0;
805 int err;
806 struct ib_port_attr pattr;
807
808 if (in_wc && in_wc->qp->qp_num) {
809 pr_debug("received MAD: slid:%d sqpn:%d "
810 "dlid_bits:%d dqpn:%d wc_flags:0x%x, cls %x, mtd %x, atr %x\n",
811 in_wc->slid, in_wc->src_qp,
812 in_wc->dlid_path_bits,
813 in_wc->qp->qp_num,
814 in_wc->wc_flags,
815 in_mad->mad_hdr.mgmt_class, in_mad->mad_hdr.method,
816 be16_to_cpu(in_mad->mad_hdr.attr_id));
817 if (in_wc->wc_flags & IB_WC_GRH) {
818 pr_debug("sgid_hi:0x%016llx sgid_lo:0x%016llx\n",
819 be64_to_cpu(in_grh->sgid.global.subnet_prefix),
820 be64_to_cpu(in_grh->sgid.global.interface_id));
821 pr_debug("dgid_hi:0x%016llx dgid_lo:0x%016llx\n",
822 be64_to_cpu(in_grh->dgid.global.subnet_prefix),
823 be64_to_cpu(in_grh->dgid.global.interface_id));
824 }
825 }
826
827 slid = in_wc ? in_wc->slid : be16_to_cpu(IB_LID_PERMISSIVE);
828
829 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP && slid == 0) {
830 forward_trap(to_mdev(ibdev), port_num, in_mad);
831 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
832 }
833
834 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
835 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
836 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
837 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET &&
838 in_mad->mad_hdr.method != IB_MGMT_METHOD_TRAP_REPRESS)
839 return IB_MAD_RESULT_SUCCESS;
840
841 /*
842 * Don't process SMInfo queries -- the SMA can't handle them.
843 */
844 if (in_mad->mad_hdr.attr_id == IB_SMP_ATTR_SM_INFO)
845 return IB_MAD_RESULT_SUCCESS;
846 } else if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT ||
847 in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS1 ||
848 in_mad->mad_hdr.mgmt_class == MLX4_IB_VENDOR_CLASS2 ||
849 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_CONG_MGMT) {
850 if (in_mad->mad_hdr.method != IB_MGMT_METHOD_GET &&
851 in_mad->mad_hdr.method != IB_MGMT_METHOD_SET)
852 return IB_MAD_RESULT_SUCCESS;
853 } else
854 return IB_MAD_RESULT_SUCCESS;
855
856 if ((in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED ||
857 in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) &&
858 in_mad->mad_hdr.method == IB_MGMT_METHOD_SET &&
859 in_mad->mad_hdr.attr_id == IB_SMP_ATTR_PORT_INFO &&
860 !ib_query_port(ibdev, port_num, &pattr))
861 prev_lid = pattr.lid;
862
863 err = mlx4_MAD_IFC(to_mdev(ibdev),
864 (mad_flags & IB_MAD_IGNORE_MKEY ? MLX4_MAD_IFC_IGNORE_MKEY : 0) |
865 (mad_flags & IB_MAD_IGNORE_BKEY ? MLX4_MAD_IFC_IGNORE_BKEY : 0) |
866 MLX4_MAD_IFC_NET_VIEW,
867 port_num, in_wc, in_grh, in_mad, out_mad);
868 if (err)
869 return IB_MAD_RESULT_FAILURE;
870
871 if (!out_mad->mad_hdr.status) {
872 smp_snoop(ibdev, port_num, in_mad, prev_lid);
873 /* slaves get node desc from FW */
874 if (!mlx4_is_slave(to_mdev(ibdev)->dev))
875 node_desc_override(ibdev, out_mad);
876 }
877
878 /* set return bit in status of directed route responses */
879 if (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
880 out_mad->mad_hdr.status |= cpu_to_be16(1 << 15);
881
882 if (in_mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS)
883 /* no response for trap repress */
884 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED;
885
886 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
887 }
888
889 static void edit_counter(struct mlx4_counter *cnt, void *counters,
890 __be16 attr_id)
891 {
892 switch (attr_id) {
893 case IB_PMA_PORT_COUNTERS:
894 {
895 struct ib_pma_portcounters *pma_cnt =
896 (struct ib_pma_portcounters *)counters;
897
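/* IB PMA data counters are in units of 32-bit words, hence the
 * division of the byte counters by 4 below.
 */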
898 ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_data,
899 (be64_to_cpu(cnt->tx_bytes) >> 2));
900 ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_data,
901 (be64_to_cpu(cnt->rx_bytes) >> 2));
902 ASSIGN_32BIT_COUNTER(pma_cnt->port_xmit_packets,
903 be64_to_cpu(cnt->tx_frames));
904 ASSIGN_32BIT_COUNTER(pma_cnt->port_rcv_packets,
905 be64_to_cpu(cnt->rx_frames));
906 break;
907 }
908 case IB_PMA_PORT_COUNTERS_EXT:
909 {
910 struct ib_pma_portcounters_ext *pma_cnt_ext =
911 (struct ib_pma_portcounters_ext *)counters;
912
913 pma_cnt_ext->port_xmit_data =
914 cpu_to_be64(be64_to_cpu(cnt->tx_bytes) >> 2);
915 pma_cnt_ext->port_rcv_data =
916 cpu_to_be64(be64_to_cpu(cnt->rx_bytes) >> 2);
917 pma_cnt_ext->port_xmit_packets = cnt->tx_frames;
918 pma_cnt_ext->port_rcv_packets = cnt->rx_frames;
919 break;
920 }
921 }
922 }
923
924 static int iboe_process_mad_port_info(void *out_mad)
925 {
926 struct ib_class_port_info cpi = {};
927
928 cpi.capability_mask = IB_PMA_CLASS_CAP_EXT_WIDTH;
929 memcpy(out_mad, &cpi, sizeof(cpi));
930 return IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
931 }
932
933 static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
934 const struct ib_wc *in_wc, const struct ib_grh *in_grh,
935 const struct ib_mad *in_mad, struct ib_mad *out_mad)
936 {
937 struct mlx4_counter counter_stats;
938 struct mlx4_ib_dev *dev = to_mdev(ibdev);
939 struct counter_index *tmp_counter;
940 int err = IB_MAD_RESULT_FAILURE, stats_avail = 0;
941
942 if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_PERF_MGMT)
943 return -EINVAL;
944
945 if (in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)
946 return iboe_process_mad_port_info((void *)(out_mad->data + 40));
947
948 memset(&counter_stats, 0, sizeof(counter_stats));
949 mutex_lock(&dev->counters_table[port_num - 1].mutex);
950 list_for_each_entry(tmp_counter,
951 &dev->counters_table[port_num - 1].counters_list,
952 list) {
953 err = mlx4_get_counter_stats(dev->dev,
954 tmp_counter->index,
955 &counter_stats, 0);
956 if (err) {
957 err = IB_MAD_RESULT_FAILURE;
958 stats_avail = 0;
959 break;
960 }
961 stats_avail = 1;
962 }
963 mutex_unlock(&dev->counters_table[port_num - 1].mutex);
964 if (stats_avail) {
965 memset(out_mad->data, 0, sizeof out_mad->data);
966 switch (counter_stats.counter_mode & 0xf) {
967 case 0:
968 edit_counter(&counter_stats,
969 (void *)(out_mad->data + 40),
970 in_mad->mad_hdr.attr_id);
971 err = IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY;
972 break;
973 default:
974 err = IB_MAD_RESULT_FAILURE;
975 }
976 }
977
978 return err;
979 }
980
981 int mlx4_ib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
982 const struct ib_wc *in_wc, const struct ib_grh *in_grh,
983 const struct ib_mad_hdr *in, size_t in_mad_size,
984 struct ib_mad_hdr *out, size_t *out_mad_size,
985 u16 *out_mad_pkey_index)
986 {
987 struct mlx4_ib_dev *dev = to_mdev(ibdev);
988 const struct ib_mad *in_mad = (const struct ib_mad *)in;
989 struct ib_mad *out_mad = (struct ib_mad *)out;
990 enum rdma_link_layer link = rdma_port_get_link_layer(ibdev, port_num);
991
992 if (WARN_ON_ONCE(in_mad_size != sizeof(*in_mad) ||
993 *out_mad_size != sizeof(*out_mad)))
994 return IB_MAD_RESULT_FAILURE;
995
996 /* iboe_process_mad() which uses the HCA flow-counters to implement IB PMA
997 * queries, should be called only by VFs and for that specific purpose
998 */
999 if (link == IB_LINK_LAYER_INFINIBAND) {
1000 if (mlx4_is_slave(dev->dev) &&
1001 (in_mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_PERF_MGMT &&
1002 (in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS ||
1003 in_mad->mad_hdr.attr_id == IB_PMA_PORT_COUNTERS_EXT ||
1004 in_mad->mad_hdr.attr_id == IB_PMA_CLASS_PORT_INFO)))
1005 return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
1006 in_grh, in_mad, out_mad);
1007
1008 return ib_process_mad(ibdev, mad_flags, port_num, in_wc,
1009 in_grh, in_mad, out_mad);
1010 }
1011
1012 if (link == IB_LINK_LAYER_ETHERNET)
1013 return iboe_process_mad(ibdev, mad_flags, port_num, in_wc,
1014 in_grh, in_mad, out_mad);
1015
1016 return -EINVAL;
1017 }
1018
1019 static void send_handler(struct ib_mad_agent *agent,
1020 struct ib_mad_send_wc *mad_send_wc)
1021 {
1022 if (mad_send_wc->send_buf->context[0])
1023 rdma_destroy_ah(mad_send_wc->send_buf->context[0]);
1024 ib_free_send_mad(mad_send_wc->send_buf);
1025 }
1026
1027 int mlx4_ib_mad_init(struct mlx4_ib_dev *dev)
1028 {
1029 struct ib_mad_agent *agent;
1030 int p, q;
1031 int ret;
1032 enum rdma_link_layer ll;
1033
1034 for (p = 0; p < dev->num_ports; ++p) {
1035 ll = rdma_port_get_link_layer(&dev->ib_dev, p + 1);
1036 for (q = 0; q <= 1; ++q) {
1037 if (ll == IB_LINK_LAYER_INFINIBAND) {
1038 agent = ib_register_mad_agent(&dev->ib_dev, p + 1,
1039 q ? IB_QPT_GSI : IB_QPT_SMI,
1040 NULL, 0, send_handler,
1041 NULL, NULL, 0);
1042 if (IS_ERR(agent)) {
1043 ret = PTR_ERR(agent);
1044 goto err;
1045 }
1046 dev->send_agent[p][q] = agent;
1047 } else
1048 dev->send_agent[p][q] = NULL;
1049 }
1050 }
1051
1052 return 0;
1053
1054 err:
1055 for (p = 0; p < dev->num_ports; ++p)
1056 for (q = 0; q <= 1; ++q)
1057 if (dev->send_agent[p][q])
1058 ib_unregister_mad_agent(dev->send_agent[p][q]);
1059
1060 return ret;
1061 }
1062
1063 void mlx4_ib_mad_cleanup(struct mlx4_ib_dev *dev)
1064 {
1065 struct ib_mad_agent *agent;
1066 int p, q;
1067
1068 for (p = 0; p < dev->num_ports; ++p) {
1069 for (q = 0; q <= 1; ++q) {
1070 agent = dev->send_agent[p][q];
1071 if (agent) {
1072 dev->send_agent[p][q] = NULL;
1073 ib_unregister_mad_agent(agent);
1074 }
1075 }
1076
1077 if (dev->sm_ah[p])
1078 rdma_destroy_ah(dev->sm_ah[p]);
1079 }
1080 }
1081
1082 static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num)
1083 {
1084 mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_LID_CHANGE);
1085
1086 if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
1087 mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
1088 MLX4_EQ_PORT_INFO_LID_CHANGE_MASK);
1089 }
1090
1091 static void handle_client_rereg_event(struct mlx4_ib_dev *dev, u8 port_num)
1092 {
1093 /* re-configure the alias-guid and mcg's */
1094 if (mlx4_is_master(dev->dev)) {
1095 mlx4_ib_invalidate_all_guid_record(dev, port_num);
1096
1097 if (!dev->sriov.is_going_down) {
1098 mlx4_ib_mcg_port_cleanup(&dev->sriov.demux[port_num - 1], 0);
1099 mlx4_gen_slaves_port_mgt_ev(dev->dev, port_num,
1100 MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK);
1101 }
1102 }
1103
1104 /* Update the sl to vl table from inside client rereg
1105 * only if in secure-host mode (snooping is not possible)
1106 * and the sl-to-vl change event is not generated by FW.
1107 */
1108 if (!mlx4_is_slave(dev->dev) &&
1109 dev->dev->flags & MLX4_FLAG_SECURE_HOST &&
1110 !(dev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SL_TO_VL_CHANGE_EVENT)) {
1111 if (mlx4_is_master(dev->dev))
1112 /* already in work queue from mlx4_ib_event queueing
1113 * mlx4_handle_port_mgmt_change_event, which calls
1114 * this procedure. Therefore, call sl2vl_update directly.
1115 */
1116 mlx4_ib_sl2vl_update(dev, port_num);
1117 else
1118 mlx4_sched_ib_sl2vl_update_work(dev, port_num);
1119 }
1120 mlx4_ib_dispatch_event(dev, port_num, IB_EVENT_CLIENT_REREGISTER);
1121 }
1122
1123 static void propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
1124 struct mlx4_eqe *eqe)
1125 {
1126 __propagate_pkey_ev(dev, port_num, GET_BLK_PTR_FROM_EQE(eqe),
1127 GET_MASK_FROM_EQE(eqe));
1128 }
1129
1130 static void handle_slaves_guid_change(struct mlx4_ib_dev *dev, u8 port_num,
1131 u32 guid_tbl_blk_num, u32 change_bitmap)
1132 {
1133 struct ib_smp *in_mad = NULL;
1134 struct ib_smp *out_mad = NULL;
1135 u16 i;
1136
1137 if (!mlx4_is_mfunc(dev->dev) || !mlx4_is_master(dev->dev))
1138 return;
1139
1140 in_mad = kmalloc(sizeof *in_mad, GFP_KERNEL);
1141 out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
1142 if (!in_mad || !out_mad)
1143 goto out;
1144
1145 guid_tbl_blk_num *= 4;
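/* The EQE block number addresses blocks of 32 GUID entries, while each
 * GUIDInfo attribute modifier covers 8, so fetch 4 consecutive blocks.
 */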
1146
1147 for (i = 0; i < 4; i++) {
1148 if (change_bitmap && (!((change_bitmap >> (8 * i)) & 0xff)))
1149 continue;
1150 memset(in_mad, 0, sizeof *in_mad);
1151 memset(out_mad, 0, sizeof *out_mad);
1152
1153 in_mad->base_version = 1;
1154 in_mad->mgmt_class = IB_MGMT_CLASS_SUBN_LID_ROUTED;
1155 in_mad->class_version = 1;
1156 in_mad->method = IB_MGMT_METHOD_GET;
1157 in_mad->attr_id = IB_SMP_ATTR_GUID_INFO;
1158 in_mad->attr_mod = cpu_to_be32(guid_tbl_blk_num + i);
1159
1160 if (mlx4_MAD_IFC(dev,
1161 MLX4_MAD_IFC_IGNORE_KEYS | MLX4_MAD_IFC_NET_VIEW,
1162 port_num, NULL, NULL, in_mad, out_mad)) {
1163 mlx4_ib_warn(&dev->ib_dev, "Failed in get GUID INFO MAD_IFC\n");
1164 goto out;
1165 }
1166
1167 mlx4_ib_update_cache_on_guid_change(dev, guid_tbl_blk_num + i,
1168 port_num,
1169 (u8 *)(&((struct ib_smp *)out_mad)->data));
1170 mlx4_ib_notify_slaves_on_guid_change(dev, guid_tbl_blk_num + i,
1171 port_num,
1172 (u8 *)(&((struct ib_smp *)out_mad)->data));
1173 }
1174
1175 out:
1176 kfree(in_mad);
1177 kfree(out_mad);
1178 return;
1179 }
1180
1181 void handle_port_mgmt_change_event(struct work_struct *work)
1182 {
1183 struct ib_event_work *ew = container_of(work, struct ib_event_work, work);
1184 struct mlx4_ib_dev *dev = ew->ib_dev;
1185 struct mlx4_eqe *eqe = &(ew->ib_eqe);
1186 u8 port = eqe->event.port_mgmt_change.port;
1187 u32 changed_attr;
1188 u32 tbl_block;
1189 u32 change_bitmap;
1190
1191 switch (eqe->subtype) {
1192 case MLX4_DEV_PMC_SUBTYPE_PORT_INFO:
1193 changed_attr = be32_to_cpu(eqe->event.port_mgmt_change.params.port_info.changed_attr);
1194
1195 /* Update the SM ah - This should be done before handling
1196 the other changed attributes so that MADs can be sent to the SM */
1197 if (changed_attr & MSTR_SM_CHANGE_MASK) {
1198 u16 lid = be16_to_cpu(eqe->event.port_mgmt_change.params.port_info.mstr_sm_lid);
1199 u8 sl = eqe->event.port_mgmt_change.params.port_info.mstr_sm_sl & 0xf;
1200 update_sm_ah(dev, port, lid, sl);
1201 }
1202
1203 /* Check if it is a lid change event */
1204 if (changed_attr & MLX4_EQ_PORT_INFO_LID_CHANGE_MASK)
1205 handle_lid_change_event(dev, port);
1206
1207 /* Generate GUID changed event */
1208 if (changed_attr & MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK) {
1209 if (mlx4_is_master(dev->dev)) {
1210 union ib_gid gid;
1211 int err = 0;
1212
1213 if (!eqe->event.port_mgmt_change.params.port_info.gid_prefix)
1214 err = __mlx4_ib_query_gid(&dev->ib_dev, port, 0, &gid, 1);
1215 else
1216 gid.global.subnet_prefix =
1217 eqe->event.port_mgmt_change.params.port_info.gid_prefix;
1218 if (err) {
1219 pr_warn("Could not change QP1 subnet prefix for port %d: query_gid error (%d)\n",
1220 port, err);
1221 } else {
1222 pr_debug("Changing QP1 subnet prefix for port %d. old=0x%llx. new=0x%llx\n",
1223 port,
1224 (u64)atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix),
1225 be64_to_cpu(gid.global.subnet_prefix));
1226 atomic64_set(&dev->sriov.demux[port - 1].subnet_prefix,
1227 be64_to_cpu(gid.global.subnet_prefix));
1228 }
1229 }
1230 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
1231 /*if master, notify all slaves*/
1232 if (mlx4_is_master(dev->dev))
1233 mlx4_gen_slaves_port_mgt_ev(dev->dev, port,
1234 MLX4_EQ_PORT_INFO_GID_PFX_CHANGE_MASK);
1235 }
1236
1237 if (changed_attr & MLX4_EQ_PORT_INFO_CLIENT_REREG_MASK)
1238 handle_client_rereg_event(dev, port);
1239 break;
1240
1241 case MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE:
1242 mlx4_ib_dispatch_event(dev, port, IB_EVENT_PKEY_CHANGE);
1243 if (mlx4_is_master(dev->dev) && !dev->sriov.is_going_down)
1244 propagate_pkey_ev(dev, port, eqe);
1245 break;
1246 case MLX4_DEV_PMC_SUBTYPE_GUID_INFO:
1247 /* paravirtualized master's guid is guid 0 -- does not change */
1248 if (!mlx4_is_master(dev->dev))
1249 mlx4_ib_dispatch_event(dev, port, IB_EVENT_GID_CHANGE);
1250 /*if master, notify relevant slaves*/
1251 else if (!dev->sriov.is_going_down) {
1252 tbl_block = GET_BLK_PTR_FROM_EQE(eqe);
1253 change_bitmap = GET_MASK_FROM_EQE(eqe);
1254 handle_slaves_guid_change(dev, port, tbl_block, change_bitmap);
1255 }
1256 break;
1257
1258 case MLX4_DEV_PMC_SUBTYPE_SL_TO_VL_MAP:
1259 /* cache sl to vl mapping changes for use in
1260 * filling QP1 LRH VL field when sending packets
1261 */
1262 if (!mlx4_is_slave(dev->dev)) {
1263 union sl2vl_tbl_to_u64 sl2vl64;
1264 int jj;
1265
1266 for (jj = 0; jj < 8; jj++) {
1267 sl2vl64.sl8[jj] =
1268 eqe->event.port_mgmt_change.params.sl2vl_tbl_change_info.sl2vl_table[jj];
1269 pr_debug("port %u, sl2vl[%d] = %02x\n",
1270 port, jj, sl2vl64.sl8[jj]);
1271 }
1272 atomic64_set(&dev->sl2vl[port - 1], sl2vl64.sl64);
1273 }
1274 break;
1275 default:
1276 pr_warn("Unsupported subtype 0x%x for "
1277 "Port Management Change event\n", eqe->subtype);
1278 }
1279
1280 kfree(ew);
1281 }
1282
1283 void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num,
1284 enum ib_event_type type)
1285 {
1286 struct ib_event event;
1287
1288 event.device = &dev->ib_dev;
1289 event.element.port_num = port_num;
1290 event.event = type;
1291
1292 ib_dispatch_event(&event);
1293 }
1294
1295 static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg)
1296 {
1297 unsigned long flags;
1298 struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context;
1299 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1300 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
1301 if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE)
1302 queue_work(ctx->wq, &ctx->work);
1303 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
1304 }
1305
1306 static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx,
1307 struct mlx4_ib_demux_pv_qp *tun_qp,
1308 int index)
1309 {
1310 struct ib_sge sg_list;
1311 struct ib_recv_wr recv_wr, *bad_recv_wr;
1312 int size;
1313
1314 size = (tun_qp->qp->qp_type == IB_QPT_UD) ?
1315 sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf);
1316
1317 sg_list.addr = tun_qp->ring[index].map;
1318 sg_list.length = size;
1319 sg_list.lkey = ctx->pd->local_dma_lkey;
1320
1321 recv_wr.next = NULL;
1322 recv_wr.sg_list = &sg_list;
1323 recv_wr.num_sge = 1;
1324 recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV |
1325 MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt);
1326 ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map,
1327 size, DMA_FROM_DEVICE);
1328 return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr);
1329 }
1330
1331 static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port,
1332 int slave, struct ib_sa_mad *sa_mad)
1333 {
1334 int ret = 0;
1335
1336 /* dispatch to different sa handlers */
1337 switch (be16_to_cpu(sa_mad->mad_hdr.attr_id)) {
1338 case IB_SA_ATTR_MC_MEMBER_REC:
1339 ret = mlx4_ib_mcg_multiplex_handler(ibdev, port, slave, sa_mad);
1340 break;
1341 default:
1342 break;
1343 }
1344 return ret;
1345 }
1346
1347 static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave)
1348 {
1349 int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave;
1350
1351 return (qpn >= proxy_start && qpn <= proxy_start + 1);
1352 }
1353
1354
1355 int mlx4_ib_send_to_wire(struct mlx4_ib_dev *dev, int slave, u8 port,
1356 enum ib_qp_type dest_qpt, u16 pkey_index,
1357 u32 remote_qpn, u32 qkey, struct rdma_ah_attr *attr,
1358 u8 *s_mac, u16 vlan_id, struct ib_mad *mad)
1359 {
1360 struct ib_sge list;
1361 struct ib_ud_wr wr;
1362 struct ib_send_wr *bad_wr;
1363 struct mlx4_ib_demux_pv_ctx *sqp_ctx;
1364 struct mlx4_ib_demux_pv_qp *sqp;
1365 struct mlx4_mad_snd_buf *sqp_mad;
1366 struct ib_ah *ah;
1367 struct ib_qp *send_qp = NULL;
1368 struct ib_global_route *grh;
1369 unsigned wire_tx_ix = 0;
1370 int ret = 0;
1371 u16 wire_pkey_ix;
1372 int src_qpnum;
1373 u8 sgid_index;
1374
1375
1376 sqp_ctx = dev->sriov.sqps[port-1];
1377
1378 /* check if proxy qp created */
1379 if (!sqp_ctx || sqp_ctx->state != DEMUX_PV_STATE_ACTIVE)
1380 return -EAGAIN;
1381
1382 if (dest_qpt == IB_QPT_SMI) {
1383 src_qpnum = 0;
1384 sqp = &sqp_ctx->qp[0];
1385 wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0];
1386 } else {
1387 src_qpnum = 1;
1388 sqp = &sqp_ctx->qp[1];
1389 wire_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][pkey_index];
1390 }
1391
1392 send_qp = sqp->qp;
1393
1394 /* create ah */
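/* The AH is created with sgid_index forced to 0; the caller's real index
 * is then restored and written straight into the mlx4 AV (gid_index).
 */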
1395 grh = rdma_ah_retrieve_grh(attr);
1396 sgid_index = grh->sgid_index;
1397 grh->sgid_index = 0;
1398 ah = rdma_create_ah(sqp_ctx->pd, attr);
1399 if (IS_ERR(ah))
1400 return -ENOMEM;
1401 grh->sgid_index = sgid_index;
1402 to_mah(ah)->av.ib.gid_index = sgid_index;
1403 /* get rid of force-loopback bit */
1404 to_mah(ah)->av.ib.port_pd &= cpu_to_be32(0x7FFFFFFF);
1405 spin_lock(&sqp->tx_lock);
1406 if (sqp->tx_ix_head - sqp->tx_ix_tail >=
1407 (MLX4_NUM_TUNNEL_BUFS - 1))
1408 ret = -EAGAIN;
1409 else
1410 wire_tx_ix = (++sqp->tx_ix_head) & (MLX4_NUM_TUNNEL_BUFS - 1);
1411 spin_unlock(&sqp->tx_lock);
1412 if (ret)
1413 goto out;
1414
1415 sqp_mad = (struct mlx4_mad_snd_buf *) (sqp->tx_ring[wire_tx_ix].buf.addr);
1416 if (sqp->tx_ring[wire_tx_ix].ah)
1417 rdma_destroy_ah(sqp->tx_ring[wire_tx_ix].ah);
1418 sqp->tx_ring[wire_tx_ix].ah = ah;
1419 ib_dma_sync_single_for_cpu(&dev->ib_dev,
1420 sqp->tx_ring[wire_tx_ix].buf.map,
1421 sizeof (struct mlx4_mad_snd_buf),
1422 DMA_TO_DEVICE);
1423
1424 memcpy(&sqp_mad->payload, mad, sizeof *mad);
1425
1426 ib_dma_sync_single_for_device(&dev->ib_dev,
1427 sqp->tx_ring[wire_tx_ix].buf.map,
1428 sizeof (struct mlx4_mad_snd_buf),
1429 DMA_TO_DEVICE);
1430
1431 list.addr = sqp->tx_ring[wire_tx_ix].buf.map;
1432 list.length = sizeof (struct mlx4_mad_snd_buf);
1433 list.lkey = sqp_ctx->pd->local_dma_lkey;
1434
1435 wr.ah = ah;
1436 wr.port_num = port;
1437 wr.pkey_index = wire_pkey_ix;
1438 wr.remote_qkey = qkey;
1439 wr.remote_qpn = remote_qpn;
1440 wr.wr.next = NULL;
1441 wr.wr.wr_id = ((u64) wire_tx_ix) | MLX4_TUN_SET_WRID_QPN(src_qpnum);
1442 wr.wr.sg_list = &list;
1443 wr.wr.num_sge = 1;
1444 wr.wr.opcode = IB_WR_SEND;
1445 wr.wr.send_flags = IB_SEND_SIGNALED;
1446 if (s_mac)
1447 memcpy(to_mah(ah)->av.eth.s_mac, s_mac, 6);
1448 if (vlan_id < 0x1000)
1449 vlan_id |= (rdma_ah_get_sl(attr) & 7) << 13;
1450 to_mah(ah)->av.eth.vlan = cpu_to_be16(vlan_id);
1451
1452
1453 ret = ib_post_send(send_qp, &wr.wr, &bad_wr);
1454 if (!ret)
1455 return 0;
1456
1457 spin_lock(&sqp->tx_lock);
1458 sqp->tx_ix_tail++;
1459 spin_unlock(&sqp->tx_lock);
1460 sqp->tx_ring[wire_tx_ix].ah = NULL;
1461 out:
1462 rdma_destroy_ah(ah);
1463 return ret;
1464 }
1465
1466 static int get_slave_base_gid_ix(struct mlx4_ib_dev *dev, int slave, int port)
1467 {
1468 if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
1469 return slave;
1470 return mlx4_get_base_gid_ix(dev->dev, slave, port);
1471 }
1472
1473 static void fill_in_real_sgid_index(struct mlx4_ib_dev *dev, int slave, int port,
1474 struct rdma_ah_attr *ah_attr)
1475 {
1476 struct ib_global_route *grh = rdma_ah_retrieve_grh(ah_attr);
1477 if (rdma_port_get_link_layer(&dev->ib_dev, port) == IB_LINK_LAYER_INFINIBAND)
1478 grh->sgid_index = slave;
1479 else
1480 grh->sgid_index += get_slave_base_gid_ix(dev, slave, port);
1481 }
1482
1483 static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc *wc)
1484 {
1485 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
1486 struct mlx4_ib_demux_pv_qp *tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc->wr_id)];
1487 int wr_ix = wc->wr_id & (MLX4_NUM_TUNNEL_BUFS - 1);
1488 struct mlx4_tunnel_mad *tunnel = tun_qp->ring[wr_ix].addr;
1489 struct mlx4_ib_ah ah;
1490 struct rdma_ah_attr ah_attr;
1491 u8 *slave_id;
1492 int slave;
1493 int port;
1494 u16 vlan_id;
1495 u8 qos;
1496 u8 *dmac;
1497
1498 /* Get slave that sent this packet */
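/* The checks below assume each slave's proxy QPs occupy an 8-aligned
 * block: bit 0 of src_qp encodes port - 1, and bit 2 is set only for
 * tunnel QPs, which must never be the source here.
 */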
1499 if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn ||
1500 wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX ||
1501 (wc->src_qp & 0x1) != ctx->port - 1 ||
1502 wc->src_qp & 0x4) {
1503 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp);
1504 return;
1505 }
1506 slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8;
1507 if (slave != ctx->slave) {
1508 mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: "
1509 "belongs to another slave\n", wc->src_qp);
1510 return;
1511 }
1512
1513 /* Map transaction ID */
1514 ib_dma_sync_single_for_cpu(ctx->ib_dev, tun_qp->ring[wr_ix].map,
1515 sizeof (struct mlx4_tunnel_mad),
1516 DMA_FROM_DEVICE);
1517 switch (tunnel->mad.mad_hdr.method) {
1518 case IB_MGMT_METHOD_SET:
1519 case IB_MGMT_METHOD_GET:
1520 case IB_MGMT_METHOD_REPORT:
1521 case IB_SA_METHOD_GET_TABLE:
1522 case IB_SA_METHOD_DELETE:
1523 case IB_SA_METHOD_GET_MULTI:
1524 case IB_SA_METHOD_GET_TRACE_TBL:
1525 slave_id = (u8 *) &tunnel->mad.mad_hdr.tid;
1526 if (*slave_id) {
1527 mlx4_ib_warn(ctx->ib_dev, "egress mad has non-null tid msb:%d "
1528 "class:%d slave:%d\n", *slave_id,
1529 tunnel->mad.mad_hdr.mgmt_class, slave);
1530 return;
1531 } else
1532 *slave_id = slave;
1533 default:
1534 /* nothing */;
1535 }
1536
1537 /* Class-specific handling */
1538 switch (tunnel->mad.mad_hdr.mgmt_class) {
1539 case IB_MGMT_CLASS_SUBN_LID_ROUTED:
1540 case IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE:
1541 if (slave != mlx4_master_func_num(dev->dev) &&
1542 !mlx4_vf_smi_enabled(dev->dev, slave, ctx->port))
1543 return;
1544 break;
1545 case IB_MGMT_CLASS_SUBN_ADM:
1546 if (mlx4_ib_multiplex_sa_handler(ctx->ib_dev, ctx->port, slave,
1547 (struct ib_sa_mad *) &tunnel->mad))
1548 return;
1549 break;
1550 case IB_MGMT_CLASS_CM:
1551 if (mlx4_ib_multiplex_cm_handler(ctx->ib_dev, ctx->port, slave,
1552 (struct ib_mad *) &tunnel->mad))
1553 return;
1554 break;
1555 case IB_MGMT_CLASS_DEVICE_MGMT:
1556 if (tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_GET &&
1557 tunnel->mad.mad_hdr.method != IB_MGMT_METHOD_SET)
1558 return;
1559 break;
1560 default:
1561 /* Drop unsupported classes for slaves in tunnel mode */
1562 if (slave != mlx4_master_func_num(dev->dev)) {
1563 mlx4_ib_warn(ctx->ib_dev, "dropping unsupported egress mad from class:%d "
1564 "for slave:%d\n", tunnel->mad.mad_hdr.mgmt_class, slave);
1565 return;
1566 }
1567 }
1568
1569 /* We are using standard ib_core services to send the mad, so generate a
1570 * standard address handle by decoding the tunnelled mlx4_ah fields */
1571 memcpy(&ah.av, &tunnel->hdr.av, sizeof (struct mlx4_av));
1572 ah.ibah.device = ctx->ib_dev;
1573
1574 port = be32_to_cpu(ah.av.ib.port_pd) >> 24;
1575 port = mlx4_slave_convert_port(dev->dev, slave, port);
1576 if (port < 0)
1577 return;
1578 ah.av.ib.port_pd = cpu_to_be32(port << 24 | (be32_to_cpu(ah.av.ib.port_pd) & 0xffffff));
1579
1580 mlx4_ib_query_ah(&ah.ibah, &ah_attr);
1581 if (rdma_ah_get_ah_flags(&ah_attr) & IB_AH_GRH)
1582 fill_in_real_sgid_index(dev, slave, ctx->port, &ah_attr);
1583 dmac = rdma_ah_retrieve_dmac(&ah_attr);
1584 if (dmac)
1585 memcpy(dmac, tunnel->hdr.mac, ETH_ALEN);
1586 vlan_id = be16_to_cpu(tunnel->hdr.vlan);
1587 /* if the slave has a default vlan, use it */
1588 if (mlx4_get_slave_default_vlan(dev->dev, ctx->port, slave,
1589 &vlan_id, &qos))
1590 rdma_ah_set_sl(&ah_attr, qos);
1591
1592 mlx4_ib_send_to_wire(dev, slave, ctx->port,
1593 is_proxy_qp0(dev, wc->src_qp, slave) ?
1594 IB_QPT_SMI : IB_QPT_GSI,
1595 be16_to_cpu(tunnel->hdr.pkey_index),
1596 be32_to_cpu(tunnel->hdr.remote_qpn),
1597 be32_to_cpu(tunnel->hdr.qkey),
1598 &ah_attr, wc->smac, vlan_id, &tunnel->mad);
1599 }
1600
1601 static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
1602 enum ib_qp_type qp_type, int is_tun)
1603 {
1604 int i;
1605 struct mlx4_ib_demux_pv_qp *tun_qp;
1606 int rx_buf_size, tx_buf_size;
1607
1608 if (qp_type > IB_QPT_GSI)
1609 return -EINVAL;
1610
1611 tun_qp = &ctx->qp[qp_type];
1612
1613 tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS,
1614 GFP_KERNEL);
1615 if (!tun_qp->ring)
1616 return -ENOMEM;
1617
1618 tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS,
1619 sizeof (struct mlx4_ib_tun_tx_buf),
1620 GFP_KERNEL);
1621 if (!tun_qp->tx_ring) {
1622 kfree(tun_qp->ring);
1623 tun_qp->ring = NULL;
1624 return -ENOMEM;
1625 }
1626
1627 if (is_tun) {
1628 rx_buf_size = sizeof (struct mlx4_tunnel_mad);
1629 tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
1630 } else {
1631 rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
1632 tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
1633 }
1634
1635 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1636 tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL);
1637 if (!tun_qp->ring[i].addr)
1638 goto err;
1639 tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev,
1640 tun_qp->ring[i].addr,
1641 rx_buf_size,
1642 DMA_FROM_DEVICE);
1643 if (ib_dma_mapping_error(ctx->ib_dev, tun_qp->ring[i].map)) {
1644 kfree(tun_qp->ring[i].addr);
1645 goto err;
1646 }
1647 }
1648
1649 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1650 tun_qp->tx_ring[i].buf.addr =
1651 kmalloc(tx_buf_size, GFP_KERNEL);
1652 if (!tun_qp->tx_ring[i].buf.addr)
1653 goto tx_err;
1654 tun_qp->tx_ring[i].buf.map =
1655 ib_dma_map_single(ctx->ib_dev,
1656 tun_qp->tx_ring[i].buf.addr,
1657 tx_buf_size,
1658 DMA_TO_DEVICE);
1659 if (ib_dma_mapping_error(ctx->ib_dev,
1660 tun_qp->tx_ring[i].buf.map)) {
1661 kfree(tun_qp->tx_ring[i].buf.addr);
1662 goto tx_err;
1663 }
1664 tun_qp->tx_ring[i].ah = NULL;
1665 }
1666 spin_lock_init(&tun_qp->tx_lock);
1667 tun_qp->tx_ix_head = 0;
1668 tun_qp->tx_ix_tail = 0;
1669 tun_qp->proxy_qpt = qp_type;
1670
1671 return 0;
1672
1673 tx_err:
1674 while (i > 0) {
1675 --i;
1676 ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
1677 tx_buf_size, DMA_TO_DEVICE);
1678 kfree(tun_qp->tx_ring[i].buf.addr);
1679 }
1680 kfree(tun_qp->tx_ring);
1681 tun_qp->tx_ring = NULL;
1682 i = MLX4_NUM_TUNNEL_BUFS;
1683 err:
1684 while (i > 0) {
1685 --i;
1686 ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
1687 rx_buf_size, DMA_FROM_DEVICE);
1688 kfree(tun_qp->ring[i].addr);
1689 }
1690 kfree(tun_qp->ring);
1691 tun_qp->ring = NULL;
1692 return -ENOMEM;
1693 }
1694
1695 static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx,
1696 enum ib_qp_type qp_type, int is_tun)
1697 {
1698 int i;
1699 struct mlx4_ib_demux_pv_qp *tun_qp;
1700 int rx_buf_size, tx_buf_size;
1701
1702 if (qp_type > IB_QPT_GSI)
1703 return;
1704
1705 tun_qp = &ctx->qp[qp_type];
1706 if (is_tun) {
1707 rx_buf_size = sizeof (struct mlx4_tunnel_mad);
1708 tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad);
1709 } else {
1710 rx_buf_size = sizeof (struct mlx4_mad_rcv_buf);
1711 tx_buf_size = sizeof (struct mlx4_mad_snd_buf);
1712 }
1713
1714
1715 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1716 ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map,
1717 rx_buf_size, DMA_FROM_DEVICE);
1718 kfree(tun_qp->ring[i].addr);
1719 }
1720
1721 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1722 ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map,
1723 tx_buf_size, DMA_TO_DEVICE);
1724 kfree(tun_qp->tx_ring[i].buf.addr);
1725 if (tun_qp->tx_ring[i].ah)
1726 rdma_destroy_ah(tun_qp->tx_ring[i].ah);
1727 }
1728 kfree(tun_qp->tx_ring);
1729 kfree(tun_qp->ring);
1730 }
1731
1732 static void mlx4_ib_tunnel_comp_worker(struct work_struct *work)
1733 {
1734 struct mlx4_ib_demux_pv_ctx *ctx;
1735 struct mlx4_ib_demux_pv_qp *tun_qp;
1736 struct ib_wc wc;
1737 int ret;
1738 ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
1739 ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1740
1741 while (ib_poll_cq(ctx->cq, 1, &wc) == 1) {
1742 tun_qp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
1743 if (wc.status == IB_WC_SUCCESS) {
1744 switch (wc.opcode) {
1745 case IB_WC_RECV:
1746 mlx4_ib_multiplex_mad(ctx, &wc);
1747 ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp,
1748 wc.wr_id &
1749 (MLX4_NUM_TUNNEL_BUFS - 1));
1750 if (ret)
1751 pr_err("Failed reposting tunnel "
1752 "buf:%lld\n", wc.wr_id);
1753 break;
1754 case IB_WC_SEND:
1755 pr_debug("received tunnel send completion:"
1756 "wrid=0x%llx, status=0x%x\n",
1757 wc.wr_id, wc.status);
1758 rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
1759 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1760 tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1761 = NULL;
1762 spin_lock(&tun_qp->tx_lock);
1763 tun_qp->tx_ix_tail++;
1764 spin_unlock(&tun_qp->tx_lock);
1765
1766 break;
1767 default:
1768 break;
1769 }
1770 } else {
1771 pr_debug("mlx4_ib: completion error in tunnel: %d."
1772 " status = %d, wrid = 0x%llx\n",
1773 ctx->slave, wc.status, wc.wr_id);
1774 if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
1775 rdma_destroy_ah(tun_qp->tx_ring[wc.wr_id &
1776 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1777 tun_qp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1778 = NULL;
1779 spin_lock(&tun_qp->tx_lock);
1780 tun_qp->tx_ix_tail++;
1781 spin_unlock(&tun_qp->tx_lock);
1782 }
1783 }
1784 }
1785 }
1786
1787 static void pv_qp_event_handler(struct ib_event *event, void *qp_context)
1788 {
1789 struct mlx4_ib_demux_pv_ctx *sqp = qp_context;
1790
1791 /* It's worse than that! He's dead, Jim! */
1792 pr_err("Fatal error (%d) on a MAD QP on port %d\n",
1793 event->event, sqp->port);
1794 }
1795
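/*
 * Create either a tunnel QP (UD QP with the MLX4_IB_SRIOV_TUNNEL_QP
 * create flag) or a real special QP for this demux context, move it
 * through INIT -> RTR -> RTS, and pre-post a full ring of receive
 * buffers.
 */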
1796 static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx,
1797 enum ib_qp_type qp_type, int create_tun)
1798 {
1799 int i, ret;
1800 struct mlx4_ib_demux_pv_qp *tun_qp;
1801 struct mlx4_ib_qp_tunnel_init_attr qp_init_attr;
1802 struct ib_qp_attr attr;
1803 int qp_attr_mask_INIT;
1804
1805 if (qp_type > IB_QPT_GSI)
1806 return -EINVAL;
1807
1808 tun_qp = &ctx->qp[qp_type];
1809
1810 memset(&qp_init_attr, 0, sizeof qp_init_attr);
1811 qp_init_attr.init_attr.send_cq = ctx->cq;
1812 qp_init_attr.init_attr.recv_cq = ctx->cq;
1813 qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
1814 qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS;
1815 qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS;
1816 qp_init_attr.init_attr.cap.max_send_sge = 1;
1817 qp_init_attr.init_attr.cap.max_recv_sge = 1;
1818 if (create_tun) {
1819 qp_init_attr.init_attr.qp_type = IB_QPT_UD;
1820 qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP;
1821 qp_init_attr.port = ctx->port;
1822 qp_init_attr.slave = ctx->slave;
1823 qp_init_attr.proxy_qp_type = qp_type;
1824 qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX |
1825 IB_QP_QKEY | IB_QP_PORT;
1826 } else {
1827 qp_init_attr.init_attr.qp_type = qp_type;
1828 qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP;
1829 qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY;
1830 }
1831 qp_init_attr.init_attr.port_num = ctx->port;
1832 qp_init_attr.init_attr.qp_context = ctx;
1833 qp_init_attr.init_attr.event_handler = pv_qp_event_handler;
1834 tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr);
1835 if (IS_ERR(tun_qp->qp)) {
1836 ret = PTR_ERR(tun_qp->qp);
1837 tun_qp->qp = NULL;
1838 pr_err("Couldn't create %s QP (%d)\n",
1839 create_tun ? "tunnel" : "special", ret);
1840 return ret;
1841 }
1842
1843 memset(&attr, 0, sizeof attr);
1844 attr.qp_state = IB_QPS_INIT;
1845 ret = 0;
1846 if (create_tun)
1847 ret = find_slave_port_pkey_ix(to_mdev(ctx->ib_dev), ctx->slave,
1848 ctx->port, IB_DEFAULT_PKEY_FULL,
1849 &attr.pkey_index);
1850 if (ret || !create_tun)
1851 attr.pkey_index =
1852 to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0];
1853 attr.qkey = IB_QP1_QKEY;
1854 attr.port_num = ctx->port;
1855 ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT);
1856 if (ret) {
1857 pr_err("Couldn't change %s qp state to INIT (%d)\n",
1858 create_tun ? "tunnel" : "special", ret);
1859 goto err_qp;
1860 }
1861 attr.qp_state = IB_QPS_RTR;
1862 ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE);
1863 if (ret) {
1864 pr_err("Couldn't change %s qp state to RTR (%d)\n",
1865 create_tun ? "tunnel" : "special", ret);
1866 goto err_qp;
1867 }
1868 attr.qp_state = IB_QPS_RTS;
1869 attr.sq_psn = 0;
1870 ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN);
1871 if (ret) {
1872 pr_err("Couldn't change %s qp state to RTS (%d)\n",
1873 create_tun ? "tunnel" : "special", ret);
1874 goto err_qp;
1875 }
1876
1877 for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) {
1878 ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i);
1879 if (ret) {
1880 				pr_err("mlx4_ib_post_pv_qp_buf error"
1881 				       " (err = %d, i = %d)\n", ret, i);
1882 goto err_qp;
1883 }
1884 }
1885 return 0;
1886
1887 err_qp:
1888 ib_destroy_qp(tun_qp->qp);
1889 tun_qp->qp = NULL;
1890 return ret;
1891 }
1892
1893 /*
1894 * IB MAD completion callback for real SQPs
1895 */
1896 static void mlx4_ib_sqp_comp_worker(struct work_struct *work)
1897 {
1898 struct mlx4_ib_demux_pv_ctx *ctx;
1899 struct mlx4_ib_demux_pv_qp *sqp;
1900 struct ib_wc wc;
1901 struct ib_grh *grh;
1902 struct ib_mad *mad;
1903
1904 ctx = container_of(work, struct mlx4_ib_demux_pv_ctx, work);
1905 ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
1906
1907 while (mlx4_ib_poll_cq(ctx->cq, 1, &wc) == 1) {
1908 sqp = &ctx->qp[MLX4_TUN_WRID_QPN(wc.wr_id)];
1909 if (wc.status == IB_WC_SUCCESS) {
1910 switch (wc.opcode) {
1911 case IB_WC_SEND:
1912 rdma_destroy_ah(sqp->tx_ring[wc.wr_id &
1913 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1914 sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1915 = NULL;
1916 spin_lock(&sqp->tx_lock);
1917 sqp->tx_ix_tail++;
1918 spin_unlock(&sqp->tx_lock);
1919 break;
1920 case IB_WC_RECV:
1921 mad = (struct ib_mad *) &(((struct mlx4_mad_rcv_buf *)
1922 (sqp->ring[wc.wr_id &
1923 (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->payload);
1924 grh = &(((struct mlx4_mad_rcv_buf *)
1925 (sqp->ring[wc.wr_id &
1926 (MLX4_NUM_TUNNEL_BUFS - 1)].addr))->grh);
1927 mlx4_ib_demux_mad(ctx->ib_dev, ctx->port, &wc, grh, mad);
1928 if (mlx4_ib_post_pv_qp_buf(ctx, sqp, wc.wr_id &
1929 (MLX4_NUM_TUNNEL_BUFS - 1)))
1930 pr_err("Failed reposting SQP "
1931 "buf:%lld\n", wc.wr_id);
1932 break;
1933 default:
1934 BUG_ON(1);
1935 break;
1936 }
1937 } else {
1938 			pr_debug("mlx4_ib: completion error on SQP: %d."
1939 				 " status = %d, wrid = 0x%llx\n",
1940 				 ctx->slave, wc.status, wc.wr_id);
1941 if (!MLX4_TUN_IS_RECV(wc.wr_id)) {
1942 rdma_destroy_ah(sqp->tx_ring[wc.wr_id &
1943 (MLX4_NUM_TUNNEL_BUFS - 1)].ah);
1944 sqp->tx_ring[wc.wr_id & (MLX4_NUM_TUNNEL_BUFS - 1)].ah
1945 = NULL;
1946 spin_lock(&sqp->tx_lock);
1947 sqp->tx_ix_tail++;
1948 spin_unlock(&sqp->tx_lock);
1949 }
1950 }
1951 }
1952 }
1953
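/* Allocate and minimally initialize a per-slave demux PV context. */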
1954 static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port,
1955 struct mlx4_ib_demux_pv_ctx **ret_ctx)
1956 {
1957 struct mlx4_ib_demux_pv_ctx *ctx;
1958
1959 *ret_ctx = NULL;
1960 ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL);
1961 if (!ctx)
1962 return -ENOMEM;
1963
1964 ctx->ib_dev = &dev->ib_dev;
1965 ctx->port = port;
1966 ctx->slave = slave;
1967 *ret_ctx = ctx;
1968 return 0;
1969 }
1970
1971 static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port)
1972 {
1973 if (dev->sriov.demux[port - 1].tun[slave]) {
1974 kfree(dev->sriov.demux[port - 1].tun[slave]);
1975 dev->sriov.demux[port - 1].tun[slave] = NULL;
1976 }
1977 }
1978
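/*
 * Bring up the paravirt MAD resources for one slave/port: buffer
 * rings for QP0 (IB link layer only) and QP1, a shared CQ and PD, the
 * tunnel or real special QPs, and the completion worker.  The CQ is
 * armed before the context is marked active; on failure everything is
 * unwound in reverse order.
 */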
1979 static int create_pv_resources(struct ib_device *ibdev, int slave, int port,
1980 int create_tun, struct mlx4_ib_demux_pv_ctx *ctx)
1981 {
1982 int ret, cq_size;
1983 struct ib_cq_init_attr cq_attr = {};
1984
1985 if (ctx->state != DEMUX_PV_STATE_DOWN)
1986 return -EEXIST;
1987
1988 ctx->state = DEMUX_PV_STATE_STARTING;
1989 /* have QP0 only if link layer is IB */
1990 if (rdma_port_get_link_layer(ibdev, ctx->port) ==
1991 IB_LINK_LAYER_INFINIBAND)
1992 ctx->has_smi = 1;
1993
1994 if (ctx->has_smi) {
1995 ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun);
1996 if (ret) {
1997 pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret);
1998 goto err_out;
1999 }
2000 }
2001
2002 ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun);
2003 if (ret) {
2004 pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret);
2005 goto err_out_qp0;
2006 }
2007
2008 cq_size = 2 * MLX4_NUM_TUNNEL_BUFS;
2009 if (ctx->has_smi)
2010 cq_size *= 2;
2011
2012 cq_attr.cqe = cq_size;
2013 ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler,
2014 NULL, ctx, &cq_attr);
2015 if (IS_ERR(ctx->cq)) {
2016 ret = PTR_ERR(ctx->cq);
2017 pr_err("Couldn't create tunnel CQ (%d)\n", ret);
2018 goto err_buf;
2019 }
2020
2021 ctx->pd = ib_alloc_pd(ctx->ib_dev, 0);
2022 if (IS_ERR(ctx->pd)) {
2023 ret = PTR_ERR(ctx->pd);
2024 pr_err("Couldn't create tunnel PD (%d)\n", ret);
2025 goto err_cq;
2026 }
2027
2028 if (ctx->has_smi) {
2029 ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun);
2030 if (ret) {
2031 pr_err("Couldn't create %s QP0 (%d)\n",
2032 create_tun ? "tunnel for" : "", ret);
2033 goto err_pd;
2034 }
2035 }
2036
2037 ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun);
2038 if (ret) {
2039 pr_err("Couldn't create %s QP1 (%d)\n",
2040 create_tun ? "tunnel for" : "", ret);
2041 goto err_qp0;
2042 }
2043
2044 if (create_tun)
2045 INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker);
2046 else
2047 INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker);
2048
2049 ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq;
2050
2051 ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP);
2052 if (ret) {
2053 pr_err("Couldn't arm tunnel cq (%d)\n", ret);
2054 goto err_wq;
2055 }
2056 ctx->state = DEMUX_PV_STATE_ACTIVE;
2057 return 0;
2058
2059 err_wq:
2060 ctx->wq = NULL;
2061 ib_destroy_qp(ctx->qp[1].qp);
2062 ctx->qp[1].qp = NULL;
2063
2064
2065 err_qp0:
2066 if (ctx->has_smi)
2067 ib_destroy_qp(ctx->qp[0].qp);
2068 ctx->qp[0].qp = NULL;
2069
2070 err_pd:
2071 ib_dealloc_pd(ctx->pd);
2072 ctx->pd = NULL;
2073
2074 err_cq:
2075 ib_destroy_cq(ctx->cq);
2076 ctx->cq = NULL;
2077
2078 err_buf:
2079 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun);
2080
2081 err_out_qp0:
2082 if (ctx->has_smi)
2083 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun);
2084 err_out:
2085 ctx->state = DEMUX_PV_STATE_DOWN;
2086 return ret;
2087 }
2088
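/*
 * Tear down the paravirt MAD resources created by
 * create_pv_resources(): optionally flush the workqueue, then destroy
 * the QPs and their buffers, the PD and the CQ, and mark the context
 * down.
 */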
2089 static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port,
2090 struct mlx4_ib_demux_pv_ctx *ctx, int flush)
2091 {
2092 if (!ctx)
2093 return;
2094 if (ctx->state > DEMUX_PV_STATE_DOWN) {
2095 ctx->state = DEMUX_PV_STATE_DOWNING;
2096 if (flush)
2097 flush_workqueue(ctx->wq);
2098 if (ctx->has_smi) {
2099 ib_destroy_qp(ctx->qp[0].qp);
2100 ctx->qp[0].qp = NULL;
2101 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1);
2102 }
2103 ib_destroy_qp(ctx->qp[1].qp);
2104 ctx->qp[1].qp = NULL;
2105 mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1);
2106 ib_dealloc_pd(ctx->pd);
2107 ctx->pd = NULL;
2108 ib_destroy_cq(ctx->cq);
2109 ctx->cq = NULL;
2110 ctx->state = DEMUX_PV_STATE_DOWN;
2111 }
2112 }
2113
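/*
 * Create (do_init) or destroy the tunnel QP resources for one
 * slave/port.  For the master function the real special QP resources
 * are handled as well; on teardown the slave's multicast groups are
 * cleaned up first.
 */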
2114 static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave,
2115 int port, int do_init)
2116 {
2117 int ret = 0;
2118
2119 if (!do_init) {
2120 clean_vf_mcast(&dev->sriov.demux[port - 1], slave);
2121 /* for master, destroy real sqp resources */
2122 if (slave == mlx4_master_func_num(dev->dev))
2123 destroy_pv_resources(dev, slave, port,
2124 dev->sriov.sqps[port - 1], 1);
2125 /* destroy the tunnel qp resources */
2126 destroy_pv_resources(dev, slave, port,
2127 dev->sriov.demux[port - 1].tun[slave], 1);
2128 return 0;
2129 }
2130
2131 /* create the tunnel qp resources */
2132 ret = create_pv_resources(&dev->ib_dev, slave, port, 1,
2133 dev->sriov.demux[port - 1].tun[slave]);
2134
2135 /* for master, create the real sqp resources */
2136 if (!ret && slave == mlx4_master_func_num(dev->dev))
2137 ret = create_pv_resources(&dev->ib_dev, slave, port, 0,
2138 dev->sriov.sqps[port - 1]);
2139 return ret;
2140 }
2141
2142 void mlx4_ib_tunnels_update_work(struct work_struct *work)
2143 {
2144 struct mlx4_ib_demux_work *dmxw;
2145
2146 dmxw = container_of(work, struct mlx4_ib_demux_work, work);
2147 mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port,
2148 dmxw->do_init);
2149 kfree(dmxw);
2150 return;
2151 }
2152
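/*
 * Set up the per-port demux context: a PV context pointer per
 * possible slave (only slaves active on this port get one), the
 * multicast group paravirtualization state, and the ordered tunnel
 * and up/down workqueues.
 */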
2153 static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev,
2154 struct mlx4_ib_demux_ctx *ctx,
2155 int port)
2156 {
2157 char name[12];
2158 int ret = 0;
2159 int i;
2160
2161 ctx->tun = kcalloc(dev->dev->caps.sqp_demux,
2162 sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL);
2163 if (!ctx->tun)
2164 return -ENOMEM;
2165
2166 ctx->dev = dev;
2167 ctx->port = port;
2168 ctx->ib_dev = &dev->ib_dev;
2169
2170 for (i = 0;
2171 i < min(dev->dev->caps.sqp_demux,
2172 (u16)(dev->dev->persist->num_vfs + 1));
2173 i++) {
2174 struct mlx4_active_ports actv_ports =
2175 mlx4_get_active_ports(dev->dev, i);
2176
2177 if (!test_bit(port - 1, actv_ports.ports))
2178 continue;
2179
2180 ret = alloc_pv_object(dev, i, port, &ctx->tun[i]);
2181 if (ret) {
2182 ret = -ENOMEM;
2183 goto err_mcg;
2184 }
2185 }
2186
2187 ret = mlx4_ib_mcg_port_init(ctx);
2188 if (ret) {
2189 pr_err("Failed initializing mcg para-virt (%d)\n", ret);
2190 goto err_mcg;
2191 }
2192
2193 snprintf(name, sizeof name, "mlx4_ibt%d", port);
2194 ctx->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
2195 if (!ctx->wq) {
2196 pr_err("Failed to create tunnelling WQ for port %d\n", port);
2197 ret = -ENOMEM;
2198 goto err_wq;
2199 }
2200
2201 snprintf(name, sizeof name, "mlx4_ibud%d", port);
2202 ctx->ud_wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
2203 if (!ctx->ud_wq) {
2204 pr_err("Failed to create up/down WQ for port %d\n", port);
2205 ret = -ENOMEM;
2206 goto err_udwq;
2207 }
2208
2209 return 0;
2210
2211 err_udwq:
2212 destroy_workqueue(ctx->wq);
2213 ctx->wq = NULL;
2214
2215 err_wq:
2216 mlx4_ib_mcg_port_cleanup(ctx, 1);
2217 err_mcg:
2218 for (i = 0; i < dev->dev->caps.sqp_demux; i++)
2219 free_pv_object(dev, i, port);
2220 kfree(ctx->tun);
2221 ctx->tun = NULL;
2222 return ret;
2223 }
2224
2225 static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx)
2226 {
2227 if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) {
2228 sqp_ctx->state = DEMUX_PV_STATE_DOWNING;
2229 flush_workqueue(sqp_ctx->wq);
2230 if (sqp_ctx->has_smi) {
2231 ib_destroy_qp(sqp_ctx->qp[0].qp);
2232 sqp_ctx->qp[0].qp = NULL;
2233 mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0);
2234 }
2235 ib_destroy_qp(sqp_ctx->qp[1].qp);
2236 sqp_ctx->qp[1].qp = NULL;
2237 mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0);
2238 ib_dealloc_pd(sqp_ctx->pd);
2239 sqp_ctx->pd = NULL;
2240 ib_destroy_cq(sqp_ctx->cq);
2241 sqp_ctx->cq = NULL;
2242 sqp_ctx->state = DEMUX_PV_STATE_DOWN;
2243 }
2244 }
2245
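/*
 * Free a per-port demux context: clean up the multicast paravirt
 * state, mark every slave context as going down, flush the tunnel
 * workqueue, release the per-slave resources and finally destroy both
 * workqueues.
 */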
2246 static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx)
2247 {
2248 int i;
2249 if (ctx) {
2250 struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev);
2251 mlx4_ib_mcg_port_cleanup(ctx, 1);
2252 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
2253 if (!ctx->tun[i])
2254 continue;
2255 if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN)
2256 ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING;
2257 }
2258 flush_workqueue(ctx->wq);
2259 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
2260 destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0);
2261 free_pv_object(dev, i, ctx->port);
2262 }
2263 kfree(ctx->tun);
2264 destroy_workqueue(ctx->ud_wq);
2265 destroy_workqueue(ctx->wq);
2266 }
2267 }
2268
2269 static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init)
2270 {
2271 int i;
2272
2273 if (!mlx4_is_master(dev->dev))
2274 return;
2275 /* initialize or tear down tunnel QPs for the master */
2276 for (i = 0; i < dev->dev->caps.num_ports; i++)
2277 mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init);
2278 return;
2279 }
2280
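/*
 * Initialize SR-IOV MAD paravirtualization.  Slaves only set up the
 * CM paravirt state and run in QP1 tunnel mode; the master also
 * assigns node GUIDs to the slaves, starts the alias GUID service,
 * registers the sysfs entries and builds a demux context plus a real
 * SQP context for every port before bringing up its own tunnel QPs.
 */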
2281 int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev)
2282 {
2283 int i = 0;
2284 int err;
2285
2286 if (!mlx4_is_mfunc(dev->dev))
2287 return 0;
2288
2289 dev->sriov.is_going_down = 0;
2290 spin_lock_init(&dev->sriov.going_down_lock);
2291 mlx4_ib_cm_paravirt_init(dev);
2292
2293 mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n");
2294
2295 if (mlx4_is_slave(dev->dev)) {
2296 mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n");
2297 return 0;
2298 }
2299
2300 for (i = 0; i < dev->dev->caps.sqp_demux; i++) {
2301 if (i == mlx4_master_func_num(dev->dev))
2302 mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid);
2303 else
2304 mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid());
2305 }
2306
2307 err = mlx4_ib_init_alias_guid_service(dev);
2308 if (err) {
2309 mlx4_ib_warn(&dev->ib_dev, "Failed init alias guid process.\n");
2310 goto paravirt_err;
2311 }
2312 err = mlx4_ib_device_register_sysfs(dev);
2313 if (err) {
2314 mlx4_ib_warn(&dev->ib_dev, "Failed to register sysfs\n");
2315 goto sysfs_err;
2316 }
2317
2318 mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n",
2319 dev->dev->caps.sqp_demux);
2320 for (i = 0; i < dev->num_ports; i++) {
2321 union ib_gid gid;
2322 err = __mlx4_ib_query_gid(&dev->ib_dev, i + 1, 0, &gid, 1);
2323 if (err)
2324 goto demux_err;
2325 dev->sriov.demux[i].guid_cache[0] = gid.global.interface_id;
2326 atomic64_set(&dev->sriov.demux[i].subnet_prefix,
2327 be64_to_cpu(gid.global.subnet_prefix));
2328 err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1,
2329 &dev->sriov.sqps[i]);
2330 if (err)
2331 goto demux_err;
2332 err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1);
2333 if (err)
2334 goto free_pv;
2335 }
2336 mlx4_ib_master_tunnels(dev, 1);
2337 return 0;
2338
2339 free_pv:
2340 free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
2341 demux_err:
2342 while (--i >= 0) {
2343 free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1);
2344 mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
2345 }
2346 mlx4_ib_device_unregister_sysfs(dev);
2347
2348 sysfs_err:
2349 mlx4_ib_destroy_alias_guid_service(dev);
2350
2351 paravirt_err:
2352 mlx4_ib_cm_paravirt_clean(dev, -1);
2353
2354 return err;
2355 }
2356
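/*
 * Shut down SR-IOV MAD paravirtualization: flag the device as going
 * down and, on the master, release the per-port SQP and demux
 * contexts, the CM paravirt state, the alias GUID service and the
 * sysfs entries.
 */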
2357 void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev)
2358 {
2359 int i;
2360 unsigned long flags;
2361
2362 if (!mlx4_is_mfunc(dev->dev))
2363 return;
2364
2365 spin_lock_irqsave(&dev->sriov.going_down_lock, flags);
2366 dev->sriov.is_going_down = 1;
2367 spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags);
2368 if (mlx4_is_master(dev->dev)) {
2369 for (i = 0; i < dev->num_ports; i++) {
2370 flush_workqueue(dev->sriov.demux[i].ud_wq);
2371 mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]);
2372 kfree(dev->sriov.sqps[i]);
2373 dev->sriov.sqps[i] = NULL;
2374 mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]);
2375 }
2376
2377 mlx4_ib_cm_paravirt_clean(dev, -1);
2378 mlx4_ib_destroy_alias_guid_service(dev);
2379 mlx4_ib_device_unregister_sysfs(dev);
2380 }
2381 }