2 * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33 #include <rdma/ib_verbs.h>
34 #include <linux/mlx5/fs.h>
/* Q_Key written into the QP context by mlx5i_init_underlay_qp(). */
38 #define IB_DEFAULT_Q_KEY 0xb1b
/* log2 of the default IPoIB RQ size (2^9 = 512 entries). */
39 #define MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE 9
/*
 * Forward declarations of the ndo callbacks that are defined later in this
 * file, so that the mlx5i_netdev_ops table can reference them.
 */
41 static int mlx5i_open(struct net_device
*netdev
);
42 static int mlx5i_close(struct net_device
*netdev
);
43 static int mlx5i_change_mtu(struct net_device
*netdev
, int new_mtu
);
44 static int mlx5i_ioctl(struct net_device
*dev
, struct ifreq
*ifr
, int cmd
);
/*
 * net_device_ops for the mlx5 IPoIB netdev. mlx5i_init() stores the address
 * of this table in netdev->netdev_ops.
 */
46 static const struct net_device_ops mlx5i_netdev_ops
= {
47 .ndo_open
= mlx5i_open
,
48 .ndo_stop
= mlx5i_close
,
49 .ndo_init
= mlx5i_dev_init
,
50 .ndo_uninit
= mlx5i_dev_cleanup
,
51 .ndo_change_mtu
= mlx5i_change_mtu
,
52 .ndo_do_ioctl
= mlx5i_ioctl
,
55 /* IPoIB mlx5 netdev profile */
/*
 * Apply the IPoIB-specific overrides to an already built mlx5e_params:
 * linked-list RQ type, IPoIB default RQ size (the mlx5e minimum when running
 * as a kdump kernel) and LRO disabled.
 *
 * @mdev:   core device, forwarded to mlx5e_init_rq_type_params()
 * @params: parameter set that is modified in place
 */
56 static void mlx5i_build_nic_params(struct mlx5_core_dev
*mdev
,
57 struct mlx5e_params
*params
)
59 /* Override RQ params as IPoIB supports only LINKED LIST RQ for now */
60 mlx5e_init_rq_type_params(mdev
, params
, MLX5_WQ_TYPE_LINKED_LIST
);
62 /* RQ size in ipoib by default is 512 */
63 params
->log_rq_size
= is_kdump_kernel() ?
64 MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE
:
65 MLX5I_PARAMS_DEFAULT_LOG_RQ_SIZE
;
67 params
->lro_en
= false;
70 /* Called directly after IPoIB netdevice was created to initialize SW structs */
/*
 * Fills the mlx5e private data reached through mlx5i_epriv(netdev), builds
 * the channel parameters (generic NIC parameters first, then the IPoIB
 * overrides), advertises the supported offloads in netdev->hw_features and
 * installs the netdev and ethtool operation tables.
 *
 * @mdev:    core device
 * @netdev:  the IPoIB net device being initialized
 * @profile: mlx5e profile; its max_nch() callback supplies the channel count
 */
71 void mlx5i_init(struct mlx5_core_dev
*mdev
,
72 struct net_device
*netdev
,
73 const struct mlx5e_profile
*profile
,
76 struct mlx5e_priv
*priv
= mlx5i_epriv(netdev
);
80 priv
->netdev
= netdev
;
81 priv
->profile
= profile
;
/* Hard MTU overhead for IPoIB: GRH bytes plus the IPoIB hard header. */
83 priv
->hard_mtu
= MLX5_IB_GRH_BYTES
+ MLX5_IPOIB_HARD_LEN
;
84 mutex_init(&priv
->state_lock
);
/* Generic NIC params first, then the IPoIB-specific overrides on top. */
86 mlx5e_build_nic_params(mdev
, &priv
->channels
.params
, profile
->max_nch(mdev
));
87 mlx5i_build_nic_params(mdev
, &priv
->channels
.params
);
/* Offloads advertised as user-toggleable hardware features. */
90 netdev
->hw_features
|= NETIF_F_SG
;
91 netdev
->hw_features
|= NETIF_F_IP_CSUM
;
92 netdev
->hw_features
|= NETIF_F_IPV6_CSUM
;
93 netdev
->hw_features
|= NETIF_F_GRO
;
94 netdev
->hw_features
|= NETIF_F_TSO
;
95 netdev
->hw_features
|= NETIF_F_TSO6
;
96 netdev
->hw_features
|= NETIF_F_RXCSUM
;
97 netdev
->hw_features
|= NETIF_F_RXHASH
;
99 netdev
->netdev_ops
= &mlx5i_netdev_ops
;
100 netdev
->ethtool_ops
= &mlx5i_ethtool_ops
;
103 /* Called directly before IPoIB netdevice is destroyed to cleanup SW structs */
/*
 * Registered as the .cleanup callback of mlx5i_nic_profile and invoked
 * through profile->cleanup() in mlx5_rdma_netdev_free().
 * NOTE(review): no statement of the body is visible in this view.
 */
104 static void mlx5i_cleanup(struct mlx5e_priv
*priv
)
/*
 * Bring the IPoIB QP up through the RST -> INIT -> RTR -> RTS transitions.
 *
 * A zeroed mlx5_qp_context is allocated and, for the RST2INIT transition,
 * filled with the migrated path-migration state, port 1, the pkey index from
 * the IPoIB private data and the default Q_Key. The context is cleared again
 * before the INIT2RTR and RTR2RTS transitions.
 *
 * When a transition fails the error code is logged and control jumps to
 * err_qp_modify_to_err, which moves the QP to the error state.
 *
 * @priv: mlx5e private data; priv->ppriv holds the mlx5i_priv that owns the QP
 */
109 int mlx5i_init_underlay_qp(struct mlx5e_priv
*priv
)
111 struct mlx5_core_dev
*mdev
= priv
->mdev
;
112 struct mlx5i_priv
*ipriv
= priv
->ppriv
;
113 struct mlx5_core_qp
*qp
= &ipriv
->qp
;
114 struct mlx5_qp_context
*context
;
118 context
= kzalloc(sizeof(*context
), GFP_KERNEL
);
/* Context fields are big-endian; PM state is placed at bit 11 of flags. */
122 context
->flags
= cpu_to_be32(MLX5_QP_PM_MIGRATED
<< 11);
123 context
->pri_path
.port
= 1;
124 context
->pri_path
.pkey_index
= cpu_to_be16(ipriv
->pkey_index
);
125 context
->qkey
= cpu_to_be32(IB_DEFAULT_Q_KEY
);
127 ret
= mlx5_core_qp_modify(mdev
, MLX5_CMD_OP_RST2INIT_QP
, 0, context
, qp
);
129 mlx5_core_err(mdev
, "Failed to modify qp RST2INIT, err: %d\n", ret
);
130 goto err_qp_modify_to_err
;
/* The remaining transitions are issued with an all-zero context. */
132 memset(context
, 0, sizeof(*context
));
134 ret
= mlx5_core_qp_modify(mdev
, MLX5_CMD_OP_INIT2RTR_QP
, 0, context
, qp
);
136 mlx5_core_err(mdev
, "Failed to modify qp INIT2RTR, err: %d\n", ret
);
137 goto err_qp_modify_to_err
;
140 ret
= mlx5_core_qp_modify(mdev
, MLX5_CMD_OP_RTR2RTS_QP
, 0, context
, qp
);
142 mlx5_core_err(mdev
, "Failed to modify qp RTR2RTS, err: %d\n", ret
);
143 goto err_qp_modify_to_err
;
149 err_qp_modify_to_err
:
/*
 * context is already a pointer to the QP context: pass it as-is, like the
 * transitions above, rather than the address of the pointer variable.
 */
150 mlx5_core_qp_modify(mdev
, MLX5_CMD_OP_2ERR_QP
, 0, context
, qp
);
/*
 * Issue a MLX5_CMD_OP_2RST_QP modify command for the IPoIB QP and log the
 * error code if the command fails.
 *
 * @priv: mlx5e private data; priv->ppriv is the mlx5i_priv, priv->mdev the
 *        core device the command is sent to
 */
155 void mlx5i_uninit_underlay_qp(struct mlx5e_priv
*priv
)
157 struct mlx5i_priv
*ipriv
= priv
->ppriv
;
158 struct mlx5_core_dev
*mdev
= priv
->mdev
;
/* Zero-initialized so no uninitialized stack bytes are handed to the command. */
159 struct mlx5_qp_context context
= {};
162 err
= mlx5_core_qp_modify(mdev
, MLX5_CMD_OP_2RST_QP
, 0, &context
,
165 mlx5_core_err(mdev
, "Failed to modify qp 2RST, err: %d\n", err
);
/* Value programmed into the QPC ulp_stateless_offload_mode field below. */
168 #define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2
/*
 * Create the IPoIB QP.
 *
 * Builds a zeroed create_qp_in mailbox whose QP context selects the UD
 * service type, the migrated path-migration state and the enhanced ULP
 * stateless offload mode, sets port 1 and the GRH bit in the primary address
 * path, and hands the mailbox to mlx5_core_create_qp(). A failure is logged
 * with its error code.
 *
 * @mdev: core device the QP is created on
 * @qp:   QP object passed to mlx5_core_create_qp()
 *
 * NOTE(review): the release of the kvzalloc()ed mailbox and the return
 * statements are on lines not visible in this view.
 */
170 int mlx5i_create_underlay_qp(struct mlx5_core_dev
*mdev
, struct mlx5_core_qp
*qp
)
178 inlen
= MLX5_ST_SZ_BYTES(create_qp_in
);
179 in
= kvzalloc(inlen
, GFP_KERNEL
);
/* qpc points at the QP context embedded in the create_qp_in mailbox. */
183 qpc
= MLX5_ADDR_OF(create_qp_in
, in
, qpc
);
184 MLX5_SET(qpc
, qpc
, st
, MLX5_QP_ST_UD
);
185 MLX5_SET(qpc
, qpc
, pm_state
, MLX5_QP_PM_MIGRATED
);
186 MLX5_SET(qpc
, qpc
, ulp_stateless_offload_mode
,
187 MLX5_QP_ENHANCED_ULP_STATELESS_MODE
);
189 addr_path
= MLX5_ADDR_OF(qpc
, qpc
, primary_address_path
);
190 MLX5_SET(ads
, addr_path
, port
, 1);
191 MLX5_SET(ads
, addr_path
, grh
, 1);
193 ret
= mlx5_core_create_qp(mdev
, qp
, in
, inlen
);
195 mlx5_core_err(mdev
, "Failed creating IPoIB QP err : %d\n", ret
);
/*
 * Destroy a QP created by mlx5i_create_underlay_qp(); thin wrapper around
 * mlx5_core_destroy_qp().
 *
 * @mdev: core device the QP belongs to
 * @qp:   QP to destroy
 */
204 void mlx5i_destroy_underlay_qp(struct mlx5_core_dev
*mdev
, struct mlx5_core_qp
*qp
)
206 mlx5_core_destroy_qp(mdev
, qp
);
/*
 * TX side setup for the profile (.init_tx): create the IPoIB QP, then create
 * a TIS for tc 0 bound to that QP's number and store it in priv->tisn[0].
 * If creating the TIS fails, the QP created in the first step is destroyed
 * again via err_destroy_underlay_qp.
 *
 * @priv: mlx5e private data; priv->ppriv is the mlx5i_priv holding the QP
 */
209 static int mlx5i_init_tx(struct mlx5e_priv
*priv
)
211 struct mlx5i_priv
*ipriv
= priv
->ppriv
;
214 err
= mlx5i_create_underlay_qp(priv
->mdev
, &ipriv
->qp
);
216 mlx5_core_warn(priv
->mdev
, "create underlay QP failed, %d\n", err
);
220 err
= mlx5e_create_tis(priv
->mdev
, 0 /* tc */, ipriv
->qp
.qpn
, &priv
->tisn
[0]);
222 mlx5_core_warn(priv
->mdev
, "create tis failed, %d\n", err
);
223 goto err_destroy_underlay_qp
;
228 err_destroy_underlay_qp
:
229 mlx5i_destroy_underlay_qp(priv
->mdev
, &ipriv
->qp
);
/*
 * TX side teardown for the profile (.cleanup_tx): destroys the TIS stored in
 * priv->tisn[0] and then the IPoIB QP, i.e. the reverse of mlx5i_init_tx().
 *
 * @priv: mlx5e private data; priv->ppriv is the mlx5i_priv holding the QP
 */
233 static void mlx5i_cleanup_tx(struct mlx5e_priv
*priv
)
235 struct mlx5i_priv
*ipriv
= priv
->ppriv
;
237 mlx5e_destroy_tis(priv
->mdev
, priv
->tisn
[0]);
238 mlx5i_destroy_underlay_qp(priv
->mdev
, &ipriv
->qp
);
/*
 * Set up RX flow steering: look up the kernel flow namespace, then create
 * the aRFS tables, the inner TTC table and the TTC table, in that order.
 *
 * An aRFS failure is logged and NETIF_F_NTUPLE is cleared from hw_features;
 * no jump to an error label is visible on that path. Inner-TTC and TTC
 * failures unwind what was created before them through the labels at the
 * end of the function.
 *
 * @priv: mlx5e private data whose fs state and netdev are used
 */
241 static int mlx5i_create_flow_steering(struct mlx5e_priv
*priv
)
245 priv
->fs
.ns
= mlx5_get_flow_namespace(priv
->mdev
,
246 MLX5_FLOW_NAMESPACE_KERNEL
);
251 err
= mlx5e_arfs_create_tables(priv
);
253 netdev_err(priv
->netdev
, "Failed to create arfs tables, err=%d\n",
255 priv
->netdev
->hw_features
&= ~NETIF_F_NTUPLE
;
258 err
= mlx5e_create_inner_ttc_table(priv
);
260 netdev_err(priv
->netdev
, "Failed to create inner ttc table, err=%d\n",
262 goto err_destroy_arfs_tables
;
265 err
= mlx5e_create_ttc_table(priv
);
267 netdev_err(priv
->netdev
, "Failed to create ttc table, err=%d\n",
269 goto err_destroy_inner_ttc_table
;
/* Error unwinding: tear down in reverse order of creation. */
274 err_destroy_inner_ttc_table
:
275 mlx5e_destroy_inner_ttc_table(priv
);
276 err_destroy_arfs_tables
:
277 mlx5e_arfs_destroy_tables(priv
);
/*
 * Tear down the tables created by mlx5i_create_flow_steering() in reverse
 * order: TTC table, inner TTC table, aRFS tables.
 */
282 static void mlx5i_destroy_flow_steering(struct mlx5e_priv
*priv
)
284 mlx5e_destroy_ttc_table(priv
);
285 mlx5e_destroy_inner_ttc_table(priv
);
286 mlx5e_arfs_destroy_tables(priv
);
/*
 * RX side setup for the profile (.init_rx). Creates, in order: the indirect
 * RQT, the direct RQTs, the indirect TIRs, the direct TIRs and finally the
 * flow steering tables. Each failing step jumps to the label that destroys
 * everything created before it, in reverse order.
 *
 * @priv: mlx5e private data
 */
289 static int mlx5i_init_rx(struct mlx5e_priv
*priv
)
293 err
= mlx5e_create_indirect_rqt(priv
);
297 err
= mlx5e_create_direct_rqts(priv
);
299 goto err_destroy_indirect_rqts
;
301 err
= mlx5e_create_indirect_tirs(priv
);
303 goto err_destroy_direct_rqts
;
305 err
= mlx5e_create_direct_tirs(priv
);
307 goto err_destroy_indirect_tirs
;
309 err
= mlx5i_create_flow_steering(priv
);
311 goto err_destroy_direct_tirs
;
/* Error unwinding: labels fall through so later objects go first. */
315 err_destroy_direct_tirs
:
316 mlx5e_destroy_direct_tirs(priv
);
317 err_destroy_indirect_tirs
:
318 mlx5e_destroy_indirect_tirs(priv
);
319 err_destroy_direct_rqts
:
320 mlx5e_destroy_direct_rqts(priv
);
321 err_destroy_indirect_rqts
:
322 mlx5e_destroy_rqt(priv
, &priv
->indir_rqt
);
/*
 * RX side teardown for the profile (.cleanup_rx): destroys flow steering,
 * direct TIRs, indirect TIRs, direct RQTs and the indirect RQT, i.e. the
 * objects of mlx5i_init_rx() in reverse creation order.
 */
326 static void mlx5i_cleanup_rx(struct mlx5e_priv
*priv
)
328 mlx5i_destroy_flow_steering(priv
);
329 mlx5e_destroy_direct_tirs(priv
);
330 mlx5e_destroy_indirect_tirs(priv
);
331 mlx5e_destroy_direct_rqts(priv
);
332 mlx5e_destroy_rqt(priv
, &priv
->indir_rqt
);
/*
 * mlx5e profile used by mlx5_rdma_netdev_alloc() when the device is not a
 * sub-interface. Callbacks set to NULL are not provided by IPoIB; the
 * trailing comment on each such entry says what it stands for.
 */
335 static const struct mlx5e_profile mlx5i_nic_profile
= {
337 .cleanup
= mlx5i_cleanup
,
338 .init_tx
= mlx5i_init_tx
,
339 .cleanup_tx
= mlx5i_cleanup_tx
,
340 .init_rx
= mlx5i_init_rx
,
341 .cleanup_rx
= mlx5i_cleanup_rx
,
342 .enable
= NULL
, /* mlx5i_enable */
343 .disable
= NULL
, /* mlx5i_disable */
344 .update_stats
= NULL
, /* mlx5i_update_stats */
345 .max_nch
= mlx5e_get_max_num_channels
,
346 .update_carrier
= NULL
, /* no HW update in IB link */
347 .rx_handlers
.handle_rx_cqe
= mlx5i_handle_rx_cqe
,
348 .rx_handlers
.handle_rx_cqe_mpwqe
= NULL
, /* Not supported */
349 .max_tc
= MLX5I_MAX_NUM_TC
,
352 /* mlx5i netdev NDOs */
/*
 * ndo_change_mtu callback. Runs under priv->state_lock.
 *
 * The current MTU is saved and netdev->mtu is set to new_mtu. The code then
 * tests MLX5E_STATE_OPENED; the statement guarded by that test is on a line
 * not visible here. Further down a new channel set is opened with a copy of
 * the current channel parameters; the saved MTU is written back to
 * netdev->mtu (presumably when opening fails - TODO confirm), and the new
 * channels are switched in with mlx5e_switch_priv_channels().
 *
 * @netdev:  IPoIB net device
 * @new_mtu: requested MTU
 */
354 static int mlx5i_change_mtu(struct net_device
*netdev
, int new_mtu
)
356 struct mlx5e_priv
*priv
= mlx5i_epriv(netdev
);
357 struct mlx5e_channels new_channels
= {};
361 mutex_lock(&priv
->state_lock
);
/* Remember the old MTU so it can be restored below. */
363 curr_mtu
= netdev
->mtu
;
364 netdev
->mtu
= new_mtu
;
366 if (!test_bit(MLX5E_STATE_OPENED
, &priv
->state
))
369 new_channels
.params
= priv
->channels
.params
;
370 err
= mlx5e_open_channels(priv
, &new_channels
);
372 netdev
->mtu
= curr_mtu
;
376 mlx5e_switch_priv_channels(priv
, &new_channels
, NULL
);
379 mutex_unlock(&priv
->state_lock
);
/*
 * ndo_init callback: encodes the 24-bit QP number of the IPoIB QP into bytes
 * 1..3 of the device address (most significant byte first) and registers the
 * QPN -> net-device mapping.
 *
 * @dev: IPoIB net device
 */
383 int mlx5i_dev_init(struct net_device
*dev
)
385 struct mlx5e_priv
*priv
= mlx5i_epriv(dev
);
386 struct mlx5i_priv
*ipriv
= priv
->ppriv
;
388 /* Set dev address using underlay QP */
389 dev
->dev_addr
[1] = (ipriv
->qp
.qpn
>> 16) & 0xff;
390 dev
->dev_addr
[2] = (ipriv
->qp
.qpn
>> 8) & 0xff;
391 dev
->dev_addr
[3] = (ipriv
->qp
.qpn
) & 0xff;
393 /* Add QPN to net-device mapping to HT */
394 mlx5i_pkey_add_qpn(dev
,ipriv
->qp
.qpn
);
/*
 * ndo_do_ioctl callback: forwards hardware timestamping requests to
 * mlx5e_hwstamp_set() / mlx5e_hwstamp_get().
 *
 * NOTE(review): the switch on cmd and its case labels are on lines not
 * visible here - presumably SIOCSHWTSTAMP / SIOCGHWTSTAMP; confirm.
 *
 * @dev: IPoIB net device
 * @ifr: ioctl request passed through to the mlx5e helpers
 * @cmd: ioctl command
 */
399 static int mlx5i_ioctl(struct net_device
*dev
, struct ifreq
*ifr
, int cmd
)
401 struct mlx5e_priv
*priv
= mlx5i_epriv(dev
);
405 return mlx5e_hwstamp_set(priv
, ifr
);
407 return mlx5e_hwstamp_get(priv
, ifr
);
/*
 * ndo_uninit callback: resets the IPoIB QP and removes the QPN -> net-device
 * mapping that mlx5i_dev_init() added.
 *
 * @dev: IPoIB net device
 */
413 void mlx5i_dev_cleanup(struct net_device
*dev
)
415 struct mlx5e_priv
*priv
= mlx5i_epriv(dev
);
416 struct mlx5i_priv
*ipriv
= priv
->ppriv
;
418 mlx5i_uninit_underlay_qp(priv
);
420 /* Delete QPN to net-device mapping from HT */
421 mlx5i_pkey_del_qpn(dev
, ipriv
->qp
.qpn
);
/*
 * ndo_open callback. Runs under epriv->state_lock.
 *
 * Sets MLX5E_STATE_OPENED, brings the IPoIB QP to RTS, attaches its QPN to
 * the RX flow tables, opens the channels, refreshes the TIRs, activates the
 * channels and applies the timestamp configuration.
 *
 * The error labels at the bottom unwind in reverse order: remove the RX
 * underlay QPN, reset the QP, clear the OPENED bit and drop the lock.
 *
 * @netdev: IPoIB net device
 */
424 static int mlx5i_open(struct net_device
*netdev
)
426 struct mlx5e_priv
*epriv
= mlx5i_epriv(netdev
);
427 struct mlx5i_priv
*ipriv
= epriv
->ppriv
;
428 struct mlx5_core_dev
*mdev
= epriv
->mdev
;
431 mutex_lock(&epriv
->state_lock
);
433 set_bit(MLX5E_STATE_OPENED
, &epriv
->state
);
435 err
= mlx5i_init_underlay_qp(epriv
);
437 mlx5_core_warn(mdev
, "prepare underlay qp state failed, %d\n", err
);
438 goto err_clear_state_opened_flag
;
441 err
= mlx5_fs_add_rx_underlay_qpn(mdev
, ipriv
->qp
.qpn
);
443 mlx5_core_warn(mdev
, "attach underlay qp to ft failed, %d\n", err
);
/*
 * NOTE(review): the jump taken after the warning above is on a line not
 * visible here; confirm it resets the QP without removing an RX underlay
 * QPN that was never added.
 */
447 err
= mlx5e_open_channels(epriv
, &epriv
->channels
);
449 goto err_remove_fs_underlay_qp
;
451 mlx5e_refresh_tirs(epriv
, false);
452 mlx5e_activate_priv_channels(epriv
);
453 mlx5e_timestamp_set(epriv
);
455 mutex_unlock(&epriv
->state_lock
);
458 err_remove_fs_underlay_qp
:
459 mlx5_fs_remove_rx_underlay_qpn(mdev
, ipriv
->qp
.qpn
);
461 mlx5i_uninit_underlay_qp(epriv
);
462 err_clear_state_opened_flag
:
463 clear_bit(MLX5E_STATE_OPENED
, &epriv
->state
);
464 mutex_unlock(&epriv
->state_lock
);
/*
 * ndo_stop callback. Runs under epriv->state_lock.
 *
 * Tests MLX5E_STATE_OPENED first (the device may already be closed, see the
 * comment below), then clears the bit, turns the carrier off, detaches the
 * QPN from the RX flow tables, resets the IPoIB QP, deactivates the channels
 * and closes them.
 *
 * @netdev: IPoIB net device
 */
468 static int mlx5i_close(struct net_device
*netdev
)
470 struct mlx5e_priv
*epriv
= mlx5i_epriv(netdev
);
471 struct mlx5i_priv
*ipriv
= epriv
->ppriv
;
472 struct mlx5_core_dev
*mdev
= epriv
->mdev
;
474 /* May already be CLOSED in case a previous configuration operation
475 * (e.g. RX/TX queue size change) that involves close&open failed. */
477 mutex_lock(&epriv
->state_lock
);
479 if (!test_bit(MLX5E_STATE_OPENED
, &epriv
->state
))
482 clear_bit(MLX5E_STATE_OPENED
, &epriv
->state
);
484 netif_carrier_off(epriv
->netdev
);
485 mlx5_fs_remove_rx_underlay_qpn(mdev
, ipriv
->qp
.qpn
);
486 mlx5i_uninit_underlay_qp(epriv
);
487 mlx5e_deactivate_priv_channels(epriv
);
488 mlx5e_close_channels(&epriv
->channels
);
490 mutex_unlock(&epriv
->state_lock
);
494 /* IPoIB RDMA netdev callbacks */
/*
 * rdma_netdev attach_mcast callback: attaches the IPoIB QP to the multicast
 * group identified by gid via mlx5_core_attach_mcg(), logging a warning with
 * the QPN and MGID on failure. A debug message about setting the qkey is
 * emitted further down; the statements around it are on lines not visible
 * in this view.
 *
 * @netdev:   IPoIB net device
 * @hca:      IB device (not referenced in the visible lines)
 * @gid:      multicast GID to attach to
 * @lid:      multicast LID (not referenced in the visible lines)
 * @set_qkey: not referenced in the visible lines - presumably controls the
 *            qkey update; TODO confirm
 */
495 static int mlx5i_attach_mcast(struct net_device
*netdev
, struct ib_device
*hca
,
496 union ib_gid
*gid
, u16 lid
, int set_qkey
,
499 struct mlx5e_priv
*epriv
= mlx5i_epriv(netdev
);
500 struct mlx5_core_dev
*mdev
= epriv
->mdev
;
501 struct mlx5i_priv
*ipriv
= epriv
->ppriv
;
504 mlx5_core_dbg(mdev
, "attaching QPN 0x%x, MGID %pI6\n", ipriv
->qp
.qpn
, gid
->raw
);
505 err
= mlx5_core_attach_mcg(mdev
, gid
, ipriv
->qp
.qpn
);
507 mlx5_core_warn(mdev
, "failed attaching QPN 0x%x, MGID %pI6\n",
508 ipriv
->qp
.qpn
, gid
->raw
);
511 mlx5_core_dbg(mdev
, "%s setting qkey 0x%x\n",
/*
 * rdma_netdev detach_mcast callback: detaches the IPoIB QP from the
 * multicast group identified by gid via mlx5_core_detach_mcg() and emits a
 * debug message with the QPN and MGID if that fails.
 *
 * @netdev: IPoIB net device
 * @hca:    IB device (not referenced in the visible lines)
 * @gid:    multicast GID to detach from
 * @lid:    multicast LID (not referenced in the visible lines)
 */
519 static int mlx5i_detach_mcast(struct net_device
*netdev
, struct ib_device
*hca
,
520 union ib_gid
*gid
, u16 lid
)
522 struct mlx5e_priv
*epriv
= mlx5i_epriv(netdev
);
523 struct mlx5_core_dev
*mdev
= epriv
->mdev
;
524 struct mlx5i_priv
*ipriv
= epriv
->ppriv
;
527 mlx5_core_dbg(mdev
, "detaching QPN 0x%x, MGID %pI6\n", ipriv
->qp
.qpn
, gid
->raw
);
529 err
= mlx5_core_detach_mcg(mdev
, gid
, ipriv
->qp
.qpn
);
531 mlx5_core_dbg(mdev
, "failed detaching QPN 0x%x, MGID %pI6\n",
532 ipriv
->qp
.qpn
, gid
->raw
);
/*
 * rdma_netdev send callback: selects the send queue from epriv->txq2sq using
 * the skb's queue mapping, converts the address handle to its mlx5 form and
 * transmits through mlx5i_sq_xmit() with the handle's address vector, the
 * destination QPN and the qkey stored in the IPoIB private data.
 *
 * @dev:     IPoIB net device
 * @skb:     packet to send
 * @address: IB address handle of the destination
 * @dqpn:    destination QP number
 */
537 static int mlx5i_xmit(struct net_device
*dev
, struct sk_buff
*skb
,
538 struct ib_ah
*address
, u32 dqpn
)
540 struct mlx5e_priv
*epriv
= mlx5i_epriv(dev
);
541 struct mlx5e_txqsq
*sq
= epriv
->txq2sq
[skb_get_queue_mapping(skb
)];
542 struct mlx5_ib_ah
*mah
= to_mah(address
);
543 struct mlx5i_priv
*ipriv
= epriv
->ppriv
;
545 return mlx5i_sq_xmit(sq
, skb
, &mah
->av
, dqpn
, ipriv
->qkey
);
/*
 * rdma_netdev set_id callback: stores id, truncated to u16, as the pkey
 * index in the IPoIB private data obtained with netdev_priv(). The stored
 * index is later written into the QP context by mlx5i_init_underlay_qp().
 *
 * @netdev: IPoIB net device
 * @id:     pkey index to record
 */
548 static void mlx5i_set_pkey_index(struct net_device
*netdev
, int id
)
550 struct mlx5i_priv
*ipriv
= netdev_priv(netdev
);
552 ipriv
->pkey_index
= (u16
)id
;
/*
 * Check that the device can run accelerated IPoIB: the general capability
 * port_type must be IB and ipoib_enhanced_offloads must be set (a warning is
 * logged when it is not).
 *
 * The return statements are on lines not visible here; the caller,
 * mlx5_rdma_netdev_alloc(), treats a non-zero result as "not supported".
 *
 * @mdev: core device whose capabilities are queried
 */
555 static int mlx5i_check_required_hca_cap(struct mlx5_core_dev
*mdev
)
557 if (MLX5_CAP_GEN(mdev
, port_type
) != MLX5_CAP_PORT_TYPE_IB
)
560 if (!MLX5_CAP_GEN(mdev
, ipoib_enhanced_offloads
)) {
561 mlx5_core_warn(mdev
, "IPoIB enhanced offloads are not supported\n");
/*
 * Allocate and set up an accelerated IPoIB rdma_netdev on top of mdev.
 *
 * Steps visible in this function:
 *  - return ERR_PTR(-EOPNOTSUPP) when the required HCA capabilities are
 *    missing;
 *  - pick the profile: the pkey profile or mlx5i_nic_profile, based on
 *    sub_interface (the if/else lines themselves are not visible here);
 *  - allocate a multi-queue netdev whose private area holds an mlx5i_priv
 *    followed by an mlx5e_priv;
 *  - create the "mlx5i" single-threaded workqueue;
 *  - for a parent (non sub-interface) device, initialize the QPN hash table
 *    and create the mdev resources;
 *  - run profile->init(), attach the netdev, force the carrier off and fill
 *    in the rdma_netdev function pointers.
 *
 * The cleanup calls at the end (hash table cleanup, workqueue destruction)
 * belong to error labels whose lines are not visible in this view.
 *
 * @mdev:  core device
 * @ibdev: IB device (not referenced in the visible lines)
 * @setup: netdev setup callback (presumably forwarded to alloc_netdev_mqs();
 *         that argument line is not visible - TODO confirm)
 */
568 struct net_device
*mlx5_rdma_netdev_alloc(struct mlx5_core_dev
*mdev
,
569 struct ib_device
*ibdev
,
571 void (*setup
)(struct net_device
*))
573 const struct mlx5e_profile
*profile
;
574 struct net_device
*netdev
;
575 struct mlx5i_priv
*ipriv
;
576 struct mlx5e_priv
*epriv
;
577 struct rdma_netdev
*rn
;
582 if (mlx5i_check_required_hca_cap(mdev
)) {
583 mlx5_core_warn(mdev
, "Accelerated mode is not supported\n");
584 return ERR_PTR(-EOPNOTSUPP
);
587 /* TODO: Need to find a better way to check if this is a child device */
588 sub_interface
= (mdev
->mlx5e_res
.pdn
!= 0);
591 profile
= mlx5i_pkey_get_profile();
593 profile
= &mlx5i_nic_profile
;
595 nch
= profile
->max_nch(mdev
);
597 netdev
= alloc_netdev_mqs(sizeof(struct mlx5i_priv
) + sizeof(struct mlx5e_priv
),
598 name
, NET_NAME_UNKNOWN
,
600 nch
* MLX5E_MAX_NUM_TC
,
603 mlx5_core_warn(mdev
, "alloc_netdev_mqs failed\n");
607 ipriv
= netdev_priv(netdev
);
608 epriv
= mlx5i_epriv(netdev
);
610 epriv
->wq
= create_singlethread_workqueue("mlx5i");
612 goto err_free_netdev
;
614 ipriv
->sub_interface
= sub_interface
;
615 if (!ipriv
->sub_interface
) {
616 err
= mlx5i_pkey_qpn_ht_init(netdev
);
618 mlx5_core_warn(mdev
, "allocate qpn_to_netdev ht failed\n");
622 /* This should only be called once per mdev */
623 err
= mlx5e_create_mdev_resources(mdev
);
628 profile
->init(mdev
, netdev
, profile
, ipriv
);
630 mlx5e_attach_netdev(epriv
);
631 netif_carrier_off(netdev
);
633 /* set rdma_netdev func pointers */
636 rn
->send
= mlx5i_xmit
;
637 rn
->attach_mcast
= mlx5i_attach_mcast
;
638 rn
->detach_mcast
= mlx5i_detach_mcast
;
639 rn
->set_id
= mlx5i_set_pkey_index
;
644 mlx5i_pkey_qpn_ht_cleanup(netdev
);
646 destroy_workqueue(epriv
->wq
);
652 EXPORT_SYMBOL(mlx5_rdma_netdev_alloc
);
/*
 * Tear down a netdev set up by mlx5_rdma_netdev_alloc(): detach the netdev,
 * run the profile's cleanup callback and destroy the workqueue. For a parent
 * (non sub-interface) device the QPN hash table and the mdev resources are
 * released as well.
 *
 * @netdev: IPoIB net device to release
 */
654 void mlx5_rdma_netdev_free(struct net_device
*netdev
)
656 struct mlx5e_priv
*priv
= mlx5i_epriv(netdev
);
657 struct mlx5i_priv
*ipriv
= priv
->ppriv
;
658 const struct mlx5e_profile
*profile
= priv
->profile
;
660 mlx5e_detach_netdev(priv
);
661 profile
->cleanup(priv
);
662 destroy_workqueue(priv
->wq
);
/* Only the parent device owns the QPN hash table and the mdev resources. */
664 if (!ipriv
->sub_interface
) {
665 mlx5i_pkey_qpn_ht_cleanup(netdev
);
666 mlx5e_destroy_mdev_resources(priv
->mdev
);
670 EXPORT_SYMBOL(mlx5_rdma_netdev_free
);