1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2017 6WIND S.A.
3 * Copyright 2017 Mellanox Technologies, Ltd
8 * Miscellaneous control operations for mlx4 driver.
14 #include <linux/ethtool.h>
15 #include <linux/sockios.h>
17 #include <netinet/ip.h>
23 #include <sys/ioctl.h>
24 #include <sys/socket.h>
27 /* Verbs headers do not support -pedantic. */
29 #pragma GCC diagnostic ignored "-Wpedantic"
31 #include <infiniband/verbs.h>
33 #pragma GCC diagnostic error "-Wpedantic"
36 #include <rte_bus_pci.h>
37 #include <rte_errno.h>
38 #include <rte_ethdev_driver.h>
39 #include <rte_ether.h>
42 #include <rte_string_fns.h>
45 #include "mlx4_flow.h"
46 #include "mlx4_glue.h"
47 #include "mlx4_rxtx.h"
48 #include "mlx4_utils.h"
51 * Get interface name from private structure.
54 * Pointer to private structure.
56 * Interface name output buffer.
59 * 0 on success, negative errno value otherwise and rte_errno is set.
62 mlx4_get_ifname(const struct priv
*priv
, char (*ifname
)[IF_NAMESIZE
])
66 unsigned int dev_type
= 0;
67 unsigned int dev_port_prev
= ~0u;
68 char match
[IF_NAMESIZE
] = "";
71 MKSTR(path
, "%s/device/net", priv
->ctx
->device
->ibdev_path
);
79 while ((dent
= readdir(dir
)) != NULL
) {
80 char *name
= dent
->d_name
;
82 unsigned int dev_port
;
85 if ((name
[0] == '.') &&
87 ((name
[1] == '.') && (name
[2] == '\0'))))
90 MKSTR(path
, "%s/device/net/%s/%s",
91 priv
->ctx
->device
->ibdev_path
, name
,
92 (dev_type
? "dev_id" : "dev_port"));
94 file
= fopen(path
, "rb");
99 * Switch to dev_id when dev_port does not exist as
100 * is the case with Linux kernel versions < 3.15.
111 r
= fscanf(file
, (dev_type
? "%x" : "%u"), &dev_port
);
116 * Switch to dev_id when dev_port returns the same value for
117 * all ports. May happen when using a MOFED release older than
118 * 3.0 with a Linux kernel >= 3.15.
120 if (dev_port
== dev_port_prev
)
122 dev_port_prev
= dev_port
;
123 if (dev_port
== (priv
->port
- 1u))
124 strlcpy(match
, name
, sizeof(match
));
127 if (match
[0] == '\0') {
131 strncpy(*ifname
, match
, sizeof(*ifname
));
136 * Perform ifreq ioctl() on associated Ethernet device.
139 * Pointer to private structure.
141 * Request number to pass to ioctl().
143 * Interface request structure output buffer.
146 * 0 on success, negative errno value otherwise and rte_errno is set.
149 mlx4_ifreq(const struct priv
*priv
, int req
, struct ifreq
*ifr
)
151 int sock
= socket(PF_INET
, SOCK_DGRAM
, IPPROTO_IP
);
158 ret
= mlx4_get_ifname(priv
, &ifr
->ifr_name
);
159 if (!ret
&& ioctl(sock
, req
, ifr
) == -1) {
168 * Get MAC address by querying netdevice.
171 * Pointer to private structure.
173 * MAC address output buffer.
176 * 0 on success, negative errno value otherwise and rte_errno is set.
179 mlx4_get_mac(struct priv
*priv
, uint8_t (*mac
)[ETHER_ADDR_LEN
])
181 struct ifreq request
;
182 int ret
= mlx4_ifreq(priv
, SIOCGIFHWADDR
, &request
);
186 memcpy(mac
, request
.ifr_hwaddr
.sa_data
, ETHER_ADDR_LEN
);
194 * Pointer to private structure.
196 * MTU value output buffer.
199 * 0 on success, negative errno value otherwise and rte_errno is set.
202 mlx4_mtu_get(struct priv
*priv
, uint16_t *mtu
)
204 struct ifreq request
;
205 int ret
= mlx4_ifreq(priv
, SIOCGIFMTU
, &request
);
209 *mtu
= request
.ifr_mtu
;
214 * DPDK callback to change the MTU.
217 * Pointer to Ethernet device structure.
222 * 0 on success, negative errno value otherwise and rte_errno is set.
225 mlx4_mtu_set(struct rte_eth_dev
*dev
, uint16_t mtu
)
227 struct priv
*priv
= dev
->data
->dev_private
;
228 struct ifreq request
= { .ifr_mtu
= mtu
, };
229 int ret
= mlx4_ifreq(priv
, SIOCSIFMTU
, &request
);
241 * Pointer to private structure.
243 * Bitmask for flags that must remain untouched.
245 * Bitmask for flags to modify.
248 * 0 on success, negative errno value otherwise and rte_errno is set.
251 mlx4_set_flags(struct priv
*priv
, unsigned int keep
, unsigned int flags
)
253 struct ifreq request
;
254 int ret
= mlx4_ifreq(priv
, SIOCGIFFLAGS
, &request
);
258 request
.ifr_flags
&= keep
;
259 request
.ifr_flags
|= flags
& ~keep
;
260 return mlx4_ifreq(priv
, SIOCSIFFLAGS
, &request
);
264 * Change the link state (UP / DOWN).
267 * Pointer to Ethernet device private data.
269 * Nonzero for link up, otherwise link down.
272 * 0 on success, negative errno value otherwise and rte_errno is set.
275 mlx4_dev_set_link(struct priv
*priv
, int up
)
280 err
= mlx4_set_flags(priv
, ~IFF_UP
, IFF_UP
);
284 err
= mlx4_set_flags(priv
, ~IFF_UP
, ~IFF_UP
);
292 * DPDK callback to bring the link DOWN.
295 * Pointer to Ethernet device structure.
298 * 0 on success, negative errno value otherwise and rte_errno is set.
301 mlx4_dev_set_link_down(struct rte_eth_dev
*dev
)
303 struct priv
*priv
= dev
->data
->dev_private
;
305 return mlx4_dev_set_link(priv
, 0);
309 * DPDK callback to bring the link UP.
312 * Pointer to Ethernet device structure.
315 * 0 on success, negative errno value otherwise and rte_errno is set.
318 mlx4_dev_set_link_up(struct rte_eth_dev
*dev
)
320 struct priv
*priv
= dev
->data
->dev_private
;
322 return mlx4_dev_set_link(priv
, 1);
326 * Supported Rx mode toggles.
328 * Even and odd values respectively stand for off and on.
331 RXMODE_TOGGLE_PROMISC_OFF
,
332 RXMODE_TOGGLE_PROMISC_ON
,
333 RXMODE_TOGGLE_ALLMULTI_OFF
,
334 RXMODE_TOGGLE_ALLMULTI_ON
,
338 * Helper function to toggle promiscuous and all multicast modes.
341 * Pointer to Ethernet device structure.
346 mlx4_rxmode_toggle(struct rte_eth_dev
*dev
, enum rxmode_toggle toggle
)
348 struct priv
*priv
= dev
->data
->dev_private
;
350 struct rte_flow_error error
;
353 case RXMODE_TOGGLE_PROMISC_OFF
:
354 case RXMODE_TOGGLE_PROMISC_ON
:
355 mode
= "promiscuous";
356 dev
->data
->promiscuous
= toggle
& 1;
358 case RXMODE_TOGGLE_ALLMULTI_OFF
:
359 case RXMODE_TOGGLE_ALLMULTI_ON
:
360 mode
= "all multicast";
361 dev
->data
->all_multicast
= toggle
& 1;
364 if (!mlx4_flow_sync(priv
, &error
))
366 ERROR("cannot toggle %s mode (code %d, \"%s\"),"
367 " flow error type %d, cause %p, message: %s",
368 mode
, rte_errno
, strerror(rte_errno
), error
.type
, error
.cause
,
369 error
.message
? error
.message
: "(unspecified)");
373 * DPDK callback to enable promiscuous mode.
376 * Pointer to Ethernet device structure.
379 mlx4_promiscuous_enable(struct rte_eth_dev
*dev
)
381 mlx4_rxmode_toggle(dev
, RXMODE_TOGGLE_PROMISC_ON
);
385 * DPDK callback to disable promiscuous mode.
388 * Pointer to Ethernet device structure.
391 mlx4_promiscuous_disable(struct rte_eth_dev
*dev
)
393 mlx4_rxmode_toggle(dev
, RXMODE_TOGGLE_PROMISC_OFF
);
397 * DPDK callback to enable all multicast mode.
400 * Pointer to Ethernet device structure.
403 mlx4_allmulticast_enable(struct rte_eth_dev
*dev
)
405 mlx4_rxmode_toggle(dev
, RXMODE_TOGGLE_ALLMULTI_ON
);
409 * DPDK callback to disable all multicast mode.
412 * Pointer to Ethernet device structure.
415 mlx4_allmulticast_disable(struct rte_eth_dev
*dev
)
417 mlx4_rxmode_toggle(dev
, RXMODE_TOGGLE_ALLMULTI_OFF
);
421 * DPDK callback to remove a MAC address.
424 * Pointer to Ethernet device structure.
429 mlx4_mac_addr_remove(struct rte_eth_dev
*dev
, uint32_t index
)
431 struct priv
*priv
= dev
->data
->dev_private
;
432 struct rte_flow_error error
;
434 if (index
>= RTE_DIM(priv
->mac
)) {
438 memset(&priv
->mac
[index
], 0, sizeof(priv
->mac
[index
]));
439 if (!mlx4_flow_sync(priv
, &error
))
441 ERROR("failed to synchronize flow rules after removing MAC address"
442 " at index %d (code %d, \"%s\"),"
443 " flow error type %d, cause %p, message: %s",
444 index
, rte_errno
, strerror(rte_errno
), error
.type
, error
.cause
,
445 error
.message
? error
.message
: "(unspecified)");
449 * DPDK callback to add a MAC address.
452 * Pointer to Ethernet device structure.
454 * MAC address to register.
458 * VMDq pool index to associate address with (ignored).
461 * 0 on success, negative errno value otherwise and rte_errno is set.
464 mlx4_mac_addr_add(struct rte_eth_dev
*dev
, struct ether_addr
*mac_addr
,
465 uint32_t index
, uint32_t vmdq
)
467 struct priv
*priv
= dev
->data
->dev_private
;
468 struct rte_flow_error error
;
472 if (index
>= RTE_DIM(priv
->mac
)) {
476 memcpy(&priv
->mac
[index
], mac_addr
, sizeof(priv
->mac
[index
]));
477 ret
= mlx4_flow_sync(priv
, &error
);
480 ERROR("failed to synchronize flow rules after adding MAC address"
481 " at index %d (code %d, \"%s\"),"
482 " flow error type %d, cause %p, message: %s",
483 index
, rte_errno
, strerror(rte_errno
), error
.type
, error
.cause
,
484 error
.message
? error
.message
: "(unspecified)");
489 * DPDK callback to configure a VLAN filter.
492 * Pointer to Ethernet device structure.
499 * 0 on success, negative errno value otherwise and rte_errno is set.
502 mlx4_vlan_filter_set(struct rte_eth_dev
*dev
, uint16_t vlan_id
, int on
)
504 struct priv
*priv
= dev
->data
->dev_private
;
505 struct rte_flow_error error
;
506 unsigned int vidx
= vlan_id
/ 64;
507 unsigned int vbit
= vlan_id
% 64;
511 if (vidx
>= RTE_DIM(dev
->data
->vlan_filter_conf
.ids
)) {
515 v
= &dev
->data
->vlan_filter_conf
.ids
[vidx
];
516 *v
&= ~(UINT64_C(1) << vbit
);
517 *v
|= (uint64_t)!!on
<< vbit
;
518 ret
= mlx4_flow_sync(priv
, &error
);
521 ERROR("failed to synchronize flow rules after %s VLAN filter on ID %u"
522 " (code %d, \"%s\"), "
523 " flow error type %d, cause %p, message: %s",
524 on
? "enabling" : "disabling", vlan_id
,
525 rte_errno
, strerror(rte_errno
), error
.type
, error
.cause
,
526 error
.message
? error
.message
: "(unspecified)");
531 * DPDK callback to set the primary MAC address.
534 * Pointer to Ethernet device structure.
536 * MAC address to register.
539 * 0 on success, negative errno value otherwise and rte_errno is set.
542 mlx4_mac_addr_set(struct rte_eth_dev
*dev
, struct ether_addr
*mac_addr
)
544 return mlx4_mac_addr_add(dev
, mac_addr
, 0, 0);
548 * DPDK callback to get information about the device.
551 * Pointer to Ethernet device structure.
553 * Info structure output buffer.
556 mlx4_dev_infos_get(struct rte_eth_dev
*dev
, struct rte_eth_dev_info
*info
)
558 struct priv
*priv
= dev
->data
->dev_private
;
560 char ifname
[IF_NAMESIZE
];
562 /* FIXME: we should ask the device for these values. */
563 info
->min_rx_bufsize
= 32;
564 info
->max_rx_pktlen
= 65536;
566 * Since we need one CQ per QP, the limit is the minimum number
567 * between the two values.
569 max
= ((priv
->device_attr
.max_cq
> priv
->device_attr
.max_qp
) ?
570 priv
->device_attr
.max_qp
: priv
->device_attr
.max_cq
);
571 /* If max >= 65535 then max = 0, max_rx_queues is uint16_t. */
574 info
->max_rx_queues
= max
;
575 info
->max_tx_queues
= max
;
576 info
->max_mac_addrs
= RTE_DIM(priv
->mac
);
577 info
->tx_offload_capa
= mlx4_get_tx_port_offloads(priv
);
578 info
->rx_queue_offload_capa
= mlx4_get_rx_queue_offloads(priv
);
579 info
->rx_offload_capa
= (mlx4_get_rx_port_offloads(priv
) |
580 info
->rx_queue_offload_capa
);
581 if (mlx4_get_ifname(priv
, &ifname
) == 0)
582 info
->if_index
= if_nametoindex(ifname
);
583 info
->hash_key_size
= MLX4_RSS_HASH_KEY_SIZE
;
590 info
->flow_type_rss_offloads
= mlx4_conv_rss_types(priv
, 0, 1);
594 * DPDK callback to get device statistics.
597 * Pointer to Ethernet device structure.
599 * Stats structure output buffer.
602 mlx4_stats_get(struct rte_eth_dev
*dev
, struct rte_eth_stats
*stats
)
604 struct rte_eth_stats tmp
;
608 memset(&tmp
, 0, sizeof(tmp
));
609 /* Add software counters. */
610 for (i
= 0; i
!= dev
->data
->nb_rx_queues
; ++i
) {
611 struct rxq
*rxq
= dev
->data
->rx_queues
[i
];
615 idx
= rxq
->stats
.idx
;
616 if (idx
< RTE_ETHDEV_QUEUE_STAT_CNTRS
) {
617 tmp
.q_ipackets
[idx
] += rxq
->stats
.ipackets
;
618 tmp
.q_ibytes
[idx
] += rxq
->stats
.ibytes
;
619 tmp
.q_errors
[idx
] += (rxq
->stats
.idropped
+
620 rxq
->stats
.rx_nombuf
);
622 tmp
.ipackets
+= rxq
->stats
.ipackets
;
623 tmp
.ibytes
+= rxq
->stats
.ibytes
;
624 tmp
.ierrors
+= rxq
->stats
.idropped
;
625 tmp
.rx_nombuf
+= rxq
->stats
.rx_nombuf
;
627 for (i
= 0; i
!= dev
->data
->nb_tx_queues
; ++i
) {
628 struct txq
*txq
= dev
->data
->tx_queues
[i
];
632 idx
= txq
->stats
.idx
;
633 if (idx
< RTE_ETHDEV_QUEUE_STAT_CNTRS
) {
634 tmp
.q_opackets
[idx
] += txq
->stats
.opackets
;
635 tmp
.q_obytes
[idx
] += txq
->stats
.obytes
;
636 tmp
.q_errors
[idx
] += txq
->stats
.odropped
;
638 tmp
.opackets
+= txq
->stats
.opackets
;
639 tmp
.obytes
+= txq
->stats
.obytes
;
640 tmp
.oerrors
+= txq
->stats
.odropped
;
647 * DPDK callback to clear device statistics.
650 * Pointer to Ethernet device structure.
653 mlx4_stats_reset(struct rte_eth_dev
*dev
)
657 for (i
= 0; i
!= dev
->data
->nb_rx_queues
; ++i
) {
658 struct rxq
*rxq
= dev
->data
->rx_queues
[i
];
661 rxq
->stats
= (struct mlx4_rxq_stats
){
662 .idx
= rxq
->stats
.idx
,
665 for (i
= 0; i
!= dev
->data
->nb_tx_queues
; ++i
) {
666 struct txq
*txq
= dev
->data
->tx_queues
[i
];
669 txq
->stats
= (struct mlx4_txq_stats
){
670 .idx
= txq
->stats
.idx
,
676 * DPDK callback to retrieve physical link information.
679 * Pointer to Ethernet device structure.
680 * @param wait_to_complete
681 * Wait for request completion (ignored).
684 * 0 on success, negative errno value otherwise and rte_errno is set.
687 mlx4_link_update(struct rte_eth_dev
*dev
, int wait_to_complete
)
689 const struct priv
*priv
= dev
->data
->dev_private
;
690 struct ethtool_cmd edata
= {
694 struct rte_eth_link dev_link
;
701 (void)wait_to_complete
;
702 if (mlx4_ifreq(priv
, SIOCGIFFLAGS
, &ifr
)) {
703 WARN("ioctl(SIOCGIFFLAGS) failed: %s", strerror(rte_errno
));
706 memset(&dev_link
, 0, sizeof(dev_link
));
707 dev_link
.link_status
= ((ifr
.ifr_flags
& IFF_UP
) &&
708 (ifr
.ifr_flags
& IFF_RUNNING
));
709 ifr
.ifr_data
= (void *)&edata
;
710 if (mlx4_ifreq(priv
, SIOCETHTOOL
, &ifr
)) {
711 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GSET) failed: %s",
712 strerror(rte_errno
));
715 link_speed
= ethtool_cmd_speed(&edata
);
716 if (link_speed
== -1)
717 dev_link
.link_speed
= ETH_SPEED_NUM_NONE
;
719 dev_link
.link_speed
= link_speed
;
720 dev_link
.link_duplex
= ((edata
.duplex
== DUPLEX_HALF
) ?
721 ETH_LINK_HALF_DUPLEX
: ETH_LINK_FULL_DUPLEX
);
722 dev_link
.link_autoneg
= !(dev
->data
->dev_conf
.link_speeds
&
723 ETH_LINK_SPEED_FIXED
);
724 dev
->data
->dev_link
= dev_link
;
729 * DPDK callback to get flow control status.
732 * Pointer to Ethernet device structure.
733 * @param[out] fc_conf
734 * Flow control output buffer.
737 * 0 on success, negative errno value otherwise and rte_errno is set.
740 mlx4_flow_ctrl_get(struct rte_eth_dev
*dev
, struct rte_eth_fc_conf
*fc_conf
)
742 struct priv
*priv
= dev
->data
->dev_private
;
744 struct ethtool_pauseparam ethpause
= {
745 .cmd
= ETHTOOL_GPAUSEPARAM
,
749 ifr
.ifr_data
= (void *)ðpause
;
750 if (mlx4_ifreq(priv
, SIOCETHTOOL
, &ifr
)) {
752 WARN("ioctl(SIOCETHTOOL, ETHTOOL_GPAUSEPARAM)"
754 strerror(rte_errno
));
757 fc_conf
->autoneg
= ethpause
.autoneg
;
758 if (ethpause
.rx_pause
&& ethpause
.tx_pause
)
759 fc_conf
->mode
= RTE_FC_FULL
;
760 else if (ethpause
.rx_pause
)
761 fc_conf
->mode
= RTE_FC_RX_PAUSE
;
762 else if (ethpause
.tx_pause
)
763 fc_conf
->mode
= RTE_FC_TX_PAUSE
;
765 fc_conf
->mode
= RTE_FC_NONE
;
773 * DPDK callback to modify flow control parameters.
776 * Pointer to Ethernet device structure.
778 * Flow control parameters.
781 * 0 on success, negative errno value otherwise and rte_errno is set.
784 mlx4_flow_ctrl_set(struct rte_eth_dev
*dev
, struct rte_eth_fc_conf
*fc_conf
)
786 struct priv
*priv
= dev
->data
->dev_private
;
788 struct ethtool_pauseparam ethpause
= {
789 .cmd
= ETHTOOL_SPAUSEPARAM
,
793 ifr
.ifr_data
= (void *)ðpause
;
794 ethpause
.autoneg
= fc_conf
->autoneg
;
795 if (((fc_conf
->mode
& RTE_FC_FULL
) == RTE_FC_FULL
) ||
796 (fc_conf
->mode
& RTE_FC_RX_PAUSE
))
797 ethpause
.rx_pause
= 1;
799 ethpause
.rx_pause
= 0;
800 if (((fc_conf
->mode
& RTE_FC_FULL
) == RTE_FC_FULL
) ||
801 (fc_conf
->mode
& RTE_FC_TX_PAUSE
))
802 ethpause
.tx_pause
= 1;
804 ethpause
.tx_pause
= 0;
805 if (mlx4_ifreq(priv
, SIOCETHTOOL
, &ifr
)) {
807 WARN("ioctl(SIOCETHTOOL, ETHTOOL_SPAUSEPARAM)"
809 strerror(rte_errno
));
819 * DPDK callback to retrieve the received packet types that are recognized
823 * Pointer to Ethernet device structure.
826 * Pointer to an array of recognized packet types if in Rx burst mode,
830 mlx4_dev_supported_ptypes_get(struct rte_eth_dev
*dev
)
832 static const uint32_t ptypes
[] = {
833 /* refers to rxq_cq_to_pkt_type() */
835 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
,
836 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
,
842 static const uint32_t ptypes_l2tun
[] = {
843 /* refers to rxq_cq_to_pkt_type() */
845 RTE_PTYPE_L3_IPV4_EXT_UNKNOWN
,
846 RTE_PTYPE_L3_IPV6_EXT_UNKNOWN
,
850 RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN
,
851 RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN
,
854 struct priv
*priv
= dev
->data
->dev_private
;
856 if (dev
->rx_pkt_burst
== mlx4_rx_burst
) {
857 if (priv
->hw_csum_l2tun
)
866 * Check if mlx4 device was removed.
869 * Pointer to Ethernet device structure.
872 * 1 when device is removed, otherwise 0.
875 mlx4_is_removed(struct rte_eth_dev
*dev
)
877 struct ibv_device_attr device_attr
;
878 struct priv
*priv
= dev
->data
->dev_private
;
880 if (mlx4_glue
->query_device(priv
->ctx
, &device_attr
) == EIO
)