/*
 * Source: git.proxmox.com Git - ceph.git/blob - ceph/src/spdk/dpdk/drivers/net/mlx5/mlx5_nl.c
 * import 15.2.0 Octopus source
 * [ceph.git] / ceph / src / spdk / dpdk / drivers / net / mlx5 / mlx5_nl.c
 */
1 /* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2018 6WIND S.A.
3 * Copyright 2018 Mellanox Technologies, Ltd
4 */
5
6 #include <errno.h>
7 #include <linux/if_link.h>
8 #include <linux/netlink.h>
9 #include <linux/rtnetlink.h>
10 #include <net/if.h>
11 #include <rdma/rdma_netlink.h>
12 #include <stdbool.h>
13 #include <stdint.h>
14 #include <stdlib.h>
15 #include <string.h>
16 #include <sys/socket.h>
17 #include <unistd.h>
18
19 #include <rte_errno.h>
20
21 #include "mlx5.h"
22 #include "mlx5_utils.h"
23
/* Size, in bytes, of the buffer to receive kernel messages. */
#define MLX5_NL_BUF_SIZE (32 * 1024)
/* Send buffer size, in bytes, requested for the Netlink socket. */
#define MLX5_SEND_BUF_SIZE (32 * 1024)
/* Receive buffer size, in bytes, requested for the Netlink socket. */
#define MLX5_RECV_BUF_SIZE (32 * 1024)
30
/*
 * Local equivalent of the NDA_RTA() helper from the iproute2 sources
 * (include/libnetlink.h): given a pointer to a struct ndmsg, yield a pointer
 * to the first route attribute located right after the aligned ndmsg.
 */
#if !defined(MLX5_NDA_RTA)
#define MLX5_NDA_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct ndmsg))))
#endif
40
/*
 * Fallback values for definitions normally provided by rdma/rdma_netlink.h.
 * They are recent enough that most systems do not expose them yet, hence each
 * one is only defined here when the matching HAVE_* macro is not set.
 */
#if !defined(HAVE_RDMA_NL_NLDEV)
#define RDMA_NL_NLDEV 5
#endif
#if !defined(HAVE_RDMA_NLDEV_CMD_GET)
#define RDMA_NLDEV_CMD_GET 1
#endif
#if !defined(HAVE_RDMA_NLDEV_CMD_PORT_GET)
#define RDMA_NLDEV_CMD_PORT_GET 5
#endif
#if !defined(HAVE_RDMA_NLDEV_ATTR_DEV_INDEX)
#define RDMA_NLDEV_ATTR_DEV_INDEX 1
#endif
#if !defined(HAVE_RDMA_NLDEV_ATTR_DEV_NAME)
#define RDMA_NLDEV_ATTR_DEV_NAME 2
#endif
#if !defined(HAVE_RDMA_NLDEV_ATTR_PORT_INDEX)
#define RDMA_NLDEV_ATTR_PORT_INDEX 3
#endif
#if !defined(HAVE_RDMA_NLDEV_ATTR_NDEV_INDEX)
#define RDMA_NLDEV_ATTR_NDEV_INDEX 50
#endif
66
/*
 * Fallback values for attributes normally found in linux/if_link.h, only
 * defined here when the matching HAVE_* macro is not set.
 */
#if !defined(HAVE_IFLA_NUM_VF)
#define IFLA_NUM_VF 21
#endif
#if !defined(HAVE_IFLA_EXT_MASK)
#define IFLA_EXT_MASK 29
#endif
#if !defined(HAVE_IFLA_PHYS_SWITCH_ID)
#define IFLA_PHYS_SWITCH_ID 36
#endif
#if !defined(HAVE_IFLA_PHYS_PORT_NAME)
#define IFLA_PHYS_PORT_NAME 38
#endif
80
81 /* Add/remove MAC address through Netlink */
82 struct mlx5_nl_mac_addr {
83 struct ether_addr (*mac)[];
84 /**< MAC address handled by the device. */
85 int mac_n; /**< Number of addresses in the array. */
86 };
87
/**
 * Data structure used by mlx5_nl_cmdget_cb().
 *
 * The output fields are only written by the callback for a message whose
 * device name attribute equals @p name.
 */
struct mlx5_nl_ifindex_data {
	/** IB device name to look for (in). */
	const char *name;
	/** IB device index (out). */
	uint32_t ibindex;
	/** Network interface index (out). */
	uint32_t ifindex;
	/** IB device max port number (out). */
	uint32_t portnum;
};
95
96 /**
97 * Opens a Netlink socket.
98 *
99 * @param protocol
100 * Netlink protocol (e.g. NETLINK_ROUTE, NETLINK_RDMA).
101 *
102 * @return
103 * A file descriptor on success, a negative errno value otherwise and
104 * rte_errno is set.
105 */
106 int
107 mlx5_nl_init(int protocol)
108 {
109 int fd;
110 int sndbuf_size = MLX5_SEND_BUF_SIZE;
111 int rcvbuf_size = MLX5_RECV_BUF_SIZE;
112 struct sockaddr_nl local = {
113 .nl_family = AF_NETLINK,
114 };
115 int ret;
116
117 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
118 if (fd == -1) {
119 rte_errno = errno;
120 return -rte_errno;
121 }
122 ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int));
123 if (ret == -1) {
124 rte_errno = errno;
125 goto error;
126 }
127 ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int));
128 if (ret == -1) {
129 rte_errno = errno;
130 goto error;
131 }
132 ret = bind(fd, (struct sockaddr *)&local, sizeof(local));
133 if (ret == -1) {
134 rte_errno = errno;
135 goto error;
136 }
137 return fd;
138 error:
139 close(fd);
140 return -rte_errno;
141 }
142
143 /**
144 * Send a request message to the kernel on the Netlink socket.
145 *
146 * @param[in] nlsk_fd
147 * Netlink socket file descriptor.
148 * @param[in] nh
149 * The Netlink message send to the kernel.
150 * @param[in] ssn
151 * Sequence number.
152 * @param[in] req
153 * Pointer to the request structure.
154 * @param[in] len
155 * Length of the request in bytes.
156 *
157 * @return
158 * The number of sent bytes on success, a negative errno value otherwise and
159 * rte_errno is set.
160 */
161 static int
162 mlx5_nl_request(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn, void *req,
163 int len)
164 {
165 struct sockaddr_nl sa = {
166 .nl_family = AF_NETLINK,
167 };
168 struct iovec iov[2] = {
169 { .iov_base = nh, .iov_len = sizeof(*nh), },
170 { .iov_base = req, .iov_len = len, },
171 };
172 struct msghdr msg = {
173 .msg_name = &sa,
174 .msg_namelen = sizeof(sa),
175 .msg_iov = iov,
176 .msg_iovlen = 2,
177 };
178 int send_bytes;
179
180 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
181 nh->nlmsg_seq = sn;
182 send_bytes = sendmsg(nlsk_fd, &msg, 0);
183 if (send_bytes < 0) {
184 rte_errno = errno;
185 return -rte_errno;
186 }
187 return send_bytes;
188 }
189
190 /**
191 * Send a message to the kernel on the Netlink socket.
192 *
193 * @param[in] nlsk_fd
194 * The Netlink socket file descriptor used for communication.
195 * @param[in] nh
196 * The Netlink message send to the kernel.
197 * @param[in] sn
198 * Sequence number.
199 *
200 * @return
201 * The number of sent bytes on success, a negative errno value otherwise and
202 * rte_errno is set.
203 */
204 static int
205 mlx5_nl_send(int nlsk_fd, struct nlmsghdr *nh, uint32_t sn)
206 {
207 struct sockaddr_nl sa = {
208 .nl_family = AF_NETLINK,
209 };
210 struct iovec iov = {
211 .iov_base = nh,
212 .iov_len = nh->nlmsg_len,
213 };
214 struct msghdr msg = {
215 .msg_name = &sa,
216 .msg_namelen = sizeof(sa),
217 .msg_iov = &iov,
218 .msg_iovlen = 1,
219 };
220 int send_bytes;
221
222 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
223 nh->nlmsg_seq = sn;
224 send_bytes = sendmsg(nlsk_fd, &msg, 0);
225 if (send_bytes < 0) {
226 rte_errno = errno;
227 return -rte_errno;
228 }
229 return send_bytes;
230 }
231
232 /**
233 * Receive a message from the kernel on the Netlink socket, following
234 * mlx5_nl_send().
235 *
236 * @param[in] nlsk_fd
237 * The Netlink socket file descriptor used for communication.
238 * @param[in] sn
239 * Sequence number.
240 * @param[in] cb
241 * The callback function to call for each Netlink message received.
242 * @param[in, out] arg
243 * Custom arguments for the callback.
244 *
245 * @return
246 * 0 on success, a negative errno value otherwise and rte_errno is set.
247 */
248 static int
249 mlx5_nl_recv(int nlsk_fd, uint32_t sn, int (*cb)(struct nlmsghdr *, void *arg),
250 void *arg)
251 {
252 struct sockaddr_nl sa;
253 char buf[MLX5_RECV_BUF_SIZE];
254 struct iovec iov = {
255 .iov_base = buf,
256 .iov_len = sizeof(buf),
257 };
258 struct msghdr msg = {
259 .msg_name = &sa,
260 .msg_namelen = sizeof(sa),
261 .msg_iov = &iov,
262 /* One message at a time */
263 .msg_iovlen = 1,
264 };
265 int multipart = 0;
266 int ret = 0;
267
268 do {
269 struct nlmsghdr *nh;
270 int recv_bytes = 0;
271
272 do {
273 recv_bytes = recvmsg(nlsk_fd, &msg, 0);
274 if (recv_bytes == -1) {
275 rte_errno = errno;
276 return -rte_errno;
277 }
278 nh = (struct nlmsghdr *)buf;
279 } while (nh->nlmsg_seq != sn);
280 for (;
281 NLMSG_OK(nh, (unsigned int)recv_bytes);
282 nh = NLMSG_NEXT(nh, recv_bytes)) {
283 if (nh->nlmsg_type == NLMSG_ERROR) {
284 struct nlmsgerr *err_data = NLMSG_DATA(nh);
285
286 if (err_data->error < 0) {
287 rte_errno = -err_data->error;
288 return -rte_errno;
289 }
290 /* Ack message. */
291 return 0;
292 }
293 /* Multi-part msgs and their trailing DONE message. */
294 if (nh->nlmsg_flags & NLM_F_MULTI) {
295 if (nh->nlmsg_type == NLMSG_DONE)
296 return 0;
297 multipart = 1;
298 }
299 if (cb) {
300 ret = cb(nh, arg);
301 if (ret < 0)
302 return ret;
303 }
304 }
305 } while (multipart);
306 return ret;
307 }
308
309 /**
310 * Parse Netlink message to retrieve the bridge MAC address.
311 *
312 * @param nh
313 * Pointer to Netlink Message Header.
314 * @param arg
315 * PMD data register with this callback.
316 *
317 * @return
318 * 0 on success, a negative errno value otherwise and rte_errno is set.
319 */
320 static int
321 mlx5_nl_mac_addr_cb(struct nlmsghdr *nh, void *arg)
322 {
323 struct mlx5_nl_mac_addr *data = arg;
324 struct ndmsg *r = NLMSG_DATA(nh);
325 struct rtattr *attribute;
326 int len;
327
328 len = nh->nlmsg_len - NLMSG_LENGTH(sizeof(*r));
329 for (attribute = MLX5_NDA_RTA(r);
330 RTA_OK(attribute, len);
331 attribute = RTA_NEXT(attribute, len)) {
332 if (attribute->rta_type == NDA_LLADDR) {
333 if (data->mac_n == MLX5_MAX_MAC_ADDRESSES) {
334 DRV_LOG(WARNING,
335 "not enough room to finalize the"
336 " request");
337 rte_errno = ENOMEM;
338 return -rte_errno;
339 }
340 #ifndef NDEBUG
341 char m[18];
342
343 ether_format_addr(m, 18, RTA_DATA(attribute));
344 DRV_LOG(DEBUG, "bridge MAC address %s", m);
345 #endif
346 memcpy(&(*data->mac)[data->mac_n++],
347 RTA_DATA(attribute), ETHER_ADDR_LEN);
348 }
349 }
350 return 0;
351 }
352
353 /**
354 * Get bridge MAC addresses.
355 *
356 * @param dev
357 * Pointer to Ethernet device.
358 * @param mac[out]
359 * Pointer to the array table of MAC addresses to fill.
360 * Its size should be of MLX5_MAX_MAC_ADDRESSES.
361 * @param mac_n[out]
362 * Number of entries filled in MAC array.
363 *
364 * @return
365 * 0 on success, a negative errno value otherwise and rte_errno is set.
366 */
367 static int
368 mlx5_nl_mac_addr_list(struct rte_eth_dev *dev, struct ether_addr (*mac)[],
369 int *mac_n)
370 {
371 struct mlx5_priv *priv = dev->data->dev_private;
372 unsigned int iface_idx = mlx5_ifindex(dev);
373 struct {
374 struct nlmsghdr hdr;
375 struct ifinfomsg ifm;
376 } req = {
377 .hdr = {
378 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
379 .nlmsg_type = RTM_GETNEIGH,
380 .nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
381 },
382 .ifm = {
383 .ifi_family = PF_BRIDGE,
384 .ifi_index = iface_idx,
385 },
386 };
387 struct mlx5_nl_mac_addr data = {
388 .mac = mac,
389 .mac_n = 0,
390 };
391 int fd;
392 int ret;
393 uint32_t sn = priv->nl_sn++;
394
395 if (priv->nl_socket_route == -1)
396 return 0;
397 fd = priv->nl_socket_route;
398 ret = mlx5_nl_request(fd, &req.hdr, sn, &req.ifm,
399 sizeof(struct ifinfomsg));
400 if (ret < 0)
401 goto error;
402 ret = mlx5_nl_recv(fd, sn, mlx5_nl_mac_addr_cb, &data);
403 if (ret < 0)
404 goto error;
405 *mac_n = data.mac_n;
406 return 0;
407 error:
408 DRV_LOG(DEBUG, "port %u cannot retrieve MAC address list %s",
409 dev->data->port_id, strerror(rte_errno));
410 return -rte_errno;
411 }
412
413 /**
414 * Modify the MAC address neighbour table with Netlink.
415 *
416 * @param dev
417 * Pointer to Ethernet device.
418 * @param mac
419 * MAC address to consider.
420 * @param add
421 * 1 to add the MAC address, 0 to remove the MAC address.
422 *
423 * @return
424 * 0 on success, a negative errno value otherwise and rte_errno is set.
425 */
426 static int
427 mlx5_nl_mac_addr_modify(struct rte_eth_dev *dev, struct ether_addr *mac,
428 int add)
429 {
430 struct mlx5_priv *priv = dev->data->dev_private;
431 unsigned int iface_idx = mlx5_ifindex(dev);
432 struct {
433 struct nlmsghdr hdr;
434 struct ndmsg ndm;
435 struct rtattr rta;
436 uint8_t buffer[ETHER_ADDR_LEN];
437 } req = {
438 .hdr = {
439 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg)),
440 .nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
441 NLM_F_EXCL | NLM_F_ACK,
442 .nlmsg_type = add ? RTM_NEWNEIGH : RTM_DELNEIGH,
443 },
444 .ndm = {
445 .ndm_family = PF_BRIDGE,
446 .ndm_state = NUD_NOARP | NUD_PERMANENT,
447 .ndm_ifindex = iface_idx,
448 .ndm_flags = NTF_SELF,
449 },
450 .rta = {
451 .rta_type = NDA_LLADDR,
452 .rta_len = RTA_LENGTH(ETHER_ADDR_LEN),
453 },
454 };
455 int fd;
456 int ret;
457 uint32_t sn = priv->nl_sn++;
458
459 if (priv->nl_socket_route == -1)
460 return 0;
461 fd = priv->nl_socket_route;
462 memcpy(RTA_DATA(&req.rta), mac, ETHER_ADDR_LEN);
463 req.hdr.nlmsg_len = NLMSG_ALIGN(req.hdr.nlmsg_len) +
464 RTA_ALIGN(req.rta.rta_len);
465 ret = mlx5_nl_send(fd, &req.hdr, sn);
466 if (ret < 0)
467 goto error;
468 ret = mlx5_nl_recv(fd, sn, NULL, NULL);
469 if (ret < 0)
470 goto error;
471 return 0;
472 error:
473 DRV_LOG(DEBUG,
474 "port %u cannot %s MAC address %02X:%02X:%02X:%02X:%02X:%02X"
475 " %s",
476 dev->data->port_id,
477 add ? "add" : "remove",
478 mac->addr_bytes[0], mac->addr_bytes[1],
479 mac->addr_bytes[2], mac->addr_bytes[3],
480 mac->addr_bytes[4], mac->addr_bytes[5],
481 strerror(rte_errno));
482 return -rte_errno;
483 }
484
485 /**
486 * Add a MAC address.
487 *
488 * @param dev
489 * Pointer to Ethernet device.
490 * @param mac
491 * MAC address to register.
492 * @param index
493 * MAC address index.
494 *
495 * @return
496 * 0 on success, a negative errno value otherwise and rte_errno is set.
497 */
498 int
499 mlx5_nl_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac,
500 uint32_t index)
501 {
502 struct mlx5_priv *priv = dev->data->dev_private;
503 int ret;
504
505 ret = mlx5_nl_mac_addr_modify(dev, mac, 1);
506 if (!ret)
507 BITFIELD_SET(priv->mac_own, index);
508 if (ret == -EEXIST)
509 return 0;
510 return ret;
511 }
512
513 /**
514 * Remove a MAC address.
515 *
516 * @param dev
517 * Pointer to Ethernet device.
518 * @param mac
519 * MAC address to remove.
520 * @param index
521 * MAC address index.
522 *
523 * @return
524 * 0 on success, a negative errno value otherwise and rte_errno is set.
525 */
526 int
527 mlx5_nl_mac_addr_remove(struct rte_eth_dev *dev, struct ether_addr *mac,
528 uint32_t index)
529 {
530 struct mlx5_priv *priv = dev->data->dev_private;
531
532 BITFIELD_RESET(priv->mac_own, index);
533 return mlx5_nl_mac_addr_modify(dev, mac, 0);
534 }
535
536 /**
537 * Synchronize Netlink bridge table to the internal table.
538 *
539 * @param dev
540 * Pointer to Ethernet device.
541 */
542 void
543 mlx5_nl_mac_addr_sync(struct rte_eth_dev *dev)
544 {
545 struct ether_addr macs[MLX5_MAX_MAC_ADDRESSES];
546 int macs_n = 0;
547 int i;
548 int ret;
549
550 ret = mlx5_nl_mac_addr_list(dev, &macs, &macs_n);
551 if (ret)
552 return;
553 for (i = 0; i != macs_n; ++i) {
554 int j;
555
556 /* Verify the address is not in the array yet. */
557 for (j = 0; j != MLX5_MAX_MAC_ADDRESSES; ++j)
558 if (is_same_ether_addr(&macs[i],
559 &dev->data->mac_addrs[j]))
560 break;
561 if (j != MLX5_MAX_MAC_ADDRESSES)
562 continue;
563 /* Find the first entry available. */
564 for (j = 0; j != MLX5_MAX_MAC_ADDRESSES; ++j) {
565 if (is_zero_ether_addr(&dev->data->mac_addrs[j])) {
566 dev->data->mac_addrs[j] = macs[i];
567 break;
568 }
569 }
570 }
571 }
572
573 /**
574 * Flush all added MAC addresses.
575 *
576 * @param dev
577 * Pointer to Ethernet device.
578 */
579 void
580 mlx5_nl_mac_addr_flush(struct rte_eth_dev *dev)
581 {
582 struct mlx5_priv *priv = dev->data->dev_private;
583 int i;
584
585 for (i = MLX5_MAX_MAC_ADDRESSES - 1; i >= 0; --i) {
586 struct ether_addr *m = &dev->data->mac_addrs[i];
587
588 if (BITFIELD_ISSET(priv->mac_own, i))
589 mlx5_nl_mac_addr_remove(dev, m, i);
590 }
591 }
592
593 /**
594 * Enable promiscuous / all multicast mode through Netlink.
595 *
596 * @param dev
597 * Pointer to Ethernet device structure.
598 * @param flags
599 * IFF_PROMISC for promiscuous, IFF_ALLMULTI for allmulti.
600 * @param enable
601 * Nonzero to enable, disable otherwise.
602 *
603 * @return
604 * 0 on success, a negative errno value otherwise and rte_errno is set.
605 */
606 static int
607 mlx5_nl_device_flags(struct rte_eth_dev *dev, uint32_t flags, int enable)
608 {
609 struct mlx5_priv *priv = dev->data->dev_private;
610 unsigned int iface_idx = mlx5_ifindex(dev);
611 struct {
612 struct nlmsghdr hdr;
613 struct ifinfomsg ifi;
614 } req = {
615 .hdr = {
616 .nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)),
617 .nlmsg_type = RTM_NEWLINK,
618 .nlmsg_flags = NLM_F_REQUEST,
619 },
620 .ifi = {
621 .ifi_flags = enable ? flags : 0,
622 .ifi_change = flags,
623 .ifi_index = iface_idx,
624 },
625 };
626 int fd;
627 int ret;
628
629 assert(!(flags & ~(IFF_PROMISC | IFF_ALLMULTI)));
630 if (priv->nl_socket_route < 0)
631 return 0;
632 fd = priv->nl_socket_route;
633 ret = mlx5_nl_send(fd, &req.hdr, priv->nl_sn++);
634 if (ret < 0)
635 return ret;
636 return 0;
637 }
638
639 /**
640 * Enable promiscuous mode through Netlink.
641 *
642 * @param dev
643 * Pointer to Ethernet device structure.
644 * @param enable
645 * Nonzero to enable, disable otherwise.
646 *
647 * @return
648 * 0 on success, a negative errno value otherwise and rte_errno is set.
649 */
650 int
651 mlx5_nl_promisc(struct rte_eth_dev *dev, int enable)
652 {
653 int ret = mlx5_nl_device_flags(dev, IFF_PROMISC, enable);
654
655 if (ret)
656 DRV_LOG(DEBUG,
657 "port %u cannot %s promisc mode: Netlink error %s",
658 dev->data->port_id, enable ? "enable" : "disable",
659 strerror(rte_errno));
660 return ret;
661 }
662
663 /**
664 * Enable all multicast mode through Netlink.
665 *
666 * @param dev
667 * Pointer to Ethernet device structure.
668 * @param enable
669 * Nonzero to enable, disable otherwise.
670 *
671 * @return
672 * 0 on success, a negative errno value otherwise and rte_errno is set.
673 */
674 int
675 mlx5_nl_allmulti(struct rte_eth_dev *dev, int enable)
676 {
677 int ret = mlx5_nl_device_flags(dev, IFF_ALLMULTI, enable);
678
679 if (ret)
680 DRV_LOG(DEBUG,
681 "port %u cannot %s allmulti mode: Netlink error %s",
682 dev->data->port_id, enable ? "enable" : "disable",
683 strerror(rte_errno));
684 return ret;
685 }
686
687 /**
688 * Process network interface information from Netlink message.
689 *
690 * @param nh
691 * Pointer to Netlink message header.
692 * @param arg
693 * Opaque data pointer for this callback.
694 *
695 * @return
696 * 0 on success, a negative errno value otherwise and rte_errno is set.
697 */
698 static int
699 mlx5_nl_cmdget_cb(struct nlmsghdr *nh, void *arg)
700 {
701 struct mlx5_nl_ifindex_data *data = arg;
702 size_t off = NLMSG_HDRLEN;
703 uint32_t ibindex = 0;
704 uint32_t ifindex = 0;
705 uint32_t portnum = 0;
706 int found = 0;
707
708 if (nh->nlmsg_type !=
709 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET) &&
710 nh->nlmsg_type !=
711 RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_PORT_GET))
712 goto error;
713 while (off < nh->nlmsg_len) {
714 struct nlattr *na = (void *)((uintptr_t)nh + off);
715 void *payload = (void *)((uintptr_t)na + NLA_HDRLEN);
716
717 if (na->nla_len > nh->nlmsg_len - off)
718 goto error;
719 switch (na->nla_type) {
720 case RDMA_NLDEV_ATTR_DEV_INDEX:
721 ibindex = *(uint32_t *)payload;
722 break;
723 case RDMA_NLDEV_ATTR_DEV_NAME:
724 if (!strcmp(payload, data->name))
725 found = 1;
726 break;
727 case RDMA_NLDEV_ATTR_NDEV_INDEX:
728 ifindex = *(uint32_t *)payload;
729 break;
730 case RDMA_NLDEV_ATTR_PORT_INDEX:
731 portnum = *(uint32_t *)payload;
732 break;
733 default:
734 break;
735 }
736 off += NLA_ALIGN(na->nla_len);
737 }
738 if (found) {
739 data->ibindex = ibindex;
740 data->ifindex = ifindex;
741 data->portnum = portnum;
742 }
743 return 0;
744 error:
745 rte_errno = EINVAL;
746 return -rte_errno;
747 }
748
749 /**
750 * Get index of network interface associated with some IB device.
751 *
752 * This is the only somewhat safe method to avoid resorting to heuristics
753 * when faced with port representors. Unfortunately it requires at least
754 * Linux 4.17.
755 *
756 * @param nl
757 * Netlink socket of the RDMA kind (NETLINK_RDMA).
758 * @param[in] name
759 * IB device name.
760 * @param[in] pindex
761 * IB device port index, starting from 1
762 * @return
763 * A valid (nonzero) interface index on success, 0 otherwise and rte_errno
764 * is set.
765 */
766 unsigned int
767 mlx5_nl_ifindex(int nl, const char *name, uint32_t pindex)
768 {
769 uint32_t seq = random();
770 struct mlx5_nl_ifindex_data data = {
771 .name = name,
772 .ibindex = 0, /* Determined during first pass. */
773 .ifindex = 0, /* Determined during second pass. */
774 };
775 union {
776 struct nlmsghdr nh;
777 uint8_t buf[NLMSG_HDRLEN +
778 NLA_HDRLEN + NLA_ALIGN(sizeof(data.ibindex)) +
779 NLA_HDRLEN + NLA_ALIGN(sizeof(pindex))];
780 } req = {
781 .nh = {
782 .nlmsg_len = NLMSG_LENGTH(0),
783 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
784 RDMA_NLDEV_CMD_GET),
785 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
786 },
787 };
788 struct nlattr *na;
789 int ret;
790
791 ret = mlx5_nl_send(nl, &req.nh, seq);
792 if (ret < 0)
793 return 0;
794 ret = mlx5_nl_recv(nl, seq, mlx5_nl_cmdget_cb, &data);
795 if (ret < 0)
796 return 0;
797 if (!data.ibindex)
798 goto error;
799 ++seq;
800 req.nh.nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
801 RDMA_NLDEV_CMD_PORT_GET);
802 req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
803 req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.buf) - NLMSG_HDRLEN);
804 na = (void *)((uintptr_t)req.buf + NLMSG_HDRLEN);
805 na->nla_len = NLA_HDRLEN + sizeof(data.ibindex);
806 na->nla_type = RDMA_NLDEV_ATTR_DEV_INDEX;
807 memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
808 &data.ibindex, sizeof(data.ibindex));
809 na = (void *)((uintptr_t)na + NLA_ALIGN(na->nla_len));
810 na->nla_len = NLA_HDRLEN + sizeof(pindex);
811 na->nla_type = RDMA_NLDEV_ATTR_PORT_INDEX;
812 memcpy((void *)((uintptr_t)na + NLA_HDRLEN),
813 &pindex, sizeof(pindex));
814 ret = mlx5_nl_send(nl, &req.nh, seq);
815 if (ret < 0)
816 return 0;
817 ret = mlx5_nl_recv(nl, seq, mlx5_nl_cmdget_cb, &data);
818 if (ret < 0)
819 return 0;
820 if (!data.ifindex)
821 goto error;
822 return data.ifindex;
823 error:
824 rte_errno = ENODEV;
825 return 0;
826 }
827
828 /**
829 * Get the number of physical ports of given IB device.
830 *
831 * @param nl
832 * Netlink socket of the RDMA kind (NETLINK_RDMA).
833 * @param[in] name
834 * IB device name.
835 *
836 * @return
837 * A valid (nonzero) number of ports on success, 0 otherwise
838 * and rte_errno is set.
839 */
840 unsigned int
841 mlx5_nl_portnum(int nl, const char *name)
842 {
843 uint32_t seq = random();
844 struct mlx5_nl_ifindex_data data = {
845 .name = name,
846 .ibindex = 0,
847 .ifindex = 0,
848 .portnum = 0,
849 };
850 struct nlmsghdr req = {
851 .nlmsg_len = NLMSG_LENGTH(0),
852 .nlmsg_type = RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
853 RDMA_NLDEV_CMD_GET),
854 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
855 };
856 int ret;
857
858 ret = mlx5_nl_send(nl, &req, seq);
859 if (ret < 0)
860 return 0;
861 ret = mlx5_nl_recv(nl, seq, mlx5_nl_cmdget_cb, &data);
862 if (ret < 0)
863 return 0;
864 if (!data.ibindex) {
865 rte_errno = ENODEV;
866 return 0;
867 }
868 if (!data.portnum)
869 rte_errno = EINVAL;
870 return data.portnum;
871 }
872
873 /**
874 * Process switch information from Netlink message.
875 *
876 * @param nh
877 * Pointer to Netlink message header.
878 * @param arg
879 * Opaque data pointer for this callback.
880 *
881 * @return
882 * 0 on success, a negative errno value otherwise and rte_errno is set.
883 */
884 static int
885 mlx5_nl_switch_info_cb(struct nlmsghdr *nh, void *arg)
886 {
887 struct mlx5_switch_info info = {
888 .master = 0,
889 .representor = 0,
890 .name_type = MLX5_PHYS_PORT_NAME_TYPE_NOTSET,
891 .port_name = 0,
892 .switch_id = 0,
893 };
894 size_t off = NLMSG_LENGTH(sizeof(struct ifinfomsg));
895 bool switch_id_set = false;
896 bool num_vf_set = false;
897
898 if (nh->nlmsg_type != RTM_NEWLINK)
899 goto error;
900 while (off < nh->nlmsg_len) {
901 struct rtattr *ra = (void *)((uintptr_t)nh + off);
902 void *payload = RTA_DATA(ra);
903 unsigned int i;
904
905 if (ra->rta_len > nh->nlmsg_len - off)
906 goto error;
907 switch (ra->rta_type) {
908 case IFLA_NUM_VF:
909 num_vf_set = true;
910 break;
911 case IFLA_PHYS_PORT_NAME:
912 mlx5_translate_port_name((char *)payload, &info);
913 break;
914 case IFLA_PHYS_SWITCH_ID:
915 info.switch_id = 0;
916 for (i = 0; i < RTA_PAYLOAD(ra); ++i) {
917 info.switch_id <<= 8;
918 info.switch_id |= ((uint8_t *)payload)[i];
919 }
920 switch_id_set = true;
921 break;
922 }
923 off += RTA_ALIGN(ra->rta_len);
924 }
925 if (switch_id_set) {
926 /* We have some E-Switch configuration. */
927 mlx5_nl_check_switch_info(num_vf_set, &info);
928 }
929 assert(!(info.master && info.representor));
930 memcpy(arg, &info, sizeof(info));
931 return 0;
932 error:
933 rte_errno = EINVAL;
934 return -rte_errno;
935 }
936
937 /**
938 * Get switch information associated with network interface.
939 *
940 * @param nl
941 * Netlink socket of the ROUTE kind (NETLINK_ROUTE).
942 * @param ifindex
943 * Network interface index.
944 * @param[out] info
945 * Switch information object, populated in case of success.
946 *
947 * @return
948 * 0 on success, a negative errno value otherwise and rte_errno is set.
949 */
950 int
951 mlx5_nl_switch_info(int nl, unsigned int ifindex, struct mlx5_switch_info *info)
952 {
953 uint32_t seq = random();
954 struct {
955 struct nlmsghdr nh;
956 struct ifinfomsg info;
957 struct rtattr rta;
958 uint32_t extmask;
959 } req = {
960 .nh = {
961 .nlmsg_len = NLMSG_LENGTH
962 (sizeof(req.info) +
963 RTA_LENGTH(sizeof(uint32_t))),
964 .nlmsg_type = RTM_GETLINK,
965 .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
966 },
967 .info = {
968 .ifi_family = AF_UNSPEC,
969 .ifi_index = ifindex,
970 },
971 .rta = {
972 .rta_type = IFLA_EXT_MASK,
973 .rta_len = RTA_LENGTH(sizeof(int32_t)),
974 },
975 .extmask = RTE_LE32(1),
976 };
977 int ret;
978
979 ret = mlx5_nl_send(nl, &req.nh, seq);
980 if (ret >= 0)
981 ret = mlx5_nl_recv(nl, seq, mlx5_nl_switch_info_cb, info);
982 if (info->master && info->representor) {
983 DRV_LOG(ERR, "ifindex %u device is recognized as master"
984 " and as representor", ifindex);
985 rte_errno = ENODEV;
986 ret = -rte_errno;
987 }
988 return ret;
989 }