#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
+#include <stdbool.h>
#include <rte_malloc.h>
#include <tap_netlink.h>
#include <rte_random.h>
+
#include "tap_log.h"
+/* Compatibility with glibc < 2.24 */
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
/* Must be quite large to support dumping a huge list of QDISC or filters. */
#define BUF_SIZE (32 * 1024) /* Size of the buffer to receive kernel messages */
#define SNDBUF_SIZE 32768 /* Send buffer size for the netlink socket */
.nl_family = AF_NETLINK,
.nl_groups = nl_groups,
};
+#ifdef NETLINK_EXT_ACK
+ int one = 1;
+#endif
fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
if (fd < 0) {
close(fd);
return -1;
}
+
+#ifdef NETLINK_EXT_ACK
+ /* Ask for extended ACK responses; older kernels will ignore the request. */
+ setsockopt(fd, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one));
+#endif
+
if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
TAP_LOG(ERR, "Unable to bind to the netlink socket");
close(fd);
int
tap_nl_send(int nlsk_fd, struct nlmsghdr *nh)
{
- /* man 7 netlink EXAMPLE */
- struct sockaddr_nl sa = {
- .nl_family = AF_NETLINK,
- };
- struct iovec iov = {
- .iov_base = nh,
- .iov_len = nh->nlmsg_len,
- };
- struct msghdr msg = {
- .msg_name = &sa,
- .msg_namelen = sizeof(sa),
- .msg_iov = &iov,
- .msg_iovlen = 1,
- };
int send_bytes;
nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
nh->nlmsg_seq = (uint32_t)rte_rand();
- send_bytes = sendmsg(nlsk_fd, &msg, 0);
+
+retry: /* jumped back to when send() fails with EINTR */
+ send_bytes = send(nlsk_fd, nh, nh->nlmsg_len, 0); /* length comes from the header itself */
if (send_bytes < 0) {
+ if (errno == EINTR) /* EINTR is retried silently instead of being logged as a failure */
+ goto retry;
+
TAP_LOG(ERR, "Failed to send netlink message: %s (%d)",
strerror(errno), errno);
return -1;
return send_bytes;
}
+#ifdef NETLINK_EXT_ACK
+/*
+ * Return the address NLMSG_SPACE(offset) bytes past the start of nh,
+ * i.e. where the attributes begin when offset bytes of payload follow
+ * the netlink message header.
+ */
+static const struct nlattr *
+tap_nl_attr_first(const struct nlmsghdr *nh, size_t offset)
+{
+	const char *base = (const char *)nh;
+	const char *first = base + NLMSG_SPACE(offset);
+
+	return (const struct nlattr *)first;
+}
+
+/*
+ * Return the attribute that follows attr.
+ *
+ * The step is attr->nla_len rounded up with NLA_ALIGN, the alignment
+ * macro that belongs to attributes (NLMSG_ALIGN is the message-level
+ * one). The result is not validated here; callers check it with
+ * tap_nl_attr_ok() before dereferencing it.
+ */
+static const struct nlattr *
+tap_nl_attr_next(const struct nlattr *attr)
+{
+	return (const struct nlattr *)((const char *)attr
+				       + NLA_ALIGN(attr->nla_len));
+}
+
+/*
+ * Tell whether a whole attribute is available at attr when len bytes
+ * remain. All three conditions must hold, tested in this order so that
+ * attr is never read unless its header fits:
+ *   - the remaining space can hold an attribute header,
+ *   - nla_len covers at least the header itself,
+ *   - nla_len does not run past the remaining space.
+ */
+static bool
+tap_nl_attr_ok(const struct nlattr *attr, int len)
+{
+	const bool header_fits = len >= (int)sizeof(struct nlattr);
+
+	return header_fits &&
+	       attr->nla_len >= sizeof(struct nlattr) &&
+	       (int)attr->nla_len <= len;
+}
+
+
+/*
+ * Decode extended errors from kernel.
+ *
+ * nh is the NLMSG_ERROR message and err its payload. When the message
+ * carries TLVs (NLM_F_ACK_TLVS), walk the attributes that follow the
+ * error payload and log the first NLMSGERR_ATTR_MSG string found:
+ * at ERR level when err->error is non-zero, at WARNING level otherwise.
+ */
+static void
+tap_nl_dump_ext_ack(const struct nlmsghdr *nh, const struct nlmsgerr *err)
+{
+	const struct nlattr *attr;
+	const char *tail = (const char *)nh + NLMSG_ALIGN(nh->nlmsg_len);
+	size_t hlen = sizeof(*err);
+
+	/* no TLVs, no extended response */
+	if (!(nh->nlmsg_flags & NLM_F_ACK_TLVS))
+		return;
+
+	/*
+	 * sizeof(*err) already counts the echoed request header (err->msg);
+	 * when the reply is not capped, also skip the rest of that request.
+	 */
+	if (!(nh->nlmsg_flags & NLM_F_CAPPED))
+		hlen += err->msg.nlmsg_len - NLMSG_HDRLEN;
+
+	for (attr = tap_nl_attr_first(nh, hlen);
+	     tap_nl_attr_ok(attr, (int)(tail - (const char *)attr));
+	     attr = tap_nl_attr_next(attr)) {
+		uint16_t type = attr->nla_type & NLA_TYPE_MASK;
+		const char *msg;
+		int msg_len;
+
+		if (type != NLMSGERR_ATTR_MSG)
+			continue;
+
+		msg = (const char *)attr + NLA_HDRLEN;
+		/*
+		 * tap_nl_attr_ok() guarantees nla_len >= header size, so this
+		 * is never negative. Bounding the print to the payload keeps
+		 * the log from reading past the attribute if the string is
+		 * not NUL-terminated.
+		 */
+		msg_len = (int)attr->nla_len - NLA_HDRLEN;
+
+		if (err->error) {
+			TAP_LOG(ERR, "%.*s", msg_len, msg);
+		} else {
+			TAP_LOG(WARNING, "%.*s", msg_len, msg);
+		}
+		break;
+	}
+}
+#else
+/*
+ * Extended ACK support was added in Linux kernel 4.17;
+ * on older kernels, just ignore that part of the message.
+ */
+#define tap_nl_dump_ext_ack(nh, err) do { } while (0)
+#endif
+
/**
* Check that the kernel sends an appropriate ACK in response
* to a tap_nl_send().
int
tap_nl_recv(int nlsk_fd, int (*cb)(struct nlmsghdr *, void *arg), void *arg)
{
- /* man 7 netlink EXAMPLE */
- struct sockaddr_nl sa;
char buf[BUF_SIZE];
- struct iovec iov = {
- .iov_base = buf,
- .iov_len = sizeof(buf),
- };
- struct msghdr msg = {
- .msg_name = &sa,
- .msg_namelen = sizeof(sa),
- .msg_iov = &iov,
- /* One message at a time */
- .msg_iovlen = 1,
- };
int multipart = 0;
int ret = 0;
do {
struct nlmsghdr *nh;
- int recv_bytes = 0;
+ int recv_bytes;
- recv_bytes = recvmsg(nlsk_fd, &msg, 0);
- if (recv_bytes < 0)
+retry:
+ recv_bytes = recv(nlsk_fd, buf, sizeof(buf), 0);
+ if (recv_bytes < 0) {
+ if (errno == EINTR)
+ goto retry;
return -1;
+ }
+
for (nh = (struct nlmsghdr *)buf;
NLMSG_OK(nh, (unsigned int)recv_bytes);
nh = NLMSG_NEXT(nh, recv_bytes)) {
if (nh->nlmsg_type == NLMSG_ERROR) {
struct nlmsgerr *err_data = NLMSG_DATA(nh);
+ tap_nl_dump_ext_ack(nh, err_data);
if (err_data->error < 0) {
errno = -err_data->error;
return -1;