From a86bd14ec97270fde85a06ce702cdd9ef0d038ca Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Thu, 29 Mar 2018 23:05:26 -0300 Subject: [PATCH] netlink: provide network namespace id from a msg. The netlink notification's ancillary data contains the network namespace id (netnsid) needed to identify the device correctly. Signed-off-by: Flavio Leitner Signed-off-by: Ben Pfaff --- configure.ac | 3 +- lib/automake.mk | 1 + lib/dpif-netlink.c | 6 +- lib/netdev-linux.c | 2 +- lib/netlink-notifier.c | 2 +- lib/netlink-socket.c | 53 ++++++++++++++-- lib/netlink-socket.h | 2 +- lib/netnsid.h | 139 +++++++++++++++++++++++++++++++++++++++++ utilities/nlmon.c | 2 +- 9 files changed, 196 insertions(+), 14 deletions(-) create mode 100644 lib/netnsid.h diff --git a/configure.ac b/configure.ac index 9940a1a45..f0e4b5127 100644 --- a/configure.ac +++ b/configure.ac @@ -108,7 +108,8 @@ AC_CHECK_MEMBERS([struct sockaddr_in6.sin6_scope_id], [], [], #include #include ]]) AC_CHECK_FUNCS([mlockall strnlen getloadavg statvfs getmntent_r sendmmsg clock_gettime]) -AC_CHECK_HEADERS([mntent.h sys/statvfs.h linux/types.h linux/if_ether.h stdatomic.h]) +AC_CHECK_HEADERS([mntent.h sys/statvfs.h linux/types.h linux/if_ether.h]) +AC_CHECK_HEADERS([linux/net_namespace.h stdatomic.h]) AC_CHECK_HEADERS([net/if_mib.h], [], [], [[#include #include ]]) diff --git a/lib/automake.mk b/lib/automake.mk index c7eda6e31..dcf90899f 100644 --- a/lib/automake.mk +++ b/lib/automake.mk @@ -143,6 +143,7 @@ lib_libopenvswitch_la_SOURCES = \ lib/netflow.h \ lib/netlink.c \ lib/netlink.h \ + lib/netnsid.h \ lib/nx-match.c \ lib/nx-match.h \ lib/object-collection.c \ diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c index 8543a2bbe..2ba05d7b1 100644 --- a/lib/dpif-netlink.c +++ b/lib/dpif-netlink.c @@ -1288,7 +1288,7 @@ dpif_netlink_port_poll(const struct dpif *dpif_, char **devnamep) int error; ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); - error = nl_sock_recv(dpif->port_notifier, &buf, false); + error = 
nl_sock_recv(dpif->port_notifier, &buf, NULL, false); if (!error) { struct dpif_netlink_vport vport; @@ -2622,7 +2622,7 @@ dpif_netlink_recv_windows(struct dpif_netlink *dpif, uint32_t handler_id, return EAGAIN; } - error = nl_sock_recv(sock_pool[i].nl_sock, buf, false); + error = nl_sock_recv(sock_pool[i].nl_sock, buf, NULL, false); if (error == ENOBUFS) { /* ENOBUFS typically means that we've received so many * packets that the buffer overflowed. Try again @@ -2697,7 +2697,7 @@ dpif_netlink_recv__(struct dpif_netlink *dpif, uint32_t handler_id, return EAGAIN; } - error = nl_sock_recv(ch->sock, buf, false); + error = nl_sock_recv(ch->sock, buf, NULL, false); if (error == ENOBUFS) { /* ENOBUFS typically means that we've received so many * packets that the buffer overflowed. Try again diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 7ea40a891..1f95c6d1e 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -639,7 +639,7 @@ netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED) struct ofpbuf buf; ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); - error = nl_sock_recv(sock, &buf, false); + error = nl_sock_recv(sock, &buf, NULL, false); if (!error) { struct rtnetlink_change change; diff --git a/lib/netlink-notifier.c b/lib/netlink-notifier.c index 7d8cfffa2..dfecb9778 100644 --- a/lib/netlink-notifier.c +++ b/lib/netlink-notifier.c @@ -185,7 +185,7 @@ nln_run(struct nln *nln) int error; ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); - error = nl_sock_recv(nln->notify_sock, &buf, false); + error = nl_sock_recv(nln->notify_sock, &buf, NULL, false); if (!error) { int group = nln->parse(&buf, nln->change); diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c index 317bf907f..f68ca860d 100644 --- a/lib/netlink-socket.c +++ b/lib/netlink-socket.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,7 @@ #include "openvswitch/hmap.h" #include "netlink.h" #include "netlink-protocol.h" +#include 
"netnsid.h" #include "odp-netlink.h" #include "openvswitch/ofpbuf.h" #include "ovs-thread.h" @@ -607,7 +609,7 @@ nl_sock_send_seq(struct nl_sock *sock, const struct ofpbuf *msg, } static int -nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait) +nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, int *nsid, bool wait) { /* We can't accurately predict the size of the data to be received. The * caller is supposed to have allocated enough space in 'buf' to handle the @@ -618,7 +620,10 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait) uint8_t tail[65536]; struct iovec iov[2]; struct msghdr msg; + uint8_t msgctrl[64]; + struct cmsghdr *cmsg; ssize_t retval; + int *ptr; int error; ovs_assert(buf->allocated >= sizeof *nlmsghdr); @@ -632,6 +637,8 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait) memset(&msg, 0, sizeof msg); msg.msg_iov = iov; msg.msg_iovlen = 2; + msg.msg_control = msgctrl; + msg.msg_controllen = sizeof msgctrl; /* Receive a Netlink message from the kernel. * @@ -706,6 +713,39 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait) } #endif + if (nsid) { + /* The network namespace id from which the message was sent comes + * as ancillary data. For older kernels, this data is either not + * available or it might be -1, so it falls back to local network + * namespace (no id). Latest kernels return a valid ID only if + * available or nothing. 
*/ + netnsid_set_local(nsid); + cmsg = CMSG_FIRSTHDR(&msg); + while (cmsg != NULL) { + if (cmsg->cmsg_level == SOL_NETLINK + && cmsg->cmsg_type == NETLINK_LISTEN_ALL_NSID) { + ptr = ALIGNED_CAST(int *, CMSG_DATA(cmsg)); + netnsid_set(nsid, *ptr); + } + if (cmsg->cmsg_level == SOL_SOCKET + && cmsg->cmsg_type == SCM_RIGHTS) { + /* This is unexpected and unwanted, close all fds */ + int nfds; + int i; + nfds = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr))) + / sizeof(int); + ptr = ALIGNED_CAST(int *, CMSG_DATA(cmsg)); + for (i = 0; i < nfds; i++) { + VLOG_ERR_RL(&rl, "closing unexpected received fd (%d).", + ptr[i]); + close(ptr[i]); + } + } + + cmsg = CMSG_NXTHDR(&msg, cmsg); + } + } + log_nlmsg(__func__, 0, buf->data, buf->size, sock->protocol); COVERAGE_INC(netlink_received); @@ -714,7 +754,8 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait) /* Tries to receive a Netlink message from the kernel on 'sock' into 'buf'. If * 'wait' is true, waits for a message to be ready. Otherwise, fails with - * EAGAIN if the 'sock' receive buffer is empty. + * EAGAIN if the 'sock' receive buffer is empty. If 'nsid' is provided, the + * network namespace id from which the message was sent will be provided. * * The caller must have initialized 'buf' with an allocation of at least * NLMSG_HDRLEN bytes. For best performance, the caller should allocate enough @@ -730,9 +771,9 @@ nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait) * Regardless of success or failure, this function resets 'buf''s headroom to * 0. */ int -nl_sock_recv(struct nl_sock *sock, struct ofpbuf *buf, bool wait) +nl_sock_recv(struct nl_sock *sock, struct ofpbuf *buf, int *nsid, bool wait) { - return nl_sock_recv__(sock, buf, wait); + return nl_sock_recv__(sock, buf, nsid, wait); } static void @@ -821,7 +862,7 @@ nl_sock_transact_multiple__(struct nl_sock *sock, } /* Receive a reply. 
*/ - error = nl_sock_recv__(sock, buf_txn->reply, false); + error = nl_sock_recv__(sock, buf_txn->reply, NULL, false); if (error) { if (error == EAGAIN) { nl_sock_record_errors__(transactions, n, 0); @@ -1101,7 +1142,7 @@ nl_dump_refill(struct nl_dump *dump, struct ofpbuf *buffer) int error; while (!buffer->size) { - error = nl_sock_recv__(dump->sock, buffer, false); + error = nl_sock_recv__(dump->sock, buffer, NULL, false); if (error) { /* The kernel never blocks providing the results of a dump, so * error == EAGAIN means that we've read the whole thing, and diff --git a/lib/netlink-socket.h b/lib/netlink-socket.h index d3cc64288..98f6554fa 100644 --- a/lib/netlink-socket.h +++ b/lib/netlink-socket.h @@ -221,7 +221,7 @@ int nl_sock_unsubscribe_packets(struct nl_sock *sock); int nl_sock_send(struct nl_sock *, const struct ofpbuf *, bool wait); int nl_sock_send_seq(struct nl_sock *, const struct ofpbuf *, uint32_t nlmsg_seq, bool wait); -int nl_sock_recv(struct nl_sock *, struct ofpbuf *, bool wait); +int nl_sock_recv(struct nl_sock *, struct ofpbuf *, int *nsid, bool wait); int nl_sock_drain(struct nl_sock *); diff --git a/lib/netnsid.h b/lib/netnsid.h new file mode 100644 index 000000000..1d5ab83c5 --- /dev/null +++ b/lib/netnsid.h @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2017 Red Hat Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef NETNSID_H +#define NETNSID_H 1 + +#include + +#ifdef HAVE_LINUX_NET_NAMESPACE_H +#include +#endif + +/* + * The network namespace ID is a non-negative number that identifies the + * namespace from which a netlink message was sent. It is used to determine + * whether a received message belongs to a port attached to the bridge. + * + * There are three port states listed below: + * UNSET: A port in this state could be either in the same network + * namespace as the daemon (LOCAL) or in another namespace (ID). Any operation + * on a port in this state that requires the ID will trigger a query to the + * kernel to find out in which namespace the port currently is. + * + * LOCAL: A port in this state means that it is in the same network namespace + * as the daemon. + * + * ID: A port that is not LOCAL and not UNSET has a valid non-negative remote + * namespace ID. + * + * Possible state changes: + * + * Initial port's state: UNSET. + * + * UNSET -> LOCAL: The daemon queries the kernel and finds that the port is in + * the same network namespace as the daemon, or the API is not available + * (older kernels). + * + * LOCAL -> UNSET: The kernel sends a deregistering netlink message which + * unsets the port. It happens when the port is removed (or moved to another + * network namespace). + * + * UNSET -> ID: The daemon queries the kernel and finds that the port is + * in a specific network namespace with an ID assigned. + * + * ID -> UNSET: The daemon receives a deregistering netlink message from that + * namespace indicating the device is being removed (or moved to another + * network namespace). 
+ */ + +#ifdef NETNSA_NSID_NOT_ASSIGNED +#define NETNSID_LOCAL NETNSA_NSID_NOT_ASSIGNED +#else +#define NETNSID_LOCAL -1 +#endif +#define NETNSID_UNSET (NETNSID_LOCAL - 1) + +/* Prototypes */ +static inline void netnsid_set_local(int *nsid); +static inline bool netnsid_is_local(int nsid); +static inline void netnsid_unset(int *nsid); +static inline bool netnsid_is_unset(int nsid); +static inline bool netnsid_is_remote(int nsid); +static inline void netnsid_set(int *nsid, int id); +static inline bool netnsid_eq(int nsid1, int nsid2); + +/* Functions */ +static inline void +netnsid_set_local(int *nsid) +{ + *nsid = NETNSID_LOCAL; +} + +static inline bool +netnsid_is_local(int nsid) +{ + return nsid == NETNSID_LOCAL; +} + +static inline void +netnsid_unset(int *nsid) +{ + *nsid = NETNSID_UNSET; +} + +static inline bool +netnsid_is_unset(int nsid) +{ + return nsid == NETNSID_UNSET; +} + +static inline bool +netnsid_is_remote(int nsid) +{ + if (netnsid_is_unset(nsid) || netnsid_is_local(nsid)) { + return false; + } + + return true; +} + +static inline void +netnsid_set(int *nsid, int id) +{ + /* The kernel only sends positive numbers for valid IDs. */ + if (id != NETNSID_LOCAL) { + ovs_assert(id >= 0); + } + + *nsid = id; +} + +static inline bool +netnsid_eq(int nsid1, int nsid2) +{ + if (netnsid_is_unset(nsid1) || netnsid_is_unset(nsid2)) { + return false; + } + + if (nsid1 == nsid2) { + return true; + } + + return false; +} + +#endif diff --git a/utilities/nlmon.c b/utilities/nlmon.c index b91fa09b3..d38a70b6f 100644 --- a/utilities/nlmon.c +++ b/utilities/nlmon.c @@ -59,7 +59,7 @@ main(int argc OVS_UNUSED, char *argv[]) ofpbuf_use_stub(&buf, buf_stub, sizeof buf_stub); for (;;) { - error = nl_sock_recv(sock, &buf, false); + error = nl_sock_recv(sock, &buf, NULL, false); if (error == EAGAIN) { /* Nothing to do. */ } else if (error == ENOBUFS) { -- 2.39.2