#include <errno.h>
#include <inttypes.h>
#include <stdlib.h>
+#include <sys/socket.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <unistd.h>
#include "openvswitch/hmap.h"
#include "netlink.h"
#include "netlink-protocol.h"
+#include "netnsid.h"
#include "odp-netlink.h"
#include "openvswitch/ofpbuf.h"
#include "ovs-thread.h"
-#include "poll-loop.h"
+#include "openvswitch/poll-loop.h"
#include "seq.h"
#include "socket-util.h"
#include "util.h"
static int set_sock_property(struct nl_sock *sock);
static int nl_sock_transact(struct nl_sock *sock, const struct ofpbuf *request,
struct ofpbuf **replyp);
+
+/* Aborts the process when 'last_err' is ERROR_NOT_FOUND.
+ *
+ * A DeviceIoControl() failure for which GetLastError() reports
+ * ERROR_NOT_FOUND means that communication with the kernel device has been
+ * lost.  CloseHandle() would fail as well, because the handle in 'theory' no
+ * longer exists, so the only remaining option is to crash and let the service
+ * manager restart the service.  This is the only way to close the handle from
+ * both userspace and kernel.
+ *
+ * Any other value of 'last_err' is ignored. */
+void
+lost_communication(DWORD last_err)
+{
+    if (last_err != ERROR_NOT_FOUND) {
+        return;
+    }
+
+    ovs_abort(0, "lost communication with the kernel device");
+}
#endif
\f
/* Netlink sockets. */
if (!DeviceIoControl(sock->handle, OVS_IOCTL_GET_PID,
NULL, 0, &pid, sizeof(pid),
&bytes, NULL)) {
+ lost_communication(GetLastError());
retval = EINVAL;
} else {
if (bytes < sizeof(pid)) {
return 0;
}
+/* When 'enable' is true, tries to enable 'sock' to receive netlink
+ * notifications from all network namespaces that have an nsid assigned
+ * into the network namespace where the socket has been opened.  The
+ * running kernel needs to provide support for that.  When 'enable' is
+ * false, 'sock' will receive netlink notifications only from the network
+ * namespace where the socket has been opened.
+ *
+ * When built with _WIN32 defined, no socket option is set and the call
+ * always succeeds.
+ *
+ * Returns 0 if successful, otherwise a positive errno. */
+int
+nl_sock_listen_all_nsid(struct nl_sock *sock, bool enable)
+{
+    int val = enable ? 1 : 0;
+
+#ifndef _WIN32
+    if (setsockopt(sock->fd, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, &val,
+                   sizeof val) < 0) {
+        /* Capture errno before logging: the logging call may overwrite it,
+         * so the saved copy is what must be reported to the caller. */
+        int error = errno;
+
+        VLOG_INFO("netlink: could not %s listening to all nsid (%s)",
+                  enable ? "enable" : "disable", ovs_strerror(error));
+        return error;
+    }
+#endif
+
+    return 0;
+}
+
#ifdef _WIN32
int
nl_sock_subscribe_packet__(struct nl_sock *sock, bool subscribe)
if (!DeviceIoControl(sock->handle, OVS_IOCTL_WRITE,
msg->data, msg->size, NULL, 0,
&bytes, NULL)) {
+ lost_communication(GetLastError());
retval = -1;
/* XXX: Map to a more appropriate error based on GetLastError(). */
errno = EINVAL;
VLOG_DBG_RL(&rl, "fatal driver failure in write: %s",
- ovs_lasterror_to_string());
+ ovs_lasterror_to_string());
} else {
retval = msg->size;
}
}
static int
-nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
+nl_sock_recv__(struct nl_sock *sock, struct ofpbuf *buf, int *nsid, bool wait)
{
/* We can't accurately predict the size of the data to be received. The
* caller is supposed to have allocated enough space in 'buf' to handle the
uint8_t tail[65536];
struct iovec iov[2];
struct msghdr msg;
+ uint8_t msgctrl[64];
+ struct cmsghdr *cmsg;
ssize_t retval;
+ int *ptr;
int error;
ovs_assert(buf->allocated >= sizeof *nlmsghdr);
memset(&msg, 0, sizeof msg);
msg.msg_iov = iov;
msg.msg_iovlen = 2;
+ msg.msg_control = msgctrl;
+ msg.msg_controllen = sizeof msgctrl;
/* Receive a Netlink message from the kernel.
*
DWORD bytes;
if (!DeviceIoControl(sock->handle, sock->read_ioctl,
NULL, 0, tail, sizeof tail, &bytes, NULL)) {
+ lost_communication(GetLastError());
VLOG_DBG_RL(&rl, "fatal driver failure in transact: %s",
ovs_lasterror_to_string());
retval = -1;
}
#endif
+ if (nsid) {
+ /* The network namespace id from which the message was sent comes
+ * as ancillary data. For older kernels, this data is either not
+ * available or it might be -1, so it falls back to local network
+ * namespace (no id). Latest kernels return a valid ID only if
+ * available or nothing. */
+ netnsid_set_local(nsid);
+#ifndef _WIN32
+ cmsg = CMSG_FIRSTHDR(&msg);
+ while (cmsg != NULL) {
+ if (cmsg->cmsg_level == SOL_NETLINK
+ && cmsg->cmsg_type == NETLINK_LISTEN_ALL_NSID) {
+ ptr = ALIGNED_CAST(int *, CMSG_DATA(cmsg));
+ netnsid_set(nsid, *ptr);
+ }
+ if (cmsg->cmsg_level == SOL_SOCKET
+ && cmsg->cmsg_type == SCM_RIGHTS) {
+ /* This is unexpected and unwanted, close all fds */
+ int nfds;
+ int i;
+ nfds = (cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)))
+ / sizeof(int);
+ ptr = ALIGNED_CAST(int *, CMSG_DATA(cmsg));
+ for (i = 0; i < nfds; i++) {
+ VLOG_ERR_RL(&rl, "closing unexpected received fd (%d).",
+ ptr[i]);
+ close(ptr[i]);
+ }
+ }
+
+ cmsg = CMSG_NXTHDR(&msg, cmsg);
+ }
+#endif
+ }
+
log_nlmsg(__func__, 0, buf->data, buf->size, sock->protocol);
COVERAGE_INC(netlink_received);
/* Tries to receive a Netlink message from the kernel on 'sock' into 'buf'. If
* 'wait' is true, waits for a message to be ready. Otherwise, fails with
- * EAGAIN if the 'sock' receive buffer is empty.
+ * EAGAIN if the 'sock' receive buffer is empty. If 'nsid' is nonnull, the
+ * network namespace id from which the message was sent is stored in '*nsid'.
*
* The caller must have initialized 'buf' with an allocation of at least
* NLMSG_HDRLEN bytes. For best performance, the caller should allocate enough
* Regardless of success or failure, this function resets 'buf''s headroom to
* 0. */
int
-nl_sock_recv(struct nl_sock *sock, struct ofpbuf *buf, bool wait)
+nl_sock_recv(struct nl_sock *sock, struct ofpbuf *buf, int *nsid, bool wait)
{
- return nl_sock_recv__(sock, buf, wait);
+ return nl_sock_recv__(sock, buf, nsid, wait); /* 'nsid' may be NULL. */
}
static void
}
/* Receive a reply. */
- error = nl_sock_recv__(sock, buf_txn->reply, false);
+ error = nl_sock_recv__(sock, buf_txn->reply, NULL, false);
if (error) {
if (error == EAGAIN) {
nl_sock_record_errors__(transactions, n, 0);
}
} else if (!ret) {
/* XXX: Map to a more appropriate error. */
+ lost_communication(GetLastError());
error = EINVAL;
VLOG_DBG_RL(&rl, "fatal driver failure: %s",
ovs_lasterror_to_string());
}
if (reply_len != 0) {
+ request_nlmsg = nl_msg_nlmsghdr(txn->request);
+
if (reply_len < sizeof *reply_nlmsg) {
nl_sock_record_errors__(transactions, n, 0);
VLOG_DBG_RL(&rl, "insufficient length of reply %#"PRIu32
}
/* Validate the sequence number in the reply. */
- request_nlmsg = nl_msg_nlmsghdr(txn->request);
reply_nlmsg = (struct nlmsghdr *)reply_buf;
if (request_nlmsg->nlmsg_seq != reply_nlmsg->nlmsg_seq) {
int error;
while (!buffer->size) {
- error = nl_sock_recv__(dump->sock, buffer, false);
+ error = nl_sock_recv__(dump->sock, buffer, NULL, false);
if (error) {
/* The kernel never blocks providing the results of a dump, so
* error == EAGAIN means that we've read the whole thing, and
error = GetLastError();
/* Check if the I/O got pended */
if (error != ERROR_IO_INCOMPLETE && error != ERROR_IO_PENDING) {
+ lost_communication(error);
VLOG_ERR("nl_sock_wait failed - %s\n", ovs_format_message(error));
retval = EINVAL;
}
family_name = OVS_WIN_NETDEV_FAMILY;
family_version = OVS_WIN_NETDEV_VERSION;
family_attrmax = OVS_WIN_NETDEV_ATTR_MAX;
+ } else if (!strcmp(name, OVS_CT_LIMIT_FAMILY)) {
+ family_id = OVS_WIN_NL_CTLIMIT_FAMILY_ID;
+ family_name = OVS_CT_LIMIT_FAMILY;
+ family_version = OVS_CT_LIMIT_VERSION;
+ family_attrmax = OVS_CT_LIMIT_ATTR_MAX;
} else {
ofpbuf_delete(reply);
return EINVAL;