#include "table.h"
#include "rib.h"
#include "privs.h"
+#include "vrf.h"
#include "zebra/interface.h"
#include "zebra/zserv.h"
#include "zebra/debug.h"
#include "zebra/kernel_socket.h"
+#include "zebra/rib.h"
extern struct zebra_privs_t zserv_privs;
-extern struct zebra_t zebrad;
/*
- * Given a sockaddr length, round it up to include pad bytes following
- * it. Assumes the kernel pads to sizeof(long).
+ * Historically, the BSD routing socket has aligned data following a
+ * struct sockaddr to sizeof(long), which was 4 bytes on some
+ * platforms, and 8 bytes on others. NetBSD 6 changed the routing
+ * socket to align to sizeof(uint64_t), which is 8 bytes. OS X
+ * appears to align to sizeof(int), which is 4 bytes.
*
- * XXX: why is ROUNDUP(0) sizeof(long)? 0 is an illegal sockaddr
- * length anyway (< sizeof (struct sockaddr)), so this shouldn't
- * matter.
+ * Alignment of zero-sized sockaddrs is nonsensical, but historically
+ * BSD defines RT_ROUNDUP(0) to be the alignment interval (rather than
+ * 0). We follow this practice without questioning it, but it is a
+ * bug if quagga calls ROUNDUP with 0.
*/
+
+/*
+ * Because of these varying conventions, the only sane approach is for
+ * the <net/route.h> header to define some flavor of ROUNDUP macro.
+ */
+
+#if defined(SA_SIZE)
+/* SAROUNDUP is the only thing we need, and SA_SIZE provides that */
+#define SAROUNDUP(a) SA_SIZE(a)
+#else /* !SA_SIZE */
+
+#if defined(RT_ROUNDUP)
+#define ROUNDUP(a) RT_ROUNDUP(a)
+#endif /* defined(RT_ROUNDUP) */
+
+#if defined(SUNOS_5)
+/* Solaris has struct sockaddr_in[6] definitions at 16 / 32 bytes size,
+ * so the whole concept doesn't really apply. */
+#define ROUNDUP(a) (a)
+#endif
+
+/*
+ * If ROUNDUP has not yet been defined in terms of platform-provided
+ * defines, attempt to cope with heuristics.
+ */
+#if !defined(ROUNDUP)
+
+/*
+ * It's a bug for a platform not to define rounding/alignment for
+ * sockaddrs on the routing socket. This warning really is
+ * intentional, to provoke filing bug reports with operating systems
+ * that don't define RT_ROUNDUP or equivalent.
+ */
+#warning "net/route.h does not define RT_ROUNDUP; making unwarranted assumptions!"
+
+/* OS X (Xcode as of 2014-12) is known not to define RT_ROUNDUP */
+#ifdef __APPLE__
+#define ROUNDUP_TYPE int
+#else
+#define ROUNDUP_TYPE long
+#endif
+
#define ROUNDUP(a) \
- ((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
+ ((a) > 0 ? (1 + (((a) - 1) | (sizeof(ROUNDUP_TYPE) - 1))) : sizeof(ROUNDUP_TYPE))
+
+#endif /* !defined(ROUNDUP) */
/*
* Given a pointer (sockaddr or void *), return the number of bytes
ROUNDUP(sizeof(struct sockaddr_dl)) : sizeof(struct sockaddr)))
#endif /* HAVE_STRUCT_SOCKADDR_SA_LEN */
-/* We use an additional pointer in following, pdest, rather than (DEST)
- * directly, because gcc will warn if the macro is expanded and DEST is NULL,
- * complaining that memcpy is being passed a NULL value, despite the fact
- * the if (NULL) makes it impossible.
+#endif /* !SA_SIZE */
+
+/*
+ * We use a call to an inline function to copy (PNT) to (DEST) because:
+ * 1. Calculating the length of the copy requires an #ifdef to determine
+ *    whether sa_len is a field, and an #ifdef can't be used inside a #define.
+ * 2. The compiler then doesn't complain when DEST is NULL, which is only
+ *    the case when we are skipping the copy and advancing to the next SA.
*/
+static inline void
+rta_copy (union sockunion *dest, caddr_t src)
+{
+  size_t want;
+
+  /* A NULL DEST means the caller only wants to step over this sockaddr. */
+  if (dest == NULL)
+    return;
+
+  /* Length the source sockaddr claims to occupy. */
+#ifdef HAVE_STRUCT_SOCKADDR_SA_LEN
+  want = ((struct sockaddr *)src)->sa_len;
+#else
+  want = SAROUNDUP (src);
+#endif
+
+  /* Never copy more than DEST can hold. */
+  if (want > sizeof (*dest))
+    want = sizeof (*dest);
+
+  memcpy (dest, src, want);
+}
+
#define RTA_ADDR_GET(DEST, RTA, RTMADDRS, PNT) \
if ((RTMADDRS) & (RTA)) \
{ \
- void *pdest = (DEST); \
int len = SAROUNDUP ((PNT)); \
- if ( ((DEST) != NULL) && \
- af_check (((struct sockaddr *)(PNT))->sa_family)) \
- memcpy (pdest, (PNT), len); \
+ if (af_check (((struct sockaddr *)(PNT))->sa_family)) /* copy only when af_check() accepts the family */ \
+ rta_copy((DEST), (PNT)); /* rta_copy() returns without copying when DEST is NULL */ \
(PNT) += len; \
}
#define RTA_ATTR_GET(DEST, RTA, RTMADDRS, PNT) \
if ((RTMADDRS) & (RTA)) \
{ \
- void *pdest = (DEST); \
int len = SAROUNDUP ((PNT)); \
- if ((DEST) != NULL) \
- memcpy (pdest, (PNT), len); \
+ rta_copy((DEST), (PNT)); /* no address-family check here; rta_copy() returns without copying when DEST is NULL */ \
(PNT) += len; \
}
(LEN) = 0; \
}
/* Routing socket message types. */
-struct message rtm_type_str[] =
+const struct message rtm_type_str[] =
{
{RTM_ADD, "RTM_ADD"},
{RTM_DELETE, "RTM_DELETE"},
#endif /* RTM_IFANNOUNCE */
{0, NULL}
};
-int rtm_type_str_max = sizeof (rtm_type_str) / sizeof (struct message) - 1;
-struct message rtm_flag_str[] =
+static const struct message rtm_flag_str[] =
{
{RTF_UP, "UP"},
{RTF_GATEWAY, "GATEWAY"},
#ifdef RTF_MASK
{RTF_MASK, "MASK"},
#endif /* RTF_MASK */
+#ifdef RTF_CLONING
{RTF_CLONING, "CLONING"},
+#endif /* RTF_CLONING */
{RTF_XRESOLVE, "XRESOLVE"},
+#ifdef RTF_LLINFO
{RTF_LLINFO, "LLINFO"},
+#endif /* RTF_LLINFO */
{RTF_STATIC, "STATIC"},
{RTF_BLACKHOLE, "BLACKHOLE"},
#ifdef RTF_PRIVATE
/* #define DEBUG */
/* Supported address family check. */
-static int inline
+static inline int
af_check (int family)
{
if (family == AF_INET)
#endif /* HAVE_IPV6 */
return 0;
}
-\f
+
/* Dump routing table flag for debug purpose. */
static void
rtm_flag_dump (int flag)
{
- struct message *mes;
+ const struct message *mes;
static char buf[BUFSIZ];
buf[0] = '\0';
sizeof(ifan->ifan_name)));
ifp->ifindex = ifan->ifan_index;
+ if_get_metric (ifp);
if_add_update (ifp);
}
else if (ifp != NULL && ifan->ifan_what == IFAN_DEPARTURE)
}
#endif /* RTM_IFANNOUNCE */
+#ifdef HAVE_BSD_IFI_LINK_STATE
+/*
+ * BSD link detect translation: rewrite IFF_RUNNING in ifm->ifm_flags
+ * from the link state reported in ifm->ifm_data.ifi_link_state.
+ * A state of LINK_STATE_UP or above, or LINK_STATE_UNKNOWN, sets the
+ * flag; any other state clears it.
+ */
+static void
+bsd_linkdetect_translate (struct if_msghdr *ifm)
+{
+  /* Below LINK_STATE_UP and not "unknown": clear IFF_RUNNING. */
+  if (ifm->ifm_data.ifi_link_state < LINK_STATE_UP
+      && ifm->ifm_data.ifi_link_state != LINK_STATE_UNKNOWN)
+    {
+      UNSET_FLAG(ifm->ifm_flags, IFF_RUNNING);
+      return;
+    }
+
+  SET_FLAG(ifm->ifm_flags, IFF_RUNNING);
+}
+#endif /* HAVE_BSD_IFI_LINK_STATE */
+
/*
* Handle struct if_msghdr obtained from reading routing socket or
* sysctl (from interface_list). There may or may not be sockaddrs
ifm_read (struct if_msghdr *ifm)
{
struct interface *ifp = NULL;
+ struct sockaddr_dl *sdl;
char ifname[IFNAMSIZ];
short ifnlen = 0;
- caddr_t *cp;
+ caddr_t cp;
/* terminate ifname at head (for strnlen) and tail (for safety) */
ifname[IFNAMSIZ - 1] = '\0';
RTA_ADDR_GET (NULL, RTA_GATEWAY, ifm->ifm_addrs, cp);
RTA_ATTR_GET (NULL, RTA_NETMASK, ifm->ifm_addrs, cp);
RTA_ADDR_GET (NULL, RTA_GENMASK, ifm->ifm_addrs, cp);
+ sdl = (struct sockaddr_dl *)cp;
RTA_NAME_GET (ifname, RTA_IFP, ifm->ifm_addrs, cp, ifnlen);
RTA_ADDR_GET (NULL, RTA_IFA, ifm->ifm_addrs, cp);
RTA_ADDR_GET (NULL, RTA_AUTHOR, ifm->ifm_addrs, cp);
* structure with ifindex IFINDEX_INTERNAL.
*/
ifp->ifindex = ifm->ifm_index;
+
+#ifdef HAVE_BSD_IFI_LINK_STATE /* translate BSD kernel msg for link-state */
+ bsd_linkdetect_translate(ifm);
+#endif /* HAVE_BSD_IFI_LINK_STATE */
+
if_flags_update (ifp, ifm->ifm_flags);
#if defined(__bsdi__)
if_kvm_get_mtu (ifp);
#endif /* __bsdi__ */
if_get_metric (ifp);
+ /*
+ * XXX sockaddr_dl contents can be larger than the structure
+ * definition. There are 2 big families here:
+ * - BSD has sdl_len + sdl_data[16] + overruns sdl_data
+ * we MUST use sdl_len here or we'll truncate data.
+ * - Solaris has no sdl_len, but sdl_data[244]
+ * presumably, it's not going to run past that, so sizeof()
+ * is fine here.
+ * a nonzero ifnlen from RTA_NAME_GET() means sdl is valid
+ */
+ if (ifnlen)
+ {
+#ifdef HAVE_STRUCT_SOCKADDR_DL_SDL_LEN
+ memcpy (&ifp->sdl, sdl, sdl->sdl_len);
+#else
+ memcpy (&ifp->sdl, sdl, sizeof (struct sockaddr_dl));
+#endif /* HAVE_STRUCT_SOCKADDR_DL_SDL_LEN */
+ }
+
if_add_update (ifp);
}
else
return -1;
}
+#ifdef HAVE_BSD_IFI_LINK_STATE /* translate BSD kernel msg for link-state */
+ bsd_linkdetect_translate(ifm);
+#endif /* HAVE_BSD_IFI_LINK_STATE */
+
/* update flags and handle operative->inoperative transition, if any */
if_flags_update (ifp, ifm->ifm_flags);
if_delete_update (ifp);
}
#endif /* RTM_IFANNOUNCE */
+ if (if_is_up (ifp))
+ {
+#if defined(__bsdi__)
+ if_kvm_get_mtu (ifp);
+#else
+ if_get_mtu (ifp);
+#endif /* __bsdi__ */
+ if_get_metric (ifp);
+ }
}
#ifdef HAVE_NET_RT_IFLIST
return 0;
}
-\f
+
/* Address read from struct ifa_msghdr. */
static void
ifam_read_mesg (struct ifa_msghdr *ifm,
/* Assert read up end point matches to end point */
if (pnt != end)
- zlog_warn ("ifam_read() does't read all socket data");
+ zlog_warn ("ifam_read() doesn't read all socket data");
}
/* Interface's address information get. */
/* Unset interface index from link-local address when IPv6 stack
is KAME. */
if (IN6_IS_ADDR_LINKLOCAL (&addr.sin6.sin6_addr))
- SET_IN6_LINKLOCAL_IFINDEX (addr.sin6.sin6_addr, 0);
+ {
+ SET_IN6_LINKLOCAL_IFINDEX (addr.sin6.sin6_addr, 0);
+ }
if (ifam->ifam_type == RTM_NEWADDR)
connected_add_ipv6 (ifp, flags, &addr.sin6.sin6_addr,
return 0;
}
-\f
+
/* Interface function for reading kernel routing table information. */
static int
rtm_read_mesg (struct rt_msghdr *rtm,
/* Assert read up to the end of pointer. */
if (pnt != end)
- zlog (NULL, LOG_WARNING, "rtm_read() does't read all socket data.");
+ zlog (NULL, LOG_WARNING, "rtm_read() doesn't read all socket data.");
return rtm->rtm_flags;
}
return;
if (IS_ZEBRA_DEBUG_KERNEL)
zlog_debug ("%s: got rtm of type %d (%s)", __func__, rtm->rtm_type,
- LOOKUP (rtm_type_str, rtm->rtm_type));
+ lookup (rtm_type_str, rtm->rtm_type));
#ifdef RTF_CLONED /*bsdi, netbsd 1.6*/
if (flags & RTF_CLONED)
* At the same time, ignore unconfirmed messages, they should be tracked
* by rtm_write() and kernel_rtm_ipv4().
*/
- if (rtm->rtm_type != RTM_GET
- && (rtm->rtm_pid == pid || rtm->rtm_pid == old_pid))
+ if (rtm->rtm_type != RTM_GET && rtm->rtm_pid == pid)
{
- char buf[INET_ADDRSTRLEN], gate_buf[INET_ADDRSTRLEN];
+ char buf[PREFIX2STR_BUFFER], gate_buf[INET_ADDRSTRLEN];
int ret;
if (! IS_ZEBRA_DEBUG_RIB)
return;
- ret = rib_lookup_ipv4_route (&p, &gate);
- inet_ntop (AF_INET, &p.prefix, buf, INET_ADDRSTRLEN);
+ ret = rib_lookup_ipv4_route (&p, &gate, VRF_DEFAULT);
+ prefix2str (&p, buf, sizeof(buf));
switch (rtm->rtm_type)
{
case RTM_ADD:
{
case ZEBRA_RIB_NOTFOUND:
zlog_debug ("%s: %s %s/%d: desync: RR isn't yet in RIB, while already in FIB",
- __func__, LOOKUP (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
+ __func__, lookup (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
break;
case ZEBRA_RIB_FOUND_CONNECTED:
case ZEBRA_RIB_FOUND_NOGATE:
inet_ntop (AF_INET, &gate.sin.sin_addr, gate_buf, INET_ADDRSTRLEN);
zlog_debug ("%s: %s %s/%d: desync: RR is in RIB, but gate differs (ours is %s)",
- __func__, LOOKUP (rtm_type_str, rtm->rtm_type), buf, p.prefixlen, gate_buf);
+ __func__, lookup (rtm_type_str, rtm->rtm_type), buf, p.prefixlen, gate_buf);
break;
case ZEBRA_RIB_FOUND_EXACT: /* RIB RR == FIB RR */
zlog_debug ("%s: %s %s/%d: done Ok",
- __func__, LOOKUP (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
+ __func__, lookup (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
rib_lookup_and_dump (&p);
return;
break;
{
case ZEBRA_RIB_FOUND_EXACT:
zlog_debug ("%s: %s %s/%d: desync: RR is still in RIB, while already not in FIB",
- __func__, LOOKUP (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
+ __func__, lookup (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
rib_lookup_and_dump (&p);
break;
case ZEBRA_RIB_FOUND_CONNECTED:
case ZEBRA_RIB_FOUND_NOGATE:
zlog_debug ("%s: %s %s/%d: desync: RR is still in RIB, plus gate differs",
- __func__, LOOKUP (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
+ __func__, lookup (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
rib_lookup_and_dump (&p);
break;
case ZEBRA_RIB_NOTFOUND: /* RIB RR == FIB RR */
zlog_debug ("%s: %s %s/%d: done Ok",
- __func__, LOOKUP (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
+ __func__, lookup (rtm_type_str, rtm->rtm_type), buf, p.prefixlen);
rib_lookup_and_dump (&p);
return;
break;
break;
default:
zlog_debug ("%s: %s/%d: warning: loopback RTM of type %s received",
- __func__, buf, p.prefixlen, LOOKUP (rtm_type_str, rtm->rtm_type));
+ __func__, buf, p.prefixlen, lookup (rtm_type_str, rtm->rtm_type));
}
return;
}
* to specify the route really
*/
if (rtm->rtm_type == RTM_CHANGE)
- rib_delete_ipv4 (ZEBRA_ROUTE_KERNEL, zebra_flags, &p,
- NULL, 0, 0);
+ rib_delete_ipv4 (ZEBRA_ROUTE_KERNEL, 0, zebra_flags, &p,
+ NULL, 0, VRF_DEFAULT, SAFI_UNICAST);
+ /* RTM_GET, RTM_ADD and RTM_CHANGE add the kernel route to the RIB;
+  * every other message type reaching this point deletes it. */
if (rtm->rtm_type == RTM_GET
|| rtm->rtm_type == RTM_ADD
|| rtm->rtm_type == RTM_CHANGE)
- rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, zebra_flags,
- &p, &gate.sin.sin_addr, NULL, 0, 0, 0, 0);
+ rib_add_ipv4 (ZEBRA_ROUTE_KERNEL, 0, zebra_flags,
+ &p, &gate.sin.sin_addr, NULL, 0, VRF_DEFAULT, 0, 0, SAFI_UNICAST);
else
- rib_delete_ipv4 (ZEBRA_ROUTE_KERNEL, zebra_flags,
- &p, &gate.sin.sin_addr, 0, 0);
+ rib_delete_ipv4 (ZEBRA_ROUTE_KERNEL, 0, zebra_flags,
+ &p, &gate.sin.sin_addr, 0, VRF_DEFAULT, SAFI_UNICAST);
}
#ifdef HAVE_IPV6
if (dest.sa.sa_family == AF_INET6)
{
+ /* One day we might have a debug section here like one in the
+ * IPv4 case above. Just ignore own messages at the moment.
+ */
+ if (rtm->rtm_type != RTM_GET && rtm->rtm_pid == pid)
+ return;
struct prefix_ipv6 p;
unsigned int ifindex = 0;
* to specify the route really
*/
if (rtm->rtm_type == RTM_CHANGE)
- rib_delete_ipv6 (ZEBRA_ROUTE_KERNEL, zebra_flags, &p,
- NULL, 0, 0);
+ rib_delete_ipv6 (ZEBRA_ROUTE_KERNEL, 0, zebra_flags, &p,
+ NULL, 0, VRF_DEFAULT, SAFI_UNICAST);
if (rtm->rtm_type == RTM_GET
|| rtm->rtm_type == RTM_ADD
|| rtm->rtm_type == RTM_CHANGE)
- rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, zebra_flags,
- &p, &gate.sin6.sin6_addr, ifindex, 0, 0, 0);
+ rib_add_ipv6 (ZEBRA_ROUTE_KERNEL, 0, zebra_flags,
+ &p, &gate.sin6.sin6_addr, ifindex, VRF_DEFAULT, 0, 0, SAFI_UNICAST);
else
- rib_delete_ipv6 (ZEBRA_ROUTE_KERNEL, zebra_flags,
- &p, &gate.sin6.sin6_addr, ifindex, 0);
+ rib_delete_ipv6 (ZEBRA_ROUTE_KERNEL, 0, zebra_flags,
+ &p, &gate.sin6.sin6_addr, ifindex, VRF_DEFAULT, SAFI_UNICAST);
}
#endif /* HAVE_IPV6 */
}
if (gate && message == RTM_ADD)
msg.rtm.rtm_flags |= RTF_GATEWAY;
+ /* When RTF_CLONING is unavailable on BSD, should we set some
+ * other flag instead?
+ */
+#ifdef RTF_CLONING
if (! gate && message == RTM_ADD && ifp &&
(ifp->flags & IFF_POINTOPOINT) == 0)
msg.rtm.rtm_flags |= RTF_CLONING;
+#endif /* RTF_CLONING */
/* If no protocol specific gateway is specified, use link
address for gateway. */
msg.rtm.rtm_flags |= RTF_REJECT;
-#ifdef HAVE_STRUCT_SOCKADDR_IN_SIN_LEN
-#define SOCKADDRSET(X,R) \
- if (msg.rtm.rtm_addrs & (R)) \
- { \
- int len = ROUNDUP ((X)->sa.sa_len); \
- memcpy (pnt, (caddr_t)(X), len); \
- pnt += len; \
- }
-#else
#define SOCKADDRSET(X,R) \
if (msg.rtm.rtm_addrs & (R)) \
{ \
memcpy (pnt, (caddr_t)(X), len); \
pnt += len; \
}
-#endif /* HAVE_STRUCT_SOCKADDR_IN_SIN_LEN */
pnt = (caddr_t) msg.buf;
return ZEBRA_ERR_NOERROR;
}
-\f
+
#include "thread.h"
#include "zebra/zserv.h"
static void
rtmsg_debug (struct rt_msghdr *rtm)
{
- zlog_debug ("Kernel: Len: %d Type: %s", rtm->rtm_msglen, LOOKUP (rtm_type_str, rtm->rtm_type));
+ zlog_debug ("Kernel: Len: %d Type: %s", rtm->rtm_msglen, lookup (rtm_type_str, rtm->rtm_type)); /* message length plus the type name looked up in rtm_type_str */
rtm_flag_dump (rtm->rtm_flags);
zlog_debug ("Kernel: message seq %d", rtm->rtm_seq);
- zlog_debug ("Kernel: pid %d, rtm_addrs 0x%x", rtm->rtm_pid, rtm->rtm_addrs);
+ zlog_debug ("Kernel: pid %lld, rtm_addrs 0x%x",
+ (long long)rtm->rtm_pid, rtm->rtm_addrs); /* rtm_pid is cast to long long so it matches the %lld conversion */
}
/* This is pretty gross, better suggestions welcome -- mhandler */
/* Make routing socket. */
static void
-routing_socket (void)
+routing_socket (struct zebra_ns *zns)
{
if ( zserv_privs.change (ZPRIVS_RAISE) )
zlog_err ("routing_socket: Can't raise privileges");
/* Exported interface function. This function simply calls
routing_socket (). */
void
-kernel_init (void)
+kernel_init (struct zebra_ns *zns)
+{
+ routing_socket (zns); /* initialisation consists solely of this call; ZNS is passed through unchanged */
+}
+
+void
+kernel_terminate (struct zebra_ns *zns)
{
- routing_socket ();
+ return; /* no teardown is performed here; ZNS is unused */
}