]> git.proxmox.com Git - mirror_frr.git/blobdiff - zebra/kernel_netlink.c
Merge pull request #2561 from pacovn/Coverity_1399231_Logically_dead_code
[mirror_frr.git] / zebra / kernel_netlink.c
index 0b3b6eed45761b5db16ce36e597f6187ba5d0978..7334c8094a3001c0bf4ccde57cb0ff43842c3b14 100644 (file)
@@ -45,6 +45,7 @@
 #include "zebra/kernel_netlink.h"
 #include "zebra/rt_netlink.h"
 #include "zebra/if_netlink.h"
+#include "zebra/rule_netlink.h"
 
 #ifndef SO_RCVBUFFORCE
 #define SO_RCVBUFFORCE  (33)
 
 #ifndef NLMSG_TAIL
 #define NLMSG_TAIL(nmsg)                                                       \
-       ((struct rtattr *)(((u_char *)(nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
+       ((struct rtattr *)(((uint8_t *)(nmsg))                                 \
+                          + NLMSG_ALIGN((nmsg)->nlmsg_len)))
 #endif
 
 #ifndef RTA_TAIL
 #define RTA_TAIL(rta)                                                          \
-       ((struct rtattr *)(((u_char *)(rta)) + RTA_ALIGN((rta)->rta_len)))
+       ((struct rtattr *)(((uint8_t *)(rta)) + RTA_ALIGN((rta)->rta_len)))
 #endif
 
 #ifndef RTNL_FAMILY_IP6MR
@@ -85,6 +87,9 @@ static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
                                           {RTM_NEWNEIGH, "RTM_NEWNEIGH"},
                                           {RTM_DELNEIGH, "RTM_DELNEIGH"},
                                           {RTM_GETNEIGH, "RTM_GETNEIGH"},
+                                          {RTM_NEWRULE, "RTM_NEWRULE"},
+                                          {RTM_DELRULE, "RTM_DELRULE"},
+                                          {RTM_GETRULE, "RTM_GETRULE"},
                                           {0}};
 
 static const struct message rtproto_str[] = {
@@ -105,6 +110,7 @@ static const struct message rtproto_str[] = {
        {RTPROT_ISIS, "IS-IS"},
        {RTPROT_RIP, "RIP"},
        {RTPROT_RIPNG, "RIPNG"},
+       {RTPROT_ZSTATIC, "static"},
        {0}};
 
 static const struct message family_str[] = {{AF_INET, "ipv4"},
@@ -119,21 +125,32 @@ static const struct message rttype_str[] = {{RTN_UNICAST, "unicast"},
                                            {0}};
 
 extern struct thread_master *master;
-extern u_int32_t nl_rcvbufsize;
+extern uint32_t nl_rcvbufsize;
 
 extern struct zebra_privs_t zserv_privs;
 
-int netlink_talk_filter(struct sockaddr_nl *snl, struct nlmsghdr *h,
-                       ns_id_t ns_id, int startup)
+int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns_id, int startup)
 {
-       zlog_warn("netlink_talk: ignoring message type 0x%04x NS %u",
-                 h->nlmsg_type, ns_id);
+       /*
+        * This is an error condition that must be handled during
+        * development.
+        *
+        * The netlink_talk_filter function is used for communication
+        * down the netlink_cmd pipe and we are expecting
+        * an ack to be received.  So if we get here
+        * then we did not receive the ack and instead
+        * received some other message in an unexpected
+        * way.
+        */
+       zlog_err("%s: ignoring message type 0x%04x(%s) NS %u",
+                __PRETTY_FUNCTION__, h->nlmsg_type,
+                nl_msg_type_to_str(h->nlmsg_type), ns_id);
        return 0;
 }
 
 static int netlink_recvbuf(struct nlsock *nl, uint32_t newsize)
 {
-       u_int32_t oldsize;
+       uint32_t oldsize;
        socklen_t newlen = sizeof(newsize);
        socklen_t oldlen = sizeof(oldsize);
        int ret;
@@ -227,45 +244,50 @@ static int netlink_socket(struct nlsock *nl, unsigned long groups,
        return ret;
 }
 
-static int netlink_information_fetch(struct sockaddr_nl *snl,
-                                    struct nlmsghdr *h, ns_id_t ns_id,
+static int netlink_information_fetch(struct nlmsghdr *h, ns_id_t ns_id,
                                     int startup)
 {
-       /* JF: Ignore messages that aren't from the kernel */
-       if (snl->nl_pid != 0) {
-               zlog_err("Ignoring message from pid %u", snl->nl_pid);
-               return 0;
-       }
-
+       /*
+        * When we handle new message types here
+        * because we are starting to install them
+        * then let's check the netlink_install_filter
+        * and see if we should add the corresponding
+        * allow through entry there.
+        * Probably not needed to do but please
+        * think about it.
+        */
        switch (h->nlmsg_type) {
        case RTM_NEWROUTE:
-               return netlink_route_change(snl, h, ns_id, startup);
-               break;
+               return netlink_route_change(h, ns_id, startup);
        case RTM_DELROUTE:
-               return netlink_route_change(snl, h, ns_id, startup);
-               break;
+               return netlink_route_change(h, ns_id, startup);
        case RTM_NEWLINK:
-               return netlink_link_change(snl, h, ns_id, startup);
-               break;
+               return netlink_link_change(h, ns_id, startup);
        case RTM_DELLINK:
-               return netlink_link_change(snl, h, ns_id, startup);
-               break;
+               return netlink_link_change(h, ns_id, startup);
        case RTM_NEWADDR:
-               return netlink_interface_addr(snl, h, ns_id, startup);
-               break;
+               return netlink_interface_addr(h, ns_id, startup);
        case RTM_DELADDR:
-               return netlink_interface_addr(snl, h, ns_id, startup);
-               break;
+               return netlink_interface_addr(h, ns_id, startup);
        case RTM_NEWNEIGH:
-               return netlink_neigh_change(snl, h, ns_id);
-               break;
+               return netlink_neigh_change(h, ns_id);
        case RTM_DELNEIGH:
-               return netlink_neigh_change(snl, h, ns_id);
-               break;
+               return netlink_neigh_change(h, ns_id);
+       case RTM_NEWRULE:
+               return netlink_rule_change(h, ns_id, startup);
+       case RTM_DELRULE:
+               return netlink_rule_change(h, ns_id, startup);
        default:
-               if (IS_ZEBRA_DEBUG_KERNEL)
-                       zlog_debug("Unknown netlink nlmsg_type %d vrf %u\n",
-                                  h->nlmsg_type, ns_id);
+               /*
+                * If we have received this message then
+                * we have made a mistake during development
+                * and we need to write some code to handle
+                * this message type or not ask for
+                * it to be sent up to us
+                */
+               zlog_err("Unknown netlink nlmsg_type %s(%d) vrf %u\n",
+                        nl_msg_type_to_str(h->nlmsg_type), h->nlmsg_type,
+                        ns_id);
                break;
        }
        return 0;
@@ -282,31 +304,69 @@ static int kernel_read(struct thread *thread)
        return 0;
 }
 
-/* Filter out messages from self that occur on listener socket,
+/*
+ * Filter out messages from self that occur on listener socket,
  * caused by our actions on the command socket
+ *
+ * When we add new Netlink message types we probably
+ * do not need to add them here, as we are filtering
+ * on the routes we actually care to receive (which is rarer
+ * than the normal course of operations).  We are intentionally
+ * allowing some messages from ourselves through
+ * ( I'm looking at you Interface based netlink messages )
+ * so that we only had to write one way to handle incoming
+ * address add/delete changes.
  */
 static void netlink_install_filter(int sock, __u32 pid)
 {
+       /*
+        * BPF_JUMP instructions and where you jump to are based upon
+        * 0 as being the next statement.  So count from 0.  Writing
+        * this down because every time I look at this I have to
+        * re-remember it.
+        */
        struct sock_filter filter[] = {
-               /* 0: ldh [4]             */
-               BPF_STMT(BPF_LD | BPF_ABS | BPF_H,
-                        offsetof(struct nlmsghdr, nlmsg_type)),
-               /* 1: jeq 0x18 jt 5 jf next  */
-               BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWROUTE), 3, 0),
-               /* 2: jeq 0x19 jt 5 jf next  */
-               BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELROUTE), 2, 0),
-               /* 3: jeq 0x19 jt 5 jf next  */
-               BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWNEIGH), 1, 0),
-               /* 4: jeq 0x19 jt 5 jf 8  */
-               BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELNEIGH), 0, 3),
-               /* 5: ldw [12]            */
+               /*
+                * Logic:
+                *   if (nlmsg_pid == pid) {
+                *       if (the incoming nlmsg_type ==
+                *           RTM_NEWADDR | RTM_DELADDR)
+                *           keep this message
+                *       else
+                *           skip this message
+                *   } else
+                *       keep this netlink message
+                */
+               /*
+                * 0: Load the nlmsg_pid into the BPF register
+                */
                BPF_STMT(BPF_LD | BPF_ABS | BPF_W,
                         offsetof(struct nlmsghdr, nlmsg_pid)),
-               /* 6: jeq XX  jt 7 jf 8   */
-               BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 0, 1),
-               /* 7: ret 0    (skip)     */
+               /*
+                * 1: Compare to pid
+                */
+               BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 0, 4),
+               /*
+                * 2: Load the nlmsg_type into BPF register
+                */
+               BPF_STMT(BPF_LD | BPF_ABS | BPF_H,
+                        offsetof(struct nlmsghdr, nlmsg_type)),
+               /*
+                * 3: Compare to RTM_NEWADDR
+                */
+               BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWADDR), 2, 0),
+               /*
+                * 4: Compare to RTM_DELADDR
+                */
+               BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELADDR), 1, 0),
+               /*
+                * 5: This is the end state if we want to skip the
+                *    message
+                */
                BPF_STMT(BPF_RET | BPF_K, 0),
-               /* 8: ret 0xffff (keep)   */
+               /* 6: This is the end state if we want to keep
+                *     the message
+                */
                BPF_STMT(BPF_RET | BPF_K, 0xffff),
        };
 
@@ -380,14 +440,14 @@ int rta_addattr_l(struct rtattr *rta, unsigned int maxlen, int type, void *data,
        return 0;
 }
 
-int addattr16(struct nlmsghdr *n, unsigned int maxlen, int type, u_int16_t data)
+int addattr16(struct nlmsghdr *n, unsigned int maxlen, int type, uint16_t data)
 {
-       return addattr_l(n, maxlen, type, &data, sizeof(u_int16_t));
+       return addattr_l(n, maxlen, type, &data, sizeof(uint16_t));
 }
 
 int addattr32(struct nlmsghdr *n, unsigned int maxlen, int type, int data)
 {
-       return addattr_l(n, maxlen, type, &data, sizeof(u_int32_t));
+       return addattr_l(n, maxlen, type, &data, sizeof(uint32_t));
 }
 
 struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type)
@@ -400,7 +460,7 @@ struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type)
 
 int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest)
 {
-       nest->rta_len = (u_char *)NLMSG_TAIL(n) - (u_char *)nest;
+       nest->rta_len = (uint8_t *)NLMSG_TAIL(n) - (uint8_t *)nest;
        return n->nlmsg_len;
 }
 
@@ -414,7 +474,7 @@ struct rtattr *rta_nest(struct rtattr *rta, int maxlen, int type)
 
 int rta_nest_end(struct rtattr *rta, struct rtattr *nest)
 {
-       nest->rta_len = (u_char *)RTA_TAIL(rta) - (u_char *)nest;
+       nest->rta_len = (uint8_t *)RTA_TAIL(rta) - (uint8_t *)nest;
        return rta->rta_len;
 }
 
@@ -423,21 +483,91 @@ const char *nl_msg_type_to_str(uint16_t msg_type)
        return lookup_msg(nlmsg_str, msg_type, "");
 }
 
-const char *nl_rtproto_to_str(u_char rtproto)
+const char *nl_rtproto_to_str(uint8_t rtproto)
 {
        return lookup_msg(rtproto_str, rtproto, "");
 }
 
-const char *nl_family_to_str(u_char family)
+const char *nl_family_to_str(uint8_t family)
 {
        return lookup_msg(family_str, family, "");
 }
 
-const char *nl_rttype_to_str(u_char rttype)
+const char *nl_rttype_to_str(uint8_t rttype)
 {
        return lookup_msg(rttype_str, rttype, "");
 }
 
+#define NL_OK(nla, len)                                                        \
+       ((len) >= (int)sizeof(struct nlattr)                                   \
+        && (nla)->nla_len >= sizeof(struct nlattr)                            \
+        && (nla)->nla_len <= (len))
+#define NL_NEXT(nla, attrlen)                                                  \
+       ((attrlen) -= RTA_ALIGN((nla)->nla_len),                               \
+        (struct nlattr *)(((char *)(nla)) + RTA_ALIGN((nla)->nla_len)))
+#define NL_RTA(r)                                                              \
+       ((struct nlattr *)(((char *)(r))                                       \
+                          + NLMSG_ALIGN(sizeof(struct nlmsgerr))))
+
+static void netlink_parse_nlattr(struct nlattr **tb, int max,
+                                struct nlattr *nla, int len)
+{
+       while (NL_OK(nla, len)) {
+               if (nla->nla_type <= max)
+                       tb[nla->nla_type] = nla;
+               nla = NL_NEXT(nla, len);
+       }
+}
+
+static void netlink_parse_extended_ack(struct nlmsghdr *h)
+{
+       struct nlattr *tb[NLMSGERR_ATTR_MAX + 1];
+       const struct nlmsgerr *err =
+               (const struct nlmsgerr *)((uint8_t *)h
+                                         + NLMSG_ALIGN(
+                                                   sizeof(struct nlmsghdr)));
+       const struct nlmsghdr *err_nlh = NULL;
+       uint32_t hlen = sizeof(*err);
+       const char *msg = NULL;
+       uint32_t off = 0;
+
+       if (!(h->nlmsg_flags & NLM_F_CAPPED))
+               hlen += h->nlmsg_len - NLMSG_ALIGN(sizeof(struct nlmsghdr));
+
+       memset(tb, 0, sizeof(tb));
+       netlink_parse_nlattr(tb, NLMSGERR_ATTR_MAX, NL_RTA(h), hlen);
+
+       if (tb[NLMSGERR_ATTR_MSG])
+               msg = (const char *)RTA_DATA(tb[NLMSGERR_ATTR_MSG]);
+
+       if (tb[NLMSGERR_ATTR_OFFS]) {
+               off = *(uint32_t *)RTA_DATA(tb[NLMSGERR_ATTR_OFFS]);
+
+               if (off > h->nlmsg_len) {
+                       zlog_err("Invalid offset for NLMSGERR_ATTR_OFFS\n");
+                       off = 0;
+               } else if (!(h->nlmsg_flags & NLM_F_CAPPED)) {
+                       /*
+                        * Header of failed message
+                        * we are not doing anything currently with it
+                        * but noticing it for later.
+                        */
+                       err_nlh = &err->msg;
+                       zlog_warn("%s: Received %d extended Ack",
+                                 __PRETTY_FUNCTION__, err_nlh->nlmsg_type);
+               }
+       }
+
+       if (msg && *msg != '\0') {
+               bool is_err = !!err->error;
+
+               if (is_err)
+                       zlog_err("Extended Error: %s", msg);
+               else
+                       zlog_warn("Extended Warning: %s", msg);
+       }
+}
+
 /*
  * netlink_parse_info
  *
@@ -451,8 +581,7 @@ const char *nl_rttype_to_str(u_char rttype)
  * startup -> Are we reading in under startup conditions? passed to
  *            the filter.
  */
-int netlink_parse_info(int (*filter)(struct sockaddr_nl *, struct nlmsghdr *,
-                                    ns_id_t, int),
+int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
                       struct nlsock *nl, struct zebra_ns *zns, int count,
                       int startup)
 {
@@ -462,7 +591,7 @@ int netlink_parse_info(int (*filter)(struct sockaddr_nl *, struct nlmsghdr *,
        int read_in = 0;
 
        while (1) {
-               char buf[NL_PKT_BUF_SIZE];
+               char buf[NL_RCV_PKT_BUF_SIZE];
                struct iovec iov = {.iov_base = buf, .iov_len = sizeof buf};
                struct sockaddr_nl snl;
                struct msghdr msg = {.msg_name = (void *)&snl,
@@ -523,6 +652,23 @@ int netlink_parse_info(int (*filter)(struct sockaddr_nl *, struct nlmsghdr *,
                                int errnum = err->error;
                                int msg_type = err->msg.nlmsg_type;
 
+                               if (h->nlmsg_len
+                                   < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
+                                       zlog_err("%s error: message truncated",
+                                                nl->name);
+                                       return -1;
+                               }
+
+                               /*
+                                * Parse the extended information before
+                                * we actually handle it.
+                                * At this point in time we do not
+                                * do anything other than report the
+                                * issue.
+                                */
+                               if (h->nlmsg_flags & NLM_F_ACK_TLVS)
+                                       netlink_parse_extended_ack(h);
+
                                /* If the error field is zero, then this is an
                                 * ACK */
                                if (err->error == 0) {
@@ -544,13 +690,6 @@ int netlink_parse_info(int (*filter)(struct sockaddr_nl *, struct nlmsghdr *,
                                        continue;
                                }
 
-                               if (h->nlmsg_len
-                                   < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
-                                       zlog_err("%s error: message truncated",
-                                                nl->name);
-                                       return -1;
-                               }
-
                                /* Deal with errors that occur because of races
                                 * in link handling */
                                if (nl == &zns->netlink_cmd
@@ -619,26 +758,21 @@ int netlink_parse_info(int (*filter)(struct sockaddr_nl *, struct nlmsghdr *,
                                        h->nlmsg_type, h->nlmsg_len,
                                        h->nlmsg_seq, h->nlmsg_pid);
 
-                       /* skip unsolicited messages originating from command
-                        * socket
-                        * linux sets the originators port-id for {NEW|DEL}ADDR
-                        * messages,
-                        * so this has to be checked here. */
-                       if (nl != &zns->netlink_cmd
-                           && h->nlmsg_pid == zns->netlink_cmd.snl.nl_pid
-                           && (h->nlmsg_type != RTM_NEWADDR
-                               && h->nlmsg_type != RTM_DELADDR)) {
-                               if (IS_ZEBRA_DEBUG_KERNEL)
-                                       zlog_debug(
-                                               "netlink_parse_info: %s packet comes from %s",
-                                               zns->netlink_cmd.name,
-                                               nl->name);
+
+                       /*
+                        * Ignore messages that may be sent from
+                        * other actors besides the kernel
+                        */
+                       if (snl.nl_pid != 0) {
+                               zlog_err("Ignoring message from pid %u",
+                                        snl.nl_pid);
                                continue;
                        }
 
-                       error = (*filter)(&snl, h, zns->ns_id, startup);
+                       error = (*filter)(h, zns->ns_id, startup);
                        if (error < 0) {
                                zlog_err("%s filter function error", nl->name);
+                               zlog_backtrace(LOG_ERR);
                                ret = error;
                        }
                }
@@ -670,8 +804,7 @@ int netlink_parse_info(int (*filter)(struct sockaddr_nl *, struct nlmsghdr *,
  * startup  -> Are we reading in under startup conditions
  *             This is passed through eventually to filter.
  */
-int netlink_talk(int (*filter)(struct sockaddr_nl *, struct nlmsghdr *, ns_id_t,
-                              int startup),
+int netlink_talk(int (*filter)(struct nlmsghdr *, ns_id_t, int startup),
                 struct nlmsghdr *n, struct nlsock *nl, struct zebra_ns *zns,
                 int startup)
 {
@@ -784,11 +917,28 @@ int netlink_request(struct nlsock *nl, struct nlmsghdr *n)
 void kernel_init(struct zebra_ns *zns)
 {
        unsigned long groups;
+#if defined SOL_NETLINK
+       int one, ret;
+#endif
 
-       /* Initialize netlink sockets */
-       groups = RTMGRP_LINK | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_IFADDR
-                | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFADDR | RTMGRP_IPV4_MROUTE
-                | RTMGRP_NEIGH;
+       /*
+        * Initialize netlink sockets
+        *
+        * If RTMGRP_XXX exists use that, but at some point
+        * I think the kernel developers realized that
+        * keeping track of all the different values would
+        * lead to confusion, so we need to convert the
+        * RTNLGRP_XXX to a bit position for ourselves
+        */
+       groups = RTMGRP_LINK                   |
+               RTMGRP_IPV4_ROUTE              |
+               RTMGRP_IPV4_IFADDR             |
+               RTMGRP_IPV6_ROUTE              |
+               RTMGRP_IPV6_IFADDR             |
+               RTMGRP_IPV4_MROUTE             |
+               RTMGRP_NEIGH                   |
+               (1 << (RTNLGRP_IPV4_RULE - 1)) |
+               (1 << (RTNLGRP_IPV6_RULE - 1));
 
        snprintf(zns->netlink.name, sizeof(zns->netlink.name),
                 "netlink-listen (NS %u)", zns->ns_id);
@@ -800,6 +950,25 @@ void kernel_init(struct zebra_ns *zns)
        zns->netlink_cmd.sock = -1;
        netlink_socket(&zns->netlink_cmd, 0, zns->ns_id);
 
+       /*
+        * SOL_NETLINK is not available on all platforms yet
+        * apparently.  It's in bits/socket.h which I am not
+        * sure that we want to pull into our build system.
+        */
+#if defined SOL_NETLINK
+       /*
+        * Let's tell the kernel that we want to receive extended
+        * ACKS over our command socket
+        */
+       one = 1;
+       ret = setsockopt(zns->netlink_cmd.sock, SOL_NETLINK, NETLINK_EXT_ACK,
+                        &one, sizeof(one));
+
+       if (ret < 0)
+               zlog_notice("Registration for extended ACK failed : %d %s",
+                           errno, safe_strerror(errno));
+#endif
+
        /* Register kernel socket. */
        if (zns->netlink.sock > 0) {
                /* Only want non-blocking on the netlink event socket */
@@ -814,6 +983,7 @@ void kernel_init(struct zebra_ns *zns)
                netlink_install_filter(zns->netlink.sock,
                                       zns->netlink_cmd.snl.nl_pid);
                zns->t_netlink = NULL;
+
                thread_add_read(zebrad.master, kernel_read, zns,
                                zns->netlink.sock, &zns->t_netlink);
        }