Merge branch 'master' into net-next

author Stephen Hemminger <shemming@brocade.com>

Fri, 10 Apr 2015 20:27:37 +0000 (13:27 -0700)

committer Stephen Hemminger <shemming@brocade.com>

Fri, 10 Apr 2015 20:27:37 +0000 (13:27 -0700)
author Stephen Hemminger <shemming@brocade.com>
Fri, 10 Apr 2015 20:27:37 +0000 (13:27 -0700)
committer Stephen Hemminger <shemming@brocade.com>
Fri, 10 Apr 2015 20:27:37 +0000 (13:27 -0700)
diff --git a/Makefile b/Makefile

index 9dbb29f3d0cdb414a7ee5772ec38d320c1c14b64..ca6c2e1413084e73c99e7fd57bd9f2a63b244521 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -26,6 +26,9 @@ ADDLIB+=dnet_ntop.o dnet_pton.o
  #options for ipx
  ADDLIB+=ipx_ntop.o ipx_pton.o
  
+#options for mpls
+ADDLIB+=mpls_ntop.o mpls_pton.o
+
  CC = gcc
  HOSTCC = gcc
  DEFINES += -D_GNU_SOURCE
diff --git a/configure b/configure

index e54f4b6db5e87a6535239b57e4346e05ca4cf4a5..8f528af4c04497282c3ed26bf31b06959c2a901d 100755 (executable)
--- a/configure
+++ b/configure
@@ -266,6 +266,29 @@ EOF
      rm -f $TMPDIR/ipsettest.c $TMPDIR/ipsettest
  }
  
+check_elf()
+{
+    cat >$TMPDIR/elftest.c <<EOF
+#include <libelf.h>
+#include <gelf.h>
+int main(void)
+{
+       Elf_Scn *scn;
+       GElf_Shdr shdr;
+       return elf_version(EV_CURRENT);
+}
+EOF
+
+    if $CC -I$INCLUDE -o $TMPDIR/elftest $TMPDIR/elftest.c -lelf >/dev/null 2>&1
+    then
+       echo "TC_CONFIG_ELF:=y" >>Config
+       echo "yes"
+    else
+       echo "no"
+    fi
+    rm -f $TMPDIR/elftest.c $TMPDIR/elftest
+}
+
  check_selinux()
  # SELinux is a compile time option in the ss utility
  {
@@ -306,5 +329,8 @@ check_netnsid
  echo -n "SELinux support: "
  check_selinux
  
+echo -n "ELF support: "
+check_elf
+
  echo -e "\nDocs"
  check_docs
diff --git a/doc/ip-cref.tex b/doc/ip-cref.tex

index e7a79a5d24c846d652d21aafa51f0e3edd67369d..ea147950ff4af1795301db26a1ed937120bf76a0 100644 (file)
--- a/doc/ip-cref.tex
+++ b/doc/ip-cref.tex
@@ -1432,6 +1432,17 @@ database.
  even if it does not match any interface prefix. One application of this
  option may be found in~\cite{IP-TUNNELS}.
  
+\item \verb|pref PREF|
+
+--- the IPv6 route preference.
+\verb|PREF| is a string specifying the route preference as defined in
+RFC4191 for Router Discovery messages. Namely:
+\begin{itemize}
+\item \verb|low| --- the route has the lowest priority.
+\item \verb|medium| --- the route has the default priority.
+\item \verb|high| --- the route has the highest priority.
+\end{itemize}
+
  \end{itemize}
  
  
diff --git a/include/linux/bpf.h b/include/linux/bpf.h

new file mode 100644 (file)

index 0000000..08aab3a
--- /dev/null
+++ b/include/linux/bpf.h
@@ -0,0 +1,226 @@
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef __LINUX_BPF_H__
+#define __LINUX_BPF_H__
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64      0x07    /* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW         0x18    /* double word */
+#define BPF_XADD       0xc0    /* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV                0xb0    /* mov reg to reg */
+#define BPF_ARSH       0xc0    /* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END                0xd0    /* flags for endianness conversion: */
+#define BPF_TO_LE      0x00    /* convert to little-endian */
+#define BPF_TO_BE      0x08    /* convert to big-endian */
+#define BPF_FROM_LE    BPF_TO_LE
+#define BPF_FROM_BE    BPF_TO_BE
+
+#define BPF_JNE                0x50    /* jump != */
+#define BPF_JSGT       0x60    /* SGT is signed '>', GT in x86 */
+#define BPF_JSGE       0x70    /* SGE is signed '>=', GE in x86 */
+#define BPF_CALL       0x80    /* function call */
+#define BPF_EXIT       0x90    /* function return */
+
+/* Register numbers */
+enum {
+       BPF_REG_0 = 0,
+       BPF_REG_1,
+       BPF_REG_2,
+       BPF_REG_3,
+       BPF_REG_4,
+       BPF_REG_5,
+       BPF_REG_6,
+       BPF_REG_7,
+       BPF_REG_8,
+       BPF_REG_9,
+       BPF_REG_10,
+       __MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG    __MAX_BPF_REG
+
+struct bpf_insn {
+       __u8    code;           /* opcode */
+       __u8    dst_reg:4;      /* dest register */
+       __u8    src_reg:4;      /* source register */
+       __s16   off;            /* signed offset */
+       __s32   imm;            /* signed immediate constant */
+};
+
+/* BPF syscall commands */
+enum bpf_cmd {
+       /* create a map with given type and attributes
+        * fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size)
+        * returns fd or negative error
+        * map is deleted when fd is closed
+        */
+       BPF_MAP_CREATE,
+
+       /* lookup key in a given map
+        * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
+        * Using attr->map_fd, attr->key, attr->value
+        * returns zero and stores found elem into value
+        * or negative error
+        */
+       BPF_MAP_LOOKUP_ELEM,
+
+       /* create or update key/value pair in a given map
+        * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
+        * Using attr->map_fd, attr->key, attr->value, attr->flags
+        * returns zero or negative error
+        */
+       BPF_MAP_UPDATE_ELEM,
+
+       /* find and delete elem by key in a given map
+        * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
+        * Using attr->map_fd, attr->key
+        * returns zero or negative error
+        */
+       BPF_MAP_DELETE_ELEM,
+
+       /* lookup key in a given map and return next key
+        * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
+        * Using attr->map_fd, attr->key, attr->next_key
+        * returns zero and stores next key or negative error
+        */
+       BPF_MAP_GET_NEXT_KEY,
+
+       /* verify and load eBPF program
+        * prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size)
+        * Using attr->prog_type, attr->insns, attr->license
+        * returns fd or negative error
+        */
+       BPF_PROG_LOAD,
+};
+
+enum bpf_map_type {
+       BPF_MAP_TYPE_UNSPEC,
+       BPF_MAP_TYPE_HASH,
+       BPF_MAP_TYPE_ARRAY,
+};
+
+enum bpf_prog_type {
+       BPF_PROG_TYPE_UNSPEC,
+       BPF_PROG_TYPE_SOCKET_FILTER,
+       BPF_PROG_TYPE_SCHED_CLS,
+       BPF_PROG_TYPE_SCHED_ACT,
+};
+
+#define BPF_PSEUDO_MAP_FD      1
+
+/* flags for BPF_MAP_UPDATE_ELEM command */
+#define BPF_ANY                0 /* create new element or update existing */
+#define BPF_NOEXIST    1 /* create new element if it didn't exist */
+#define BPF_EXIST      2 /* update existing element */
+
+union bpf_attr {
+       struct { /* anonymous struct used by BPF_MAP_CREATE command */
+               __u32   map_type;       /* one of enum bpf_map_type */
+               __u32   key_size;       /* size of key in bytes */
+               __u32   value_size;     /* size of value in bytes */
+               __u32   max_entries;    /* max number of entries in a map */
+       };
+
+       struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+               __u32           map_fd;
+               __aligned_u64   key;
+               union {
+                       __aligned_u64 value;
+                       __aligned_u64 next_key;
+               };
+               __u64           flags;
+       };
+
+       struct { /* anonymous struct used by BPF_PROG_LOAD command */
+               __u32           prog_type;      /* one of enum bpf_prog_type */
+               __u32           insn_cnt;
+               __aligned_u64   insns;
+               __aligned_u64   license;
+               __u32           log_level;      /* verbosity level of verifier */
+               __u32           log_size;       /* size of user buffer */
+               __aligned_u64   log_buf;        /* user supplied buffer */
+       };
+} __attribute__((aligned(8)));
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+enum bpf_func_id {
+       BPF_FUNC_unspec,
+       BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
+       BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
+       BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
+       BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
+       BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
+
+       /**
+        * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet
+        * @skb: pointer to skb
+        * @offset: offset within packet from skb->data
+        * @from: pointer where to copy bytes from
+        * @len: number of bytes to store into packet
+        * @flags: bit 0 - if true, recompute skb->csum
+        *         other bits - reserved
+        * Return: 0 on success
+        */
+       BPF_FUNC_skb_store_bytes,
+
+       /**
+        * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum
+        * @skb: pointer to skb
+        * @offset: offset within packet where IP checksum is located
+        * @from: old value of header field
+        * @to: new value of header field
+        * @flags: bits 0-3 - size of header field
+        *         other bits - reserved
+        * Return: 0 on success
+        */
+       BPF_FUNC_l3_csum_replace,
+
+       /**
+        * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum
+        * @skb: pointer to skb
+        * @offset: offset within packet where TCP/UDP checksum is located
+        * @from: old value of header field
+        * @to: new value of header field
+        * @flags: bits 0-3 - size of header field
+        *         bit 4 - is pseudo header
+        *         other bits - reserved
+        * Return: 0 on success
+        */
+       BPF_FUNC_l4_csum_replace,
+       __BPF_FUNC_MAX_ID,
+};
+
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+       __u32 len;
+       __u32 pkt_type;
+       __u32 mark;
+       __u32 queue_mapping;
+       __u32 protocol;
+       __u32 vlan_present;
+       __u32 vlan_tci;
+       __u32 vlan_proto;
+       __u32 priority;
+};
+
+#endif /* __LINUX_BPF_H__ */
diff --git a/include/linux/filter.h b/include/linux/filter.h

index 8688a9851a4bc1fce6862774508c27d25d525a93..344781d71555b98a970d9fcdfea4d6c883af34df 100644 (file)
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -77,7 +77,8 @@ struct sock_fprog {   /* Required for SO_ATTACH_FILTER. */
  #define SKF_AD_VLAN_TAG_PRESENT 48
  #define SKF_AD_PAY_OFFSET      52
  #define SKF_AD_RANDOM  56
-#define SKF_AD_MAX     60
+#define SKF_AD_VLAN_TPID       60
+#define SKF_AD_MAX     64
  #define SKF_NET_OFF   (-0x100000)
  #define SKF_LL_OFF    (-0x200000)
  
diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h

index cc375e42e8bbfc9f4683c36b3324b8534399ced7..26f0ecff9f13dddc381a4c89578e5f5e65e1c30b 100644 (file)
--- a/include/linux/if_addr.h
+++ b/include/linux/if_addr.h
@@ -50,6 +50,8 @@ enum {
  #define IFA_F_PERMANENT                0x80
  #define IFA_F_MANAGETEMPADDR   0x100
  #define IFA_F_NOPREFIXROUTE    0x200
+#define IFA_F_MCAUTOJOIN       0x400
+#define IFA_F_STABLE_PRIVACY   0x800
  
  struct ifa_cacheinfo {
         __u32   ifa_prefered;
diff --git a/include/linux/if_link.h b/include/linux/if_link.h

index 3450c3fbdc653533549f9ed2147559d1f14a1b4e..6689e8fdf4f03f68e69d3ba9096234e90dd51cbc 100644 (file)
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -147,6 +147,7 @@ enum {
         IFLA_CARRIER_CHANGES,
         IFLA_PHYS_SWITCH_ID,
         IFLA_LINK_NETNSID,
+       IFLA_PHYS_PORT_NAME,
         __IFLA_MAX
  };
  
@@ -213,6 +214,7 @@ enum {
  enum in6_addr_gen_mode {
         IN6_ADDR_GEN_MODE_EUI64,
         IN6_ADDR_GEN_MODE_NONE,
+       IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
  };
  
  /* Bridge section */
@@ -222,6 +224,9 @@ enum {
         IFLA_BR_FORWARD_DELAY,
         IFLA_BR_HELLO_TIME,
         IFLA_BR_MAX_AGE,
+       IFLA_BR_AGEING_TIME,
+       IFLA_BR_STP_STATE,
+       IFLA_BR_PRIORITY,
         __IFLA_BR_MAX,
  };
  
@@ -245,6 +250,7 @@ enum {
         IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */
         IFLA_BRPORT_PROXYARP,   /* proxy ARP */
         IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
+       IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */
         __IFLA_BRPORT_MAX
  };
  #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/include/linux/mpls.h b/include/linux/mpls.h

new file mode 100644 (file)

index 0000000..0893902
--- /dev/null
+++ b/include/linux/mpls.h
@@ -0,0 +1,34 @@
+#ifndef _MPLS_H
+#define _MPLS_H
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+/* Reference: RFC 5462, RFC 3032
+ *
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                Label                  | TC  |S|       TTL     |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ *     Label:  Label Value, 20 bits
+ *     TC:     Traffic Class field, 3 bits
+ *     S:      Bottom of Stack, 1 bit
+ *     TTL:    Time to Live, 8 bits
+ */
+
+struct mpls_label {
+       __be32 entry;
+};
+
+#define MPLS_LS_LABEL_MASK      0xFFFFF000
+#define MPLS_LS_LABEL_SHIFT     12
+#define MPLS_LS_TC_MASK         0x00000E00
+#define MPLS_LS_TC_SHIFT        9
+#define MPLS_LS_S_MASK          0x00000100
+#define MPLS_LS_S_SHIFT         8
+#define MPLS_LS_TTL_MASK        0x000000FF
+#define MPLS_LS_TTL_SHIFT       0
+
+#endif /* _MPLS_H */
diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h

index 3873a35509aad201f4d5ddb31346077a89641b78..2e35c61bbdd192eb08c8dbe787f179cd62b9aaad 100644 (file)
--- a/include/linux/neighbour.h
+++ b/include/linux/neighbour.h
@@ -126,6 +126,7 @@ enum {
         NDTPA_PROXY_QLEN,               /* u32 */
         NDTPA_LOCKTIME,                 /* u64, msecs */
         NDTPA_QUEUE_LENBYTES,           /* u32 */
+       NDTPA_MCAST_REPROBES,           /* u32 */
         __NDTPA_MAX
  };
  #define NDTPA_MAX (__NDTPA_MAX - 1)
diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h

index 25731dfb3fcc2f331cbba3c8f415c75614c1f46d..bf08e76bf50525912360edbfe09b1e220f683e94 100644 (file)
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -397,6 +397,8 @@ enum {
         TCA_BPF_CLASSID,
         TCA_BPF_OPS_LEN,
         TCA_BPF_OPS,
+       TCA_BPF_FD,
+       TCA_BPF_NAME,
         __TCA_BPF_MAX,
  };
  
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h

index 3eb78105399be8c377591ee76af993e07e8d5226..702b19b4207eac31d581dbec5fad6ffeb0ca83cc 100644 (file)
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -134,6 +134,8 @@ enum {
  
         RTM_NEWNSID = 88,
  #define RTM_NEWNSID RTM_NEWNSID
+       RTM_DELNSID = 89,
+#define RTM_DELNSID RTM_DELNSID
         RTM_GETNSID = 90,
  #define RTM_GETNSID RTM_GETNSID
  
@@ -303,6 +305,9 @@ enum rtattr_type_t {
         RTA_TABLE,
         RTA_MARK,
         RTA_MFC_STATS,
+       RTA_VIA,
+       RTA_NEWDST,
+       RTA_PREF,
         __RTA_MAX
  };
  
@@ -332,6 +337,7 @@ struct rtnexthop {
  #define RTNH_F_DEAD            1       /* Nexthop is dead (used by multipath)  */
  #define RTNH_F_PERVASIVE       2       /* Do recursive gateway lookup  */
  #define RTNH_F_ONLINK          4       /* Gateway is forced on link    */
+#define RTNH_F_EXTERNAL                8       /* Route installed externally   */
  
  /* Macros to handle hexthops */
  
@@ -344,6 +350,12 @@ struct rtnexthop {
  #define RTNH_SPACE(len)        RTNH_ALIGN(RTNH_LENGTH(len))
  #define RTNH_DATA(rtnh)   ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0)))
  
+/* RTA_VIA */
+struct rtvia {
+       __kernel_sa_family_t    rtvia_family;
+       __u8                    rtvia_addr[0];
+};
+
  /* RTM_CACHEINFO */
  
  struct rta_cacheinfo {
@@ -621,6 +633,10 @@ enum rtnetlink_groups {
  #define RTNLGRP_IPV6_NETCONF   RTNLGRP_IPV6_NETCONF
         RTNLGRP_MDB,
  #define RTNLGRP_MDB            RTNLGRP_MDB
+       RTNLGRP_MPLS_ROUTE,
+#define RTNLGRP_MPLS_ROUTE     RTNLGRP_MPLS_ROUTE
+       RTNLGRP_NSID,
+#define RTNLGRP_NSID           RTNLGRP_NSID
         __RTNLGRP_MAX
  };
  #define RTNLGRP_MAX    (__RTNLGRP_MAX - 1)
diff --git a/include/linux/tc_act/tc_bpf.h b/include/linux/tc_act/tc_bpf.h

index 5288bd77e63bbfd734e8454aee633a3340ca636f..07f17cc70bb3ee2f8ca7667221679b67f09c3cc6 100644 (file)
--- a/include/linux/tc_act/tc_bpf.h
+++ b/include/linux/tc_act/tc_bpf.h
@@ -24,6 +24,8 @@ enum {
         TCA_ACT_BPF_PARMS,
         TCA_ACT_BPF_OPS_LEN,
         TCA_ACT_BPF_OPS,
+       TCA_ACT_BPF_FD,
+       TCA_ACT_BPF_NAME,
         __TCA_ACT_BPF_MAX,
  };
  #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1)
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h

index 3a1fd3299f2c193f163d73762b639afcf7c3d318..b8f54510df076f3a87bd28f0bf1690b13e72e513 100644 (file)
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -1,6 +1,7 @@
  #ifndef _LINUX_XFRM_H
  #define _LINUX_XFRM_H
  
+#include <linux/in6.h>
  #include <linux/types.h>
  
  /* All of the structures in this file may not change size as they are
@@ -13,6 +14,7 @@
  typedef union {
         __be32          a4;
         __be32          a6[4];
+       struct in6_addr in6;
  } xfrm_address_t;
  
  /* Ident of a specific xfrm_state. It is used on input to lookup
diff --git a/include/rt_names.h b/include/rt_names.h

index c0ea4f982904df22148be894986505018525c6bd..921be0607b514d29a741300a4557b08d382a688a 100644 (file)
--- a/include/rt_names.h
+++ b/include/rt_names.h
@@ -22,7 +22,7 @@ int inet_proto_a2n(const char *buf);
  
  
  const char * ll_type_n2a(int type, char *buf, int len);
-const char *ll_addr_n2a(unsigned char *addr, int alen,
+const char *ll_addr_n2a(const unsigned char *addr, int alen,
                         int type, char *buf, int blen);
  int ll_addr_a2n(char *lladdr, int len, const char *arg);
  
diff --git a/include/utils.h b/include/utils.h

index 9151c4f103e3a3801514af08b7776341c49a9086..c21b59c227127fa095a7cb8bffefe816572ffc96 100644 (file)
--- a/include/utils.h
+++ b/include/utils.h
@@ -50,10 +50,11 @@ extern void incomplete_command(void) __attribute__((noreturn));
  
  typedef struct
  {
-       __u8 family;
-       __u8 bytelen;
+       __u16 flags;
+       __u16 bytelen;
         __s16 bitlen;
-       __u32 flags;
+       /* These next two fields match rtvia */
+       __u16 family;
         __u32 data[8];
  } inet_prefix;
  
@@ -77,6 +78,13 @@ struct ipx_addr {
         u_int8_t  ipx_node[IPX_NODE_LEN];
  };
  
+#ifndef AF_MPLS
+# define AF_MPLS 28
+#endif
+
+/* Maximum number of labels the mpls helpers support */
+#define MPLS_MAX_LABELS 8
+
  extern __u32 get_addr32(const char *name);
  extern int get_addr_1(inet_prefix *dst, const char *arg, int family);
  extern int get_prefix_1(inet_prefix *dst, char *arg, int family);
@@ -106,9 +114,12 @@ extern int af_byte_len(int af);
  
  extern const char *format_host(int af, int len, const void *addr,
                                char *buf, int buflen);
-extern const char *rt_addr_n2a(int af, const void *addr,
+extern const char *rt_addr_n2a(int af, int len, const void *addr,
                                char *buf, int buflen);
  
+extern int read_family(const char *name);
+extern const char *family_name(int family);
+
  void missarg(const char *) __attribute__((noreturn));
  void invarg(const char *, const char *) __attribute__((noreturn));
  void duparg(const char *, const char *) __attribute__((noreturn));
@@ -122,6 +133,9 @@ int dnet_pton(int af, const char *src, void *addr);
  const char *ipx_ntop(int af, const void *addr, char *str, size_t len);
  int ipx_pton(int af, const char *src, void *addr);
  
+const char *mpls_ntop(int af, const void *addr, char *str, size_t len);
+int mpls_pton(int af, const char *src, void *addr);
+
  extern int __iproute2_hz_internal;
  extern int __get_hz(void);
  
@@ -157,6 +171,11 @@ void print_nlmsg_timestamp(FILE *fp, const struct nlmsghdr *n);
  
  #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
  
+#ifndef __check_format_string
+# define __check_format_string(pos_str, pos_args) \
+       __attribute__ ((format (printf, (pos_str), (pos_args))))
+#endif
+
  extern int cmdlineno;
  extern ssize_t getcmdline(char **line, size_t *len, FILE *in);
  extern int makeargs(char *line, char *argv[], int maxargs);
diff --git a/ip/ip.c b/ip/ip.c

index da16b15f8b557ebedd6e4799255df43f7a229f10..f7f214b2f5ab98bc104262fbcdf72bb6b891bd4c 100644 (file)
--- a/ip/ip.c
+++ b/ip/ip.c
@@ -52,7 +52,7 @@ static void usage(void)
  "                   netns | l2tp | fou | tcp_metrics | token | netconf }\n"
  "       OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n"
  "                    -h[uman-readable] | -iec |\n"
-"                    -f[amily] { inet | inet6 | ipx | dnet | bridge | link } |\n"
+"                    -f[amily] { inet | inet6 | ipx | dnet | mpls | bridge | link } |\n"
  "                    -4 | -6 | -I | -D | -B | -0 |\n"
  "                    -l[oops] { maximum-addr-flush-attempts } |\n"
  "                    -o[neline] | -t[imestamp] | -ts[hort] | -b[atch] [filename] |\n"
@@ -190,21 +190,11 @@ int main(int argc, char **argv)
                         argv++;
                         if (argc <= 1)
                                 usage();
-                       if (strcmp(argv[1], "inet") == 0)
-                               preferred_family = AF_INET;
-                       else if (strcmp(argv[1], "inet6") == 0)
-                               preferred_family = AF_INET6;
-                       else if (strcmp(argv[1], "dnet") == 0)
-                               preferred_family = AF_DECnet;
-                       else if (strcmp(argv[1], "link") == 0)
-                               preferred_family = AF_PACKET;
-                       else if (strcmp(argv[1], "ipx") == 0)
-                               preferred_family = AF_IPX;
-                       else if (strcmp(argv[1], "bridge") == 0)
-                               preferred_family = AF_BRIDGE;
-                       else if (strcmp(argv[1], "help") == 0)
+                       if (strcmp(argv[1], "help") == 0)
                                 usage();
                         else
+                               preferred_family = read_family(argv[1]);
+                       if (preferred_family == AF_UNSPEC)
                                 invarg("invalid protocol family", argv[1]);
                 } else if (strcmp(opt, "-4") == 0) {
                         preferred_family = AF_INET;
@@ -216,6 +206,8 @@ int main(int argc, char **argv)
                         preferred_family = AF_IPX;
                 } else if (strcmp(opt, "-D") == 0) {
                         preferred_family = AF_DECnet;
+               } else if (strcmp(opt, "-M") == 0) {
+                       preferred_family = AF_MPLS;
                 } else if (strcmp(opt, "-B") == 0) {
                         preferred_family = AF_BRIDGE;
                 } else if (matches(opt, "-human") == 0 ||
diff --git a/ip/ipaddress.c b/ip/ipaddress.c

index 99a6ab5977e3a942658da2be4c6ea05d5f737a95..e582da031f3acab003b1080184af9ab0123fed14 100644 (file)
--- a/ip/ipaddress.c
+++ b/ip/ipaddress.c
@@ -85,7 +85,7 @@ static void usage(void)
         fprintf(stderr, "           [-]tentative | [-]deprecated | [-]dadfailed | temporary |\n");
         fprintf(stderr, "           CONFFLAG-LIST ]\n");
         fprintf(stderr, "CONFFLAG-LIST := [ CONFFLAG-LIST ] CONFFLAG\n");
-       fprintf(stderr, "CONFFLAG  := [ home | nodad | mngtmpaddr | noprefixroute ]\n");
+       fprintf(stderr, "CONFFLAG  := [ home | nodad | mngtmpaddr | noprefixroute | autojoin ]\n");
         fprintf(stderr, "LIFETIME := [ valid_lft LFT ] [ preferred_lft LFT ]\n");
         fprintf(stderr, "LFT := forever | SECONDS\n");
  
@@ -915,6 +915,10 @@ int print_addrinfo(const struct sockaddr_nl *who, struct nlmsghdr *n,
                 ifa_flags &= ~IFA_F_NOPREFIXROUTE;
                 fprintf(fp, "noprefixroute ");
         }
+       if (ifa_flags & IFA_F_MCAUTOJOIN) {
+               ifa_flags &= ~IFA_F_MCAUTOJOIN;
+               fprintf(fp, "autojoin ");
+       }
         if (!(ifa_flags & IFA_F_PERMANENT)) {
                 fprintf(fp, "dynamic ");
         } else
@@ -1354,6 +1358,9 @@ static int ipaddr_list_flush_or_save(int argc, char **argv, int action)
                 } else if (strcmp(*argv, "noprefixroute") == 0) {
                         filter.flags |= IFA_F_NOPREFIXROUTE;
                         filter.flagmask |= IFA_F_NOPREFIXROUTE;
+               } else if (strcmp(*argv, "autojoin") == 0) {
+                       filter.flags |= IFA_F_MCAUTOJOIN;
+                       filter.flagmask |= IFA_F_MCAUTOJOIN;
                 } else if (strcmp(*argv, "dadfailed") == 0) {
                         filter.flags |= IFA_F_DADFAILED;
                         filter.flagmask |= IFA_F_DADFAILED;
@@ -1558,6 +1565,16 @@ static int default_scope(inet_prefix *lcl)
         return 0;
  }
  
+static bool ipaddr_is_multicast(inet_prefix *a)
+{
+       if (a->family == AF_INET)
+               return IN_MULTICAST(ntohl(a->data[0]));
+       else if (a->family == AF_INET6)
+               return IN6_IS_ADDR_MULTICAST(a->data);
+       else
+               return false;
+}
+
  static int ipaddr_modify(int cmd, int flags, int argc, char **argv)
  {
         struct {
@@ -1665,6 +1682,8 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv)
                         ifa_flags |= IFA_F_MANAGETEMPADDR;
                 } else if (strcmp(*argv, "noprefixroute") == 0) {
                         ifa_flags |= IFA_F_NOPREFIXROUTE;
+               } else if (strcmp(*argv, "autojoin") == 0) {
+                       ifa_flags |= IFA_F_MCAUTOJOIN;
                 } else {
                         if (strcmp(*argv, "local") == 0) {
                                 NEXT_ARG();
@@ -1755,6 +1774,11 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv)
                           sizeof(cinfo));
         }
  
+       if ((ifa_flags & IFA_F_MCAUTOJOIN) && !ipaddr_is_multicast(&lcl)) {
+               fprintf(stderr, "autojoin needs multicast address\n");
+               return -1;
+       }
+
         if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
                 return -2;
  
diff --git a/ip/iplink_bond.c b/ip/iplink_bond.c

index 3009ec912e23b6f806122c0cf02394bcd1750796..a573f92b03a048a0dc68cb4b1d942b3235d317c5 100644 (file)
--- a/ip/iplink_bond.c
+++ b/ip/iplink_bond.c
@@ -415,6 +415,7 @@ static void bond_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
                         if (iptb[i])
                                 fprintf(f, "%s",
                                         rt_addr_n2a(AF_INET,
+                                                   RTA_PAYLOAD(iptb[i]),
                                                     RTA_DATA(iptb[i]),
                                                     buf,
                                                     INET_ADDRSTRLEN));
diff --git a/ip/ipmonitor.c b/ip/ipmonitor.c

index 6b5e66534551f0204d913acc43d9619f93cb4512..7833a26329277ca9c329df69c719e03ebee6dfa1 100644 (file)
--- a/ip/ipmonitor.c
+++ b/ip/ipmonitor.c
@@ -158,6 +158,7 @@ int do_ipmonitor(int argc, char **argv)
         groups |= nl_mgrp(RTNLGRP_IPV6_IFADDR);
         groups |= nl_mgrp(RTNLGRP_IPV4_ROUTE);
         groups |= nl_mgrp(RTNLGRP_IPV6_ROUTE);
+       groups |= nl_mgrp(RTNLGRP_MPLS_ROUTE);
         groups |= nl_mgrp(RTNLGRP_IPV4_MROUTE);
         groups |= nl_mgrp(RTNLGRP_IPV6_MROUTE);
         groups |= nl_mgrp(RTNLGRP_IPV6_PREFIX);
@@ -235,6 +236,8 @@ int do_ipmonitor(int argc, char **argv)
                         groups |= nl_mgrp(RTNLGRP_IPV4_ROUTE);
                 if (!preferred_family || preferred_family == AF_INET6)
                         groups |= nl_mgrp(RTNLGRP_IPV6_ROUTE);
+               if (!preferred_family || preferred_family == AF_MPLS)
+                       groups |= nl_mgrp(RTNLGRP_MPLS_ROUTE);
         }
         if (lmroute) {
                 if (!preferred_family || preferred_family == AF_INET)
diff --git a/ip/ipmroute.c b/ip/ipmroute.c

index b4ed9f15fda5e8349fa6188c77d8f105888cabb0..13ac892512d0222ced62bfffa34c8b10997bcd31 100644 (file)
--- a/ip/ipmroute.c
+++ b/ip/ipmroute.c
@@ -116,6 +116,7 @@ int print_mroute(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
         if (tb[RTA_SRC])
                 len = snprintf(obuf, sizeof(obuf),
                                "(%s, ", rt_addr_n2a(family,
+                                                   RTA_PAYLOAD(tb[RTA_SRC]),
                                                     RTA_DATA(tb[RTA_SRC]),
                                                     abuf, sizeof(abuf)));
         else
@@ -123,6 +124,7 @@ int print_mroute(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
         if (tb[RTA_DST])
                 snprintf(obuf + len, sizeof(obuf) - len,
                          "%s)", rt_addr_n2a(family,
+                                           RTA_PAYLOAD(tb[RTA_DST]),
                                             RTA_DATA(tb[RTA_DST]),
                                             abuf, sizeof(abuf)));
         else
diff --git a/ip/ipprefix.c b/ip/ipprefix.c

index 02c0efce68363403c8030f394a9cf276f5687f03..26b59615121778fdc2cd40b25e76114fc0a9ea30 100644 (file)
--- a/ip/ipprefix.c
+++ b/ip/ipprefix.c
@@ -80,7 +80,9 @@ int print_prefix(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
                 pfx = (struct in6_addr *)RTA_DATA(tb[PREFIX_ADDRESS]);
  
                 memset(abuf, '\0', sizeof(abuf));
-               fprintf(fp, "%s", rt_addr_n2a(family, pfx,
+               fprintf(fp, "%s", rt_addr_n2a(family,
+                                             RTA_PAYLOAD(tb[PREFIX_ADDRESS]),
+                                             pfx,
                                               abuf, sizeof(abuf)));
         }
         fprintf(fp, "/%u ", prefix->prefix_len);
diff --git a/ip/iproute.c b/ip/iproute.c

index 024d401cf2e01b69b5c029da64feb301895b9783..132a83a7a139333014208fe6e04a919503befc39 100644 (file)
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -23,6 +23,7 @@
  #include <netinet/ip.h>
  #include <arpa/inet.h>
  #include <linux/in_route.h>
+#include <linux/icmpv6.h>
  #include <errno.h>
  
  #include "rt_names.h"
@@ -75,19 +76,22 @@ static void usage(void)
         fprintf(stderr, "             [ table TABLE_ID ] [ proto RTPROTO ]\n");
         fprintf(stderr, "             [ scope SCOPE ] [ metric METRIC ]\n");
         fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n");
-       fprintf(stderr, "NH := [ via ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n");
-       fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ]\n");
+       fprintf(stderr, "NH := [ via [ FAMILY ] ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n");
+       fprintf(stderr, "FAMILY := [ inet | inet6 | ipx | dnet | mpls | bridge | link ]\n");
+       fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ] [ as [ to ] ADDRESS ]\n");
         fprintf(stderr, "           [ rtt TIME ] [ rttvar TIME ] [ reordering NUMBER ]\n");
         fprintf(stderr, "           [ window NUMBER] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n");
         fprintf(stderr, "           [ ssthresh NUMBER ] [ realms REALM ] [ src ADDRESS ]\n");
         fprintf(stderr, "           [ rto_min TIME ] [ hoplimit NUMBER ] [ initrwnd NUMBER ]\n");
         fprintf(stderr, "           [ features FEATURES ] [ quickack BOOL ] [ congctl NAME ]\n");
+       fprintf(stderr, "           [ pref PREF ]\n");
         fprintf(stderr, "TYPE := [ unicast | local | broadcast | multicast | throw |\n");
         fprintf(stderr, "          unreachable | prohibit | blackhole | nat ]\n");
         fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n");
         fprintf(stderr, "SCOPE := [ host | link | global | NUMBER ]\n");
         fprintf(stderr, "NHFLAGS := [ onlink | pervasive ]\n");
         fprintf(stderr, "RTPROTO := [ kernel | boot | static | NUMBER ]\n");
+       fprintf(stderr, "PREF := [ low | medium | high ]\n");
         fprintf(stderr, "TIME := NUMBER[s|ms]\n");
         fprintf(stderr, "BOOL := [1|0]\n");
         fprintf(stderr, "FEATURES := ecn\n");
@@ -185,8 +189,15 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
             (r->rtm_family != filter.msrc.family ||
              (filter.msrc.bitlen >= 0 && filter.msrc.bitlen < r->rtm_src_len)))
                 return 0;
-       if (filter.rvia.family && r->rtm_family != filter.rvia.family)
-               return 0;
+       if (filter.rvia.family) {
+               int family = r->rtm_family;
+               if (tb[RTA_VIA]) {
+                       struct rtvia *via = RTA_DATA(tb[RTA_VIA]);
+                       family = via->rtvia_family;
+               }
+               if (family != filter.rvia.family)
+                       return 0;
+       }
         if (filter.rprefsrc.family && r->rtm_family != filter.rprefsrc.family)
                 return 0;
  
@@ -205,6 +216,12 @@ static int filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
                 via.family = r->rtm_family;
                 if (tb[RTA_GATEWAY])
                         memcpy(&via.data, RTA_DATA(tb[RTA_GATEWAY]), host_len/8);
+               if (tb[RTA_VIA]) {
+                       size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2;
+                       struct rtvia *rtvia = RTA_DATA(tb[RTA_VIA]);
+                       via.family = rtvia->rtvia_family;
+                       memcpy(&via.data, rtvia->rtvia_addr, len);
+               }
         }
         if (filter.rprefsrc.bitlen>0) {
                 memset(&prefsrc, 0, sizeof(prefsrc));
@@ -339,8 +356,9 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
         if (tb[RTA_DST]) {
                 if (r->rtm_dst_len != host_len) {
                         fprintf(fp, "%s/%u ", rt_addr_n2a(r->rtm_family,
-                                                        RTA_DATA(tb[RTA_DST]),
-                                                        abuf, sizeof(abuf)),
+                                                      RTA_PAYLOAD(tb[RTA_DST]),
+                                                      RTA_DATA(tb[RTA_DST]),
+                                                      abuf, sizeof(abuf)),
                                 r->rtm_dst_len
                                 );
                 } else {
@@ -358,8 +376,9 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
         if (tb[RTA_SRC]) {
                 if (r->rtm_src_len != host_len) {
                         fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family,
-                                                        RTA_DATA(tb[RTA_SRC]),
-                                                        abuf, sizeof(abuf)),
+                                                      RTA_PAYLOAD(tb[RTA_SRC]),
+                                                      RTA_DATA(tb[RTA_SRC]),
+                                                      abuf, sizeof(abuf)),
                                 r->rtm_src_len
                                 );
                 } else {
@@ -372,6 +391,13 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
         } else if (r->rtm_src_len) {
                 fprintf(fp, "from 0/%u ", r->rtm_src_len);
         }
+       if (tb[RTA_NEWDST]) {
+               fprintf(fp, "as to %s ", format_host(r->rtm_family,
+                                                 RTA_PAYLOAD(tb[RTA_NEWDST]),
+                                                 RTA_DATA(tb[RTA_NEWDST]),
+                                                 abuf, sizeof(abuf))
+                       );
+       }
         if (r->rtm_tos && filter.tosmask != -1) {
                 SPRINT_BUF(b1);
                 fprintf(fp, "tos %s ", rtnl_dsfield_n2a(r->rtm_tos, b1, sizeof(b1)));
@@ -384,6 +410,14 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
                                     RTA_DATA(tb[RTA_GATEWAY]),
                                     abuf, sizeof(abuf)));
         }
+       if (tb[RTA_VIA]) {
+               size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2;
+               struct rtvia *via = RTA_DATA(tb[RTA_VIA]);
+               fprintf(fp, "via %s %s ",
+                       family_name(via->rtvia_family),
+                       format_host(via->rtvia_family, len, via->rtvia_addr,
+                                   abuf, sizeof(abuf)));
+       }
         if (tb[RTA_OIF] && filter.oifmask != -1)
                 fprintf(fp, "dev %s ", ll_index_to_name(*(int*)RTA_DATA(tb[RTA_OIF])));
  
@@ -401,6 +435,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
                  */
                 fprintf(fp, " src %s ",
                         rt_addr_n2a(r->rtm_family,
+                                   RTA_PAYLOAD(tb[RTA_PREFSRC]),
                                     RTA_DATA(tb[RTA_PREFSRC]),
                                     abuf, sizeof(abuf)));
         }
@@ -412,6 +447,8 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
                 fprintf(fp, "onlink ");
         if (r->rtm_flags & RTNH_F_PERVASIVE)
                 fprintf(fp, "pervasive ");
+       if (r->rtm_flags & RTNH_F_EXTERNAL)
+               fprintf(fp, "external ");
         if (r->rtm_flags & RTM_F_NOTIFY)
                 fprintf(fp, "notify ");
         if (tb[RTA_MARK]) {
@@ -598,6 +635,14 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
                                                             RTA_DATA(tb[RTA_GATEWAY]),
                                                             abuf, sizeof(abuf)));
                                 }
+                               if (tb[RTA_VIA]) {
+                                       size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2;
+                                       struct rtvia *via = RTA_DATA(tb[RTA_VIA]);
+                                       fprintf(fp, "via %s %s ",
+                                               family_name(via->rtvia_family),
+                                               format_host(via->rtvia_family, len, via->rtvia_addr,
+                                                           abuf, sizeof(abuf)));
+                               }
                                 if (tb[RTA_FLOW]) {
                                         __u32 to = rta_getattr_u32(tb[RTA_FLOW]);
                                         __u32 from = to>>16;
@@ -629,6 +674,24 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
                         nh = RTNH_NEXT(nh);
                 }
         }
+       if (tb[RTA_PREF]) {
+               unsigned int pref = rta_getattr_u8(tb[RTA_PREF]);
+               fprintf(fp, " pref ");
+
+               switch (pref) {
+               case ICMPV6_ROUTER_PREF_LOW:
+                       fprintf(fp, "low");
+                       break;
+               case ICMPV6_ROUTER_PREF_MEDIUM:
+                       fprintf(fp, "medium");
+                       break;
+               case ICMPV6_ROUTER_PREF_HIGH:
+                       fprintf(fp, "high");
+                       break;
+               default:
+                       fprintf(fp, "%u", pref);
+               }
+       }
         fprintf(fp, "\n");
         fflush(fp);
         return 0;
@@ -645,12 +708,23 @@ static int parse_one_nh(struct rtmsg *r, struct rtattr *rta,
         while (++argv, --argc > 0) {
                 if (strcmp(*argv, "via") == 0) {
                         inet_prefix addr;
+                       int family;
                         NEXT_ARG();
-                       get_addr(&addr, *argv, r->rtm_family);
+                       family = read_family(*argv);
+                       if (family == AF_UNSPEC)
+                               family = r->rtm_family;
+                       else
+                               NEXT_ARG();
+                       get_addr(&addr, *argv, family);
                         if (r->rtm_family == AF_UNSPEC)
                                 r->rtm_family = addr.family;
-                       rta_addattr_l(rta, 4096, RTA_GATEWAY, &addr.data, addr.bytelen);
-                       rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen;
+                       if (addr.family == r->rtm_family) {
+                               rta_addattr_l(rta, 4096, RTA_GATEWAY, &addr.data, addr.bytelen);
+                               rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen;
+                       } else {
+                               rta_addattr_l(rta, 4096, RTA_VIA, &addr.family, addr.bytelen+2);
+                               rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen+2;
+                       }
                 } else if (strcmp(*argv, "dev") == 0) {
                         NEXT_ARG();
                         if ((rtnh->rtnh_ifindex = ll_name_to_index(*argv)) == 0) {
@@ -756,14 +830,33 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
                         if (req.r.rtm_family == AF_UNSPEC)
                                 req.r.rtm_family = addr.family;
                         addattr_l(&req.n, sizeof(req), RTA_PREFSRC, &addr.data, addr.bytelen);
+               } else if (strcmp(*argv, "as") == 0) {
+                       inet_prefix addr;
+                       NEXT_ARG();
+                       if (strcmp(*argv, "to") == 0) {
+                               NEXT_ARG();
+                       }
+                       get_addr(&addr, *argv, req.r.rtm_family);
+                       if (req.r.rtm_family == AF_UNSPEC)
+                               req.r.rtm_family = addr.family;
+                       addattr_l(&req.n, sizeof(req), RTA_NEWDST, &addr.data, addr.bytelen);
                 } else if (strcmp(*argv, "via") == 0) {
                         inet_prefix addr;
+                       int family;
                         gw_ok = 1;
                         NEXT_ARG();
-                       get_addr(&addr, *argv, req.r.rtm_family);
+                       family = read_family(*argv);
+                       if (family == AF_UNSPEC)
+                               family = req.r.rtm_family;
+                       else
+                               NEXT_ARG();
+                       get_addr(&addr, *argv, family);
                         if (req.r.rtm_family == AF_UNSPEC)
                                 req.r.rtm_family = addr.family;
-                       addattr_l(&req.n, sizeof(req), RTA_GATEWAY, &addr.data, addr.bytelen);
+                       if (addr.family == req.r.rtm_family)
+                               addattr_l(&req.n, sizeof(req), RTA_GATEWAY, &addr.data, addr.bytelen);
+                       else
+                               addattr_l(&req.n, sizeof(req), RTA_VIA, &addr.family, addr.bytelen+2);
                 } else if (strcmp(*argv, "from") == 0) {
                         inet_prefix addr;
                         NEXT_ARG();
@@ -782,7 +875,7 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
                         req.r.rtm_tos = tos;
                 } else if (matches(*argv, "metric") == 0 ||
                            matches(*argv, "priority") == 0 ||
-                          matches(*argv, "preference") == 0) {
+                          strcmp(*argv, "preference") == 0) {
                         __u32 metric;
                         NEXT_ARG();
                         if (get_u32(&metric, *argv, 0))
@@ -979,6 +1072,18 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
                            strcmp(*argv, "oif") == 0) {
                         NEXT_ARG();
                         d = *argv;
+               } else if (matches(*argv, "pref") == 0) {
+                       __u8 pref;
+                       NEXT_ARG();
+                       if (strcmp(*argv, "low") == 0)
+                               pref = ICMPV6_ROUTER_PREF_LOW;
+                       else if (strcmp(*argv, "medium") == 0)
+                               pref = ICMPV6_ROUTER_PREF_MEDIUM;
+                       else if (strcmp(*argv, "high") == 0)
+                               pref = ICMPV6_ROUTER_PREF_HIGH;
+                       else if (get_u8(&pref, *argv, 0))
+                               invarg("\"pref\" value is invalid\n", *argv);
+                       addattr8(&req.n, sizeof(req), RTA_PREF, pref);
                 } else {
                         int type;
                         inet_prefix dst;
@@ -1248,8 +1353,14 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action)
                         get_unsigned(&mark, *argv, 0);
                         filter.markmask = -1;
                 } else if (strcmp(*argv, "via") == 0) {
+                       int family;
                         NEXT_ARG();
-                       get_prefix(&filter.rvia, *argv, do_ipv6);
+                       family = read_family(*argv);
+                       if (family == AF_UNSPEC)
+                               family = do_ipv6;
+                       else
+                               NEXT_ARG();
+                       get_prefix(&filter.rvia, *argv, family);
                 } else if (strcmp(*argv, "src") == 0) {
                         NEXT_ARG();
                         get_prefix(&filter.rprefsrc, *argv, do_ipv6);
@@ -1551,6 +1662,8 @@ static int iproute_get(int argc, char **argv)
                         tb[RTA_OIF]->rta_type = 0;
                 if (tb[RTA_GATEWAY])
                         tb[RTA_GATEWAY]->rta_type = 0;
+               if (tb[RTA_VIA])
+                       tb[RTA_VIA]->rta_type = 0;
                 if (!idev && tb[RTA_IIF])
                         tb[RTA_IIF]->rta_type = 0;
                 req.n.nlmsg_flags = NLM_F_REQUEST;
diff --git a/ip/iprule.c b/ip/iprule.c

index 54ed7536e0642fc849496886be6c0f343565d0d3..967969c0e60e2f281ce1d4a1b45ac7b27091e757 100644 (file)
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -82,8 +82,9 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
         if (tb[FRA_SRC]) {
                 if (r->rtm_src_len != host_len) {
                         fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family,
-                                                        RTA_DATA(tb[FRA_SRC]),
-                                                        abuf, sizeof(abuf)),
+                                                      RTA_PAYLOAD(tb[FRA_SRC]),
+                                                      RTA_DATA(tb[FRA_SRC]),
+                                                      abuf, sizeof(abuf)),
                                 r->rtm_src_len
                                 );
                 } else {
@@ -102,8 +103,9 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
         if (tb[FRA_DST]) {
                 if (r->rtm_dst_len != host_len) {
                         fprintf(fp, "to %s/%u ", rt_addr_n2a(r->rtm_family,
-                                                        RTA_DATA(tb[FRA_DST]),
-                                                        abuf, sizeof(abuf)),
+                                                      RTA_PAYLOAD(tb[FRA_DST]),
+                                                      RTA_DATA(tb[FRA_DST]),
+                                                      abuf, sizeof(abuf)),
                                 r->rtm_dst_len
                                 );
                 } else {
diff --git a/ip/iptunnel.c b/ip/iptunnel.c

index caf8a28e62e88e76988280ec600833fae3c6a7ba..be84b83ec67331674f16b5da5db04ee6711ddc8f 100644 (file)
--- a/ip/iptunnel.c
+++ b/ip/iptunnel.c
@@ -342,8 +342,8 @@ static void print_tunnel(struct ip_tunnel_parm *p)
         printf("%s: %s/ip  remote %s  local %s ",
                p->name,
                tnl_strproto(p->iph.protocol),
-              p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1))  : "any",
-              p->iph.saddr ? rt_addr_n2a(AF_INET, &p->iph.saddr, s2, sizeof(s2)) : "any");
+              p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any",
+              p->iph.saddr ? rt_addr_n2a(AF_INET, 4, &p->iph.saddr, s2, sizeof(s2)) : "any");
  
         if (p->iph.protocol == IPPROTO_IPV6 && (p->i_flags & SIT_ISATAP)) {
                 struct ip_tunnel_prl prl[16];
diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c

index 95f91a537759f90a07832537c92969d46d8a2921..9aaf58d5b5eaa65679ee72459371066c952148a7 100644 (file)
--- a/ip/ipxfrm.c
+++ b/ip/ipxfrm.c
@@ -288,10 +288,10 @@ void xfrm_id_info_print(xfrm_address_t *saddr, struct xfrm_id *id,
                 fputs(title, fp);
  
         memset(abuf, '\0', sizeof(abuf));
-       fprintf(fp, "src %s ", rt_addr_n2a(family,
+       fprintf(fp, "src %s ", rt_addr_n2a(family, sizeof(*saddr),
                                            saddr, abuf, sizeof(abuf)));
         memset(abuf, '\0', sizeof(abuf));
-       fprintf(fp, "dst %s", rt_addr_n2a(family,
+       fprintf(fp, "dst %s", rt_addr_n2a(family, sizeof(id->daddr),
                                           &id->daddr, abuf, sizeof(abuf)));
         fprintf(fp, "%s", _SL_);
  
@@ -455,11 +455,15 @@ void xfrm_selector_print(struct xfrm_selector *sel, __u16 family,
                 fputs(prefix, fp);
  
         memset(abuf, '\0', sizeof(abuf));
-       fprintf(fp, "src %s/%u ", rt_addr_n2a(f, &sel->saddr, abuf, sizeof(abuf)),
+       fprintf(fp, "src %s/%u ",
+               rt_addr_n2a(f, sizeof(sel->saddr), &sel->saddr,
+                           abuf, sizeof(abuf)),
                 sel->prefixlen_s);
  
         memset(abuf, '\0', sizeof(abuf));
-       fprintf(fp, "dst %s/%u ", rt_addr_n2a(f, &sel->daddr, abuf, sizeof(abuf)),
+       fprintf(fp, "dst %s/%u ",
+               rt_addr_n2a(f, sizeof(sel->daddr), &sel->daddr,
+                           abuf, sizeof(abuf)),
                 sel->prefixlen_d);
  
         if (sel->proto)
@@ -755,7 +759,8 @@ void xfrm_xfrma_print(struct rtattr *tb[], __u16 family,
  
                 memset(abuf, '\0', sizeof(abuf));
                 fprintf(fp, "addr %s",
-                       rt_addr_n2a(family, &e->encap_oa, abuf, sizeof(abuf)));
+                       rt_addr_n2a(family, sizeof(e->encap_oa), &e->encap_oa,
+                                   abuf, sizeof(abuf)));
                 fprintf(fp, "%s", _SL_);
         }
  
@@ -783,7 +788,7 @@ void xfrm_xfrma_print(struct rtattr *tb[], __u16 family,
  
                 memset(abuf, '\0', sizeof(abuf));
                 fprintf(fp, "%s",
-                       rt_addr_n2a(family, coa,
+                       rt_addr_n2a(family, sizeof(*coa), coa,
                                     abuf, sizeof(abuf)));
                 fprintf(fp, "%s", _SL_);
         }
diff --git a/ip/link_ip6tnl.c b/ip/link_ip6tnl.c

index 5ed3d5a23fb52d09d0d3c4cb64e1dfac110c91b3..cf59a9338f5738add81536eb13a37027c33f787e 100644 (file)
--- a/ip/link_ip6tnl.c
+++ b/ip/link_ip6tnl.c
@@ -285,6 +285,7 @@ static void ip6tunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb
         if (tb[IFLA_IPTUN_REMOTE]) {
                 fprintf(f, "remote %s ",
                         rt_addr_n2a(AF_INET6,
+                                   RTA_PAYLOAD(tb[IFLA_IPTUN_REMOTE]),
                                     RTA_DATA(tb[IFLA_IPTUN_REMOTE]),
                                     s1, sizeof(s1)));
         }
@@ -292,6 +293,7 @@ static void ip6tunnel_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb
         if (tb[IFLA_IPTUN_LOCAL]) {
                 fprintf(f, "local %s ",
                         rt_addr_n2a(AF_INET6,
+                                   RTA_PAYLOAD(tb[IFLA_IPTUN_LOCAL]),
                                     RTA_DATA(tb[IFLA_IPTUN_LOCAL]),
                                     s1, sizeof(s1)));
         }
diff --git a/ip/xfrm_monitor.c b/ip/xfrm_monitor.c

index 50116a7b5433be61bce8058f202373fbefebdf61..58c7d7f46b44a908934af48c5dec733faa9ba392 100644 (file)
--- a/ip/xfrm_monitor.c
+++ b/ip/xfrm_monitor.c
@@ -27,7 +27,9 @@
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
+#include <netinet/in.h>
  #include <linux/xfrm.h>
+
  #include "utils.h"
  #include "xfrm.h"
  #include "ip_common.h"
@@ -227,7 +229,8 @@ static void xfrm_usersa_print(const struct xfrm_usersa_id *sa_id, __u32 reqid, F
  
         buf[0] = 0;
         fprintf(fp, "dst %s ",
-               rt_addr_n2a(sa_id->family, &sa_id->daddr, buf, sizeof(buf)));
+               rt_addr_n2a(sa_id->family, sizeof(sa_id->daddr), &sa_id->daddr,
+                           buf, sizeof(buf)));
  
         fprintf(fp, " reqid 0x%x", reqid);
  
@@ -246,7 +249,8 @@ static int xfrm_ae_print(const struct sockaddr_nl *who,
         xfrm_ae_flags_print(id->flags, arg);
         fprintf(fp,"\n\t");
         memset(abuf, '\0', sizeof(abuf));
-       fprintf(fp, "src %s ", rt_addr_n2a(id->sa_id.family, &id->saddr,
+       fprintf(fp, "src %s ", rt_addr_n2a(id->sa_id.family,
+                                          sizeof(id->saddr), &id->saddr,
                                            abuf, sizeof(abuf)));
  
         xfrm_usersa_print(&id->sa_id, id->reqid, fp);
@@ -262,7 +266,7 @@ static void xfrm_print_addr(FILE *fp, int family, xfrm_address_t *a)
         char buf[256];
  
         buf[0] = 0;
-       fprintf(fp, "%s", rt_addr_n2a(family, a, buf, sizeof(buf)));
+       fprintf(fp, "%s", rt_addr_n2a(family, sizeof(*a), a, buf, sizeof(buf)));
  }
  
  static int xfrm_mapping_print(const struct sockaddr_nl *who,
diff --git a/ip/xfrm_policy.c b/ip/xfrm_policy.c

index 2337d35246faa4f775d0cf797dc148ca5324e36c..7333dc5f536e5f2c5eff8b496dcfc1453f47c6d8 100644 (file)
--- a/ip/xfrm_policy.c
+++ b/ip/xfrm_policy.c
@@ -63,7 +63,8 @@ static void usage(void)
         fprintf(stderr, "        [ index INDEX ] [ ptype PTYPE ] [ action ACTION ] [ priority PRIORITY ]\n");
         fprintf(stderr, "        [ flag FLAG-LIST ]\n");
         fprintf(stderr, "Usage: ip xfrm policy flush [ ptype PTYPE ]\n");
-       fprintf(stderr, "Usage: ip xfrm count\n");
+       fprintf(stderr, "Usage: ip xfrm policy count\n");
+       fprintf(stderr, "Usage: ip xfrm policy set [ hthresh4 LBITS RBITS ] [ hthresh6 LBITS RBITS ]\n");
         fprintf(stderr, "SELECTOR := [ src ADDR[/PLEN] ] [ dst ADDR[/PLEN] ] [ dev DEV ] [ UPSPEC ]\n");
         fprintf(stderr, "UPSPEC := proto { { ");
         fprintf(stderr, "%s | ", strxf_proto(IPPROTO_TCP));
@@ -934,7 +935,7 @@ static int print_spdinfo( struct nlmsghdr *n, void *arg)
                         fprintf(fp,")");
                 }
  
-               fprintf(fp,"\n");
+               fprintf(fp, "%s", _SL_);
         }
         if (show_stats > 1) {
                 struct xfrmu_spdhinfo *sh;
@@ -948,13 +949,109 @@ static int print_spdinfo( struct nlmsghdr *n, void *arg)
                         fprintf(fp,"\t SPD buckets:");
                         fprintf(fp," count %d", sh->spdhcnt);
                         fprintf(fp," Max %d", sh->spdhmcnt);
+                       fprintf(fp, "%s", _SL_);
+               }
+               if (tb[XFRMA_SPD_IPV4_HTHRESH]) {
+                       struct xfrmu_spdhthresh *th;
+                       if (RTA_PAYLOAD(tb[XFRMA_SPD_IPV4_HTHRESH]) < sizeof(*th)) {
+                               fprintf(stderr, "SPDinfo: Wrong len %d\n", len);
+                               return -1;
+                       }
+                       th = RTA_DATA(tb[XFRMA_SPD_IPV4_HTHRESH]);
+                       fprintf(fp,"\t SPD IPv4 thresholds:");
+                       fprintf(fp," local %d", th->lbits);
+                       fprintf(fp," remote %d", th->rbits);
+                       fprintf(fp, "%s", _SL_);
+
+               }
+               if (tb[XFRMA_SPD_IPV6_HTHRESH]) {
+                       struct xfrmu_spdhthresh *th;
+                       if (RTA_PAYLOAD(tb[XFRMA_SPD_IPV6_HTHRESH]) < sizeof(*th)) {
+                               fprintf(stderr, "SPDinfo: Wrong len %d\n", len);
+                               return -1;
+                       }
+                       th = RTA_DATA(tb[XFRMA_SPD_IPV6_HTHRESH]);
+                       fprintf(fp,"\t SPD IPv6 thresholds:");
+                       fprintf(fp," local %d", th->lbits);
+                       fprintf(fp," remote %d", th->rbits);
+                       fprintf(fp, "%s", _SL_);
                 }
         }
-       fprintf(fp,"\n");
+
+       if (oneline)
+               fprintf(fp, "\n");
  
          return 0;
  }
  
+static int xfrm_spd_setinfo(int argc, char **argv)
+{
+       struct rtnl_handle rth;
+       struct {
+               struct nlmsghdr                 n;
+               __u32                           flags;
+               char                            buf[RTA_BUF_SIZE];
+       } req;
+       /* thr4/thr6 become non-NULL once the option was seen; a repeat is passed to duparg() */
+       char *thr4 = NULL;
+       char *thr6 = NULL;
+       /* Build an XFRM_MSG_NEWSPDINFO request; each "hthresh4"/"hthresh6" option appends one attribute */
+       memset(&req, 0, sizeof(req));
+
+       req.n.nlmsg_len = NLMSG_LENGTH(sizeof(__u32));
+       req.n.nlmsg_flags = NLM_F_REQUEST;
+       req.n.nlmsg_type = XFRM_MSG_NEWSPDINFO;
+       req.flags = 0XFFFFFFFF;
+
+       while (argc > 0) {
+               if (strcmp(*argv, "hthresh4") == 0) {
+                       struct xfrmu_spdhthresh thr;
+
+                       if (thr4)
+                               duparg("hthresh4", *argv);
+                       thr4 = *argv;
+                       NEXT_ARG();
+                       if (get_u8(&thr.lbits, *argv, 0) || thr.lbits > 32)
+                               invarg("hthresh4 LBITS value is invalid", *argv);
+                       NEXT_ARG();
+                       if (get_u8(&thr.rbits, *argv, 0) || thr.rbits > 32)
+                               invarg("hthresh4 RBITS value is invalid", *argv);
+
+                       addattr_l(&req.n, sizeof(req), XFRMA_SPD_IPV4_HTHRESH,
+                                 (void *)&thr, sizeof(thr));
+               } else if (strcmp(*argv, "hthresh6") == 0) {
+                       struct xfrmu_spdhthresh thr;
+
+                       if (thr6)
+                               duparg("hthresh6", *argv);
+                       thr6 = *argv;
+                       NEXT_ARG();
+                       if (get_u8(&thr.lbits, *argv, 0) || thr.lbits > 128)
+                               invarg("hthresh6 LBITS value is invalid", *argv);
+                       NEXT_ARG();
+                       if (get_u8(&thr.rbits, *argv, 0) || thr.rbits > 128)
+                               invarg("hthresh6 RBITS value is invalid", *argv);
+
+                       addattr_l(&req.n, sizeof(req), XFRMA_SPD_IPV6_HTHRESH,
+                                 (void *)&thr, sizeof(thr));
+               } else {
+                       invarg("unknown", *argv);
+               }
+
+               argc--; argv++;
+       }
+       /* Send the request over a NETLINK_XFRM socket; the process exits if open or talk fails */
+       if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0)
+               exit(1);
+
+       if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+               exit(2);
+
+       rtnl_close(&rth);
+
+       return 0;
+}
+
  static int xfrm_spd_getinfo(int argc, char **argv)
  {
         struct rtnl_handle rth;
@@ -1058,6 +1155,8 @@ int do_xfrm_policy(int argc, char **argv)
                 return xfrm_policy_flush(argc-1, argv+1);
         if (matches(*argv, "count") == 0)
                 return xfrm_spd_getinfo(argc, argv);
+       if (matches(*argv, "set") == 0)
+               return xfrm_spd_setinfo(argc-1, argv+1);
         if (matches(*argv, "help") == 0)
                 usage();
         fprintf(stderr, "Command \"%s\" is unknown, try \"ip xfrm policy help\".\n", *argv);
diff --git a/lib/ll_addr.c b/lib/ll_addr.c

index c12ab075c4a96901ad6dcb20dd2d62083ef11016..2ce9abfbb8c69d19b13cbedbd6feddee1c533550 100644 (file)
--- a/lib/ll_addr.c
+++ b/lib/ll_addr.c
@@ -29,7 +29,7 @@
  #include "utils.h"
  
  
-const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int blen)
+const char *ll_addr_n2a(const unsigned char *addr, int alen, int type, char *buf, int blen)
  {
         int i;
         int l;
diff --git a/lib/mpls_ntop.c b/lib/mpls_ntop.c

new file mode 100644 (file)

index 0000000..945d6d5
--- /dev/null
+++ b/lib/mpls_ntop.c
@@ -0,0 +1,63 @@
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <linux/mpls.h>
+
+#include "utils.h"
+
+/*
+ * Format the MPLS label stack at addr as "label/label/..." into buf.
+ *
+ * Entries are consumed up to and including the first one that has the
+ * bottom-of-stack bit set.  Returns buf on success.  Returns NULL with
+ * errno set to E2BIG when the text does not fit in buflen bytes or when
+ * no bottom-of-stack entry is found within MPLS_MAX_LABELS entries.
+ */
+static const char *mpls_ntop1(const struct mpls_label *addr, char *buf, size_t buflen)
+{
+       size_t destlen = buflen;
+       char *dest = buf;
+       int count;
+
+       for (count = 0; count < MPLS_MAX_LABELS; count++) {
+               uint32_t entry = ntohl(addr[count].entry);
+               uint32_t label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+               int len = snprintf(dest, destlen, "%u", label);
+
+               /* Error or truncation: stop before destlen can wrap around */
+               if (len < 0 || (size_t)len >= destlen)
+                       break;
+
+               /* Is this the end? */
+               if (entry & MPLS_LS_S_MASK)
+                       return buf;
+
+               dest += len;
+               destlen -= len;
+               if (destlen) {
+                       *dest = '/';
+                       dest++;
+                       destlen--;
+               }
+       }
+       errno = E2BIG;  /* errno codes are positive values */
+       return NULL;
+}
+
+/*
+ * Public entry point: only AF_MPLS is handled (errno is cleared before
+ * formatting); any other family returns NULL with errno = EAFNOSUPPORT.
+ */
+const char *mpls_ntop(int af, const void *addr, char *buf, size_t buflen)
+{
+       switch(af) {
+       case AF_MPLS:
+               errno = 0;
+               return mpls_ntop1((struct mpls_label *)addr, buf, buflen);
+       default:
+               errno = EAFNOSUPPORT;
+       }
+
+       return NULL;
+}
diff --git a/lib/mpls_pton.c b/lib/mpls_pton.c

new file mode 100644 (file)

index 0000000..bd448cf
--- /dev/null
+++ b/lib/mpls_pton.c
@@ -0,0 +1,58 @@
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <linux/mpls.h>
+
+#include "utils.h"
+
+
+/*
+ * Parse "label/label/..." into consecutive entries starting at addr.
+ *
+ * Each label is read with strtoul() (base auto-detected) and must be below
+ * 1 << 20.  The entry of the final label additionally gets the S bit.
+ * Returns 1 on success and 0 on any syntax error, out-of-range label, or
+ * when more than MPLS_MAX_LABELS labels are given.
+ */
+static int mpls_pton1(const char *name, struct mpls_label *addr)
+{
+       struct mpls_label *slot = addr;
+       const char *cursor = name;
+       unsigned remaining;
+
+       for (remaining = MPLS_MAX_LABELS; remaining > 0; remaining--, slot++) {
+               char *stop;
+               unsigned long value = strtoul(cursor, &stop, 0);
+
+               /* Reject empty fields and labels that are out of range */
+               if (stop == cursor || value >= (1 << 20))
+                       return 0;
+
+               slot->entry = htonl(value << MPLS_LS_LABEL_SHIFT);
+
+               switch (*stop) {
+               case '\0':
+                       /* Last label of the stack */
+                       slot->entry |= htonl(1 << MPLS_LS_S_SHIFT);
+                       return 1;
+               case '/':
+                       cursor = stop + 1;
+                       break;
+               default:
+                       /* Bad character in the address */
+                       return 0;
+               }
+       }
+       /* The address was too long */
+       return 0;
+}
+
+/*
+ * inet_pton()-like entry point for MPLS label stacks.
+ *
+ * For AF_MPLS errno is cleared and the result of mpls_pton1() is returned
+ * (1 on success, 0 on a malformed address).  Any other family returns -1
+ * with errno set to EAFNOSUPPORT.
+ */
+int mpls_pton(int af, const char *src, void *addr)
+{
+       if (af != AF_MPLS) {
+               errno = EAFNOSUPPORT;
+               return -1;
+       }
+
+       errno = 0;
+       return mpls_pton1(src, (struct mpls_label *)addr);
+}
diff --git a/lib/utils.c b/lib/utils.c

index 0d08a86872c9c1f27908e5b5a9b4cd9856bef469..428ad8f9dd61739bca7ca71e3e375ddc070cb878 100644 (file)
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -25,11 +25,13 @@
  #include <asm/types.h>
  #include <linux/pkt_sched.h>
  #include <linux/param.h>
+#include <linux/if_arp.h>
+#include <linux/mpls.h>
  #include <time.h>
  #include <sys/time.h>
  #include <errno.h>
  
-
+#include "rt_names.h"
  #include "utils.h"
  #include "namespace.h"
  
@@ -389,7 +391,7 @@ int get_addr_1(inet_prefix *addr, const char *name, int family)
         if (strcmp(name, "default") == 0 ||
             strcmp(name, "all") == 0 ||
             strcmp(name, "any") == 0) {
-               if (family == AF_DECnet)
+               if ((family == AF_DECnet) || (family == AF_MPLS))
                         return -1;
                 addr->family = family;
                 addr->bytelen = (family == AF_INET6 ? 16 : 4);
@@ -397,6 +399,18 @@ int get_addr_1(inet_prefix *addr, const char *name, int family)
                 return 0;
         }
  
+       if (family == AF_PACKET) {
+               int len;
+               len = ll_addr_a2n((char *)&addr->data, sizeof(addr->data), name);
+               if (len < 0)
+                       return -1;
+
+               addr->family = AF_PACKET;
+               addr->bytelen = len;
+               addr->bitlen = len * 8;
+               return 0;
+       }
+
         if (strchr(name, ':')) {
                 addr->family = AF_INET6;
                 if (family != AF_UNSPEC && family != AF_INET6)
@@ -419,6 +433,23 @@ int get_addr_1(inet_prefix *addr, const char *name, int family)
                 return 0;
         }
  
+       if (family == AF_MPLS) {
+               int i;
+               addr->family = AF_MPLS;
+               if (mpls_pton(AF_MPLS, name, addr->data) <= 0)
+                       return -1;
+               addr->bytelen = 4;
+               addr->bitlen = 20;
+               /* How many bytes do I need? */
+               for (i = 0; i < 8; i++) {
+                       if (ntohl(addr->data[i]) & MPLS_LS_S_MASK) {
+                               addr->bytelen = (i + 1)*4;
+                               break;
+                       }
+               }
+               return 0;
+       }
+
         addr->family = AF_INET;
         if (family != AF_UNSPEC && family != AF_INET)
                 return -1;
@@ -442,6 +473,8 @@ int af_bit_len(int af)
                 return 16;
         case AF_IPX:
                 return 80;
+       case AF_MPLS:
+               return 20;
         }
  
         return 0;
@@ -463,7 +496,7 @@ int get_prefix_1(inet_prefix *dst, char *arg, int family)
         if (strcmp(arg, "default") == 0 ||
             strcmp(arg, "any") == 0 ||
             strcmp(arg, "all") == 0) {
-               if (family == AF_DECnet)
+               if ((family == AF_DECnet) || (family == AF_MPLS))
                         return -1;
                 dst->family = family;
                 dst->bytelen = 0;
@@ -497,10 +530,6 @@ done:
  
  int get_addr(inet_prefix *dst, const char *arg, int family)
  {
-       if (family == AF_PACKET) {
-               fprintf(stderr, "Error: \"%s\" may be inet address, but it is not allowed in this context.\n", arg);
-               exit(1);
-       }
         if (get_addr_1(dst, arg, family)) {
                 fprintf(stderr, "Error: an inet address is expected rather than \"%s\".\n", arg);
                 exit(1);
@@ -636,12 +665,14 @@ int __get_user_hz(void)
         return sysconf(_SC_CLK_TCK);
  }
  
-const char *rt_addr_n2a(int af, const void *addr, char *buf, int buflen)
+const char *rt_addr_n2a(int af, int len, const void *addr, char *buf, int buflen)
  {
         switch (af) {
         case AF_INET:
         case AF_INET6:
                 return inet_ntop(af, addr, buf, buflen);
+       case AF_MPLS:
+               return mpls_ntop(af, addr, buf, buflen);
         case AF_IPX:
                 return ipx_ntop(af, addr, buf, buflen);
         case AF_DECnet:
@@ -650,11 +681,52 @@ const char *rt_addr_n2a(int af, const void *addr, char *buf, int buflen)
                 memcpy(dna.a_addr, addr, 2);
                 return dnet_ntop(af, &dna, buf, buflen);
         }
+       case AF_PACKET:
+               return ll_addr_n2a(addr, len, ARPHRD_VOID, buf, buflen);
         default:
                 return "???";
         }
  }
  
+/*
+ * Translate a family keyword into its AF_* constant.
+ * Names that are not in the table yield AF_UNSPEC.
+ */
+int read_family(const char *name)
+{
+       static const struct {
+               const char *keyword;
+               int af;
+       } known[] = {
+               { "inet",   AF_INET },
+               { "inet6",  AF_INET6 },
+               { "dnet",   AF_DECnet },
+               { "link",   AF_PACKET },
+               { "ipx",    AF_IPX },
+               { "mpls",   AF_MPLS },
+               { "bridge", AF_BRIDGE },
+       };
+       unsigned int idx;
+
+       for (idx = 0; idx < sizeof(known) / sizeof(known[0]); idx++) {
+               if (strcmp(name, known[idx].keyword) == 0)
+                       return known[idx].af;
+       }
+
+       return AF_UNSPEC;
+}
+
+/*
+ * Inverse of read_family(): give the keyword for an AF_* constant,
+ * or "???" when the family has no keyword.
+ */
+const char *family_name(int family)
+{
+       switch (family) {
+       case AF_INET:
+               return "inet";
+       case AF_INET6:
+               return "inet6";
+       case AF_DECnet:
+               return "dnet";
+       case AF_PACKET:
+               return "link";
+       case AF_IPX:
+               return "ipx";
+       case AF_MPLS:
+               return "mpls";
+       case AF_BRIDGE:
+               return "bridge";
+       default:
+               return "???";
+       }
+}
+
  #ifdef RESOLVE_HOSTNAMES
  struct namerec
  {
@@ -723,7 +795,7 @@ const char *format_host(int af, int len, const void *addr,
                         return n;
         }
  #endif
-       return rt_addr_n2a(af, addr, buf, buflen);
+       return rt_addr_n2a(af, len, addr, buf, buflen);
  }
  
  
diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in

index d53cc76935aac59e34241276d155fbe83033448a..72d8d77088e033253e1091ea3d1dbd64aaf19a9f 100644 (file)
--- a/man/man8/ip-route.8.in
+++ b/man/man8/ip-route.8.in
@@ -81,18 +81,28 @@ replace " } "
  .ti -8
  .IR NH " := [ "
  .B  via
-.IR ADDRESS " ] [ "
+[
+.IR FAMILY " ] " ADDRESS " ] [ "
  .B  dev
  .IR STRING " ] [ "
  .B  weight
  .IR NUMBER " ] " NHFLAGS
  
+.ti -8
+.IR FAMILY " := [ "
+.BR inet " | " inet6 " | " ipx " | " dnet " | " mpls " | " bridge " | " link " ]"
+
  .ti -8
  .IR OPTIONS " := " FLAGS " [ "
  .B  mtu
  .IR NUMBER " ] [ "
  .B  advmss
  .IR NUMBER " ] [ "
+.B  as
+[
+.B to
+]
+.IR ADDRESS " ] [ "
  .B  rtt
  .IR TIME " ] [ "
  .B  rttvar
@@ -119,6 +129,8 @@ replace " } "
  .IR BOOL " ] [ "
  .B  congctl
  .IR NAME " ]"
+[
+.B  pref
+.IR PREF " ]"
  
  .ti -8
  .IR TYPE " := [ "
@@ -148,6 +160,10 @@ throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]"
  .IR FEATURES " := [ "
  .BR ecn " | ]"
  
+.ti -8
+.IR PREF " := [ "
+.BR low " | " medium " | " high " ]"
+
  
  .SH DESCRIPTION
  .B ip route
@@ -333,9 +349,10 @@ table by default.
  the output device name.
  
  .TP
-.BI via " ADDRESS"
-the address of the nexthop router. Actually, the sense of this field
-depends on the route type. For normal
+.BI via " [ FAMILY ] ADDRESS"
+the address of the nexthop router, in the address family FAMILY.
+Actually, the sense of this field depends on the route type.  For
+normal
  .B unicast
  routes it is either the true next hop router or, if it is a direct
  route installed in BSD compatibility mode, it can be a local address
@@ -472,7 +489,7 @@ is a complex value with its own syntax similar to the top level
  argument lists:
  
  .in +8
-.BI via " ADDRESS"
+.BI via " [ FAMILY ] ADDRESS"
  - is the nexthop router.
  .sp
  
@@ -551,6 +568,28 @@ to assign (or not to assign) protocol tags.
  .B onlink
  pretend that the nexthop is directly attached to this link,
  even if it does not match any interface prefix.
+
+.TP
+.BI pref " PREF"
+the IPv6 route preference.
+.I PREF
+is a string specifying the route preference as defined in RFC4191 for Router
+Discovery messages. Namely:
+
+.in +8
+.B low
+- the route has the lowest priority
+.sp
+
+.B medium
+- the route has the default priority
+.sp
+
+.B high
+- the route has the highest priority
+.sp
+
+.in -8
  .RE
  
  .TP
@@ -669,7 +708,7 @@ only list routes of this type.
  only list routes going via this device.
  
  .TP
-.BI via " PREFIX"
+.BI via " [ FAMILY ] PREFIX"
  only list routes going via the nexthop routers selected by
  .IR PREFIX "."
  
diff --git a/man/man8/ip-xfrm.8 b/man/man8/ip-xfrm.8

index c9d2a2e17c35b86afba16eb1172ecbfdc81a2daf..29b397f3595937f22bcb020a1ec45931e098b8e0 100644 (file)
--- a/man/man8/ip-xfrm.8
+++ b/man/man8/ip-xfrm.8
@@ -256,6 +256,13 @@ ip-xfrm \- transform configuration
  .ti -8
  .B "ip xfrm policy count"
  
+.ti -8
+.B "ip xfrm policy set"
+.RB "[ " hthresh4
+.IR LBITS " " RBITS " ]"
+.RB "[ " hthresh6
+.IR LBITS " " RBITS " ]"
+
  .ti -8
  .IR SELECTOR " :="
  .RB "[ " src
@@ -360,6 +367,13 @@ ip-xfrm \- transform configuration
  .BR "ip xfrm monitor" " [ " all " |"
  .IR LISTofXFRM-OBJECTS " ]"
  
+.ti -8
+.IR LISTofXFRM-OBJECTS " := [ " LISTofXFRM-OBJECTS " ] " XFRM-OBJECT
+
+.ti -8
+.IR XFRM-OBJECT " := "
+.BR acquire " | " expire " | " SA " | " policy " | " aevent " | " report
+
  .in -8
  .ad b
  
@@ -385,7 +399,6 @@ ip xfrm state deleteall     delete all existing state in xfrm
  ip xfrm state list     print out the list of existing state in xfrm
  ip xfrm state flush    flush all state in xfrm
  ip xfrm state count    count all existing state in xfrm
-ip xfrm monitor        state monitoring for xfrm objects
  .TE
  
  .TP
@@ -507,7 +520,9 @@ encapsulates packets with protocol
  .BR espinudp " or " espinudp-nonike ","
  .RI "using source port " SPORT ", destination port "  DPORT
  .RI ", and original address " OADDR "."
+
  .sp
+.PP
  .TS
  l l.
  ip xfrm policy add     add a new policy
@@ -517,7 +532,6 @@ ip xfrm policy get  get an existing policy
  ip xfrm policy deleteall       delete all existing xfrm policies
  ip xfrm policy list    print out the list of xfrm policies
  ip xfrm policy flush   flush policies
-ip xfrm policy count   count existing policies
  .TE
  
  .TP
@@ -612,7 +626,50 @@ and inbound trigger
  can be
  .BR required " (default) or " use "."
  
+.sp
+.PP
+.TS
+l l.
+ip xfrm policy count   count existing policies
+.TE
+
+.PP
+Use one or more -s options to display more details, including policy hash table
+information.
+
+.sp
+.PP
+.TS
+l l.
+ip xfrm policy set     configure the policy hash table
+.TE
+
+.PP
+Security policies whose address prefix lengths are greater than or equal to
+the policy hash table thresholds are hashed. Others are stored in the
+policy_inexact chained list.
+
+.TP
+.I LBITS
+specifies the minimum local address prefix length of policies that are
+stored in the Security Policy Database hash table.
+
+.TP
+.I RBITS
+specifies the minimum remote address prefix length of policies that are
+stored in the Security Policy Database hash table.
+
+.sp
+.PP
+.TS
+l l.
+ip xfrm monitor        state monitoring for xfrm objects
+.TE
+
+.PP
  The xfrm objects to monitor can be optionally specified.
  
  .SH AUTHOR
  Manpage revised by David Ward <david.ward@ll.mit.edu>
+.br
+Manpage revised by Christophe Gouault <christophe.gouault@6wind.com>
diff --git a/man/man8/ip.8 b/man/man8/ip.8

index 4cd71de28551280c9f6800667b34521ace9eaceb..44d1ee6b26ce38a9624b42529dfec2c8497e3703 100644 (file)
--- a/man/man8/ip.8
+++ b/man/man8/ip.8
@@ -73,7 +73,7 @@ Zero (0) means loop until all addresses are removed.
  .TP
  .BR "\-f" , " \-family " <FAMILY>
  Specifies the protocol family to use. The protocol family identifier can be one of
-.BR "inet" , " inet6" , " bridge" , " ipx" , " dnet"
+.BR "inet" , " inet6" , " bridge" , " ipx" , " dnet" , " mpls"
  or
  .BR link .
  If this option is not present,
@@ -114,6 +114,11 @@ shortcut for
  shortcut for
  .BR "\-family ipx" .
  
+.TP
+.B \-M
+shortcut for
+.BR "\-family mpls" .
+
  .TP
  .B \-0
  shortcut for
diff --git a/tc/Makefile b/tc/Makefile

index d831a1535337def32bb5e410c5982b843f429a4d..2eff082c68937ef34a9ede854f4d584d31f14106 100644 (file)
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -89,6 +89,11 @@ else
    endif
  endif
  
+ifeq ($(TC_CONFIG_ELF),y)
+  CFLAGS += -DHAVE_ELF
+  LDLIBS += -lelf
+endif
+
  TCOBJ += $(TCMODULES)
  LDLIBS += -L. -ltc -lm
  
diff --git a/tc/f_bpf.c b/tc/f_bpf.c

index e2af94e3de396dcddd89a3b09c492a49f01deeda..6d765807eed0af91001a6ffb7143ae33ba4b64fc 100644 (file)
--- a/tc/f_bpf.c
+++ b/tc/f_bpf.c
@@ -34,13 +34,15 @@ static void explain(void)
         fprintf(stderr, "\n");
         fprintf(stderr, " [inline]:     run bytecode BPF_BYTECODE\n");
         fprintf(stderr, " [from file]:  run bytecode-file FILE\n");
+       fprintf(stderr, " [from file]:  run object-file FILE\n");
         fprintf(stderr, "\n");
         fprintf(stderr, "               [ action ACTION_SPEC ]\n");
         fprintf(stderr, "               [ classid CLASSID ]\n");
         fprintf(stderr, "\n");
         fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n");
         fprintf(stderr, "      c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
-       fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string\n");
+       fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n");
+       fprintf(stderr, "or an ELF file containing eBPF map definitions and bytecode.\n");
         fprintf(stderr, "\nACTION_SPEC := ... look at individual actions\n");
         fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n");
  }
@@ -71,31 +73,40 @@ static int bpf_parse_opt(struct filter_util *qu, char *handle,
  
         while (argc > 0) {
                 if (matches(*argv, "run") == 0) {
-                       bool from_file;
+                       bool from_file = true, ebpf;
                         struct sock_filter bpf_ops[BPF_MAXINSNS];
-                       __u16 bpf_len;
                         int ret;
  
                         NEXT_ARG();
                         if (strcmp(*argv, "bytecode-file") == 0) {
-                               from_file = true;
+                               ebpf = false;
                         } else if (strcmp(*argv, "bytecode") == 0) {
                                 from_file = false;
+                               ebpf = false;
+                       } else if (strcmp(*argv, "object-file") == 0) {
+                               ebpf = true;
                         } else {
                                 fprintf(stderr, "What is \"%s\"?\n", *argv);
                                 explain();
                                 return -1;
                         }
                         NEXT_ARG();
-                       ret = bpf_parse_ops(argc, argv, bpf_ops, from_file);
+                       ret = ebpf ? bpf_open_object(*argv, BPF_PROG_TYPE_SCHED_CLS) :
+                                    bpf_parse_ops(argc, argv, bpf_ops, from_file);
                         if (ret < 0) {
-                               fprintf(stderr, "Illegal \"bytecode\"\n");
+                               fprintf(stderr, "%s\n", ebpf ?
+                                       "Could not load object" :
+                                       "Illegal \"bytecode\"");
                                 return -1;
                         }
-                       bpf_len = ret;
-                       addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, bpf_len);
-                       addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops,
-                                 bpf_len * sizeof(struct sock_filter));
+                       if (ebpf) {
+                               addattr32(n, MAX_MSG, TCA_BPF_FD, ret);
+                               addattrstrz(n, MAX_MSG, TCA_BPF_NAME, *argv);
+                       } else {
+                               addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, ret);
+                               addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops,
+                                         ret * sizeof(struct sock_filter));
+                       }
                 } else if (matches(*argv, "classid") == 0 ||
                            strcmp(*argv, "flowid") == 0) {
                         unsigned handle;
@@ -153,6 +164,11 @@ static int bpf_print_opt(struct filter_util *qu, FILE *f,
                         sprint_tc_classid(rta_getattr_u32(tb[TCA_BPF_CLASSID]), b1));
         }
  
+       if (tb[TCA_BPF_NAME])
+               fprintf(f, "%s ", rta_getattr_str(tb[TCA_BPF_NAME]));
+       else if (tb[TCA_BPF_FD])
+               fprintf(f, "pfd %u ", rta_getattr_u32(tb[TCA_BPF_FD]));
+
         if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN])
                 bpf_print_ops(f, tb[TCA_BPF_OPS],
                               rta_getattr_u16(tb[TCA_BPF_OPS_LEN]));
diff --git a/tc/tc_bpf.c b/tc/tc_bpf.c

index c6901d6c0c88da272e5b7108514c1befaa455fb2..3778d6b5923369762fd75b024d87ad400cfaeadf 100644 (file)
--- a/tc/tc_bpf.c
+++ b/tc/tc_bpf.c
@@ -8,6 +8,7 @@
   *
   * Authors:    Daniel Borkmann <dborkman@redhat.com>
   *             Jiri Pirko <jiri@resnulli.us>
+ *             Alexei Starovoitov <ast@plumgrid.com>
   */
  
  #include <stdio.h>
@@ -16,10 +17,19 @@
  #include <string.h>
  #include <stdbool.h>
  #include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
  #include <linux/filter.h>
  #include <linux/netlink.h>
  #include <linux/rtnetlink.h>
  
+#ifdef HAVE_ELF
+#include <libelf.h>
+#include <gelf.h>
+#endif
+
  #include "utils.h"
  #include "tc_util.h"
  #include "tc_bpf.h"
@@ -144,3 +154,385 @@ void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len)
         fprintf(f, "%hu %hhu %hhu %u\'\n", ops[i].code, ops[i].jt,
                 ops[i].jf, ops[i].k);
  }
+
+#ifdef HAVE_ELF
+struct bpf_elf_sec_data {      /* one ELF section, as filled in by bpf_fill_section_data() */
+       GElf_Shdr       sec_hdr;        /* copy of the section header */
+       char            *sec_name;      /* section name returned by elf_strptr() */
+       Elf_Data        *sec_data;      /* first data descriptor returned by elf_getdata() */
+};
+
+static char bpf_log_buf[8192];
+
+/*
+ * Name of the ELF section that holds the instructions for a program type.
+ * Only BPF_PROG_TYPE_SCHED_CLS is mapped; every other type yields NULL.
+ */
+static const char *prog_type_section(enum bpf_prog_type type)
+{
+       if (type == BPF_PROG_TYPE_SCHED_CLS)
+               return ELF_SECTION_CLASSIFIER;
+
+       /* BPF_PROG_TYPE_SCHED_ACT -> ELF_SECTION_ACTION is left disabled */
+       return NULL;
+}
+
+static void bpf_dump_error(const char *format, ...)  __check_format_string(1, 2);
+
+/*
+ * Print a printf-style message to stderr, follow it with the current
+ * contents of bpf_log_buf, then wipe bpf_log_buf.
+ */
+static void bpf_dump_error(const char *format, ...)
+{
+       va_list args;
+
+       va_start(args, format);
+       vfprintf(stderr, format, args);
+       va_end(args);
+
+       fputs(bpf_log_buf, stderr);
+       memset(bpf_log_buf, 0, sizeof(bpf_log_buf));
+}
+
+/*
+ * Issue BPF_MAP_CREATE for a map with the given type, key size, value
+ * size and maximum number of entries; returns whatever bpf() returns.
+ */
+static int bpf_create_map(enum bpf_map_type type, unsigned int size_key,
+                         unsigned int size_value, unsigned int max_elem)
+{
+       union bpf_attr attr;
+
+       memset(&attr, 0, sizeof(attr));
+       attr.map_type = type;
+       attr.key_size = size_key;
+       attr.value_size = size_value;
+       attr.max_entries = max_elem;
+
+       return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+/*
+ * Issue BPF_PROG_LOAD for the instruction buffer insns.
+ *
+ * len is the buffer size in bytes; it is converted to an instruction
+ * count here.  Log output is requested at level 1 into bpf_log_buf.
+ * Returns whatever bpf() returns.
+ */
+static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
+                        unsigned int len, const char *license)
+{
+       union bpf_attr attr;
+
+       memset(&attr, 0, sizeof(attr));
+       attr.prog_type = type;
+       attr.insns = bpf_ptr_to_u64(insns);
+       attr.insn_cnt = len / sizeof(struct bpf_insn);
+       attr.license = bpf_ptr_to_u64(license);
+       attr.log_buf = bpf_ptr_to_u64(bpf_log_buf);
+       attr.log_size = sizeof(bpf_log_buf);
+       attr.log_level = 1;
+
+       return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+/*
+ * Load a program through bpf_prog_load() and, if that fails, report the
+ * errno text together with the log buffer.  Returns the load result.
+ */
+static int bpf_prog_attach(enum bpf_prog_type type, const struct bpf_insn *insns,
+                          unsigned int size, const char *license)
+{
+       int prog_fd;
+
+       prog_fd = bpf_prog_load(type, insns, size, license);
+       if (prog_fd >= 0)
+               return prog_fd;
+
+       bpf_dump_error("BPF program rejected: %s\n", strerror(errno));
+       return prog_fd;
+}
+
+/*
+ * Create a map through bpf_create_map() and, if that fails, report the
+ * errno text together with the log buffer.  Returns the creation result.
+ */
+static int bpf_map_attach(enum bpf_map_type type, unsigned int size_key,
+                         unsigned int size_value, unsigned int max_elem)
+{
+       int map_fd;
+
+       map_fd = bpf_create_map(type, size_key, size_value, max_elem);
+       if (map_fd >= 0)
+               return map_fd;
+
+       bpf_dump_error("BPF map rejected: %s\n", strerror(errno));
+       return map_fd;
+}
+
+/* Mark the first max_fds slots of map_fds[] as unused (-1). */
+static void bpf_maps_init(int *map_fds, unsigned int max_fds)
+{
+       unsigned int slot = 0;
+
+       while (slot < max_fds)
+               map_fds[slot++] = -1;
+}
+
+/* Close every non-negative descriptor among the first max_fds slots. */
+static void bpf_maps_destroy(const int *map_fds, unsigned int max_fds)
+{
+       unsigned int slot;
+
+       for (slot = 0; slot != max_fds; slot++) {
+               if (map_fds[slot] < 0)
+                       continue;
+               close(map_fds[slot]);
+       }
+}
+
+/*
+ * Create one map per entry of maps[] and store each resulting descriptor
+ * in map_fds[] at the same index.
+ *
+ * When num_maps exceeds max_fds nothing is created and 0 is returned.
+ * If a creation fails, every descriptor opened by this call is closed,
+ * its slot is reset to -1, and the negative result of bpf_map_attach()
+ * is returned.
+ */
+static int bpf_maps_attach(struct bpf_elf_map *maps, unsigned int num_maps,
+                          int *map_fds, unsigned int max_fds)
+{
+       int i, ret;
+
+       for (i = 0; i < num_maps && num_maps <= max_fds; i++) {
+               struct bpf_elf_map *map = &maps[i];
+
+               ret = bpf_map_attach(map->type, map->size_key,
+                                    map->size_value, map->max_elem);
+               if (ret < 0)
+                       goto err_unwind;
+
+               map_fds[i] = ret;
+       }
+
+       return 0;
+
+err_unwind:
+       bpf_maps_destroy(map_fds, i);
+       /* Reset the slots so that a later bpf_maps_destroy() over the
+        * whole array does not close these descriptors a second time. */
+       bpf_maps_init(map_fds, i);
+       return ret;
+}
+
+static int bpf_fill_section_data(Elf *elf_fd, GElf_Ehdr *elf_hdr, int sec_index,
+                                struct bpf_elf_sec_data *sec_data)
+{      /* Collect header, name and data of section sec_index into *sec_data;
+        * returns 0 on success or a negative errno-style code. */
+       GElf_Shdr sec_hdr;
+       Elf_Scn *sec_fd;
+       Elf_Data *sec_edata;
+       char *sec_name;
+
+       memset(sec_data, 0, sizeof(*sec_data)); /* stays zeroed on every error return */
+
+       sec_fd = elf_getscn(elf_fd, sec_index);
+       if (!sec_fd)
+               return -EINVAL;
+
+       if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
+               return -EIO;
+
+       sec_name = elf_strptr(elf_fd, elf_hdr->e_shstrndx,
+                             sec_hdr.sh_name);
+       if (!sec_name || !sec_hdr.sh_size)      /* no name, or empty section */
+               return -ENOENT;
+
+       sec_edata = elf_getdata(sec_fd, NULL);
+       if (!sec_edata || elf_getdata(sec_fd, sec_edata))       /* no data, or a further descriptor after the first */
+               return -EIO;
+
+       memcpy(&sec_data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
+       sec_data->sec_name = sec_name;
+       sec_data->sec_data = sec_edata;
+
+       return 0;
+}
+
+/*
+ * Patch map descriptors into the instructions of data_insn, driven by the
+ * relocation entries of data_relo.
+ *
+ * Each relocation must point at a BPF_LD | BPF_IMM | BPF_DW instruction.
+ * The referenced symbol's value, divided by sizeof(struct bpf_elf_map),
+ * selects the slot in map_fds[]; that descriptor is written to the
+ * instruction's imm field and src_reg is set to BPF_PSEUDO_MAP_FD.
+ *
+ * Returns 0 on success, -EINVAL for a malformed relocation section or an
+ * out-of-range/unsuitable target, and -EIO when libelf cannot deliver a
+ * relocation or symbol.
+ */
+static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo,
+                              struct bpf_elf_sec_data *data_insn,
+                              Elf_Data *sym_tab, int *map_fds, int max_fds)
+{
+       Elf_Data *idata = data_insn->sec_data;
+       GElf_Shdr *rhdr = &data_relo->sec_hdr;
+       struct bpf_insn *insns = idata->d_buf;
+       unsigned int num_insns = idata->d_size / sizeof(*insns);
+       int relo_ent, relo_num;
+
+       /* The header comes from the object file; an entry size of zero
+        * would make the division below trap. */
+       if (!rhdr->sh_entsize)
+               return -EINVAL;
+
+       relo_num = rhdr->sh_size / rhdr->sh_entsize;
+
+       for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
+               unsigned int ioff, fnum;
+               GElf_Rel relo;
+               GElf_Sym sym;
+
+               if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
+                       return -EIO;
+
+               ioff = relo.r_offset / sizeof(struct bpf_insn);
+               if (ioff >= num_insns)
+                       return -EINVAL;
+               if (insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW))
+                       return -EINVAL;
+
+               if (gelf_getsym(sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
+                       return -EIO;
+
+               fnum = sym.st_value / sizeof(struct bpf_elf_map);
+               if (fnum >= max_fds)
+                       return -EINVAL;
+
+               insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
+               insns[ioff].imm = map_fds[fnum];
+       }
+
+       return 0;
+}
+
+/*
+ * First pass over the object: collect what the program sections need.
+ *
+ *  - ELF_SECTION_MAPS:    maps are created, descriptors go to map_fds[]
+ *  - ELF_SECTION_LICENSE: contents are copied into license[]
+ *  - SHT_SYMTAB section:  its data is handed back through *sym_tab
+ *
+ * Every section consumed here is flagged in sec_seen[].  Sections that
+ * bpf_fill_section_data() cannot read are skipped and do not influence
+ * the result.
+ *
+ * Returns 0 if at least one section was readable, -1 if none was, the
+ * negative result of bpf_maps_attach() if map creation failed, or
+ * -ENOMEM if the license section is larger than lic_len.
+ */
+static int bpf_fetch_ancillary(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
+                              int *map_fds, unsigned int max_fds,
+                              char *license, unsigned int lic_len,
+                              Elf_Data **sym_tab)
+{
+       int sec_index, ret = -1;
+
+       for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
+               struct bpf_elf_sec_data data_anc;
+
+               /* The error of an unreadable (e.g. empty) section is not
+                * kept in ret: otherwise such a section at the end of the
+                * table would fail an otherwise successful scan. */
+               if (bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
+                                         &data_anc) < 0)
+                       continue;
+               ret = 0;
+
+               /* Extract and load eBPF map fds. */
+               if (!strcmp(data_anc.sec_name, ELF_SECTION_MAPS)) {
+                       struct bpf_elf_map *maps = data_anc.sec_data->d_buf;
+                       unsigned int maps_num = data_anc.sec_data->d_size /
+                                               sizeof(*maps);
+
+                       sec_seen[sec_index] = true;
+                       ret = bpf_maps_attach(maps, maps_num, map_fds,
+                                             max_fds);
+                       if (ret < 0)
+                               return ret;
+               }
+               /* Extract eBPF license. */
+               else if (!strcmp(data_anc.sec_name, ELF_SECTION_LICENSE)) {
+                       if (data_anc.sec_data->d_size > lic_len)
+                               return -ENOMEM;
+
+                       sec_seen[sec_index] = true;
+                       memcpy(license, data_anc.sec_data->d_buf,
+                              data_anc.sec_data->d_size);
+               }
+               /* Extract symbol table for relocations (map fd fixups). */
+               else if (data_anc.sec_hdr.sh_type == SHT_SYMTAB) {
+                       sec_seen[sec_index] = true;
+                       *sym_tab = data_anc.sec_data;
+               }
+       }
+
+       return ret;
+}
+
+static int bpf_fetch_prog_relo(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
+                              enum bpf_prog_type type, char *license,
+                              Elf_Data *sym_tab, int *map_fds, unsigned int max_fds)
+{      /* Load the first program section of `type` that has an SHT_REL
+        * companion section; returns its fd, or a negative value. */
+       int sec_index, prog_fd = -1;
+
+       for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
+               struct bpf_elf_sec_data data_relo, data_insn;
+               int ins_index, ret;
+
+               /* Attach eBPF programs with relocation data (maps). */
+               ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
+                                           &data_relo);
+               if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
+                       continue;
+
+               ins_index = data_relo.sec_hdr.sh_info;  /* section the relocations apply to */
+
+               ret = bpf_fill_section_data(elf_fd, elf_hdr, ins_index,
+                                           &data_insn);
+               if (ret < 0)
+                       continue;
+               if (strcmp(data_insn.sec_name, prog_type_section(type)))        /* NOTE(review): prog_type_section() can return NULL */
+                       continue;
+
+               sec_seen[sec_index] = true;     /* both sections are marked even if loading fails below */
+               sec_seen[ins_index] = true;
+
+               ret = bpf_apply_relo_data(&data_relo, &data_insn, sym_tab,
+                                         map_fds, max_fds);
+               if (ret < 0)
+                       continue;
+
+               prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf,
+                                         data_insn.sec_data->d_size, license);
+               if (prog_fd < 0)
+                       continue;
+
+               break;
+       }
+
+       return prog_fd;
+}
+
+/*
+ * Loader for program sections that need no relocation.
+ *
+ * Visits every section not yet flagged in sec_seen[] whose name matches
+ * the section name for `type` and passes its contents unmodified to
+ * bpf_prog_attach().  Returns the fd of the first program that loads,
+ * otherwise the last negative result (-1 if nothing was attempted).
+ */
+static int bpf_fetch_prog(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
+                         enum bpf_prog_type type, char *license)
+{
+       int idx, prog_fd = -1;
+
+       for (idx = 1; idx < elf_hdr->e_shnum; idx++) {
+               struct bpf_elf_sec_data insn_sec;
+
+               /* Attach eBPF programs without relocation data. */
+               if (sec_seen[idx])
+                       continue;
+               if (bpf_fill_section_data(elf_fd, elf_hdr, idx, &insn_sec) < 0)
+                       continue;
+               if (strcmp(insn_sec.sec_name, prog_type_section(type)) != 0)
+                       continue;
+
+               prog_fd = bpf_prog_attach(type, insn_sec.sec_data->d_buf,
+                                         insn_sec.sec_data->d_size, license);
+               if (prog_fd >= 0)
+                       break;
+       }
+
+       return prog_fd;
+}
+
+int bpf_open_object(const char *path, enum bpf_prog_type type)
+{      /* Load the program of `type` from the ELF object at `path`;
+        * returns the program fd, or a negative value on failure. */
+       int map_fds[ELF_MAX_MAPS], max_fds = ARRAY_SIZE(map_fds);
+       char license[ELF_MAX_LICENSE_LEN];
+       int file_fd, prog_fd = -1, ret; /* NOTE(review): ret is assigned on error paths but prog_fd is what gets returned */
+       Elf_Data *sym_tab = NULL;
+       GElf_Ehdr elf_hdr;
+       bool *sec_seen;
+       Elf *elf_fd;
+
+       if (elf_version(EV_CURRENT) == EV_NONE)
+               return -EINVAL;
+
+       file_fd = open(path, O_RDONLY, 0);
+       if (file_fd < 0)
+               return -errno;
+
+       elf_fd = elf_begin(file_fd, ELF_C_READ, NULL);
+       if (!elf_fd) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if (gelf_getehdr(elf_fd, &elf_hdr) != &elf_hdr) {
+               ret = -EIO;
+               goto out_elf;
+       }
+
+       sec_seen = calloc(elf_hdr.e_shnum, sizeof(*sec_seen));  /* one flag per section */
+       if (!sec_seen) {
+               ret = -ENOMEM;
+               goto out_elf;
+       }
+
+       memset(license, 0, sizeof(license));
+       bpf_maps_init(map_fds, max_fds);
+
+       ret = bpf_fetch_ancillary(elf_fd, &elf_hdr, sec_seen, map_fds, max_fds,
+                                 license, sizeof(license), &sym_tab);
+       if (ret < 0)
+               goto out_maps;
+       if (sym_tab)    /* relocations can only be resolved with a symbol table */
+               prog_fd = bpf_fetch_prog_relo(elf_fd, &elf_hdr, sec_seen, type,
+                                             license, sym_tab, map_fds, max_fds);
+       if (prog_fd < 0)        /* fall back to sections without relocations */
+               prog_fd = bpf_fetch_prog(elf_fd, &elf_hdr, sec_seen, type,
+                                        license);
+       if (prog_fd < 0)
+               goto out_maps;
+out_sec:       /* on success the map descriptors are left open */
+       free(sec_seen);
+out_elf:
+       elf_end(elf_fd);
+out:
+       close(file_fd);
+       return prog_fd;
+
+out_maps:      /* failure: close the maps, then run the common cleanup */
+       bpf_maps_destroy(map_fds, max_fds);
+       goto out_sec;
+}
+
+#endif /* HAVE_ELF */
diff --git a/tc/tc_bpf.h b/tc/tc_bpf.h

index 08cca9274074778b483dd67acbd2504a70d652ec..ce6474701c5bb3622f25c24feccd7a2c76e2e3d3 100644 (file)
--- a/tc/tc_bpf.h
+++ b/tc/tc_bpf.h
@@ -13,10 +13,42 @@
  #ifndef _TC_BPF_H_
  #define _TC_BPF_H_ 1
  
-#include <stdio.h>
  #include <linux/filter.h>
  #include <linux/netlink.h>
  #include <linux/rtnetlink.h>
+#include <linux/bpf.h>
+#include <sys/syscall.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include "utils.h"
+
+/* Note:
+ *
+ * The ELF section names and the bpf_elf_map structure definition
+ * below are not (!) kernel ABI. They are rather a "contract" between
+ * the application and the BPF loader in tc. For compatibility, the
+ * section names should stay as-is. Introducing aliases, if needed,
+ * is a possibility, though.
+ */
+
+/* ELF section names, etc.
+ *
+ * NOTE(review): the strings are presumably compared against the names
+ * of the sections found in the object file - confirm in the loader.
+ */
+#define ELF_SECTION_LICENSE    "license"
+#define ELF_SECTION_MAPS       "maps"
+#define ELF_SECTION_CLASSIFIER "classifier"
+#define ELF_SECTION_ACTION     "action"
+
+/* Array bounds used by bpf_open_object(): the number of map file
+ * descriptor slots and the size of the license string buffer.
+ */
+#define ELF_MAX_MAPS           64
+#define ELF_MAX_LICENSE_LEN    128
+
+/* ELF map definition: four 32-bit fields describing one map.
+ *
+ * NOTE(review): the per-field meanings below are inferred from the
+ * field names only; confirm against the loader code that reads this
+ * structure.
+ */
+struct bpf_elf_map {
+       __u32 type;             /* presumably the BPF map type identifier */
+       __u32 size_key;         /* presumably the size of a key */
+       __u32 size_value;       /* presumably the size of a value */
+       __u32 max_elem;         /* presumably the maximum number of entries */
+};
  
  int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
                      char **bpf_string, bool *need_release,
@@ -25,4 +57,28 @@ int bpf_parse_ops(int argc, char **argv, struct sock_filter *bpf_ops,
                   bool from_file);
  void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
  
+/*
+ * Turn a pointer into a 64-bit unsigned integer.
+ *
+ * The value goes through unsigned long first, so on targets where
+ * pointers are narrower than 64 bits the upper bits end up zero.
+ */
+static inline __u64 bpf_ptr_to_u64(const void *ptr)
+{
+       unsigned long addr = (unsigned long) ptr;
+
+       return (__u64) addr;
+}
+
+#ifdef HAVE_ELF
+int bpf_open_object(const char *path, enum bpf_prog_type type);
+
+static inline int bpf(int cmd, union bpf_attr *attr, unsigned int size)
+{
+#ifdef __NR_bpf
+       return syscall(__NR_bpf, cmd, attr, size);
+#else
+       errno = ENOSYS;
+       return -1;
  #endif
+}
+#else
+/*
+ * Stand-in used when HAVE_ELF is not defined: no object is opened, the
+ * arguments are ignored, errno is set to ENOSYS and -1 is returned.
+ */
+static inline int bpf_open_object(const char *path, enum bpf_prog_type type)
+{
+       const int failure = -1;
+
+       errno = ENOSYS;
+       return failure;
+}
+#endif /* HAVE_ELF */
+#endif /* _TC_BPF_H_ */
author	Stephen Hemminger <shemming@brocade.com>
	Fri, 10 Apr 2015 20:27:37 +0000 (13:27 -0700)
committer	Stephen Hemminger <shemming@brocade.com>
	Fri, 10 Apr 2015 20:27:37 +0000 (13:27 -0700)
Makefile		patch \| blob \| blame \| history
configure		patch \| blob \| blame \| history
doc/ip-cref.tex		patch \| blob \| blame \| history
include/linux/bpf.h	[new file with mode: 0644]	patch \| blob
include/linux/filter.h		patch \| blob \| blame \| history
include/linux/if_addr.h		patch \| blob \| blame \| history
include/linux/if_link.h		patch \| blob \| blame \| history
include/linux/mpls.h	[new file with mode: 0644]	patch \| blob
include/linux/neighbour.h		patch \| blob \| blame \| history
include/linux/pkt_cls.h		patch \| blob \| blame \| history
include/linux/rtnetlink.h		patch \| blob \| blame \| history
include/linux/tc_act/tc_bpf.h		patch \| blob \| blame \| history
include/linux/xfrm.h		patch \| blob \| blame \| history
include/rt_names.h		patch \| blob \| blame \| history
include/utils.h		patch \| blob \| blame \| history
ip/ip.c		patch \| blob \| blame \| history
ip/ipaddress.c		patch \| blob \| blame \| history
ip/iplink_bond.c		patch \| blob \| blame \| history
ip/ipmonitor.c		patch \| blob \| blame \| history
ip/ipmroute.c		patch \| blob \| blame \| history
ip/ipprefix.c		patch \| blob \| blame \| history
ip/iproute.c		patch \| blob \| blame \| history
ip/iprule.c		patch \| blob \| blame \| history
ip/iptunnel.c		patch \| blob \| blame \| history
ip/ipxfrm.c		patch \| blob \| blame \| history
ip/link_ip6tnl.c		patch \| blob \| blame \| history
ip/xfrm_monitor.c		patch \| blob \| blame \| history
ip/xfrm_policy.c		patch \| blob \| blame \| history
lib/ll_addr.c		patch \| blob \| blame \| history
lib/mpls_ntop.c	[new file with mode: 0644]	patch \| blob
lib/mpls_pton.c	[new file with mode: 0644]	patch \| blob
lib/utils.c		patch \| blob \| blame \| history
man/man8/ip-route.8.in		patch \| blob \| blame \| history
man/man8/ip-xfrm.8		patch \| blob \| blame \| history
man/man8/ip.8		patch \| blob \| blame \| history
tc/Makefile		patch \| blob \| blame \| history
tc/f_bpf.c		patch \| blob \| blame \| history
tc/tc_bpf.c		patch \| blob \| blame \| history
tc/tc_bpf.h		patch \| blob \| blame \| history