#options for ipx
ADDLIB+=ipx_ntop.o ipx_pton.o
+#options for mpls
+ADDLIB+=mpls_ntop.o mpls_pton.o
+
CC = gcc
HOSTCC = gcc
DEFINES += -D_GNU_SOURCE
rm -f $TMPDIR/ipsettest.c $TMPDIR/ipsettest
}
+check_elf()
+{
+ cat >$TMPDIR/elftest.c <<EOF
+#include <libelf.h>
+#include <gelf.h>
+int main(void)
+{
+ Elf_Scn *scn;
+ GElf_Shdr shdr;
+ return elf_version(EV_CURRENT);
+}
+EOF
+
+ if $CC -I$INCLUDE -o $TMPDIR/elftest $TMPDIR/elftest.c -lelf >/dev/null 2>&1
+ then
+ echo "TC_CONFIG_ELF:=y" >>Config
+ echo "yes"
+ else
+ echo "no"
+ fi
+ rm -f $TMPDIR/elftest.c $TMPDIR/elftest
+}
+
check_selinux()
# SELinux is a compile time option in the ss utility
{
echo -n "SELinux support: "
check_selinux
+echo -n "ELF support: "
+check_elf
+
echo -e "\nDocs"
check_docs
even if it does not match any interface prefix. One application of this
option may be found in~\cite{IP-TUNNELS}.
+\item \verb|pref PREF|
+
+--- the IPv6 route preference.
+\verb|PREF| PREF is a string specifying the route preference as defined in
+RFC4191 for Router Discovery messages. Namely:
+\begin{itemize}
+\item \verb|low| --- the route has a lowest priority.
+\item \verb|medium| --- the route has a default priority.
+\item \verb|high| --- the route has a highest priority.
+\end{itemize}
+
\end{itemize}
--- /dev/null
+/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef __LINUX_BPF_H__
+#define __LINUX_BPF_H__
+
+#include <linux/types.h>
+#include <linux/bpf_common.h>
+
+/* Extended instruction set based on top of classic BPF */
+
+/* instruction classes */
+#define BPF_ALU64 0x07 /* alu mode in double word width */
+
+/* ld/ldx fields */
+#define BPF_DW 0x18 /* double word */
+#define BPF_XADD 0xc0 /* exclusive add */
+
+/* alu/jmp fields */
+#define BPF_MOV 0xb0 /* mov reg to reg */
+#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */
+
+/* change endianness of a register */
+#define BPF_END 0xd0 /* flags for endianness conversion: */
+#define BPF_TO_LE 0x00 /* convert to little-endian */
+#define BPF_TO_BE 0x08 /* convert to big-endian */
+#define BPF_FROM_LE BPF_TO_LE
+#define BPF_FROM_BE BPF_TO_BE
+
+#define BPF_JNE 0x50 /* jump != */
+#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */
+#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */
+#define BPF_CALL 0x80 /* function call */
+#define BPF_EXIT 0x90 /* function return */
+
+/* Register numbers */
+enum {
+ BPF_REG_0 = 0,
+ BPF_REG_1,
+ BPF_REG_2,
+ BPF_REG_3,
+ BPF_REG_4,
+ BPF_REG_5,
+ BPF_REG_6,
+ BPF_REG_7,
+ BPF_REG_8,
+ BPF_REG_9,
+ BPF_REG_10,
+ __MAX_BPF_REG,
+};
+
+/* BPF has 10 general purpose 64-bit registers and stack frame. */
+#define MAX_BPF_REG __MAX_BPF_REG
+
+struct bpf_insn {
+ __u8 code; /* opcode */
+ __u8 dst_reg:4; /* dest register */
+ __u8 src_reg:4; /* source register */
+ __s16 off; /* signed offset */
+ __s32 imm; /* signed immediate constant */
+};
+
+/* BPF syscall commands */
+enum bpf_cmd {
+ /* create a map with given type and attributes
+ * fd = bpf(BPF_MAP_CREATE, union bpf_attr *, u32 size)
+ * returns fd or negative error
+ * map is deleted when fd is closed
+ */
+ BPF_MAP_CREATE,
+
+ /* lookup key in a given map
+ * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
+ * Using attr->map_fd, attr->key, attr->value
+ * returns zero and stores found elem into value
+ * or negative error
+ */
+ BPF_MAP_LOOKUP_ELEM,
+
+ /* create or update key/value pair in a given map
+ * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
+ * Using attr->map_fd, attr->key, attr->value, attr->flags
+ * returns zero or negative error
+ */
+ BPF_MAP_UPDATE_ELEM,
+
+ /* find and delete elem by key in a given map
+ * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
+ * Using attr->map_fd, attr->key
+ * returns zero or negative error
+ */
+ BPF_MAP_DELETE_ELEM,
+
+ /* lookup key in a given map and return next key
+ * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
+ * Using attr->map_fd, attr->key, attr->next_key
+ * returns zero and stores next key or negative error
+ */
+ BPF_MAP_GET_NEXT_KEY,
+
+ /* verify and load eBPF program
+ * prog_fd = bpf(BPF_PROG_LOAD, union bpf_attr *attr, u32 size)
+ * Using attr->prog_type, attr->insns, attr->license
+ * returns fd or negative error
+ */
+ BPF_PROG_LOAD,
+};
+
+enum bpf_map_type {
+ BPF_MAP_TYPE_UNSPEC,
+ BPF_MAP_TYPE_HASH,
+ BPF_MAP_TYPE_ARRAY,
+};
+
+enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_SCHED_CLS,
+ BPF_PROG_TYPE_SCHED_ACT,
+};
+
+#define BPF_PSEUDO_MAP_FD 1
+
+/* flags for BPF_MAP_UPDATE_ELEM command */
+#define BPF_ANY 0 /* create new element or update existing */
+#define BPF_NOEXIST 1 /* create new element if it didn't exist */
+#define BPF_EXIST 2 /* update existing element */
+
+union bpf_attr {
+ struct { /* anonymous struct used by BPF_MAP_CREATE command */
+ __u32 map_type; /* one of enum bpf_map_type */
+ __u32 key_size; /* size of key in bytes */
+ __u32 value_size; /* size of value in bytes */
+ __u32 max_entries; /* max number of entries in a map */
+ };
+
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ __u32 map_fd;
+ __aligned_u64 key;
+ union {
+ __aligned_u64 value;
+ __aligned_u64 next_key;
+ };
+ __u64 flags;
+ };
+
+ struct { /* anonymous struct used by BPF_PROG_LOAD command */
+ __u32 prog_type; /* one of enum bpf_prog_type */
+ __u32 insn_cnt;
+ __aligned_u64 insns;
+ __aligned_u64 license;
+ __u32 log_level; /* verbosity level of verifier */
+ __u32 log_size; /* size of user buffer */
+ __aligned_u64 log_buf; /* user supplied buffer */
+ };
+} __attribute__((aligned(8)));
+
+/* integer value in 'imm' field of BPF_CALL instruction selects which helper
+ * function eBPF program intends to call
+ */
+enum bpf_func_id {
+ BPF_FUNC_unspec,
+ BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
+ BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
+ BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
+ BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
+ BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
+
+ /**
+ * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet
+ * @skb: pointer to skb
+ * @offset: offset within packet from skb->data
+ * @from: pointer where to copy bytes from
+ * @len: number of bytes to store into packet
+ * @flags: bit 0 - if true, recompute skb->csum
+ * other bits - reserved
+ * Return: 0 on success
+ */
+ BPF_FUNC_skb_store_bytes,
+
+ /**
+ * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum
+ * @skb: pointer to skb
+ * @offset: offset within packet where IP checksum is located
+ * @from: old value of header field
+ * @to: new value of header field
+ * @flags: bits 0-3 - size of header field
+ * other bits - reserved
+ * Return: 0 on success
+ */
+ BPF_FUNC_l3_csum_replace,
+
+ /**
+ * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum
+ * @skb: pointer to skb
+ * @offset: offset within packet where TCP/UDP checksum is located
+ * @from: old value of header field
+ * @to: new value of header field
+ * @flags: bits 0-3 - size of header field
+ * bit 4 - is pseudo header
+ * other bits - reserved
+ * Return: 0 on success
+ */
+ BPF_FUNC_l4_csum_replace,
+ __BPF_FUNC_MAX_ID,
+};
+
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+ __u32 len;
+ __u32 pkt_type;
+ __u32 mark;
+ __u32 queue_mapping;
+ __u32 protocol;
+ __u32 vlan_present;
+ __u32 vlan_tci;
+ __u32 vlan_proto;
+ __u32 priority;
+};
+
+#endif /* __LINUX_BPF_H__ */
#define SKF_AD_VLAN_TAG_PRESENT 48
#define SKF_AD_PAY_OFFSET 52
#define SKF_AD_RANDOM 56
-#define SKF_AD_MAX 60
+#define SKF_AD_VLAN_TPID 60
+#define SKF_AD_MAX 64
#define SKF_NET_OFF (-0x100000)
#define SKF_LL_OFF (-0x200000)
#define IFA_F_PERMANENT 0x80
#define IFA_F_MANAGETEMPADDR 0x100
#define IFA_F_NOPREFIXROUTE 0x200
+#define IFA_F_MCAUTOJOIN 0x400
+#define IFA_F_STABLE_PRIVACY 0x800
struct ifa_cacheinfo {
__u32 ifa_prefered;
IFLA_CARRIER_CHANGES,
IFLA_PHYS_SWITCH_ID,
IFLA_LINK_NETNSID,
+ IFLA_PHYS_PORT_NAME,
__IFLA_MAX
};
enum in6_addr_gen_mode {
IN6_ADDR_GEN_MODE_EUI64,
IN6_ADDR_GEN_MODE_NONE,
+ IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
};
/* Bridge section */
IFLA_BR_FORWARD_DELAY,
IFLA_BR_HELLO_TIME,
IFLA_BR_MAX_AGE,
+ IFLA_BR_AGEING_TIME,
+ IFLA_BR_STP_STATE,
+ IFLA_BR_PRIORITY,
__IFLA_BR_MAX,
};
IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */
IFLA_BRPORT_PROXYARP, /* proxy ARP */
IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
+ IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
--- /dev/null
+#ifndef _MPLS_H
+#define _MPLS_H
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+/* Reference: RFC 5462, RFC 3032
+ *
+ * 0 1 2 3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Label | TC |S| TTL |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * Label: Label Value, 20 bits
+ * TC: Traffic Class field, 3 bits
+ * S: Bottom of Stack, 1 bit
+ * TTL: Time to Live, 8 bits
+ */
+
+struct mpls_label {
+ __be32 entry;
+};
+
+#define MPLS_LS_LABEL_MASK 0xFFFFF000
+#define MPLS_LS_LABEL_SHIFT 12
+#define MPLS_LS_TC_MASK 0x00000E00
+#define MPLS_LS_TC_SHIFT 9
+#define MPLS_LS_S_MASK 0x00000100
+#define MPLS_LS_S_SHIFT 8
+#define MPLS_LS_TTL_MASK 0x000000FF
+#define MPLS_LS_TTL_SHIFT 0
+
+#endif /* _MPLS_H */
NDTPA_PROXY_QLEN, /* u32 */
NDTPA_LOCKTIME, /* u64, msecs */
NDTPA_QUEUE_LENBYTES, /* u32 */
+ NDTPA_MCAST_REPROBES, /* u32 */
__NDTPA_MAX
};
#define NDTPA_MAX (__NDTPA_MAX - 1)
TCA_BPF_CLASSID,
TCA_BPF_OPS_LEN,
TCA_BPF_OPS,
+ TCA_BPF_FD,
+ TCA_BPF_NAME,
__TCA_BPF_MAX,
};
RTM_NEWNSID = 88,
#define RTM_NEWNSID RTM_NEWNSID
+ RTM_DELNSID = 89,
+#define RTM_DELNSID RTM_DELNSID
RTM_GETNSID = 90,
#define RTM_GETNSID RTM_GETNSID
RTA_TABLE,
RTA_MARK,
RTA_MFC_STATS,
+ RTA_VIA,
+ RTA_NEWDST,
+ RTA_PREF,
__RTA_MAX
};
#define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
#define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
#define RTNH_F_ONLINK 4 /* Gateway is forced on link */
+#define RTNH_F_EXTERNAL 8 /* Route installed externally */
/* Macros to handle hexthops */
#define RTNH_SPACE(len) RTNH_ALIGN(RTNH_LENGTH(len))
#define RTNH_DATA(rtnh) ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0)))
+/* RTA_VIA */
+struct rtvia {
+ __kernel_sa_family_t rtvia_family;
+ __u8 rtvia_addr[0];
+};
+
/* RTM_CACHEINFO */
struct rta_cacheinfo {
#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF
RTNLGRP_MDB,
#define RTNLGRP_MDB RTNLGRP_MDB
+ RTNLGRP_MPLS_ROUTE,
+#define RTNLGRP_MPLS_ROUTE RTNLGRP_MPLS_ROUTE
+ RTNLGRP_NSID,
+#define RTNLGRP_NSID RTNLGRP_NSID
__RTNLGRP_MAX
};
#define RTNLGRP_MAX (__RTNLGRP_MAX - 1)
TCA_ACT_BPF_PARMS,
TCA_ACT_BPF_OPS_LEN,
TCA_ACT_BPF_OPS,
+ TCA_ACT_BPF_FD,
+ TCA_ACT_BPF_NAME,
__TCA_ACT_BPF_MAX,
};
#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1)
#ifndef _LINUX_XFRM_H
#define _LINUX_XFRM_H
+#include <linux/in6.h>
#include <linux/types.h>
/* All of the structures in this file may not change size as they are
typedef union {
__be32 a4;
__be32 a6[4];
+ struct in6_addr in6;
} xfrm_address_t;
/* Ident of a specific xfrm_state. It is used on input to lookup
const char * ll_type_n2a(int type, char *buf, int len);
-const char *ll_addr_n2a(unsigned char *addr, int alen,
+const char *ll_addr_n2a(const unsigned char *addr, int alen,
int type, char *buf, int blen);
int ll_addr_a2n(char *lladdr, int len, const char *arg);
typedef struct
{
- __u8 family;
- __u8 bytelen;
+ __u16 flags;
+ __u16 bytelen;
__s16 bitlen;
- __u32 flags;
+ /* These next two fields match rtvia */
+ __u16 family;
__u32 data[8];
} inet_prefix;
u_int8_t ipx_node[IPX_NODE_LEN];
};
+#ifndef AF_MPLS
+# define AF_MPLS 28
+#endif
+
+/* Maximum number of labels the mpls helpers support */
+#define MPLS_MAX_LABELS 8
+
extern __u32 get_addr32(const char *name);
extern int get_addr_1(inet_prefix *dst, const char *arg, int family);
extern int get_prefix_1(inet_prefix *dst, char *arg, int family);
extern const char *format_host(int af, int len, const void *addr,
char *buf, int buflen);
-extern const char *rt_addr_n2a(int af, const void *addr,
+extern const char *rt_addr_n2a(int af, int len, const void *addr,
char *buf, int buflen);
+extern int read_family(const char *name);
+extern const char *family_name(int family);
+
void missarg(const char *) __attribute__((noreturn));
void invarg(const char *, const char *) __attribute__((noreturn));
void duparg(const char *, const char *) __attribute__((noreturn));
const char *ipx_ntop(int af, const void *addr, char *str, size_t len);
int ipx_pton(int af, const char *src, void *addr);
+const char *mpls_ntop(int af, const void *addr, char *str, size_t len);
+int mpls_pton(int af, const char *src, void *addr);
+
extern int __iproute2_hz_internal;
extern int __get_hz(void);
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#ifndef __check_format_string
+# define __check_format_string(pos_str, pos_args) \
+ __attribute__ ((format (printf, (pos_str), (pos_args))))
+#endif
+
extern int cmdlineno;
extern ssize_t getcmdline(char **line, size_t *len, FILE *in);
extern int makeargs(char *line, char *argv[], int maxargs);
" netns | l2tp | fou | tcp_metrics | token | netconf }\n"
" OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n"
" -h[uman-readable] | -iec |\n"
-" -f[amily] { inet | inet6 | ipx | dnet | bridge | link } |\n"
+" -f[amily] { inet | inet6 | ipx | dnet | mpls | bridge | link } |\n"
" -4 | -6 | -I | -D | -B | -0 |\n"
" -l[oops] { maximum-addr-flush-attempts } |\n"
" -o[neline] | -t[imestamp] | -ts[hort] | -b[atch] [filename] |\n"
argv++;
if (argc <= 1)
usage();
- if (strcmp(argv[1], "inet") == 0)
- preferred_family = AF_INET;
- else if (strcmp(argv[1], "inet6") == 0)
- preferred_family = AF_INET6;
- else if (strcmp(argv[1], "dnet") == 0)
- preferred_family = AF_DECnet;
- else if (strcmp(argv[1], "link") == 0)
- preferred_family = AF_PACKET;
- else if (strcmp(argv[1], "ipx") == 0)
- preferred_family = AF_IPX;
- else if (strcmp(argv[1], "bridge") == 0)
- preferred_family = AF_BRIDGE;
- else if (strcmp(argv[1], "help") == 0)
+ if (strcmp(argv[1], "help") == 0)
usage();
else
+ preferred_family = read_family(argv[1]);
+ if (preferred_family == AF_UNSPEC)
invarg("invalid protocol family", argv[1]);
} else if (strcmp(opt, "-4") == 0) {
preferred_family = AF_INET;
preferred_family = AF_IPX;
} else if (strcmp(opt, "-D") == 0) {
preferred_family = AF_DECnet;
+ } else if (strcmp(opt, "-M") == 0) {
+ preferred_family = AF_MPLS;
} else if (strcmp(opt, "-B") == 0) {
preferred_family = AF_BRIDGE;
} else if (matches(opt, "-human") == 0 ||
fprintf(stderr, " [-]tentative | [-]deprecated | [-]dadfailed | temporary |\n");
fprintf(stderr, " CONFFLAG-LIST ]\n");
fprintf(stderr, "CONFFLAG-LIST := [ CONFFLAG-LIST ] CONFFLAG\n");
- fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute ]\n");
+ fprintf(stderr, "CONFFLAG := [ home | nodad | mngtmpaddr | noprefixroute | autojoin ]\n");
fprintf(stderr, "LIFETIME := [ valid_lft LFT ] [ preferred_lft LFT ]\n");
fprintf(stderr, "LFT := forever | SECONDS\n");
ifa_flags &= ~IFA_F_NOPREFIXROUTE;
fprintf(fp, "noprefixroute ");
}
+ if (ifa_flags & IFA_F_MCAUTOJOIN) {
+ ifa_flags &= ~IFA_F_MCAUTOJOIN;
+ fprintf(fp, "autojoin ");
+ }
if (!(ifa_flags & IFA_F_PERMANENT)) {
fprintf(fp, "dynamic ");
} else
} else if (strcmp(*argv, "noprefixroute") == 0) {
filter.flags |= IFA_F_NOPREFIXROUTE;
filter.flagmask |= IFA_F_NOPREFIXROUTE;
+ } else if (strcmp(*argv, "autojoin") == 0) {
+ filter.flags |= IFA_F_MCAUTOJOIN;
+ filter.flagmask |= IFA_F_MCAUTOJOIN;
} else if (strcmp(*argv, "dadfailed") == 0) {
filter.flags |= IFA_F_DADFAILED;
filter.flagmask |= IFA_F_DADFAILED;
return 0;
}
+static bool ipaddr_is_multicast(inet_prefix *a)
+{
+ if (a->family == AF_INET)
+ return IN_MULTICAST(ntohl(a->data[0]));
+ else if (a->family == AF_INET6)
+ return IN6_IS_ADDR_MULTICAST(a->data);
+ else
+ return false;
+}
+
static int ipaddr_modify(int cmd, int flags, int argc, char **argv)
{
struct {
ifa_flags |= IFA_F_MANAGETEMPADDR;
} else if (strcmp(*argv, "noprefixroute") == 0) {
ifa_flags |= IFA_F_NOPREFIXROUTE;
+ } else if (strcmp(*argv, "autojoin") == 0) {
+ ifa_flags |= IFA_F_MCAUTOJOIN;
} else {
if (strcmp(*argv, "local") == 0) {
NEXT_ARG();
sizeof(cinfo));
}
+ if ((ifa_flags & IFA_F_MCAUTOJOIN) && !ipaddr_is_multicast(&lcl)) {
+ fprintf(stderr, "autojoin needs multicast address\n");
+ return -1;
+ }
+
if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
return -2;
if (iptb[i])
fprintf(f, "%s",
rt_addr_n2a(AF_INET,
+ RTA_PAYLOAD(iptb[i]),
RTA_DATA(iptb[i]),
buf,
INET_ADDRSTRLEN));
groups |= nl_mgrp(RTNLGRP_IPV6_IFADDR);
groups |= nl_mgrp(RTNLGRP_IPV4_ROUTE);
groups |= nl_mgrp(RTNLGRP_IPV6_ROUTE);
+ groups |= nl_mgrp(RTNLGRP_MPLS_ROUTE);
groups |= nl_mgrp(RTNLGRP_IPV4_MROUTE);
groups |= nl_mgrp(RTNLGRP_IPV6_MROUTE);
groups |= nl_mgrp(RTNLGRP_IPV6_PREFIX);
groups |= nl_mgrp(RTNLGRP_IPV4_ROUTE);
if (!preferred_family || preferred_family == AF_INET6)
groups |= nl_mgrp(RTNLGRP_IPV6_ROUTE);
+ if (!preferred_family || preferred_family == AF_MPLS)
+ groups |= nl_mgrp(RTNLGRP_MPLS_ROUTE);
}
if (lmroute) {
if (!preferred_family || preferred_family == AF_INET)
if (tb[RTA_SRC])
len = snprintf(obuf, sizeof(obuf),
"(%s, ", rt_addr_n2a(family,
+ RTA_PAYLOAD(tb[RTA_SRC]),
RTA_DATA(tb[RTA_SRC]),
abuf, sizeof(abuf)));
else
if (tb[RTA_DST])
snprintf(obuf + len, sizeof(obuf) - len,
"%s)", rt_addr_n2a(family,
+ RTA_PAYLOAD(tb[RTA_DST]),
RTA_DATA(tb[RTA_DST]),
abuf, sizeof(abuf)));
else
pfx = (struct in6_addr *)RTA_DATA(tb[PREFIX_ADDRESS]);
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "%s", rt_addr_n2a(family, pfx,
+ fprintf(fp, "%s", rt_addr_n2a(family,
+ RTA_PAYLOAD(tb[PREFIX_ADDRESS]),
+ pfx,
abuf, sizeof(abuf)));
}
fprintf(fp, "/%u ", prefix->prefix_len);
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <linux/in_route.h>
+#include <linux/icmpv6.h>
#include <errno.h>
#include "rt_names.h"
fprintf(stderr, " [ table TABLE_ID ] [ proto RTPROTO ]\n");
fprintf(stderr, " [ scope SCOPE ] [ metric METRIC ]\n");
fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n");
- fprintf(stderr, "NH := [ via ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n");
- fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ]\n");
+ fprintf(stderr, "NH := [ via [ FAMILY ] ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n");
+ fprintf(stderr, "FAMILY := [ inet | inet6 | ipx | dnet | mpls | bridge | link ]");
+ fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ] [ as [ to ] ADDRESS ]\n");
fprintf(stderr, " [ rtt TIME ] [ rttvar TIME ] [ reordering NUMBER ]\n");
fprintf(stderr, " [ window NUMBER] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n");
fprintf(stderr, " [ ssthresh NUMBER ] [ realms REALM ] [ src ADDRESS ]\n");
fprintf(stderr, " [ rto_min TIME ] [ hoplimit NUMBER ] [ initrwnd NUMBER ]\n");
fprintf(stderr, " [ features FEATURES ] [ quickack BOOL ] [ congctl NAME ]\n");
+ fprintf(stderr, " [ pref PREF ]\n");
fprintf(stderr, "TYPE := [ unicast | local | broadcast | multicast | throw |\n");
fprintf(stderr, " unreachable | prohibit | blackhole | nat ]\n");
fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n");
fprintf(stderr, "SCOPE := [ host | link | global | NUMBER ]\n");
fprintf(stderr, "NHFLAGS := [ onlink | pervasive ]\n");
fprintf(stderr, "RTPROTO := [ kernel | boot | static | NUMBER ]\n");
+ fprintf(stderr, "PREF := [ low | medium | high ]\n");
fprintf(stderr, "TIME := NUMBER[s|ms]\n");
fprintf(stderr, "BOOL := [1|0]\n");
fprintf(stderr, "FEATURES := ecn\n");
(r->rtm_family != filter.msrc.family ||
(filter.msrc.bitlen >= 0 && filter.msrc.bitlen < r->rtm_src_len)))
return 0;
- if (filter.rvia.family && r->rtm_family != filter.rvia.family)
- return 0;
+ if (filter.rvia.family) {
+ int family = r->rtm_family;
+ if (tb[RTA_VIA]) {
+ struct rtvia *via = RTA_DATA(tb[RTA_VIA]);
+ family = via->rtvia_family;
+ }
+ if (family != filter.rvia.family)
+ return 0;
+ }
if (filter.rprefsrc.family && r->rtm_family != filter.rprefsrc.family)
return 0;
via.family = r->rtm_family;
if (tb[RTA_GATEWAY])
memcpy(&via.data, RTA_DATA(tb[RTA_GATEWAY]), host_len/8);
+ if (tb[RTA_VIA]) {
+ size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2;
+ struct rtvia *rtvia = RTA_DATA(tb[RTA_VIA]);
+ via.family = rtvia->rtvia_family;
+ memcpy(&via.data, rtvia->rtvia_addr, len);
+ }
}
if (filter.rprefsrc.bitlen>0) {
memset(&prefsrc, 0, sizeof(prefsrc));
if (tb[RTA_DST]) {
if (r->rtm_dst_len != host_len) {
fprintf(fp, "%s/%u ", rt_addr_n2a(r->rtm_family,
- RTA_DATA(tb[RTA_DST]),
- abuf, sizeof(abuf)),
+ RTA_PAYLOAD(tb[RTA_DST]),
+ RTA_DATA(tb[RTA_DST]),
+ abuf, sizeof(abuf)),
r->rtm_dst_len
);
} else {
if (tb[RTA_SRC]) {
if (r->rtm_src_len != host_len) {
fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family,
- RTA_DATA(tb[RTA_SRC]),
- abuf, sizeof(abuf)),
+ RTA_PAYLOAD(tb[RTA_SRC]),
+ RTA_DATA(tb[RTA_SRC]),
+ abuf, sizeof(abuf)),
r->rtm_src_len
);
} else {
} else if (r->rtm_src_len) {
fprintf(fp, "from 0/%u ", r->rtm_src_len);
}
+ if (tb[RTA_NEWDST]) {
+ fprintf(fp, "as to %s ", format_host(r->rtm_family,
+ RTA_PAYLOAD(tb[RTA_NEWDST]),
+ RTA_DATA(tb[RTA_NEWDST]),
+ abuf, sizeof(abuf))
+ );
+ }
if (r->rtm_tos && filter.tosmask != -1) {
SPRINT_BUF(b1);
fprintf(fp, "tos %s ", rtnl_dsfield_n2a(r->rtm_tos, b1, sizeof(b1)));
RTA_DATA(tb[RTA_GATEWAY]),
abuf, sizeof(abuf)));
}
+ if (tb[RTA_VIA]) {
+ size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2;
+ struct rtvia *via = RTA_DATA(tb[RTA_VIA]);
+ fprintf(fp, "via %s %s ",
+ family_name(via->rtvia_family),
+ format_host(via->rtvia_family, len, via->rtvia_addr,
+ abuf, sizeof(abuf)));
+ }
if (tb[RTA_OIF] && filter.oifmask != -1)
fprintf(fp, "dev %s ", ll_index_to_name(*(int*)RTA_DATA(tb[RTA_OIF])));
*/
fprintf(fp, " src %s ",
rt_addr_n2a(r->rtm_family,
+ RTA_PAYLOAD(tb[RTA_PREFSRC]),
RTA_DATA(tb[RTA_PREFSRC]),
abuf, sizeof(abuf)));
}
fprintf(fp, "onlink ");
if (r->rtm_flags & RTNH_F_PERVASIVE)
fprintf(fp, "pervasive ");
+ if (r->rtm_flags & RTNH_F_EXTERNAL)
+ fprintf(fp, "external ");
if (r->rtm_flags & RTM_F_NOTIFY)
fprintf(fp, "notify ");
if (tb[RTA_MARK]) {
RTA_DATA(tb[RTA_GATEWAY]),
abuf, sizeof(abuf)));
}
+ if (tb[RTA_VIA]) {
+ size_t len = RTA_PAYLOAD(tb[RTA_VIA]) - 2;
+ struct rtvia *via = RTA_DATA(tb[RTA_VIA]);
+ fprintf(fp, "via %s %s ",
+ family_name(via->rtvia_family),
+ format_host(via->rtvia_family, len, via->rtvia_addr,
+ abuf, sizeof(abuf)));
+ }
if (tb[RTA_FLOW]) {
__u32 to = rta_getattr_u32(tb[RTA_FLOW]);
__u32 from = to>>16;
nh = RTNH_NEXT(nh);
}
}
+ if (tb[RTA_PREF]) {
+ unsigned int pref = rta_getattr_u8(tb[RTA_PREF]);
+ fprintf(fp, " pref ");
+
+ switch (pref) {
+ case ICMPV6_ROUTER_PREF_LOW:
+ fprintf(fp, "low");
+ break;
+ case ICMPV6_ROUTER_PREF_MEDIUM:
+ fprintf(fp, "medium");
+ break;
+ case ICMPV6_ROUTER_PREF_HIGH:
+ fprintf(fp, "high");
+ break;
+ default:
+ fprintf(fp, "%u", pref);
+ }
+ }
fprintf(fp, "\n");
fflush(fp);
return 0;
while (++argv, --argc > 0) {
if (strcmp(*argv, "via") == 0) {
inet_prefix addr;
+ int family;
NEXT_ARG();
- get_addr(&addr, *argv, r->rtm_family);
+ family = read_family(*argv);
+ if (family == AF_UNSPEC)
+ family = r->rtm_family;
+ else
+ NEXT_ARG();
+ get_addr(&addr, *argv, family);
if (r->rtm_family == AF_UNSPEC)
r->rtm_family = addr.family;
- rta_addattr_l(rta, 4096, RTA_GATEWAY, &addr.data, addr.bytelen);
- rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen;
+ if (addr.family == r->rtm_family) {
+ rta_addattr_l(rta, 4096, RTA_GATEWAY, &addr.data, addr.bytelen);
+ rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen;
+ } else {
+ rta_addattr_l(rta, 4096, RTA_VIA, &addr.family, addr.bytelen+2);
+ rtnh->rtnh_len += sizeof(struct rtattr) + addr.bytelen+2;
+ }
} else if (strcmp(*argv, "dev") == 0) {
NEXT_ARG();
if ((rtnh->rtnh_ifindex = ll_name_to_index(*argv)) == 0) {
if (req.r.rtm_family == AF_UNSPEC)
req.r.rtm_family = addr.family;
addattr_l(&req.n, sizeof(req), RTA_PREFSRC, &addr.data, addr.bytelen);
+ } else if (strcmp(*argv, "as") == 0) {
+ inet_prefix addr;
+ NEXT_ARG();
+ if (strcmp(*argv, "to") == 0) {
+ NEXT_ARG();
+ }
+ get_addr(&addr, *argv, req.r.rtm_family);
+ if (req.r.rtm_family == AF_UNSPEC)
+ req.r.rtm_family = addr.family;
+ addattr_l(&req.n, sizeof(req), RTA_NEWDST, &addr.data, addr.bytelen);
} else if (strcmp(*argv, "via") == 0) {
inet_prefix addr;
+ int family;
gw_ok = 1;
NEXT_ARG();
- get_addr(&addr, *argv, req.r.rtm_family);
+ family = read_family(*argv);
+ if (family == AF_UNSPEC)
+ family = req.r.rtm_family;
+ else
+ NEXT_ARG();
+ get_addr(&addr, *argv, family);
if (req.r.rtm_family == AF_UNSPEC)
req.r.rtm_family = addr.family;
- addattr_l(&req.n, sizeof(req), RTA_GATEWAY, &addr.data, addr.bytelen);
+ if (addr.family == req.r.rtm_family)
+ addattr_l(&req.n, sizeof(req), RTA_GATEWAY, &addr.data, addr.bytelen);
+ else
+ addattr_l(&req.n, sizeof(req), RTA_VIA, &addr.family, addr.bytelen+2);
} else if (strcmp(*argv, "from") == 0) {
inet_prefix addr;
NEXT_ARG();
req.r.rtm_tos = tos;
} else if (matches(*argv, "metric") == 0 ||
matches(*argv, "priority") == 0 ||
- matches(*argv, "preference") == 0) {
+ strcmp(*argv, "preference") == 0) {
__u32 metric;
NEXT_ARG();
if (get_u32(&metric, *argv, 0))
strcmp(*argv, "oif") == 0) {
NEXT_ARG();
d = *argv;
+ } else if (matches(*argv, "pref") == 0) {
+ __u8 pref;
+ NEXT_ARG();
+ if (strcmp(*argv, "low") == 0)
+ pref = ICMPV6_ROUTER_PREF_LOW;
+ else if (strcmp(*argv, "medium") == 0)
+ pref = ICMPV6_ROUTER_PREF_MEDIUM;
+ else if (strcmp(*argv, "high") == 0)
+ pref = ICMPV6_ROUTER_PREF_HIGH;
+ else if (get_u8(&pref, *argv, 0))
+ invarg("\"pref\" value is invalid\n", *argv);
+ addattr8(&req.n, sizeof(req), RTA_PREF, pref);
} else {
int type;
inet_prefix dst;
get_unsigned(&mark, *argv, 0);
filter.markmask = -1;
} else if (strcmp(*argv, "via") == 0) {
+ int family;
NEXT_ARG();
- get_prefix(&filter.rvia, *argv, do_ipv6);
+ family = read_family(*argv);
+ if (family == AF_UNSPEC)
+ family = do_ipv6;
+ else
+ NEXT_ARG();
+ get_prefix(&filter.rvia, *argv, family);
} else if (strcmp(*argv, "src") == 0) {
NEXT_ARG();
get_prefix(&filter.rprefsrc, *argv, do_ipv6);
tb[RTA_OIF]->rta_type = 0;
if (tb[RTA_GATEWAY])
tb[RTA_GATEWAY]->rta_type = 0;
+ if (tb[RTA_VIA])
+ tb[RTA_VIA]->rta_type = 0;
if (!idev && tb[RTA_IIF])
tb[RTA_IIF]->rta_type = 0;
req.n.nlmsg_flags = NLM_F_REQUEST;
if (tb[FRA_SRC]) {
if (r->rtm_src_len != host_len) {
fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family,
- RTA_DATA(tb[FRA_SRC]),
- abuf, sizeof(abuf)),
+ RTA_PAYLOAD(tb[FRA_SRC]),
+ RTA_DATA(tb[FRA_SRC]),
+ abuf, sizeof(abuf)),
r->rtm_src_len
);
} else {
if (tb[FRA_DST]) {
if (r->rtm_dst_len != host_len) {
fprintf(fp, "to %s/%u ", rt_addr_n2a(r->rtm_family,
- RTA_DATA(tb[FRA_DST]),
- abuf, sizeof(abuf)),
+ RTA_PAYLOAD(tb[FRA_DST]),
+ RTA_DATA(tb[FRA_DST]),
+ abuf, sizeof(abuf)),
r->rtm_dst_len
);
} else {
printf("%s: %s/ip remote %s local %s ",
p->name,
tnl_strproto(p->iph.protocol),
- p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any",
- p->iph.saddr ? rt_addr_n2a(AF_INET, &p->iph.saddr, s2, sizeof(s2)) : "any");
+ p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any",
+ p->iph.saddr ? rt_addr_n2a(AF_INET, 4, &p->iph.saddr, s2, sizeof(s2)) : "any");
if (p->iph.protocol == IPPROTO_IPV6 && (p->i_flags & SIT_ISATAP)) {
struct ip_tunnel_prl prl[16];
fputs(title, fp);
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "src %s ", rt_addr_n2a(family,
+ fprintf(fp, "src %s ", rt_addr_n2a(family, sizeof(*saddr),
saddr, abuf, sizeof(abuf)));
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "dst %s", rt_addr_n2a(family,
+ fprintf(fp, "dst %s", rt_addr_n2a(family, sizeof(id->daddr),
&id->daddr, abuf, sizeof(abuf)));
fprintf(fp, "%s", _SL_);
fputs(prefix, fp);
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "src %s/%u ", rt_addr_n2a(f, &sel->saddr, abuf, sizeof(abuf)),
+ fprintf(fp, "src %s/%u ",
+ rt_addr_n2a(f, sizeof(sel->saddr), &sel->saddr,
+ abuf, sizeof(abuf)),
sel->prefixlen_s);
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "dst %s/%u ", rt_addr_n2a(f, &sel->daddr, abuf, sizeof(abuf)),
+ fprintf(fp, "dst %s/%u ",
+ rt_addr_n2a(f, sizeof(sel->daddr), &sel->daddr,
+ abuf, sizeof(abuf)),
sel->prefixlen_d);
if (sel->proto)
memset(abuf, '\0', sizeof(abuf));
fprintf(fp, "addr %s",
- rt_addr_n2a(family, &e->encap_oa, abuf, sizeof(abuf)));
+ rt_addr_n2a(family, sizeof(e->encap_oa), &e->encap_oa,
+ abuf, sizeof(abuf)));
fprintf(fp, "%s", _SL_);
}
memset(abuf, '\0', sizeof(abuf));
fprintf(fp, "%s",
- rt_addr_n2a(family, coa,
+ rt_addr_n2a(family, sizeof(*coa), coa,
abuf, sizeof(abuf)));
fprintf(fp, "%s", _SL_);
}
if (tb[IFLA_IPTUN_REMOTE]) {
fprintf(f, "remote %s ",
rt_addr_n2a(AF_INET6,
+ RTA_PAYLOAD(tb[IFLA_IPTUN_REMOTE]),
RTA_DATA(tb[IFLA_IPTUN_REMOTE]),
s1, sizeof(s1)));
}
if (tb[IFLA_IPTUN_LOCAL]) {
fprintf(f, "local %s ",
rt_addr_n2a(AF_INET6,
+ RTA_PAYLOAD(tb[IFLA_IPTUN_LOCAL]),
RTA_DATA(tb[IFLA_IPTUN_LOCAL]),
s1, sizeof(s1)));
}
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <netinet/in.h>
#include <linux/xfrm.h>
+
#include "utils.h"
#include "xfrm.h"
#include "ip_common.h"
buf[0] = 0;
fprintf(fp, "dst %s ",
- rt_addr_n2a(sa_id->family, &sa_id->daddr, buf, sizeof(buf)));
+ rt_addr_n2a(sa_id->family, sizeof(sa_id->daddr), &sa_id->daddr,
+ buf, sizeof(buf)));
fprintf(fp, " reqid 0x%x", reqid);
xfrm_ae_flags_print(id->flags, arg);
fprintf(fp,"\n\t");
memset(abuf, '\0', sizeof(abuf));
- fprintf(fp, "src %s ", rt_addr_n2a(id->sa_id.family, &id->saddr,
+ fprintf(fp, "src %s ", rt_addr_n2a(id->sa_id.family,
+ sizeof(id->saddr), &id->saddr,
abuf, sizeof(abuf)));
xfrm_usersa_print(&id->sa_id, id->reqid, fp);
char buf[256];
buf[0] = 0;
- fprintf(fp, "%s", rt_addr_n2a(family, a, buf, sizeof(buf)));
+ fprintf(fp, "%s", rt_addr_n2a(family, sizeof(*a), a, buf, sizeof(buf)));
}
static int xfrm_mapping_print(const struct sockaddr_nl *who,
fprintf(stderr, " [ index INDEX ] [ ptype PTYPE ] [ action ACTION ] [ priority PRIORITY ]\n");
fprintf(stderr, " [ flag FLAG-LIST ]\n");
fprintf(stderr, "Usage: ip xfrm policy flush [ ptype PTYPE ]\n");
- fprintf(stderr, "Usage: ip xfrm count\n");
+ fprintf(stderr, "Usage: ip xfrm policy count\n");
+ fprintf(stderr, "Usage: ip xfrm policy set [ hthresh4 LBITS RBITS ] [ hthresh6 LBITS RBITS ]\n");
fprintf(stderr, "SELECTOR := [ src ADDR[/PLEN] ] [ dst ADDR[/PLEN] ] [ dev DEV ] [ UPSPEC ]\n");
fprintf(stderr, "UPSPEC := proto { { ");
fprintf(stderr, "%s | ", strxf_proto(IPPROTO_TCP));
fprintf(fp,")");
}
- fprintf(fp,"\n");
+ fprintf(fp, "%s", _SL_);
}
if (show_stats > 1) {
struct xfrmu_spdhinfo *sh;
fprintf(fp,"\t SPD buckets:");
fprintf(fp," count %d", sh->spdhcnt);
fprintf(fp," Max %d", sh->spdhmcnt);
+ fprintf(fp, "%s", _SL_);
+ }
+ if (tb[XFRMA_SPD_IPV4_HTHRESH]) {
+ struct xfrmu_spdhthresh *th;
+ if (RTA_PAYLOAD(tb[XFRMA_SPD_IPV4_HTHRESH]) < sizeof(*th)) {
+ fprintf(stderr, "SPDinfo: Wrong len %d\n", len);
+ return -1;
+ }
+ th = RTA_DATA(tb[XFRMA_SPD_IPV4_HTHRESH]);
+ fprintf(fp,"\t SPD IPv4 thresholds:");
+ fprintf(fp," local %d", th->lbits);
+ fprintf(fp," remote %d", th->rbits);
+ fprintf(fp, "%s", _SL_);
+
+ }
+ if (tb[XFRMA_SPD_IPV6_HTHRESH]) {
+ struct xfrmu_spdhthresh *th;
+ if (RTA_PAYLOAD(tb[XFRMA_SPD_IPV6_HTHRESH]) < sizeof(*th)) {
+ fprintf(stderr, "SPDinfo: Wrong len %d\n", len);
+ return -1;
+ }
+ th = RTA_DATA(tb[XFRMA_SPD_IPV6_HTHRESH]);
+ fprintf(fp,"\t SPD IPv6 thresholds:");
+ fprintf(fp," local %d", th->lbits);
+ fprintf(fp," remote %d", th->rbits);
+ fprintf(fp, "%s", _SL_);
}
}
- fprintf(fp,"\n");
+
+ if (oneline)
+ fprintf(fp, "\n");
return 0;
}
+static int xfrm_spd_setinfo(int argc, char **argv)
+{
+ struct rtnl_handle rth;
+ struct {
+ struct nlmsghdr n;
+ __u32 flags;
+ char buf[RTA_BUF_SIZE];
+ } req;
+
+ char *thr4 = NULL;
+ char *thr6 = NULL;
+
+ memset(&req, 0, sizeof(req));
+
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(__u32));
+ req.n.nlmsg_flags = NLM_F_REQUEST;
+ req.n.nlmsg_type = XFRM_MSG_NEWSPDINFO;
+ req.flags = 0XFFFFFFFF;
+
+ while (argc > 0) {
+ if (strcmp(*argv, "hthresh4") == 0) {
+ struct xfrmu_spdhthresh thr;
+
+ if (thr4)
+ duparg("hthresh4", *argv);
+ thr4 = *argv;
+ NEXT_ARG();
+ if (get_u8(&thr.lbits, *argv, 0) || thr.lbits > 32)
+ invarg("hthresh4 LBITS value is invalid", *argv);
+ NEXT_ARG();
+ if (get_u8(&thr.rbits, *argv, 0) || thr.rbits > 32)
+ invarg("hthresh4 RBITS value is invalid", *argv);
+
+ addattr_l(&req.n, sizeof(req), XFRMA_SPD_IPV4_HTHRESH,
+ (void *)&thr, sizeof(thr));
+ } else if (strcmp(*argv, "hthresh6") == 0) {
+ struct xfrmu_spdhthresh thr;
+
+ if (thr6)
+ duparg("hthresh6", *argv);
+ thr6 = *argv;
+ NEXT_ARG();
+ if (get_u8(&thr.lbits, *argv, 0) || thr.lbits > 128)
+ invarg("hthresh6 LBITS value is invalid", *argv);
+ NEXT_ARG();
+ if (get_u8(&thr.rbits, *argv, 0) || thr.rbits > 128)
+ invarg("hthresh6 RBITS value is invalid", *argv);
+
+ addattr_l(&req.n, sizeof(req), XFRMA_SPD_IPV6_HTHRESH,
+ (void *)&thr, sizeof(thr));
+ } else {
+ invarg("unknown", *argv);
+ }
+
+ argc--; argv++;
+ }
+
+ if (rtnl_open_byproto(&rth, 0, NETLINK_XFRM) < 0)
+ exit(1);
+
+ if (rtnl_talk(&rth, &req.n, 0, 0, NULL) < 0)
+ exit(2);
+
+ rtnl_close(&rth);
+
+ return 0;
+}
+
static int xfrm_spd_getinfo(int argc, char **argv)
{
struct rtnl_handle rth;
return xfrm_policy_flush(argc-1, argv+1);
if (matches(*argv, "count") == 0)
return xfrm_spd_getinfo(argc, argv);
+ if (matches(*argv, "set") == 0)
+ return xfrm_spd_setinfo(argc-1, argv+1);
if (matches(*argv, "help") == 0)
usage();
fprintf(stderr, "Command \"%s\" is unknown, try \"ip xfrm policy help\".\n", *argv);
#include "utils.h"
-const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int blen)
+const char *ll_addr_n2a(const unsigned char *addr, int alen, int type, char *buf, int blen)
{
int i;
int l;
--- /dev/null
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <linux/mpls.h>
+
+#include "utils.h"
+
+static const char *mpls_ntop1(const struct mpls_label *addr, char *buf, size_t buflen)
+{
+ size_t destlen = buflen;
+ char *dest = buf;
+ int count;
+
+ for (count = 0; count < MPLS_MAX_LABELS; count++) {
+ uint32_t entry = ntohl(addr[count].entry);
+ uint32_t label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+ int len = snprintf(dest, destlen, "%u", label);
+
+ /* Is this the end? */
+ if (entry & MPLS_LS_S_MASK)
+ return buf;
+
+
+ dest += len;
+ destlen -= len;
+ if (destlen) {
+ *dest = '/';
+ dest++;
+ destlen--;
+ }
+ }
+ errno = -E2BIG;
+ return NULL;
+}
+
+const char *mpls_ntop(int af, const void *addr, char *buf, size_t buflen)
+{
+ switch(af) {
+ case AF_MPLS:
+ errno = 0;
+ return mpls_ntop1((struct mpls_label *)addr, buf, buflen);
+ default:
+ errno = EAFNOSUPPORT;
+ }
+
+ return NULL;
+}
--- /dev/null
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <linux/mpls.h>
+
+#include "utils.h"
+
+
+static int mpls_pton1(const char *name, struct mpls_label *addr)
+{
+ char *endp;
+ unsigned count;
+
+ for (count = 0; count < MPLS_MAX_LABELS; count++) {
+ unsigned long label;
+
+ label = strtoul(name, &endp, 0);
+ /* Fail when the label value is out or range */
+ if (label >= (1 << 20))
+ return 0;
+
+ if (endp == name) /* no digits */
+ return 0;
+
+ addr->entry = htonl(label << MPLS_LS_LABEL_SHIFT);
+ if (*endp == '\0') {
+ addr->entry |= htonl(1 << MPLS_LS_S_SHIFT);
+ return 1;
+ }
+
+ /* Bad character in the address */
+ if (*endp != '/')
+ return 0;
+
+ name = endp + 1;
+ addr += 1;
+ }
+ /* The address was too long */
+ return 0;
+}
+
+int mpls_pton(int af, const char *src, void *addr)
+{
+ int err;
+
+ switch(af) {
+ case AF_MPLS:
+ errno = 0;
+ err = mpls_pton1(src, (struct mpls_label *)addr);
+ break;
+ default:
+ errno = EAFNOSUPPORT;
+ err = -1;
+ }
+
+ return err;
+}
#include <asm/types.h>
#include <linux/pkt_sched.h>
#include <linux/param.h>
+#include <linux/if_arp.h>
+#include <linux/mpls.h>
#include <time.h>
#include <sys/time.h>
#include <errno.h>
-
+#include "rt_names.h"
#include "utils.h"
#include "namespace.h"
if (strcmp(name, "default") == 0 ||
strcmp(name, "all") == 0 ||
strcmp(name, "any") == 0) {
- if (family == AF_DECnet)
+ if ((family == AF_DECnet) || (family == AF_MPLS))
return -1;
addr->family = family;
addr->bytelen = (family == AF_INET6 ? 16 : 4);
return 0;
}
+ if (family == AF_PACKET) {
+ int len;
+ len = ll_addr_a2n((char *)&addr->data, sizeof(addr->data), name);
+ if (len < 0)
+ return -1;
+
+ addr->family = AF_PACKET;
+ addr->bytelen = len;
+ addr->bitlen = len * 8;
+ return 0;
+ }
+
if (strchr(name, ':')) {
addr->family = AF_INET6;
if (family != AF_UNSPEC && family != AF_INET6)
return 0;
}
+ if (family == AF_MPLS) {
+ int i;
+ addr->family = AF_MPLS;
+ if (mpls_pton(AF_MPLS, name, addr->data) <= 0)
+ return -1;
+ addr->bytelen = 4;
+ addr->bitlen = 20;
+ /* How many bytes do I need? */
+ for (i = 0; i < 8; i++) {
+ if (ntohl(addr->data[i]) & MPLS_LS_S_MASK) {
+ addr->bytelen = (i + 1)*4;
+ break;
+ }
+ }
+ return 0;
+ }
+
addr->family = AF_INET;
if (family != AF_UNSPEC && family != AF_INET)
return -1;
return 16;
case AF_IPX:
return 80;
+ case AF_MPLS:
+ return 20;
}
return 0;
if (strcmp(arg, "default") == 0 ||
strcmp(arg, "any") == 0 ||
strcmp(arg, "all") == 0) {
- if (family == AF_DECnet)
+ if ((family == AF_DECnet) || (family = AF_MPLS))
return -1;
dst->family = family;
dst->bytelen = 0;
int get_addr(inet_prefix *dst, const char *arg, int family)
{
- if (family == AF_PACKET) {
- fprintf(stderr, "Error: \"%s\" may be inet address, but it is not allowed in this context.\n", arg);
- exit(1);
- }
if (get_addr_1(dst, arg, family)) {
fprintf(stderr, "Error: an inet address is expected rather than \"%s\".\n", arg);
exit(1);
return sysconf(_SC_CLK_TCK);
}
-const char *rt_addr_n2a(int af, const void *addr, char *buf, int buflen)
+const char *rt_addr_n2a(int af, int len, const void *addr, char *buf, int buflen)
{
switch (af) {
case AF_INET:
case AF_INET6:
return inet_ntop(af, addr, buf, buflen);
+ case AF_MPLS:
+ return mpls_ntop(af, addr, buf, buflen);
case AF_IPX:
return ipx_ntop(af, addr, buf, buflen);
case AF_DECnet:
memcpy(dna.a_addr, addr, 2);
return dnet_ntop(af, &dna, buf, buflen);
}
+ case AF_PACKET:
+ return ll_addr_n2a(addr, len, ARPHRD_VOID, buf, buflen);
default:
return "???";
}
}
+int read_family(const char *name)
+{
+ int family = AF_UNSPEC;
+ if (strcmp(name, "inet") == 0)
+ family = AF_INET;
+ else if (strcmp(name, "inet6") == 0)
+ family = AF_INET6;
+ else if (strcmp(name, "dnet") == 0)
+ family = AF_DECnet;
+ else if (strcmp(name, "link") == 0)
+ family = AF_PACKET;
+ else if (strcmp(name, "ipx") == 0)
+ family = AF_IPX;
+ else if (strcmp(name, "mpls") == 0)
+ family = AF_MPLS;
+ else if (strcmp(name, "bridge") == 0)
+ family = AF_BRIDGE;
+ return family;
+}
+
+const char *family_name(int family)
+{
+ if (family == AF_INET)
+ return "inet";
+ if (family == AF_INET6)
+ return "inet6";
+ if (family == AF_DECnet)
+ return "dnet";
+ if (family == AF_PACKET)
+ return "link";
+ if (family == AF_IPX)
+ return "ipx";
+ if (family == AF_MPLS)
+ return "mpls";
+ if (family == AF_BRIDGE)
+ return "bridge";
+ return "???";
+}
+
#ifdef RESOLVE_HOSTNAMES
struct namerec
{
return n;
}
#endif
- return rt_addr_n2a(af, addr, buf, buflen);
+ return rt_addr_n2a(af, len, addr, buf, buflen);
}
.ti -8
.IR NH " := [ "
.B via
-.IR ADDRESS " ] [ "
+[
+.IR FAMILY " ] " ADDRESS " ] [ "
.B dev
.IR STRING " ] [ "
.B weight
.IR NUMBER " ] " NHFLAGS
+.ti -8
+.IR FAMILY " := [ "
+.BR inet " | " inet6 " | " ipx " | " dnet " | " mpls " | " bridge " | " link " ]"
+
.ti -8
.IR OPTIONS " := " FLAGS " [ "
.B mtu
.IR NUMBER " ] [ "
.B advmss
.IR NUMBER " ] [ "
+.B as
+[
+.B to
+]
+.IR ADDRESS " ]"
.B rtt
.IR TIME " ] [ "
.B rttvar
.IR BOOL " ] [ "
.B congctl
.IR NAME " ]"
+.B pref
+.IR PREF " ]"
.ti -8
.IR TYPE " := [ "
.IR FEATURES " := [ "
.BR ecn " | ]"
+.ti -8
+.IR PREF " := [ "
+.BR low " | " medium " | " high " ]"
+
.SH DESCRIPTION
.B ip route
the output device name.
.TP
-.BI via " ADDRESS"
-the address of the nexthop router. Actually, the sense of this field
-depends on the route type. For normal
+.BI via " [ FAMILY ] ADDRESS"
+the address of the nexthop router, in the address family FAMILY.
+Actually, the sense of this field depends on the route type. For
+normal
.B unicast
routes it is either the true next hop router or, if it is a direct
route installed in BSD compatibility mode, it can be a local address
argument lists:
.in +8
-.BI via " ADDRESS"
+.BI via " [ FAMILY ] ADDRESS"
- is the nexthop router.
.sp
.B onlink
pretend that the nexthop is directly attached to this link,
even if it does not match any interface prefix.
+
+.TP
+.BI pref " PREF"
+the IPv6 route preference.
+.I PREF
+is a string specifying the route preference as defined in RFC4191 for Router
+Discovery messages. Namely:
+
+.in +8
+.B low
+- the route has a lowest priority
+.sp
+
+.B medium
+- the route has a default priority
+.sp
+
+.B high
+- the route has a highest priority
+.sp
+
+.in -8
.RE
.TP
only list routes going via this device.
.TP
-.BI via " PREFIX"
+.BI via " [ FAMILY ] PREFIX"
only list routes going via the nexthop routers selected by
.IR PREFIX "."
.ti -8
.B "ip xfrm policy count"
+.ti -8
+.B "ip xfrm policy set"
+.RB "[ " hthresh4
+.IR LBITS " " RBITS " ]"
+.RB "[ " hthresh6
+.IR LBITS " " RBITS " ]"
+
.ti -8
.IR SELECTOR " :="
.RB "[ " src
.BR "ip xfrm monitor" " [ " all " |"
.IR LISTofXFRM-OBJECTS " ]"
+.ti -8
+.IR LISTofXFRM-OBJECTS " := [ " LISTofXFRM-OBJECTS " ] " XFRM-OBJECT
+
+.ti -8
+.IR XFRM-OBJECT " := "
+.BR acquire " | " expire " | " SA " | " policy " | " aevent " | " report
+
.in -8
.ad b
ip xfrm state list print out the list of existing state in xfrm
ip xfrm state flush flush all state in xfrm
ip xfrm state count count all existing state in xfrm
-ip xfrm monitor state monitoring for xfrm objects
.TE
.TP
.BR espinudp " or " espinudp-nonike ","
.RI "using source port " SPORT ", destination port " DPORT
.RI ", and original address " OADDR "."
+
.sp
+.PP
.TS
l l.
ip xfrm policy add add a new policy
ip xfrm policy deleteall delete all existing xfrm policies
ip xfrm policy list print out the list of xfrm policies
ip xfrm policy flush flush policies
-ip xfrm policy count count existing policies
.TE
.TP
can be
.BR required " (default) or " use "."
+.sp
+.PP
+.TS
+l l.
+ip xfrm policy count count existing policies
+.TE
+
+.PP
+Use one or more -s options to display more details, including policy hash table
+information.
+
+.sp
+.PP
+.TS
+l l.
+ip xfrm policy set configure the policy hash table
+.TE
+
+.PP
+Security policies whose address prefix lengths are greater than or equal
+policy hash table thresholds are hashed. Others are stored in the
+policy_inexact chained list.
+
+.TP
+.I LBITS
+specifies the minimum local address prefix length of policies that are
+stored in the Security Policy Database hash table.
+
+.TP
+.I RBITS
+specifies the minimum remote address prefix length of policies that are
+stored in the Security Policy Database hash table.
+
+.sp
+.PP
+.TS
+l l.
+ip xfrm monitor state monitoring for xfrm objects
+.TE
+
+.PP
The xfrm objects to monitor can be optionally specified.
.SH AUTHOR
Manpage revised by David Ward <david.ward@ll.mit.edu>
+.br
+Manpage revised by Christophe Gouault <christophe.gouault@6wind.com>
.TP
.BR "\-f" , " \-family " <FAMILY>
Specifies the protocol family to use. The protocol family identifier can be one of
-.BR "inet" , " inet6" , " bridge" , " ipx" , " dnet"
+.BR "inet" , " inet6" , " bridge" , " ipx" , " dnet" , " mpls"
or
.BR link .
If this option is not present,
shortcut for
.BR "\-family ipx" .
+.TP
+.B \-M
+shortcut for
+.BR "\-family mpls" .
+
.TP
.B \-0
shortcut for
endif
endif
+ifeq ($(TC_CONFIG_ELF),y)
+ CFLAGS += -DHAVE_ELF
+ LDLIBS += -lelf
+endif
+
TCOBJ += $(TCMODULES)
LDLIBS += -L. -ltc -lm
fprintf(stderr, "\n");
fprintf(stderr, " [inline]: run bytecode BPF_BYTECODE\n");
fprintf(stderr, " [from file]: run bytecode-file FILE\n");
+ fprintf(stderr, " [from file]: run object-file FILE\n");
fprintf(stderr, "\n");
fprintf(stderr, " [ action ACTION_SPEC ]\n");
fprintf(stderr, " [ classid CLASSID ]\n");
fprintf(stderr, "\n");
fprintf(stderr, "Where BPF_BYTECODE := \'s,c t f k,c t f k,c t f k,...\'\n");
fprintf(stderr, " c,t,f,k and s are decimals; s denotes number of 4-tuples\n");
- fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string\n");
+ fprintf(stderr, "Where FILE points to a file containing the BPF_BYTECODE string,\n");
+ fprintf(stderr, "or an ELF file containing eBPF map definitions and bytecode.\n");
fprintf(stderr, "\nACTION_SPEC := ... look at individual actions\n");
fprintf(stderr, "NOTE: CLASSID is parsed as hexadecimal input.\n");
}
while (argc > 0) {
if (matches(*argv, "run") == 0) {
- bool from_file;
+ bool from_file = true, ebpf;
struct sock_filter bpf_ops[BPF_MAXINSNS];
- __u16 bpf_len;
int ret;
NEXT_ARG();
if (strcmp(*argv, "bytecode-file") == 0) {
- from_file = true;
+ ebpf = false;
} else if (strcmp(*argv, "bytecode") == 0) {
from_file = false;
+ ebpf = false;
+ } else if (strcmp(*argv, "object-file") == 0) {
+ ebpf = true;
} else {
fprintf(stderr, "What is \"%s\"?\n", *argv);
explain();
return -1;
}
NEXT_ARG();
- ret = bpf_parse_ops(argc, argv, bpf_ops, from_file);
+ ret = ebpf ? bpf_open_object(*argv, BPF_PROG_TYPE_SCHED_CLS) :
+ bpf_parse_ops(argc, argv, bpf_ops, from_file);
if (ret < 0) {
- fprintf(stderr, "Illegal \"bytecode\"\n");
+ fprintf(stderr, "%s\n", ebpf ?
+ "Could not load object" :
+ "Illegal \"bytecode\"");
return -1;
}
- bpf_len = ret;
- addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, bpf_len);
- addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops,
- bpf_len * sizeof(struct sock_filter));
+ if (ebpf) {
+ addattr32(n, MAX_MSG, TCA_BPF_FD, ret);
+ addattrstrz(n, MAX_MSG, TCA_BPF_NAME, *argv);
+ } else {
+ addattr16(n, MAX_MSG, TCA_BPF_OPS_LEN, ret);
+ addattr_l(n, MAX_MSG, TCA_BPF_OPS, &bpf_ops,
+ ret * sizeof(struct sock_filter));
+ }
} else if (matches(*argv, "classid") == 0 ||
strcmp(*argv, "flowid") == 0) {
unsigned handle;
sprint_tc_classid(rta_getattr_u32(tb[TCA_BPF_CLASSID]), b1));
}
+ if (tb[TCA_BPF_NAME])
+ fprintf(f, "%s ", rta_getattr_str(tb[TCA_BPF_NAME]));
+ else if (tb[TCA_BPF_FD])
+ fprintf(f, "pfd %u ", rta_getattr_u32(tb[TCA_BPF_FD]));
+
if (tb[TCA_BPF_OPS] && tb[TCA_BPF_OPS_LEN])
bpf_print_ops(f, tb[TCA_BPF_OPS],
rta_getattr_u16(tb[TCA_BPF_OPS_LEN]));
*
* Authors: Daniel Borkmann <dborkman@redhat.com>
* Jiri Pirko <jiri@resnulli.us>
+ * Alexei Starovoitov <ast@plumgrid.com>
*/
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <errno.h>
+#include <fcntl.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
#include <linux/filter.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
+#ifdef HAVE_ELF
+#include <libelf.h>
+#include <gelf.h>
+#endif
+
#include "utils.h"
#include "tc_util.h"
#include "tc_bpf.h"
fprintf(f, "%hu %hhu %hhu %u\'\n", ops[i].code, ops[i].jt,
ops[i].jf, ops[i].k);
}
+
+#ifdef HAVE_ELF
+struct bpf_elf_sec_data {
+ GElf_Shdr sec_hdr;
+ char *sec_name;
+ Elf_Data *sec_data;
+};
+
+static char bpf_log_buf[8192];
+
+static const char *prog_type_section(enum bpf_prog_type type)
+{
+ switch (type) {
+ case BPF_PROG_TYPE_SCHED_CLS:
+ return ELF_SECTION_CLASSIFIER;
+ /* case BPF_PROG_TYPE_SCHED_ACT: */
+ /* return ELF_SECTION_ACTION; */
+ default:
+ return NULL;
+ }
+}
+
+static void bpf_dump_error(const char *format, ...) __check_format_string(1, 2);
+static void bpf_dump_error(const char *format, ...)
+{
+ va_list vl;
+
+ va_start(vl, format);
+ vfprintf(stderr, format, vl);
+ va_end(vl);
+
+ fprintf(stderr, "%s", bpf_log_buf);
+ memset(bpf_log_buf, 0, sizeof(bpf_log_buf));
+}
+
+static int bpf_create_map(enum bpf_map_type type, unsigned int size_key,
+ unsigned int size_value, unsigned int max_elem)
+{
+ union bpf_attr attr = {
+ .map_type = type,
+ .key_size = size_key,
+ .value_size = size_value,
+ .max_entries = max_elem,
+ };
+
+ return bpf(BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+static int bpf_prog_load(enum bpf_prog_type type, const struct bpf_insn *insns,
+ unsigned int len, const char *license)
+{
+ union bpf_attr attr = {
+ .prog_type = type,
+ .insns = bpf_ptr_to_u64(insns),
+ .insn_cnt = len / sizeof(struct bpf_insn),
+ .license = bpf_ptr_to_u64(license),
+ .log_buf = bpf_ptr_to_u64(bpf_log_buf),
+ .log_size = sizeof(bpf_log_buf),
+ .log_level = 1,
+ };
+
+ return bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+}
+
+static int bpf_prog_attach(enum bpf_prog_type type, const struct bpf_insn *insns,
+ unsigned int size, const char *license)
+{
+ int prog_fd = bpf_prog_load(type, insns, size, license);
+
+ if (prog_fd < 0)
+ bpf_dump_error("BPF program rejected: %s\n", strerror(errno));
+
+ return prog_fd;
+}
+
+static int bpf_map_attach(enum bpf_map_type type, unsigned int size_key,
+ unsigned int size_value, unsigned int max_elem)
+{
+ int map_fd = bpf_create_map(type, size_key, size_value, max_elem);
+
+ if (map_fd < 0)
+ bpf_dump_error("BPF map rejected: %s\n", strerror(errno));
+
+ return map_fd;
+}
+
+static void bpf_maps_init(int *map_fds, unsigned int max_fds)
+{
+ int i;
+
+ for (i = 0; i < max_fds; i++)
+ map_fds[i] = -1;
+}
+
+static void bpf_maps_destroy(const int *map_fds, unsigned int max_fds)
+{
+ int i;
+
+ for (i = 0; i < max_fds; i++) {
+ if (map_fds[i] >= 0)
+ close(map_fds[i]);
+ }
+}
+
+static int bpf_maps_attach(struct bpf_elf_map *maps, unsigned int num_maps,
+ int *map_fds, unsigned int max_fds)
+{
+ int i, ret;
+
+ for (i = 0; i < num_maps && num_maps <= max_fds; i++) {
+ struct bpf_elf_map *map = &maps[i];
+
+ ret = bpf_map_attach(map->type, map->size_key,
+ map->size_value, map->max_elem);
+ if (ret < 0)
+ goto err_unwind;
+
+ map_fds[i] = ret;
+ }
+
+ return 0;
+
+err_unwind:
+ bpf_maps_destroy(map_fds, i);
+ return ret;
+}
+
+static int bpf_fill_section_data(Elf *elf_fd, GElf_Ehdr *elf_hdr, int sec_index,
+ struct bpf_elf_sec_data *sec_data)
+{
+ GElf_Shdr sec_hdr;
+ Elf_Scn *sec_fd;
+ Elf_Data *sec_edata;
+ char *sec_name;
+
+ memset(sec_data, 0, sizeof(*sec_data));
+
+ sec_fd = elf_getscn(elf_fd, sec_index);
+ if (!sec_fd)
+ return -EINVAL;
+
+ if (gelf_getshdr(sec_fd, &sec_hdr) != &sec_hdr)
+ return -EIO;
+
+ sec_name = elf_strptr(elf_fd, elf_hdr->e_shstrndx,
+ sec_hdr.sh_name);
+ if (!sec_name || !sec_hdr.sh_size)
+ return -ENOENT;
+
+ sec_edata = elf_getdata(sec_fd, NULL);
+ if (!sec_edata || elf_getdata(sec_fd, sec_edata))
+ return -EIO;
+
+ memcpy(&sec_data->sec_hdr, &sec_hdr, sizeof(sec_hdr));
+ sec_data->sec_name = sec_name;
+ sec_data->sec_data = sec_edata;
+
+ return 0;
+}
+
+static int bpf_apply_relo_data(struct bpf_elf_sec_data *data_relo,
+ struct bpf_elf_sec_data *data_insn,
+ Elf_Data *sym_tab, int *map_fds, int max_fds)
+{
+ Elf_Data *idata = data_insn->sec_data;
+ GElf_Shdr *rhdr = &data_relo->sec_hdr;
+ int relo_ent, relo_num = rhdr->sh_size / rhdr->sh_entsize;
+ struct bpf_insn *insns = idata->d_buf;
+ unsigned int num_insns = idata->d_size / sizeof(*insns);
+
+ for (relo_ent = 0; relo_ent < relo_num; relo_ent++) {
+ unsigned int ioff, fnum;
+ GElf_Rel relo;
+ GElf_Sym sym;
+
+ if (gelf_getrel(data_relo->sec_data, relo_ent, &relo) != &relo)
+ return -EIO;
+
+ ioff = relo.r_offset / sizeof(struct bpf_insn);
+ if (ioff >= num_insns)
+ return -EINVAL;
+ if (insns[ioff].code != (BPF_LD | BPF_IMM | BPF_DW))
+ return -EINVAL;
+
+ if (gelf_getsym(sym_tab, GELF_R_SYM(relo.r_info), &sym) != &sym)
+ return -EIO;
+
+ fnum = sym.st_value / sizeof(struct bpf_elf_map);
+ if (fnum >= max_fds)
+ return -EINVAL;
+
+ insns[ioff].src_reg = BPF_PSEUDO_MAP_FD;
+ insns[ioff].imm = map_fds[fnum];
+ }
+
+ return 0;
+}
+
+static int bpf_fetch_ancillary(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
+ int *map_fds, unsigned int max_fds,
+ char *license, unsigned int lic_len,
+ Elf_Data **sym_tab)
+{
+ int sec_index, ret = -1;
+
+ for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
+ struct bpf_elf_sec_data data_anc;
+
+ ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
+ &data_anc);
+ if (ret < 0)
+ continue;
+
+ /* Extract and load eBPF map fds. */
+ if (!strcmp(data_anc.sec_name, ELF_SECTION_MAPS)) {
+ struct bpf_elf_map *maps = data_anc.sec_data->d_buf;
+ unsigned int maps_num = data_anc.sec_data->d_size /
+ sizeof(*maps);
+
+ sec_seen[sec_index] = true;
+ ret = bpf_maps_attach(maps, maps_num, map_fds,
+ max_fds);
+ if (ret < 0)
+ return ret;
+ }
+ /* Extract eBPF license. */
+ else if (!strcmp(data_anc.sec_name, ELF_SECTION_LICENSE)) {
+ if (data_anc.sec_data->d_size > lic_len)
+ return -ENOMEM;
+
+ sec_seen[sec_index] = true;
+ memcpy(license, data_anc.sec_data->d_buf,
+ data_anc.sec_data->d_size);
+ }
+ /* Extract symbol table for relocations (map fd fixups). */
+ else if (data_anc.sec_hdr.sh_type == SHT_SYMTAB) {
+ sec_seen[sec_index] = true;
+ *sym_tab = data_anc.sec_data;
+ }
+ }
+
+ return ret;
+}
+
+static int bpf_fetch_prog_relo(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
+ enum bpf_prog_type type, char *license,
+ Elf_Data *sym_tab, int *map_fds, unsigned int max_fds)
+{
+ int sec_index, prog_fd = -1;
+
+ for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
+ struct bpf_elf_sec_data data_relo, data_insn;
+ int ins_index, ret;
+
+ /* Attach eBPF programs with relocation data (maps). */
+ ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
+ &data_relo);
+ if (ret < 0 || data_relo.sec_hdr.sh_type != SHT_REL)
+ continue;
+
+ ins_index = data_relo.sec_hdr.sh_info;
+
+ ret = bpf_fill_section_data(elf_fd, elf_hdr, ins_index,
+ &data_insn);
+ if (ret < 0)
+ continue;
+ if (strcmp(data_insn.sec_name, prog_type_section(type)))
+ continue;
+
+ sec_seen[sec_index] = true;
+ sec_seen[ins_index] = true;
+
+ ret = bpf_apply_relo_data(&data_relo, &data_insn, sym_tab,
+ map_fds, max_fds);
+ if (ret < 0)
+ continue;
+
+ prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf,
+ data_insn.sec_data->d_size, license);
+ if (prog_fd < 0)
+ continue;
+
+ break;
+ }
+
+ return prog_fd;
+}
+
+static int bpf_fetch_prog(Elf *elf_fd, GElf_Ehdr *elf_hdr, bool *sec_seen,
+ enum bpf_prog_type type, char *license)
+{
+ int sec_index, prog_fd = -1;
+
+ for (sec_index = 1; sec_index < elf_hdr->e_shnum; sec_index++) {
+ struct bpf_elf_sec_data data_insn;
+ int ret;
+
+ /* Attach eBPF programs without relocation data. */
+ if (sec_seen[sec_index])
+ continue;
+
+ ret = bpf_fill_section_data(elf_fd, elf_hdr, sec_index,
+ &data_insn);
+ if (ret < 0)
+ continue;
+ if (strcmp(data_insn.sec_name, prog_type_section(type)))
+ continue;
+
+ prog_fd = bpf_prog_attach(type, data_insn.sec_data->d_buf,
+ data_insn.sec_data->d_size, license);
+ if (prog_fd < 0)
+ continue;
+
+ break;
+ }
+
+ return prog_fd;
+}
+
+int bpf_open_object(const char *path, enum bpf_prog_type type)
+{
+ int map_fds[ELF_MAX_MAPS], max_fds = ARRAY_SIZE(map_fds);
+ char license[ELF_MAX_LICENSE_LEN];
+ int file_fd, prog_fd = -1, ret;
+ Elf_Data *sym_tab = NULL;
+ GElf_Ehdr elf_hdr;
+ bool *sec_seen;
+ Elf *elf_fd;
+
+ if (elf_version(EV_CURRENT) == EV_NONE)
+ return -EINVAL;
+
+ file_fd = open(path, O_RDONLY, 0);
+ if (file_fd < 0)
+ return -errno;
+
+ elf_fd = elf_begin(file_fd, ELF_C_READ, NULL);
+ if (!elf_fd) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (gelf_getehdr(elf_fd, &elf_hdr) != &elf_hdr) {
+ ret = -EIO;
+ goto out_elf;
+ }
+
+ sec_seen = calloc(elf_hdr.e_shnum, sizeof(*sec_seen));
+ if (!sec_seen) {
+ ret = -ENOMEM;
+ goto out_elf;
+ }
+
+ memset(license, 0, sizeof(license));
+ bpf_maps_init(map_fds, max_fds);
+
+ ret = bpf_fetch_ancillary(elf_fd, &elf_hdr, sec_seen, map_fds, max_fds,
+ license, sizeof(license), &sym_tab);
+ if (ret < 0)
+ goto out_maps;
+ if (sym_tab)
+ prog_fd = bpf_fetch_prog_relo(elf_fd, &elf_hdr, sec_seen, type,
+ license, sym_tab, map_fds, max_fds);
+ if (prog_fd < 0)
+ prog_fd = bpf_fetch_prog(elf_fd, &elf_hdr, sec_seen, type,
+ license);
+ if (prog_fd < 0)
+ goto out_maps;
+out_sec:
+ free(sec_seen);
+out_elf:
+ elf_end(elf_fd);
+out:
+ close(file_fd);
+ return prog_fd;
+
+out_maps:
+ bpf_maps_destroy(map_fds, max_fds);
+ goto out_sec;
+}
+
+#endif /* HAVE_ELF */
#ifndef _TC_BPF_H_
#define _TC_BPF_H_ 1
-#include <stdio.h>
#include <linux/filter.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
+#include <linux/bpf.h>
+#include <sys/syscall.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include "utils.h"
+
+/* Note:
+ *
+ * Below ELF section names and bpf_elf_map structure definition
+ * are not (!) kernel ABI. It's rather a "contract" between the
+ * application and the BPF loader in tc. For compatibility, the
+ * section names should stay as-is. Introduction of aliases, if
+ * needed, are a possibility, though.
+ */
+
+/* ELF section names, etc */
+#define ELF_SECTION_LICENSE "license"
+#define ELF_SECTION_MAPS "maps"
+#define ELF_SECTION_CLASSIFIER "classifier"
+#define ELF_SECTION_ACTION "action"
+
+#define ELF_MAX_MAPS 64
+#define ELF_MAX_LICENSE_LEN 128
+
+/* ELF map definition */
+struct bpf_elf_map {
+ __u32 type;
+ __u32 size_key;
+ __u32 size_value;
+ __u32 max_elem;
+};
int bpf_parse_string(char *arg, bool from_file, __u16 *bpf_len,
char **bpf_string, bool *need_release,
bool from_file);
void bpf_print_ops(FILE *f, struct rtattr *bpf_ops, __u16 len);
+static inline __u64 bpf_ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+#ifdef HAVE_ELF
+int bpf_open_object(const char *path, enum bpf_prog_type type);
+
+static inline int bpf(int cmd, union bpf_attr *attr, unsigned int size)
+{
+#ifdef __NR_bpf
+ return syscall(__NR_bpf, cmd, attr, size);
+#else
+ errno = ENOSYS;
+ return -1;
#endif
+}
+#else
+static inline int bpf_open_object(const char *path, enum bpf_prog_type type)
+{
+ errno = ENOSYS;
+ return -1;
+}
+#endif /* HAVE_ELF */
+#endif /* _TC_BPF_H_ */