From: Thomas Graf
Date: Mon, 12 Dec 2016 00:14:35 +0000 (+0100)
Subject: lwt: BPF support for LWT
X-Git-Tag: v4.13.0~297
X-Git-Url: https://git.proxmox.com/?a=commitdiff_plain;h=b15f440e7837345fe9eb88d26c8f688de1f9f724;p=mirror_iproute2.git

lwt: BPF support for LWT

Adds support to configure BPF programs as nexthop actions via the LWT
framework.

Example:
   ip route add 192.168.253.2/32 \
           encap bpf out obj lwt_len_hist_kern.o section len_hist \
           dev veth0

Signed-off-by: Thomas Graf
---
Review notes:
 - encap_type_usage(): pass the "TYPE := " prefix before the type name so
   the arguments follow the "%s %s" format order.
 - print_encap_bpf(): print the u32 headroom with %u.
 - lwt_parse_bpf(): negate err before handing it to strerror().
 - parse_encap_bpf(): bound the headroom attribute by len instead of a
   hard-coded 1024.
 - ip-route.8.in: quote the " | " separator in front of BPF in ENCAP.
 - Hunk line counts follow the added comments; whitespace was re-created
   from a flattened copy of the patch.

diff --git a/include/bpf_api.h b/include/bpf_api.h
index 72578c93..d1324719 100644
--- a/include/bpf_api.h
+++ b/include/bpf_api.h
@@ -87,6 +87,11 @@
 	__section(ELF_SECTION_ACTION)
 #endif
 
+#ifndef __section_lwt_entry
+# define __section_lwt_entry						\
+	__section(ELF_SECTION_PROG)
+#endif
+
 #ifndef __section_license
 # define __section_license						\
 	__section(ELF_SECTION_LICENSE)
diff --git a/ip/iproute_lwtunnel.c b/ip/iproute_lwtunnel.c
index b6561432..6c1f8fb8 100644
--- a/ip/iproute_lwtunnel.c
+++ b/ip/iproute_lwtunnel.c
@@ -24,20 +24,7 @@
 #include "rt_names.h"
 #include "utils.h"
 #include "iproute_lwtunnel.h"
-
-static int read_encap_type(const char *name)
-{
-	if (strcmp(name, "mpls") == 0)
-		return LWTUNNEL_ENCAP_MPLS;
-	else if (strcmp(name, "ip") == 0)
-		return LWTUNNEL_ENCAP_IP;
-	else if (strcmp(name, "ip6") == 0)
-		return LWTUNNEL_ENCAP_IP6;
-	else if (strcmp(name, "ila") == 0)
-		return LWTUNNEL_ENCAP_ILA;
-	else
-		return LWTUNNEL_ENCAP_NONE;
-}
+#include "bpf_util.h"
 
 static const char *format_encap_type(int type)
 {
@@ -50,11 +37,48 @@ static const char *format_encap_type(int type)
 		return "ip6";
 	case LWTUNNEL_ENCAP_ILA:
 		return "ila";
+	case LWTUNNEL_ENCAP_BPF:
+		return "bpf";
 	default:
 		return "unknown";
 	}
 }
 
+/* Print every encap type name to stderr, then exit */
+static void encap_type_usage(void)
+{
+	int i;
+
+	fprintf(stderr, "Usage: ip route ... encap TYPE [ OPTIONS ] [...]\n");
+
+	/* the "TYPE := " prefix goes in front of the first name only */
+	for (i = 1; i <= LWTUNNEL_ENCAP_MAX; i++)
+		fprintf(stderr, "%s %s\n", i == 1 ? "TYPE := " : "        ",
+			format_encap_type(i));
+
+	exit(-1);
+}
+
+/* Map an encap keyword to its LWTUNNEL_ENCAP_* value; "help" prints the
+ * usage and exits, any other name yields LWTUNNEL_ENCAP_NONE */
+static int read_encap_type(const char *name)
+{
+	if (strcmp(name, "mpls") == 0)
+		return LWTUNNEL_ENCAP_MPLS;
+	else if (strcmp(name, "ip") == 0)
+		return LWTUNNEL_ENCAP_IP;
+	else if (strcmp(name, "ip6") == 0)
+		return LWTUNNEL_ENCAP_IP6;
+	else if (strcmp(name, "ila") == 0)
+		return LWTUNNEL_ENCAP_ILA;
+	else if (strcmp(name, "bpf") == 0)
+		return LWTUNNEL_ENCAP_BPF;
+	else if (strcmp(name, "help") == 0)
+		encap_type_usage();
+
+	return LWTUNNEL_ENCAP_NONE;
+}
+
 static void print_encap_mpls(FILE *fp, struct rtattr *encap)
 {
 	struct rtattr *tb[MPLS_IPTUNNEL_MAX+1];
@@ -159,6 +183,36 @@ static void print_encap_ip6(FILE *fp, struct rtattr *encap)
 		fprintf(fp, "tc %d ", rta_getattr_u8(tb[LWTUNNEL_IP6_TC]));
 }
 
+/* Print str, then the program name if the nested attribute carries one */
+static void print_encap_bpf_prog(FILE *fp, struct rtattr *encap,
+				 const char *str)
+{
+	struct rtattr *tb[LWT_BPF_PROG_MAX+1];
+
+	parse_rtattr_nested(tb, LWT_BPF_PROG_MAX, encap);
+	fprintf(fp, "%s ", str);
+
+	if (tb[LWT_BPF_PROG_NAME])
+		fprintf(fp, "%s ", rta_getattr_str(tb[LWT_BPF_PROG_NAME]));
+}
+
+/* Print the in/out/xmit programs and the xmit headroom of a bpf encap */
+static void print_encap_bpf(FILE *fp, struct rtattr *encap)
+{
+	struct rtattr *tb[LWT_BPF_MAX+1];
+
+	parse_rtattr_nested(tb, LWT_BPF_MAX, encap);
+
+	if (tb[LWT_BPF_IN])
+		print_encap_bpf_prog(fp, tb[LWT_BPF_IN], "in");
+	if (tb[LWT_BPF_OUT])
+		print_encap_bpf_prog(fp, tb[LWT_BPF_OUT], "out");
+	if (tb[LWT_BPF_XMIT])
+		print_encap_bpf_prog(fp, tb[LWT_BPF_XMIT], "xmit");
+	if (tb[LWT_BPF_XMIT_HEADROOM])
+		fprintf(fp, "%u ", rta_getattr_u32(tb[LWT_BPF_XMIT_HEADROOM]));
+}
+
 void lwt_print_encap(FILE *fp, struct rtattr *encap_type,
 			  struct rtattr *encap)
 {
@@ -184,6 +238,9 @@ void lwt_print_encap(FILE *fp, struct rtattr *encap_type,
 	case LWTUNNEL_ENCAP_IP6:
 		print_encap_ip6(fp, encap);
 		break;
+	case LWTUNNEL_ENCAP_BPF:
+		print_encap_bpf(fp, encap);
+		break;
 	}
 }
 
@@ -365,6 +422,119 @@ static int parse_encap_ip6(struct rtattr *rta, size_t len, int *argcp, char ***a
 	return 0;
 }
 
+/* Attribute buffer passed to bpf_parse_common(), read back in bpf_lwt_cb() */
+struct lwt_x {
+	struct rtattr *rta;
+	size_t len;
+};
+
+/* Add the program fd and its annotation string to the attribute buffer */
+static void bpf_lwt_cb(void *lwt_ptr, int fd, const char *annotation)
+{
+	struct lwt_x *x = lwt_ptr;
+
+	rta_addattr32(x->rta, x->len, LWT_BPF_PROG_FD, fd);
+	rta_addattr_l(x->rta, x->len, LWT_BPF_PROG_NAME, annotation,
+		      strlen(annotation) + 1);
+}
+
+static const struct bpf_cfg_ops bpf_cb_ops = {
+	.ebpf_cb = bpf_lwt_cb,
+};
+
+/* Parse one BPF program spec into a nested attribute of type attr.
+ * Returns 0 on success, -1 if bpf_parse_common() reports an error */
+static int lwt_parse_bpf(struct rtattr *rta, size_t len, int *argcp, char ***argvp,
+			 int attr, const enum bpf_prog_type bpf_type)
+{
+	struct bpf_cfg_in cfg = {
+		.argc = *argcp,
+		.argv = *argvp,
+	};
+	struct lwt_x x = {
+		.rta = rta,
+		.len = len,
+	};
+	struct rtattr *nest;
+	int err;
+
+	nest = rta_nest(rta, len, attr);
+	err = bpf_parse_common(bpf_type, &cfg, &bpf_cb_ops, &x);
+	if (err < 0) {
+		/* err < 0 here; assuming a negated errno, flip the sign */
+		fprintf(stderr, "Failed to parse eBPF program: %s\n",
+			strerror(-err));
+		return -1;
+	}
+	rta_nest_end(rta, nest);
+
+	*argcp = cfg.argc;
+	*argvp = cfg.argv;
+
+	return 0;
+}
+
+/* Print the "encap bpf" usage to stderr, then exit */
+static void lwt_bpf_usage(void)
+{
+	fprintf(stderr, "Usage: ip route ... encap bpf [ in BPF ] [ out BPF ] [ xmit BPF ] [...]\n");
+	fprintf(stderr, "BPF := obj FILE [ section NAME ] [ verbose ]\n");
+	exit(-1);
+}
+
+/* Parse the in/out/xmit/headroom arguments of "encap bpf" into rta.
+ * Returns 0 on success, -1 if a program spec could not be parsed */
+static int parse_encap_bpf(struct rtattr *rta, size_t len, int *argcp,
+			   char ***argvp)
+{
+	char **argv = *argvp;
+	int argc = *argcp;
+	int headroom_set = 0;
+
+	while (argc > 0) {
+		if (strcmp(*argv, "in") == 0) {
+			NEXT_ARG();
+			if (lwt_parse_bpf(rta, len, &argc, &argv, LWT_BPF_IN,
+					  BPF_PROG_TYPE_LWT_IN) < 0)
+				return -1;
+		} else if (strcmp(*argv, "out") == 0) {
+			NEXT_ARG();
+			if (lwt_parse_bpf(rta, len, &argc, &argv, LWT_BPF_OUT,
+					  BPF_PROG_TYPE_LWT_OUT) < 0)
+				return -1;
+		} else if (strcmp(*argv, "xmit") == 0) {
+			NEXT_ARG();
+			if (lwt_parse_bpf(rta, len, &argc, &argv, LWT_BPF_XMIT,
+					  BPF_PROG_TYPE_LWT_XMIT) < 0)
+				return -1;
+		} else if (strcmp(*argv, "headroom") == 0) {
+			unsigned int headroom;
+
+			NEXT_ARG();
+			if (get_unsigned(&headroom, *argv, 0) || headroom == 0)
+				invarg("headroom is invalid\n", *argv);
+			/* a repeated "headroom" is validated but not added */
+			if (!headroom_set)
+				rta_addattr32(rta, len, LWT_BPF_XMIT_HEADROOM,
+					      headroom);
+			headroom_set = 1;
+		} else if (strcmp(*argv, "help") == 0) {
+			lwt_bpf_usage();
+		} else {
+			break;
+		}
+		NEXT_ARG_FWD();
+	}
+
+	/* argv is currently the first unparsed argument,
+	 * but the lwt_parse_encap() caller will move to the next,
+	 * so step back */
+	*argcp = argc + 1;
+	*argvp = argv - 1;
+
+	return 0;
+}
+
 int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp)
 {
 	struct rtattr *nest;
@@ -397,6 +567,10 @@ int lwt_parse_encap(struct rtattr *rta, size_t len, int *argcp, char ***argvp)
 	case LWTUNNEL_ENCAP_IP6:
 		parse_encap_ip6(rta, len, &argc, &argv);
 		break;
+	case LWTUNNEL_ENCAP_BPF:
+		if (parse_encap_bpf(rta, len, &argc, &argv) < 0)
+			exit(-1);
+		break;
 	default:
 		fprintf(stderr, "Error: unsupported encap type\n");
 		break;
diff --git a/lib/bpf.c b/lib/bpf.c
index 2a8cd51d..43ef63db 100644
--- a/lib/bpf.c
+++ b/lib/bpf.c
@@ -56,6 +56,9 @@ static const enum bpf_prog_type __bpf_types[] = {
 	BPF_PROG_TYPE_SCHED_CLS,
 	BPF_PROG_TYPE_SCHED_ACT,
 	BPF_PROG_TYPE_XDP,
+	BPF_PROG_TYPE_LWT_IN,
+	BPF_PROG_TYPE_LWT_OUT,
+	BPF_PROG_TYPE_LWT_XMIT,
 };
 
 static const struct bpf_prog_meta __bpf_prog_meta[] = {
@@ -76,6 +79,21 @@ static const struct bpf_prog_meta __bpf_prog_meta[] = {
 		.subdir		= "xdp",
 		.section	= ELF_SECTION_PROG,
 	},
+	[BPF_PROG_TYPE_LWT_IN] = {
+		.type		= "lwt_in",
+		.subdir		= "ip",
+		.section	= ELF_SECTION_PROG,
+	},
+	[BPF_PROG_TYPE_LWT_OUT] = {
+		.type		= "lwt_out",
+		.subdir		= "ip",
+		.section	= ELF_SECTION_PROG,
+	},
+	[BPF_PROG_TYPE_LWT_XMIT] = {
+		.type		= "lwt_xmit",
+		.subdir		= "ip",
+		.section	= ELF_SECTION_PROG,
+	},
 };
 
 static const char *bpf_prog_to_subdir(enum bpf_prog_type type)
diff --git a/man/man8/ip-route.8.in b/man/man8/ip-route.8.in
index c0acaa00..85191531 100644
--- a/man/man8/ip-route.8.in
+++ b/man/man8/ip-route.8.in
@@ -174,7 +174,7 @@ throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]"
 
 .ti -8
 .IR ENCAP " := [ "
-.IR MPLS " | " IP " ]"
+.IR MPLS " | " IP " | " BPF " ]"
 
 .ti -8
 .IR ENCAP_MPLS " := "
@@ -193,6 +193,19 @@ throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]"
 .B ttl
 .IR TTL " ]"
 
+
+.ti -8
+.IR ENCAP_BPF " := "
+.BR bpf " [ "
+.B in
+.IR PROG " ] ["
+.B out
+.IR PROG " ] ["
+.B xmit
+.IR PROG " ] ["
+.B headroom
+.IR SIZE " ]"
+
 .SH DESCRIPTION
 .B ip route
 is used to manipulate entries in the kernel routing tables.
@@ -636,6 +649,9 @@ is a string specifying the supported encapsulation type. Namely:
 .BI ip
 - IP encapsulation (Geneve, GRE, VXLAN, ...)
 .sp
+.BI bpf
+- Execution of BPF program
+.sp
 
 .in -8
 .I ENCAPHDR
@@ -664,6 +680,29 @@ is a set of encapsulation attributes specific to the
 .in -2
 .sp
 
+.B bpf
+.in +2
+.B in
+.I PROG
+- BPF program to execute for incoming packets
+.sp
+
+.B out
+.I PROG
+- BPF program to execute for outgoing packets
+.sp
+
+.B xmit
+.I PROG
+- BPF program to execute for transmitted packets
+.sp
+
+.B headroom
+.I SIZE
+- Size of header BPF program will attach (xmit)
+.in -2
+.sp
+
 .in -8
 .RE
 