samples: bpf: Convert xdp_redirect_cpu_kern.o to XDP samples helper
author    Kumar Kartikeya Dwivedi <memxor@gmail.com>
Sat, 21 Aug 2021 00:20:05 +0000 (05:50 +0530)
committer Alexei Starovoitov <ast@kernel.org>
Tue, 24 Aug 2021 21:48:42 +0000 (14:48 -0700)
Similar to xdp_monitor_kern, a lot of these BPF programs have been
reimplemented, properly consolidating missing features from other XDP
samples. Hence, drop the unneeded code and rename the file to the
.bpf.c suffix.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210821002010.845777-18-memxor@gmail.com
samples/bpf/Makefile
samples/bpf/xdp_redirect_cpu.bpf.c [new file with mode: 0644]
samples/bpf/xdp_redirect_cpu_kern.c [deleted file]
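
The Makefile hunks below add xdp_redirect_cpu.bpf.o to LINKED_SKELS, so the
build links it with xdp_sample.bpf.o and generates a combined
xdp_redirect_cpu.skel.h skeleton. As a rough, hypothetical sketch of how such
a generated skeleton is consumed (the real sample drives setup through the
shared xdp_sample userspace helper instead; names below follow standard
bpftool/libbpf skeleton conventions):

    #include <stdio.h>
    #include <bpf/libbpf.h>
    #include "xdp_redirect_cpu.skel.h"

    int main(void)
    {
            struct xdp_redirect_cpu *skel;
            int n_cpus, err;

            skel = xdp_redirect_cpu__open();
            if (!skel)
                    return 1;

            /* cpu_map and cpus_available are declared without max_entries
             * in the .bpf.c below; they must be sized before load. */
            n_cpus = libbpf_num_possible_cpus();
            bpf_map__set_max_entries(skel->maps.cpu_map, n_cpus);
            bpf_map__set_max_entries(skel->maps.cpus_available, n_cpus);

            err = xdp_redirect_cpu__load(skel);
            if (err)
                    fprintf(stderr, "loading BPF object failed: %d\n", err);

            /* ... attach one of skel->progs.* to an interface, populate
             * cpu_map/cpus_available, then read the xdp_sample stats ... */

            xdp_redirect_cpu__destroy(skel);
            return err ? 1 : 0;
    }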

index d05105227ec584ff5be5450a7a9a0fb159601fb6..231cdbc773a7fe6d11b1fcaa2032746a61203c08 100644 (file)
@@ -165,7 +165,6 @@ always-y += tcp_tos_reflect_kern.o
 always-y += tcp_dumpstats_kern.o
 always-y += xdp_redirect_map_kern.o
 always-y += xdp_redirect_map_multi_kern.o
-always-y += xdp_redirect_cpu_kern.o
 always-y += xdp_rxq_info_kern.o
 always-y += xdp2skb_meta_kern.o
 always-y += syscall_tp_kern.o
@@ -356,6 +355,7 @@ endef
 
 CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
 
+$(obj)/xdp_redirect_cpu.bpf.o: $(obj)/xdp_sample.bpf.o
 $(obj)/xdp_redirect.bpf.o: $(obj)/xdp_sample.bpf.o
 $(obj)/xdp_monitor.bpf.o: $(obj)/xdp_sample.bpf.o
 
@@ -367,9 +367,10 @@ $(obj)/%.bpf.o: $(src)/%.bpf.c $(obj)/vmlinux.h $(src)/xdp_sample.bpf.h $(src)/x
                -I$(srctree)/tools/lib $(CLANG_SYS_INCLUDES) \
                -c $(filter %.bpf.c,$^) -o $@
 
-LINKED_SKELS := xdp_redirect.skel.h xdp_monitor.skel.h
+LINKED_SKELS := xdp_redirect_cpu.skel.h xdp_redirect.skel.h xdp_monitor.skel.h
 clean-files += $(LINKED_SKELS)
 
+xdp_redirect_cpu.skel.h-deps := xdp_redirect_cpu.bpf.o xdp_sample.bpf.o
 xdp_redirect.skel.h-deps := xdp_redirect.bpf.o xdp_sample.bpf.o
 xdp_monitor.skel.h-deps := xdp_monitor.bpf.o xdp_sample.bpf.o
 
diff --git a/samples/bpf/xdp_redirect_cpu.bpf.c b/samples/bpf/xdp_redirect_cpu.bpf.c
new file mode 100644 (file)
index 0000000..f10fe3c
--- /dev/null
@@ -0,0 +1,541 @@
+/*  XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
+ *
+ *  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
+ */
+#include "vmlinux.h"
+#include "xdp_sample.bpf.h"
+#include "xdp_sample_shared.h"
+#include "hash_func01.h"
+
+/* Special map type that can XDP_REDIRECT frames to another CPU */
+struct {
+       __uint(type, BPF_MAP_TYPE_CPUMAP);
+       __uint(key_size, sizeof(u32));
+       __uint(value_size, sizeof(struct bpf_cpumap_val));
+} cpu_map SEC(".maps");
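+/* Note: max_entries is left unset here and for cpus_available below; a
+ * map cannot be created with zero entries, so the userspace loader must
+ * size both maps (typically to the number of CPUs) before load. */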
+
+/* Set of maps controlling available CPU, and for iterating through
+ * selectable redirect CPUs.
+ */
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, u32);
+       __type(value, u32);
+} cpus_available SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __type(key, u32);
+       __type(value, u32);
+       __uint(max_entries, 1);
+} cpus_count SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+       __type(key, u32);
+       __type(value, u32);
+       __uint(max_entries, 1);
+} cpus_iterator SEC(".maps");
+
+struct {
+       __uint(type, BPF_MAP_TYPE_DEVMAP);
+       __uint(key_size, sizeof(int));
+       __uint(value_size, sizeof(struct bpf_devmap_val));
+       __uint(max_entries, 1);
+} tx_port SEC(".maps");
+
+char tx_mac_addr[ETH_ALEN];
+
+/* Helper parse functions */
+
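+/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
+ *
+ * Returns false on error and non-supported ether-type
+ */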
+static __always_inline
+bool parse_eth(struct ethhdr *eth, void *data_end,
+              u16 *eth_proto, u64 *l3_offset)
+{
+       u16 eth_type;
+       u64 offset;
+
+       offset = sizeof(*eth);
+       if ((void *)eth + offset > data_end)
+               return false;
+
+       eth_type = eth->h_proto;
+
+       /* Skip non 802.3 Ethertypes */
+       if (__builtin_expect(bpf_ntohs(eth_type) < ETH_P_802_3_MIN, 0))
+               return false;
+
+       /* Handle VLAN tagged packet */
+       if (eth_type == bpf_htons(ETH_P_8021Q) ||
+           eth_type == bpf_htons(ETH_P_8021AD)) {
+               struct vlan_hdr *vlan_hdr;
+
+               vlan_hdr = (void *)eth + offset;
+               offset += sizeof(*vlan_hdr);
+               if ((void *)eth + offset > data_end)
+                       return false;
+               eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+       }
+       /* Handle double VLAN tagged packet */
+       if (eth_type == bpf_htons(ETH_P_8021Q) ||
+           eth_type == bpf_htons(ETH_P_8021AD)) {
+               struct vlan_hdr *vlan_hdr;
+
+               vlan_hdr = (void *)eth + offset;
+               offset += sizeof(*vlan_hdr);
+               if ((void *)eth + offset > data_end)
+                       return false;
+               eth_type = vlan_hdr->h_vlan_encapsulated_proto;
+       }
+
+       *eth_proto = bpf_ntohs(eth_type);
+       *l3_offset = offset;
+       return true;
+}
+
+static __always_inline
+u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       struct iphdr *iph = data + nh_off;
+       struct udphdr *udph;
+       u16 dport;
+
+       if (iph + 1 > data_end)
+               return 0;
+       if (!(iph->protocol == IPPROTO_UDP))
+               return 0;
+
+       udph = (void *)(iph + 1);
+       if (udph + 1 > data_end)
+               return 0;
+
+       dport = bpf_ntohs(udph->dest);
+       return dport;
+}
+
+static __always_inline
+int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       struct iphdr *iph = data + nh_off;
+
+       if (iph + 1 > data_end)
+               return 0;
+       return iph->protocol;
+}
+
+static __always_inline
+int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       struct ipv6hdr *ip6h = data + nh_off;
+
+       if (ip6h + 1 > data_end)
+               return 0;
+       return ip6h->nexthdr;
+}
+
+SEC("xdp")
+int  xdp_prognum0_no_touch(struct xdp_md *ctx)
+{
+       u32 key = bpf_get_smp_processor_id();
+       struct datarec *rec;
+       u32 *cpu_selected;
+       u32 cpu_dest = 0;
+       u32 key0 = 0;
+
+       /* Only use first entry in cpus_available */
+       cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
+       if (!cpu_selected)
+               return XDP_ABORTED;
+       cpu_dest = *cpu_selected;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+
+       if (cpu_dest >= nr_cpus) {
+               NO_TEAR_INC(rec->issue);
+               return XDP_ABORTED;
+       }
+       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp")
+int  xdp_prognum1_touch_data(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       u32 key = bpf_get_smp_processor_id();
+       struct ethhdr *eth = data;
+       struct datarec *rec;
+       u32 *cpu_selected;
+       u32 cpu_dest = 0;
+       u32 key0 = 0;
+       u16 eth_type;
+
+       /* Only use first entry in cpus_available */
+       cpu_selected = bpf_map_lookup_elem(&cpus_available, &key0);
+       if (!cpu_selected)
+               return XDP_ABORTED;
+       cpu_dest = *cpu_selected;
+
+       /* Validate packet length is minimum Eth header size */
+       if (eth + 1 > data_end)
+               return XDP_ABORTED;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+
+       /* Read packet data, and use it (drop non 802.3 Ethertypes) */
+       eth_type = eth->h_proto;
+       if (bpf_ntohs(eth_type) < ETH_P_802_3_MIN) {
+               NO_TEAR_INC(rec->dropped);
+               return XDP_DROP;
+       }
+
+       if (cpu_dest >= nr_cpus) {
+               NO_TEAR_INC(rec->issue);
+               return XDP_ABORTED;
+       }
+       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp")
+int  xdp_prognum2_round_robin(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       u32 key = bpf_get_smp_processor_id();
+       struct datarec *rec;
+       u32 cpu_dest = 0;
+       u32 key0 = 0;
+
+       u32 *cpu_selected;
+       u32 *cpu_iterator;
+       u32 *cpu_max;
+       u32 cpu_idx;
+
+       cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
+       if (!cpu_max)
+               return XDP_ABORTED;
+
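+       /* cpus_iterator is a BPF_MAP_TYPE_PERCPU_ARRAY, so the
+        * read-modify-write below only touches this CPU's slot. */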
+       cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
+       if (!cpu_iterator)
+               return XDP_ABORTED;
+       cpu_idx = *cpu_iterator;
+
+       *cpu_iterator += 1;
+       if (*cpu_iterator == *cpu_max)
+               *cpu_iterator = 0;
+
+       cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+       if (!cpu_selected)
+               return XDP_ABORTED;
+       cpu_dest = *cpu_selected;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+
+       if (cpu_dest >= nr_cpus) {
+               NO_TEAR_INC(rec->issue);
+               return XDP_ABORTED;
+       }
+       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp")
+int  xdp_prognum3_proto_separate(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       u32 key = bpf_get_smp_processor_id();
+       struct ethhdr *eth = data;
+       u8 ip_proto = IPPROTO_UDP;
+       struct datarec *rec;
+       u16 eth_proto = 0;
+       u64 l3_offset = 0;
+       u32 cpu_dest = 0;
+       u32 *cpu_lookup;
+       u32 cpu_idx = 0;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+
+       if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
+               return XDP_PASS; /* Just skip */
+
+       /* Extract L4 protocol */
+       switch (eth_proto) {
+       case ETH_P_IP:
+               ip_proto = get_proto_ipv4(ctx, l3_offset);
+               break;
+       case ETH_P_IPV6:
+               ip_proto = get_proto_ipv6(ctx, l3_offset);
+               break;
+       case ETH_P_ARP:
+               cpu_idx = 0; /* ARP packet handled on separate CPU */
+               break;
+       default:
+               cpu_idx = 0;
+       }
+
+       /* Choose CPU based on L4 protocol */
+       switch (ip_proto) {
+       case IPPROTO_ICMP:
+       case IPPROTO_ICMPV6:
+               cpu_idx = 2;
+               break;
+       case IPPROTO_TCP:
+               cpu_idx = 0;
+               break;
+       case IPPROTO_UDP:
+               cpu_idx = 1;
+               break;
+       default:
+               cpu_idx = 0;
+       }
+
+       cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+       if (!cpu_lookup)
+               return XDP_ABORTED;
+       cpu_dest = *cpu_lookup;
+
+       if (cpu_dest >= nr_cpus) {
+               NO_TEAR_INC(rec->issue);
+               return XDP_ABORTED;
+       }
+       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+SEC("xdp")
+int  xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       u32 key = bpf_get_smp_processor_id();
+       struct ethhdr *eth = data;
+       u8 ip_proto = IPPROTO_UDP;
+       struct datarec *rec;
+       u16 eth_proto = 0;
+       u64 l3_offset = 0;
+       u32 cpu_dest = 0;
+       u32 *cpu_lookup;
+       u32 cpu_idx = 0;
+       u16 dest_port;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+
+       if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
+               return XDP_PASS; /* Just skip */
+
+       /* Extract L4 protocol */
+       switch (eth_proto) {
+       case ETH_P_IP:
+               ip_proto = get_proto_ipv4(ctx, l3_offset);
+               break;
+       case ETH_P_IPV6:
+               ip_proto = get_proto_ipv6(ctx, l3_offset);
+               break;
+       case ETH_P_ARP:
+               cpu_idx = 0; /* ARP packet handled on separate CPU */
+               break;
+       default:
+               cpu_idx = 0;
+       }
+
+       /* Choose CPU based on L4 protocol */
+       switch (ip_proto) {
+       case IPPROTO_ICMP:
+       case IPPROTO_ICMPV6:
+               cpu_idx = 2;
+               break;
+       case IPPROTO_TCP:
+               cpu_idx = 0;
+               break;
+       case IPPROTO_UDP:
+               cpu_idx = 1;
+               /* DDoS filter UDP port 9 (pktgen) */
+               dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
+               if (dest_port == 9) {
+                       NO_TEAR_INC(rec->dropped);
+                       return XDP_DROP;
+               }
+               break;
+       default:
+               cpu_idx = 0;
+       }
+
+       cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+       if (!cpu_lookup)
+               return XDP_ABORTED;
+       cpu_dest = *cpu_lookup;
+
+       if (cpu_dest >= nr_cpus) {
+               NO_TEAR_INC(rec->issue);
+               return XDP_ABORTED;
+       }
+       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
+/* Hashing initval */
+#define INITVAL 15485863
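+/* Folding the L4 protocol number into the seed (INITVAL + protocol)
+ * lets flows between the same address pair hash differently per
+ * protocol. */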
+
+static __always_inline
+u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       struct iphdr *iph = data + nh_off;
+       u32 cpu_hash;
+
+       if (iph + 1 > data_end)
+               return 0;
+
+       cpu_hash = iph->saddr + iph->daddr;
+       cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);
+
+       return cpu_hash;
+}
+
+static __always_inline
+u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       struct ipv6hdr *ip6h = data + nh_off;
+       u32 cpu_hash;
+
+       if (ip6h + 1 > data_end)
+               return 0;
+
+       cpu_hash  = ip6h->saddr.in6_u.u6_addr32[0] + ip6h->daddr.in6_u.u6_addr32[0];
+       cpu_hash += ip6h->saddr.in6_u.u6_addr32[1] + ip6h->daddr.in6_u.u6_addr32[1];
+       cpu_hash += ip6h->saddr.in6_u.u6_addr32[2] + ip6h->daddr.in6_u.u6_addr32[2];
+       cpu_hash += ip6h->saddr.in6_u.u6_addr32[3] + ip6h->daddr.in6_u.u6_addr32[3];
+       cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);
+
+       return cpu_hash;
+}
+
+/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
+ * hashing scheme is symmetric, meaning swapping IP src/dest still hit
+ * same CPU.
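+ * (the addresses are combined by addition, which commutes).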
+ */
+SEC("xdp")
+int  xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data     = (void *)(long)ctx->data;
+       u32 key = bpf_get_smp_processor_id();
+       struct ethhdr *eth = data;
+       struct datarec *rec;
+       u16 eth_proto = 0;
+       u64 l3_offset = 0;
+       u32 cpu_dest = 0;
+       u32 cpu_idx = 0;
+       u32 *cpu_lookup;
+       u32 key0 = 0;
+       u32 *cpu_max;
+       u32 cpu_hash;
+
+       rec = bpf_map_lookup_elem(&rx_cnt, &key);
+       if (!rec)
+               return XDP_PASS;
+       NO_TEAR_INC(rec->processed);
+
+       cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
+       if (!cpu_max)
+               return XDP_ABORTED;
+
+       if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
+               return XDP_PASS; /* Just skip */
+
+       /* Hash for IPv4 and IPv6 */
+       switch (eth_proto) {
+       case ETH_P_IP:
+               cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
+               break;
+       case ETH_P_IPV6:
+               cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
+               break;
+       case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
+       default:
+               cpu_hash = 0;
+       }
+
+       /* Choose CPU based on hash */
+       cpu_idx = cpu_hash % *cpu_max;
+
+       cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
+       if (!cpu_lookup)
+               return XDP_ABORTED;
+       cpu_dest = *cpu_lookup;
+
+       if (cpu_dest >= nr_cpus) {
+               NO_TEAR_INC(rec->issue);
+               return XDP_ABORTED;
+       }
+       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
+}
+
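+/* The xdp_cpumap/ programs below are not attached to a NIC; userspace
+ * installs them as the per-entry program in cpu_map (bpf_cpumap_val),
+ * and they run on the destination CPU for each redirected frame. */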
+SEC("xdp_cpumap/redirect")
+int xdp_redirect_cpu_devmap(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       struct ethhdr *eth = data;
+       u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       swap_src_dst_mac(data);
+       return bpf_redirect_map(&tx_port, 0, 0);
+}
+
+SEC("xdp_cpumap/pass")
+int xdp_redirect_cpu_pass(struct xdp_md *ctx)
+{
+       return XDP_PASS;
+}
+
+SEC("xdp_cpumap/drop")
+int xdp_redirect_cpu_drop(struct xdp_md *ctx)
+{
+       return XDP_DROP;
+}
+
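+/* Installed via tx_port's bpf_devmap_val and run at devmap egress; it
+ * stamps the source MAC configured by userspace in tx_mac_addr. */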
+SEC("xdp_devmap/egress")
+int xdp_redirect_egress_prog(struct xdp_md *ctx)
+{
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       struct ethhdr *eth = data;
+       u64 nh_off;
+
+       nh_off = sizeof(*eth);
+       if (data + nh_off > data_end)
+               return XDP_DROP;
+
+       __builtin_memcpy(eth->h_source, (const char *)tx_mac_addr, ETH_ALEN);
+
+       return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c
deleted file mode 100644 (file)
index 8255025..0000000
+++ /dev/null
@@ -1,730 +0,0 @@
-/*  XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
- *
- *  GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
- */
-#include <uapi/linux/if_ether.h>
-#include <uapi/linux/if_packet.h>
-#include <uapi/linux/if_vlan.h>
-#include <uapi/linux/ip.h>
-#include <uapi/linux/ipv6.h>
-#include <uapi/linux/in.h>
-#include <uapi/linux/tcp.h>
-#include <uapi/linux/udp.h>
-
-#include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "hash_func01.h"
-
-#define MAX_CPUS NR_CPUS
-
-/* Special map type that can XDP_REDIRECT frames to another CPU */
-struct {
-       __uint(type, BPF_MAP_TYPE_CPUMAP);
-       __uint(key_size, sizeof(u32));
-       __uint(value_size, sizeof(struct bpf_cpumap_val));
-       __uint(max_entries, MAX_CPUS);
-} cpu_map SEC(".maps");
-
-/* Common stats data record to keep userspace more simple */
-struct datarec {
-       __u64 processed;
-       __u64 dropped;
-       __u64 issue;
-       __u64 xdp_pass;
-       __u64 xdp_drop;
-       __u64 xdp_redirect;
-};
-
-/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
- * feedback.  Redirect TX errors can be caught via a tracepoint.
- */
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, struct datarec);
-       __uint(max_entries, 1);
-} rx_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, struct datarec);
-       __uint(max_entries, 2);
-       /* TODO: have entries for all possible errno's */
-} redirect_err_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, struct datarec);
-       __uint(max_entries, MAX_CPUS);
-} cpumap_enqueue_cnt SEC(".maps");
-
-/* Used by trace point */
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, struct datarec);
-       __uint(max_entries, 1);
-} cpumap_kthread_cnt SEC(".maps");
-
-/* Set of maps controlling available CPU, and for iterating through
- * selectable redirect CPUs.
- */
-struct {
-       __uint(type, BPF_MAP_TYPE_ARRAY);
-       __type(key, u32);
-       __type(value, u32);
-       __uint(max_entries, MAX_CPUS);
-} cpus_available SEC(".maps");
-struct {
-       __uint(type, BPF_MAP_TYPE_ARRAY);
-       __type(key, u32);
-       __type(value, u32);
-       __uint(max_entries, 1);
-} cpus_count SEC(".maps");
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, u32);
-       __uint(max_entries, 1);
-} cpus_iterator SEC(".maps");
-
-/* Used by trace point */
-struct {
-       __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
-       __type(key, u32);
-       __type(value, struct datarec);
-       __uint(max_entries, 1);
-} exception_cnt SEC(".maps");
-
-/* Helper parse functions */
-
-/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
- *
- * Returns false on error and non-supported ether-type
- */
-struct vlan_hdr {
-       __be16 h_vlan_TCI;
-       __be16 h_vlan_encapsulated_proto;
-};
-
-static __always_inline
-bool parse_eth(struct ethhdr *eth, void *data_end,
-              u16 *eth_proto, u64 *l3_offset)
-{
-       u16 eth_type;
-       u64 offset;
-
-       offset = sizeof(*eth);
-       if ((void *)eth + offset > data_end)
-               return false;
-
-       eth_type = eth->h_proto;
-
-       /* Skip non 802.3 Ethertypes */
-       if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
-               return false;
-
-       /* Handle VLAN tagged packet */
-       if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
-               struct vlan_hdr *vlan_hdr;
-
-               vlan_hdr = (void *)eth + offset;
-               offset += sizeof(*vlan_hdr);
-               if ((void *)eth + offset > data_end)
-                       return false;
-               eth_type = vlan_hdr->h_vlan_encapsulated_proto;
-       }
-       /* Handle double VLAN tagged packet */
-       if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
-               struct vlan_hdr *vlan_hdr;
-
-               vlan_hdr = (void *)eth + offset;
-               offset += sizeof(*vlan_hdr);
-               if ((void *)eth + offset > data_end)
-                       return false;
-               eth_type = vlan_hdr->h_vlan_encapsulated_proto;
-       }
-
-       *eth_proto = ntohs(eth_type);
-       *l3_offset = offset;
-       return true;
-}
-
-static __always_inline
-u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct iphdr *iph = data + nh_off;
-       struct udphdr *udph;
-       u16 dport;
-
-       if (iph + 1 > data_end)
-               return 0;
-       if (!(iph->protocol == IPPROTO_UDP))
-               return 0;
-
-       udph = (void *)(iph + 1);
-       if (udph + 1 > data_end)
-               return 0;
-
-       dport = ntohs(udph->dest);
-       return dport;
-}
-
-static __always_inline
-int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct iphdr *iph = data + nh_off;
-
-       if (iph + 1 > data_end)
-               return 0;
-       return iph->protocol;
-}
-
-static __always_inline
-int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct ipv6hdr *ip6h = data + nh_off;
-
-       if (ip6h + 1 > data_end)
-               return 0;
-       return ip6h->nexthdr;
-}
-
-SEC("xdp_cpu_map0")
-int  xdp_prognum0_no_touch(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct datarec *rec;
-       u32 *cpu_selected;
-       u32 cpu_dest;
-       u32 key = 0;
-
-       /* Only use first entry in cpus_available */
-       cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
-       if (!cpu_selected)
-               return XDP_ABORTED;
-       cpu_dest = *cpu_selected;
-
-       /* Count RX packet in map */
-       rec = bpf_map_lookup_elem(&rx_cnt, &key);
-       if (!rec)
-               return XDP_ABORTED;
-       rec->processed++;
-
-       if (cpu_dest >= MAX_CPUS) {
-               rec->issue++;
-               return XDP_ABORTED;
-       }
-
-       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-SEC("xdp_cpu_map1_touch_data")
-int  xdp_prognum1_touch_data(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct ethhdr *eth = data;
-       struct datarec *rec;
-       u32 *cpu_selected;
-       u32 cpu_dest;
-       u16 eth_type;
-       u32 key = 0;
-
-       /* Only use first entry in cpus_available */
-       cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
-       if (!cpu_selected)
-               return XDP_ABORTED;
-       cpu_dest = *cpu_selected;
-
-       /* Validate packet length is minimum Eth header size */
-       if (eth + 1 > data_end)
-               return XDP_ABORTED;
-
-       /* Count RX packet in map */
-       rec = bpf_map_lookup_elem(&rx_cnt, &key);
-       if (!rec)
-               return XDP_ABORTED;
-       rec->processed++;
-
-       /* Read packet data, and use it (drop non 802.3 Ethertypes) */
-       eth_type = eth->h_proto;
-       if (ntohs(eth_type) < ETH_P_802_3_MIN) {
-               rec->dropped++;
-               return XDP_DROP;
-       }
-
-       if (cpu_dest >= MAX_CPUS) {
-               rec->issue++;
-               return XDP_ABORTED;
-       }
-
-       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-SEC("xdp_cpu_map2_round_robin")
-int  xdp_prognum2_round_robin(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct ethhdr *eth = data;
-       struct datarec *rec;
-       u32 cpu_dest;
-       u32 *cpu_lookup;
-       u32 key0 = 0;
-
-       u32 *cpu_selected;
-       u32 *cpu_iterator;
-       u32 *cpu_max;
-       u32 cpu_idx;
-
-       cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
-       if (!cpu_max)
-               return XDP_ABORTED;
-
-       cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
-       if (!cpu_iterator)
-               return XDP_ABORTED;
-       cpu_idx = *cpu_iterator;
-
-       *cpu_iterator += 1;
-       if (*cpu_iterator == *cpu_max)
-               *cpu_iterator = 0;
-
-       cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
-       if (!cpu_selected)
-               return XDP_ABORTED;
-       cpu_dest = *cpu_selected;
-
-       /* Count RX packet in map */
-       rec = bpf_map_lookup_elem(&rx_cnt, &key0);
-       if (!rec)
-               return XDP_ABORTED;
-       rec->processed++;
-
-       if (cpu_dest >= MAX_CPUS) {
-               rec->issue++;
-               return XDP_ABORTED;
-       }
-
-       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-SEC("xdp_cpu_map3_proto_separate")
-int  xdp_prognum3_proto_separate(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct ethhdr *eth = data;
-       u8 ip_proto = IPPROTO_UDP;
-       struct datarec *rec;
-       u16 eth_proto = 0;
-       u64 l3_offset = 0;
-       u32 cpu_dest = 0;
-       u32 cpu_idx = 0;
-       u32 *cpu_lookup;
-       u32 key = 0;
-
-       /* Count RX packet in map */
-       rec = bpf_map_lookup_elem(&rx_cnt, &key);
-       if (!rec)
-               return XDP_ABORTED;
-       rec->processed++;
-
-       if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
-               return XDP_PASS; /* Just skip */
-
-       /* Extract L4 protocol */
-       switch (eth_proto) {
-       case ETH_P_IP:
-               ip_proto = get_proto_ipv4(ctx, l3_offset);
-               break;
-       case ETH_P_IPV6:
-               ip_proto = get_proto_ipv6(ctx, l3_offset);
-               break;
-       case ETH_P_ARP:
-               cpu_idx = 0; /* ARP packet handled on separate CPU */
-               break;
-       default:
-               cpu_idx = 0;
-       }
-
-       /* Choose CPU based on L4 protocol */
-       switch (ip_proto) {
-       case IPPROTO_ICMP:
-       case IPPROTO_ICMPV6:
-               cpu_idx = 2;
-               break;
-       case IPPROTO_TCP:
-               cpu_idx = 0;
-               break;
-       case IPPROTO_UDP:
-               cpu_idx = 1;
-               break;
-       default:
-               cpu_idx = 0;
-       }
-
-       cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
-       if (!cpu_lookup)
-               return XDP_ABORTED;
-       cpu_dest = *cpu_lookup;
-
-       if (cpu_dest >= MAX_CPUS) {
-               rec->issue++;
-               return XDP_ABORTED;
-       }
-
-       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-SEC("xdp_cpu_map4_ddos_filter_pktgen")
-int  xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct ethhdr *eth = data;
-       u8 ip_proto = IPPROTO_UDP;
-       struct datarec *rec;
-       u16 eth_proto = 0;
-       u64 l3_offset = 0;
-       u32 cpu_dest = 0;
-       u32 cpu_idx = 0;
-       u16 dest_port;
-       u32 *cpu_lookup;
-       u32 key = 0;
-
-       /* Count RX packet in map */
-       rec = bpf_map_lookup_elem(&rx_cnt, &key);
-       if (!rec)
-               return XDP_ABORTED;
-       rec->processed++;
-
-       if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
-               return XDP_PASS; /* Just skip */
-
-       /* Extract L4 protocol */
-       switch (eth_proto) {
-       case ETH_P_IP:
-               ip_proto = get_proto_ipv4(ctx, l3_offset);
-               break;
-       case ETH_P_IPV6:
-               ip_proto = get_proto_ipv6(ctx, l3_offset);
-               break;
-       case ETH_P_ARP:
-               cpu_idx = 0; /* ARP packet handled on separate CPU */
-               break;
-       default:
-               cpu_idx = 0;
-       }
-
-       /* Choose CPU based on L4 protocol */
-       switch (ip_proto) {
-       case IPPROTO_ICMP:
-       case IPPROTO_ICMPV6:
-               cpu_idx = 2;
-               break;
-       case IPPROTO_TCP:
-               cpu_idx = 0;
-               break;
-       case IPPROTO_UDP:
-               cpu_idx = 1;
-               /* DDoS filter UDP port 9 (pktgen) */
-               dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
-               if (dest_port == 9) {
-                       if (rec)
-                               rec->dropped++;
-                       return XDP_DROP;
-               }
-               break;
-       default:
-               cpu_idx = 0;
-       }
-
-       cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
-       if (!cpu_lookup)
-               return XDP_ABORTED;
-       cpu_dest = *cpu_lookup;
-
-       if (cpu_dest >= MAX_CPUS) {
-               rec->issue++;
-               return XDP_ABORTED;
-       }
-
-       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-/* Hashing initval */
-#define INITVAL 15485863
-
-static __always_inline
-u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct iphdr *iph = data + nh_off;
-       u32 cpu_hash;
-
-       if (iph + 1 > data_end)
-               return 0;
-
-       cpu_hash = iph->saddr + iph->daddr;
-       cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);
-
-       return cpu_hash;
-}
-
-static __always_inline
-u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct ipv6hdr *ip6h = data + nh_off;
-       u32 cpu_hash;
-
-       if (ip6h + 1 > data_end)
-               return 0;
-
-       cpu_hash  = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
-       cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
-       cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
-       cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
-       cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);
-
-       return cpu_hash;
-}
-
-/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
- * hashing scheme is symmetric, meaning swapping IP src/dest still hit
- * same CPU.
- */
-SEC("xdp_cpu_map5_lb_hash_ip_pairs")
-int  xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
-{
-       void *data_end = (void *)(long)ctx->data_end;
-       void *data     = (void *)(long)ctx->data;
-       struct ethhdr *eth = data;
-       u8 ip_proto = IPPROTO_UDP;
-       struct datarec *rec;
-       u16 eth_proto = 0;
-       u64 l3_offset = 0;
-       u32 cpu_dest = 0;
-       u32 cpu_idx = 0;
-       u32 *cpu_lookup;
-       u32 *cpu_max;
-       u32 cpu_hash;
-       u32 key = 0;
-
-       /* Count RX packet in map */
-       rec = bpf_map_lookup_elem(&rx_cnt, &key);
-       if (!rec)
-               return XDP_ABORTED;
-       rec->processed++;
-
-       cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
-       if (!cpu_max)
-               return XDP_ABORTED;
-
-       if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
-               return XDP_PASS; /* Just skip */
-
-       /* Hash for IPv4 and IPv6 */
-       switch (eth_proto) {
-       case ETH_P_IP:
-               cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
-               break;
-       case ETH_P_IPV6:
-               cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
-               break;
-       case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
-       default:
-               cpu_hash = 0;
-       }
-
-       /* Choose CPU based on hash */
-       cpu_idx = cpu_hash % *cpu_max;
-
-       cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
-       if (!cpu_lookup)
-               return XDP_ABORTED;
-       cpu_dest = *cpu_lookup;
-
-       if (cpu_dest >= MAX_CPUS) {
-               rec->issue++;
-               return XDP_ABORTED;
-       }
-
-       return bpf_redirect_map(&cpu_map, cpu_dest, 0);
-}
-
-char _license[] SEC("license") = "GPL";
-
-/*** Trace point code ***/
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
- * Code in:                kernel/include/trace/events/xdp.h
- */
-struct xdp_redirect_ctx {
-       u64 __pad;      // First 8 bytes are not accessible by bpf code
-       int prog_id;    //      offset:8;  size:4; signed:1;
-       u32 act;        //      offset:12  size:4; signed:0;
-       int ifindex;    //      offset:16  size:4; signed:1;
-       int err;        //      offset:20  size:4; signed:1;
-       int to_ifindex; //      offset:24  size:4; signed:1;
-       u32 map_id;     //      offset:28  size:4; signed:0;
-       int map_index;  //      offset:32  size:4; signed:1;
-};                     //      offset:36
-
-enum {
-       XDP_REDIRECT_SUCCESS = 0,
-       XDP_REDIRECT_ERROR = 1
-};
-
-static __always_inline
-int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
-{
-       u32 key = XDP_REDIRECT_ERROR;
-       struct datarec *rec;
-       int err = ctx->err;
-
-       if (!err)
-               key = XDP_REDIRECT_SUCCESS;
-
-       rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
-       if (!rec)
-               return 0;
-       rec->dropped += 1;
-
-       return 0; /* Indicate event was filtered (no further processing)*/
-       /*
-        * Returning 1 here would allow e.g. a perf-record tracepoint
-        * to see and record these events, but it doesn't work well
-        * in-practice as stopping perf-record also unload this
-        * bpf_prog.  Plus, there is additional overhead of doing so.
-        */
-}
-
-SEC("tracepoint/xdp/xdp_redirect_err")
-int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
-{
-       return xdp_redirect_collect_stat(ctx);
-}
-
-SEC("tracepoint/xdp/xdp_redirect_map_err")
-int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
-{
-       return xdp_redirect_collect_stat(ctx);
-}
-
-/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
- * Code in:                kernel/include/trace/events/xdp.h
- */
-struct xdp_exception_ctx {
-       u64 __pad;      // First 8 bytes are not accessible by bpf code
-       int prog_id;    //      offset:8;  size:4; signed:1;
-       u32 act;        //      offset:12; size:4; signed:0;
-       int ifindex;    //      offset:16; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_exception")
-int trace_xdp_exception(struct xdp_exception_ctx *ctx)
-{
-       struct datarec *rec;
-       u32 key = 0;
-
-       rec = bpf_map_lookup_elem(&exception_cnt, &key);
-       if (!rec)
-               return 1;
-       rec->dropped += 1;
-
-       return 0;
-}
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
- * Code in:         kernel/include/trace/events/xdp.h
- */
-struct cpumap_enqueue_ctx {
-       u64 __pad;              // First 8 bytes are not accessible by bpf code
-       int map_id;             //      offset:8;  size:4; signed:1;
-       u32 act;                //      offset:12; size:4; signed:0;
-       int cpu;                //      offset:16; size:4; signed:1;
-       unsigned int drops;     //      offset:20; size:4; signed:0;
-       unsigned int processed; //      offset:24; size:4; signed:0;
-       int to_cpu;             //      offset:28; size:4; signed:1;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_enqueue")
-int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
-{
-       u32 to_cpu = ctx->to_cpu;
-       struct datarec *rec;
-
-       if (to_cpu >= MAX_CPUS)
-               return 1;
-
-       rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
-       if (!rec)
-               return 0;
-       rec->processed += ctx->processed;
-       rec->dropped   += ctx->drops;
-
-       /* Record bulk events, then userspace can calc average bulk size */
-       if (ctx->processed > 0)
-               rec->issue += 1;
-
-       /* Inception: It's possible to detect overload situations, via
-        * this tracepoint.  This can be used for creating a feedback
-        * loop to XDP, which can take appropriate actions to mitigate
-        * this overload situation.
-        */
-       return 0;
-}
-
-/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
- * Code in:         kernel/include/trace/events/xdp.h
- */
-struct cpumap_kthread_ctx {
-       u64 __pad;                      // First 8 bytes are not accessible
-       int map_id;                     //      offset:8;  size:4; signed:1;
-       u32 act;                        //      offset:12; size:4; signed:0;
-       int cpu;                        //      offset:16; size:4; signed:1;
-       unsigned int drops;             //      offset:20; size:4; signed:0;
-       unsigned int processed;         //      offset:24; size:4; signed:0;
-       int sched;                      //      offset:28; size:4; signed:1;
-       unsigned int xdp_pass;          //      offset:32; size:4; signed:0;
-       unsigned int xdp_drop;          //      offset:36; size:4; signed:0;
-       unsigned int xdp_redirect;      //      offset:40; size:4; signed:0;
-};
-
-SEC("tracepoint/xdp/xdp_cpumap_kthread")
-int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
-{
-       struct datarec *rec;
-       u32 key = 0;
-
-       rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
-       if (!rec)
-               return 0;
-       rec->processed += ctx->processed;
-       rec->dropped   += ctx->drops;
-       rec->xdp_pass  += ctx->xdp_pass;
-       rec->xdp_drop  += ctx->xdp_drop;
-       rec->xdp_redirect  += ctx->xdp_redirect;
-
-       /* Count times kthread yielded CPU via schedule call */
-       if (ctx->sched)
-               rec->issue++;
-
-       return 0;
-}