/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/if_vlan.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>

#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "hash_func01.h"

#define MAX_CPUS NR_CPUS
/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(struct bpf_cpumap_val));
	__uint(max_entries, MAX_CPUS);
} cpu_map SEC(".maps");
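/* Usage sketch (an assumption, not taken from this file): user space is
 * expected to arm each cpu_map slot with a struct bpf_cpumap_val before
 * redirecting to it. The qsize and the cpu_map_fd variable below are only
 * illustrative, roughly:
 *
 *	struct bpf_cpumap_val cpumap_value = { .qsize = 192 };
 *	u32 cpu = 2;
 *	bpf_map_update_elem(cpu_map_fd, &cpu, &cpumap_value, 0);
 */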
/* Common stats data record to keep userspace more simple */
struct datarec {
	__u64 processed;
	__u64 dropped;
	__u64 issue;
	__u64 xdp_pass;
	__u64 xdp_drop;
	__u64 xdp_redirect;
};

/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
 * feedback.  Redirect TX errors can be caught via a tracepoint.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} rx_cnt SEC(".maps");
/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 2);
	/* TODO: have entries for all possible errno's */
} redirect_err_cnt SEC(".maps");
/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, MAX_CPUS);
} cpumap_enqueue_cnt SEC(".maps");
/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} cpumap_kthread_cnt SEC(".maps");
/* Set of maps controlling available CPU, and for iterating through
 * selectable redirect CPUs.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, MAX_CPUS);
} cpus_available SEC(".maps");
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_count SEC(".maps");
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_iterator SEC(".maps");
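/* Loader sketch (an assumption about the companion user-space program, not
 * taken from this file): the CPUs selected for redirect are written into
 * cpus_available at indexes 0..n-1 and cpus_count[0] is set to n, so the
 * round-robin and hashing programs below iterate over exactly n valid
 * entries. The *_fd names are hypothetical map fds:
 *
 *	u32 i, key0 = 0, n = 2, cpus[2] = { 2, 3 };
 *	for (i = 0; i < n; i++)
 *		bpf_map_update_elem(cpus_available_fd, &i, &cpus[i], 0);
 *	bpf_map_update_elem(cpus_count_fd, &key0, &n, 0);
 */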
/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} exception_cnt SEC(".maps");
/* Helper parse functions */

/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
 *
 * Returns false on error and non-supported ether-type
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};
static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = ntohs(eth_type);
	*l3_offset = offset;
	return true;
}
static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;
	u16 dport;

	if (iph + 1 > data_end)
		return 0;
	if (!(iph->protocol == IPPROTO_UDP))
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = ntohs(udph->dest);
	return dport;
}
static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}
static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}
SEC("xdp_cpu_map0")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
SEC("xdp_cpu_map1_touch_data")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u16 eth_type;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (ntohs(eth_type) < ETH_P_802_3_MIN) {
		rec->dropped++;
		return XDP_DROP;
	}

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
SEC("xdp_cpu_map2_round_robin")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_dest;
	u32 cpu_idx;
	u32 key0 = 0;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key0);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
SEC("xdp_cpu_map3_proto_separate")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
SEC("xdp_cpu_map4_ddos_filter_pktgen")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u16 dest_port;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;

		/* DDoS filter UDP port 9 (pktgen) */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			if (rec)
				rec->dropped++;
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}
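/* Note on symmetry: the hash input above is the plain 32-bit sum
 * saddr + daddr, and addition is commutative, so a flow and its reply
 * direction (src/dst swapped) produce the same cpu_hash and are steered
 * to the same CPU. The IPv6 helper below follows the same pattern.
 */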
static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash  = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
	cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
	cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
	cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}
/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
 * hashing scheme is symmetric, meaning swapping IP src/dest still hit
 * same CPU.
 */
SEC("xdp_cpu_map5_lb_hash_ip_pairs")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 *cpu_max;
	u32 cpu_hash;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
char _license[] SEC("license") = "GPL";
/*** Trace point code ***/

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in:            kernel/include/trace/events/xdp.h
 */
struct xdp_redirect_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	//	offset:8;  size:4; signed:1;
	u32 act;	//	offset:12; size:4; signed:0;
	int ifindex;	//	offset:16; size:4; signed:1;
	int err;	//	offset:20; size:4; signed:1;
	int to_ifindex;	//	offset:24; size:4; signed:1;
	u32 map_id;	//	offset:28; size:4; signed:0;
	int map_index;	//	offset:32; size:4; signed:1;
};

enum {
	XDP_REDIRECT_SUCCESS = 0,
	XDP_REDIRECT_ERROR = 1
};

static __always_inline
int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
{
	u32 key = XDP_REDIRECT_ERROR;
	struct datarec *rec;
	int err = ctx->err;

	if (!err)
		key = XDP_REDIRECT_SUCCESS;

	rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
	if (!rec)
		return 0;
	rec->dropped += 1;

	return 0; /* Indicate event was filtered (no further processing) */
	/*
	 * Returning 1 here would allow e.g. a perf-record tracepoint
	 * to see and record these events, but it doesn't work well
	 * in practice as stopping perf-record also unloads this
	 * bpf_prog.  Plus, there is additional overhead of doing so.
	 */
}

SEC("tracepoint/xdp/xdp_redirect_err")
int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

SEC("tracepoint/xdp/xdp_redirect_map_err")
int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}
/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
 * Code in:            kernel/include/trace/events/xdp.h
 */
struct xdp_exception_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	//	offset:8;  size:4; signed:1;
	u32 act;	//	offset:12; size:4; signed:0;
	int ifindex;	//	offset:16; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&exception_cnt, &key);
	if (!rec)
		return 1;
	rec->dropped += 1;

	return 0;
}
/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
 * Code in:      kernel/include/trace/events/xdp.h
 */
struct cpumap_enqueue_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		//	offset:8;  size:4; signed:1;
	u32 act;		//	offset:12; size:4; signed:0;
	int cpu;		//	offset:16; size:4; signed:1;
	unsigned int drops;	//	offset:20; size:4; signed:0;
	unsigned int processed;	//	offset:24; size:4; signed:0;
	int to_cpu;		//	offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
{
	u32 to_cpu = ctx->to_cpu;
	struct datarec *rec;

	if (to_cpu >= MAX_CPUS)
		return 1;

	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped   += ctx->drops;

	/* Record bulk events, then userspace can calc average bulk size */
	if (ctx->processed > 0)
		rec->issue += 1;

	/* Inception: It's possible to detect overload situations, via
	 * this tracepoint. This can be used for creating a feedback
	 * loop to XDP, which can take appropriate actions to mitigate
	 * this overload situation.
	 */
	return 0;
}
/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
 * Code in:      kernel/include/trace/events/xdp.h
 */
struct cpumap_kthread_ctx {
	u64 __pad;			// First 8 bytes are not accessible
	int map_id;			//	offset:8;  size:4; signed:1;
	u32 act;			//	offset:12; size:4; signed:0;
	int cpu;			//	offset:16; size:4; signed:1;
	unsigned int drops;		//	offset:20; size:4; signed:0;
	unsigned int processed;		//	offset:24; size:4; signed:0;
	int sched;			//	offset:28; size:4; signed:1;
	unsigned int xdp_pass;		//	offset:32; size:4; signed:0;
	unsigned int xdp_drop;		//	offset:36; size:4; signed:0;
	unsigned int xdp_redirect;	//	offset:40; size:4; signed:0;
};

SEC("tracepoint/xdp/xdp_cpumap_kthread")
int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped   += ctx->drops;
	rec->xdp_pass  += ctx->xdp_pass;
	rec->xdp_drop  += ctx->xdp_drop;
	rec->xdp_redirect += ctx->xdp_redirect;

	/* Count times kthread yielded CPU via schedule call */
	if (ctx->sched)
		rec->issue++;

	return 0;
}