/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/if_vlan.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include "hash_func01.h"

#define MAX_CPUS 64 /* WARNING - sync with _user.c */
/* Special map type that can XDP_REDIRECT frames to another CPU */
struct bpf_map_def SEC("maps") cpu_map = {
	.type		= BPF_MAP_TYPE_CPUMAP,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(u32),
	.max_entries	= MAX_CPUS,
};
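/* Illustrative sketch (names and values here are hypothetical; the real
 * setup lives in the matching _user.c loader): userspace must populate
 * cpu_map before redirects can succeed.  For a cpumap the key is the
 * destination CPU and the u32 value is the queue size for that CPU's
 * kthread, e.g. from userspace:
 *
 *	__u32 cpu = 2;		// key: destination CPU
 *	__u32 qsize = 192;	// value: per-CPU queue size (example value)
 *	bpf_map_update_elem(cpu_map_fd, &cpu, &qsize, 0);
 */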
/* Common stats data record to keep userspace more simple */
struct datarec {
	__u64 processed;
	__u64 dropped;
	__u64 issue;
};

/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
 * feedback.  Redirect TX errors can be caught via a tracepoint.
 */
struct bpf_map_def SEC("maps") rx_cnt = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(struct datarec),
	.max_entries	= 1,
};
/* Used by trace point */
struct bpf_map_def SEC("maps") redirect_err_cnt = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(struct datarec),
	.max_entries	= 2,
	/* TODO: have entries for all possible errno's */
};
/* Used by trace point */
struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(struct datarec),
	.max_entries	= MAX_CPUS,
};
/* Used by trace point */
struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(struct datarec),
	.max_entries	= 1,
};
/* Set of maps controlling available CPU, and for iterating through
 * selectable redirect CPUs.  (An illustrative userspace populate sketch
 * follows these three map definitions.)
 */
struct bpf_map_def SEC("maps") cpus_available = {
	.type		= BPF_MAP_TYPE_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(u32),
	.max_entries	= MAX_CPUS,
};
struct bpf_map_def SEC("maps") cpus_count = {
	.type		= BPF_MAP_TYPE_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(u32),
	.max_entries	= 1,
};
struct bpf_map_def SEC("maps") cpus_iterator = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(u32),
	.max_entries	= 1,
};
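/* Illustrative sketch (names and values are hypothetical; the real logic
 * lives in the matching _user.c loader): the programs below pick an index
 * into cpus_available and bound it by cpus_count, so userspace must keep
 * the two maps consistent, roughly like:
 *
 *	__u32 key, ncpus = 2;
 *	__u32 cpus[2] = { 2, 3 };	// example: redirect only to CPU 2 and 3
 *	for (key = 0; key < ncpus; key++)
 *		bpf_map_update_elem(fd_cpus_available, &key, &cpus[key], 0);
 *	key = 0;
 *	bpf_map_update_elem(fd_cpus_count, &key, &ncpus, 0);
 */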
/* Used by trace point */
struct bpf_map_def SEC("maps") exception_cnt = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(struct datarec),
	.max_entries	= 1,
};
/* Helper parse functions */

/* Parse Ethernet layer 2, extract network layer 3 offset and protocol.
 *
 * Returns false on error or a non-supported ether-type.
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = ntohs(eth_type);
	*l3_offset = offset;
	return true;
}
static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;
	u16 dport;

	if (iph + 1 > data_end)
		return 0;
	if (!(iph->protocol == IPPROTO_UDP))
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = ntohs(udph->dest);
	return dport;
}
static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}
static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}
SEC("xdp_cpu_map0")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
SEC("xdp_cpu_map1_touch_data")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u16 eth_type;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (ntohs(eth_type) < ETH_P_802_3_MIN) {
		rec->dropped++;
		return XDP_DROP;
	}

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
SEC("xdp_cpu_map2_round_robin")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_dest;
	u32 cpu_idx;
	u32 key0 = 0;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key0);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
SEC("xdp_cpu_map3_proto_separate")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
SEC("xdp_cpu_map4_ddos_filter_pktgen")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u16 dest_port;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen) */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			rec->dropped++;
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}
static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash  = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
	cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
	cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
	cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}
/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
 * hashing scheme is symmetric, meaning swapping IP src/dest still hits
 * the same CPU.
 */
SEC("xdp_cpu_map5_lb_hash_ip_pairs")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 *cpu_max;
	u32 cpu_hash;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
char _license[] SEC("license") = "GPL";
/*** Trace point code ***/

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in:             kernel/include/trace/events/xdp.h
 */
struct xdp_redirect_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	//	offset:8;  size:4; signed:1;
	u32 act;	//	offset:12; size:4; signed:0;
	int ifindex;	//	offset:16; size:4; signed:1;
	int err;	//	offset:20; size:4; signed:1;
	int to_ifindex;	//	offset:24; size:4; signed:1;
	u32 map_id;	//	offset:28; size:4; signed:0;
	int map_index;	//	offset:32; size:4; signed:1;
};

enum {
	XDP_REDIRECT_SUCCESS = 0,
	XDP_REDIRECT_ERROR = 1
};
static __always_inline
int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
{
	u32 key = XDP_REDIRECT_ERROR;
	struct datarec *rec;
	int err = ctx->err;

	if (!err)
		key = XDP_REDIRECT_SUCCESS;

	rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
	if (!rec)
		return 0;
	rec->dropped += 1;

	return 0; /* Indicate event was filtered (no further processing) */
	/*
	 * Returning 1 here would allow e.g. a perf-record tracepoint
	 * to see and record these events, but it doesn't work well in
	 * practice, as stopping perf-record also unloads this bpf_prog.
	 * Plus, there is additional overhead of doing so.
	 */
}
SEC("tracepoint/xdp/xdp_redirect_err")
int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}
SEC("tracepoint/xdp/xdp_redirect_map_err")
int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}
/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
 * Code in:             kernel/include/trace/events/xdp.h
 */
struct xdp_exception_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	//	offset:8;  size:4; signed:1;
	u32 act;	//	offset:12; size:4; signed:0;
	int ifindex;	//	offset:16; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&exception_cnt, &key);
	if (!rec)
		return 1;
	rec->dropped += 1;

	return 0;
}
/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
 * Code in:     kernel/include/trace/events/xdp.h
 */
struct cpumap_enqueue_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		//	offset:8;  size:4; signed:1;
	u32 act;		//	offset:12; size:4; signed:0;
	int cpu;		//	offset:16; size:4; signed:1;
	unsigned int drops;	//	offset:20; size:4; signed:0;
	unsigned int processed;	//	offset:24; size:4; signed:0;
	int to_cpu;		//	offset:28; size:4; signed:1;
};
SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
{
	u32 to_cpu = ctx->to_cpu;
	struct datarec *rec;

	if (to_cpu >= MAX_CPUS)
		return 1;

	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped   += ctx->drops;

	/* Record bulk events, then userspace can calc average bulk size */
	if (ctx->processed > 0)
		rec->issue += 1;
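	/* Userspace-side sketch (assumption, not part of this program): with
	 * these counters an average enqueue bulk size can be derived as
	 * processed / issue, e.g. 64 packets over 8 bulk events gives an
	 * average bulk of 8.
	 */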
	/* Inception: It's possible to detect overload situations via this
	 * tracepoint.  This can be used for creating a feedback loop to
	 * XDP, which can take appropriate actions to mitigate this
	 * overload situation.
	 */
	return 0;
}
/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
 * Code in:     kernel/include/trace/events/xdp.h
 */
struct cpumap_kthread_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		//	offset:8;  size:4; signed:1;
	u32 act;		//	offset:12; size:4; signed:0;
	int cpu;		//	offset:16; size:4; signed:1;
	unsigned int drops;	//	offset:20; size:4; signed:0;
	unsigned int processed;	//	offset:24; size:4; signed:0;
	int sched;		//	offset:28; size:4; signed:1;
};
SEC("tracepoint/xdp/xdp_cpumap_kthread")
int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped   += ctx->drops;

	/* Count times kthread yielded CPU via schedule call */
	if (ctx->sched)
		rec->issue++;

	return 0;
}