/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/if_vlan.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>

#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "hash_func01.h"

#define MAX_CPUS NR_CPUS

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(struct bpf_cpumap_val));
	__uint(max_entries, MAX_CPUS);
} cpu_map SEC(".maps");
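
/* Note: each cpu_map entry is expected to be populated from user-space before
 * packets are redirected to that CPU. One way to do it (illustrative names
 * and values, not prescribed by this file):
 *
 *	struct bpf_cpumap_val val = { .qsize = 2048 };
 *	bpf_map_update_elem(cpu_map_fd, &cpu, &val, 0);
 *
 * where qsize sizes the per-CPU queue that the cpumap kthread drains.
 */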

/* Common stats data record to keep userspace more simple */
struct datarec {
	__u64 processed;
	__u64 dropped;
	__u64 issue;
	__u64 xdp_pass;
	__u64 xdp_drop;
	__u64 xdp_redirect;
};
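
/* Note: the *_cnt maps below all use BPF_MAP_TYPE_PERCPU_ARRAY with struct
 * datarec as the value, so a user-space lookup returns one record per
 * possible CPU and the reader sums the records to get totals.
 */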

/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
 * feedback. Redirect TX errors can be caught via a tracepoint.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} rx_cnt SEC(".maps");
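
/* The redirect/TX-side feedback mentioned above comes from the tracepoint
 * programs at the end of this file (trace_xdp_redirect_err,
 * trace_xdp_cpumap_enqueue, trace_xdp_cpumap_kthread), which fill the
 * counter maps that follow.
 */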

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 2);
	/* TODO: have entries for all possible errno's */
} redirect_err_cnt SEC(".maps");

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, MAX_CPUS);
} cpumap_enqueue_cnt SEC(".maps");

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} cpumap_kthread_cnt SEC(".maps");

/* Set of maps controlling available CPUs, and for iterating through
 * selectable redirect CPUs.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, MAX_CPUS);
} cpus_available SEC(".maps");
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_count SEC(".maps");
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_iterator SEC(".maps");
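
/* Expected usage (user-space side, illustrative): cpus_available[0..N-1]
 * holds the CPU numbers that are valid redirect targets, cpus_count[0] holds
 * N, and cpus_iterator keeps the per-CPU round-robin position used by
 * xdp_prognum2_round_robin below.
 */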

/* Used by trace point */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} exception_cnt SEC(".maps");

/* Helper parse functions */

/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
 *
 * Returns false on error or on a non-supported ether-type
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = ntohs(eth_type);
	*l3_offset = offset;
	return true;
}
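
/* Note on the bounds checks above: every time the parse offset is advanced,
 * the new header end is compared against data_end before the header is read.
 * This is what allows the BPF verifier to prove all packet accesses stay
 * within the frame.
 */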

static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;
	u16 dport;

	if (iph + 1 > data_end)
		return 0;
	if (!(iph->protocol == IPPROTO_UDP))
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = ntohs(udph->dest);
	return dport;
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

SEC("xdp_cpu_map0")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map1_touch_data")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u16 eth_type;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (ntohs(eth_type) < ETH_P_802_3_MIN) {
		rec->dropped++;
		return XDP_DROP;
	}

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map2_round_robin")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 cpu_dest;
	u32 *cpu_lookup;
	u32 key0 = 0;

	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_idx;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key0);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
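
/* Note: because cpus_iterator is a per-CPU array, the round-robin position is
 * kept per RX CPU rather than globally. Each RX CPU cycles through the
 * available CPUs independently, which avoids cross-CPU contention on the
 * iterator.
 */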

SEC("xdp_cpu_map3_proto_separate")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map4_ddos_filter_pktgen")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u16 dest_port;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen) */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			if (rec)
				rec->dropped++;
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
	cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
	cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
	cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}
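
/* Note: both helpers hash the sum of source and destination addresses, and
 * addition is commutative, so swapping src/dst produces the same hash value.
 * That is what makes the load-balancing scheme below symmetric.
 */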

/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The
 * hashing scheme is symmetric, meaning swapping IP src/dest still hits
 * the same CPU.
 */
SEC("xdp_cpu_map5_lb_hash_ip_pairs")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 *cpu_max;
	u32 cpu_hash;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

char _license[] SEC("license") = "GPL";

/*** Trace point code ***/

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct xdp_redirect_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int prog_id;		// offset:8;  size:4; signed:1;
	u32 act;		// offset:12; size:4; signed:0;
	int ifindex;		// offset:16; size:4; signed:1;
	int err;		// offset:20; size:4; signed:1;
	int to_ifindex;		// offset:24; size:4; signed:1;
	u32 map_id;		// offset:28; size:4; signed:0;
	int map_index;		// offset:32; size:4; signed:1;
};				// offset:36

enum {
	XDP_REDIRECT_SUCCESS = 0,
	XDP_REDIRECT_ERROR = 1
};

static __always_inline
int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
{
	u32 key = XDP_REDIRECT_ERROR;
	struct datarec *rec;
	int err = ctx->err;

	if (!err)
		key = XDP_REDIRECT_SUCCESS;

	rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
	if (!rec)
		return 0;
	rec->dropped += 1;

	return 0; /* Indicate event was filtered (no further processing) */
	/*
	 * Returning 1 here would allow e.g. a perf-record tracepoint
	 * to see and record these events, but it doesn't work well
	 * in practice as stopping perf-record also unloads this
	 * bpf_prog. Plus, there is the additional overhead of doing so.
	 */
}

SEC("tracepoint/xdp/xdp_redirect_err")
int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

SEC("tracepoint/xdp/xdp_redirect_map_err")
int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}
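
/* Note: loading this object does not by itself attach the
 * SEC("tracepoint/xdp/...") programs; the user-space loader is expected to
 * attach them, e.g. via libbpf's bpf_program__attach_tracepoint() (one
 * possible mechanism, not prescribed by this file).
 */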

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct xdp_exception_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	// offset:8;  size:4; signed:1;
	u32 act;	// offset:12; size:4; signed:0;
	int ifindex;	// offset:16; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&exception_cnt, &key);
	if (!rec)
		return 1;
	rec->dropped += 1;

	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct cpumap_enqueue_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		// offset:8;  size:4; signed:1;
	u32 act;		// offset:12; size:4; signed:0;
	int cpu;		// offset:16; size:4; signed:1;
	unsigned int drops;	// offset:20; size:4; signed:0;
	unsigned int processed;	// offset:24; size:4; signed:0;
	int to_cpu;		// offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
{
	u32 to_cpu = ctx->to_cpu;
	struct datarec *rec;

	if (to_cpu >= MAX_CPUS)
		return 1;

	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped += ctx->drops;

	/* Record bulk events, then userspace can calc average bulk size */
	if (ctx->processed > 0)
		rec->issue += 1;

	/* Inception: It's possible to detect overload situations, via
	 * this tracepoint. This can be used for creating a feedback
	 * loop to XDP, which can take appropriate actions to mitigate
	 * this overload situation.
	 */
	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct cpumap_kthread_ctx {
	u64 __pad;			// First 8 bytes are not accessible
	int map_id;			// offset:8;  size:4; signed:1;
	u32 act;			// offset:12; size:4; signed:0;
	int cpu;			// offset:16; size:4; signed:1;
	unsigned int drops;		// offset:20; size:4; signed:0;
	unsigned int processed;		// offset:24; size:4; signed:0;
	int sched;			// offset:28; size:4; signed:1;
	unsigned int xdp_pass;		// offset:32; size:4; signed:0;
	unsigned int xdp_drop;		// offset:36; size:4; signed:0;
	unsigned int xdp_redirect;	// offset:40; size:4; signed:0;
};

SEC("tracepoint/xdp/xdp_cpumap_kthread")
int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped += ctx->drops;
	rec->xdp_pass += ctx->xdp_pass;
	rec->xdp_drop += ctx->xdp_drop;
	rec->xdp_redirect += ctx->xdp_redirect;

	/* Count times kthread yielded CPU via schedule call */
	if (ctx->sched)
		rec->issue++;

	return 0;
}