/* samples/bpf/xdp_redirect_cpu_kern.c
 *
 * XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/if_vlan.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include "hash_func01.h"

#define MAX_CPUS 64	/* WARNING - sync with _user.c */

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct bpf_map_def SEC("maps") cpu_map = {
	.type = BPF_MAP_TYPE_CPUMAP,
	.key_size = sizeof(u32),
	.value_size = sizeof(u32),
	.max_entries = MAX_CPUS,
};
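/* Note: entries in cpu_map are created by the companion user-space loader
 * (xdp_redirect_cpu_user.c); the key is the destination CPU id and the u32
 * value is the queue size for that CPU's cpumap kthread.
 */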

/* Common stats data record to keep userspace more simple */
struct datarec {
	__u64 processed;
	__u64 dropped;
	__u64 issue;
};

/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
 * feedback. Redirect TX errors can be caught via a tracepoint.
 */
struct bpf_map_def SEC("maps") rx_cnt = {
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(struct datarec),
	.max_entries = 1,
};

/* Used by trace point */
struct bpf_map_def SEC("maps") redirect_err_cnt = {
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(struct datarec),
	.max_entries = 2,
	/* TODO: have entries for all possible errno's */
};

/* Used by trace point */
struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(struct datarec),
	.max_entries = MAX_CPUS,
};

/* Used by trace point */
struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(struct datarec),
	.max_entries = 1,
};

/* Set of maps controlling available CPU, and for iterating through
 * selectable redirect CPUs.
 */
struct bpf_map_def SEC("maps") cpus_available = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u32),
	.max_entries = MAX_CPUS,
};
struct bpf_map_def SEC("maps") cpus_count = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u32),
	.max_entries = 1,
};
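/* Rough sketch of how the companion user-space loader is expected to wire
 * up cpu_map, cpus_available and cpus_count (illustrative only; the fd and
 * variable names below are hypothetical, see xdp_redirect_cpu_user.c for
 * the real code):
 *
 *	u32 key0 = 0, qsize = 192;	// cpumap kthread queue size (example value)
 *	for (u32 i = 0; i < n_cpus; i++) {
 *		// enable the CPU in the cpumap and record it as selectable
 *		bpf_map_update_elem(cpu_map_fd, &cpus[i], &qsize, 0);
 *		bpf_map_update_elem(cpus_available_fd, &i, &cpus[i], 0);
 *	}
 *	bpf_map_update_elem(cpus_count_fd, &key0, &n_cpus, 0);
 */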
struct bpf_map_def SEC("maps") cpus_iterator = {
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(u32),
	.max_entries = 1,
};
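/* Note: cpus_iterator is only touched by the XDP programs themselves; being
 * a per-CPU array, each RX CPU keeps its own round-robin position, so no
 * cross-CPU synchronization is needed.
 */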

/* Used by trace point */
struct bpf_map_def SEC("maps") exception_cnt = {
	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size = sizeof(u32),
	.value_size = sizeof(struct datarec),
	.max_entries = 1,
};

/* Helper parse functions */

/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
 *
 * Returns false on error and non-supported ether-type
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = ntohs(eth_type);
	*l3_offset = offset;
	return true;
}

static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;
	u16 dport;

	if (iph + 1 > data_end)
		return 0;
	if (!(iph->protocol == IPPROTO_UDP))
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = ntohs(udph->dest);
	return dport;
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

SEC("xdp_cpu_map0")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map1_touch_data")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u16 eth_type;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (ntohs(eth_type) < ETH_P_802_3_MIN) {
		rec->dropped++;
		return XDP_DROP;
	}

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map2_round_robin")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 cpu_dest;
	u32 *cpu_lookup;
	u32 key0 = 0;

	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_idx;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key0);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map3_proto_separate")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map4_ddos_filter_pktgen")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u16 dest_port;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen) */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			if (rec)
				rec->dropped++;
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval */
#define INITVAL 15485863
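/* Side note: 15485863 is the 1,000,000th prime; the exact seed value only
 * perturbs the hash and is not otherwise significant.
 */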

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
	cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
	cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
	cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}

/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The
 * hashing scheme is symmetric, meaning that swapping IP src/dest
 * still hits the same CPU.
 */
SEC("xdp_cpu_map5_lb_hash_ip_pairs")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 *cpu_max;
	u32 cpu_hash;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

char _license[] SEC("license") = "GPL";

/*** Trace point code ***/

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct xdp_redirect_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int prog_id;		// offset:8;  size:4; signed:1;
	u32 act;		// offset:12; size:4; signed:0;
	int ifindex;		// offset:16; size:4; signed:1;
	int err;		// offset:20; size:4; signed:1;
	int to_ifindex;		// offset:24; size:4; signed:1;
	u32 map_id;		// offset:28; size:4; signed:0;
	int map_index;		// offset:32; size:4; signed:1;
};				// offset:36

enum {
	XDP_REDIRECT_SUCCESS = 0,
	XDP_REDIRECT_ERROR = 1
};
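/* The two values above are the only keys used to index redirect_err_cnt
 * (hence its max_entries = 2).
 */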

static __always_inline
int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
{
	u32 key = XDP_REDIRECT_ERROR;
	struct datarec *rec;
	int err = ctx->err;

	if (!err)
		key = XDP_REDIRECT_SUCCESS;

	rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
	if (!rec)
		return 0;
	rec->dropped += 1;

	return 0; /* Indicate event was filtered (no further processing) */
	/*
	 * Returning 1 here would allow e.g. a perf-record tracepoint
	 * to see and record these events, but it doesn't work well
	 * in practice as stopping perf-record also unloads this
	 * bpf_prog. Plus, there is additional overhead of doing so.
	 */
}

SEC("tracepoint/xdp/xdp_redirect_err")
int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

SEC("tracepoint/xdp/xdp_redirect_map_err")
int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct xdp_exception_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	// offset:8;  size:4; signed:1;
	u32 act;	// offset:12; size:4; signed:0;
	int ifindex;	// offset:16; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&exception_cnt, &key);
	if (!rec)
		return 1;
	rec->dropped += 1;

	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct cpumap_enqueue_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		// offset:8;  size:4; signed:1;
	u32 act;		// offset:12; size:4; signed:0;
	int cpu;		// offset:16; size:4; signed:1;
	unsigned int drops;	// offset:20; size:4; signed:0;
	unsigned int processed;	// offset:24; size:4; signed:0;
	int to_cpu;		// offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
{
	u32 to_cpu = ctx->to_cpu;
	struct datarec *rec;

	if (to_cpu >= MAX_CPUS)
		return 1;

	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped += ctx->drops;

	/* Record bulk events, then userspace can calc average bulk size */
	if (ctx->processed > 0)
		rec->issue += 1;

	/* Inception: It's possible to detect overload situations via
	 * this tracepoint. This can be used for creating a feedback
	 * loop to XDP, which can take appropriate actions to mitigate
	 * this overload situation.
	 */
	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
 * Code in: kernel/include/trace/events/xdp.h
 */
struct cpumap_kthread_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		// offset:8;  size:4; signed:1;
	u32 act;		// offset:12; size:4; signed:0;
	int cpu;		// offset:16; size:4; signed:1;
	unsigned int drops;	// offset:20; size:4; signed:0;
	unsigned int processed;	// offset:24; size:4; signed:0;
	int sched;		// offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_kthread")
int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped += ctx->drops;

	/* Count times kthread yielded CPU via schedule call */
	if (ctx->sched)
		rec->issue++;

	return 0;
}