/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/if_vlan.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"

#define MAX_CPUS 12 /* WARNING - sync with _user.c */

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct bpf_map_def SEC("maps") cpu_map = {
	.type		= BPF_MAP_TYPE_CPUMAP,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(u32),
	.max_entries	= MAX_CPUS,
};
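
/* Note: for a CPUMAP entry the key is the CPU to redirect to, while
 * the u32 value is not a CPU id but the queue size handed to that
 * CPU's kthread; entries are written from userspace.
 */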
26 | ||
27 | /* Common stats data record to keep userspace more simple */ | |
28 | struct datarec { | |
29 | __u64 processed; | |
30 | __u64 dropped; | |
31 | __u64 issue; | |
32 | }; | |
33 | ||
34 | /* Count RX packets, as XDP bpf_prog doesn't get direct TX-success | |
35 | * feedback. Redirect TX errors can be caught via a tracepoint. | |
36 | */ | |
37 | struct bpf_map_def SEC("maps") rx_cnt = { | |
38 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | |
39 | .key_size = sizeof(u32), | |
40 | .value_size = sizeof(struct datarec), | |
41 | .max_entries = 1, | |
42 | }; | |
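
/* Note: with BPF_MAP_TYPE_PERCPU_ARRAY each CPU updates its own copy
 * of the record without atomics; userspace sums across CPUs on read.
 */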
43 | ||
44 | /* Used by trace point */ | |
45 | struct bpf_map_def SEC("maps") redirect_err_cnt = { | |
46 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | |
47 | .key_size = sizeof(u32), | |
48 | .value_size = sizeof(struct datarec), | |
49 | .max_entries = 2, | |
50 | /* TODO: have entries for all possible errno's */ | |
51 | }; | |
52 | ||
53 | /* Used by trace point */ | |
54 | struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = { | |
55 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | |
56 | .key_size = sizeof(u32), | |
57 | .value_size = sizeof(struct datarec), | |
58 | .max_entries = MAX_CPUS, | |
59 | }; | |
60 | ||
61 | /* Used by trace point */ | |
62 | struct bpf_map_def SEC("maps") cpumap_kthread_cnt = { | |
63 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | |
64 | .key_size = sizeof(u32), | |
65 | .value_size = sizeof(struct datarec), | |
66 | .max_entries = 1, | |
67 | }; | |
68 | ||
69 | /* Set of maps controlling available CPU, and for iterating through | |
70 | * selectable redirect CPUs. | |
71 | */ | |
72 | struct bpf_map_def SEC("maps") cpus_available = { | |
73 | .type = BPF_MAP_TYPE_ARRAY, | |
74 | .key_size = sizeof(u32), | |
75 | .value_size = sizeof(u32), | |
76 | .max_entries = MAX_CPUS, | |
77 | }; | |
78 | struct bpf_map_def SEC("maps") cpus_count = { | |
79 | .type = BPF_MAP_TYPE_ARRAY, | |
80 | .key_size = sizeof(u32), | |
81 | .value_size = sizeof(u32), | |
82 | .max_entries = 1, | |
83 | }; | |
84 | struct bpf_map_def SEC("maps") cpus_iterator = { | |
85 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | |
86 | .key_size = sizeof(u32), | |
87 | .value_size = sizeof(u32), | |
88 | .max_entries = 1, | |
89 | }; | |
90 | ||
91 | /* Used by trace point */ | |
92 | struct bpf_map_def SEC("maps") exception_cnt = { | |
93 | .type = BPF_MAP_TYPE_PERCPU_ARRAY, | |
94 | .key_size = sizeof(u32), | |
95 | .value_size = sizeof(struct datarec), | |
96 | .max_entries = 1, | |
97 | }; | |
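
/* The cpus_available, cpus_count and cpu_map maps above are populated
 * from userspace by the companion program (xdp_redirect_cpu_user.c),
 * which selects the destination CPUs and loads one of the XDP
 * programs below.
 */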
98 | ||
99 | /* Helper parse functions */ | |
100 | ||
101 | /* Parse Ethernet layer 2, extract network layer 3 offset and protocol | |
102 | * | |
103 | * Returns false on error and non-supported ether-type | |
104 | */ | |
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* TODO: Handle double VLAN tagged packet */

	*eth_proto = ntohs(eth_type);
	*l3_offset = offset;
	return true;
}
143 | ||
144 | static __always_inline | |
145 | u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off) | |
146 | { | |
147 | void *data_end = (void *)(long)ctx->data_end; | |
148 | void *data = (void *)(long)ctx->data; | |
149 | struct iphdr *iph = data + nh_off; | |
150 | struct udphdr *udph; | |
151 | u16 dport; | |
152 | ||
153 | if (iph + 1 > data_end) | |
154 | return 0; | |
155 | if (!(iph->protocol == IPPROTO_UDP)) | |
156 | return 0; | |
157 | ||
	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = ntohs(udph->dest);
	return dport;
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

SEC("xdp_cpu_map0")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data     = (void *)(long)ctx->data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
219 | ||
220 | SEC("xdp_cpu_map1_touch_data") | |
221 | int xdp_prognum1_touch_data(struct xdp_md *ctx) | |
222 | { | |
223 | void *data_end = (void *)(long)ctx->data_end; | |
224 | void *data = (void *)(long)ctx->data; | |
225 | struct ethhdr *eth = data; | |
226 | struct datarec *rec; | |
227 | u32 *cpu_selected; | |
228 | u32 cpu_dest; | |
229 | u16 eth_type; | |
230 | u32 key = 0; | |
231 | ||
232 | /* Only use first entry in cpus_available */ | |
233 | cpu_selected = bpf_map_lookup_elem(&cpus_available, &key); | |
234 | if (!cpu_selected) | |
235 | return XDP_ABORTED; | |
236 | cpu_dest = *cpu_selected; | |
237 | ||
238 | /* Validate packet length is minimum Eth header size */ | |
239 | if (eth + 1 > data_end) | |
240 | return XDP_ABORTED; | |
241 | ||
242 | /* Count RX packet in map */ | |
243 | rec = bpf_map_lookup_elem(&rx_cnt, &key); | |
244 | if (!rec) | |
245 | return XDP_ABORTED; | |
246 | rec->processed++; | |
247 | ||
248 | /* Read packet data, and use it (drop non 802.3 Ethertypes) */ | |
249 | eth_type = eth->h_proto; | |
250 | if (ntohs(eth_type) < ETH_P_802_3_MIN) { | |
251 | rec->dropped++; | |
252 | return XDP_DROP; | |
253 | } | |
254 | ||
255 | if (cpu_dest >= MAX_CPUS) { | |
256 | rec->issue++; | |
257 | return XDP_ABORTED; | |
258 | } | |
259 | ||
260 | return bpf_redirect_map(&cpu_map, cpu_dest, 0); | |
261 | } | |
262 | ||
263 | SEC("xdp_cpu_map2_round_robin") | |
264 | int xdp_prognum2_round_robin(struct xdp_md *ctx) | |
265 | { | |
266 | void *data_end = (void *)(long)ctx->data_end; | |
267 | void *data = (void *)(long)ctx->data; | |
268 | struct ethhdr *eth = data; | |
269 | struct datarec *rec; | |
270 | u32 cpu_dest; | |
271 | u32 *cpu_lookup; | |
272 | u32 key0 = 0; | |
273 | ||
274 | u32 *cpu_selected; | |
275 | u32 *cpu_iterator; | |
276 | u32 *cpu_max; | |
277 | u32 cpu_idx; | |
278 | ||
279 | cpu_max = bpf_map_lookup_elem(&cpus_count, &key0); | |
280 | if (!cpu_max) | |
281 | return XDP_ABORTED; | |
282 | ||
283 | cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0); | |
284 | if (!cpu_iterator) | |
285 | return XDP_ABORTED; | |
286 | cpu_idx = *cpu_iterator; | |
287 | ||
288 | *cpu_iterator += 1; | |
289 | if (*cpu_iterator == *cpu_max) | |
290 | *cpu_iterator = 0; | |
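
	/* cpus_iterator is a PERCPU_ARRAY, so each RX CPU round-robins
	 * independently; no atomic operations are needed here.
	 */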
291 | ||
292 | cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx); | |
293 | if (!cpu_selected) | |
294 | return XDP_ABORTED; | |
295 | cpu_dest = *cpu_selected; | |
296 | ||
297 | /* Count RX packet in map */ | |
298 | rec = bpf_map_lookup_elem(&rx_cnt, &key0); | |
299 | if (!rec) | |
300 | return XDP_ABORTED; | |
301 | rec->processed++; | |
302 | ||
303 | if (cpu_dest >= MAX_CPUS) { | |
304 | rec->issue++; | |
305 | return XDP_ABORTED; | |
306 | } | |
307 | ||
308 | return bpf_redirect_map(&cpu_map, cpu_dest, 0); | |
309 | } | |
310 | ||
311 | SEC("xdp_cpu_map3_proto_separate") | |
312 | int xdp_prognum3_proto_separate(struct xdp_md *ctx) | |
313 | { | |
314 | void *data_end = (void *)(long)ctx->data_end; | |
315 | void *data = (void *)(long)ctx->data; | |
316 | struct ethhdr *eth = data; | |
317 | u8 ip_proto = IPPROTO_UDP; | |
318 | struct datarec *rec; | |
319 | u16 eth_proto = 0; | |
320 | u64 l3_offset = 0; | |
321 | u32 cpu_dest = 0; | |
322 | u32 cpu_idx = 0; | |
323 | u32 *cpu_lookup; | |
324 | u32 key = 0; | |
325 | ||
326 | /* Count RX packet in map */ | |
327 | rec = bpf_map_lookup_elem(&rx_cnt, &key); | |
328 | if (!rec) | |
329 | return XDP_ABORTED; | |
330 | rec->processed++; | |
331 | ||
332 | if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) | |
333 | return XDP_PASS; /* Just skip */ | |
334 | ||
335 | /* Extract L4 protocol */ | |
336 | switch (eth_proto) { | |
337 | case ETH_P_IP: | |
338 | ip_proto = get_proto_ipv4(ctx, l3_offset); | |
339 | break; | |
340 | case ETH_P_IPV6: | |
341 | ip_proto = get_proto_ipv6(ctx, l3_offset); | |
342 | break; | |
343 | case ETH_P_ARP: | |
344 | cpu_idx = 0; /* ARP packet handled on separate CPU */ | |
345 | break; | |
346 | default: | |
347 | cpu_idx = 0; | |
348 | } | |
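
	/* Note: ip_proto keeps its IPPROTO_UDP default for ARP and
	 * unknown ethertypes, so the switch below still overrides
	 * cpu_idx for those packets (the same applies in prognum4
	 * below).
	 */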
349 | ||
350 | /* Choose CPU based on L4 protocol */ | |
351 | switch (ip_proto) { | |
352 | case IPPROTO_ICMP: | |
353 | case IPPROTO_ICMPV6: | |
354 | cpu_idx = 2; | |
355 | break; | |
356 | case IPPROTO_TCP: | |
357 | cpu_idx = 0; | |
358 | break; | |
359 | case IPPROTO_UDP: | |
360 | cpu_idx = 1; | |
361 | break; | |
362 | default: | |
363 | cpu_idx = 0; | |
364 | } | |
365 | ||
366 | cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx); | |
367 | if (!cpu_lookup) | |
368 | return XDP_ABORTED; | |
369 | cpu_dest = *cpu_lookup; | |
370 | ||
371 | if (cpu_dest >= MAX_CPUS) { | |
372 | rec->issue++; | |
373 | return XDP_ABORTED; | |
374 | } | |
375 | ||
376 | return bpf_redirect_map(&cpu_map, cpu_dest, 0); | |
377 | } | |
378 | ||
379 | SEC("xdp_cpu_map4_ddos_filter_pktgen") | |
380 | int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx) | |
381 | { | |
382 | void *data_end = (void *)(long)ctx->data_end; | |
383 | void *data = (void *)(long)ctx->data; | |
384 | struct ethhdr *eth = data; | |
385 | u8 ip_proto = IPPROTO_UDP; | |
386 | struct datarec *rec; | |
387 | u16 eth_proto = 0; | |
388 | u64 l3_offset = 0; | |
389 | u32 cpu_dest = 0; | |
390 | u32 cpu_idx = 0; | |
391 | u16 dest_port; | |
392 | u32 *cpu_lookup; | |
393 | u32 key = 0; | |
394 | ||
395 | /* Count RX packet in map */ | |
396 | rec = bpf_map_lookup_elem(&rx_cnt, &key); | |
397 | if (!rec) | |
398 | return XDP_ABORTED; | |
399 | rec->processed++; | |
400 | ||
401 | if (!(parse_eth(eth, data_end, ð_proto, &l3_offset))) | |
402 | return XDP_PASS; /* Just skip */ | |
403 | ||
404 | /* Extract L4 protocol */ | |
405 | switch (eth_proto) { | |
406 | case ETH_P_IP: | |
407 | ip_proto = get_proto_ipv4(ctx, l3_offset); | |
408 | break; | |
409 | case ETH_P_IPV6: | |
410 | ip_proto = get_proto_ipv6(ctx, l3_offset); | |
411 | break; | |
412 | case ETH_P_ARP: | |
413 | cpu_idx = 0; /* ARP packet handled on separate CPU */ | |
414 | break; | |
415 | default: | |
416 | cpu_idx = 0; | |
417 | } | |
418 | ||
419 | /* Choose CPU based on L4 protocol */ | |
420 | switch (ip_proto) { | |
421 | case IPPROTO_ICMP: | |
422 | case IPPROTO_ICMPV6: | |
423 | cpu_idx = 2; | |
424 | break; | |
425 | case IPPROTO_TCP: | |
426 | cpu_idx = 0; | |
427 | break; | |
428 | case IPPROTO_UDP: | |
429 | cpu_idx = 1; | |
430 | /* DDoS filter UDP port 9 (pktgen) */ | |
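		/* (UDP port 9 is the "discard" service, and pktgen's
		 * default destination port)
		 */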
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			/* rec was already NULL-checked above */
			rec->dropped++;
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

char _license[] SEC("license") = "GPL";
457 | ||
458 | /*** Trace point code ***/ | |
459 | ||
460 | /* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format | |
461 | * Code in: kernel/include/trace/events/xdp.h | |
462 | */ | |
463 | struct xdp_redirect_ctx { | |
464 | u64 __pad; // First 8 bytes are not accessible by bpf code | |
465 | int prog_id; // offset:8; size:4; signed:1; | |
466 | u32 act; // offset:12 size:4; signed:0; | |
467 | int ifindex; // offset:16 size:4; signed:1; | |
468 | int err; // offset:20 size:4; signed:1; | |
469 | int to_ifindex; // offset:24 size:4; signed:1; | |
470 | u32 map_id; // offset:28 size:4; signed:0; | |
471 | int map_index; // offset:32 size:4; signed:1; | |
472 | }; // offset:36 | |
473 | ||
474 | enum { | |
475 | XDP_REDIRECT_SUCCESS = 0, | |
476 | XDP_REDIRECT_ERROR = 1 | |
477 | }; | |
478 | ||
479 | static __always_inline | |
480 | int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx) | |
481 | { | |
482 | u32 key = XDP_REDIRECT_ERROR; | |
483 | struct datarec *rec; | |
484 | int err = ctx->err; | |
485 | ||
486 | if (!err) | |
487 | key = XDP_REDIRECT_SUCCESS; | |
488 | ||
489 | rec = bpf_map_lookup_elem(&redirect_err_cnt, &key); | |
490 | if (!rec) | |
491 | return 0; | |
492 | rec->dropped += 1; | |
493 | ||
494 | return 0; /* Indicate event was filtered (no further processing)*/ | |
495 | /* | |
496 | * Returning 1 here would allow e.g. a perf-record tracepoint | |
497 | * to see and record these events, but it doesn't work well | |
498 | * in-practice as stopping perf-record also unload this | |
499 | * bpf_prog. Plus, there is additional overhead of doing so. | |
500 | */ | |
501 | } | |
502 | ||
503 | SEC("tracepoint/xdp/xdp_redirect_err") | |
504 | int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx) | |
505 | { | |
506 | return xdp_redirect_collect_stat(ctx); | |
507 | } | |
508 | ||
509 | SEC("tracepoint/xdp/xdp_redirect_map_err") | |
510 | int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx) | |
511 | { | |
512 | return xdp_redirect_collect_stat(ctx); | |
513 | } | |
514 | ||
515 | /* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format | |
516 | * Code in: kernel/include/trace/events/xdp.h | |
517 | */ | |
518 | struct xdp_exception_ctx { | |
519 | u64 __pad; // First 8 bytes are not accessible by bpf code | |
520 | int prog_id; // offset:8; size:4; signed:1; | |
521 | u32 act; // offset:12; size:4; signed:0; | |
522 | int ifindex; // offset:16; size:4; signed:1; | |
523 | }; | |
524 | ||
525 | SEC("tracepoint/xdp/xdp_exception") | |
526 | int trace_xdp_exception(struct xdp_exception_ctx *ctx) | |
527 | { | |
528 | struct datarec *rec; | |
529 | u32 key = 0; | |
530 | ||
531 | rec = bpf_map_lookup_elem(&exception_cnt, &key); | |
532 | if (!rec) | |
533 | return 1; | |
534 | rec->dropped += 1; | |
535 | ||
536 | return 0; | |
537 | } | |
538 | ||
539 | /* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format | |
540 | * Code in: kernel/include/trace/events/xdp.h | |
541 | */ | |
542 | struct cpumap_enqueue_ctx { | |
543 | u64 __pad; // First 8 bytes are not accessible by bpf code | |
544 | int map_id; // offset:8; size:4; signed:1; | |
545 | u32 act; // offset:12; size:4; signed:0; | |
546 | int cpu; // offset:16; size:4; signed:1; | |
547 | unsigned int drops; // offset:20; size:4; signed:0; | |
548 | unsigned int processed; // offset:24; size:4; signed:0; | |
549 | int to_cpu; // offset:28; size:4; signed:1; | |
550 | }; | |
551 | ||
552 | SEC("tracepoint/xdp/xdp_cpumap_enqueue") | |
553 | int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx) | |
554 | { | |
555 | u32 to_cpu = ctx->to_cpu; | |
556 | struct datarec *rec; | |
557 | ||
558 | if (to_cpu >= MAX_CPUS) | |
559 | return 1; | |
560 | ||
561 | rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu); | |
562 | if (!rec) | |
563 | return 0; | |
564 | rec->processed += ctx->processed; | |
565 | rec->dropped += ctx->drops; | |
566 | ||
567 | /* Record bulk events, then userspace can calc average bulk size */ | |
568 | if (ctx->processed > 0) | |
569 | rec->issue += 1; | |
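	/* (average bulk size = processed / issue) */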
570 | ||
571 | /* Inception: It's possible to detect overload situations, via | |
572 | * this tracepoint. This can be used for creating a feedback | |
573 | * loop to XDP, which can take appropriate actions to mitigate | |
574 | * this overload situation. | |
575 | */ | |
576 | return 0; | |
577 | } | |
578 | ||
579 | /* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format | |
580 | * Code in: kernel/include/trace/events/xdp.h | |
581 | */ | |
582 | struct cpumap_kthread_ctx { | |
583 | u64 __pad; // First 8 bytes are not accessible by bpf code | |
584 | int map_id; // offset:8; size:4; signed:1; | |
585 | u32 act; // offset:12; size:4; signed:0; | |
586 | int cpu; // offset:16; size:4; signed:1; | |
587 | unsigned int drops; // offset:20; size:4; signed:0; | |
588 | unsigned int processed; // offset:24; size:4; signed:0; | |
589 | int sched; // offset:28; size:4; signed:1; | |
590 | }; | |
591 | ||
592 | SEC("tracepoint/xdp/xdp_cpumap_kthread") | |
593 | int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx) | |
594 | { | |
595 | struct datarec *rec; | |
596 | u32 key = 0; | |
597 | ||
598 | rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key); | |
599 | if (!rec) | |
600 | return 0; | |
601 | rec->processed += ctx->processed; | |
602 | rec->dropped += ctx->drops; | |
603 | ||
604 | /* Count times kthread yielded CPU via schedule call */ | |
605 | if (ctx->sched) | |
606 | rec->issue++; | |
607 | ||
608 | return 0; | |
609 | } |