/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/if_vlan.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>

#include <uapi/linux/bpf.h>
#include "bpf_helpers.h"
#include "hash_func01.h"

#define MAX_CPUS 64 /* WARNING - sync with _user.c */

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct bpf_map_def SEC("maps") cpu_map = {
	.type		= BPF_MAP_TYPE_CPUMAP,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(u32),
	.max_entries	= MAX_CPUS,
};
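
/* The cpumap is populated from userspace (see the companion
 * xdp_redirect_cpu_user.c).  Each value is the queue size for the
 * kernel kthread bound to that CPU.  A sketch, assuming a map fd for
 * cpu_map and the bpf syscall wrapper (queue size 128 is just an
 * example value):
 *
 *	__u32 cpu = 2, queue_size = 128;
 *	bpf_map_update_elem(cpu_map_fd, &cpu, &queue_size, 0);
 *
 * A redirect to an index without a live entry fails at runtime and
 * shows up via the xdp_redirect_map_err tracepoint handled below.
 */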

/* Common stats data record to keep userspace more simple */
struct datarec {
	__u64 processed;
	__u64 dropped;
	__u64 issue;
};

/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
 * feedback.  Redirect TX errors can be caught via a tracepoint.
 */
struct bpf_map_def SEC("maps") rx_cnt = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(struct datarec),
	.max_entries	= 1,
};
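
/* rx_cnt is a PERCPU_ARRAY: every CPU gets its own copy of the
 * datarec, so the rec->processed++ updates below need no atomics.
 * Userspace reads all per-CPU copies in one bpf_map_lookup_elem()
 * call and sums them to get the totals.
 */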

/* Used by trace point */
struct bpf_map_def SEC("maps") redirect_err_cnt = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(struct datarec),
	.max_entries	= 2,
	/* TODO: have entries for all possible errno's */
};

/* Used by trace point */
struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(struct datarec),
	.max_entries	= MAX_CPUS,
};

/* Used by trace point */
struct bpf_map_def SEC("maps") cpumap_kthread_cnt = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(struct datarec),
	.max_entries	= 1,
};

/* Set of maps controlling the available CPUs, and for iterating
 * through the selectable redirect CPUs.
 */
struct bpf_map_def SEC("maps") cpus_available = {
	.type		= BPF_MAP_TYPE_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(u32),
	.max_entries	= MAX_CPUS,
};
struct bpf_map_def SEC("maps") cpus_count = {
	.type		= BPF_MAP_TYPE_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(u32),
	.max_entries	= 1,
};
struct bpf_map_def SEC("maps") cpus_iterator = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(u32),
	.max_entries	= 1,
};

/* Used by trace point */
struct bpf_map_def SEC("maps") exception_cnt = {
	.type		= BPF_MAP_TYPE_PERCPU_ARRAY,
	.key_size	= sizeof(u32),
	.value_size	= sizeof(struct datarec),
	.max_entries	= 1,
};

/* Helper parse functions */

/* Parse Ethernet layer 2, extract network layer 3 offset and protocol.
 *
 * Returns false on error or on a non-supported ether-type.
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet (QinQ) */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = ntohs(eth_type);
	*l3_offset = offset;
	return true;
}
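
/* Note the bounds-check pattern above: every packet access must be
 * preceded by an explicit compare against data_end, otherwise the
 * in-kernel BPF verifier rejects the program at load time.
 */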

static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;
	u16 dport;

	if (iph + 1 > data_end)
		return 0;
	if (iph->protocol != IPPROTO_UDP)
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = ntohs(udph->dest);
	return dport;
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

SEC("xdp_cpu_map0")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
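
/* On success bpf_redirect_map() enqueues the xdp frame to the cpumap
 * entry for cpu_dest; the remote CPU's kthread then allocates the SKB
 * and injects the packet into the network stack there.  The RX CPU
 * only does the cheap XDP processing, which is the point of this
 * sample.
 */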

SEC("xdp_cpu_map1_touch_data")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u16 eth_type;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (ntohs(eth_type) < ETH_P_802_3_MIN) {
		rec->dropped++;
		return XDP_DROP;
	}

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map2_round_robin")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	struct datarec *rec;
	u32 cpu_dest;
	u32 key0 = 0;

	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_idx;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key0);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
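
/* cpus_iterator is a PERCPU_ARRAY, so each RX CPU advances its own
 * round-robin position without needing atomics.  Caveat: round-robin
 * spreads packets of the same flow across CPUs, which can reorder
 * packets within a flow; the hash-based prognum5 below avoids this.
 */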

SEC("xdp_cpu_map3_proto_separate")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = 0;	/* Zero hits the default case below */
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packets are handled on CPU idx 0 */
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
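
/* Resulting mapping for prognum3: TCP, ARP and anything unknown use
 * cpus_available[0], UDP uses cpus_available[1], and ICMP/ICMPv6 use
 * cpus_available[2].  Userspace must populate at least these three
 * entries for the redirects to succeed.
 */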

SEC("xdp_cpu_map4_ddos_filter_pktgen")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = 0;	/* Zero hits the default case below */
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u16 dest_port;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packets are handled on CPU idx 0 */
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen) */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			rec->dropped++;
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash  = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
	cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
	cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
	cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}
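
/* The src/dest addresses are combined with '+', which is commutative,
 * so both directions of a flow hash to the same value; the L4
 * protocol is mixed in via the hash initval.  E.g. for IPv4,
 * hash(saddr + daddr) == hash(daddr + saddr).
 */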

/* Load-Balance traffic based on hashing IP-addrs + L4-proto.  The
 * hashing scheme is symmetric, meaning swapping IP src/dest still
 * hits the same CPU.
 */
SEC("xdp_cpu_map5_lb_hash_ip_pairs")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 *cpu_max;
	u32 cpu_hash;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash; guard against modulo-by-zero in
	 * case userspace has not yet set cpus_count.
	 */
	if (!*cpu_max)
		return XDP_ABORTED;
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}
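
/* Worked example: with cpus_count = 4 and a packet hashing to
 * cpu_hash = 2654435769, cpu_idx = 2654435769 % 4 = 1, so the frame
 * is redirected to the CPU stored in cpus_available[1].
 */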

char _license[] SEC("license") = "GPL";

/*** Trace point code ***/

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in:            kernel/include/trace/events/xdp.h
 */
struct xdp_redirect_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	//	offset:8;  size:4; signed:1;
	u32 act;	//	offset:12; size:4; signed:0;
	int ifindex;	//	offset:16; size:4; signed:1;
	int err;	//	offset:20; size:4; signed:1;
	int to_ifindex;	//	offset:24; size:4; signed:1;
	u32 map_id;	//	offset:28; size:4; signed:0;
	int map_index;	//	offset:32; size:4; signed:1;
};			// offset:36

enum {
	XDP_REDIRECT_SUCCESS = 0,
	XDP_REDIRECT_ERROR = 1
};

static __always_inline
int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
{
	u32 key = XDP_REDIRECT_ERROR;
	struct datarec *rec;
	int err = ctx->err;

	if (!err)
		key = XDP_REDIRECT_SUCCESS;

	rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
	if (!rec)
		return 0;
	rec->dropped += 1;

	return 0; /* Indicate event was filtered (no further processing) */
	/*
	 * Returning 1 here would allow e.g. a perf-record tracepoint
	 * to see and record these events, but it doesn't work well in
	 * practice, as stopping perf-record also unloads this
	 * bpf_prog.  Plus, there is the additional overhead of doing so.
	 */
}

SEC("tracepoint/xdp/xdp_redirect_err")
int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

SEC("tracepoint/xdp/xdp_redirect_map_err")
int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
 * Code in:            kernel/include/trace/events/xdp.h
 */
struct xdp_exception_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	//	offset:8;  size:4; signed:1;
	u32 act;	//	offset:12; size:4; signed:0;
	int ifindex;	//	offset:16; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&exception_cnt, &key);
	if (!rec)
		return 1;
	rec->dropped += 1;

	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
 * Code in:     kernel/include/trace/events/xdp.h
 */
struct cpumap_enqueue_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		//	offset:8;  size:4; signed:1;
	u32 act;		//	offset:12; size:4; signed:0;
	int cpu;		//	offset:16; size:4; signed:1;
	unsigned int drops;	//	offset:20; size:4; signed:0;
	unsigned int processed;	//	offset:24; size:4; signed:0;
	int to_cpu;		//	offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
{
	u32 to_cpu = ctx->to_cpu;
	struct datarec *rec;

	if (to_cpu >= MAX_CPUS)
		return 1;

	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped   += ctx->drops;

	/* Record bulk events, then userspace can calculate the average
	 * bulk size.
	 */
	if (ctx->processed > 0)
		rec->issue += 1;

	/* Inception: It's possible to detect overload situations via
	 * this tracepoint.  This can be used for creating a feedback
	 * loop to XDP, which can take appropriate actions to mitigate
	 * the overload situation.
	 */
	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
 * Code in:     kernel/include/trace/events/xdp.h
 */
struct cpumap_kthread_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		//	offset:8;  size:4; signed:1;
	u32 act;		//	offset:12; size:4; signed:0;
	int cpu;		//	offset:16; size:4; signed:1;
	unsigned int drops;	//	offset:20; size:4; signed:0;
	unsigned int processed;	//	offset:24; size:4; signed:0;
	int sched;		//	offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_kthread")
int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped   += ctx->drops;

	/* Count the times the kthread yielded the CPU via a schedule call */
	if (ctx->sched)
		rec->issue++;

	return 0;
}