/* XDP redirect to CPUs via cpumap (BPF_MAP_TYPE_CPUMAP)
 *
 * GPLv2, Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
#include <uapi/linux/if_ether.h>
#include <uapi/linux/if_packet.h>
#include <uapi/linux/if_vlan.h>
#include <uapi/linux/ip.h>
#include <uapi/linux/ipv6.h>
#include <uapi/linux/in.h>
#include <uapi/linux/tcp.h>
#include <uapi/linux/udp.h>

#include <uapi/linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "hash_func01.h"

#define MAX_CPUS NR_CPUS

/* Special map type that can XDP_REDIRECT frames to another CPU */
struct {
	__uint(type, BPF_MAP_TYPE_CPUMAP);
	__uint(key_size, sizeof(u32));
	__uint(value_size, sizeof(struct bpf_cpumap_val));
	__uint(max_entries, MAX_CPUS);
} cpu_map SEC(".maps");
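
/* Note: the cpumap only redirects to CPU entries that userspace has
 * populated. A minimal userspace sketch (assuming libbpf, with the map
 * fd obtained via bpf_object__find_map_fd_by_name()) might look like:
 *
 *	struct bpf_cpumap_val val = {
 *		.qsize = 192,	// kthread queue size (assumed value)
 *	};
 *	__u32 cpu = 2;		// CPU to enable as a redirect target
 *	bpf_map_update_elem(cpu_map_fd, &cpu, &val, 0);
 */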

/* Common stats data record to keep userspace simpler */
struct datarec {
	__u64 processed;
	__u64 dropped;
	__u64 issue;
	__u64 xdp_pass;
	__u64 xdp_drop;
	__u64 xdp_redirect;
};

/* Count RX packets, as an XDP bpf_prog doesn't get direct TX-success
 * feedback. Redirect TX errors can be caught via a tracepoint.
 */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} rx_cnt SEC(".maps");
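
/* Note: with BPF_MAP_TYPE_PERCPU_ARRAY each CPU has its own copy of
 * the value. Userspace reading this map gets one struct datarec per
 * possible CPU and must sum the fields itself; a hedged sketch,
 * assuming libbpf and nr_cpus = libbpf_num_possible_cpus():
 *
 *	struct datarec values[nr_cpus];
 *	__u32 key = 0;
 *	bpf_map_lookup_elem(rx_cnt_fd, &key, values);
 *	for (i = 0; i < nr_cpus; i++)
 *		sum += values[i].processed;
 */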

/* Used by tracepoint */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 2);
	/* TODO: have entries for all possible errno's */
} redirect_err_cnt SEC(".maps");

/* Used by tracepoint */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, MAX_CPUS);
} cpumap_enqueue_cnt SEC(".maps");

/* Used by tracepoint */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} cpumap_kthread_cnt SEC(".maps");

/* Set of maps controlling the available CPUs, and for iterating
 * through the selectable redirect CPUs (see the note after these
 * map definitions).
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, MAX_CPUS);
} cpus_available SEC(".maps");
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_count SEC(".maps");
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, u32);
	__uint(max_entries, 1);
} cpus_iterator SEC(".maps");
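
/* Note on the contract between these maps (as used by the programs
 * below): userspace writes the usable CPU ids into cpus_available at
 * indexes 0..N-1 and stores N in cpus_count[0]; cpus_iterator is
 * per-CPU scratch state holding the round-robin position.
 */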

/* Used by tracepoint */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__type(key, u32);
	__type(value, struct datarec);
	__uint(max_entries, 1);
} exception_cnt SEC(".maps");

/* Helper parse functions */

/* Parse Ethernet layer 2, extract network layer 3 offset and protocol
 *
 * Returns false on error or unsupported ether-type
 */
struct vlan_hdr {
	__be16 h_vlan_TCI;
	__be16 h_vlan_encapsulated_proto;
};

static __always_inline
bool parse_eth(struct ethhdr *eth, void *data_end,
	       u16 *eth_proto, u64 *l3_offset)
{
	u16 eth_type;
	u64 offset;

	offset = sizeof(*eth);
	if ((void *)eth + offset > data_end)
		return false;

	eth_type = eth->h_proto;

	/* Skip non 802.3 Ethertypes */
	if (unlikely(ntohs(eth_type) < ETH_P_802_3_MIN))
		return false;

	/* Handle VLAN tagged packet */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}
	/* Handle double VLAN tagged packet; the block is duplicated
	 * rather than looped to stay verifier-friendly.
	 */
	if (eth_type == htons(ETH_P_8021Q) || eth_type == htons(ETH_P_8021AD)) {
		struct vlan_hdr *vlan_hdr;

		vlan_hdr = (void *)eth + offset;
		offset += sizeof(*vlan_hdr);
		if ((void *)eth + offset > data_end)
			return false;
		eth_type = vlan_hdr->h_vlan_encapsulated_proto;
	}

	*eth_proto = ntohs(eth_type);
	*l3_offset = offset;
	return true;
}

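/* Return the UDP destination port of an IPv4 packet in host byte
 * order, or 0 when the packet is not UDP or is truncated. 0 doubles
 * as the "no port" sentinel, which is fine for the port-9 filter
 * below.
 */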
static __always_inline
u16 get_dest_port_ipv4_udp(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	struct udphdr *udph;
	u16 dport;

	if (iph + 1 > data_end)
		return 0;
	if (iph->protocol != IPPROTO_UDP)
		return 0;

	udph = (void *)(iph + 1);
	if (udph + 1 > data_end)
		return 0;

	dport = ntohs(udph->dest);
	return dport;
}

static __always_inline
int get_proto_ipv4(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;

	if (iph + 1 > data_end)
		return 0;
	return iph->protocol;
}

static __always_inline
int get_proto_ipv6(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;

	if (ip6h + 1 > data_end)
		return 0;
	return ip6h->nexthdr;
}

SEC("xdp_cpu_map0")
int xdp_prognum0_no_touch(struct xdp_md *ctx)
{
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

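	/* Flags are 0: on kernels where bpf_redirect_map() supports an
	 * action fallback in the lower flag bits, a failed cpumap
	 * lookup then yields XDP_ABORTED (0).
	 */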
	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map1_touch_data")
int xdp_prognum1_touch_data(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u32 *cpu_selected;
	u32 cpu_dest;
	u16 eth_type;
	u32 key = 0;

	/* Only use first entry in cpus_available */
	cpu_selected = bpf_map_lookup_elem(&cpus_available, &key);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Validate packet length is minimum Eth header size */
	if (eth + 1 > data_end)
		return XDP_ABORTED;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	/* Read packet data, and use it (drop non 802.3 Ethertypes) */
	eth_type = eth->h_proto;
	if (ntohs(eth_type) < ETH_P_802_3_MIN) {
		rec->dropped++;
		return XDP_DROP;
	}

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map2_round_robin")
int xdp_prognum2_round_robin(struct xdp_md *ctx)
{
	struct datarec *rec;
	u32 cpu_dest;
	u32 key0 = 0;

	u32 *cpu_selected;
	u32 *cpu_iterator;
	u32 *cpu_max;
	u32 cpu_idx;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key0);
	if (!cpu_max)
		return XDP_ABORTED;

	cpu_iterator = bpf_map_lookup_elem(&cpus_iterator, &key0);
	if (!cpu_iterator)
		return XDP_ABORTED;
	cpu_idx = *cpu_iterator;

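	/* The iterator lives in a PERCPU_ARRAY, so this read-modify-
	 * write is private to the current RX CPU; no atomics are
	 * needed even though multiple CPUs run this program.
	 */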
	*cpu_iterator += 1;
	if (*cpu_iterator == *cpu_max)
		*cpu_iterator = 0;

	cpu_selected = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_selected)
		return XDP_ABORTED;
	cpu_dest = *cpu_selected;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key0);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map3_proto_separate")
int xdp_prognum3_proto_separate(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

SEC("xdp_cpu_map4_ddos_filter_pktgen")
int xdp_prognum4_ddos_filter_pktgen(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	u8 ip_proto = IPPROTO_UDP;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u16 dest_port;
	u32 *cpu_lookup;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Extract L4 protocol */
	switch (eth_proto) {
	case ETH_P_IP:
		ip_proto = get_proto_ipv4(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		ip_proto = get_proto_ipv6(ctx, l3_offset);
		break;
	case ETH_P_ARP:
		cpu_idx = 0; /* ARP packet handled on separate CPU */
		break;
	default:
		cpu_idx = 0;
	}

	/* Choose CPU based on L4 protocol */
	switch (ip_proto) {
	case IPPROTO_ICMP:
	case IPPROTO_ICMPV6:
		cpu_idx = 2;
		break;
	case IPPROTO_TCP:
		cpu_idx = 0;
		break;
	case IPPROTO_UDP:
		cpu_idx = 1;
		/* DDoS filter UDP port 9 (pktgen); rec is already
		 * NULL-checked above, so no extra check is needed.
		 */
		dest_port = get_dest_port_ipv4_udp(ctx, l3_offset);
		if (dest_port == 9) {
			rec->dropped++;
			return XDP_DROP;
		}
		break;
	default:
		cpu_idx = 0;
	}

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

/* Hashing initval */
#define INITVAL 15485863

static __always_inline
u32 get_ipv4_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct iphdr *iph = data + nh_off;
	u32 cpu_hash;

	if (iph + 1 > data_end)
		return 0;

	cpu_hash = iph->saddr + iph->daddr;
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + iph->protocol);

	return cpu_hash;
}

static __always_inline
u32 get_ipv6_hash_ip_pair(struct xdp_md *ctx, u64 nh_off)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ipv6hdr *ip6h = data + nh_off;
	u32 cpu_hash;

	if (ip6h + 1 > data_end)
		return 0;

	cpu_hash = ip6h->saddr.s6_addr32[0] + ip6h->daddr.s6_addr32[0];
	cpu_hash += ip6h->saddr.s6_addr32[1] + ip6h->daddr.s6_addr32[1];
	cpu_hash += ip6h->saddr.s6_addr32[2] + ip6h->daddr.s6_addr32[2];
	cpu_hash += ip6h->saddr.s6_addr32[3] + ip6h->daddr.s6_addr32[3];
	cpu_hash = SuperFastHash((char *)&cpu_hash, 4, INITVAL + ip6h->nexthdr);

	return cpu_hash;
}

/* Load-Balance traffic based on hashing IP-addrs + L4-proto. The
 * hashing scheme is symmetric: since src and dst addresses are summed
 * before hashing, swapping IP src/dst still hits the same CPU.
 */
SEC("xdp_cpu_map5_lb_hash_ip_pairs")
int xdp_prognum5_lb_hash_ip_pairs(struct xdp_md *ctx)
{
	void *data_end = (void *)(long)ctx->data_end;
	void *data = (void *)(long)ctx->data;
	struct ethhdr *eth = data;
	struct datarec *rec;
	u16 eth_proto = 0;
	u64 l3_offset = 0;
	u32 cpu_dest = 0;
	u32 cpu_idx = 0;
	u32 *cpu_lookup;
	u32 *cpu_max;
	u32 cpu_hash;
	u32 key = 0;

	/* Count RX packet in map */
	rec = bpf_map_lookup_elem(&rx_cnt, &key);
	if (!rec)
		return XDP_ABORTED;
	rec->processed++;

	cpu_max = bpf_map_lookup_elem(&cpus_count, &key);
	if (!cpu_max)
		return XDP_ABORTED;

	if (!(parse_eth(eth, data_end, &eth_proto, &l3_offset)))
		return XDP_PASS; /* Just skip */

	/* Hash for IPv4 and IPv6 */
	switch (eth_proto) {
	case ETH_P_IP:
		cpu_hash = get_ipv4_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_IPV6:
		cpu_hash = get_ipv6_hash_ip_pair(ctx, l3_offset);
		break;
	case ETH_P_ARP: /* ARP packet handled on CPU idx 0 */
	default:
		cpu_hash = 0;
	}

	/* Choose CPU based on hash */
	cpu_idx = cpu_hash % *cpu_max;

	cpu_lookup = bpf_map_lookup_elem(&cpus_available, &cpu_idx);
	if (!cpu_lookup)
		return XDP_ABORTED;
	cpu_dest = *cpu_lookup;

	if (cpu_dest >= MAX_CPUS) {
		rec->issue++;
		return XDP_ABORTED;
	}

	return bpf_redirect_map(&cpu_map, cpu_dest, 0);
}

char _license[] SEC("license") = "GPL";

/*** Tracepoint code ***/

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_redirect/format
 * Code in:                kernel/include/trace/events/xdp.h
 */
struct xdp_redirect_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int prog_id;		// offset:8;  size:4; signed:1;
	u32 act;		// offset:12; size:4; signed:0;
	int ifindex;		// offset:16; size:4; signed:1;
	int err;		// offset:20; size:4; signed:1;
	int to_ifindex;		// offset:24; size:4; signed:1;
	u32 map_id;		// offset:28; size:4; signed:0;
	int map_index;		// offset:32; size:4; signed:1;
};				// offset:36
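
/* Note: these handcrafted ctx structs mirror the layout reported by
 * the tracepoint "format" files referenced above; if a kernel changes
 * a tracepoint's fields, the offsets here must be re-checked against
 * that file (or the structs replaced with BTF-based tracing).
 */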

enum {
	XDP_REDIRECT_SUCCESS = 0,
	XDP_REDIRECT_ERROR = 1
};

static __always_inline
int xdp_redirect_collect_stat(struct xdp_redirect_ctx *ctx)
{
	u32 key = XDP_REDIRECT_ERROR;
	struct datarec *rec;
	int err = ctx->err;

	if (!err)
		key = XDP_REDIRECT_SUCCESS;

	rec = bpf_map_lookup_elem(&redirect_err_cnt, &key);
	if (!rec)
		return 0;
	rec->dropped += 1;

	return 0; /* Indicate event was filtered (no further processing) */
	/*
	 * Returning 1 here would allow e.g. a perf-record tracepoint
	 * to see and record these events, but it doesn't work well in
	 * practice, as stopping perf-record also unloads this
	 * bpf_prog. Plus, there is the additional overhead of doing so.
	 */
}

SEC("tracepoint/xdp/xdp_redirect_err")
int trace_xdp_redirect_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

SEC("tracepoint/xdp/xdp_redirect_map_err")
int trace_xdp_redirect_map_err(struct xdp_redirect_ctx *ctx)
{
	return xdp_redirect_collect_stat(ctx);
}

/* Tracepoint format: /sys/kernel/debug/tracing/events/xdp/xdp_exception/format
 * Code in:                kernel/include/trace/events/xdp.h
 */
struct xdp_exception_ctx {
	u64 __pad;	// First 8 bytes are not accessible by bpf code
	int prog_id;	// offset:8;  size:4; signed:1;
	u32 act;	// offset:12; size:4; signed:0;
	int ifindex;	// offset:16; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_exception")
int trace_xdp_exception(struct xdp_exception_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&exception_cnt, &key);
	if (!rec)
		return 1;
	rec->dropped += 1;

	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_enqueue/format
 * Code in:         kernel/include/trace/events/xdp.h
 */
struct cpumap_enqueue_ctx {
	u64 __pad;		// First 8 bytes are not accessible by bpf code
	int map_id;		// offset:8;  size:4; signed:1;
	u32 act;		// offset:12; size:4; signed:0;
	int cpu;		// offset:16; size:4; signed:1;
	unsigned int drops;	// offset:20; size:4; signed:0;
	unsigned int processed;	// offset:24; size:4; signed:0;
	int to_cpu;		// offset:28; size:4; signed:1;
};

SEC("tracepoint/xdp/xdp_cpumap_enqueue")
int trace_xdp_cpumap_enqueue(struct cpumap_enqueue_ctx *ctx)
{
	u32 to_cpu = ctx->to_cpu;
	struct datarec *rec;

	if (to_cpu >= MAX_CPUS)
		return 1;

	rec = bpf_map_lookup_elem(&cpumap_enqueue_cnt, &to_cpu);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped += ctx->drops;

	/* Record bulk events, then userspace can calc the average bulk
	 * size (processed / issue).
	 */
	if (ctx->processed > 0)
		rec->issue += 1;

	/* Inception: It's possible to detect overload situations via
	 * this tracepoint. This can be used for creating a feedback
	 * loop to XDP, which can take appropriate actions to mitigate
	 * the overload situation.
	 */
	return 0;
}

/* Tracepoint: /sys/kernel/debug/tracing/events/xdp/xdp_cpumap_kthread/format
 * Code in:         kernel/include/trace/events/xdp.h
 */
struct cpumap_kthread_ctx {
	u64 __pad;			// First 8 bytes are not accessible
	int map_id;			// offset:8;  size:4; signed:1;
	u32 act;			// offset:12; size:4; signed:0;
	int cpu;			// offset:16; size:4; signed:1;
	unsigned int drops;		// offset:20; size:4; signed:0;
	unsigned int processed;		// offset:24; size:4; signed:0;
	int sched;			// offset:28; size:4; signed:1;
	unsigned int xdp_pass;		// offset:32; size:4; signed:0;
	unsigned int xdp_drop;		// offset:36; size:4; signed:0;
	unsigned int xdp_redirect;	// offset:40; size:4; signed:0;
};

SEC("tracepoint/xdp/xdp_cpumap_kthread")
int trace_xdp_cpumap_kthread(struct cpumap_kthread_ctx *ctx)
{
	struct datarec *rec;
	u32 key = 0;

	rec = bpf_map_lookup_elem(&cpumap_kthread_cnt, &key);
	if (!rec)
		return 0;
	rec->processed += ctx->processed;
	rec->dropped += ctx->drops;
	rec->xdp_pass += ctx->xdp_pass;
	rec->xdp_drop += ctx->xdp_drop;
	rec->xdp_redirect += ctx->xdp_redirect;

	/* Count times kthread yielded CPU via schedule call */
	if (ctx->sched)
		rec->issue++;

	return 0;
}