]>
git.proxmox.com Git - mirror_ubuntu-bionic-kernel.git/blob - samples/bpf/xdp_redirect_cpu_user.c
/* GPLv2 Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
/* One-line program description printed by usage(). */
static const char *__doc__ =
	" XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\"";
14 #include <sys/resource.h>
19 #include <arpa/inet.h>
20 #include <linux/if_link.h>
22 #define MAX_CPUS 12 /* WARNING - sync with _kern.c */
24 /* How many xdp_progs are defined in _kern.c */
27 /* Wanted to get rid of bpf_load.h and fake-"libbpf.h" (and instead
28 * use bpf/libbpf.h), but cannot as (currently) needed for XDP
29 * attaching to a device via set_link_xdp_fd()
36 static int ifindex
= -1;
37 static char ifname_buf
[IF_NAMESIZE
];
40 static __u32 xdp_flags
;
42 /* Exit return codes */
45 #define EXIT_FAIL_OPTION 2
46 #define EXIT_FAIL_XDP 3
47 #define EXIT_FAIL_BPF 4
48 #define EXIT_FAIL_MEM 5
/* Command-line options; terminated by the all-zero sentinel entry
 * required by getopt_long().
 */
static const struct option long_options[] = {
	{"help",	no_argument,		NULL, 'h' },
	{"dev",		required_argument,	NULL, 'd' },
	{"skb-mode",	no_argument,		NULL, 'S' },
	{"debug",	no_argument,		NULL, 'D' },
	{"sec",		required_argument,	NULL, 's' },
	{"prognum",	required_argument,	NULL, 'p' },
	{"qsize",	required_argument,	NULL, 'q' },
	{"cpu",		required_argument,	NULL, 'c' },
	{"stress-mode", no_argument,		NULL, 'x' },
	{"no-separators", no_argument,		NULL, 'z' },
	{0, 0, NULL,  0 }
};
64 static void int_exit(int sig
)
67 "Interrupted: Removing XDP program on ifindex:%d device:%s\n",
70 set_link_xdp_fd(ifindex
, -1, xdp_flags
);
74 static void usage(char *argv
[])
78 printf("\nDOCUMENTATION:\n%s\n", __doc__
);
80 printf(" Usage: %s (options-see-below)\n", argv
[0]);
81 printf(" Listing options:\n");
82 for (i
= 0; long_options
[i
].name
!= 0; i
++) {
83 printf(" --%-12s", long_options
[i
].name
);
84 if (long_options
[i
].flag
!= NULL
)
85 printf(" flag (internal value:%d)",
86 *long_options
[i
].flag
);
88 printf(" short-option: -%c",
95 /* gettime returns the current time of day in nanoseconds.
96 * Cost: clock_gettime (ns) => 26ns (CLOCK_MONOTONIC)
97 * clock_gettime (ns) => 9ns (CLOCK_MONOTONIC_COARSE)
99 #define NANOSEC_PER_SEC 1000000000 /* 10^9 */
100 static __u64
gettime(void)
105 res
= clock_gettime(CLOCK_MONOTONIC
, &t
);
107 fprintf(stderr
, "Error with gettimeofday! (%i)\n", res
);
110 return (__u64
) t
.tv_sec
* NANOSEC_PER_SEC
+ t
.tv_nsec
;
113 /* Common stats data record shared with _kern.c */
121 struct datarec total
;
124 struct stats_record
{
125 struct record rx_cnt
;
126 struct record redir_err
;
127 struct record kthread
;
128 struct record exception
;
129 struct record enq
[MAX_CPUS
];
132 static bool map_collect_percpu(int fd
, __u32 key
, struct record
*rec
)
134 /* For percpu maps, userspace gets a value per possible CPU */
135 unsigned int nr_cpus
= bpf_num_possible_cpus();
136 struct datarec values
[nr_cpus
];
137 __u64 sum_processed
= 0;
138 __u64 sum_dropped
= 0;
142 if ((bpf_map_lookup_elem(fd
, &key
, values
)) != 0) {
144 "ERR: bpf_map_lookup_elem failed key:0x%X\n", key
);
147 /* Get time as close as possible to reading map contents */
148 rec
->timestamp
= gettime();
150 /* Record and sum values from each CPU */
151 for (i
= 0; i
< nr_cpus
; i
++) {
152 rec
->cpu
[i
].processed
= values
[i
].processed
;
153 sum_processed
+= values
[i
].processed
;
154 rec
->cpu
[i
].dropped
= values
[i
].dropped
;
155 sum_dropped
+= values
[i
].dropped
;
156 rec
->cpu
[i
].issue
= values
[i
].issue
;
157 sum_issue
+= values
[i
].issue
;
159 rec
->total
.processed
= sum_processed
;
160 rec
->total
.dropped
= sum_dropped
;
161 rec
->total
.issue
= sum_issue
;
165 static struct datarec
*alloc_record_per_cpu(void)
167 unsigned int nr_cpus
= bpf_num_possible_cpus();
168 struct datarec
*array
;
171 size
= sizeof(struct datarec
) * nr_cpus
;
172 array
= malloc(size
);
173 memset(array
, 0, size
);
175 fprintf(stderr
, "Mem alloc error (nr_cpus:%u)\n", nr_cpus
);
181 static struct stats_record
*alloc_stats_record(void)
183 struct stats_record
*rec
;
186 rec
= malloc(sizeof(*rec
));
187 memset(rec
, 0, sizeof(*rec
));
189 fprintf(stderr
, "Mem alloc error\n");
192 rec
->rx_cnt
.cpu
= alloc_record_per_cpu();
193 rec
->redir_err
.cpu
= alloc_record_per_cpu();
194 rec
->kthread
.cpu
= alloc_record_per_cpu();
195 rec
->exception
.cpu
= alloc_record_per_cpu();
196 for (i
= 0; i
< MAX_CPUS
; i
++)
197 rec
->enq
[i
].cpu
= alloc_record_per_cpu();
202 static void free_stats_record(struct stats_record
*r
)
206 for (i
= 0; i
< MAX_CPUS
; i
++)
208 free(r
->exception
.cpu
);
209 free(r
->kthread
.cpu
);
210 free(r
->redir_err
.cpu
);
215 static double calc_period(struct record
*r
, struct record
*p
)
220 period
= r
->timestamp
- p
->timestamp
;
222 period_
= ((double) period
/ NANOSEC_PER_SEC
);
227 static __u64
calc_pps(struct datarec
*r
, struct datarec
*p
, double period_
)
233 packets
= r
->processed
- p
->processed
;
234 pps
= packets
/ period_
;
239 static __u64
calc_drop_pps(struct datarec
*r
, struct datarec
*p
, double period_
)
245 packets
= r
->dropped
- p
->dropped
;
246 pps
= packets
/ period_
;
251 static __u64
calc_errs_pps(struct datarec
*r
,
252 struct datarec
*p
, double period_
)
258 packets
= r
->issue
- p
->issue
;
259 pps
= packets
/ period_
;
264 static void stats_print(struct stats_record
*stats_rec
,
265 struct stats_record
*stats_prev
,
268 unsigned int nr_cpus
= bpf_num_possible_cpus();
269 double pps
= 0, drop
= 0, err
= 0;
270 struct record
*rec
, *prev
;
276 printf("Running XDP/eBPF prog_num:%d\n", prog_num
);
277 printf("%-15s %-7s %-14s %-11s %-9s\n",
278 "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info");
282 char *fmt_rx
= "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
283 char *fm2_rx
= "%-15s %-7s %'-14.0f %'-11.0f\n";
286 rec
= &stats_rec
->rx_cnt
;
287 prev
= &stats_prev
->rx_cnt
;
288 t
= calc_period(rec
, prev
);
289 for (i
= 0; i
< nr_cpus
; i
++) {
290 struct datarec
*r
= &rec
->cpu
[i
];
291 struct datarec
*p
= &prev
->cpu
[i
];
293 pps
= calc_pps(r
, p
, t
);
294 drop
= calc_drop_pps(r
, p
, t
);
295 err
= calc_errs_pps(r
, p
, t
);
297 errstr
= "cpu-dest/err";
299 printf(fmt_rx
, "XDP-RX",
300 i
, pps
, drop
, err
, errstr
);
302 pps
= calc_pps(&rec
->total
, &prev
->total
, t
);
303 drop
= calc_drop_pps(&rec
->total
, &prev
->total
, t
);
304 err
= calc_errs_pps(&rec
->total
, &prev
->total
, t
);
305 printf(fm2_rx
, "XDP-RX", "total", pps
, drop
);
308 /* cpumap enqueue stats */
309 for (to_cpu
= 0; to_cpu
< MAX_CPUS
; to_cpu
++) {
310 char *fmt
= "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
311 char *fm2
= "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
314 rec
= &stats_rec
->enq
[to_cpu
];
315 prev
= &stats_prev
->enq
[to_cpu
];
316 t
= calc_period(rec
, prev
);
317 for (i
= 0; i
< nr_cpus
; i
++) {
318 struct datarec
*r
= &rec
->cpu
[i
];
319 struct datarec
*p
= &prev
->cpu
[i
];
321 pps
= calc_pps(r
, p
, t
);
322 drop
= calc_drop_pps(r
, p
, t
);
323 err
= calc_errs_pps(r
, p
, t
);
325 errstr
= "bulk-average";
326 err
= pps
/ err
; /* calc average bulk size */
329 printf(fmt
, "cpumap-enqueue",
330 i
, to_cpu
, pps
, drop
, err
, errstr
);
332 pps
= calc_pps(&rec
->total
, &prev
->total
, t
);
334 drop
= calc_drop_pps(&rec
->total
, &prev
->total
, t
);
335 err
= calc_errs_pps(&rec
->total
, &prev
->total
, t
);
337 errstr
= "bulk-average";
338 err
= pps
/ err
; /* calc average bulk size */
340 printf(fm2
, "cpumap-enqueue",
341 "sum", to_cpu
, pps
, drop
, err
, errstr
);
345 /* cpumap kthread stats */
347 char *fmt_k
= "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
348 char *fm2_k
= "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f %s\n";
351 rec
= &stats_rec
->kthread
;
352 prev
= &stats_prev
->kthread
;
353 t
= calc_period(rec
, prev
);
354 for (i
= 0; i
< nr_cpus
; i
++) {
355 struct datarec
*r
= &rec
->cpu
[i
];
356 struct datarec
*p
= &prev
->cpu
[i
];
358 pps
= calc_pps(r
, p
, t
);
359 drop
= calc_drop_pps(r
, p
, t
);
360 err
= calc_errs_pps(r
, p
, t
);
364 printf(fmt_k
, "cpumap_kthread",
365 i
, pps
, drop
, err
, e_str
);
367 pps
= calc_pps(&rec
->total
, &prev
->total
, t
);
368 drop
= calc_drop_pps(&rec
->total
, &prev
->total
, t
);
369 err
= calc_errs_pps(&rec
->total
, &prev
->total
, t
);
372 printf(fm2_k
, "cpumap_kthread", "total", pps
, drop
, err
, e_str
);
375 /* XDP redirect err tracepoints (very unlikely) */
377 char *fmt_err
= "%-15s %-7d %'-14.0f %'-11.0f\n";
378 char *fm2_err
= "%-15s %-7s %'-14.0f %'-11.0f\n";
380 rec
= &stats_rec
->redir_err
;
381 prev
= &stats_prev
->redir_err
;
382 t
= calc_period(rec
, prev
);
383 for (i
= 0; i
< nr_cpus
; i
++) {
384 struct datarec
*r
= &rec
->cpu
[i
];
385 struct datarec
*p
= &prev
->cpu
[i
];
387 pps
= calc_pps(r
, p
, t
);
388 drop
= calc_drop_pps(r
, p
, t
);
390 printf(fmt_err
, "redirect_err", i
, pps
, drop
);
392 pps
= calc_pps(&rec
->total
, &prev
->total
, t
);
393 drop
= calc_drop_pps(&rec
->total
, &prev
->total
, t
);
394 printf(fm2_err
, "redirect_err", "total", pps
, drop
);
397 /* XDP general exception tracepoints */
399 char *fmt_err
= "%-15s %-7d %'-14.0f %'-11.0f\n";
400 char *fm2_err
= "%-15s %-7s %'-14.0f %'-11.0f\n";
402 rec
= &stats_rec
->exception
;
403 prev
= &stats_prev
->exception
;
404 t
= calc_period(rec
, prev
);
405 for (i
= 0; i
< nr_cpus
; i
++) {
406 struct datarec
*r
= &rec
->cpu
[i
];
407 struct datarec
*p
= &prev
->cpu
[i
];
409 pps
= calc_pps(r
, p
, t
);
410 drop
= calc_drop_pps(r
, p
, t
);
412 printf(fmt_err
, "xdp_exception", i
, pps
, drop
);
414 pps
= calc_pps(&rec
->total
, &prev
->total
, t
);
415 drop
= calc_drop_pps(&rec
->total
, &prev
->total
, t
);
416 printf(fm2_err
, "xdp_exception", "total", pps
, drop
);
423 static void stats_collect(struct stats_record
*rec
)
427 fd
= map_fd
[1]; /* map: rx_cnt */
428 map_collect_percpu(fd
, 0, &rec
->rx_cnt
);
430 fd
= map_fd
[2]; /* map: redirect_err_cnt */
431 map_collect_percpu(fd
, 1, &rec
->redir_err
);
433 fd
= map_fd
[3]; /* map: cpumap_enqueue_cnt */
434 for (i
= 0; i
< MAX_CPUS
; i
++)
435 map_collect_percpu(fd
, i
, &rec
->enq
[i
]);
437 fd
= map_fd
[4]; /* map: cpumap_kthread_cnt */
438 map_collect_percpu(fd
, 0, &rec
->kthread
);
440 fd
= map_fd
[8]; /* map: exception_cnt */
441 map_collect_percpu(fd
, 0, &rec
->exception
);
/* Pointer swap trick: exchange two stats_record pointers so the
 * previous sample buffer can be reused for the next collection.
 */
static inline void swap(struct stats_record **a, struct stats_record **b)
{
	struct stats_record *tmp;

	tmp = *a;
	*a = *b;
	*b = tmp;
}
455 static int create_cpu_entry(__u32 cpu
, __u32 queue_size
,
456 __u32 avail_idx
, bool new)
458 __u32 curr_cpus_count
= 0;
462 /* Add a CPU entry to cpumap, as this allocate a cpu entry in
463 * the kernel for the cpu.
465 ret
= bpf_map_update_elem(map_fd
[0], &cpu
, &queue_size
, 0);
467 fprintf(stderr
, "Create CPU entry failed (err:%d)\n", ret
);
471 /* Inform bpf_prog's that a new CPU is available to select
472 * from via some control maps.
474 /* map_fd[5] = cpus_available */
475 ret
= bpf_map_update_elem(map_fd
[5], &avail_idx
, &cpu
, 0);
477 fprintf(stderr
, "Add to avail CPUs failed\n");
481 /* When not replacing/updating existing entry, bump the count */
482 /* map_fd[6] = cpus_count */
483 ret
= bpf_map_lookup_elem(map_fd
[6], &key
, &curr_cpus_count
);
485 fprintf(stderr
, "Failed reading curr cpus_count\n");
490 ret
= bpf_map_update_elem(map_fd
[6], &key
, &curr_cpus_count
, 0);
492 fprintf(stderr
, "Failed write curr cpus_count\n");
496 /* map_fd[7] = cpus_iterator */
497 printf("%s CPU:%u as idx:%u queue_size:%d (total cpus_count:%u)\n",
498 new ? "Add-new":"Replace", cpu
, avail_idx
,
499 queue_size
, curr_cpus_count
);
504 /* CPUs are zero-indexed. Thus, add a special sentinel default value
505 * in map cpus_available to mark CPU index'es not configured
507 static void mark_cpus_unavailable(void)
509 __u32 invalid_cpu
= MAX_CPUS
;
512 for (i
= 0; i
< MAX_CPUS
; i
++) {
513 /* map_fd[5] = cpus_available */
514 ret
= bpf_map_update_elem(map_fd
[5], &i
, &invalid_cpu
, 0);
516 fprintf(stderr
, "Failed marking CPU unavailable\n");
/* Stress cpumap management code by concurrently changing underlying cpumap */
static void stress_cpumap(void)
{
	/* Changing qsize will cause kernel to free and alloc a new
	 * bpf_cpu_map_entry, with an associated/complicated tear-down
	 * procedure.
	 */
	create_cpu_entry(1, 1024, 0, false);
	create_cpu_entry(1, 128, 0, false);
	create_cpu_entry(1, 16000, 0, false);
}
/* Main stats loop: sample the maps every 'interval' seconds and print
 * the rate delta between consecutive snapshots. Never returns under
 * normal operation (exits via the SIGINT handler).
 */
static void stats_poll(int interval, bool use_separators, int prog_num,
		       bool stress_mode)
{
	struct stats_record *record, *prev;

	record = alloc_stats_record();
	prev   = alloc_stats_record();
	stats_collect(record);

	/* Trick to pretty printf with thousands separators use %' */
	if (use_separators)
		setlocale(LC_NUMERIC, "en_US");

	while (1) {
		swap(&prev, &record);	/* reuse old buffer for next sample */
		stats_collect(record);
		stats_print(record, prev, prog_num);
		sleep(interval);
		if (stress_mode)
			stress_cpumap();
	}

	/* unreachable in practice; kept for symmetry with alloc */
	free_stats_record(record);
	free_stats_record(prev);
}
560 int main(int argc
, char **argv
)
562 struct rlimit r
= {10 * 1024 * 1024, RLIM_INFINITY
};
563 bool use_separators
= true;
564 bool stress_mode
= false;
575 /* Notice: choosing he queue size is very important with the
576 * ixgbe driver, because it's driver page recycling trick is
577 * dependend on pages being returned quickly. The number of
578 * out-standing packets in the system must be less-than 2x
583 snprintf(filename
, sizeof(filename
), "%s_kern.o", argv
[0]);
585 if (setrlimit(RLIMIT_MEMLOCK
, &r
)) {
586 perror("setrlimit(RLIMIT_MEMLOCK)");
590 if (load_bpf_file(filename
)) {
591 fprintf(stderr
, "ERR in load_bpf_file(): %s", bpf_log_buf
);
596 fprintf(stderr
, "ERR: load_bpf_file: %s\n", strerror(errno
));
600 mark_cpus_unavailable();
602 /* Parse commands line args */
603 while ((opt
= getopt_long(argc
, argv
, "hSd:",
604 long_options
, &longindex
)) != -1) {
607 if (strlen(optarg
) >= IF_NAMESIZE
) {
608 fprintf(stderr
, "ERR: --dev name too long\n");
611 ifname
= (char *)&ifname_buf
;
612 strncpy(ifname
, optarg
, IF_NAMESIZE
);
613 ifindex
= if_nametoindex(ifname
);
616 "ERR: --dev name unknown err(%d):%s\n",
617 errno
, strerror(errno
));
622 interval
= atoi(optarg
);
625 xdp_flags
|= XDP_FLAGS_SKB_MODE
;
634 use_separators
= false;
637 /* Selecting eBPF prog to load */
638 prog_num
= atoi(optarg
);
639 if (prog_num
< 0 || prog_num
>= MAX_PROG
) {
641 "--prognum too large err(%d):%s\n",
642 errno
, strerror(errno
));
647 /* Add multiple CPUs */
648 add_cpu
= strtoul(optarg
, NULL
, 0);
649 if (add_cpu
>= MAX_CPUS
) {
651 "--cpu nr too large for cpumap err(%d):%s\n",
652 errno
, strerror(errno
));
655 create_cpu_entry(add_cpu
, qsize
, added_cpus
, true);
659 qsize
= atoi(optarg
);
665 return EXIT_FAIL_OPTION
;
668 /* Required option */
670 fprintf(stderr
, "ERR: required option --dev missing\n");
672 return EXIT_FAIL_OPTION
;
674 /* Required option */
676 fprintf(stderr
, "ERR: required option --cpu missing\n");
677 fprintf(stderr
, " Specify multiple --cpu option to add more\n");
679 return EXIT_FAIL_OPTION
;
682 /* Remove XDP program when program is interrupted */
683 signal(SIGINT
, int_exit
);
685 if (set_link_xdp_fd(ifindex
, prog_fd
[prog_num
], xdp_flags
) < 0) {
686 fprintf(stderr
, "link set xdp fd failed\n");
687 return EXIT_FAIL_XDP
;
691 printf("Debug-mode reading trace pipe (fix #define DEBUG)\n");
695 stats_poll(interval
, use_separators
, prog_num
, stress_mode
);