/* GPLv2 Copyright(c) 2017 Jesper Dangaard Brouer, Red Hat, Inc.
 */
static const char *__doc__ =
	" XDP redirect with a CPU-map type \"BPF_MAP_TYPE_CPUMAP\"";

#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <locale.h>
#include <sys/resource.h>
#include <getopt.h>
#include <net/if.h>
#include <time.h>

#include <arpa/inet.h>
#include <linux/if_link.h>

#define MAX_CPUS 12 /* WARNING - sync with _kern.c */

/* How many xdp_progs are defined in _kern.c */
#define MAX_PROG 5

/* Wanted to get rid of bpf_load.h and fake-"libbpf.h" (and instead
 * use bpf/libbpf.h), but cannot, as it is (currently) needed for
 * attaching XDP to a device via set_link_xdp_fd()
 */
#include "libbpf.h"
#include "bpf_load.h"

#include "bpf_util.h"

static int ifindex = -1;
static char ifname_buf[IF_NAMESIZE];
static char *ifname;

static __u32 xdp_flags;

/* Exit return codes */
#define EXIT_OK			0
#define EXIT_FAIL		1
#define EXIT_FAIL_OPTION	2
#define EXIT_FAIL_XDP		3
#define EXIT_FAIL_BPF		4
#define EXIT_FAIL_MEM		5

static const struct option long_options[] = {
	{"help",	no_argument,		NULL, 'h' },
	{"dev",		required_argument,	NULL, 'd' },
	{"skb-mode",	no_argument,		NULL, 'S' },
	{"debug",	no_argument,		NULL, 'D' },
	{"sec",		required_argument,	NULL, 's' },
	{"prognum",	required_argument,	NULL, 'p' },
	{"qsize",	required_argument,	NULL, 'q' },
	{"cpu",		required_argument,	NULL, 'c' },
	{"stress-mode", no_argument,		NULL, 'x' },
	{"no-separators", no_argument,		NULL, 'z' },
	{0, 0, NULL, 0 }
};

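/* Detach the XDP program again on SIGINT; passing fd -1 to
 * set_link_xdp_fd() removes the currently attached program.
 */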
static void int_exit(int sig)
{
	fprintf(stderr,
		"Interrupted: Removing XDP program on ifindex:%d device:%s\n",
		ifindex, ifname);
	if (ifindex > -1)
		set_link_xdp_fd(ifindex, -1, xdp_flags);
	exit(EXIT_OK);
}

static void usage(char *argv[])
{
	int i;

	printf("\nDOCUMENTATION:\n%s\n", __doc__);
	printf("\n");
	printf(" Usage: %s (options-see-below)\n", argv[0]);
	printf(" Listing options:\n");
	for (i = 0; long_options[i].name != 0; i++) {
		printf(" --%-12s", long_options[i].name);
		if (long_options[i].flag != NULL)
			printf(" flag (internal value:%d)",
			       *long_options[i].flag);
		else
			printf(" short-option: -%c",
			       long_options[i].val);
		printf("\n");
	}
	printf("\n");
}

/* gettime returns the current time of day in nanoseconds.
 * Cost: clock_gettime (ns) => 26ns (CLOCK_MONOTONIC)
 *       clock_gettime (ns) =>  9ns (CLOCK_MONOTONIC_COARSE)
 */
#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
static __u64 gettime(void)
{
	struct timespec t;
	int res;

	res = clock_gettime(CLOCK_MONOTONIC, &t);
	if (res < 0) {
		fprintf(stderr, "Error with clock_gettime! (%i)\n", res);
		exit(EXIT_FAIL);
	}
	return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
}

/* Common stats data record shared with _kern.c */
struct datarec {
	__u64 processed;
	__u64 dropped;
	__u64 issue;
};
struct record {
	__u64 timestamp;
	struct datarec total;
	struct datarec *cpu;
};
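/* One record per stat map in _kern.c: rx_cnt, redirect_err_cnt,
 * cpumap_kthread_cnt, exception_cnt, plus one cpumap_enqueue_cnt
 * record per possible destination CPU (see stats_collect() below).
 */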
struct stats_record {
	struct record rx_cnt;
	struct record redir_err;
	struct record kthread;
	struct record exception;
	struct record enq[MAX_CPUS];
};

static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
{
	/* For percpu maps, userspace gets a value per possible CPU */
	unsigned int nr_cpus = bpf_num_possible_cpus();
	struct datarec values[nr_cpus];
	__u64 sum_processed = 0;
	__u64 sum_dropped = 0;
	__u64 sum_issue = 0;
	int i;

	if ((bpf_map_lookup_elem(fd, &key, values)) != 0) {
		fprintf(stderr,
			"ERR: bpf_map_lookup_elem failed key:0x%X\n", key);
		return false;
	}
	/* Get time as close as possible to reading map contents */
	rec->timestamp = gettime();

	/* Record and sum values from each CPU */
	for (i = 0; i < nr_cpus; i++) {
		rec->cpu[i].processed = values[i].processed;
		sum_processed        += values[i].processed;
		rec->cpu[i].dropped   = values[i].dropped;
		sum_dropped          += values[i].dropped;
		rec->cpu[i].issue     = values[i].issue;
		sum_issue            += values[i].issue;
	}
	rec->total.processed = sum_processed;
	rec->total.dropped   = sum_dropped;
	rec->total.issue     = sum_issue;
	return true;
}

static struct datarec *alloc_record_per_cpu(void)
{
	unsigned int nr_cpus = bpf_num_possible_cpus();
	struct datarec *array;
	size_t size;

	size = sizeof(struct datarec) * nr_cpus;
	array = malloc(size);
	if (!array) {
		fprintf(stderr, "Mem alloc error (nr_cpus:%u)\n", nr_cpus);
		exit(EXIT_FAIL_MEM);
	}
	memset(array, 0, size);
	return array;
}

static struct stats_record *alloc_stats_record(void)
{
	struct stats_record *rec;
	int i;

	rec = malloc(sizeof(*rec));
	if (!rec) {
		fprintf(stderr, "Mem alloc error\n");
		exit(EXIT_FAIL_MEM);
	}
	memset(rec, 0, sizeof(*rec));
	rec->rx_cnt.cpu    = alloc_record_per_cpu();
	rec->redir_err.cpu = alloc_record_per_cpu();
	rec->kthread.cpu   = alloc_record_per_cpu();
	rec->exception.cpu = alloc_record_per_cpu();
	for (i = 0; i < MAX_CPUS; i++)
		rec->enq[i].cpu = alloc_record_per_cpu();

	return rec;
}

static void free_stats_record(struct stats_record *r)
{
	int i;

	for (i = 0; i < MAX_CPUS; i++)
		free(r->enq[i].cpu);
	free(r->exception.cpu);
	free(r->kthread.cpu);
	free(r->redir_err.cpu);
	free(r->rx_cnt.cpu);
	free(r);
}

static double calc_period(struct record *r, struct record *p)
{
	double period_ = 0;
	__u64 period = 0;

	period = r->timestamp - p->timestamp;
	if (period > 0)
		period_ = ((double) period / NANOSEC_PER_SEC);

	return period_;
}

static __u64 calc_pps(struct datarec *r, struct datarec *p, double period_)
{
	__u64 packets = 0;
	__u64 pps = 0;

	if (period_ > 0) {
		packets = r->processed - p->processed;
		pps = packets / period_;
	}
	return pps;
}

static __u64 calc_drop_pps(struct datarec *r, struct datarec *p, double period_)
{
	__u64 packets = 0;
	__u64 pps = 0;

	if (period_ > 0) {
		packets = r->dropped - p->dropped;
		pps = packets / period_;
	}
	return pps;
}

static __u64 calc_errs_pps(struct datarec *r,
			   struct datarec *p, double period_)
{
	__u64 packets = 0;
	__u64 pps = 0;

	if (period_ > 0) {
		packets = r->issue - p->issue;
		pps = packets / period_;
	}
	return pps;
}

static void stats_print(struct stats_record *stats_rec,
			struct stats_record *stats_prev,
			int prog_num)
{
	unsigned int nr_cpus = bpf_num_possible_cpus();
	double pps = 0, drop = 0, err = 0;
	struct record *rec, *prev;
	int to_cpu;
	double t;
	int i;

	/* Header */
	printf("Running XDP/eBPF prog_num:%d\n", prog_num);
	printf("%-15s %-7s %-14s %-11s %-9s\n",
	       "XDP-cpumap", "CPU:to", "pps", "drop-pps", "extra-info");

	/* XDP rx_cnt */
	{
		char *fmt_rx = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
		char *fm2_rx = "%-15s %-7s %'-14.0f %'-11.0f\n";
		char *errstr = "";

		rec  = &stats_rec->rx_cnt;
		prev = &stats_prev->rx_cnt;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps  = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			err  = calc_errs_pps(r, p, t);
			if (err > 0)
				errstr = "cpu-dest/err";
			if (pps > 0)
				printf(fmt_rx, "XDP-RX",
				       i, pps, drop, err, errstr);
		}
		pps  = calc_pps(&rec->total, &prev->total, t);
		drop = calc_drop_pps(&rec->total, &prev->total, t);
		err  = calc_errs_pps(&rec->total, &prev->total, t);
		printf(fm2_rx, "XDP-RX", "total", pps, drop);
	}

	/* cpumap enqueue stats */
	for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
		char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
		char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
		char *errstr = "";

		rec  = &stats_rec->enq[to_cpu];
		prev = &stats_prev->enq[to_cpu];
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps  = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			err  = calc_errs_pps(r, p, t);
			if (err > 0) {
				errstr = "bulk-average";
				err = pps / err; /* calc average bulk size */
			}
			if (pps > 0)
				printf(fmt, "cpumap-enqueue",
				       i, to_cpu, pps, drop, err, errstr);
		}
		pps = calc_pps(&rec->total, &prev->total, t);
		if (pps > 0) {
			drop = calc_drop_pps(&rec->total, &prev->total, t);
			err  = calc_errs_pps(&rec->total, &prev->total, t);
			if (err > 0) {
				errstr = "bulk-average";
				err = pps / err; /* calc average bulk size */
			}
			printf(fm2, "cpumap-enqueue",
			       "sum", to_cpu, pps, drop, err, errstr);
		}
	}

	/* cpumap kthread stats */
	{
		char *fmt_k = "%-15s %-7d %'-14.0f %'-11.0f %'-10.0f %s\n";
		char *fm2_k = "%-15s %-7s %'-14.0f %'-11.0f %'-10.0f %s\n";
		char *e_str = "";

		rec  = &stats_rec->kthread;
		prev = &stats_prev->kthread;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps  = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			err  = calc_errs_pps(r, p, t);
			if (err > 0)
				e_str = "sched";
			if (pps > 0)
				printf(fmt_k, "cpumap_kthread",
				       i, pps, drop, err, e_str);
		}
		pps  = calc_pps(&rec->total, &prev->total, t);
		drop = calc_drop_pps(&rec->total, &prev->total, t);
		err  = calc_errs_pps(&rec->total, &prev->total, t);
		if (err > 0)
			e_str = "sched-sum";
		printf(fm2_k, "cpumap_kthread", "total", pps, drop, err, e_str);
	}

	/* XDP redirect err tracepoints (very unlikely) */
	{
		char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
		char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";

		rec  = &stats_rec->redir_err;
		prev = &stats_prev->redir_err;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps  = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			if (pps > 0)
				printf(fmt_err, "redirect_err", i, pps, drop);
		}
		pps  = calc_pps(&rec->total, &prev->total, t);
		drop = calc_drop_pps(&rec->total, &prev->total, t);
		printf(fm2_err, "redirect_err", "total", pps, drop);
	}

	/* XDP general exception tracepoints */
	{
		char *fmt_err = "%-15s %-7d %'-14.0f %'-11.0f\n";
		char *fm2_err = "%-15s %-7s %'-14.0f %'-11.0f\n";

		rec  = &stats_rec->exception;
		prev = &stats_prev->exception;
		t = calc_period(rec, prev);
		for (i = 0; i < nr_cpus; i++) {
			struct datarec *r = &rec->cpu[i];
			struct datarec *p = &prev->cpu[i];

			pps  = calc_pps(r, p, t);
			drop = calc_drop_pps(r, p, t);
			if (pps > 0)
				printf(fmt_err, "xdp_exception", i, pps, drop);
		}
		pps  = calc_pps(&rec->total, &prev->total, t);
		drop = calc_drop_pps(&rec->total, &prev->total, t);
		printf(fm2_err, "xdp_exception", "total", pps, drop);
	}

	printf("\n");
	fflush(stdout);
}

static void stats_collect(struct stats_record *rec)
{
	int fd, i;

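	/* map_fd[] indexes follow the order the maps are defined in
	 * _kern.c (filled in by bpf_load); fd 5-7 are the config maps
	 * used by create_cpu_entry()/mark_cpus_unavailable() below,
	 * which is why the stat maps skip from index 4 to 8.
	 */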
	fd = map_fd[1]; /* map: rx_cnt */
	map_collect_percpu(fd, 0, &rec->rx_cnt);

	fd = map_fd[2]; /* map: redirect_err_cnt */
	map_collect_percpu(fd, 1, &rec->redir_err);

	fd = map_fd[3]; /* map: cpumap_enqueue_cnt */
	for (i = 0; i < MAX_CPUS; i++)
		map_collect_percpu(fd, i, &rec->enq[i]);

	fd = map_fd[4]; /* map: cpumap_kthread_cnt */
	map_collect_percpu(fd, 0, &rec->kthread);

	fd = map_fd[8]; /* map: exception_cnt */
	map_collect_percpu(fd, 0, &rec->exception);
}

/* Pointer swap trick */
static inline void swap(struct stats_record **a, struct stats_record **b)
{
	struct stats_record *tmp;

	tmp = *a;
	*a = *b;
	*b = tmp;
}

static int create_cpu_entry(__u32 cpu, __u32 queue_size,
			    __u32 avail_idx, bool new)
{
	__u32 curr_cpus_count = 0;
	__u32 key = 0;
	int ret;

	/* Add a CPU entry to cpumap, as this allocates a cpu entry in
	 * the kernel for the cpu (the map value is the kthread's
	 * queue size).
	 */
	ret = bpf_map_update_elem(map_fd[0], &cpu, &queue_size, 0);
	if (ret) {
		fprintf(stderr, "Create CPU entry failed (err:%d)\n", ret);
		exit(EXIT_FAIL_BPF);
	}

	/* Inform the bpf_prog's that a new CPU is available to select
	 * from via some control maps.
	 */
	/* map_fd[5] = cpus_available */
	ret = bpf_map_update_elem(map_fd[5], &avail_idx, &cpu, 0);
	if (ret) {
		fprintf(stderr, "Add to avail CPUs failed\n");
		exit(EXIT_FAIL_BPF);
	}

	/* When not replacing/updating an existing entry, bump the count */
	/* map_fd[6] = cpus_count */
	ret = bpf_map_lookup_elem(map_fd[6], &key, &curr_cpus_count);
	if (ret) {
		fprintf(stderr, "Failed reading curr cpus_count\n");
		exit(EXIT_FAIL_BPF);
	}
	if (new) {
		curr_cpus_count++;
		ret = bpf_map_update_elem(map_fd[6], &key, &curr_cpus_count, 0);
		if (ret) {
			fprintf(stderr, "Failed writing curr cpus_count\n");
			exit(EXIT_FAIL_BPF);
		}
	}
	/* map_fd[7] = cpus_iterator */
	printf("%s CPU:%u as idx:%u queue_size:%d (total cpus_count:%u)\n",
	       new ? "Add-new" : "Replace", cpu, avail_idx,
	       queue_size, curr_cpus_count);

	return 0;
}

/* CPUs are zero-indexed. Thus, add a special sentinel default value
 * in map cpus_available to mark CPU indexes not configured
 */
static void mark_cpus_unavailable(void)
{
	__u32 invalid_cpu = MAX_CPUS;
	int ret, i;

	for (i = 0; i < MAX_CPUS; i++) {
		/* map_fd[5] = cpus_available */
		ret = bpf_map_update_elem(map_fd[5], &i, &invalid_cpu, 0);
		if (ret) {
			fprintf(stderr, "Failed marking CPU unavailable\n");
			exit(EXIT_FAIL_BPF);
		}
	}
}

/* Stress cpumap management code by concurrently changing underlying cpumap */
static void stress_cpumap(void)
{
	/* Changing qsize will cause the kernel to free and alloc a new
	 * bpf_cpu_map_entry, with an associated/complicated tear-down
	 * procedure.
	 */
	create_cpu_entry(1, 1024, 0, false);
	create_cpu_entry(1, 128, 0, false);
	create_cpu_entry(1, 16000, 0, false);
}

static void stats_poll(int interval, bool use_separators, int prog_num,
		       bool stress_mode)
{
	struct stats_record *record, *prev;

	record = alloc_stats_record();
	prev   = alloc_stats_record();
	stats_collect(record);

	/* Trick to pretty-print with thousands separators: use %' in
	 * the printf formats (requires a locale that defines them).
	 */
	if (use_separators)
		setlocale(LC_NUMERIC, "en_US");

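	/* Each interval: keep the previous snapshot in 'prev', collect a
	 * fresh one into 'record', and print rates derived from the
	 * delta between the two (see calc_period()/calc_pps()).
	 */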
	while (1) {
		swap(&prev, &record);
		stats_collect(record);
		stats_print(record, prev, prog_num);
		sleep(interval);
		if (stress_mode)
			stress_cpumap();
	}

	free_stats_record(record);
	free_stats_record(prev);
}

int main(int argc, char **argv)
{
	struct rlimit r = {10 * 1024 * 1024, RLIM_INFINITY};
	bool use_separators = true;
	bool stress_mode = false;
	char filename[256];
	bool debug = false;
	int added_cpus = 0;
	int longindex = 0;
	int interval = 2;
	int prog_num = 0;
	int add_cpu = -1;
	__u32 qsize;
	int opt;

	/* Notice: choosing the queue size is very important with the
	 * ixgbe driver, because its page-recycling trick depends on
	 * pages being returned quickly. The number of outstanding
	 * packets in the system must be less than 2x the RX-ring size.
	 */
	qsize = 128+64;
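	/* Illustration of the 2x rule above (the 512 is an assumed ring
	 * size, not read from the driver): with a 512-slot RX ring, at
	 * most 1024 packets may be outstanding, so the default qsize of
	 * 192 leaves headroom even with a few remote CPUs configured.
	 */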

	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);

	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
		perror("setrlimit(RLIMIT_MEMLOCK)");
		return 1;
	}

	if (load_bpf_file(filename)) {
		fprintf(stderr, "ERR in load_bpf_file(): %s", bpf_log_buf);
		return EXIT_FAIL;
	}

	if (!prog_fd[0]) {
		fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno));
		return EXIT_FAIL;
	}

	mark_cpus_unavailable();

	/* Parse command line args */
	/* Note: the optstring must list the short option letters used in
	 * long_options[] above, so the advertised short forms also work.
	 */
	while ((opt = getopt_long(argc, argv, "hSd:Ds:p:q:c:xz",
				  long_options, &longindex)) != -1) {
		switch (opt) {
		case 'd':
			if (strlen(optarg) >= IF_NAMESIZE) {
				fprintf(stderr, "ERR: --dev name too long\n");
				goto error;
			}
			ifname = (char *)&ifname_buf;
			strncpy(ifname, optarg, IF_NAMESIZE);
			ifindex = if_nametoindex(ifname);
			if (ifindex == 0) {
				fprintf(stderr,
					"ERR: --dev name unknown err(%d):%s\n",
					errno, strerror(errno));
				goto error;
			}
			break;
		case 's':
			interval = atoi(optarg);
			break;
		case 'S':
			xdp_flags |= XDP_FLAGS_SKB_MODE;
			break;
		case 'D':
			debug = true;
			break;
		case 'x':
			stress_mode = true;
			break;
		case 'z':
			use_separators = false;
			break;
		case 'p':
			/* Selecting eBPF prog to load */
			prog_num = atoi(optarg);
			if (prog_num < 0 || prog_num >= MAX_PROG) {
				fprintf(stderr,
					"--prognum too large err(%d):%s\n",
					errno, strerror(errno));
				goto error;
			}
			break;
		case 'c':
			/* Add multiple CPUs */
			add_cpu = strtoul(optarg, NULL, 0);
			if (add_cpu >= MAX_CPUS) {
				fprintf(stderr,
				"--cpu nr too large for cpumap err(%d):%s\n",
					errno, strerror(errno));
				goto error;
			}
			create_cpu_entry(add_cpu, qsize, added_cpus, true);
			added_cpus++;
			break;
		case 'q':
			qsize = atoi(optarg);
			break;
		case 'h':
		error:
		default:
			usage(argv);
			return EXIT_FAIL_OPTION;
		}
	}
	/* Required option */
	if (ifindex == -1) {
		fprintf(stderr, "ERR: required option --dev missing\n");
		usage(argv);
		return EXIT_FAIL_OPTION;
	}
	/* Required option */
	if (add_cpu == -1) {
		fprintf(stderr, "ERR: required option --cpu missing\n");
		fprintf(stderr, " Specify the --cpu option multiple times to add more\n");
		usage(argv);
		return EXIT_FAIL_OPTION;
	}

	/* Remove XDP program when program is interrupted */
	signal(SIGINT, int_exit);

	if (set_link_xdp_fd(ifindex, prog_fd[prog_num], xdp_flags) < 0) {
		fprintf(stderr, "link set xdp fd failed\n");
		return EXIT_FAIL_XDP;
	}

	if (debug) {
		printf("Debug-mode reading trace pipe (fix #define DEBUG)\n");
		read_trace_pipe();
	}

	stats_poll(interval, use_separators, prog_num, stress_mode);
	return EXIT_OK;
}