]>
Commit | Line | Data |
---|---|---|
b4b8faa1 | 1 | // SPDX-License-Identifier: GPL-2.0 |
dac09149 | 2 | /* Copyright(c) 2017 - 2018 Intel Corporation. */ |
b4b8faa1 | 3 | |
b4b8faa1 MK |
4 | #include <errno.h> |
5 | #include <getopt.h> | |
6 | #include <libgen.h> | |
7 | #include <linux/bpf.h> | |
8 | #include <linux/if_link.h> | |
9 | #include <linux/if_xdp.h> | |
10 | #include <linux/if_ether.h> | |
4a3c23ae | 11 | #include <linux/ip.h> |
67ed3755 | 12 | #include <linux/limits.h> |
4a3c23ae JJ |
13 | #include <linux/udp.h> |
14 | #include <arpa/inet.h> | |
248c7f9c MK |
15 | #include <locale.h> |
16 | #include <net/ethernet.h> | |
b4b8faa1 | 17 | #include <net/if.h> |
248c7f9c MK |
18 | #include <poll.h> |
19 | #include <pthread.h> | |
b4b8faa1 MK |
20 | #include <signal.h> |
21 | #include <stdbool.h> | |
22 | #include <stdio.h> | |
23 | #include <stdlib.h> | |
24 | #include <string.h> | |
3627d970 | 25 | #include <sys/capability.h> |
248c7f9c | 26 | #include <sys/mman.h> |
b4b8faa1 MK |
27 | #include <sys/resource.h> |
28 | #include <sys/socket.h> | |
248c7f9c | 29 | #include <sys/types.h> |
3627d970 | 30 | #include <sys/un.h> |
b4b8faa1 MK |
31 | #include <time.h> |
32 | #include <unistd.h> | |
b4b8faa1 | 33 | |
7cf245a3 THJ |
34 | #include <bpf/libbpf.h> |
35 | #include <bpf/xsk.h> | |
2bf3e2ef | 36 | #include <bpf/bpf.h> |
7cf245a3 | 37 | #include "xdpsock.h" |
b4b8faa1 | 38 | |
/* Supply the AF_XDP socket constants when the included headers lack them. */
#ifndef SOL_XDP
#define SOL_XDP 283
#endif

#ifndef AF_XDP
#define AF_XDP 44
#endif

#ifndef PF_XDP
#define PF_XDP AF_XDP
#endif
50 | ||
#define NUM_FRAMES (4 * 1024)
#define MIN_PKT_SIZE 64

/* hex_dump() returns without printing while this is 0 */
#define DEBUG_HEXDUMP 0

/* Short aliases for the fixed-width kernel integer types */
typedef __u64 u64;
typedef __u32 u32;
typedef __u16 u16;
typedef __u8 u8;
b4b8faa1 MK |
60 | |
61 | static unsigned long prev_time; | |
62 | ||
63 | enum benchmark_type { | |
64 | BENCH_RXDROP = 0, | |
65 | BENCH_TXONLY = 1, | |
66 | BENCH_L2FWD = 2, | |
67 | }; | |
68 | ||
69 | static enum benchmark_type opt_bench = BENCH_RXDROP; | |
743e568c | 70 | static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; |
b4b8faa1 MK |
71 | static const char *opt_if = ""; |
72 | static int opt_ifindex; | |
73 | static int opt_queue; | |
d3f11b01 JJ |
74 | static unsigned long opt_duration; |
75 | static unsigned long start_time; | |
76 | static bool benchmark_done; | |
cd9e72b6 | 77 | static u32 opt_batch_size = 64; |
ece6e969 | 78 | static int opt_pkt_count; |
4a3c23ae | 79 | static u16 opt_pkt_size = MIN_PKT_SIZE; |
46e3268e | 80 | static u32 opt_pkt_fill_pattern = 0x12345678; |
b36c3206 | 81 | static bool opt_extra_stats; |
74e00676 | 82 | static bool opt_quiet; |
60dc609d | 83 | static bool opt_app_stats; |
67ed3755 CL |
84 | static const char *opt_irq_str = ""; |
85 | static u32 irq_no; | |
86 | static int irqs_at_init = -1; | |
b4b8faa1 | 87 | static int opt_poll; |
b4b8faa1 | 88 | static int opt_interval = 1; |
46738f73 | 89 | static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; |
c543f546 KL |
90 | static u32 opt_umem_flags; |
91 | static int opt_unaligned_chunks; | |
3945b37a | 92 | static int opt_mmap_flags; |
123e8da1 | 93 | static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; |
46738f73 MK |
94 | static int opt_timeout = 1000; |
95 | static bool opt_need_wakeup = true; | |
2e5d72c1 | 96 | static u32 opt_num_xsks = 1; |
2620e92a | 97 | static u32 prog_id; |
b35fc148 | 98 | static bool opt_busy_poll; |
3627d970 | 99 | static bool opt_reduced_cap; |
b4b8faa1 | 100 | |
/*
 * Per-socket packet counters. Each prev_* member holds the value its
 * counterpart had at the previous statistics dump, so a per-second rate can
 * be derived from the difference.
 */
struct xsk_ring_stats {
	/* current totals */
	unsigned long rx_npkts;
	unsigned long tx_npkts;
	unsigned long rx_dropped_npkts;
	unsigned long rx_invalid_npkts;
	unsigned long tx_invalid_npkts;
	unsigned long rx_full_npkts;
	unsigned long rx_fill_empty_npkts;
	unsigned long tx_empty_npkts;
	/* totals as of the previous dump */
	unsigned long prev_rx_npkts;
	unsigned long prev_tx_npkts;
	unsigned long prev_rx_dropped_npkts;
	unsigned long prev_rx_invalid_npkts;
	unsigned long prev_tx_invalid_npkts;
	unsigned long prev_rx_full_npkts;
	unsigned long prev_rx_fill_empty_npkts;
	unsigned long prev_tx_empty_npkts;
};
119 | ||
/* Interrupt count for a socket, plus its value at the previous dump */
struct xsk_driver_stats {
	unsigned long intrs;
	unsigned long prev_intrs;
};
124 | ||
/*
 * Application-level call counters for a socket. The prev_* members keep the
 * values from the previous dump for rate computation.
 */
struct xsk_app_stats {
	/* current totals */
	unsigned long rx_empty_polls;
	unsigned long fill_fail_polls;
	unsigned long copy_tx_sendtos;
	unsigned long tx_wakeup_sendtos;
	unsigned long opt_polls;
	/* totals as of the previous dump */
	unsigned long prev_rx_empty_polls;
	unsigned long prev_fill_fail_polls;
	unsigned long prev_copy_tx_sendtos;
	unsigned long prev_tx_wakeup_sendtos;
	unsigned long prev_opt_polls;
};
137 | ||
2e8806f0 CL |
138 | struct xsk_umem_info { |
139 | struct xsk_ring_prod fq; | |
140 | struct xsk_ring_cons cq; | |
141 | struct xsk_umem *umem; | |
142 | void *buffer; | |
143 | }; | |
144 | ||
145 | struct xsk_socket_info { | |
146 | struct xsk_ring_cons rx; | |
147 | struct xsk_ring_prod tx; | |
148 | struct xsk_umem_info *umem; | |
149 | struct xsk_socket *xsk; | |
150 | struct xsk_ring_stats ring_stats; | |
60dc609d | 151 | struct xsk_app_stats app_stats; |
67ed3755 | 152 | struct xsk_driver_stats drv_stats; |
248c7f9c | 153 | u32 outstanding_tx; |
b4b8faa1 MK |
154 | }; |
155 | ||
b4b8faa1 | 156 | static int num_socks; |
248c7f9c | 157 | struct xsk_socket_info *xsks[MAX_SOCKS]; |
3627d970 | 158 | int sock; |
b4b8faa1 MK |
159 | |
160 | static unsigned long get_nsecs(void) | |
161 | { | |
162 | struct timespec ts; | |
163 | ||
164 | clock_gettime(CLOCK_MONOTONIC, &ts); | |
165 | return ts.tv_sec * 1000000000UL + ts.tv_nsec; | |
166 | } | |
167 | ||
248c7f9c | 168 | static void print_benchmark(bool running) |
b4b8faa1 | 169 | { |
248c7f9c | 170 | const char *bench_str = "INVALID"; |
b4b8faa1 | 171 | |
248c7f9c MK |
172 | if (opt_bench == BENCH_RXDROP) |
173 | bench_str = "rxdrop"; | |
174 | else if (opt_bench == BENCH_TXONLY) | |
175 | bench_str = "txonly"; | |
176 | else if (opt_bench == BENCH_L2FWD) | |
177 | bench_str = "l2fwd"; | |
b4b8faa1 | 178 | |
248c7f9c MK |
179 | printf("%s:%d %s ", opt_if, opt_queue, bench_str); |
180 | if (opt_xdp_flags & XDP_FLAGS_SKB_MODE) | |
181 | printf("xdp-skb "); | |
182 | else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE) | |
183 | printf("xdp-drv "); | |
184 | else | |
185 | printf(" "); | |
b4b8faa1 | 186 | |
248c7f9c MK |
187 | if (opt_poll) |
188 | printf("poll() "); | |
b4b8faa1 | 189 | |
248c7f9c MK |
190 | if (running) { |
191 | printf("running..."); | |
192 | fflush(stdout); | |
b4b8faa1 | 193 | } |
b4b8faa1 MK |
194 | } |
195 | ||
b36c3206 CL |
196 | static int xsk_get_xdp_stats(int fd, struct xsk_socket_info *xsk) |
197 | { | |
198 | struct xdp_statistics stats; | |
199 | socklen_t optlen; | |
200 | int err; | |
201 | ||
202 | optlen = sizeof(stats); | |
203 | err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); | |
204 | if (err) | |
205 | return err; | |
206 | ||
207 | if (optlen == sizeof(struct xdp_statistics)) { | |
2e8806f0 CL |
208 | xsk->ring_stats.rx_dropped_npkts = stats.rx_dropped; |
209 | xsk->ring_stats.rx_invalid_npkts = stats.rx_invalid_descs; | |
210 | xsk->ring_stats.tx_invalid_npkts = stats.tx_invalid_descs; | |
211 | xsk->ring_stats.rx_full_npkts = stats.rx_ring_full; | |
212 | xsk->ring_stats.rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs; | |
213 | xsk->ring_stats.tx_empty_npkts = stats.tx_ring_empty_descs; | |
b36c3206 CL |
214 | return 0; |
215 | } | |
216 | ||
217 | return -EINVAL; | |
218 | } | |
219 | ||
60dc609d CL |
220 | static void dump_app_stats(long dt) |
221 | { | |
222 | int i; | |
223 | ||
224 | for (i = 0; i < num_socks && xsks[i]; i++) { | |
225 | char *fmt = "%-18s %'-14.0f %'-14lu\n"; | |
226 | double rx_empty_polls_ps, fill_fail_polls_ps, copy_tx_sendtos_ps, | |
227 | tx_wakeup_sendtos_ps, opt_polls_ps; | |
228 | ||
229 | rx_empty_polls_ps = (xsks[i]->app_stats.rx_empty_polls - | |
230 | xsks[i]->app_stats.prev_rx_empty_polls) * 1000000000. / dt; | |
231 | fill_fail_polls_ps = (xsks[i]->app_stats.fill_fail_polls - | |
232 | xsks[i]->app_stats.prev_fill_fail_polls) * 1000000000. / dt; | |
233 | copy_tx_sendtos_ps = (xsks[i]->app_stats.copy_tx_sendtos - | |
234 | xsks[i]->app_stats.prev_copy_tx_sendtos) * 1000000000. / dt; | |
235 | tx_wakeup_sendtos_ps = (xsks[i]->app_stats.tx_wakeup_sendtos - | |
236 | xsks[i]->app_stats.prev_tx_wakeup_sendtos) | |
237 | * 1000000000. / dt; | |
238 | opt_polls_ps = (xsks[i]->app_stats.opt_polls - | |
239 | xsks[i]->app_stats.prev_opt_polls) * 1000000000. / dt; | |
240 | ||
241 | printf("\n%-18s %-14s %-14s\n", "", "calls/s", "count"); | |
242 | printf(fmt, "rx empty polls", rx_empty_polls_ps, xsks[i]->app_stats.rx_empty_polls); | |
243 | printf(fmt, "fill fail polls", fill_fail_polls_ps, | |
244 | xsks[i]->app_stats.fill_fail_polls); | |
245 | printf(fmt, "copy tx sendtos", copy_tx_sendtos_ps, | |
246 | xsks[i]->app_stats.copy_tx_sendtos); | |
247 | printf(fmt, "tx wakeup sendtos", tx_wakeup_sendtos_ps, | |
248 | xsks[i]->app_stats.tx_wakeup_sendtos); | |
249 | printf(fmt, "opt polls", opt_polls_ps, xsks[i]->app_stats.opt_polls); | |
250 | ||
251 | xsks[i]->app_stats.prev_rx_empty_polls = xsks[i]->app_stats.rx_empty_polls; | |
252 | xsks[i]->app_stats.prev_fill_fail_polls = xsks[i]->app_stats.fill_fail_polls; | |
253 | xsks[i]->app_stats.prev_copy_tx_sendtos = xsks[i]->app_stats.copy_tx_sendtos; | |
254 | xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos; | |
255 | xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls; | |
256 | } | |
257 | } | |
258 | ||
67ed3755 CL |
259 | static bool get_interrupt_number(void) |
260 | { | |
261 | FILE *f_int_proc; | |
262 | char line[4096]; | |
263 | bool found = false; | |
264 | ||
265 | f_int_proc = fopen("/proc/interrupts", "r"); | |
266 | if (f_int_proc == NULL) { | |
267 | printf("Failed to open /proc/interrupts.\n"); | |
268 | return found; | |
269 | } | |
270 | ||
271 | while (!feof(f_int_proc) && !found) { | |
272 | /* Make sure to read a full line at a time */ | |
273 | if (fgets(line, sizeof(line), f_int_proc) == NULL || | |
274 | line[strlen(line) - 1] != '\n') { | |
275 | printf("Error reading from interrupts file\n"); | |
276 | break; | |
277 | } | |
278 | ||
279 | /* Extract interrupt number from line */ | |
280 | if (strstr(line, opt_irq_str) != NULL) { | |
281 | irq_no = atoi(line); | |
282 | found = true; | |
283 | break; | |
284 | } | |
285 | } | |
286 | ||
287 | fclose(f_int_proc); | |
288 | ||
289 | return found; | |
290 | } | |
291 | ||
292 | static int get_irqs(void) | |
293 | { | |
294 | char count_path[PATH_MAX]; | |
295 | int total_intrs = -1; | |
296 | FILE *f_count_proc; | |
297 | char line[4096]; | |
298 | ||
299 | snprintf(count_path, sizeof(count_path), | |
300 | "/sys/kernel/irq/%i/per_cpu_count", irq_no); | |
301 | f_count_proc = fopen(count_path, "r"); | |
302 | if (f_count_proc == NULL) { | |
303 | printf("Failed to open %s\n", count_path); | |
304 | return total_intrs; | |
305 | } | |
306 | ||
307 | if (fgets(line, sizeof(line), f_count_proc) == NULL || | |
308 | line[strlen(line) - 1] != '\n') { | |
309 | printf("Error reading from %s\n", count_path); | |
310 | } else { | |
311 | static const char com[2] = ","; | |
312 | char *token; | |
313 | ||
314 | total_intrs = 0; | |
315 | token = strtok(line, com); | |
316 | while (token != NULL) { | |
317 | /* sum up interrupts across all cores */ | |
318 | total_intrs += atoi(token); | |
319 | token = strtok(NULL, com); | |
320 | } | |
321 | } | |
322 | ||
323 | fclose(f_count_proc); | |
324 | ||
325 | return total_intrs; | |
326 | } | |
327 | ||
328 | static void dump_driver_stats(long dt) | |
329 | { | |
330 | int i; | |
331 | ||
332 | for (i = 0; i < num_socks && xsks[i]; i++) { | |
333 | char *fmt = "%-18s %'-14.0f %'-14lu\n"; | |
334 | double intrs_ps; | |
335 | int n_ints = get_irqs(); | |
336 | ||
337 | if (n_ints < 0) { | |
338 | printf("error getting intr info for intr %i\n", irq_no); | |
339 | return; | |
340 | } | |
341 | xsks[i]->drv_stats.intrs = n_ints - irqs_at_init; | |
342 | ||
343 | intrs_ps = (xsks[i]->drv_stats.intrs - xsks[i]->drv_stats.prev_intrs) * | |
344 | 1000000000. / dt; | |
345 | ||
346 | printf("\n%-18s %-14s %-14s\n", "", "intrs/s", "count"); | |
347 | printf(fmt, "irqs", intrs_ps, xsks[i]->drv_stats.intrs); | |
348 | ||
349 | xsks[i]->drv_stats.prev_intrs = xsks[i]->drv_stats.intrs; | |
350 | } | |
351 | } | |
352 | ||
248c7f9c | 353 | static void dump_stats(void) |
b4b8faa1 | 354 | { |
248c7f9c MK |
355 | unsigned long now = get_nsecs(); |
356 | long dt = now - prev_time; | |
357 | int i; | |
b4b8faa1 | 358 | |
248c7f9c | 359 | prev_time = now; |
b4b8faa1 | 360 | |
248c7f9c | 361 | for (i = 0; i < num_socks && xsks[i]; i++) { |
60dc609d | 362 | char *fmt = "%-18s %'-14.0f %'-14lu\n"; |
b36c3206 CL |
363 | double rx_pps, tx_pps, dropped_pps, rx_invalid_pps, full_pps, fill_empty_pps, |
364 | tx_invalid_pps, tx_empty_pps; | |
b4b8faa1 | 365 | |
2e8806f0 | 366 | rx_pps = (xsks[i]->ring_stats.rx_npkts - xsks[i]->ring_stats.prev_rx_npkts) * |
248c7f9c | 367 | 1000000000. / dt; |
2e8806f0 | 368 | tx_pps = (xsks[i]->ring_stats.tx_npkts - xsks[i]->ring_stats.prev_tx_npkts) * |
248c7f9c | 369 | 1000000000. / dt; |
b4b8faa1 | 370 | |
248c7f9c MK |
371 | printf("\n sock%d@", i); |
372 | print_benchmark(false); | |
373 | printf("\n"); | |
b4b8faa1 | 374 | |
60dc609d | 375 | printf("%-18s %-14s %-14s %-14.2f\n", "", "pps", "pkts", |
248c7f9c | 376 | dt / 1000000000.); |
2e8806f0 CL |
377 | printf(fmt, "rx", rx_pps, xsks[i]->ring_stats.rx_npkts); |
378 | printf(fmt, "tx", tx_pps, xsks[i]->ring_stats.tx_npkts); | |
b4b8faa1 | 379 | |
2e8806f0 CL |
380 | xsks[i]->ring_stats.prev_rx_npkts = xsks[i]->ring_stats.rx_npkts; |
381 | xsks[i]->ring_stats.prev_tx_npkts = xsks[i]->ring_stats.tx_npkts; | |
b36c3206 CL |
382 | |
383 | if (opt_extra_stats) { | |
384 | if (!xsk_get_xdp_stats(xsk_socket__fd(xsks[i]->xsk), xsks[i])) { | |
2e8806f0 CL |
385 | dropped_pps = (xsks[i]->ring_stats.rx_dropped_npkts - |
386 | xsks[i]->ring_stats.prev_rx_dropped_npkts) * | |
387 | 1000000000. / dt; | |
388 | rx_invalid_pps = (xsks[i]->ring_stats.rx_invalid_npkts - | |
389 | xsks[i]->ring_stats.prev_rx_invalid_npkts) * | |
390 | 1000000000. / dt; | |
391 | tx_invalid_pps = (xsks[i]->ring_stats.tx_invalid_npkts - | |
392 | xsks[i]->ring_stats.prev_tx_invalid_npkts) * | |
393 | 1000000000. / dt; | |
394 | full_pps = (xsks[i]->ring_stats.rx_full_npkts - | |
395 | xsks[i]->ring_stats.prev_rx_full_npkts) * | |
396 | 1000000000. / dt; | |
397 | fill_empty_pps = (xsks[i]->ring_stats.rx_fill_empty_npkts - | |
398 | xsks[i]->ring_stats.prev_rx_fill_empty_npkts) * | |
399 | 1000000000. / dt; | |
400 | tx_empty_pps = (xsks[i]->ring_stats.tx_empty_npkts - | |
401 | xsks[i]->ring_stats.prev_tx_empty_npkts) * | |
402 | 1000000000. / dt; | |
b36c3206 CL |
403 | |
404 | printf(fmt, "rx dropped", dropped_pps, | |
2e8806f0 | 405 | xsks[i]->ring_stats.rx_dropped_npkts); |
b36c3206 | 406 | printf(fmt, "rx invalid", rx_invalid_pps, |
2e8806f0 | 407 | xsks[i]->ring_stats.rx_invalid_npkts); |
b36c3206 | 408 | printf(fmt, "tx invalid", tx_invalid_pps, |
2e8806f0 | 409 | xsks[i]->ring_stats.tx_invalid_npkts); |
b36c3206 | 410 | printf(fmt, "rx queue full", full_pps, |
2e8806f0 | 411 | xsks[i]->ring_stats.rx_full_npkts); |
b36c3206 | 412 | printf(fmt, "fill ring empty", fill_empty_pps, |
2e8806f0 | 413 | xsks[i]->ring_stats.rx_fill_empty_npkts); |
b36c3206 | 414 | printf(fmt, "tx ring empty", tx_empty_pps, |
2e8806f0 CL |
415 | xsks[i]->ring_stats.tx_empty_npkts); |
416 | ||
417 | xsks[i]->ring_stats.prev_rx_dropped_npkts = | |
418 | xsks[i]->ring_stats.rx_dropped_npkts; | |
419 | xsks[i]->ring_stats.prev_rx_invalid_npkts = | |
420 | xsks[i]->ring_stats.rx_invalid_npkts; | |
421 | xsks[i]->ring_stats.prev_tx_invalid_npkts = | |
422 | xsks[i]->ring_stats.tx_invalid_npkts; | |
423 | xsks[i]->ring_stats.prev_rx_full_npkts = | |
424 | xsks[i]->ring_stats.rx_full_npkts; | |
425 | xsks[i]->ring_stats.prev_rx_fill_empty_npkts = | |
426 | xsks[i]->ring_stats.rx_fill_empty_npkts; | |
427 | xsks[i]->ring_stats.prev_tx_empty_npkts = | |
428 | xsks[i]->ring_stats.tx_empty_npkts; | |
b36c3206 CL |
429 | } else { |
430 | printf("%-15s\n", "Error retrieving extra stats"); | |
431 | } | |
432 | } | |
b4b8faa1 | 433 | } |
60dc609d CL |
434 | |
435 | if (opt_app_stats) | |
436 | dump_app_stats(dt); | |
67ed3755 CL |
437 | if (irq_no) |
438 | dump_driver_stats(dt); | |
b4b8faa1 MK |
439 | } |
440 | ||
d3f11b01 JJ |
441 | static bool is_benchmark_done(void) |
442 | { | |
443 | if (opt_duration > 0) { | |
444 | unsigned long dt = (get_nsecs() - start_time); | |
445 | ||
446 | if (dt >= opt_duration) | |
447 | benchmark_done = true; | |
448 | } | |
449 | return benchmark_done; | |
450 | } | |
451 | ||
248c7f9c | 452 | static void *poller(void *arg) |
b4b8faa1 | 453 | { |
248c7f9c | 454 | (void)arg; |
d3f11b01 | 455 | while (!is_benchmark_done()) { |
248c7f9c MK |
456 | sleep(opt_interval); |
457 | dump_stats(); | |
b4b8faa1 MK |
458 | } |
459 | ||
248c7f9c | 460 | return NULL; |
b4b8faa1 MK |
461 | } |
462 | ||
2620e92a WH |
463 | static void remove_xdp_program(void) |
464 | { | |
465 | u32 curr_prog_id = 0; | |
466 | ||
467 | if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { | |
468 | printf("bpf_get_link_xdp_id failed\n"); | |
469 | exit(EXIT_FAILURE); | |
470 | } | |
471 | ||
472 | if (prog_id == curr_prog_id) | |
473 | bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); | |
474 | else if (!curr_prog_id) | |
475 | printf("couldn't find a prog id on a given interface\n"); | |
476 | else | |
477 | printf("program on interface changed, not removing\n"); | |
478 | } | |
479 | ||
c9d27c9e | 480 | static void int_exit(int sig) |
b4b8faa1 | 481 | { |
c9d27c9e | 482 | benchmark_done = true; |
b4b8faa1 MK |
483 | } |
484 | ||
c9d27c9e MF |
485 | static void __exit_with_error(int error, const char *file, const char *func, |
486 | int line) | |
69525588 | 487 | { |
c9d27c9e MF |
488 | fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func, |
489 | line, error, strerror(error)); | |
2620e92a WH |
490 | |
491 | if (opt_num_xsks > 1) | |
492 | remove_xdp_program(); | |
c9d27c9e | 493 | exit(EXIT_FAILURE); |
69525588 JJ |
494 | } |
495 | ||
c9d27c9e MF |
496 | #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__) |
497 | ||
69525588 | 498 | static void xdpsock_cleanup(void) |
b4b8faa1 | 499 | { |
248c7f9c | 500 | struct xsk_umem *umem = xsks[0]->umem->umem; |
c9d27c9e | 501 | int i, cmd = CLOSE_CONN; |
b4b8faa1 | 502 | |
248c7f9c | 503 | dump_stats(); |
2e5d72c1 MK |
504 | for (i = 0; i < num_socks; i++) |
505 | xsk_socket__delete(xsks[i]->xsk); | |
248c7f9c | 506 | (void)xsk_umem__delete(umem); |
b4b8faa1 | 507 | |
c9d27c9e MF |
508 | if (opt_reduced_cap) { |
509 | if (write(sock, &cmd, sizeof(int)) < 0) | |
510 | exit_with_error(errno); | |
511 | } | |
2620e92a WH |
512 | |
513 | if (opt_num_xsks > 1) | |
514 | remove_xdp_program(); | |
b4b8faa1 MK |
515 | } |
516 | ||
/*
 * Exchange the source and destination MAC addresses of the Ethernet header
 * that @data points to. Nothing beyond the two address fields is modified.
 */
static void swap_mac_addresses(void *data)
{
	struct ether_header *eth = (struct ether_header *)data;
	struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
	struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
	struct ether_addr tmp;

	tmp = *src_addr;
	*src_addr = *dst_addr;
	*dst_addr = tmp;
}
528 | ||
a412ef54 | 529 | static void hex_dump(void *pkt, size_t length, u64 addr) |
b4b8faa1 | 530 | { |
b4b8faa1 MK |
531 | const unsigned char *address = (unsigned char *)pkt; |
532 | const unsigned char *line = address; | |
533 | size_t line_size = 32; | |
534 | unsigned char c; | |
a412ef54 BT |
535 | char buf[32]; |
536 | int i = 0; | |
b4b8faa1 | 537 | |
a412ef54 BT |
538 | if (!DEBUG_HEXDUMP) |
539 | return; | |
540 | ||
541 | sprintf(buf, "addr=%llu", addr); | |
b4b8faa1 | 542 | printf("length = %zu\n", length); |
a412ef54 | 543 | printf("%s | ", buf); |
b4b8faa1 MK |
544 | while (length-- > 0) { |
545 | printf("%02X ", *address++); | |
546 | if (!(++i % line_size) || (length == 0 && i % line_size)) { | |
547 | if (length == 0) { | |
548 | while (i++ % line_size) | |
549 | printf("__ "); | |
550 | } | |
551 | printf(" | "); /* right close */ | |
552 | while (line < address) { | |
553 | c = *line++; | |
554 | printf("%c", (c < 33 || c == 255) ? 0x2E : c); | |
555 | } | |
556 | printf("\n"); | |
557 | if (length > 0) | |
a412ef54 | 558 | printf("%s | ", buf); |
b4b8faa1 MK |
559 | } |
560 | } | |
561 | printf("\n"); | |
562 | } | |
b4b8faa1 | 563 | |
4a3c23ae JJ |
564 | static void *memset32_htonl(void *dest, u32 val, u32 size) |
565 | { | |
566 | u32 *ptr = (u32 *)dest; | |
567 | int i; | |
568 | ||
569 | val = htonl(val); | |
570 | ||
571 | for (i = 0; i < (size & (~0x3)); i += 4) | |
572 | ptr[i >> 2] = val; | |
573 | ||
574 | for (; i < size; i++) | |
575 | ((char *)dest)[i] = ((char *)&val)[i & 3]; | |
576 | ||
577 | return dest; | |
578 | } | |
579 | ||
/*
 * Fold a 32-bit sum into 16 bits with end-around carry.
 * This function code has been taken from
 * Linux kernel lib/checksum.c
 */
static inline unsigned short from32to16(unsigned int x)
{
	unsigned int folded;

	/* low half plus high half; the result may carry into bit 16 */
	folded = (x >> 16) + (x & 0xffff);
	/* absorb that carry */
	folded = (folded >> 16) + (folded & 0xffff);

	return folded;
}
592 | ||
/*
 * Decide byte order from the compiler's own macros. A bare
 * "#ifdef __LITTLE_ENDIAN" is not reliable in user space because the C
 * library's <endian.h> defines that name on every architecture.
 */
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__)
#define XDPSOCK_CSUM_LE (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
#elif defined(__LITTLE_ENDIAN)
#define XDPSOCK_CSUM_LE 1
#else
#define XDPSOCK_CSUM_LE 0
#endif

/*
 * Compute the 16-bit ones'-complement sum of @len bytes at @buff and return
 * it (not inverted). Returns 0 when @len <= 0. The buffer may start at any
 * address; leading bytes are consumed until it is 4-byte aligned.
 *
 * This function code has been taken from
 * Linux kernel lib/checksum.c
 */
static unsigned int do_csum(const unsigned char *buff, int len)
{
	unsigned int result = 0;
	int odd;

	if (len <= 0)
		goto out;
	odd = 1 & (unsigned long)buff;
	if (odd) {
#if XDPSOCK_CSUM_LE
		result += (*buff << 8);
#else
		result = *buff;
#endif
		len--;
		buff++;
	}
	if (len >= 2) {
		if (2 & (unsigned long)buff) {
			result += *(unsigned short *)buff;
			len -= 2;
			buff += 2;
		}
		if (len >= 4) {
			const unsigned char *end = buff +
						   ((unsigned int)len & ~3);
			unsigned int carry = 0;

			do {
				unsigned int w = *(unsigned int *)buff;

				buff += 4;
				result += carry;
				result += w;
				carry = (w > result);
			} while (buff < end);
			result += carry;
			result = (result & 0xffff) + (result >> 16);
		}
		if (len & 2) {
			result += *(unsigned short *)buff;
			buff += 2;
		}
	}
	if (len & 1) {
#if XDPSOCK_CSUM_LE
		result += *buff;
#else
		result += (*buff << 8);
#endif
	}
	result = from32to16(result);
	/* an odd start address shifted every byte by one lane: swap back */
	if (odd)
		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
out:
	return result;
}
653 | ||
/*
 * This is a version of ip_compute_csum() optimized for IP headers,
 * which always checksum on 4 octet boundaries.
 * @ihl is the header length in 32-bit words.
 * This function code has been taken from
 * Linux kernel lib/checksum.c
 */
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
	unsigned int hdr_bytes = ihl * 4;

	return (__sum16)~do_csum(iph, hdr_bytes);
}
664 | ||
665 | /* | |
666 | * Fold a partial checksum | |
667 | * This function code has been taken from | |
668 | * Linux kernel include/asm-generic/checksum.h | |
669 | */ | |
670 | static inline __sum16 csum_fold(__wsum csum) | |
671 | { | |
29f24c43 | 672 | u32 sum = (u32)csum; |
4a3c23ae JJ |
673 | |
674 | sum = (sum & 0xffff) + (sum >> 16); | |
675 | sum = (sum & 0xffff) + (sum >> 16); | |
29f24c43 | 676 | return (__sum16)~sum; |
4a3c23ae JJ |
677 | } |
678 | ||
679 | /* | |
680 | * This function code has been taken from | |
681 | * Linux kernel lib/checksum.c | |
682 | */ | |
683 | static inline u32 from64to32(u64 x) | |
684 | { | |
685 | /* add up 32-bit and 32-bit for 32+c bit */ | |
686 | x = (x & 0xffffffff) + (x >> 32); | |
687 | /* add up carry.. */ | |
688 | x = (x & 0xffffffff) + (x >> 32); | |
689 | return (u32)x; | |
690 | } | |
691 | ||
692 | __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, | |
693 | __u32 len, __u8 proto, __wsum sum); | |
694 | ||
695 | /* | |
696 | * This function code has been taken from | |
697 | * Linux kernel lib/checksum.c | |
698 | */ | |
699 | __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, | |
700 | __u32 len, __u8 proto, __wsum sum) | |
701 | { | |
29f24c43 | 702 | unsigned long long s = (u32)sum; |
4a3c23ae | 703 | |
29f24c43 NS |
704 | s += (u32)saddr; |
705 | s += (u32)daddr; | |
4a3c23ae JJ |
706 | #ifdef __BIG_ENDIAN__ |
707 | s += proto + len; | |
708 | #else | |
709 | s += (proto + len) << 8; | |
710 | #endif | |
29f24c43 | 711 | return (__wsum)from64to32(s); |
4a3c23ae JJ |
712 | } |
713 | ||
/*
 * Compute the final 16-bit TCP/UDP checksum: add the pseudo-header to @sum
 * and fold the result.
 * This function has been taken from
 * Linux kernel include/asm-generic/checksum.h
 */
static inline __sum16
csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
		  __u8 proto, __wsum sum)
{
	__wsum with_pseudo_hdr;

	with_pseudo_hdr = csum_tcpudp_nofold(saddr, daddr, len, proto, sum);
	return csum_fold(with_pseudo_hdr);
}
724 | ||
725 | static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, | |
726 | u8 proto, u16 *udp_pkt) | |
727 | { | |
728 | u32 csum = 0; | |
729 | u32 cnt = 0; | |
730 | ||
731 | /* udp hdr and data */ | |
732 | for (; cnt < len; cnt += 2) | |
733 | csum += udp_pkt[cnt >> 1]; | |
734 | ||
735 | return csum_tcpudp_magic(saddr, daddr, len, proto, csum); | |
736 | } | |
737 | ||
738 | #define ETH_FCS_SIZE 4 | |
739 | ||
740 | #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ | |
741 | sizeof(struct udphdr)) | |
742 | ||
743 | #define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE) | |
744 | #define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) | |
745 | #define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) | |
746 | #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) | |
747 | ||
748 | static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; | |
749 | ||
750 | static void gen_eth_hdr_data(void) | |
751 | { | |
752 | struct udphdr *udp_hdr = (struct udphdr *)(pkt_data + | |
753 | sizeof(struct ethhdr) + | |
754 | sizeof(struct iphdr)); | |
755 | struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + | |
756 | sizeof(struct ethhdr)); | |
757 | struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; | |
758 | ||
759 | /* ethernet header */ | |
760 | memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN); | |
761 | memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN); | |
762 | eth_hdr->h_proto = htons(ETH_P_IP); | |
763 | ||
764 | /* IP header */ | |
765 | ip_hdr->version = IPVERSION; | |
766 | ip_hdr->ihl = 0x5; /* 20 byte header */ | |
767 | ip_hdr->tos = 0x0; | |
768 | ip_hdr->tot_len = htons(IP_PKT_SIZE); | |
769 | ip_hdr->id = 0; | |
770 | ip_hdr->frag_off = 0; | |
771 | ip_hdr->ttl = IPDEFTTL; | |
772 | ip_hdr->protocol = IPPROTO_UDP; | |
773 | ip_hdr->saddr = htonl(0x0a0a0a10); | |
774 | ip_hdr->daddr = htonl(0x0a0a0a20); | |
775 | ||
776 | /* IP header checksum */ | |
777 | ip_hdr->check = 0; | |
778 | ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl); | |
779 | ||
780 | /* UDP header */ | |
781 | udp_hdr->source = htons(0x1000); | |
782 | udp_hdr->dest = htons(0x1000); | |
783 | udp_hdr->len = htons(UDP_PKT_SIZE); | |
784 | ||
785 | /* UDP data */ | |
46e3268e | 786 | memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern, |
4a3c23ae JJ |
787 | UDP_PKT_DATA_SIZE); |
788 | ||
789 | /* UDP header checksum */ | |
790 | udp_hdr->check = 0; | |
791 | udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, | |
792 | IPPROTO_UDP, (u16 *)udp_hdr); | |
793 | } | |
794 | ||
cd9e72b6 | 795 | static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr) |
b4b8faa1 | 796 | { |
248c7f9c | 797 | memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, |
4a3c23ae | 798 | PKT_SIZE); |
b4b8faa1 MK |
799 | } |
800 | ||
248c7f9c | 801 | static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) |
b4b8faa1 | 802 | { |
248c7f9c | 803 | struct xsk_umem_info *umem; |
123e8da1 | 804 | struct xsk_umem_config cfg = { |
c8a039a4 MK |
805 | /* We recommend that you set the fill ring size >= HW RX ring size + |
806 | * AF_XDP RX ring size. Make sure you fill up the fill ring | |
807 | * with buffers at regular intervals, and you will with this setting | |
808 | * avoid allocation failures in the driver. These are usually quite | |
809 | * expensive since drivers have not been written to assume that | |
810 | * allocation failures are common. For regular sockets, kernel | |
811 | * allocated memory is used that only runs out in OOM situations | |
812 | * that should be rare. | |
813 | */ | |
814 | .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, | |
123e8da1 MM |
815 | .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, |
816 | .frame_size = opt_xsk_frame_size, | |
817 | .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, | |
c543f546 | 818 | .flags = opt_umem_flags |
123e8da1 | 819 | }; |
661842c4 | 820 | int ret; |
b4b8faa1 MK |
821 | |
822 | umem = calloc(1, sizeof(*umem)); | |
248c7f9c MK |
823 | if (!umem) |
824 | exit_with_error(errno); | |
b4b8faa1 | 825 | |
248c7f9c | 826 | ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq, |
123e8da1 | 827 | &cfg); |
248c7f9c MK |
828 | if (ret) |
829 | exit_with_error(-ret); | |
b4b8faa1 | 830 | |
661842c4 MK |
831 | umem->buffer = buffer; |
832 | return umem; | |
833 | } | |
834 | ||
835 | static void xsk_populate_fill_ring(struct xsk_umem_info *umem) | |
836 | { | |
837 | int ret, i; | |
838 | u32 idx; | |
839 | ||
2e5d72c1 | 840 | ret = xsk_ring_prod__reserve(&umem->fq, |
c8a039a4 MK |
841 | XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, &idx); |
842 | if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS * 2) | |
2e5d72c1 | 843 | exit_with_error(-ret); |
c8a039a4 | 844 | for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; i++) |
2e5d72c1 MK |
845 | *xsk_ring_prod__fill_addr(&umem->fq, idx++) = |
846 | i * opt_xsk_frame_size; | |
c8a039a4 | 847 | xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS * 2); |
b4b8faa1 MK |
848 | } |
849 | ||
661842c4 MK |
850 | static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, |
851 | bool rx, bool tx) | |
b4b8faa1 | 852 | { |
248c7f9c MK |
853 | struct xsk_socket_config cfg; |
854 | struct xsk_socket_info *xsk; | |
661842c4 MK |
855 | struct xsk_ring_cons *rxr; |
856 | struct xsk_ring_prod *txr; | |
248c7f9c | 857 | int ret; |
b4b8faa1 MK |
858 | |
859 | xsk = calloc(1, sizeof(*xsk)); | |
248c7f9c MK |
860 | if (!xsk) |
861 | exit_with_error(errno); | |
862 | ||
863 | xsk->umem = umem; | |
864 | cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; | |
865 | cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; | |
3627d970 | 866 | if (opt_num_xsks > 1 || opt_reduced_cap) |
2e5d72c1 MK |
867 | cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; |
868 | else | |
869 | cfg.libbpf_flags = 0; | |
248c7f9c MK |
870 | cfg.xdp_flags = opt_xdp_flags; |
871 | cfg.bind_flags = opt_xdp_bind_flags; | |
2e5d72c1 | 872 | |
661842c4 MK |
873 | rxr = rx ? &xsk->rx : NULL; |
874 | txr = tx ? &xsk->tx : NULL; | |
875 | ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem, | |
876 | rxr, txr, &cfg); | |
248c7f9c MK |
877 | if (ret) |
878 | exit_with_error(-ret); | |
879 | ||
2620e92a WH |
880 | ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags); |
881 | if (ret) | |
882 | exit_with_error(-ret); | |
883 | ||
60dc609d CL |
884 | xsk->app_stats.rx_empty_polls = 0; |
885 | xsk->app_stats.fill_fail_polls = 0; | |
886 | xsk->app_stats.copy_tx_sendtos = 0; | |
887 | xsk->app_stats.tx_wakeup_sendtos = 0; | |
888 | xsk->app_stats.opt_polls = 0; | |
889 | xsk->app_stats.prev_rx_empty_polls = 0; | |
890 | xsk->app_stats.prev_fill_fail_polls = 0; | |
891 | xsk->app_stats.prev_copy_tx_sendtos = 0; | |
892 | xsk->app_stats.prev_tx_wakeup_sendtos = 0; | |
893 | xsk->app_stats.prev_opt_polls = 0; | |
894 | ||
b4b8faa1 MK |
895 | return xsk; |
896 | } | |
897 | ||
b4b8faa1 MK |
/*
 * Long command-line options; each maps to the short option letter handled in
 * parse_command_line(). The has_arg column must agree with the short option
 * string passed to getopt_long() there: "irq-string" takes an argument, just
 * like its short form "I:", otherwise --irq-string would leave optarg NULL.
 */
static struct option long_options[] = {
	{"rxdrop", no_argument, 0, 'r'},
	{"txonly", no_argument, 0, 't'},
	{"l2fwd", no_argument, 0, 'l'},
	{"interface", required_argument, 0, 'i'},
	{"queue", required_argument, 0, 'q'},
	{"poll", no_argument, 0, 'p'},
	{"xdp-skb", no_argument, 0, 'S'},
	{"xdp-native", no_argument, 0, 'N'},
	{"interval", required_argument, 0, 'n'},
	{"zero-copy", no_argument, 0, 'z'},
	{"copy", no_argument, 0, 'c'},
	{"frame-size", required_argument, 0, 'f'},
	{"no-need-wakeup", no_argument, 0, 'm'},
	{"unaligned", no_argument, 0, 'u'},
	{"shared-umem", no_argument, 0, 'M'},
	{"force", no_argument, 0, 'F'},
	{"duration", required_argument, 0, 'd'},
	{"batch-size", required_argument, 0, 'b'},
	{"tx-pkt-count", required_argument, 0, 'C'},
	{"tx-pkt-size", required_argument, 0, 's'},
	{"tx-pkt-pattern", required_argument, 0, 'P'},
	{"extra-stats", no_argument, 0, 'x'},
	{"quiet", no_argument, 0, 'Q'},
	{"app-stats", no_argument, 0, 'a'},
	{"irq-string", required_argument, 0, 'I'},
	{"busy-poll", no_argument, 0, 'B'},
	{"reduce-cap", no_argument, 0, 'R'},
	{0, 0, 0, 0}
};
928 | ||
929 | static void usage(const char *prog) | |
930 | { | |
931 | const char *str = | |
932 | " Usage: %s [OPTIONS]\n" | |
933 | " Options:\n" | |
934 | " -r, --rxdrop Discard all incoming packets (default)\n" | |
935 | " -t, --txonly Only send packets\n" | |
936 | " -l, --l2fwd MAC swap L2 forwarding\n" | |
937 | " -i, --interface=n Run on interface n\n" | |
938 | " -q, --queue=n Use queue n (default 0)\n" | |
939 | " -p, --poll Use poll syscall\n" | |
b4b8faa1 | 940 | " -S, --xdp-skb=n Use XDP skb-mod\n" |
4564a8bb | 941 | " -N, --xdp-native=n Enforce XDP native mode\n" |
b4b8faa1 | 942 | " -n, --interval=n Specify statistics update interval (default 1 sec).\n" |
58c50ae4 BT |
943 | " -z, --zero-copy Force zero-copy mode.\n" |
944 | " -c, --copy Force copy mode.\n" | |
46738f73 | 945 | " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n" |
c543f546 KL |
946 | " -f, --frame-size=n Set the frame size (must be a power of two in aligned mode, default is %d).\n" |
947 | " -u, --unaligned Enable unaligned chunk placement\n" | |
3627d970 | 948 | " -M, --shared-umem Enable XDP_SHARED_UMEM (cannot be used with -R)\n" |
b3133329 | 949 | " -F, --force Force loading the XDP prog\n" |
d3f11b01 JJ |
950 | " -d, --duration=n Duration in secs to run command.\n" |
951 | " Default: forever.\n" | |
cd9e72b6 JJ |
952 | " -b, --batch-size=n Batch size for sending or receiving\n" |
953 | " packets. Default: %d\n" | |
ece6e969 JJ |
954 | " -C, --tx-pkt-count=n Number of packets to send.\n" |
955 | " Default: Continuous packets.\n" | |
4a3c23ae JJ |
956 | " -s, --tx-pkt-size=n Transmit packet size.\n" |
957 | " (Default: %d bytes)\n" | |
958 | " Min size: %d, Max size %d.\n" | |
46e3268e | 959 | " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n" |
b36c3206 | 960 | " -x, --extra-stats Display extra statistics.\n" |
74e00676 | 961 | " -Q, --quiet Do not display any stats.\n" |
60dc609d | 962 | " -a, --app-stats Display application (syscall) statistics.\n" |
67ed3755 | 963 | " -I, --irq-string Display driver interrupt statistics for interface associated with irq-string.\n" |
b35fc148 | 964 | " -B, --busy-poll Busy poll.\n" |
3627d970 | 965 | " -R, --reduce-cap Use reduced capabilities (cannot be used with -M)\n" |
b4b8faa1 | 966 | "\n"; |
cd9e72b6 | 967 | fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, |
4a3c23ae | 968 | opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, |
46e3268e | 969 | XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern); |
4a3c23ae | 970 | |
b4b8faa1 MK |
971 | exit(EXIT_FAILURE); |
972 | } | |
973 | ||
974 | static void parse_command_line(int argc, char **argv) | |
975 | { | |
976 | int option_index, c; | |
977 | ||
978 | opterr = 0; | |
979 | ||
980 | for (;;) { | |
3627d970 | 981 | c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:BR", |
46738f73 | 982 | long_options, &option_index); |
b4b8faa1 MK |
983 | if (c == -1) |
984 | break; | |
985 | ||
986 | switch (c) { | |
987 | case 'r': | |
988 | opt_bench = BENCH_RXDROP; | |
989 | break; | |
990 | case 't': | |
991 | opt_bench = BENCH_TXONLY; | |
992 | break; | |
993 | case 'l': | |
994 | opt_bench = BENCH_L2FWD; | |
995 | break; | |
996 | case 'i': | |
997 | opt_if = optarg; | |
998 | break; | |
999 | case 'q': | |
1000 | opt_queue = atoi(optarg); | |
1001 | break; | |
b4b8faa1 MK |
1002 | case 'p': |
1003 | opt_poll = 1; | |
1004 | break; | |
1005 | case 'S': | |
1006 | opt_xdp_flags |= XDP_FLAGS_SKB_MODE; | |
9f5232cc | 1007 | opt_xdp_bind_flags |= XDP_COPY; |
b4b8faa1 MK |
1008 | break; |
1009 | case 'N': | |
d50ecc46 | 1010 | /* default, set below */ |
b4b8faa1 MK |
1011 | break; |
1012 | case 'n': | |
1013 | opt_interval = atoi(optarg); | |
1014 | break; | |
58c50ae4 BT |
1015 | case 'z': |
1016 | opt_xdp_bind_flags |= XDP_ZEROCOPY; | |
1017 | break; | |
1018 | case 'c': | |
1019 | opt_xdp_bind_flags |= XDP_COPY; | |
1020 | break; | |
c543f546 KL |
1021 | case 'u': |
1022 | opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; | |
1023 | opt_unaligned_chunks = 1; | |
3945b37a | 1024 | opt_mmap_flags = MAP_HUGETLB; |
c543f546 | 1025 | break; |
743e568c MF |
1026 | case 'F': |
1027 | opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; | |
1028 | break; | |
123e8da1 MM |
1029 | case 'f': |
1030 | opt_xsk_frame_size = atoi(optarg); | |
2e5d72c1 | 1031 | break; |
46738f73 MK |
1032 | case 'm': |
1033 | opt_need_wakeup = false; | |
1034 | opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP; | |
123e8da1 | 1035 | break; |
2e5d72c1 MK |
1036 | case 'M': |
1037 | opt_num_xsks = MAX_SOCKS; | |
1038 | break; | |
d3f11b01 JJ |
1039 | case 'd': |
1040 | opt_duration = atoi(optarg); | |
1041 | opt_duration *= 1000000000; | |
1042 | break; | |
cd9e72b6 JJ |
1043 | case 'b': |
1044 | opt_batch_size = atoi(optarg); | |
1045 | break; | |
ece6e969 JJ |
1046 | case 'C': |
1047 | opt_pkt_count = atoi(optarg); | |
1048 | break; | |
4a3c23ae JJ |
1049 | case 's': |
1050 | opt_pkt_size = atoi(optarg); | |
1051 | if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) || | |
1052 | opt_pkt_size < MIN_PKT_SIZE) { | |
1053 | fprintf(stderr, | |
1054 | "ERROR: Invalid frame size %d\n", | |
1055 | opt_pkt_size); | |
1056 | usage(basename(argv[0])); | |
1057 | } | |
1058 | break; | |
46e3268e JJ |
1059 | case 'P': |
1060 | opt_pkt_fill_pattern = strtol(optarg, NULL, 16); | |
1061 | break; | |
b36c3206 CL |
1062 | case 'x': |
1063 | opt_extra_stats = 1; | |
1064 | break; | |
74e00676 MK |
1065 | case 'Q': |
1066 | opt_quiet = 1; | |
1067 | break; | |
60dc609d CL |
1068 | case 'a': |
1069 | opt_app_stats = 1; | |
67ed3755 CL |
1070 | break; |
1071 | case 'I': | |
1072 | opt_irq_str = optarg; | |
1073 | if (get_interrupt_number()) | |
1074 | irqs_at_init = get_irqs(); | |
1075 | if (irqs_at_init < 0) { | |
1076 | fprintf(stderr, "ERROR: Failed to get irqs for %s\n", opt_irq_str); | |
1077 | usage(basename(argv[0])); | |
1078 | } | |
b35fc148 BT |
1079 | break; |
1080 | case 'B': | |
1081 | opt_busy_poll = 1; | |
60dc609d | 1082 | break; |
3627d970 MD |
1083 | case 'R': |
1084 | opt_reduced_cap = true; | |
1085 | break; | |
b4b8faa1 MK |
1086 | default: |
1087 | usage(basename(argv[0])); | |
1088 | } | |
1089 | } | |
1090 | ||
d50ecc46 THJ |
1091 | if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE)) |
1092 | opt_xdp_flags |= XDP_FLAGS_DRV_MODE; | |
1093 | ||
b4b8faa1 MK |
1094 | opt_ifindex = if_nametoindex(opt_if); |
1095 | if (!opt_ifindex) { | |
1096 | fprintf(stderr, "ERROR: interface \"%s\" does not exist\n", | |
1097 | opt_if); | |
1098 | usage(basename(argv[0])); | |
1099 | } | |
248c7f9c | 1100 | |
c543f546 KL |
1101 | if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) && |
1102 | !opt_unaligned_chunks) { | |
123e8da1 MM |
1103 | fprintf(stderr, "--frame-size=%d is not a power of two\n", |
1104 | opt_xsk_frame_size); | |
1105 | usage(basename(argv[0])); | |
1106 | } | |
3627d970 MD |
1107 | |
1108 | if (opt_reduced_cap && opt_num_xsks > 1) { | |
1109 | fprintf(stderr, "ERROR: -M and -R cannot be used together\n"); | |
1110 | usage(basename(argv[0])); | |
1111 | } | |
b4b8faa1 MK |
1112 | } |
1113 | ||
248c7f9c | 1114 | static void kick_tx(struct xsk_socket_info *xsk) |
b4b8faa1 MK |
1115 | { |
1116 | int ret; | |
1117 | ||
248c7f9c | 1118 | ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); |
8ed47e14 MF |
1119 | if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || |
1120 | errno == EBUSY || errno == ENETDOWN) | |
b4b8faa1 | 1121 | return; |
248c7f9c | 1122 | exit_with_error(errno); |
b4b8faa1 MK |
1123 | } |
1124 | ||
284cbc61 | 1125 | static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk) |
b4b8faa1 | 1126 | { |
03895e63 | 1127 | struct xsk_umem_info *umem = xsk->umem; |
b74e21ab | 1128 | u32 idx_cq = 0, idx_fq = 0; |
b4b8faa1 MK |
1129 | unsigned int rcvd; |
1130 | size_t ndescs; | |
1131 | ||
1132 | if (!xsk->outstanding_tx) | |
1133 | return; | |
1134 | ||
3131cf66 MK |
1135 | /* In copy mode, Tx is driven by a syscall so we need to use e.g. sendto() to |
1136 | * really send the packets. In zero-copy mode we do not have to do this, since Tx | |
1137 | * is driven by the NAPI loop. So as an optimization, we do not have to call | |
1138 | * sendto() all the time in zero-copy mode for l2fwd. | |
1139 | */ | |
60dc609d CL |
1140 | if (opt_xdp_bind_flags & XDP_COPY) { |
1141 | xsk->app_stats.copy_tx_sendtos++; | |
3131cf66 | 1142 | kick_tx(xsk); |
60dc609d | 1143 | } |
3131cf66 | 1144 | |
cd9e72b6 | 1145 | ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size : |
248c7f9c | 1146 | xsk->outstanding_tx; |
b4b8faa1 MK |
1147 | |
1148 | /* re-add completed Tx buffers */ | |
03895e63 | 1149 | rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq); |
b4b8faa1 | 1150 | if (rcvd > 0) { |
248c7f9c MK |
1151 | unsigned int i; |
1152 | int ret; | |
1153 | ||
03895e63 | 1154 | ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); |
248c7f9c MK |
1155 | while (ret != rcvd) { |
1156 | if (ret < 0) | |
1157 | exit_with_error(-ret); | |
b35fc148 | 1158 | if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&umem->fq)) { |
60dc609d | 1159 | xsk->app_stats.fill_fail_polls++; |
284cbc61 BT |
1160 | recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, |
1161 | NULL); | |
60dc609d | 1162 | } |
03895e63 | 1163 | ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); |
248c7f9c | 1164 | } |
03895e63 | 1165 | |
248c7f9c | 1166 | for (i = 0; i < rcvd; i++) |
03895e63 KL |
1167 | *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = |
1168 | *xsk_ring_cons__comp_addr(&umem->cq, idx_cq++); | |
248c7f9c MK |
1169 | |
1170 | xsk_ring_prod__submit(&xsk->umem->fq, rcvd); | |
1171 | xsk_ring_cons__release(&xsk->umem->cq, rcvd); | |
b4b8faa1 | 1172 | xsk->outstanding_tx -= rcvd; |
b4b8faa1 MK |
1173 | } |
1174 | } | |
1175 | ||
ece6e969 JJ |
1176 | static inline void complete_tx_only(struct xsk_socket_info *xsk, |
1177 | int batch_size) | |
b4b8faa1 | 1178 | { |
b4b8faa1 | 1179 | unsigned int rcvd; |
248c7f9c | 1180 | u32 idx; |
b4b8faa1 MK |
1181 | |
1182 | if (!xsk->outstanding_tx) | |
1183 | return; | |
1184 | ||
60dc609d CL |
1185 | if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) { |
1186 | xsk->app_stats.tx_wakeup_sendtos++; | |
46738f73 | 1187 | kick_tx(xsk); |
60dc609d | 1188 | } |
b4b8faa1 | 1189 | |
ece6e969 | 1190 | rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); |
b4b8faa1 | 1191 | if (rcvd > 0) { |
248c7f9c | 1192 | xsk_ring_cons__release(&xsk->umem->cq, rcvd); |
b4b8faa1 | 1193 | xsk->outstanding_tx -= rcvd; |
b4b8faa1 MK |
1194 | } |
1195 | } | |
1196 | ||
f2d27282 | 1197 | static void rx_drop(struct xsk_socket_info *xsk) |
b4b8faa1 | 1198 | { |
b4b8faa1 | 1199 | unsigned int rcvd, i; |
b74e21ab | 1200 | u32 idx_rx = 0, idx_fq = 0; |
248c7f9c | 1201 | int ret; |
b4b8faa1 | 1202 | |
cd9e72b6 | 1203 | rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); |
46738f73 | 1204 | if (!rcvd) { |
b35fc148 | 1205 | if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { |
60dc609d | 1206 | xsk->app_stats.rx_empty_polls++; |
f2d27282 | 1207 | recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); |
60dc609d | 1208 | } |
b4b8faa1 | 1209 | return; |
46738f73 | 1210 | } |
b4b8faa1 | 1211 | |
248c7f9c MK |
1212 | ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); |
1213 | while (ret != rcvd) { | |
1214 | if (ret < 0) | |
1215 | exit_with_error(-ret); | |
b35fc148 | 1216 | if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { |
60dc609d | 1217 | xsk->app_stats.fill_fail_polls++; |
f2d27282 | 1218 | recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); |
60dc609d | 1219 | } |
248c7f9c MK |
1220 | ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); |
1221 | } | |
1222 | ||
b4b8faa1 | 1223 | for (i = 0; i < rcvd; i++) { |
248c7f9c MK |
1224 | u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; |
1225 | u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; | |
03895e63 KL |
1226 | u64 orig = xsk_umem__extract_addr(addr); |
1227 | ||
1228 | addr = xsk_umem__add_offset_to_addr(addr); | |
248c7f9c | 1229 | char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); |
b4b8faa1 | 1230 | |
248c7f9c | 1231 | hex_dump(pkt, len, addr); |
03895e63 | 1232 | *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig; |
b4b8faa1 MK |
1233 | } |
1234 | ||
248c7f9c MK |
1235 | xsk_ring_prod__submit(&xsk->umem->fq, rcvd); |
1236 | xsk_ring_cons__release(&xsk->rx, rcvd); | |
2e8806f0 | 1237 | xsk->ring_stats.rx_npkts += rcvd; |
b4b8faa1 MK |
1238 | } |
1239 | ||
1240 | static void rx_drop_all(void) | |
1241 | { | |
2e5d72c1 | 1242 | struct pollfd fds[MAX_SOCKS] = {}; |
46738f73 | 1243 | int i, ret; |
b4b8faa1 | 1244 | |
b4b8faa1 | 1245 | for (i = 0; i < num_socks; i++) { |
248c7f9c | 1246 | fds[i].fd = xsk_socket__fd(xsks[i]->xsk); |
b4b8faa1 | 1247 | fds[i].events = POLLIN; |
b4b8faa1 MK |
1248 | } |
1249 | ||
1250 | for (;;) { | |
1251 | if (opt_poll) { | |
60dc609d CL |
1252 | for (i = 0; i < num_socks; i++) |
1253 | xsks[i]->app_stats.opt_polls++; | |
46738f73 | 1254 | ret = poll(fds, num_socks, opt_timeout); |
b4b8faa1 MK |
1255 | if (ret <= 0) |
1256 | continue; | |
1257 | } | |
1258 | ||
1259 | for (i = 0; i < num_socks; i++) | |
f2d27282 | 1260 | rx_drop(xsks[i]); |
d3f11b01 JJ |
1261 | |
1262 | if (benchmark_done) | |
1263 | break; | |
46738f73 MK |
1264 | } |
1265 | } | |
1266 | ||
b69e56cf | 1267 | static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) |
46738f73 MK |
1268 | { |
1269 | u32 idx; | |
cd9e72b6 | 1270 | unsigned int i; |
46738f73 | 1271 | |
ece6e969 JJ |
1272 | while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < |
1273 | batch_size) { | |
1274 | complete_tx_only(xsk, batch_size); | |
092fde0f MK |
1275 | if (benchmark_done) |
1276 | return; | |
cd9e72b6 | 1277 | } |
46738f73 | 1278 | |
ece6e969 | 1279 | for (i = 0; i < batch_size; i++) { |
cd9e72b6 JJ |
1280 | struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, |
1281 | idx + i); | |
3b80d106 | 1282 | tx_desc->addr = (*frame_nb + i) * opt_xsk_frame_size; |
4a3c23ae | 1283 | tx_desc->len = PKT_SIZE; |
b4b8faa1 | 1284 | } |
46738f73 | 1285 | |
ece6e969 | 1286 | xsk_ring_prod__submit(&xsk->tx, batch_size); |
90da4b32 | 1287 | xsk->ring_stats.tx_npkts += batch_size; |
ece6e969 | 1288 | xsk->outstanding_tx += batch_size; |
b69e56cf WJ |
1289 | *frame_nb += batch_size; |
1290 | *frame_nb %= NUM_FRAMES; | |
ece6e969 JJ |
1291 | complete_tx_only(xsk, batch_size); |
1292 | } | |
1293 | ||
1294 | static inline int get_batch_size(int pkt_cnt) | |
1295 | { | |
1296 | if (!opt_pkt_count) | |
1297 | return opt_batch_size; | |
1298 | ||
1299 | if (pkt_cnt + opt_batch_size <= opt_pkt_count) | |
1300 | return opt_batch_size; | |
1301 | ||
1302 | return opt_pkt_count - pkt_cnt; | |
1303 | } | |
1304 | ||
1305 | static void complete_tx_only_all(void) | |
1306 | { | |
1307 | bool pending; | |
1308 | int i; | |
1309 | ||
1310 | do { | |
1311 | pending = false; | |
1312 | for (i = 0; i < num_socks; i++) { | |
1313 | if (xsks[i]->outstanding_tx) { | |
1314 | complete_tx_only(xsks[i], opt_batch_size); | |
1315 | pending = !!xsks[i]->outstanding_tx; | |
1316 | } | |
1317 | } | |
1318 | } while (pending); | |
b4b8faa1 MK |
1319 | } |
1320 | ||
46738f73 | 1321 | static void tx_only_all(void) |
b4b8faa1 | 1322 | { |
2e5d72c1 | 1323 | struct pollfd fds[MAX_SOCKS] = {}; |
46738f73 | 1324 | u32 frame_nb[MAX_SOCKS] = {}; |
ece6e969 | 1325 | int pkt_cnt = 0; |
46738f73 | 1326 | int i, ret; |
b4b8faa1 | 1327 | |
46738f73 MK |
1328 | for (i = 0; i < num_socks; i++) { |
1329 | fds[0].fd = xsk_socket__fd(xsks[i]->xsk); | |
1330 | fds[0].events = POLLOUT; | |
1331 | } | |
b4b8faa1 | 1332 | |
ece6e969 JJ |
1333 | while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { |
1334 | int batch_size = get_batch_size(pkt_cnt); | |
1335 | ||
b4b8faa1 | 1336 | if (opt_poll) { |
60dc609d CL |
1337 | for (i = 0; i < num_socks; i++) |
1338 | xsks[i]->app_stats.opt_polls++; | |
46738f73 | 1339 | ret = poll(fds, num_socks, opt_timeout); |
b4b8faa1 MK |
1340 | if (ret <= 0) |
1341 | continue; | |
1342 | ||
248c7f9c | 1343 | if (!(fds[0].revents & POLLOUT)) |
b4b8faa1 MK |
1344 | continue; |
1345 | } | |
1346 | ||
46738f73 | 1347 | for (i = 0; i < num_socks; i++) |
b69e56cf | 1348 | tx_only(xsks[i], &frame_nb[i], batch_size); |
ece6e969 JJ |
1349 | |
1350 | pkt_cnt += batch_size; | |
d3f11b01 JJ |
1351 | |
1352 | if (benchmark_done) | |
1353 | break; | |
b4b8faa1 | 1354 | } |
ece6e969 JJ |
1355 | |
1356 | if (opt_pkt_count) | |
1357 | complete_tx_only_all(); | |
b4b8faa1 MK |
1358 | } |
1359 | ||
284cbc61 | 1360 | static void l2fwd(struct xsk_socket_info *xsk) |
b4b8faa1 | 1361 | { |
46738f73 MK |
1362 | unsigned int rcvd, i; |
1363 | u32 idx_rx = 0, idx_tx = 0; | |
1364 | int ret; | |
b4b8faa1 | 1365 | |
284cbc61 | 1366 | complete_tx_l2fwd(xsk); |
b4b8faa1 | 1367 | |
cd9e72b6 | 1368 | rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); |
46738f73 | 1369 | if (!rcvd) { |
b35fc148 | 1370 | if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { |
60dc609d | 1371 | xsk->app_stats.rx_empty_polls++; |
284cbc61 | 1372 | recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); |
60dc609d | 1373 | } |
46738f73 MK |
1374 | return; |
1375 | } | |
90da4b32 | 1376 | xsk->ring_stats.rx_npkts += rcvd; |
b4b8faa1 | 1377 | |
46738f73 MK |
1378 | ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); |
1379 | while (ret != rcvd) { | |
1380 | if (ret < 0) | |
1381 | exit_with_error(-ret); | |
284cbc61 | 1382 | complete_tx_l2fwd(xsk); |
b35fc148 | 1383 | if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->tx)) { |
60dc609d | 1384 | xsk->app_stats.tx_wakeup_sendtos++; |
46738f73 | 1385 | kick_tx(xsk); |
60dc609d | 1386 | } |
248c7f9c | 1387 | ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); |
46738f73 MK |
1388 | } |
1389 | ||
1390 | for (i = 0; i < rcvd; i++) { | |
1391 | u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; | |
1392 | u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; | |
5a712e13 | 1393 | u64 orig = addr; |
03895e63 KL |
1394 | |
1395 | addr = xsk_umem__add_offset_to_addr(addr); | |
46738f73 MK |
1396 | char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); |
1397 | ||
1398 | swap_mac_addresses(pkt); | |
248c7f9c | 1399 | |
46738f73 | 1400 | hex_dump(pkt, len, addr); |
03895e63 | 1401 | xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig; |
46738f73 MK |
1402 | xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len; |
1403 | } | |
b4b8faa1 | 1404 | |
46738f73 MK |
1405 | xsk_ring_prod__submit(&xsk->tx, rcvd); |
1406 | xsk_ring_cons__release(&xsk->rx, rcvd); | |
b4b8faa1 | 1407 | |
90da4b32 | 1408 | xsk->ring_stats.tx_npkts += rcvd; |
46738f73 MK |
1409 | xsk->outstanding_tx += rcvd; |
1410 | } | |
1411 | ||
1412 | static void l2fwd_all(void) | |
1413 | { | |
2e5d72c1 | 1414 | struct pollfd fds[MAX_SOCKS] = {}; |
46738f73 MK |
1415 | int i, ret; |
1416 | ||
46738f73 MK |
1417 | for (;;) { |
1418 | if (opt_poll) { | |
284cbc61 BT |
1419 | for (i = 0; i < num_socks; i++) { |
1420 | fds[i].fd = xsk_socket__fd(xsks[i]->xsk); | |
1421 | fds[i].events = POLLOUT | POLLIN; | |
60dc609d | 1422 | xsks[i]->app_stats.opt_polls++; |
284cbc61 | 1423 | } |
46738f73 MK |
1424 | ret = poll(fds, num_socks, opt_timeout); |
1425 | if (ret <= 0) | |
1426 | continue; | |
1427 | } | |
b4b8faa1 | 1428 | |
46738f73 | 1429 | for (i = 0; i < num_socks; i++) |
284cbc61 | 1430 | l2fwd(xsks[i]); |
d3f11b01 JJ |
1431 | |
1432 | if (benchmark_done) | |
1433 | break; | |
b4b8faa1 MK |
1434 | } |
1435 | } | |
1436 | ||
2e5d72c1 MK |
1437 | static void load_xdp_program(char **argv, struct bpf_object **obj) |
1438 | { | |
1439 | struct bpf_prog_load_attr prog_load_attr = { | |
1440 | .prog_type = BPF_PROG_TYPE_XDP, | |
1441 | }; | |
1442 | char xdp_filename[256]; | |
1443 | int prog_fd; | |
1444 | ||
1445 | snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]); | |
1446 | prog_load_attr.file = xdp_filename; | |
1447 | ||
1448 | if (bpf_prog_load_xattr(&prog_load_attr, obj, &prog_fd)) | |
1449 | exit(EXIT_FAILURE); | |
1450 | if (prog_fd < 0) { | |
1451 | fprintf(stderr, "ERROR: no program found: %s\n", | |
1452 | strerror(prog_fd)); | |
1453 | exit(EXIT_FAILURE); | |
1454 | } | |
1455 | ||
1456 | if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) { | |
1457 | fprintf(stderr, "ERROR: link set xdp fd failed\n"); | |
1458 | exit(EXIT_FAILURE); | |
1459 | } | |
1460 | } | |
1461 | ||
1462 | static void enter_xsks_into_map(struct bpf_object *obj) | |
1463 | { | |
1464 | struct bpf_map *map; | |
1465 | int i, xsks_map; | |
1466 | ||
1467 | map = bpf_object__find_map_by_name(obj, "xsks_map"); | |
1468 | xsks_map = bpf_map__fd(map); | |
1469 | if (xsks_map < 0) { | |
1470 | fprintf(stderr, "ERROR: no xsks map found: %s\n", | |
1471 | strerror(xsks_map)); | |
1472 | exit(EXIT_FAILURE); | |
1473 | } | |
1474 | ||
1475 | for (i = 0; i < num_socks; i++) { | |
1476 | int fd = xsk_socket__fd(xsks[i]->xsk); | |
1477 | int key, ret; | |
1478 | ||
1479 | key = i; | |
1480 | ret = bpf_map_update_elem(xsks_map, &key, &fd, 0); | |
1481 | if (ret) { | |
1482 | fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i); | |
1483 | exit(EXIT_FAILURE); | |
1484 | } | |
1485 | } | |
1486 | } | |
1487 | ||
b35fc148 BT |
1488 | static void apply_setsockopt(struct xsk_socket_info *xsk) |
1489 | { | |
1490 | int sock_opt; | |
1491 | ||
1492 | if (!opt_busy_poll) | |
1493 | return; | |
1494 | ||
1495 | sock_opt = 1; | |
1496 | if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL, | |
1497 | (void *)&sock_opt, sizeof(sock_opt)) < 0) | |
1498 | exit_with_error(errno); | |
1499 | ||
1500 | sock_opt = 20; | |
1501 | if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL, | |
1502 | (void *)&sock_opt, sizeof(sock_opt)) < 0) | |
1503 | exit_with_error(errno); | |
41bf900f BT |
1504 | |
1505 | sock_opt = opt_batch_size; | |
1506 | if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET, | |
1507 | (void *)&sock_opt, sizeof(sock_opt)) < 0) | |
1508 | exit_with_error(errno); | |
b35fc148 BT |
1509 | } |
1510 | ||
3627d970 MD |
/*
 * Receive one file descriptor passed as SCM_RIGHTS ancillary data on the
 * connected socket @sock.
 *
 * @_fd: set to the received descriptor on success; left untouched on failure.
 *
 * Returns 0 on success, or -EINVAL when recvmsg() fails, returns no data, or
 * the message does not carry a complete SCM_RIGHTS descriptor.
 */
static int recv_xsks_map_fd_from_ctrl_node(int sock, int *_fd)
{
	/* The union gives the control buffer the alignment cmsghdr needs. */
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} ctrl;
	struct cmsghdr *cmsg;
	struct msghdr msg;
	struct iovec iov;
	int value;
	int len;

	iov.iov_base = &value;
	iov.iov_len = sizeof(int);

	memset(&msg, 0, sizeof(msg));
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = ctrl.buf;
	msg.msg_controllen = sizeof(ctrl.buf);

	len = recvmsg(sock, &msg, 0);

	if (len < 0) {
		fprintf(stderr, "Recvmsg failed length incorrect.\n");
		return -EINVAL;
	}

	if (len == 0) {
		fprintf(stderr, "Recvmsg failed no data\n");
		return -EINVAL;
	}

	/*
	 * CMSG_FIRSTHDR() yields NULL when the peer sent no ancillary data;
	 * also make sure the header really describes one passed descriptor
	 * before reading it.
	 */
	cmsg = CMSG_FIRSTHDR(&msg);
	if (!cmsg || (msg.msg_flags & MSG_CTRUNC) ||
	    cmsg->cmsg_level != SOL_SOCKET ||
	    cmsg->cmsg_type != SCM_RIGHTS ||
	    cmsg->cmsg_len < CMSG_LEN(sizeof(int))) {
		fprintf(stderr, "Recvmsg failed no file descriptor received\n");
		return -EINVAL;
	}

	memcpy(_fd, CMSG_DATA(cmsg), sizeof(int));

	return 0;
}
1548 | ||
1549 | static int | |
1550 | recv_xsks_map_fd(int *xsks_map_fd) | |
1551 | { | |
1552 | struct sockaddr_un server; | |
1553 | int err; | |
1554 | ||
1555 | sock = socket(AF_UNIX, SOCK_STREAM, 0); | |
1556 | if (sock < 0) { | |
1557 | fprintf(stderr, "Error opening socket stream: %s", strerror(errno)); | |
1558 | return errno; | |
1559 | } | |
1560 | ||
1561 | server.sun_family = AF_UNIX; | |
1562 | strcpy(server.sun_path, SOCKET_NAME); | |
1563 | ||
1564 | if (connect(sock, (struct sockaddr *)&server, sizeof(struct sockaddr_un)) < 0) { | |
1565 | close(sock); | |
1566 | fprintf(stderr, "Error connecting stream socket: %s", strerror(errno)); | |
1567 | return errno; | |
1568 | } | |
1569 | ||
1570 | err = recv_xsks_map_fd_from_ctrl_node(sock, xsks_map_fd); | |
1571 | if (err) { | |
2faa7328 | 1572 | fprintf(stderr, "Error %d receiving fd\n", err); |
3627d970 MD |
1573 | return err; |
1574 | } | |
1575 | return 0; | |
1576 | } | |
1577 | ||
b4b8faa1 MK |
1578 | int main(int argc, char **argv) |
1579 | { | |
3627d970 MD |
1580 | struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 }; |
1581 | struct __user_cap_data_struct data[2] = { { 0 } }; | |
1582 | struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; | |
661842c4 | 1583 | bool rx = false, tx = false; |
248c7f9c | 1584 | struct xsk_umem_info *umem; |
2e5d72c1 | 1585 | struct bpf_object *obj; |
3627d970 | 1586 | int xsks_map_fd = 0; |
b4b8faa1 | 1587 | pthread_t pt; |
2e5d72c1 | 1588 | int i, ret; |
248c7f9c | 1589 | void *bufs; |
b4b8faa1 MK |
1590 | |
1591 | parse_command_line(argc, argv); | |
1592 | ||
3627d970 MD |
1593 | if (opt_reduced_cap) { |
1594 | if (capget(&hdr, data) < 0) | |
1595 | fprintf(stderr, "Error getting capabilities\n"); | |
1596 | ||
1597 | data->effective &= CAP_TO_MASK(CAP_NET_RAW); | |
1598 | data->permitted &= CAP_TO_MASK(CAP_NET_RAW); | |
1599 | ||
1600 | if (capset(&hdr, data) < 0) | |
1601 | fprintf(stderr, "Setting capabilities failed\n"); | |
1602 | ||
1603 | if (capget(&hdr, data) < 0) { | |
1604 | fprintf(stderr, "Error getting capabilities\n"); | |
1605 | } else { | |
1606 | fprintf(stderr, "Capabilities EFF %x Caps INH %x Caps Per %x\n", | |
1607 | data[0].effective, data[0].inheritable, data[0].permitted); | |
1608 | fprintf(stderr, "Capabilities EFF %x Caps INH %x Caps Per %x\n", | |
1609 | data[1].effective, data[1].inheritable, data[1].permitted); | |
1610 | } | |
1611 | } else { | |
1612 | if (setrlimit(RLIMIT_MEMLOCK, &r)) { | |
1613 | fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n", | |
1614 | strerror(errno)); | |
1615 | exit(EXIT_FAILURE); | |
1616 | } | |
1617 | ||
1618 | if (opt_num_xsks > 1) | |
1619 | load_xdp_program(argv, &obj); | |
1620 | } | |
2e5d72c1 | 1621 | |
3945b37a KL |
1622 | /* Reserve memory for the umem. Use hugepages if unaligned chunk mode */ |
1623 | bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size, | |
1624 | PROT_READ | PROT_WRITE, | |
1625 | MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0); | |
1626 | if (bufs == MAP_FAILED) { | |
1627 | printf("ERROR: mmap failed\n"); | |
1628 | exit(EXIT_FAILURE); | |
1629 | } | |
2e5d72c1 MK |
1630 | |
1631 | /* Create sockets... */ | |
123e8da1 | 1632 | umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size); |
661842c4 MK |
1633 | if (opt_bench == BENCH_RXDROP || opt_bench == BENCH_L2FWD) { |
1634 | rx = true; | |
1635 | xsk_populate_fill_ring(umem); | |
1636 | } | |
1637 | if (opt_bench == BENCH_L2FWD || opt_bench == BENCH_TXONLY) | |
1638 | tx = true; | |
2e5d72c1 | 1639 | for (i = 0; i < opt_num_xsks; i++) |
661842c4 | 1640 | xsks[num_socks++] = xsk_configure_socket(umem, rx, tx); |
b4b8faa1 | 1641 | |
b35fc148 BT |
1642 | for (i = 0; i < opt_num_xsks; i++) |
1643 | apply_setsockopt(xsks[i]); | |
1644 | ||
4a3c23ae JJ |
1645 | if (opt_bench == BENCH_TXONLY) { |
1646 | gen_eth_hdr_data(); | |
1647 | ||
661842c4 MK |
1648 | for (i = 0; i < NUM_FRAMES; i++) |
1649 | gen_eth_frame(umem, i * opt_xsk_frame_size); | |
4a3c23ae | 1650 | } |
b4b8faa1 | 1651 | |
2e5d72c1 MK |
1652 | if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY) |
1653 | enter_xsks_into_map(obj); | |
b4b8faa1 | 1654 | |
3627d970 MD |
1655 | if (opt_reduced_cap) { |
1656 | ret = recv_xsks_map_fd(&xsks_map_fd); | |
1657 | if (ret) { | |
1658 | fprintf(stderr, "Error %d receiving xsks_map_fd\n", ret); | |
1659 | exit_with_error(ret); | |
1660 | } | |
1661 | if (xsks[0]->xsk) { | |
1662 | ret = xsk_socket__update_xskmap(xsks[0]->xsk, xsks_map_fd); | |
1663 | if (ret) { | |
1664 | fprintf(stderr, "Update of BPF map failed(%d)\n", ret); | |
1665 | exit_with_error(ret); | |
1666 | } | |
1667 | } | |
1668 | } | |
1669 | ||
b4b8faa1 MK |
1670 | signal(SIGINT, int_exit); |
1671 | signal(SIGTERM, int_exit); | |
1672 | signal(SIGABRT, int_exit); | |
1673 | ||
1674 | setlocale(LC_ALL, ""); | |
1675 | ||
74e00676 MK |
1676 | if (!opt_quiet) { |
1677 | ret = pthread_create(&pt, NULL, poller, NULL); | |
1678 | if (ret) | |
1679 | exit_with_error(ret); | |
1680 | } | |
b4b8faa1 MK |
1681 | |
1682 | prev_time = get_nsecs(); | |
d3f11b01 | 1683 | start_time = prev_time; |
b4b8faa1 MK |
1684 | |
1685 | if (opt_bench == BENCH_RXDROP) | |
1686 | rx_drop_all(); | |
1687 | else if (opt_bench == BENCH_TXONLY) | |
46738f73 | 1688 | tx_only_all(); |
b4b8faa1 | 1689 | else |
46738f73 | 1690 | l2fwd_all(); |
b4b8faa1 | 1691 | |
ece6e969 JJ |
1692 | benchmark_done = true; |
1693 | ||
74e00676 MK |
1694 | if (!opt_quiet) |
1695 | pthread_join(pt, NULL); | |
d3f11b01 | 1696 | |
69525588 JJ |
1697 | xdpsock_cleanup(); |
1698 | ||
6bc66998 MF |
1699 | munmap(bufs, NUM_FRAMES * opt_xsk_frame_size); |
1700 | ||
b4b8faa1 MK |
1701 | return 0; |
1702 | } |