]>
Commit | Line | Data |
---|---|---|
b4b8faa1 | 1 | // SPDX-License-Identifier: GPL-2.0 |
dac09149 | 2 | /* Copyright(c) 2017 - 2018 Intel Corporation. */ |
b4b8faa1 | 3 | |
248c7f9c | 4 | #include <asm/barrier.h> |
b4b8faa1 MK |
5 | #include <errno.h> |
6 | #include <getopt.h> | |
7 | #include <libgen.h> | |
8 | #include <linux/bpf.h> | |
248c7f9c | 9 | #include <linux/compiler.h> |
b4b8faa1 MK |
10 | #include <linux/if_link.h> |
11 | #include <linux/if_xdp.h> | |
12 | #include <linux/if_ether.h> | |
4a3c23ae | 13 | #include <linux/ip.h> |
67ed3755 | 14 | #include <linux/limits.h> |
4a3c23ae JJ |
15 | #include <linux/udp.h> |
16 | #include <arpa/inet.h> | |
248c7f9c MK |
17 | #include <locale.h> |
18 | #include <net/ethernet.h> | |
b4b8faa1 | 19 | #include <net/if.h> |
248c7f9c MK |
20 | #include <poll.h> |
21 | #include <pthread.h> | |
b4b8faa1 MK |
22 | #include <signal.h> |
23 | #include <stdbool.h> | |
24 | #include <stdio.h> | |
25 | #include <stdlib.h> | |
26 | #include <string.h> | |
3627d970 | 27 | #include <sys/capability.h> |
248c7f9c | 28 | #include <sys/mman.h> |
b4b8faa1 MK |
29 | #include <sys/resource.h> |
30 | #include <sys/socket.h> | |
248c7f9c | 31 | #include <sys/types.h> |
3627d970 | 32 | #include <sys/un.h> |
b4b8faa1 MK |
33 | #include <time.h> |
34 | #include <unistd.h> | |
b4b8faa1 | 35 | |
7cf245a3 THJ |
36 | #include <bpf/libbpf.h> |
37 | #include <bpf/xsk.h> | |
2bf3e2ef | 38 | #include <bpf/bpf.h> |
7cf245a3 | 39 | #include "xdpsock.h" |
b4b8faa1 | 40 | |
b4b8faa1 MK |
/* Fallback definitions, used only when the included headers lack them. */
#if !defined(SOL_XDP)
#define SOL_XDP 283
#endif

#if !defined(AF_XDP)
#define AF_XDP 44
#endif

#if !defined(PF_XDP)
#define PF_XDP AF_XDP
#endif
52 | ||
/* Frame count constant: 4 * 1024 = 4096. */
#define NUM_FRAMES (4 * 1024)

/* Default for opt_pkt_size, in bytes. */
#define MIN_PKT_SIZE 64

/* hex_dump() returns without printing while this is 0. */
#define DEBUG_HEXDUMP 0
57 | ||
a412ef54 | 58 | typedef __u64 u64; |
b4b8faa1 | 59 | typedef __u32 u32; |
4a3c23ae JJ |
60 | typedef __u16 u16; |
61 | typedef __u8 u8; | |
b4b8faa1 MK |
62 | |
63 | static unsigned long prev_time; | |
64 | ||
/*
 * Benchmark modes; print_benchmark() reports them as "rxdrop", "txonly"
 * and "l2fwd". Values are 0, 1 and 2 respectively.
 */
enum benchmark_type {
	BENCH_RXDROP,
	BENCH_TXONLY,
	BENCH_L2FWD,
};
70 | ||
71 | static enum benchmark_type opt_bench = BENCH_RXDROP; | |
743e568c | 72 | static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; |
b4b8faa1 MK |
73 | static const char *opt_if = ""; |
74 | static int opt_ifindex; | |
75 | static int opt_queue; | |
d3f11b01 JJ |
76 | static unsigned long opt_duration; |
77 | static unsigned long start_time; | |
78 | static bool benchmark_done; | |
cd9e72b6 | 79 | static u32 opt_batch_size = 64; |
ece6e969 | 80 | static int opt_pkt_count; |
4a3c23ae | 81 | static u16 opt_pkt_size = MIN_PKT_SIZE; |
46e3268e | 82 | static u32 opt_pkt_fill_pattern = 0x12345678; |
b36c3206 | 83 | static bool opt_extra_stats; |
74e00676 | 84 | static bool opt_quiet; |
60dc609d | 85 | static bool opt_app_stats; |
67ed3755 CL |
86 | static const char *opt_irq_str = ""; |
87 | static u32 irq_no; | |
88 | static int irqs_at_init = -1; | |
b4b8faa1 | 89 | static int opt_poll; |
b4b8faa1 | 90 | static int opt_interval = 1; |
46738f73 | 91 | static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; |
c543f546 KL |
92 | static u32 opt_umem_flags; |
93 | static int opt_unaligned_chunks; | |
3945b37a | 94 | static int opt_mmap_flags; |
123e8da1 | 95 | static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; |
46738f73 MK |
96 | static int opt_timeout = 1000; |
97 | static bool opt_need_wakeup = true; | |
2e5d72c1 MK |
98 | static u32 opt_num_xsks = 1; |
99 | static u32 prog_id; | |
b35fc148 | 100 | static bool opt_busy_poll; |
3627d970 | 101 | static bool opt_reduced_cap; |
b4b8faa1 | 102 | |
/*
 * Per-socket ring counters. Each counter has a prev_* twin holding the
 * value seen at the previous dump_stats() call so rates can be derived.
 */
struct xsk_ring_stats {
	/* Current totals. */
	unsigned long rx_npkts;
	unsigned long tx_npkts;
	/* The next six are copied from XDP_STATISTICS by xsk_get_xdp_stats(). */
	unsigned long rx_dropped_npkts;
	unsigned long rx_invalid_npkts;
	unsigned long tx_invalid_npkts;
	unsigned long rx_full_npkts;
	unsigned long rx_fill_empty_npkts;
	unsigned long tx_empty_npkts;
	/* Snapshots from the previous report. */
	unsigned long prev_rx_npkts;
	unsigned long prev_tx_npkts;
	unsigned long prev_rx_dropped_npkts;
	unsigned long prev_rx_invalid_npkts;
	unsigned long prev_tx_invalid_npkts;
	unsigned long prev_rx_full_npkts;
	unsigned long prev_rx_fill_empty_npkts;
	unsigned long prev_tx_empty_npkts;
};
121 | ||
67ed3755 CL |
/* Interrupt count and its value at the previous dump_driver_stats() call. */
struct xsk_driver_stats {
	unsigned long intrs;
	unsigned long prev_intrs;
};
126 | ||
60dc609d CL |
/*
 * Application-level call counters reported by dump_app_stats(); the prev_*
 * members hold the values from the previous report.
 */
struct xsk_app_stats {
	/* Current totals. */
	unsigned long rx_empty_polls;
	unsigned long fill_fail_polls;
	unsigned long copy_tx_sendtos;
	unsigned long tx_wakeup_sendtos;
	unsigned long opt_polls;
	/* Snapshots from the previous report. */
	unsigned long prev_rx_empty_polls;
	unsigned long prev_fill_fail_polls;
	unsigned long prev_copy_tx_sendtos;
	unsigned long prev_tx_wakeup_sendtos;
	unsigned long prev_opt_polls;
};
139 | ||
2e8806f0 CL |
140 | struct xsk_umem_info { |
141 | struct xsk_ring_prod fq; | |
142 | struct xsk_ring_cons cq; | |
143 | struct xsk_umem *umem; | |
144 | void *buffer; | |
145 | }; | |
146 | ||
147 | struct xsk_socket_info { | |
148 | struct xsk_ring_cons rx; | |
149 | struct xsk_ring_prod tx; | |
150 | struct xsk_umem_info *umem; | |
151 | struct xsk_socket *xsk; | |
152 | struct xsk_ring_stats ring_stats; | |
60dc609d | 153 | struct xsk_app_stats app_stats; |
67ed3755 | 154 | struct xsk_driver_stats drv_stats; |
248c7f9c | 155 | u32 outstanding_tx; |
b4b8faa1 MK |
156 | }; |
157 | ||
b4b8faa1 | 158 | static int num_socks; |
248c7f9c | 159 | struct xsk_socket_info *xsks[MAX_SOCKS]; |
3627d970 | 160 | int sock; |
b4b8faa1 MK |
161 | |
162 | static unsigned long get_nsecs(void) | |
163 | { | |
164 | struct timespec ts; | |
165 | ||
166 | clock_gettime(CLOCK_MONOTONIC, &ts); | |
167 | return ts.tv_sec * 1000000000UL + ts.tv_nsec; | |
168 | } | |
169 | ||
248c7f9c | 170 | static void print_benchmark(bool running) |
b4b8faa1 | 171 | { |
248c7f9c | 172 | const char *bench_str = "INVALID"; |
b4b8faa1 | 173 | |
248c7f9c MK |
174 | if (opt_bench == BENCH_RXDROP) |
175 | bench_str = "rxdrop"; | |
176 | else if (opt_bench == BENCH_TXONLY) | |
177 | bench_str = "txonly"; | |
178 | else if (opt_bench == BENCH_L2FWD) | |
179 | bench_str = "l2fwd"; | |
b4b8faa1 | 180 | |
248c7f9c MK |
181 | printf("%s:%d %s ", opt_if, opt_queue, bench_str); |
182 | if (opt_xdp_flags & XDP_FLAGS_SKB_MODE) | |
183 | printf("xdp-skb "); | |
184 | else if (opt_xdp_flags & XDP_FLAGS_DRV_MODE) | |
185 | printf("xdp-drv "); | |
186 | else | |
187 | printf(" "); | |
b4b8faa1 | 188 | |
248c7f9c MK |
189 | if (opt_poll) |
190 | printf("poll() "); | |
b4b8faa1 | 191 | |
248c7f9c MK |
192 | if (running) { |
193 | printf("running..."); | |
194 | fflush(stdout); | |
b4b8faa1 | 195 | } |
b4b8faa1 MK |
196 | } |
197 | ||
b36c3206 CL |
198 | static int xsk_get_xdp_stats(int fd, struct xsk_socket_info *xsk) |
199 | { | |
200 | struct xdp_statistics stats; | |
201 | socklen_t optlen; | |
202 | int err; | |
203 | ||
204 | optlen = sizeof(stats); | |
205 | err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen); | |
206 | if (err) | |
207 | return err; | |
208 | ||
209 | if (optlen == sizeof(struct xdp_statistics)) { | |
2e8806f0 CL |
210 | xsk->ring_stats.rx_dropped_npkts = stats.rx_dropped; |
211 | xsk->ring_stats.rx_invalid_npkts = stats.rx_invalid_descs; | |
212 | xsk->ring_stats.tx_invalid_npkts = stats.tx_invalid_descs; | |
213 | xsk->ring_stats.rx_full_npkts = stats.rx_ring_full; | |
214 | xsk->ring_stats.rx_fill_empty_npkts = stats.rx_fill_ring_empty_descs; | |
215 | xsk->ring_stats.tx_empty_npkts = stats.tx_ring_empty_descs; | |
b36c3206 CL |
216 | return 0; |
217 | } | |
218 | ||
219 | return -EINVAL; | |
220 | } | |
221 | ||
60dc609d CL |
222 | static void dump_app_stats(long dt) |
223 | { | |
224 | int i; | |
225 | ||
226 | for (i = 0; i < num_socks && xsks[i]; i++) { | |
227 | char *fmt = "%-18s %'-14.0f %'-14lu\n"; | |
228 | double rx_empty_polls_ps, fill_fail_polls_ps, copy_tx_sendtos_ps, | |
229 | tx_wakeup_sendtos_ps, opt_polls_ps; | |
230 | ||
231 | rx_empty_polls_ps = (xsks[i]->app_stats.rx_empty_polls - | |
232 | xsks[i]->app_stats.prev_rx_empty_polls) * 1000000000. / dt; | |
233 | fill_fail_polls_ps = (xsks[i]->app_stats.fill_fail_polls - | |
234 | xsks[i]->app_stats.prev_fill_fail_polls) * 1000000000. / dt; | |
235 | copy_tx_sendtos_ps = (xsks[i]->app_stats.copy_tx_sendtos - | |
236 | xsks[i]->app_stats.prev_copy_tx_sendtos) * 1000000000. / dt; | |
237 | tx_wakeup_sendtos_ps = (xsks[i]->app_stats.tx_wakeup_sendtos - | |
238 | xsks[i]->app_stats.prev_tx_wakeup_sendtos) | |
239 | * 1000000000. / dt; | |
240 | opt_polls_ps = (xsks[i]->app_stats.opt_polls - | |
241 | xsks[i]->app_stats.prev_opt_polls) * 1000000000. / dt; | |
242 | ||
243 | printf("\n%-18s %-14s %-14s\n", "", "calls/s", "count"); | |
244 | printf(fmt, "rx empty polls", rx_empty_polls_ps, xsks[i]->app_stats.rx_empty_polls); | |
245 | printf(fmt, "fill fail polls", fill_fail_polls_ps, | |
246 | xsks[i]->app_stats.fill_fail_polls); | |
247 | printf(fmt, "copy tx sendtos", copy_tx_sendtos_ps, | |
248 | xsks[i]->app_stats.copy_tx_sendtos); | |
249 | printf(fmt, "tx wakeup sendtos", tx_wakeup_sendtos_ps, | |
250 | xsks[i]->app_stats.tx_wakeup_sendtos); | |
251 | printf(fmt, "opt polls", opt_polls_ps, xsks[i]->app_stats.opt_polls); | |
252 | ||
253 | xsks[i]->app_stats.prev_rx_empty_polls = xsks[i]->app_stats.rx_empty_polls; | |
254 | xsks[i]->app_stats.prev_fill_fail_polls = xsks[i]->app_stats.fill_fail_polls; | |
255 | xsks[i]->app_stats.prev_copy_tx_sendtos = xsks[i]->app_stats.copy_tx_sendtos; | |
256 | xsks[i]->app_stats.prev_tx_wakeup_sendtos = xsks[i]->app_stats.tx_wakeup_sendtos; | |
257 | xsks[i]->app_stats.prev_opt_polls = xsks[i]->app_stats.opt_polls; | |
258 | } | |
259 | } | |
260 | ||
67ed3755 CL |
261 | static bool get_interrupt_number(void) |
262 | { | |
263 | FILE *f_int_proc; | |
264 | char line[4096]; | |
265 | bool found = false; | |
266 | ||
267 | f_int_proc = fopen("/proc/interrupts", "r"); | |
268 | if (f_int_proc == NULL) { | |
269 | printf("Failed to open /proc/interrupts.\n"); | |
270 | return found; | |
271 | } | |
272 | ||
273 | while (!feof(f_int_proc) && !found) { | |
274 | /* Make sure to read a full line at a time */ | |
275 | if (fgets(line, sizeof(line), f_int_proc) == NULL || | |
276 | line[strlen(line) - 1] != '\n') { | |
277 | printf("Error reading from interrupts file\n"); | |
278 | break; | |
279 | } | |
280 | ||
281 | /* Extract interrupt number from line */ | |
282 | if (strstr(line, opt_irq_str) != NULL) { | |
283 | irq_no = atoi(line); | |
284 | found = true; | |
285 | break; | |
286 | } | |
287 | } | |
288 | ||
289 | fclose(f_int_proc); | |
290 | ||
291 | return found; | |
292 | } | |
293 | ||
294 | static int get_irqs(void) | |
295 | { | |
296 | char count_path[PATH_MAX]; | |
297 | int total_intrs = -1; | |
298 | FILE *f_count_proc; | |
299 | char line[4096]; | |
300 | ||
301 | snprintf(count_path, sizeof(count_path), | |
302 | "/sys/kernel/irq/%i/per_cpu_count", irq_no); | |
303 | f_count_proc = fopen(count_path, "r"); | |
304 | if (f_count_proc == NULL) { | |
305 | printf("Failed to open %s\n", count_path); | |
306 | return total_intrs; | |
307 | } | |
308 | ||
309 | if (fgets(line, sizeof(line), f_count_proc) == NULL || | |
310 | line[strlen(line) - 1] != '\n') { | |
311 | printf("Error reading from %s\n", count_path); | |
312 | } else { | |
313 | static const char com[2] = ","; | |
314 | char *token; | |
315 | ||
316 | total_intrs = 0; | |
317 | token = strtok(line, com); | |
318 | while (token != NULL) { | |
319 | /* sum up interrupts across all cores */ | |
320 | total_intrs += atoi(token); | |
321 | token = strtok(NULL, com); | |
322 | } | |
323 | } | |
324 | ||
325 | fclose(f_count_proc); | |
326 | ||
327 | return total_intrs; | |
328 | } | |
329 | ||
330 | static void dump_driver_stats(long dt) | |
331 | { | |
332 | int i; | |
333 | ||
334 | for (i = 0; i < num_socks && xsks[i]; i++) { | |
335 | char *fmt = "%-18s %'-14.0f %'-14lu\n"; | |
336 | double intrs_ps; | |
337 | int n_ints = get_irqs(); | |
338 | ||
339 | if (n_ints < 0) { | |
340 | printf("error getting intr info for intr %i\n", irq_no); | |
341 | return; | |
342 | } | |
343 | xsks[i]->drv_stats.intrs = n_ints - irqs_at_init; | |
344 | ||
345 | intrs_ps = (xsks[i]->drv_stats.intrs - xsks[i]->drv_stats.prev_intrs) * | |
346 | 1000000000. / dt; | |
347 | ||
348 | printf("\n%-18s %-14s %-14s\n", "", "intrs/s", "count"); | |
349 | printf(fmt, "irqs", intrs_ps, xsks[i]->drv_stats.intrs); | |
350 | ||
351 | xsks[i]->drv_stats.prev_intrs = xsks[i]->drv_stats.intrs; | |
352 | } | |
353 | } | |
354 | ||
248c7f9c | 355 | static void dump_stats(void) |
b4b8faa1 | 356 | { |
248c7f9c MK |
357 | unsigned long now = get_nsecs(); |
358 | long dt = now - prev_time; | |
359 | int i; | |
b4b8faa1 | 360 | |
248c7f9c | 361 | prev_time = now; |
b4b8faa1 | 362 | |
248c7f9c | 363 | for (i = 0; i < num_socks && xsks[i]; i++) { |
60dc609d | 364 | char *fmt = "%-18s %'-14.0f %'-14lu\n"; |
b36c3206 CL |
365 | double rx_pps, tx_pps, dropped_pps, rx_invalid_pps, full_pps, fill_empty_pps, |
366 | tx_invalid_pps, tx_empty_pps; | |
b4b8faa1 | 367 | |
2e8806f0 | 368 | rx_pps = (xsks[i]->ring_stats.rx_npkts - xsks[i]->ring_stats.prev_rx_npkts) * |
248c7f9c | 369 | 1000000000. / dt; |
2e8806f0 | 370 | tx_pps = (xsks[i]->ring_stats.tx_npkts - xsks[i]->ring_stats.prev_tx_npkts) * |
248c7f9c | 371 | 1000000000. / dt; |
b4b8faa1 | 372 | |
248c7f9c MK |
373 | printf("\n sock%d@", i); |
374 | print_benchmark(false); | |
375 | printf("\n"); | |
b4b8faa1 | 376 | |
60dc609d | 377 | printf("%-18s %-14s %-14s %-14.2f\n", "", "pps", "pkts", |
248c7f9c | 378 | dt / 1000000000.); |
2e8806f0 CL |
379 | printf(fmt, "rx", rx_pps, xsks[i]->ring_stats.rx_npkts); |
380 | printf(fmt, "tx", tx_pps, xsks[i]->ring_stats.tx_npkts); | |
b4b8faa1 | 381 | |
2e8806f0 CL |
382 | xsks[i]->ring_stats.prev_rx_npkts = xsks[i]->ring_stats.rx_npkts; |
383 | xsks[i]->ring_stats.prev_tx_npkts = xsks[i]->ring_stats.tx_npkts; | |
b36c3206 CL |
384 | |
385 | if (opt_extra_stats) { | |
386 | if (!xsk_get_xdp_stats(xsk_socket__fd(xsks[i]->xsk), xsks[i])) { | |
2e8806f0 CL |
387 | dropped_pps = (xsks[i]->ring_stats.rx_dropped_npkts - |
388 | xsks[i]->ring_stats.prev_rx_dropped_npkts) * | |
389 | 1000000000. / dt; | |
390 | rx_invalid_pps = (xsks[i]->ring_stats.rx_invalid_npkts - | |
391 | xsks[i]->ring_stats.prev_rx_invalid_npkts) * | |
392 | 1000000000. / dt; | |
393 | tx_invalid_pps = (xsks[i]->ring_stats.tx_invalid_npkts - | |
394 | xsks[i]->ring_stats.prev_tx_invalid_npkts) * | |
395 | 1000000000. / dt; | |
396 | full_pps = (xsks[i]->ring_stats.rx_full_npkts - | |
397 | xsks[i]->ring_stats.prev_rx_full_npkts) * | |
398 | 1000000000. / dt; | |
399 | fill_empty_pps = (xsks[i]->ring_stats.rx_fill_empty_npkts - | |
400 | xsks[i]->ring_stats.prev_rx_fill_empty_npkts) * | |
401 | 1000000000. / dt; | |
402 | tx_empty_pps = (xsks[i]->ring_stats.tx_empty_npkts - | |
403 | xsks[i]->ring_stats.prev_tx_empty_npkts) * | |
404 | 1000000000. / dt; | |
b36c3206 CL |
405 | |
406 | printf(fmt, "rx dropped", dropped_pps, | |
2e8806f0 | 407 | xsks[i]->ring_stats.rx_dropped_npkts); |
b36c3206 | 408 | printf(fmt, "rx invalid", rx_invalid_pps, |
2e8806f0 | 409 | xsks[i]->ring_stats.rx_invalid_npkts); |
b36c3206 | 410 | printf(fmt, "tx invalid", tx_invalid_pps, |
2e8806f0 | 411 | xsks[i]->ring_stats.tx_invalid_npkts); |
b36c3206 | 412 | printf(fmt, "rx queue full", full_pps, |
2e8806f0 | 413 | xsks[i]->ring_stats.rx_full_npkts); |
b36c3206 | 414 | printf(fmt, "fill ring empty", fill_empty_pps, |
2e8806f0 | 415 | xsks[i]->ring_stats.rx_fill_empty_npkts); |
b36c3206 | 416 | printf(fmt, "tx ring empty", tx_empty_pps, |
2e8806f0 CL |
417 | xsks[i]->ring_stats.tx_empty_npkts); |
418 | ||
419 | xsks[i]->ring_stats.prev_rx_dropped_npkts = | |
420 | xsks[i]->ring_stats.rx_dropped_npkts; | |
421 | xsks[i]->ring_stats.prev_rx_invalid_npkts = | |
422 | xsks[i]->ring_stats.rx_invalid_npkts; | |
423 | xsks[i]->ring_stats.prev_tx_invalid_npkts = | |
424 | xsks[i]->ring_stats.tx_invalid_npkts; | |
425 | xsks[i]->ring_stats.prev_rx_full_npkts = | |
426 | xsks[i]->ring_stats.rx_full_npkts; | |
427 | xsks[i]->ring_stats.prev_rx_fill_empty_npkts = | |
428 | xsks[i]->ring_stats.rx_fill_empty_npkts; | |
429 | xsks[i]->ring_stats.prev_tx_empty_npkts = | |
430 | xsks[i]->ring_stats.tx_empty_npkts; | |
b36c3206 CL |
431 | } else { |
432 | printf("%-15s\n", "Error retrieving extra stats"); | |
433 | } | |
434 | } | |
b4b8faa1 | 435 | } |
60dc609d CL |
436 | |
437 | if (opt_app_stats) | |
438 | dump_app_stats(dt); | |
67ed3755 CL |
439 | if (irq_no) |
440 | dump_driver_stats(dt); | |
b4b8faa1 MK |
441 | } |
442 | ||
d3f11b01 JJ |
443 | static bool is_benchmark_done(void) |
444 | { | |
445 | if (opt_duration > 0) { | |
446 | unsigned long dt = (get_nsecs() - start_time); | |
447 | ||
448 | if (dt >= opt_duration) | |
449 | benchmark_done = true; | |
450 | } | |
451 | return benchmark_done; | |
452 | } | |
453 | ||
248c7f9c | 454 | static void *poller(void *arg) |
b4b8faa1 | 455 | { |
248c7f9c | 456 | (void)arg; |
d3f11b01 | 457 | while (!is_benchmark_done()) { |
248c7f9c MK |
458 | sleep(opt_interval); |
459 | dump_stats(); | |
b4b8faa1 MK |
460 | } |
461 | ||
248c7f9c | 462 | return NULL; |
b4b8faa1 MK |
463 | } |
464 | ||
248c7f9c | 465 | static void remove_xdp_program(void) |
b4b8faa1 | 466 | { |
2e5d72c1 | 467 | u32 curr_prog_id = 0; |
3627d970 | 468 | int cmd = CLOSE_CONN; |
b4b8faa1 | 469 | |
248c7f9c MK |
470 | if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { |
471 | printf("bpf_get_link_xdp_id failed\n"); | |
472 | exit(EXIT_FAILURE); | |
b4b8faa1 | 473 | } |
248c7f9c MK |
474 | if (prog_id == curr_prog_id) |
475 | bpf_set_link_xdp_fd(opt_ifindex, -1, opt_xdp_flags); | |
476 | else if (!curr_prog_id) | |
477 | printf("couldn't find a prog id on a given interface\n"); | |
478 | else | |
479 | printf("program on interface changed, not removing\n"); | |
3627d970 MD |
480 | |
481 | if (opt_reduced_cap) { | |
482 | if (write(sock, &cmd, sizeof(int)) < 0) { | |
483 | fprintf(stderr, "Error writing into stream socket: %s", strerror(errno)); | |
484 | exit(EXIT_FAILURE); | |
485 | } | |
486 | } | |
b4b8faa1 MK |
487 | } |
488 | ||
248c7f9c | 489 | static void int_exit(int sig) |
69525588 JJ |
490 | { |
491 | benchmark_done = true; | |
492 | } | |
493 | ||
494 | static void xdpsock_cleanup(void) | |
b4b8faa1 | 495 | { |
248c7f9c | 496 | struct xsk_umem *umem = xsks[0]->umem->umem; |
2e5d72c1 | 497 | int i; |
b4b8faa1 | 498 | |
248c7f9c | 499 | dump_stats(); |
2e5d72c1 MK |
500 | for (i = 0; i < num_socks; i++) |
501 | xsk_socket__delete(xsks[i]->xsk); | |
248c7f9c MK |
502 | (void)xsk_umem__delete(umem); |
503 | remove_xdp_program(); | |
b4b8faa1 MK |
504 | } |
505 | ||
248c7f9c MK |
/*
 * Report @error (an errno value) together with the source location given by
 * @file, @func and @line, dump statistics, remove the XDP program and
 * terminate with EXIT_FAILURE. Does not return.
 */
static void __exit_with_error(int error, const char *file, const char *func,
			      int line)
{
	const char *reason = strerror(error);

	fprintf(stderr, "%s:%s:%i: errno: %d/\"%s\"\n", file, func,
		line, error, reason);
	dump_stats();
	remove_xdp_program();
	exit(EXIT_FAILURE);
}

/* Call __exit_with_error() with the location of the call site. */
#define exit_with_error(error) \
	__exit_with_error(error, __FILE__, __func__, __LINE__)
b4b8faa1 MK |
/*
 * Exchange the source and destination MAC addresses of the Ethernet header
 * at the start of @data.
 *
 * @data: pointer to at least sizeof(struct ether_header) writable bytes.
 */
static void swap_mac_addresses(void *data)
{
	struct ether_header *eth = (struct ether_header *)data;
	struct ether_addr *src_addr = (struct ether_addr *)&eth->ether_shost;
	struct ether_addr *dst_addr = (struct ether_addr *)&eth->ether_dhost;
	struct ether_addr tmp;

	tmp = *src_addr;
	*src_addr = *dst_addr;
	*dst_addr = tmp;
}
529 | ||
a412ef54 | 530 | static void hex_dump(void *pkt, size_t length, u64 addr) |
b4b8faa1 | 531 | { |
b4b8faa1 MK |
532 | const unsigned char *address = (unsigned char *)pkt; |
533 | const unsigned char *line = address; | |
534 | size_t line_size = 32; | |
535 | unsigned char c; | |
a412ef54 BT |
536 | char buf[32]; |
537 | int i = 0; | |
b4b8faa1 | 538 | |
a412ef54 BT |
539 | if (!DEBUG_HEXDUMP) |
540 | return; | |
541 | ||
542 | sprintf(buf, "addr=%llu", addr); | |
b4b8faa1 | 543 | printf("length = %zu\n", length); |
a412ef54 | 544 | printf("%s | ", buf); |
b4b8faa1 MK |
545 | while (length-- > 0) { |
546 | printf("%02X ", *address++); | |
547 | if (!(++i % line_size) || (length == 0 && i % line_size)) { | |
548 | if (length == 0) { | |
549 | while (i++ % line_size) | |
550 | printf("__ "); | |
551 | } | |
552 | printf(" | "); /* right close */ | |
553 | while (line < address) { | |
554 | c = *line++; | |
555 | printf("%c", (c < 33 || c == 255) ? 0x2E : c); | |
556 | } | |
557 | printf("\n"); | |
558 | if (length > 0) | |
a412ef54 | 559 | printf("%s | ", buf); |
b4b8faa1 MK |
560 | } |
561 | } | |
562 | printf("\n"); | |
563 | } | |
b4b8faa1 | 564 | |
4a3c23ae JJ |
/*
 * Fill @size bytes at @dest with the 32-bit pattern @val in network byte
 * order. A trailing remainder of 1-3 bytes receives the leading bytes of the
 * pattern.
 *
 * @dest may have any alignment: whole words are written with memcpy()
 * instead of through a u32 pointer.
 *
 * Returns @dest.
 */
static void *memset32_htonl(void *dest, u32 val, u32 size)
{
	unsigned char *out = dest;
	const u32 whole = size & ~(u32)0x3;
	u32 i;

	val = htonl(val);

	for (i = 0; i < whole; i += 4)
		memcpy(out + i, &val, sizeof(val));

	for (; i < size; i++)
		out[i] = ((const unsigned char *)&val)[i & 3];

	return dest;
}
580 | ||
581 | /* | |
582 | * This function code has been taken from | |
583 | * Linux kernel lib/checksum.c | |
584 | */ | |
/* Fold a 32-bit sum into 16 bits, adding the carry back in. */
static inline unsigned short from32to16(unsigned int x)
{
	/* high half + low half gives a 16-bit value plus a possible carry */
	unsigned int folded = (x >> 16) + (x & 0xffff);

	/* fold the carry back in */
	folded = (folded >> 16) + (folded & 0xffff);
	return folded;
}
593 | ||
594 | /* | |
595 | * This function code has been taken from | |
596 | * Linux kernel lib/checksum.c | |
597 | */ | |
/*
 * Sum @len bytes at @buff for an Internet checksum and fold the result to
 * 16 bits. Returns 0 when @len <= 0.
 *
 * Byte order is selected with the compiler's __BYTE_ORDER__ macro. Testing
 * whether __LITTLE_ENDIAN is defined is unreliable in user space, because
 * <endian.h> defines that name as a constant on every architecture.
 */
static unsigned int do_csum(const unsigned char *buff, int len)
{
	unsigned int result = 0;
	int odd;

	if (len <= 0)
		goto out;
	/* Consume a leading byte so the remaining reads are 2-byte aligned */
	odd = 1 & (unsigned long)buff;
	if (odd) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
		result += (*buff << 8);
#else
		result = *buff;
#endif
		len--;
		buff++;
	}
	if (len >= 2) {
		/* Consume one 16-bit word to reach 4-byte alignment */
		if (2 & (unsigned long)buff) {
			result += *(unsigned short *)buff;
			len -= 2;
			buff += 2;
		}
		if (len >= 4) {
			const unsigned char *end = buff +
						   ((unsigned int)len & ~3);
			unsigned int carry = 0;

			do {
				unsigned int w = *(unsigned int *)buff;

				buff += 4;
				result += carry;
				result += w;
				/* unsigned wrap-around means the add carried */
				carry = (w > result);
			} while (buff < end);
			result += carry;
			result = (result & 0xffff) + (result >> 16);
		}
		if (len & 2) {
			result += *(unsigned short *)buff;
			buff += 2;
		}
	}
	if (len & 1) {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
		result += *buff;
#else
		result += (*buff << 8);
#endif
	}
	result = from32to16(result);
	/* The sum was computed one byte off; swap the halves back */
	if (odd)
		result = ((result >> 8) & 0xff) | ((result & 0xff) << 8);
out:
	return result;
}
654 | ||
655 | __sum16 ip_fast_csum(const void *iph, unsigned int ihl); | |
656 | ||
657 | /* | |
658 | * This is a version of ip_compute_csum() optimized for IP headers, | |
659 | * which always checksum on 4 octet boundaries. | |
660 | * This function code has been taken from | |
661 | * Linux kernel lib/checksum.c | |
662 | */ | |
663 | __sum16 ip_fast_csum(const void *iph, unsigned int ihl) | |
664 | { | |
665 | return (__force __sum16)~do_csum(iph, ihl * 4); | |
666 | } | |
667 | ||
668 | /* | |
669 | * Fold a partial checksum | |
670 | * This function code has been taken from | |
671 | * Linux kernel include/asm-generic/checksum.h | |
672 | */ | |
673 | static inline __sum16 csum_fold(__wsum csum) | |
674 | { | |
675 | u32 sum = (__force u32)csum; | |
676 | ||
677 | sum = (sum & 0xffff) + (sum >> 16); | |
678 | sum = (sum & 0xffff) + (sum >> 16); | |
679 | return (__force __sum16)~sum; | |
680 | } | |
681 | ||
682 | /* | |
683 | * This function code has been taken from | |
684 | * Linux kernel lib/checksum.c | |
685 | */ | |
/* Fold a 64-bit sum into 32 bits, adding the carry back in. */
static inline u32 from64to32(u64 x)
{
	/* high half + low half gives a 32-bit value plus a possible carry */
	u64 folded = (x >> 32) + (x & 0xffffffff);

	/* fold the carry back in */
	folded = (folded >> 32) + (folded & 0xffffffff);
	return (u32)folded;
}
694 | ||
695 | __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, | |
696 | __u32 len, __u8 proto, __wsum sum); | |
697 | ||
698 | /* | |
699 | * This function code has been taken from | |
700 | * Linux kernel lib/checksum.c | |
701 | */ | |
702 | __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, | |
703 | __u32 len, __u8 proto, __wsum sum) | |
704 | { | |
705 | unsigned long long s = (__force u32)sum; | |
706 | ||
707 | s += (__force u32)saddr; | |
708 | s += (__force u32)daddr; | |
709 | #ifdef __BIG_ENDIAN__ | |
710 | s += proto + len; | |
711 | #else | |
712 | s += (proto + len) << 8; | |
713 | #endif | |
714 | return (__force __wsum)from64to32(s); | |
715 | } | |
716 | ||
717 | /* | |
718 | * This function has been taken from | |
719 | * Linux kernel include/asm-generic/checksum.h | |
720 | */ | |
721 | static inline __sum16 | |
722 | csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, | |
723 | __u8 proto, __wsum sum) | |
724 | { | |
725 | return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); | |
726 | } | |
727 | ||
728 | static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, | |
729 | u8 proto, u16 *udp_pkt) | |
730 | { | |
731 | u32 csum = 0; | |
732 | u32 cnt = 0; | |
733 | ||
734 | /* udp hdr and data */ | |
735 | for (; cnt < len; cnt += 2) | |
736 | csum += udp_pkt[cnt >> 1]; | |
737 | ||
738 | return csum_tcpudp_magic(saddr, daddr, len, proto, csum); | |
739 | } | |
740 | ||
741 | #define ETH_FCS_SIZE 4 | |
742 | ||
743 | #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ | |
744 | sizeof(struct udphdr)) | |
745 | ||
746 | #define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE) | |
747 | #define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) | |
748 | #define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) | |
749 | #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) | |
750 | ||
751 | static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; | |
752 | ||
753 | static void gen_eth_hdr_data(void) | |
754 | { | |
755 | struct udphdr *udp_hdr = (struct udphdr *)(pkt_data + | |
756 | sizeof(struct ethhdr) + | |
757 | sizeof(struct iphdr)); | |
758 | struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + | |
759 | sizeof(struct ethhdr)); | |
760 | struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; | |
761 | ||
762 | /* ethernet header */ | |
763 | memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN); | |
764 | memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN); | |
765 | eth_hdr->h_proto = htons(ETH_P_IP); | |
766 | ||
767 | /* IP header */ | |
768 | ip_hdr->version = IPVERSION; | |
769 | ip_hdr->ihl = 0x5; /* 20 byte header */ | |
770 | ip_hdr->tos = 0x0; | |
771 | ip_hdr->tot_len = htons(IP_PKT_SIZE); | |
772 | ip_hdr->id = 0; | |
773 | ip_hdr->frag_off = 0; | |
774 | ip_hdr->ttl = IPDEFTTL; | |
775 | ip_hdr->protocol = IPPROTO_UDP; | |
776 | ip_hdr->saddr = htonl(0x0a0a0a10); | |
777 | ip_hdr->daddr = htonl(0x0a0a0a20); | |
778 | ||
779 | /* IP header checksum */ | |
780 | ip_hdr->check = 0; | |
781 | ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl); | |
782 | ||
783 | /* UDP header */ | |
784 | udp_hdr->source = htons(0x1000); | |
785 | udp_hdr->dest = htons(0x1000); | |
786 | udp_hdr->len = htons(UDP_PKT_SIZE); | |
787 | ||
788 | /* UDP data */ | |
46e3268e | 789 | memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern, |
4a3c23ae JJ |
790 | UDP_PKT_DATA_SIZE); |
791 | ||
792 | /* UDP header checksum */ | |
793 | udp_hdr->check = 0; | |
794 | udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, | |
795 | IPPROTO_UDP, (u16 *)udp_hdr); | |
796 | } | |
797 | ||
cd9e72b6 | 798 | static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr) |
b4b8faa1 | 799 | { |
248c7f9c | 800 | memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, |
4a3c23ae | 801 | PKT_SIZE); |
b4b8faa1 MK |
802 | } |
803 | ||
248c7f9c | 804 | static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) |
b4b8faa1 | 805 | { |
248c7f9c | 806 | struct xsk_umem_info *umem; |
123e8da1 | 807 | struct xsk_umem_config cfg = { |
c8a039a4 MK |
808 | /* We recommend that you set the fill ring size >= HW RX ring size + |
809 | * AF_XDP RX ring size. Make sure you fill up the fill ring | |
810 | * with buffers at regular intervals, and you will with this setting | |
811 | * avoid allocation failures in the driver. These are usually quite | |
812 | * expensive since drivers have not been written to assume that | |
813 | * allocation failures are common. For regular sockets, kernel | |
814 | * allocated memory is used that only runs out in OOM situations | |
815 | * that should be rare. | |
816 | */ | |
817 | .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, | |
123e8da1 MM |
818 | .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS, |
819 | .frame_size = opt_xsk_frame_size, | |
820 | .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, | |
c543f546 | 821 | .flags = opt_umem_flags |
123e8da1 | 822 | }; |
661842c4 | 823 | int ret; |
b4b8faa1 MK |
824 | |
825 | umem = calloc(1, sizeof(*umem)); | |
248c7f9c MK |
826 | if (!umem) |
827 | exit_with_error(errno); | |
b4b8faa1 | 828 | |
248c7f9c | 829 | ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq, |
123e8da1 | 830 | &cfg); |
248c7f9c MK |
831 | if (ret) |
832 | exit_with_error(-ret); | |
b4b8faa1 | 833 | |
661842c4 MK |
834 | umem->buffer = buffer; |
835 | return umem; | |
836 | } | |
837 | ||
838 | static void xsk_populate_fill_ring(struct xsk_umem_info *umem) | |
839 | { | |
840 | int ret, i; | |
841 | u32 idx; | |
842 | ||
2e5d72c1 | 843 | ret = xsk_ring_prod__reserve(&umem->fq, |
c8a039a4 MK |
844 | XSK_RING_PROD__DEFAULT_NUM_DESCS * 2, &idx); |
845 | if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS * 2) | |
2e5d72c1 | 846 | exit_with_error(-ret); |
c8a039a4 | 847 | for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS * 2; i++) |
2e5d72c1 MK |
848 | *xsk_ring_prod__fill_addr(&umem->fq, idx++) = |
849 | i * opt_xsk_frame_size; | |
c8a039a4 | 850 | xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS * 2); |
b4b8faa1 MK |
851 | } |
852 | ||
661842c4 MK |
853 | static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, |
854 | bool rx, bool tx) | |
b4b8faa1 | 855 | { |
248c7f9c MK |
856 | struct xsk_socket_config cfg; |
857 | struct xsk_socket_info *xsk; | |
661842c4 MK |
858 | struct xsk_ring_cons *rxr; |
859 | struct xsk_ring_prod *txr; | |
248c7f9c | 860 | int ret; |
b4b8faa1 MK |
861 | |
862 | xsk = calloc(1, sizeof(*xsk)); | |
248c7f9c MK |
863 | if (!xsk) |
864 | exit_with_error(errno); | |
865 | ||
866 | xsk->umem = umem; | |
867 | cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; | |
868 | cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; | |
3627d970 | 869 | if (opt_num_xsks > 1 || opt_reduced_cap) |
2e5d72c1 MK |
870 | cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; |
871 | else | |
872 | cfg.libbpf_flags = 0; | |
248c7f9c MK |
873 | cfg.xdp_flags = opt_xdp_flags; |
874 | cfg.bind_flags = opt_xdp_bind_flags; | |
2e5d72c1 | 875 | |
661842c4 MK |
876 | rxr = rx ? &xsk->rx : NULL; |
877 | txr = tx ? &xsk->tx : NULL; | |
878 | ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem, | |
879 | rxr, txr, &cfg); | |
248c7f9c MK |
880 | if (ret) |
881 | exit_with_error(-ret); | |
882 | ||
883 | ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags); | |
884 | if (ret) | |
885 | exit_with_error(-ret); | |
886 | ||
60dc609d CL |
887 | xsk->app_stats.rx_empty_polls = 0; |
888 | xsk->app_stats.fill_fail_polls = 0; | |
889 | xsk->app_stats.copy_tx_sendtos = 0; | |
890 | xsk->app_stats.tx_wakeup_sendtos = 0; | |
891 | xsk->app_stats.opt_polls = 0; | |
892 | xsk->app_stats.prev_rx_empty_polls = 0; | |
893 | xsk->app_stats.prev_fill_fail_polls = 0; | |
894 | xsk->app_stats.prev_copy_tx_sendtos = 0; | |
895 | xsk->app_stats.prev_tx_wakeup_sendtos = 0; | |
896 | xsk->app_stats.prev_opt_polls = 0; | |
897 | ||
b4b8faa1 MK |
898 | return xsk; |
899 | } | |
900 | ||
b4b8faa1 MK |
/* Long-option table handed to getopt_long(). Each entry maps a --long name
 * to the short option character handled in parse_command_line(); has_arg
 * must agree with the ':' markers in the short-option string used there.
 */
static struct option long_options[] = {
	{"rxdrop", no_argument, 0, 'r'},
	{"txonly", no_argument, 0, 't'},
	{"l2fwd", no_argument, 0, 'l'},
	{"interface", required_argument, 0, 'i'},
	{"queue", required_argument, 0, 'q'},
	{"poll", no_argument, 0, 'p'},
	{"xdp-skb", no_argument, 0, 'S'},
	{"xdp-native", no_argument, 0, 'N'},
	{"interval", required_argument, 0, 'n'},
	{"zero-copy", no_argument, 0, 'z'},
	{"copy", no_argument, 0, 'c'},
	{"frame-size", required_argument, 0, 'f'},
	{"no-need-wakeup", no_argument, 0, 'm'},
	{"unaligned", no_argument, 0, 'u'},
	{"shared-umem", no_argument, 0, 'M'},
	{"force", no_argument, 0, 'F'},
	{"duration", required_argument, 0, 'd'},
	{"batch-size", required_argument, 0, 'b'},
	{"tx-pkt-count", required_argument, 0, 'C'},
	{"tx-pkt-size", required_argument, 0, 's'},
	{"tx-pkt-pattern", required_argument, 0, 'P'},
	{"extra-stats", no_argument, 0, 'x'},
	{"quiet", no_argument, 0, 'Q'},
	{"app-stats", no_argument, 0, 'a'},
	/* 'I' consumes optarg (the irq string), so the long form needs it too. */
	{"irq-string", required_argument, 0, 'I'},
	{"busy-poll", no_argument, 0, 'B'},
	{"reduce-cap", no_argument, 0, 'R'},
	{0, 0, 0, 0}
};
931 | ||
932 | static void usage(const char *prog) | |
933 | { | |
934 | const char *str = | |
935 | " Usage: %s [OPTIONS]\n" | |
936 | " Options:\n" | |
937 | " -r, --rxdrop Discard all incoming packets (default)\n" | |
938 | " -t, --txonly Only send packets\n" | |
939 | " -l, --l2fwd MAC swap L2 forwarding\n" | |
940 | " -i, --interface=n Run on interface n\n" | |
941 | " -q, --queue=n Use queue n (default 0)\n" | |
942 | " -p, --poll Use poll syscall\n" | |
b4b8faa1 | 943 | " -S, --xdp-skb=n Use XDP skb-mod\n" |
4564a8bb | 944 | " -N, --xdp-native=n Enforce XDP native mode\n" |
b4b8faa1 | 945 | " -n, --interval=n Specify statistics update interval (default 1 sec).\n" |
58c50ae4 BT |
946 | " -z, --zero-copy Force zero-copy mode.\n" |
947 | " -c, --copy Force copy mode.\n" | |
46738f73 | 948 | " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n" |
c543f546 KL |
949 | " -f, --frame-size=n Set the frame size (must be a power of two in aligned mode, default is %d).\n" |
950 | " -u, --unaligned Enable unaligned chunk placement\n" | |
3627d970 | 951 | " -M, --shared-umem Enable XDP_SHARED_UMEM (cannot be used with -R)\n" |
b3133329 | 952 | " -F, --force Force loading the XDP prog\n" |
d3f11b01 JJ |
953 | " -d, --duration=n Duration in secs to run command.\n" |
954 | " Default: forever.\n" | |
cd9e72b6 JJ |
955 | " -b, --batch-size=n Batch size for sending or receiving\n" |
956 | " packets. Default: %d\n" | |
ece6e969 JJ |
957 | " -C, --tx-pkt-count=n Number of packets to send.\n" |
958 | " Default: Continuous packets.\n" | |
4a3c23ae JJ |
959 | " -s, --tx-pkt-size=n Transmit packet size.\n" |
960 | " (Default: %d bytes)\n" | |
961 | " Min size: %d, Max size %d.\n" | |
46e3268e | 962 | " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n" |
b36c3206 | 963 | " -x, --extra-stats Display extra statistics.\n" |
74e00676 | 964 | " -Q, --quiet Do not display any stats.\n" |
60dc609d | 965 | " -a, --app-stats Display application (syscall) statistics.\n" |
67ed3755 | 966 | " -I, --irq-string Display driver interrupt statistics for interface associated with irq-string.\n" |
b35fc148 | 967 | " -B, --busy-poll Busy poll.\n" |
3627d970 | 968 | " -R, --reduce-cap Use reduced capabilities (cannot be used with -M)\n" |
b4b8faa1 | 969 | "\n"; |
cd9e72b6 | 970 | fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, |
4a3c23ae | 971 | opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, |
46e3268e | 972 | XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern); |
4a3c23ae | 973 | |
b4b8faa1 MK |
974 | exit(EXIT_FAILURE); |
975 | } | |
976 | ||
977 | static void parse_command_line(int argc, char **argv) | |
978 | { | |
979 | int option_index, c; | |
980 | ||
981 | opterr = 0; | |
982 | ||
983 | for (;;) { | |
3627d970 | 984 | c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:xQaI:BR", |
46738f73 | 985 | long_options, &option_index); |
b4b8faa1 MK |
986 | if (c == -1) |
987 | break; | |
988 | ||
989 | switch (c) { | |
990 | case 'r': | |
991 | opt_bench = BENCH_RXDROP; | |
992 | break; | |
993 | case 't': | |
994 | opt_bench = BENCH_TXONLY; | |
995 | break; | |
996 | case 'l': | |
997 | opt_bench = BENCH_L2FWD; | |
998 | break; | |
999 | case 'i': | |
1000 | opt_if = optarg; | |
1001 | break; | |
1002 | case 'q': | |
1003 | opt_queue = atoi(optarg); | |
1004 | break; | |
b4b8faa1 MK |
1005 | case 'p': |
1006 | opt_poll = 1; | |
1007 | break; | |
1008 | case 'S': | |
1009 | opt_xdp_flags |= XDP_FLAGS_SKB_MODE; | |
9f5232cc | 1010 | opt_xdp_bind_flags |= XDP_COPY; |
b4b8faa1 MK |
1011 | break; |
1012 | case 'N': | |
d50ecc46 | 1013 | /* default, set below */ |
b4b8faa1 MK |
1014 | break; |
1015 | case 'n': | |
1016 | opt_interval = atoi(optarg); | |
1017 | break; | |
58c50ae4 BT |
1018 | case 'z': |
1019 | opt_xdp_bind_flags |= XDP_ZEROCOPY; | |
1020 | break; | |
1021 | case 'c': | |
1022 | opt_xdp_bind_flags |= XDP_COPY; | |
1023 | break; | |
c543f546 KL |
1024 | case 'u': |
1025 | opt_umem_flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG; | |
1026 | opt_unaligned_chunks = 1; | |
3945b37a | 1027 | opt_mmap_flags = MAP_HUGETLB; |
c543f546 | 1028 | break; |
743e568c MF |
1029 | case 'F': |
1030 | opt_xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; | |
1031 | break; | |
123e8da1 MM |
1032 | case 'f': |
1033 | opt_xsk_frame_size = atoi(optarg); | |
2e5d72c1 | 1034 | break; |
46738f73 MK |
1035 | case 'm': |
1036 | opt_need_wakeup = false; | |
1037 | opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP; | |
123e8da1 | 1038 | break; |
2e5d72c1 MK |
1039 | case 'M': |
1040 | opt_num_xsks = MAX_SOCKS; | |
1041 | break; | |
d3f11b01 JJ |
1042 | case 'd': |
1043 | opt_duration = atoi(optarg); | |
1044 | opt_duration *= 1000000000; | |
1045 | break; | |
cd9e72b6 JJ |
1046 | case 'b': |
1047 | opt_batch_size = atoi(optarg); | |
1048 | break; | |
ece6e969 JJ |
1049 | case 'C': |
1050 | opt_pkt_count = atoi(optarg); | |
1051 | break; | |
4a3c23ae JJ |
1052 | case 's': |
1053 | opt_pkt_size = atoi(optarg); | |
1054 | if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) || | |
1055 | opt_pkt_size < MIN_PKT_SIZE) { | |
1056 | fprintf(stderr, | |
1057 | "ERROR: Invalid frame size %d\n", | |
1058 | opt_pkt_size); | |
1059 | usage(basename(argv[0])); | |
1060 | } | |
1061 | break; | |
46e3268e JJ |
1062 | case 'P': |
1063 | opt_pkt_fill_pattern = strtol(optarg, NULL, 16); | |
1064 | break; | |
b36c3206 CL |
1065 | case 'x': |
1066 | opt_extra_stats = 1; | |
1067 | break; | |
74e00676 MK |
1068 | case 'Q': |
1069 | opt_quiet = 1; | |
1070 | break; | |
60dc609d CL |
1071 | case 'a': |
1072 | opt_app_stats = 1; | |
67ed3755 CL |
1073 | break; |
1074 | case 'I': | |
1075 | opt_irq_str = optarg; | |
1076 | if (get_interrupt_number()) | |
1077 | irqs_at_init = get_irqs(); | |
1078 | if (irqs_at_init < 0) { | |
1079 | fprintf(stderr, "ERROR: Failed to get irqs for %s\n", opt_irq_str); | |
1080 | usage(basename(argv[0])); | |
1081 | } | |
b35fc148 BT |
1082 | break; |
1083 | case 'B': | |
1084 | opt_busy_poll = 1; | |
60dc609d | 1085 | break; |
3627d970 MD |
1086 | case 'R': |
1087 | opt_reduced_cap = true; | |
1088 | break; | |
b4b8faa1 MK |
1089 | default: |
1090 | usage(basename(argv[0])); | |
1091 | } | |
1092 | } | |
1093 | ||
d50ecc46 THJ |
1094 | if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE)) |
1095 | opt_xdp_flags |= XDP_FLAGS_DRV_MODE; | |
1096 | ||
b4b8faa1 MK |
1097 | opt_ifindex = if_nametoindex(opt_if); |
1098 | if (!opt_ifindex) { | |
1099 | fprintf(stderr, "ERROR: interface \"%s\" does not exist\n", | |
1100 | opt_if); | |
1101 | usage(basename(argv[0])); | |
1102 | } | |
248c7f9c | 1103 | |
c543f546 KL |
1104 | if ((opt_xsk_frame_size & (opt_xsk_frame_size - 1)) && |
1105 | !opt_unaligned_chunks) { | |
123e8da1 MM |
1106 | fprintf(stderr, "--frame-size=%d is not a power of two\n", |
1107 | opt_xsk_frame_size); | |
1108 | usage(basename(argv[0])); | |
1109 | } | |
3627d970 MD |
1110 | |
1111 | if (opt_reduced_cap && opt_num_xsks > 1) { | |
1112 | fprintf(stderr, "ERROR: -M and -R cannot be used together\n"); | |
1113 | usage(basename(argv[0])); | |
1114 | } | |
b4b8faa1 MK |
1115 | } |
1116 | ||
248c7f9c | 1117 | static void kick_tx(struct xsk_socket_info *xsk) |
b4b8faa1 MK |
1118 | { |
1119 | int ret; | |
1120 | ||
248c7f9c | 1121 | ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); |
8ed47e14 MF |
1122 | if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || |
1123 | errno == EBUSY || errno == ENETDOWN) | |
b4b8faa1 | 1124 | return; |
248c7f9c | 1125 | exit_with_error(errno); |
b4b8faa1 MK |
1126 | } |
1127 | ||
284cbc61 | 1128 | static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk) |
b4b8faa1 | 1129 | { |
03895e63 | 1130 | struct xsk_umem_info *umem = xsk->umem; |
b74e21ab | 1131 | u32 idx_cq = 0, idx_fq = 0; |
b4b8faa1 MK |
1132 | unsigned int rcvd; |
1133 | size_t ndescs; | |
1134 | ||
1135 | if (!xsk->outstanding_tx) | |
1136 | return; | |
1137 | ||
3131cf66 MK |
1138 | /* In copy mode, Tx is driven by a syscall so we need to use e.g. sendto() to |
1139 | * really send the packets. In zero-copy mode we do not have to do this, since Tx | |
1140 | * is driven by the NAPI loop. So as an optimization, we do not have to call | |
1141 | * sendto() all the time in zero-copy mode for l2fwd. | |
1142 | */ | |
60dc609d CL |
1143 | if (opt_xdp_bind_flags & XDP_COPY) { |
1144 | xsk->app_stats.copy_tx_sendtos++; | |
3131cf66 | 1145 | kick_tx(xsk); |
60dc609d | 1146 | } |
3131cf66 | 1147 | |
cd9e72b6 | 1148 | ndescs = (xsk->outstanding_tx > opt_batch_size) ? opt_batch_size : |
248c7f9c | 1149 | xsk->outstanding_tx; |
b4b8faa1 MK |
1150 | |
1151 | /* re-add completed Tx buffers */ | |
03895e63 | 1152 | rcvd = xsk_ring_cons__peek(&umem->cq, ndescs, &idx_cq); |
b4b8faa1 | 1153 | if (rcvd > 0) { |
248c7f9c MK |
1154 | unsigned int i; |
1155 | int ret; | |
1156 | ||
03895e63 | 1157 | ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); |
248c7f9c MK |
1158 | while (ret != rcvd) { |
1159 | if (ret < 0) | |
1160 | exit_with_error(-ret); | |
b35fc148 | 1161 | if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&umem->fq)) { |
60dc609d | 1162 | xsk->app_stats.fill_fail_polls++; |
284cbc61 BT |
1163 | recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, |
1164 | NULL); | |
60dc609d | 1165 | } |
03895e63 | 1166 | ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq); |
248c7f9c | 1167 | } |
03895e63 | 1168 | |
248c7f9c | 1169 | for (i = 0; i < rcvd; i++) |
03895e63 KL |
1170 | *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = |
1171 | *xsk_ring_cons__comp_addr(&umem->cq, idx_cq++); | |
248c7f9c MK |
1172 | |
1173 | xsk_ring_prod__submit(&xsk->umem->fq, rcvd); | |
1174 | xsk_ring_cons__release(&xsk->umem->cq, rcvd); | |
b4b8faa1 | 1175 | xsk->outstanding_tx -= rcvd; |
b4b8faa1 MK |
1176 | } |
1177 | } | |
1178 | ||
ece6e969 JJ |
1179 | static inline void complete_tx_only(struct xsk_socket_info *xsk, |
1180 | int batch_size) | |
b4b8faa1 | 1181 | { |
b4b8faa1 | 1182 | unsigned int rcvd; |
248c7f9c | 1183 | u32 idx; |
b4b8faa1 MK |
1184 | |
1185 | if (!xsk->outstanding_tx) | |
1186 | return; | |
1187 | ||
60dc609d CL |
1188 | if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) { |
1189 | xsk->app_stats.tx_wakeup_sendtos++; | |
46738f73 | 1190 | kick_tx(xsk); |
60dc609d | 1191 | } |
b4b8faa1 | 1192 | |
ece6e969 | 1193 | rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); |
b4b8faa1 | 1194 | if (rcvd > 0) { |
248c7f9c | 1195 | xsk_ring_cons__release(&xsk->umem->cq, rcvd); |
b4b8faa1 | 1196 | xsk->outstanding_tx -= rcvd; |
b4b8faa1 MK |
1197 | } |
1198 | } | |
1199 | ||
f2d27282 | 1200 | static void rx_drop(struct xsk_socket_info *xsk) |
b4b8faa1 | 1201 | { |
b4b8faa1 | 1202 | unsigned int rcvd, i; |
b74e21ab | 1203 | u32 idx_rx = 0, idx_fq = 0; |
248c7f9c | 1204 | int ret; |
b4b8faa1 | 1205 | |
cd9e72b6 | 1206 | rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); |
46738f73 | 1207 | if (!rcvd) { |
b35fc148 | 1208 | if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { |
60dc609d | 1209 | xsk->app_stats.rx_empty_polls++; |
f2d27282 | 1210 | recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); |
60dc609d | 1211 | } |
b4b8faa1 | 1212 | return; |
46738f73 | 1213 | } |
b4b8faa1 | 1214 | |
248c7f9c MK |
1215 | ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); |
1216 | while (ret != rcvd) { | |
1217 | if (ret < 0) | |
1218 | exit_with_error(-ret); | |
b35fc148 | 1219 | if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { |
60dc609d | 1220 | xsk->app_stats.fill_fail_polls++; |
f2d27282 | 1221 | recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); |
60dc609d | 1222 | } |
248c7f9c MK |
1223 | ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq); |
1224 | } | |
1225 | ||
b4b8faa1 | 1226 | for (i = 0; i < rcvd; i++) { |
248c7f9c MK |
1227 | u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; |
1228 | u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; | |
03895e63 KL |
1229 | u64 orig = xsk_umem__extract_addr(addr); |
1230 | ||
1231 | addr = xsk_umem__add_offset_to_addr(addr); | |
248c7f9c | 1232 | char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); |
b4b8faa1 | 1233 | |
248c7f9c | 1234 | hex_dump(pkt, len, addr); |
03895e63 | 1235 | *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig; |
b4b8faa1 MK |
1236 | } |
1237 | ||
248c7f9c MK |
1238 | xsk_ring_prod__submit(&xsk->umem->fq, rcvd); |
1239 | xsk_ring_cons__release(&xsk->rx, rcvd); | |
2e8806f0 | 1240 | xsk->ring_stats.rx_npkts += rcvd; |
b4b8faa1 MK |
1241 | } |
1242 | ||
1243 | static void rx_drop_all(void) | |
1244 | { | |
2e5d72c1 | 1245 | struct pollfd fds[MAX_SOCKS] = {}; |
46738f73 | 1246 | int i, ret; |
b4b8faa1 | 1247 | |
b4b8faa1 | 1248 | for (i = 0; i < num_socks; i++) { |
248c7f9c | 1249 | fds[i].fd = xsk_socket__fd(xsks[i]->xsk); |
b4b8faa1 | 1250 | fds[i].events = POLLIN; |
b4b8faa1 MK |
1251 | } |
1252 | ||
1253 | for (;;) { | |
1254 | if (opt_poll) { | |
60dc609d CL |
1255 | for (i = 0; i < num_socks; i++) |
1256 | xsks[i]->app_stats.opt_polls++; | |
46738f73 | 1257 | ret = poll(fds, num_socks, opt_timeout); |
b4b8faa1 MK |
1258 | if (ret <= 0) |
1259 | continue; | |
1260 | } | |
1261 | ||
1262 | for (i = 0; i < num_socks; i++) | |
f2d27282 | 1263 | rx_drop(xsks[i]); |
d3f11b01 JJ |
1264 | |
1265 | if (benchmark_done) | |
1266 | break; | |
46738f73 MK |
1267 | } |
1268 | } | |
1269 | ||
b69e56cf | 1270 | static void tx_only(struct xsk_socket_info *xsk, u32 *frame_nb, int batch_size) |
46738f73 MK |
1271 | { |
1272 | u32 idx; | |
cd9e72b6 | 1273 | unsigned int i; |
46738f73 | 1274 | |
ece6e969 JJ |
1275 | while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < |
1276 | batch_size) { | |
1277 | complete_tx_only(xsk, batch_size); | |
092fde0f MK |
1278 | if (benchmark_done) |
1279 | return; | |
cd9e72b6 | 1280 | } |
46738f73 | 1281 | |
ece6e969 | 1282 | for (i = 0; i < batch_size; i++) { |
cd9e72b6 JJ |
1283 | struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, |
1284 | idx + i); | |
b69e56cf | 1285 | tx_desc->addr = (*frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; |
4a3c23ae | 1286 | tx_desc->len = PKT_SIZE; |
b4b8faa1 | 1287 | } |
46738f73 | 1288 | |
ece6e969 | 1289 | xsk_ring_prod__submit(&xsk->tx, batch_size); |
90da4b32 | 1290 | xsk->ring_stats.tx_npkts += batch_size; |
ece6e969 | 1291 | xsk->outstanding_tx += batch_size; |
b69e56cf WJ |
1292 | *frame_nb += batch_size; |
1293 | *frame_nb %= NUM_FRAMES; | |
ece6e969 JJ |
1294 | complete_tx_only(xsk, batch_size); |
1295 | } | |
1296 | ||
1297 | static inline int get_batch_size(int pkt_cnt) | |
1298 | { | |
1299 | if (!opt_pkt_count) | |
1300 | return opt_batch_size; | |
1301 | ||
1302 | if (pkt_cnt + opt_batch_size <= opt_pkt_count) | |
1303 | return opt_batch_size; | |
1304 | ||
1305 | return opt_pkt_count - pkt_cnt; | |
1306 | } | |
1307 | ||
1308 | static void complete_tx_only_all(void) | |
1309 | { | |
1310 | bool pending; | |
1311 | int i; | |
1312 | ||
1313 | do { | |
1314 | pending = false; | |
1315 | for (i = 0; i < num_socks; i++) { | |
1316 | if (xsks[i]->outstanding_tx) { | |
1317 | complete_tx_only(xsks[i], opt_batch_size); | |
1318 | pending = !!xsks[i]->outstanding_tx; | |
1319 | } | |
1320 | } | |
1321 | } while (pending); | |
b4b8faa1 MK |
1322 | } |
1323 | ||
46738f73 | 1324 | static void tx_only_all(void) |
b4b8faa1 | 1325 | { |
2e5d72c1 | 1326 | struct pollfd fds[MAX_SOCKS] = {}; |
46738f73 | 1327 | u32 frame_nb[MAX_SOCKS] = {}; |
ece6e969 | 1328 | int pkt_cnt = 0; |
46738f73 | 1329 | int i, ret; |
b4b8faa1 | 1330 | |
46738f73 MK |
1331 | for (i = 0; i < num_socks; i++) { |
1332 | fds[0].fd = xsk_socket__fd(xsks[i]->xsk); | |
1333 | fds[0].events = POLLOUT; | |
1334 | } | |
b4b8faa1 | 1335 | |
ece6e969 JJ |
1336 | while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { |
1337 | int batch_size = get_batch_size(pkt_cnt); | |
1338 | ||
b4b8faa1 | 1339 | if (opt_poll) { |
60dc609d CL |
1340 | for (i = 0; i < num_socks; i++) |
1341 | xsks[i]->app_stats.opt_polls++; | |
46738f73 | 1342 | ret = poll(fds, num_socks, opt_timeout); |
b4b8faa1 MK |
1343 | if (ret <= 0) |
1344 | continue; | |
1345 | ||
248c7f9c | 1346 | if (!(fds[0].revents & POLLOUT)) |
b4b8faa1 MK |
1347 | continue; |
1348 | } | |
1349 | ||
46738f73 | 1350 | for (i = 0; i < num_socks; i++) |
b69e56cf | 1351 | tx_only(xsks[i], &frame_nb[i], batch_size); |
ece6e969 JJ |
1352 | |
1353 | pkt_cnt += batch_size; | |
d3f11b01 JJ |
1354 | |
1355 | if (benchmark_done) | |
1356 | break; | |
b4b8faa1 | 1357 | } |
ece6e969 JJ |
1358 | |
1359 | if (opt_pkt_count) | |
1360 | complete_tx_only_all(); | |
b4b8faa1 MK |
1361 | } |
1362 | ||
284cbc61 | 1363 | static void l2fwd(struct xsk_socket_info *xsk) |
b4b8faa1 | 1364 | { |
46738f73 MK |
1365 | unsigned int rcvd, i; |
1366 | u32 idx_rx = 0, idx_tx = 0; | |
1367 | int ret; | |
b4b8faa1 | 1368 | |
284cbc61 | 1369 | complete_tx_l2fwd(xsk); |
b4b8faa1 | 1370 | |
cd9e72b6 | 1371 | rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); |
46738f73 | 1372 | if (!rcvd) { |
b35fc148 | 1373 | if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) { |
60dc609d | 1374 | xsk->app_stats.rx_empty_polls++; |
284cbc61 | 1375 | recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL); |
60dc609d | 1376 | } |
46738f73 MK |
1377 | return; |
1378 | } | |
90da4b32 | 1379 | xsk->ring_stats.rx_npkts += rcvd; |
b4b8faa1 | 1380 | |
46738f73 MK |
1381 | ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); |
1382 | while (ret != rcvd) { | |
1383 | if (ret < 0) | |
1384 | exit_with_error(-ret); | |
284cbc61 | 1385 | complete_tx_l2fwd(xsk); |
b35fc148 | 1386 | if (opt_busy_poll || xsk_ring_prod__needs_wakeup(&xsk->tx)) { |
60dc609d | 1387 | xsk->app_stats.tx_wakeup_sendtos++; |
46738f73 | 1388 | kick_tx(xsk); |
60dc609d | 1389 | } |
248c7f9c | 1390 | ret = xsk_ring_prod__reserve(&xsk->tx, rcvd, &idx_tx); |
46738f73 MK |
1391 | } |
1392 | ||
1393 | for (i = 0; i < rcvd; i++) { | |
1394 | u64 addr = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx)->addr; | |
1395 | u32 len = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++)->len; | |
5a712e13 | 1396 | u64 orig = addr; |
03895e63 KL |
1397 | |
1398 | addr = xsk_umem__add_offset_to_addr(addr); | |
46738f73 MK |
1399 | char *pkt = xsk_umem__get_data(xsk->umem->buffer, addr); |
1400 | ||
1401 | swap_mac_addresses(pkt); | |
248c7f9c | 1402 | |
46738f73 | 1403 | hex_dump(pkt, len, addr); |
03895e63 | 1404 | xsk_ring_prod__tx_desc(&xsk->tx, idx_tx)->addr = orig; |
46738f73 MK |
1405 | xsk_ring_prod__tx_desc(&xsk->tx, idx_tx++)->len = len; |
1406 | } | |
b4b8faa1 | 1407 | |
46738f73 MK |
1408 | xsk_ring_prod__submit(&xsk->tx, rcvd); |
1409 | xsk_ring_cons__release(&xsk->rx, rcvd); | |
b4b8faa1 | 1410 | |
90da4b32 | 1411 | xsk->ring_stats.tx_npkts += rcvd; |
46738f73 MK |
1412 | xsk->outstanding_tx += rcvd; |
1413 | } | |
1414 | ||
1415 | static void l2fwd_all(void) | |
1416 | { | |
2e5d72c1 | 1417 | struct pollfd fds[MAX_SOCKS] = {}; |
46738f73 MK |
1418 | int i, ret; |
1419 | ||
46738f73 MK |
1420 | for (;;) { |
1421 | if (opt_poll) { | |
284cbc61 BT |
1422 | for (i = 0; i < num_socks; i++) { |
1423 | fds[i].fd = xsk_socket__fd(xsks[i]->xsk); | |
1424 | fds[i].events = POLLOUT | POLLIN; | |
60dc609d | 1425 | xsks[i]->app_stats.opt_polls++; |
284cbc61 | 1426 | } |
46738f73 MK |
1427 | ret = poll(fds, num_socks, opt_timeout); |
1428 | if (ret <= 0) | |
1429 | continue; | |
1430 | } | |
b4b8faa1 | 1431 | |
46738f73 | 1432 | for (i = 0; i < num_socks; i++) |
284cbc61 | 1433 | l2fwd(xsks[i]); |
d3f11b01 JJ |
1434 | |
1435 | if (benchmark_done) | |
1436 | break; | |
b4b8faa1 MK |
1437 | } |
1438 | } | |
1439 | ||
2e5d72c1 MK |
1440 | static void load_xdp_program(char **argv, struct bpf_object **obj) |
1441 | { | |
1442 | struct bpf_prog_load_attr prog_load_attr = { | |
1443 | .prog_type = BPF_PROG_TYPE_XDP, | |
1444 | }; | |
1445 | char xdp_filename[256]; | |
1446 | int prog_fd; | |
1447 | ||
1448 | snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]); | |
1449 | prog_load_attr.file = xdp_filename; | |
1450 | ||
1451 | if (bpf_prog_load_xattr(&prog_load_attr, obj, &prog_fd)) | |
1452 | exit(EXIT_FAILURE); | |
1453 | if (prog_fd < 0) { | |
1454 | fprintf(stderr, "ERROR: no program found: %s\n", | |
1455 | strerror(prog_fd)); | |
1456 | exit(EXIT_FAILURE); | |
1457 | } | |
1458 | ||
1459 | if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, opt_xdp_flags) < 0) { | |
1460 | fprintf(stderr, "ERROR: link set xdp fd failed\n"); | |
1461 | exit(EXIT_FAILURE); | |
1462 | } | |
1463 | } | |
1464 | ||
1465 | static void enter_xsks_into_map(struct bpf_object *obj) | |
1466 | { | |
1467 | struct bpf_map *map; | |
1468 | int i, xsks_map; | |
1469 | ||
1470 | map = bpf_object__find_map_by_name(obj, "xsks_map"); | |
1471 | xsks_map = bpf_map__fd(map); | |
1472 | if (xsks_map < 0) { | |
1473 | fprintf(stderr, "ERROR: no xsks map found: %s\n", | |
1474 | strerror(xsks_map)); | |
1475 | exit(EXIT_FAILURE); | |
1476 | } | |
1477 | ||
1478 | for (i = 0; i < num_socks; i++) { | |
1479 | int fd = xsk_socket__fd(xsks[i]->xsk); | |
1480 | int key, ret; | |
1481 | ||
1482 | key = i; | |
1483 | ret = bpf_map_update_elem(xsks_map, &key, &fd, 0); | |
1484 | if (ret) { | |
1485 | fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i); | |
1486 | exit(EXIT_FAILURE); | |
1487 | } | |
1488 | } | |
1489 | } | |
1490 | ||
b35fc148 BT |
1491 | static void apply_setsockopt(struct xsk_socket_info *xsk) |
1492 | { | |
1493 | int sock_opt; | |
1494 | ||
1495 | if (!opt_busy_poll) | |
1496 | return; | |
1497 | ||
1498 | sock_opt = 1; | |
1499 | if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL, | |
1500 | (void *)&sock_opt, sizeof(sock_opt)) < 0) | |
1501 | exit_with_error(errno); | |
1502 | ||
1503 | sock_opt = 20; | |
1504 | if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL, | |
1505 | (void *)&sock_opt, sizeof(sock_opt)) < 0) | |
1506 | exit_with_error(errno); | |
41bf900f BT |
1507 | |
1508 | sock_opt = opt_batch_size; | |
1509 | if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET, | |
1510 | (void *)&sock_opt, sizeof(sock_opt)) < 0) | |
1511 | exit_with_error(errno); | |
b35fc148 BT |
1512 | } |
1513 | ||
3627d970 MD |
/* Receive one file descriptor passed as SCM_RIGHTS ancillary data on the
 * connected UNIX socket @sock.
 *
 * @sock: connected socket to read from.
 * @_fd:  on success, set to the received descriptor; untouched on failure.
 *
 * Returns 0 on success, or -EINVAL if recvmsg() fails, delivers no data, or
 * the message does not carry a well-formed SCM_RIGHTS control message.
 */
static int recv_xsks_map_fd_from_ctrl_node(int sock, int *_fd)
{
	/* The union keeps the control buffer aligned for struct cmsghdr. */
	union {
		char buf[CMSG_SPACE(sizeof(int))];
		struct cmsghdr align;
	} cms;
	struct msghdr msg = { 0 };
	struct cmsghdr *cmsg;
	struct iovec iov;
	ssize_t len;
	int value;

	/* The regular payload is a single int whose value is not used. */
	iov.iov_base = &value;
	iov.iov_len = sizeof(int);

	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = cms.buf;
	msg.msg_controllen = sizeof(cms.buf);

	len = recvmsg(sock, &msg, 0);

	if (len < 0) {
		fprintf(stderr, "Recvmsg failed length incorrect.\n");
		return -EINVAL;
	}

	if (len == 0) {
		fprintf(stderr, "Recvmsg failed no data\n");
		return -EINVAL;
	}

	/* CMSG_FIRSTHDR() yields NULL when no ancillary data arrived, and a
	 * peer may send something other than a descriptor; check before use.
	 */
	cmsg = CMSG_FIRSTHDR(&msg);
	if (!cmsg || cmsg->cmsg_level != SOL_SOCKET ||
	    cmsg->cmsg_type != SCM_RIGHTS ||
	    cmsg->cmsg_len < CMSG_LEN(sizeof(int))) {
		fprintf(stderr, "Recvmsg failed no fd in control message\n");
		return -EINVAL;
	}

	/* CMSG_DATA() is not guaranteed to be int-aligned: copy it out. */
	memcpy(_fd, CMSG_DATA(cmsg), sizeof(int));

	return 0;
}
1551 | ||
1552 | static int | |
1553 | recv_xsks_map_fd(int *xsks_map_fd) | |
1554 | { | |
1555 | struct sockaddr_un server; | |
1556 | int err; | |
1557 | ||
1558 | sock = socket(AF_UNIX, SOCK_STREAM, 0); | |
1559 | if (sock < 0) { | |
1560 | fprintf(stderr, "Error opening socket stream: %s", strerror(errno)); | |
1561 | return errno; | |
1562 | } | |
1563 | ||
1564 | server.sun_family = AF_UNIX; | |
1565 | strcpy(server.sun_path, SOCKET_NAME); | |
1566 | ||
1567 | if (connect(sock, (struct sockaddr *)&server, sizeof(struct sockaddr_un)) < 0) { | |
1568 | close(sock); | |
1569 | fprintf(stderr, "Error connecting stream socket: %s", strerror(errno)); | |
1570 | return errno; | |
1571 | } | |
1572 | ||
1573 | err = recv_xsks_map_fd_from_ctrl_node(sock, xsks_map_fd); | |
1574 | if (err) { | |
2faa7328 | 1575 | fprintf(stderr, "Error %d receiving fd\n", err); |
3627d970 MD |
1576 | return err; |
1577 | } | |
1578 | return 0; | |
1579 | } | |
1580 | ||
b4b8faa1 MK |
1581 | int main(int argc, char **argv) |
1582 | { | |
3627d970 MD |
1583 | struct __user_cap_header_struct hdr = { _LINUX_CAPABILITY_VERSION_3, 0 }; |
1584 | struct __user_cap_data_struct data[2] = { { 0 } }; | |
1585 | struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; | |
661842c4 | 1586 | bool rx = false, tx = false; |
248c7f9c | 1587 | struct xsk_umem_info *umem; |
2e5d72c1 | 1588 | struct bpf_object *obj; |
3627d970 | 1589 | int xsks_map_fd = 0; |
b4b8faa1 | 1590 | pthread_t pt; |
2e5d72c1 | 1591 | int i, ret; |
248c7f9c | 1592 | void *bufs; |
b4b8faa1 MK |
1593 | |
1594 | parse_command_line(argc, argv); | |
1595 | ||
3627d970 MD |
1596 | if (opt_reduced_cap) { |
1597 | if (capget(&hdr, data) < 0) | |
1598 | fprintf(stderr, "Error getting capabilities\n"); | |
1599 | ||
1600 | data->effective &= CAP_TO_MASK(CAP_NET_RAW); | |
1601 | data->permitted &= CAP_TO_MASK(CAP_NET_RAW); | |
1602 | ||
1603 | if (capset(&hdr, data) < 0) | |
1604 | fprintf(stderr, "Setting capabilities failed\n"); | |
1605 | ||
1606 | if (capget(&hdr, data) < 0) { | |
1607 | fprintf(stderr, "Error getting capabilities\n"); | |
1608 | } else { | |
1609 | fprintf(stderr, "Capabilities EFF %x Caps INH %x Caps Per %x\n", | |
1610 | data[0].effective, data[0].inheritable, data[0].permitted); | |
1611 | fprintf(stderr, "Capabilities EFF %x Caps INH %x Caps Per %x\n", | |
1612 | data[1].effective, data[1].inheritable, data[1].permitted); | |
1613 | } | |
1614 | } else { | |
1615 | if (setrlimit(RLIMIT_MEMLOCK, &r)) { | |
1616 | fprintf(stderr, "ERROR: setrlimit(RLIMIT_MEMLOCK) \"%s\"\n", | |
1617 | strerror(errno)); | |
1618 | exit(EXIT_FAILURE); | |
1619 | } | |
1620 | ||
1621 | if (opt_num_xsks > 1) | |
1622 | load_xdp_program(argv, &obj); | |
1623 | } | |
2e5d72c1 | 1624 | |
3945b37a KL |
1625 | /* Reserve memory for the umem. Use hugepages if unaligned chunk mode */ |
1626 | bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size, | |
1627 | PROT_READ | PROT_WRITE, | |
1628 | MAP_PRIVATE | MAP_ANONYMOUS | opt_mmap_flags, -1, 0); | |
1629 | if (bufs == MAP_FAILED) { | |
1630 | printf("ERROR: mmap failed\n"); | |
1631 | exit(EXIT_FAILURE); | |
1632 | } | |
2e5d72c1 MK |
1633 | |
1634 | /* Create sockets... */ | |
123e8da1 | 1635 | umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size); |
661842c4 MK |
1636 | if (opt_bench == BENCH_RXDROP || opt_bench == BENCH_L2FWD) { |
1637 | rx = true; | |
1638 | xsk_populate_fill_ring(umem); | |
1639 | } | |
1640 | if (opt_bench == BENCH_L2FWD || opt_bench == BENCH_TXONLY) | |
1641 | tx = true; | |
2e5d72c1 | 1642 | for (i = 0; i < opt_num_xsks; i++) |
661842c4 | 1643 | xsks[num_socks++] = xsk_configure_socket(umem, rx, tx); |
b4b8faa1 | 1644 | |
b35fc148 BT |
1645 | for (i = 0; i < opt_num_xsks; i++) |
1646 | apply_setsockopt(xsks[i]); | |
1647 | ||
4a3c23ae JJ |
1648 | if (opt_bench == BENCH_TXONLY) { |
1649 | gen_eth_hdr_data(); | |
1650 | ||
661842c4 MK |
1651 | for (i = 0; i < NUM_FRAMES; i++) |
1652 | gen_eth_frame(umem, i * opt_xsk_frame_size); | |
4a3c23ae | 1653 | } |
b4b8faa1 | 1654 | |
2e5d72c1 MK |
1655 | if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY) |
1656 | enter_xsks_into_map(obj); | |
b4b8faa1 | 1657 | |
3627d970 MD |
1658 | if (opt_reduced_cap) { |
1659 | ret = recv_xsks_map_fd(&xsks_map_fd); | |
1660 | if (ret) { | |
1661 | fprintf(stderr, "Error %d receiving xsks_map_fd\n", ret); | |
1662 | exit_with_error(ret); | |
1663 | } | |
1664 | if (xsks[0]->xsk) { | |
1665 | ret = xsk_socket__update_xskmap(xsks[0]->xsk, xsks_map_fd); | |
1666 | if (ret) { | |
1667 | fprintf(stderr, "Update of BPF map failed(%d)\n", ret); | |
1668 | exit_with_error(ret); | |
1669 | } | |
1670 | } | |
1671 | } | |
1672 | ||
b4b8faa1 MK |
1673 | signal(SIGINT, int_exit); |
1674 | signal(SIGTERM, int_exit); | |
1675 | signal(SIGABRT, int_exit); | |
1676 | ||
1677 | setlocale(LC_ALL, ""); | |
1678 | ||
74e00676 MK |
1679 | if (!opt_quiet) { |
1680 | ret = pthread_create(&pt, NULL, poller, NULL); | |
1681 | if (ret) | |
1682 | exit_with_error(ret); | |
1683 | } | |
b4b8faa1 MK |
1684 | |
1685 | prev_time = get_nsecs(); | |
d3f11b01 | 1686 | start_time = prev_time; |
b4b8faa1 MK |
1687 | |
1688 | if (opt_bench == BENCH_RXDROP) | |
1689 | rx_drop_all(); | |
1690 | else if (opt_bench == BENCH_TXONLY) | |
46738f73 | 1691 | tx_only_all(); |
b4b8faa1 | 1692 | else |
46738f73 | 1693 | l2fwd_all(); |
b4b8faa1 | 1694 | |
ece6e969 JJ |
1695 | benchmark_done = true; |
1696 | ||
74e00676 MK |
1697 | if (!opt_quiet) |
1698 | pthread_join(pt, NULL); | |
d3f11b01 | 1699 | |
69525588 JJ |
1700 | xdpsock_cleanup(); |
1701 | ||
6bc66998 MF |
1702 | munmap(bufs, NUM_FRAMES * opt_xsk_frame_size); |
1703 | ||
b4b8faa1 MK |
1704 | return 0; |
1705 | } |