lib/netdev-afxdp.c
1 /*
2 * Copyright (c) 2018, 2019 Nicira, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <config.h>
18
19 #include "netdev-linux-private.h"
20 #include "netdev-linux.h"
21 #include "netdev-afxdp.h"
22 #include "netdev-afxdp-pool.h"
23
24 #include <errno.h>
25 #include <inttypes.h>
26 #include <linux/rtnetlink.h>
27 #include <linux/if_xdp.h>
28 #include <net/if.h>
29 #include <numa.h>
30 #include <numaif.h>
31 #include <poll.h>
32 #include <stdlib.h>
33 #include <sys/resource.h>
34 #include <sys/socket.h>
35 #include <sys/types.h>
36 #include <unistd.h>
37
38 #include "coverage.h"
39 #include "dp-packet.h"
40 #include "dpif-netdev.h"
41 #include "fatal-signal.h"
42 #include "openvswitch/compiler.h"
43 #include "openvswitch/dynamic-string.h"
44 #include "openvswitch/list.h"
45 #include "openvswitch/thread.h"
46 #include "openvswitch/vlog.h"
47 #include "ovs-numa.h"
48 #include "packets.h"
49 #include "socket-util.h"
50 #include "util.h"
51
52 #ifndef SOL_XDP
53 #define SOL_XDP 283
54 #endif
55
56 COVERAGE_DEFINE(afxdp_cq_empty);
57 COVERAGE_DEFINE(afxdp_fq_full);
58 COVERAGE_DEFINE(afxdp_tx_full);
59 COVERAGE_DEFINE(afxdp_cq_skip);
60
61 VLOG_DEFINE_THIS_MODULE(netdev_afxdp);
62
63 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
64
65 #define MAX_XSKQ 16
66 #define FRAME_HEADROOM XDP_PACKET_HEADROOM
67 #define OVS_XDP_HEADROOM 128
68 #define FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
69 #define FRAME_SHIFT XSK_UMEM__DEFAULT_FRAME_SHIFT
70 #define FRAME_SHIFT_MASK ((1 << FRAME_SHIFT) - 1)
71
72 #define PROD_NUM_DESCS XSK_RING_PROD__DEFAULT_NUM_DESCS
73 #define CONS_NUM_DESCS XSK_RING_CONS__DEFAULT_NUM_DESCS
74
75 #ifdef HAVE_XDP_NEED_WAKEUP
76 #define NEED_WAKEUP_DEFAULT true
77 #else
78 #define NEED_WAKEUP_DEFAULT false
79 #endif
80
81 /* The worst case is that all 4 rings (TX/CQ/RX/FILL) are full and some
82  * packets are still being processed in threads.  The number of packets
83  * currently being processed by OVS is hard to estimate because it depends
84  * on the number of ports.  Setting NUM_FRAMES to twice the total of the
85  * ring sizes should be enough for most corner cases.
86 */
87 #define NUM_FRAMES (4 * (PROD_NUM_DESCS + CONS_NUM_DESCS))
88 #define BATCH_SIZE NETDEV_MAX_BURST
89
90 BUILD_ASSERT_DECL(IS_POW2(NUM_FRAMES));
91 BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
92
93 #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char *)base))
94
95 static struct xsk_socket_info *xsk_configure(int ifindex, int xdp_queue_id,
96 enum afxdp_mode mode,
97 bool use_need_wakeup,
98 bool report_socket_failures);
99 static void xsk_remove_xdp_program(uint32_t ifindex, enum afxdp_mode);
100 static void xsk_destroy(struct xsk_socket_info *xsk);
101 static int xsk_configure_all(struct netdev *netdev);
102 static void xsk_destroy_all(struct netdev *netdev);
103
104 static struct {
105 const char *name;
106 uint32_t bind_flags;
107 uint32_t xdp_flags;
108 } xdp_modes[] = {
109 [OVS_AF_XDP_MODE_UNSPEC] = {
110 .name = "unspecified",
111 .bind_flags = 0,
112 .xdp_flags = 0,
113 },
114 [OVS_AF_XDP_MODE_BEST_EFFORT] = {
115 .name = "best-effort",
116 .bind_flags = 0,
117 .xdp_flags = 0,
118 },
119 [OVS_AF_XDP_MODE_NATIVE_ZC] = {
120 .name = "native-with-zerocopy",
121 .bind_flags = XDP_ZEROCOPY,
122 .xdp_flags = XDP_FLAGS_DRV_MODE,
123 },
124 [OVS_AF_XDP_MODE_NATIVE] = {
125 .name = "native",
126 .bind_flags = XDP_COPY,
127 .xdp_flags = XDP_FLAGS_DRV_MODE,
128 },
129 [OVS_AF_XDP_MODE_GENERIC] = {
130 .name = "generic",
131 .bind_flags = XDP_COPY,
132 .xdp_flags = XDP_FLAGS_SKB_MODE,
133 },
134 };
135
136 struct unused_pool {
137 struct xsk_umem_info *umem_info;
138 int lost_in_rings; /* Number of packets left in tx, rx, cq and fq. */
139 struct ovs_list list_node;
140 };
141
142 static struct ovs_mutex unused_pools_mutex = OVS_MUTEX_INITIALIZER;
143 static struct ovs_list unused_pools OVS_GUARDED_BY(unused_pools_mutex) =
144 OVS_LIST_INITIALIZER(&unused_pools);
145
146 struct xsk_umem_info {
147 struct umem_pool mpool;
148 struct xpacket_pool xpool;
149 struct xsk_ring_prod fq;
150 struct xsk_ring_cons cq;
151 struct xsk_umem *umem;
152 void *buffer;
153 };
154
155 struct xsk_socket_info {
156 struct xsk_ring_cons rx;
157 struct xsk_ring_prod tx;
158 struct xsk_umem_info *umem;
159 struct xsk_socket *xsk;
160 uint32_t outstanding_tx; /* Number of descriptors filled in tx and cq. */
161 uint32_t available_rx; /* Number of descriptors filled in rx and fq. */
162 atomic_uint64_t tx_dropped;
163 };
164
165 struct netdev_afxdp_tx_lock {
166 /* Padding to make netdev_afxdp_tx_lock exactly one cache line long. */
167 PADDED_MEMBERS(CACHE_LINE_SIZE,
168 struct ovs_spin lock;
169 );
170 };
171
172 #ifdef HAVE_XDP_NEED_WAKEUP
173 static inline void
174 xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
175 struct netdev *netdev, int fd)
176 {
177 struct netdev_linux *dev = netdev_linux_cast(netdev);
178 struct pollfd pfd;
179 int ret;
180
181 if (!dev->use_need_wakeup) {
182 return;
183 }
184
185 if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
186 pfd.fd = fd;
187 pfd.events = POLLIN;
188
189 ret = poll(&pfd, 1, 0);
190 if (OVS_UNLIKELY(ret < 0)) {
191 VLOG_WARN_RL(&rl, "%s: error polling rx fd: %s.",
192 netdev_get_name(netdev),
193 ovs_strerror(errno));
194 }
195 }
196 }
197
198 static inline bool
199 xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info)
200 {
201 return xsk_ring_prod__needs_wakeup(&xsk_info->tx);
202 }
203
204 #else /* !HAVE_XDP_NEED_WAKEUP */
205 static inline void
206 xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem OVS_UNUSED,
207 struct netdev *netdev OVS_UNUSED,
208 int fd OVS_UNUSED)
209 {
210 /* Nothing. */
211 }
212
213 static inline bool
214 xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info OVS_UNUSED)
215 {
216 return true;
217 }
218 #endif /* HAVE_XDP_NEED_WAKEUP */
219
220 static void
221 netdev_afxdp_cleanup_unused_pool(struct unused_pool *pool)
222 {
223 /* Free the packet buffer. */
224 free_pagealign(pool->umem_info->buffer);
225
226 /* Cleanup umem pool. */
227 umem_pool_cleanup(&pool->umem_info->mpool);
228
229 /* Cleanup metadata pool. */
230 xpacket_pool_cleanup(&pool->umem_info->xpool);
231
232 free(pool->umem_info);
233 }
234
235 static void
236 netdev_afxdp_sweep_unused_pools(void *aux OVS_UNUSED)
237 {
238 struct unused_pool *pool, *next;
239 unsigned int count;
240
241 ovs_mutex_lock(&unused_pools_mutex);
242 LIST_FOR_EACH_SAFE (pool, next, list_node, &unused_pools) {
243
244 count = umem_pool_count(&pool->umem_info->mpool);
245 ovs_assert(count + pool->lost_in_rings <= NUM_FRAMES);
246
247 if (count + pool->lost_in_rings == NUM_FRAMES) {
248 /* OVS doesn't use this memory pool anymore.  The kernel stopped
249  * using it when the XDP socket was closed.  So, it's safe to
250  * free the pool now. */
251 VLOG_DBG("Freeing umem pool at 0x%"PRIxPTR,
252 (uintptr_t) pool->umem_info);
253 ovs_list_remove(&pool->list_node);
254 netdev_afxdp_cleanup_unused_pool(pool);
255 free(pool);
256 }
257 }
258 ovs_mutex_unlock(&unused_pools_mutex);
259 }
260
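/* Registers 'buffer' of 'size' bytes as an AF_XDP umem, which creates the
 * kernel FILL and COMPLETION rings, then sets up the OVS-side umem element
 * pool ('mpool') and the dp_packet_afxdp metadata pool ('xpool') that shadow
 * the umem frames.  Returns NULL on failure. */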
261 static struct xsk_umem_info *
262 xsk_configure_umem(void *buffer, uint64_t size)
263 {
264 struct xsk_umem_config uconfig;
265 struct xsk_umem_info *umem;
266 int ret;
267 int i;
268
269 umem = xzalloc(sizeof *umem);
270
271 memset(&uconfig, 0, sizeof uconfig);
272 uconfig.fill_size = PROD_NUM_DESCS;
273 uconfig.comp_size = CONS_NUM_DESCS;
274 uconfig.frame_size = FRAME_SIZE;
275 uconfig.frame_headroom = OVS_XDP_HEADROOM;
276
277 ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
278 &uconfig);
279 if (ret) {
280 VLOG_ERR("xsk_umem__create failed: %s.", ovs_strerror(errno));
281 free(umem);
282 return NULL;
283 }
284
285 umem->buffer = buffer;
286
287 /* Set-up umem pool. */
288 if (umem_pool_init(&umem->mpool, NUM_FRAMES) < 0) {
289 VLOG_ERR("umem_pool_init failed");
290 if (xsk_umem__delete(umem->umem)) {
291 VLOG_ERR("xsk_umem__delete failed");
292 }
293 free(umem);
294 return NULL;
295 }
296
297 for (i = NUM_FRAMES - 1; i >= 0; i--) {
298 void *elem;
299
300 elem = ALIGNED_CAST(void *, (char *)umem->buffer + i * FRAME_SIZE);
301 umem_elem_push(&umem->mpool, elem);
302 }
303
304 /* Set-up metadata. */
305 if (xpacket_pool_init(&umem->xpool, NUM_FRAMES) < 0) {
306 VLOG_ERR("xpacket_pool_init failed");
307 umem_pool_cleanup(&umem->mpool);
308 if (xsk_umem__delete(umem->umem)) {
309 VLOG_ERR("xsk_umem__delete failed");
310 }
311 free(umem);
312 return NULL;
313 }
314
315 VLOG_DBG("%s: xpacket pool from %p to %p", __func__,
316 umem->xpool.array,
317 (char *)umem->xpool.array +
318 NUM_FRAMES * sizeof(struct dp_packet_afxdp));
319
320 for (i = NUM_FRAMES - 1; i >= 0; i--) {
321 struct dp_packet_afxdp *xpacket;
322 struct dp_packet *packet;
323
324 xpacket = &umem->xpool.array[i];
325 xpacket->mpool = &umem->mpool;
326
327 packet = &xpacket->packet;
328 packet->source = DPBUF_AFXDP;
329 }
330
331 return umem;
332 }
333
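/* Creates an AF_XDP socket bound to 'queue_id' of the interface given by
 * 'ifindex', using the bind and xdp flags of the requested 'mode', verifies
 * that an XDP program is attached, and pre-populates the FILL ring with
 * PROD_NUM_DESCS frames taken from the umem pool.  Returns NULL on failure. */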
334 static struct xsk_socket_info *
335 xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
336 uint32_t queue_id, enum afxdp_mode mode,
337 bool use_need_wakeup, bool report_socket_failures)
338 {
339 struct xsk_socket_config cfg;
340 struct xsk_socket_info *xsk;
341 char devname[IF_NAMESIZE];
342 uint32_t idx = 0, prog_id;
343 int ret;
344 int i;
345
346 xsk = xzalloc(sizeof *xsk);
347 xsk->umem = umem;
348 cfg.rx_size = CONS_NUM_DESCS;
349 cfg.tx_size = PROD_NUM_DESCS;
350 cfg.libbpf_flags = 0;
351 cfg.bind_flags = xdp_modes[mode].bind_flags;
352 cfg.xdp_flags = xdp_modes[mode].xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST;
353
354 #ifdef HAVE_XDP_NEED_WAKEUP
355 if (use_need_wakeup) {
356 cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
357 }
358 #endif
359
360 if (if_indextoname(ifindex, devname) == NULL) {
361 VLOG_ERR("ifindex %d to devname failed (%s)",
362 ifindex, ovs_strerror(errno));
363 free(xsk);
364 return NULL;
365 }
366
367 ret = xsk_socket__create(&xsk->xsk, devname, queue_id, umem->umem,
368 &xsk->rx, &xsk->tx, &cfg);
369 if (ret) {
370 VLOG(report_socket_failures ? VLL_ERR : VLL_DBG,
371 "xsk_socket__create failed (%s) mode: %s, "
372 "use-need-wakeup: %s, qid: %d",
373 ovs_strerror(errno), xdp_modes[mode].name,
374 use_need_wakeup ? "true" : "false", queue_id);
375 free(xsk);
376 return NULL;
377 }
378
379 /* Make sure the built-in AF_XDP program is loaded. */
380 ret = bpf_get_link_xdp_id(ifindex, &prog_id, cfg.xdp_flags);
381 if (ret || !prog_id) {
382 if (ret) {
383 VLOG_ERR("Get XDP prog ID failed (%s)", ovs_strerror(errno));
384 } else {
385 VLOG_ERR("No XDP program is loaded at ifindex %d", ifindex);
386 }
387 xsk_socket__delete(xsk->xsk);
388 free(xsk);
389 return NULL;
390 }
391
392 while (!xsk_ring_prod__reserve(&xsk->umem->fq,
393 PROD_NUM_DESCS, &idx)) {
394 VLOG_WARN_RL(&rl, "Retry xsk_ring_prod__reserve to FILL queue");
395 }
396
397 for (i = 0;
398 i < PROD_NUM_DESCS * FRAME_SIZE;
399 i += FRAME_SIZE) {
400 void *elem;
401 uint64_t addr;
402
403 elem = umem_elem_pop(&xsk->umem->mpool);
404 addr = UMEM2DESC(elem, xsk->umem->buffer);
405
406 *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = addr;
407 }
408
409 xsk_ring_prod__submit(&xsk->umem->fq,
410 PROD_NUM_DESCS);
411 return xsk;
412 }
413
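/* Allocates and zeroes a page-aligned memory region for NUM_FRAMES frames,
 * registers it as a umem, and opens an AF_XDP socket on 'xdp_queue_id' on top
 * of it.  All allocated resources are released on failure. */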
414 static struct xsk_socket_info *
415 xsk_configure(int ifindex, int xdp_queue_id, enum afxdp_mode mode,
416 bool use_need_wakeup, bool report_socket_failures)
417 {
418 struct xsk_socket_info *xsk;
419 struct xsk_umem_info *umem;
420 void *bufs;
421
422 netdev_afxdp_sweep_unused_pools(NULL);
423
424 /* Umem memory region. */
425 bufs = xmalloc_pagealign(NUM_FRAMES * FRAME_SIZE);
426 memset(bufs, 0, NUM_FRAMES * FRAME_SIZE);
427
428 /* Create AF_XDP socket. */
429 umem = xsk_configure_umem(bufs, NUM_FRAMES * FRAME_SIZE);
430 if (!umem) {
431 free_pagealign(bufs);
432 return NULL;
433 }
434
435 VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);
436
437 xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, mode,
438 use_need_wakeup, report_socket_failures);
439 if (!xsk) {
440 /* Clean up umem and xpacket pool. */
441 if (xsk_umem__delete(umem->umem)) {
442 VLOG_ERR("xsk_umem__delete failed.");
443 }
444 free_pagealign(bufs);
445 umem_pool_cleanup(&umem->mpool);
446 xpacket_pool_cleanup(&umem->xpool);
447 free(umem);
448 }
449 return xsk;
450 }
451
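/* Configures a single AF_XDP socket for 'queue_id' in the given 'mode' and
 * stores it in dev->xsks[queue_id].  Returns 0 on success, -1 on failure. */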
452 static int
453 xsk_configure_queue(struct netdev_linux *dev, int ifindex, int queue_id,
454 enum afxdp_mode mode, bool report_socket_failures)
455 {
456 struct xsk_socket_info *xsk_info;
457
458 VLOG_DBG("%s: configuring queue: %d, mode: %s, use-need-wakeup: %s.",
459 netdev_get_name(&dev->up), queue_id, xdp_modes[mode].name,
460 dev->use_need_wakeup ? "true" : "false");
461 xsk_info = xsk_configure(ifindex, queue_id, mode, dev->use_need_wakeup,
462 report_socket_failures);
463 if (!xsk_info) {
464 VLOG(report_socket_failures ? VLL_ERR : VLL_DBG,
465 "%s: Failed to create AF_XDP socket on queue %d in %s mode.",
466 netdev_get_name(&dev->up), queue_id, xdp_modes[mode].name);
467 dev->xsks[queue_id] = NULL;
468 return -1;
469 }
470 dev->xsks[queue_id] = xsk_info;
471 atomic_init(&xsk_info->tx_dropped, 0);
472 xsk_info->outstanding_tx = 0;
473 xsk_info->available_rx = PROD_NUM_DESCS;
474 return 0;
475 }
476
477
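/* Creates one AF_XDP socket per rx queue of 'netdev'.  In best-effort mode,
 * the first queue is probed with each mode from native-with-zerocopy down to
 * generic, and the first mode that succeeds is used for the remaining queues.
 * Also allocates and initializes the per-txq transmission locks.  Returns 0
 * on success or EINVAL if any socket could not be created. */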
478 static int
479 xsk_configure_all(struct netdev *netdev)
480 {
481 struct netdev_linux *dev = netdev_linux_cast(netdev);
482 int i, ifindex, n_rxq, n_txq;
483 int qid = 0;
484
485 ifindex = linux_get_ifindex(netdev_get_name(netdev));
486
487 ovs_assert(dev->xsks == NULL);
488 ovs_assert(dev->tx_locks == NULL);
489
490 n_rxq = netdev_n_rxq(netdev);
491 dev->xsks = xcalloc(n_rxq, sizeof *dev->xsks);
492
493 if (dev->xdp_mode == OVS_AF_XDP_MODE_BEST_EFFORT) {
494 /* Try to configure the first queue with different modes to
495  * find the most suitable one. */
496 for (i = OVS_AF_XDP_MODE_NATIVE_ZC; i < OVS_AF_XDP_MODE_MAX; i++) {
497 if (!xsk_configure_queue(dev, ifindex, qid, i,
498 i == OVS_AF_XDP_MODE_MAX - 1)) {
499 dev->xdp_mode_in_use = i;
500 VLOG_INFO("%s: %s XDP mode will be in use.",
501 netdev_get_name(netdev), xdp_modes[i].name);
502 break;
503 }
504 }
505 if (i == OVS_AF_XDP_MODE_MAX) {
506 VLOG_ERR("%s: Failed to detect suitable XDP mode.",
507 netdev_get_name(netdev));
508 goto err;
509 }
510 qid++;
511 } else {
512 dev->xdp_mode_in_use = dev->xdp_mode;
513 }
514
515 /* Configure remaining queues. */
516 for (; qid < n_rxq; qid++) {
517 if (xsk_configure_queue(dev, ifindex, qid,
518 dev->xdp_mode_in_use, true)) {
519 VLOG_ERR("%s: Failed to create AF_XDP socket on queue %d.",
520 netdev_get_name(netdev), qid);
521 goto err;
522 }
523 }
524
525 n_txq = netdev_n_txq(netdev);
526 dev->tx_locks = xzalloc_cacheline(n_txq * sizeof *dev->tx_locks);
527
528 for (i = 0; i < n_txq; i++) {
529 ovs_spin_init(&dev->tx_locks[i].lock);
530 }
531
532 return 0;
533
534 err:
535 xsk_destroy_all(netdev);
536 return EINVAL;
537 }
538
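/* Destroys an AF_XDP socket and its umem.  The umem memory pool cannot be
 * freed right away because some frames may still sit in the kernel rings, so
 * it is moved to the 'unused_pools' list and freed later by
 * netdev_afxdp_sweep_unused_pools() once every frame is accounted for. */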
539 static void
540 xsk_destroy(struct xsk_socket_info *xsk_info)
541 {
542 struct xsk_umem *umem;
543 struct unused_pool *pool;
544
545 xsk_socket__delete(xsk_info->xsk);
546 xsk_info->xsk = NULL;
547
548 umem = xsk_info->umem->umem;
549 if (xsk_umem__delete(umem)) {
550 VLOG_ERR("xsk_umem__delete failed.");
551 }
552
553 pool = xzalloc(sizeof *pool);
554 pool->umem_info = xsk_info->umem;
555 pool->lost_in_rings = xsk_info->outstanding_tx + xsk_info->available_rx;
556
557 ovs_mutex_lock(&unused_pools_mutex);
558 ovs_list_push_back(&unused_pools, &pool->list_node);
559 ovs_mutex_unlock(&unused_pools_mutex);
560
561 free(xsk_info);
562
563 netdev_afxdp_sweep_unused_pools(NULL);
564 }
565
566 static void
567 xsk_destroy_all(struct netdev *netdev)
568 {
569 struct netdev_linux *dev = netdev_linux_cast(netdev);
570 int i, ifindex;
571
572 if (dev->xsks) {
573 for (i = 0; i < netdev_n_rxq(netdev); i++) {
574 if (dev->xsks[i]) {
575 xsk_destroy(dev->xsks[i]);
576 dev->xsks[i] = NULL;
577 VLOG_DBG("%s: Destroyed xsk[%d].", netdev_get_name(netdev), i);
578 }
579 }
580
581 free(dev->xsks);
582 dev->xsks = NULL;
583 }
584
585 VLOG_INFO("%s: Removing xdp program.", netdev_get_name(netdev));
586 ifindex = linux_get_ifindex(netdev_get_name(netdev));
587 xsk_remove_xdp_program(ifindex, dev->xdp_mode_in_use);
588
589 if (dev->tx_locks) {
590 for (i = 0; i < netdev_n_txq(netdev); i++) {
591 ovs_spin_destroy(&dev->tx_locks[i].lock);
592 }
593 free_cacheline(dev->tx_locks);
594 dev->tx_locks = NULL;
595 }
596 }
597
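/* Parses the 'n_rxq', 'xdp-mode' and 'use-need-wakeup' options from 'args'
 * and requests a reconfiguration of 'netdev' if any of them differs from the
 * current configuration. */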
598 int
599 netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
600 char **errp OVS_UNUSED)
601 {
602 struct netdev_linux *dev = netdev_linux_cast(netdev);
603 const char *str_xdp_mode;
604 enum afxdp_mode xdp_mode;
605 bool need_wakeup;
606 int new_n_rxq;
607
608 ovs_mutex_lock(&dev->mutex);
609 new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1);
610 if (new_n_rxq > MAX_XSKQ) {
611 ovs_mutex_unlock(&dev->mutex);
612 VLOG_ERR("%s: Too big 'n_rxq' (%d > %d).",
613 netdev_get_name(netdev), new_n_rxq, MAX_XSKQ);
614 return EINVAL;
615 }
616
617 str_xdp_mode = smap_get_def(args, "xdp-mode", "best-effort");
618 for (xdp_mode = OVS_AF_XDP_MODE_BEST_EFFORT;
619 xdp_mode < OVS_AF_XDP_MODE_MAX;
620 xdp_mode++) {
621 if (!strcasecmp(str_xdp_mode, xdp_modes[xdp_mode].name)) {
622 break;
623 }
624 }
625 if (xdp_mode == OVS_AF_XDP_MODE_MAX) {
626 VLOG_ERR("%s: Incorrect xdp-mode (%s).",
627 netdev_get_name(netdev), str_xdp_mode);
628 ovs_mutex_unlock(&dev->mutex);
629 return EINVAL;
630 }
631
632 need_wakeup = smap_get_bool(args, "use-need-wakeup", NEED_WAKEUP_DEFAULT);
633 #ifndef HAVE_XDP_NEED_WAKEUP
634 if (need_wakeup) {
635 VLOG_WARN("XDP need_wakeup is not supported in libbpf.");
636 need_wakeup = false;
637 }
638 #endif
639
640 if (dev->requested_n_rxq != new_n_rxq
641 || dev->requested_xdp_mode != xdp_mode
642 || dev->requested_need_wakeup != need_wakeup) {
643 dev->requested_n_rxq = new_n_rxq;
644 dev->requested_xdp_mode = xdp_mode;
645 dev->requested_need_wakeup = need_wakeup;
646 netdev_request_reconfigure(netdev);
647 }
648 ovs_mutex_unlock(&dev->mutex);
649 return 0;
650 }
651
652 int
653 netdev_afxdp_get_config(const struct netdev *netdev, struct smap *args)
654 {
655 struct netdev_linux *dev = netdev_linux_cast(netdev);
656
657 ovs_mutex_lock(&dev->mutex);
658 smap_add_format(args, "n_rxq", "%d", netdev->n_rxq);
659 smap_add_format(args, "xdp-mode", "%s", xdp_modes[dev->xdp_mode].name);
660 smap_add_format(args, "xdp-mode-in-use", "%s",
661 xdp_modes[dev->xdp_mode_in_use].name);
662 smap_add_format(args, "use-need-wakeup", "%s",
663 dev->use_need_wakeup ? "true" : "false");
664 ovs_mutex_unlock(&dev->mutex);
665 return 0;
666 }
667
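/* Applies the requested configuration: if the number of queues, the XDP mode
 * or the need_wakeup setting changed, all AF_XDP sockets are destroyed and
 * re-created.  While the sockets are being set up, memory allocations are
 * preferred on the netdev's NUMA node; the previous memory policy is restored
 * afterwards. */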
668 int
669 netdev_afxdp_reconfigure(struct netdev *netdev)
670 {
671 struct netdev_linux *dev = netdev_linux_cast(netdev);
672 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
673 struct bitmask *old_bm = NULL;
674 int old_policy, numa_id;
675 int err = 0;
676
677 /* Allocate all the xsk related memory in the netdev's NUMA domain. */
678 if (numa_available() != -1 && ovs_numa_get_n_numas() > 1) {
679 numa_id = netdev_get_numa_id(netdev);
680 if (numa_id != NETDEV_NUMA_UNSPEC) {
681 old_bm = numa_allocate_nodemask();
682 if (get_mempolicy(&old_policy, old_bm->maskp, old_bm->size + 1,
683 NULL, 0)) {
684 VLOG_INFO("Failed to get NUMA memory policy: %s.",
685 ovs_strerror(errno));
686 numa_bitmask_free(old_bm);
687 old_bm = NULL;
688 } else {
689 numa_set_preferred(numa_id);
690 }
691 }
692 }
693
694 ovs_mutex_lock(&dev->mutex);
695
696 if (netdev->n_rxq == dev->requested_n_rxq
697 && dev->xdp_mode == dev->requested_xdp_mode
698 && dev->use_need_wakeup == dev->requested_need_wakeup
699 && dev->xsks) {
700 goto out;
701 }
702
703 xsk_destroy_all(netdev);
704
705 netdev->n_rxq = dev->requested_n_rxq;
706 netdev->n_txq = netdev->n_rxq;
707
708 dev->xdp_mode = dev->requested_xdp_mode;
709 VLOG_INFO("%s: Setting XDP mode to %s.", netdev_get_name(netdev),
710 xdp_modes[dev->xdp_mode].name);
711
712 if (setrlimit(RLIMIT_MEMLOCK, &r)) {
713 VLOG_ERR("setrlimit(RLIMIT_MEMLOCK) failed: %s", ovs_strerror(errno));
714 }
715 dev->use_need_wakeup = dev->requested_need_wakeup;
716
717 err = xsk_configure_all(netdev);
718 if (err) {
719 VLOG_ERR("%s: AF_XDP device reconfiguration failed.",
720 netdev_get_name(netdev));
721 }
722 netdev_change_seq_changed(netdev);
723 out:
724 ovs_mutex_unlock(&dev->mutex);
725 if (old_bm) {
726 if (set_mempolicy(old_policy, old_bm->maskp, old_bm->size + 1)) {
727 VLOG_WARN("Failed to restore NUMA memory policy: %s.",
728 ovs_strerror(errno));
729 /* Can't restore correctly. Try to use localalloc as the most
730 * likely default memory policy. */
731 numa_set_localalloc();
732 }
733 numa_bitmask_free(old_bm);
734 }
735 return err;
736 }
737
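/* Detaches the XDP program from the interface with the given 'ifindex', if
 * one is currently loaded. */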
738 static void
739 xsk_remove_xdp_program(uint32_t ifindex, enum afxdp_mode mode)
740 {
741 uint32_t flags = xdp_modes[mode].xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST;
742 uint32_t ret, prog_id = 0;
743
744 /* Check whether XDP program is loaded. */
745 ret = bpf_get_link_xdp_id(ifindex, &prog_id, flags);
746 if (ret) {
747 VLOG_ERR("Failed to get XDP prog id (%s)", ovs_strerror(errno));
748 return;
749 }
750
751 if (!prog_id) {
752 VLOG_INFO("No XDP program is loaded at ifindex %d", ifindex);
753 return;
754 }
755
756 bpf_set_link_xdp_fd(ifindex, -1, flags);
757 }
758
759 void
760 signal_remove_xdp(struct netdev *netdev)
761 {
762 struct netdev_linux *dev = netdev_linux_cast(netdev);
763 int ifindex;
764
765 ifindex = linux_get_ifindex(netdev_get_name(netdev));
766
767 VLOG_WARN("Force removing xdp program.");
768 xsk_remove_xdp_program(ifindex, dev->xdp_mode_in_use);
769 }
770
771 static struct dp_packet_afxdp *
772 dp_packet_cast_afxdp(const struct dp_packet *d)
773 {
774 ovs_assert(d->source == DPBUF_AFXDP);
775 return CONTAINER_OF(d, struct dp_packet_afxdp, packet);
776 }
777
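/* Tops up the FILL ring with a full batch of free frames from the umem pool
 * so that the kernel always has buffers available for incoming packets.
 * Does nothing if the ring lacks room for a complete batch or the pool
 * cannot supply one. */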
778 static inline void
779 prepare_fill_queue(struct xsk_socket_info *xsk_info)
780 {
781 struct xsk_umem_info *umem;
782 void *elems[BATCH_SIZE];
783 unsigned int idx_fq;
784 int i, ret;
785
786 umem = xsk_info->umem;
787
788 if (xsk_prod_nb_free(&umem->fq, BATCH_SIZE) < BATCH_SIZE) {
789 return;
790 }
791
792 ret = umem_elem_pop_n(&umem->mpool, BATCH_SIZE, elems);
793 if (OVS_UNLIKELY(ret)) {
794 return;
795 }
796
797 if (!xsk_ring_prod__reserve(&umem->fq, BATCH_SIZE, &idx_fq)) {
798 umem_elem_push_n(&umem->mpool, BATCH_SIZE, elems);
799 COVERAGE_INC(afxdp_fq_full);
800 return;
801 }
802
803 for (i = 0; i < BATCH_SIZE; i++) {
804 uint64_t index;
805 void *elem;
806
807 elem = elems[i];
808 index = (uint64_t)((char *)elem - (char *)umem->buffer);
809 ovs_assert((index & FRAME_SHIFT_MASK) == 0);
810 *xsk_ring_prod__fill_addr(&umem->fq, idx_fq) = index;
811
812 idx_fq++;
813 }
814 xsk_ring_prod__submit(&umem->fq, BATCH_SIZE);
815 xsk_info->available_rx += BATCH_SIZE;
816 }
817
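/* Receives up to BATCH_SIZE packets from the AF_XDP RX ring bound to this
 * rxq and turns the descriptors into dp_packets backed by umem frames.
 * Returns 0 on success or EAGAIN if no packets were available. */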
818 int
819 netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
820 int *qfill)
821 {
822 struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
823 struct netdev *netdev = rx->up.netdev;
824 struct netdev_linux *dev = netdev_linux_cast(netdev);
825 struct xsk_socket_info *xsk_info;
826 struct xsk_umem_info *umem;
827 uint32_t idx_rx = 0;
828 int qid = rxq_->queue_id;
829 unsigned int rcvd, i;
830
831 xsk_info = dev->xsks[qid];
832 if (!xsk_info || !xsk_info->xsk) {
833 return EAGAIN;
834 }
835
836 prepare_fill_queue(xsk_info);
837
838 umem = xsk_info->umem;
839 rx->fd = xsk_socket__fd(xsk_info->xsk);
840
841 rcvd = xsk_ring_cons__peek(&xsk_info->rx, BATCH_SIZE, &idx_rx);
842 if (!rcvd) {
843 xsk_rx_wakeup_if_needed(umem, netdev, rx->fd);
844 return EAGAIN;
845 }
846
847 /* Set up a dp_packet batch from the descriptors in the RX queue. */
848 for (i = 0; i < rcvd; i++) {
849 struct dp_packet_afxdp *xpacket;
850 const struct xdp_desc *desc;
851 struct dp_packet *packet;
852 uint64_t addr, index;
853 uint32_t len;
854 char *pkt;
855
856 desc = xsk_ring_cons__rx_desc(&xsk_info->rx, idx_rx);
857 addr = desc->addr;
858 len = desc->len;
859
860 pkt = xsk_umem__get_data(umem->buffer, addr);
861 index = addr >> FRAME_SHIFT;
862 xpacket = &umem->xpool.array[index];
863 packet = &xpacket->packet;
864
865 /* Initialize the struct dp_packet. */
866 dp_packet_use_afxdp(packet, pkt,
867 FRAME_SIZE - FRAME_HEADROOM,
868 OVS_XDP_HEADROOM);
869 dp_packet_set_size(packet, len);
870
871 /* Add packet into batch, increase batch->count. */
872 dp_packet_batch_add(batch, packet);
873
874 idx_rx++;
875 }
876 /* Release the RX queue. */
877 xsk_ring_cons__release(&xsk_info->rx, rcvd);
878 xsk_info->available_rx -= rcvd;
879
880 if (qfill) {
881 /* TODO: return the number of remaining packets in the queue. */
882 *qfill = 0;
883 }
884 return 0;
885 }
886
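/* Kicks the kernel with a zero-length sendto() so that descriptors queued on
 * the TX ring are actually transmitted.  Returns 0 on success or when the
 * failure is transient, or a positive errno value (ENXIO, ENOBUFS or
 * EOPNOTSUPP) on a hard error. */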
887 static inline int
888 kick_tx(struct xsk_socket_info *xsk_info, enum afxdp_mode mode,
889 bool use_need_wakeup)
890 {
891 int ret, retries;
892 static const int KERNEL_TX_BATCH_SIZE = 16;
893
894 if (use_need_wakeup && !xsk_tx_need_wakeup(xsk_info)) {
895 return 0;
896 }
897
898 /* In all modes except native-with-zerocopy, packet transmission is
899  * synchronous, and the kernel transmits only TX_BATCH_SIZE (16) packets
900  * per sendmsg syscall.
901  * So, we have to kick the kernel (n_packets / 16) times to be sure that
902  * all packets are transmitted. */
903 retries = (mode != OVS_AF_XDP_MODE_NATIVE_ZC)
904 ? xsk_info->outstanding_tx / KERNEL_TX_BATCH_SIZE
905 : 0;
906 kick_retry:
907 /* This causes system call into kernel's xsk_sendmsg, and xsk_generic_xmit
908 * (generic and native modes) or xsk_zc_xmit (native-with-zerocopy mode).
909 */
910 ret = sendto(xsk_socket__fd(xsk_info->xsk), NULL, 0, MSG_DONTWAIT,
911 NULL, 0);
912 if (ret < 0) {
913 if (retries-- && errno == EAGAIN) {
914 goto kick_retry;
915 }
916 if (errno == ENXIO || errno == ENOBUFS || errno == EOPNOTSUPP) {
917 return errno;
918 }
919 }
920 /* No error, or EBUSY, or too many retries on EAGAIN. */
921 return 0;
922 }
923
924 void
925 free_afxdp_buf(struct dp_packet *p)
926 {
927 struct dp_packet_afxdp *xpacket;
928 uintptr_t addr;
929
930 xpacket = dp_packet_cast_afxdp(p);
931 if (xpacket->mpool) {
932 void *base = dp_packet_base(p);
933
934 addr = (uintptr_t)base & (~FRAME_SHIFT_MASK);
935 umem_elem_push(xpacket->mpool, (void *)addr);
936 }
937 }
938
939 static void
940 free_afxdp_buf_batch(struct dp_packet_batch *batch)
941 {
942 struct dp_packet_afxdp *xpacket = NULL;
943 struct dp_packet *packet;
944 void *elems[BATCH_SIZE];
945 uintptr_t addr;
946
947 DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
948 void *base;
949
950 xpacket = dp_packet_cast_afxdp(packet);
951 base = dp_packet_base(packet);
952 addr = (uintptr_t)base & (~FRAME_SHIFT_MASK);
953 elems[i] = (void *)addr;
954 }
955 umem_elem_push_n(xpacket->mpool, dp_packet_batch_size(batch), elems);
956 dp_packet_batch_init(batch);
957 }
958
959 static inline bool
960 check_free_batch(struct dp_packet_batch *batch)
961 {
962 struct umem_pool *first_mpool = NULL;
963 struct dp_packet_afxdp *xpacket;
964 struct dp_packet *packet;
965
966 DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
967 if (packet->source != DPBUF_AFXDP) {
968 return false;
969 }
970 xpacket = dp_packet_cast_afxdp(packet);
971 if (i == 0) {
972 first_mpool = xpacket->mpool;
973 continue;
974 }
975 if (xpacket->mpool != first_mpool) {
976 return false;
977 }
978 }
979 /* All packets are DPBUF_AFXDP and from the same mpool. */
980 return true;
981 }
982
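/* Harvests completed transmissions from the COMPLETION ring and returns the
 * corresponding frames to the umem pool.  Addresses already marked with
 * UINT64_MAX were recycled earlier and are skipped. */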
983 static inline void
984 afxdp_complete_tx(struct xsk_socket_info *xsk_info)
985 {
986 void *elems_push[BATCH_SIZE];
987 struct xsk_umem_info *umem;
988 uint32_t idx_cq = 0;
989 int tx_to_free = 0;
990 int tx_done, j;
991
992 umem = xsk_info->umem;
993 tx_done = xsk_ring_cons__peek(&umem->cq, CONS_NUM_DESCS, &idx_cq);
994
995 /* Recycle back to umem pool. */
996 for (j = 0; j < tx_done; j++) {
997 uint64_t *addr;
998 void *elem;
999
1000 addr = (uint64_t *)xsk_ring_cons__comp_addr(&umem->cq, idx_cq++);
1001 if (*addr != UINT64_MAX) {
1002 elem = ALIGNED_CAST(void *, (char *)umem->buffer + *addr);
1003 elems_push[tx_to_free] = elem;
1004 *addr = UINT64_MAX; /* Mark as pushed. */
1005 tx_to_free++;
1006 } else {
1007 /* The elem has been pushed already. */
1008 COVERAGE_INC(afxdp_cq_skip);
1009 }
1010
1011 if (tx_to_free == BATCH_SIZE || j == tx_done - 1) {
1012 umem_elem_push_n(&umem->mpool, tx_to_free, elems_push);
1013 xsk_info->outstanding_tx -= tx_to_free;
1014 tx_to_free = 0;
1015 }
1016 }
1017
1018 if (tx_done > 0) {
1019 xsk_ring_cons__release(&umem->cq, tx_done);
1020 } else {
1021 COVERAGE_INC(afxdp_cq_empty);
1022 }
1023 }
1024
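/* Copies every packet of 'batch' into a free umem frame, posts the frames on
 * the TX ring of the queue's AF_XDP socket and kicks the kernel.  If the umem
 * pool or the TX ring is exhausted, the whole batch is dropped and ENOMEM is
 * returned.  The packets of 'batch' are always freed, either back into their
 * own umem pool or via dp_packet_delete_batch(). */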
1025 static inline int
1026 __netdev_afxdp_batch_send(struct netdev *netdev, int qid,
1027 struct dp_packet_batch *batch)
1028 {
1029 struct netdev_linux *dev = netdev_linux_cast(netdev);
1030 struct xsk_socket_info *xsk_info;
1031 void *elems_pop[BATCH_SIZE];
1032 struct xsk_umem_info *umem;
1033 struct dp_packet *packet;
1034 bool free_batch = false;
1035 unsigned long orig;
1036 uint32_t idx = 0;
1037 int error = 0;
1038 int ret;
1039
1040 xsk_info = dev->xsks[qid];
1041 if (!xsk_info || !xsk_info->xsk) {
1042 goto out;
1043 }
1044
1045 afxdp_complete_tx(xsk_info);
1046
1047 free_batch = check_free_batch(batch);
1048
1049 umem = xsk_info->umem;
1050 ret = umem_elem_pop_n(&umem->mpool, dp_packet_batch_size(batch),
1051 elems_pop);
1052 if (OVS_UNLIKELY(ret)) {
1053 atomic_add_relaxed(&xsk_info->tx_dropped, dp_packet_batch_size(batch),
1054 &orig);
1055 VLOG_WARN_RL(&rl, "%s: send failed due to exhausted memory pool.",
1056 netdev_get_name(netdev));
1057 error = ENOMEM;
1058 goto out;
1059 }
1060
1061 /* Make sure we have enough TX descs. */
1062 ret = xsk_ring_prod__reserve(&xsk_info->tx, dp_packet_batch_size(batch),
1063 &idx);
1064 if (OVS_UNLIKELY(ret == 0)) {
1065 umem_elem_push_n(&umem->mpool, dp_packet_batch_size(batch), elems_pop);
1066 atomic_add_relaxed(&xsk_info->tx_dropped, dp_packet_batch_size(batch),
1067 &orig);
1068 COVERAGE_INC(afxdp_tx_full);
1069 afxdp_complete_tx(xsk_info);
1070 kick_tx(xsk_info, dev->xdp_mode_in_use, dev->use_need_wakeup);
1071 error = ENOMEM;
1072 goto out;
1073 }
1074
1075 DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
1076 uint64_t index;
1077 void *elem;
1078
1079 elem = elems_pop[i];
1080 /* Copy the packet into the umem element we just popped from the pool.
1081  * TODO: avoid this copy if the packet's buffer and the popped element
1082  * are located in the same umem.
1083  */
1084 memcpy(elem, dp_packet_data(packet), dp_packet_size(packet));
1085
1086 index = (uint64_t)((char *)elem - (char *)umem->buffer);
1087 xsk_ring_prod__tx_desc(&xsk_info->tx, idx + i)->addr = index;
1088 xsk_ring_prod__tx_desc(&xsk_info->tx, idx + i)->len
1089 = dp_packet_size(packet);
1090 }
1091 xsk_ring_prod__submit(&xsk_info->tx, dp_packet_batch_size(batch));
1092 xsk_info->outstanding_tx += dp_packet_batch_size(batch);
1093
1094 ret = kick_tx(xsk_info, dev->xdp_mode_in_use, dev->use_need_wakeup);
1095 if (OVS_UNLIKELY(ret)) {
1096 VLOG_WARN_RL(&rl, "%s: error sending AF_XDP packet: %s.",
1097 netdev_get_name(netdev), ovs_strerror(ret));
1098 }
1099
1100 out:
1101 if (free_batch) {
1102 free_afxdp_buf_batch(batch);
1103 } else {
1104 dp_packet_delete_batch(batch, true);
1105 }
1106
1107 return error;
1108 }
1109
1110 int
1111 netdev_afxdp_batch_send(struct netdev *netdev, int qid,
1112 struct dp_packet_batch *batch,
1113 bool concurrent_txq)
1114 {
1115 struct netdev_linux *dev;
1116 int ret;
1117
1118 if (concurrent_txq) {
1119 dev = netdev_linux_cast(netdev);
1120 qid = qid % netdev_n_txq(netdev);
1121
1122 ovs_spin_lock(&dev->tx_locks[qid].lock);
1123 ret = __netdev_afxdp_batch_send(netdev, qid, batch);
1124 ovs_spin_unlock(&dev->tx_locks[qid].lock);
1125 } else {
1126 ret = __netdev_afxdp_batch_send(netdev, qid, batch);
1127 }
1128
1129 return ret;
1130 }
1131
1132 int
1133 netdev_afxdp_rxq_construct(struct netdev_rxq *rxq_ OVS_UNUSED)
1134 {
1135 /* Done at reconfigure. */
1136 return 0;
1137 }
1138
1139 void
1140 netdev_afxdp_rxq_destruct(struct netdev_rxq *rxq_ OVS_UNUSED)
1141 {
1142 /* Nothing. */
1143 }
1144
1145 static int
1146 libbpf_print(enum libbpf_print_level level,
1147 const char *format, va_list args)
1148 {
1149 if (level == LIBBPF_WARN) {
1150 vlog_valist(&this_module, VLL_WARN, format, args);
1151 } else if (level == LIBBPF_INFO) {
1152 vlog_valist(&this_module, VLL_INFO, format, args);
1153 } else {
1154 vlog_valist(&this_module, VLL_DBG, format, args);
1155 }
1156 return 0;
1157 }
1158
1159 int netdev_afxdp_init(void)
1160 {
1161 libbpf_set_print(libbpf_print);
1162 return 0;
1163 }
1164
1165 int
1166 netdev_afxdp_construct(struct netdev *netdev)
1167 {
1168 struct netdev_linux *dev = netdev_linux_cast(netdev);
1169 int ret;
1170
1171 /* Configure common netdev-linux first. */
1172 ret = netdev_linux_construct(netdev);
1173 if (ret) {
1174 return ret;
1175 }
1176
1177 /* Queues should not be used before the first reconfiguration, so clear them. */
1178 netdev->n_rxq = 0;
1179 netdev->n_txq = 0;
1180 dev->xdp_mode = OVS_AF_XDP_MODE_UNSPEC;
1181 dev->xdp_mode_in_use = OVS_AF_XDP_MODE_UNSPEC;
1182
1183 dev->requested_n_rxq = NR_QUEUE;
1184 dev->requested_xdp_mode = OVS_AF_XDP_MODE_BEST_EFFORT;
1185 dev->requested_need_wakeup = NEED_WAKEUP_DEFAULT;
1186
1187 dev->xsks = NULL;
1188 dev->tx_locks = NULL;
1189
1190 netdev_request_reconfigure(netdev);
1191 return 0;
1192 }
1193
1194 void
1195 netdev_afxdp_destruct(struct netdev *netdev)
1196 {
1197 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
1198 struct netdev_linux *dev = netdev_linux_cast(netdev);
1199
1200 if (ovsthread_once_start(&once)) {
1201 fatal_signal_add_hook(netdev_afxdp_sweep_unused_pools,
1202 NULL, NULL, true);
1203 ovsthread_once_done(&once);
1204 }
1205
1206 /* Note: tc is bypassed when using drv-mode, but when using
1207  * skb-mode, we might need to clean up tc. */
1208
1209 xsk_destroy_all(netdev);
1210 ovs_mutex_destroy(&dev->mutex);
1211 }
1212
1213 int
1214 netdev_afxdp_verify_mtu_size(const struct netdev *netdev OVS_UNUSED, int mtu)
1215 {
1216 /*
1217  * If a device is used in skb mode, no driver-specific MTU size is
1218  * checked and any value is allowed, which can result in packet drops.
1219  * This check verifies the maximum supported value based on the
1220  * allocated buffer size and the additional headroom required.
1221  */
1222 if (mtu > (FRAME_SIZE - OVS_XDP_HEADROOM -
1223 XDP_PACKET_HEADROOM - VLAN_ETH_HEADER_LEN)) {
1224 return EINVAL;
1225 }
1226
1227 return 0;
1228 }
1229
1230 int
1231 netdev_afxdp_get_custom_stats(const struct netdev *netdev,
1232 struct netdev_custom_stats *custom_stats)
1233 {
1234 struct netdev_linux *dev = netdev_linux_cast(netdev);
1235 struct xsk_socket_info *xsk_info;
1236 struct xdp_statistics stat;
1237 uint32_t i, c = 0;
1238 socklen_t optlen;
1239
1240 ovs_mutex_lock(&dev->mutex);
1241
1242 #define XDP_CSTATS \
1243 XDP_CSTAT(rx_dropped) \
1244 XDP_CSTAT(rx_invalid_descs) \
1245 XDP_CSTAT(tx_invalid_descs)
1246
1247 #define XDP_CSTAT(NAME) + 1
1248 enum { N_XDP_CSTATS = XDP_CSTATS };
1249 #undef XDP_CSTAT
1250
1251 custom_stats->counters = xcalloc(netdev_n_rxq(netdev) * N_XDP_CSTATS,
1252 sizeof *custom_stats->counters);
1253
1254 /* Collect the XDP statistics from each xsk. */
1255 for (i = 0; i < netdev_n_rxq(netdev); i++) {
1256 xsk_info = dev->xsks[i];
1257 optlen = sizeof stat;
1258
1259 if (xsk_info && !getsockopt(xsk_socket__fd(xsk_info->xsk), SOL_XDP,
1260 XDP_STATISTICS, &stat, &optlen)) {
1261 #define XDP_CSTAT(NAME) \
1262 snprintf(custom_stats->counters[c].name, \
1263 NETDEV_CUSTOM_STATS_NAME_SIZE, \
1264 "xsk_queue_%d_" #NAME, i); \
1265 custom_stats->counters[c++].value = stat.NAME;
1266 XDP_CSTATS;
1267 #undef XDP_CSTAT
1268 }
1269 }
1270 custom_stats->size = c;
1271 ovs_mutex_unlock(&dev->mutex);
1272
1273 return 0;
1274 }
1275
1276 int
1277 netdev_afxdp_get_stats(const struct netdev *netdev,
1278 struct netdev_stats *stats)
1279 {
1280 struct netdev_linux *dev = netdev_linux_cast(netdev);
1281 struct xsk_socket_info *xsk_info;
1282 struct netdev_stats dev_stats;
1283 int error, i;
1284
1285 ovs_mutex_lock(&dev->mutex);
1286
1287 error = get_stats_via_netlink(netdev, &dev_stats);
1288 if (error) {
1289 VLOG_WARN_RL(&rl, "%s: Error getting AF_XDP statistics.",
1290 netdev_get_name(netdev));
1291 } else {
1292 /* Use kernel netdev's packet and byte counts. */
1293 stats->rx_packets = dev_stats.rx_packets;
1294 stats->rx_bytes = dev_stats.rx_bytes;
1295 stats->tx_packets = dev_stats.tx_packets;
1296 stats->tx_bytes = dev_stats.tx_bytes;
1297
1298 stats->rx_errors += dev_stats.rx_errors;
1299 stats->tx_errors += dev_stats.tx_errors;
1300 stats->rx_dropped += dev_stats.rx_dropped;
1301 stats->tx_dropped += dev_stats.tx_dropped;
1302 stats->multicast += dev_stats.multicast;
1303 stats->collisions += dev_stats.collisions;
1304 stats->rx_length_errors += dev_stats.rx_length_errors;
1305 stats->rx_over_errors += dev_stats.rx_over_errors;
1306 stats->rx_crc_errors += dev_stats.rx_crc_errors;
1307 stats->rx_frame_errors += dev_stats.rx_frame_errors;
1308 stats->rx_fifo_errors += dev_stats.rx_fifo_errors;
1309 stats->rx_missed_errors += dev_stats.rx_missed_errors;
1310 stats->tx_aborted_errors += dev_stats.tx_aborted_errors;
1311 stats->tx_carrier_errors += dev_stats.tx_carrier_errors;
1312 stats->tx_fifo_errors += dev_stats.tx_fifo_errors;
1313 stats->tx_heartbeat_errors += dev_stats.tx_heartbeat_errors;
1314 stats->tx_window_errors += dev_stats.tx_window_errors;
1315
1316 /* Add the packets dropped on transmit by each xsk. */
1317 for (i = 0; i < netdev_n_rxq(netdev); i++) {
1318 xsk_info = dev->xsks[i];
1319 if (xsk_info) {
1320 uint64_t tx_dropped;
1321
1322 atomic_read_relaxed(&xsk_info->tx_dropped, &tx_dropped);
1323 stats->tx_dropped += tx_dropped;
1324 }
1325 }
1326 }
1327 ovs_mutex_unlock(&dev->mutex);
1328
1329 return error;
1330 }