/*
 * Copyright (c) 2018, 2019 Nicira, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>

#include "netdev-linux-private.h"
#include "netdev-linux.h"
#include "netdev-afxdp.h"
#include "netdev-afxdp-pool.h"

#include <errno.h>
#include <inttypes.h>
#include <linux/rtnetlink.h>
#include <linux/if_xdp.h>
#include <net/if.h>
#include <poll.h>
#include <stdlib.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

#include "coverage.h"
#include "dp-packet.h"
#include "dpif-netdev.h"
#include "fatal-signal.h"
#include "openvswitch/compiler.h"
#include "openvswitch/dynamic-string.h"
#include "openvswitch/list.h"
#include "openvswitch/thread.h"
#include "openvswitch/vlog.h"
#include "packets.h"
#include "socket-util.h"
#include "util.h"

#ifndef SOL_XDP
#define SOL_XDP 283
#endif

COVERAGE_DEFINE(afxdp_cq_empty);
COVERAGE_DEFINE(afxdp_fq_full);
COVERAGE_DEFINE(afxdp_tx_full);
COVERAGE_DEFINE(afxdp_cq_skip);

VLOG_DEFINE_THIS_MODULE(netdev_afxdp);

static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);

#define MAX_XSKQ 16
#define FRAME_HEADROOM XDP_PACKET_HEADROOM
#define OVS_XDP_HEADROOM 128
#define FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
#define FRAME_SHIFT XSK_UMEM__DEFAULT_FRAME_SHIFT
#define FRAME_SHIFT_MASK ((1 << FRAME_SHIFT) - 1)

#define PROD_NUM_DESCS XSK_RING_PROD__DEFAULT_NUM_DESCS
#define CONS_NUM_DESCS XSK_RING_CONS__DEFAULT_NUM_DESCS

#ifdef HAVE_XDP_NEED_WAKEUP
#define NEED_WAKEUP_DEFAULT true
#else
#define NEED_WAKEUP_DEFAULT false
#endif

/* The worst case is all 4 queues TX/CQ/RX/FILL are full + some packets
 * still being processed in threads.  The number of packets currently in OVS
 * processing is hard to estimate because it depends on the number of ports.
 * Setting NUM_FRAMES twice as large as the total of the ring sizes should be
 * enough for most corner cases.
 */
#define NUM_FRAMES (4 * (PROD_NUM_DESCS + CONS_NUM_DESCS))
#define BATCH_SIZE NETDEV_MAX_BURST

BUILD_ASSERT_DECL(IS_POW2(NUM_FRAMES));
BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);

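/* Converts a umem element pointer into its byte offset within the umem
 * buffer, which is the address format used by AF_XDP ring descriptors. */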
#define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char *)base))

static struct xsk_socket_info *xsk_configure(int ifindex, int xdp_queue_id,
                                             enum afxdp_mode mode,
                                             bool use_need_wakeup,
                                             bool report_socket_failures);
static void xsk_remove_xdp_program(uint32_t ifindex, enum afxdp_mode);
static void xsk_destroy(struct xsk_socket_info *xsk);
static int xsk_configure_all(struct netdev *netdev);
static void xsk_destroy_all(struct netdev *netdev);

static struct {
    const char *name;
    uint32_t bind_flags;
    uint32_t xdp_flags;
} xdp_modes[] = {
    [OVS_AF_XDP_MODE_UNSPEC] = {
        .name = "unspecified",
        .bind_flags = 0,
        .xdp_flags = 0,
    },
    [OVS_AF_XDP_MODE_BEST_EFFORT] = {
        .name = "best-effort",
        .bind_flags = 0,
        .xdp_flags = 0,
    },
    [OVS_AF_XDP_MODE_NATIVE_ZC] = {
        .name = "native-with-zerocopy",
        .bind_flags = XDP_ZEROCOPY,
        .xdp_flags = XDP_FLAGS_DRV_MODE,
    },
    [OVS_AF_XDP_MODE_NATIVE] = {
        .name = "native",
        .bind_flags = XDP_COPY,
        .xdp_flags = XDP_FLAGS_DRV_MODE,
    },
    [OVS_AF_XDP_MODE_GENERIC] = {
        .name = "generic",
        .bind_flags = XDP_COPY,
        .xdp_flags = XDP_FLAGS_SKB_MODE,
    },
};

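/* The 'xdp_modes' table above is indexed by the 'xdp-mode' interface option.
 * A minimal usage sketch (assuming an existing bridge 'br0' and device
 * 'eth0'; see Documentation/intro/install/afxdp.rst for details):
 *
 *   ovs-vsctl add-port br0 eth0 -- set interface eth0 type="afxdp" \
 *       options:n_rxq=1 options:xdp-mode=native-with-zerocopy
 *
 * In "best-effort" mode, modes are probed from native-with-zerocopy down to
 * generic until one succeeds (see xsk_configure_all()). */
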
struct unused_pool {
    struct xsk_umem_info *umem_info;
    int lost_in_rings; /* Number of packets left in tx, rx, cq and fq. */
    struct ovs_list list_node;
};

static struct ovs_mutex unused_pools_mutex = OVS_MUTEX_INITIALIZER;
static struct ovs_list unused_pools OVS_GUARDED_BY(unused_pools_mutex) =
    OVS_LIST_INITIALIZER(&unused_pools);

struct xsk_umem_info {
    struct umem_pool mpool;
    struct xpacket_pool xpool;
    struct xsk_ring_prod fq;
    struct xsk_ring_cons cq;
    struct xsk_umem *umem;
    void *buffer;
};

struct xsk_socket_info {
    struct xsk_ring_cons rx;
    struct xsk_ring_prod tx;
    struct xsk_umem_info *umem;
    struct xsk_socket *xsk;
    uint32_t outstanding_tx; /* Number of descriptors filled in tx and cq. */
    uint32_t available_rx; /* Number of descriptors filled in rx and fq. */
    atomic_uint64_t tx_dropped;
};

struct netdev_afxdp_tx_lock {
    /* Padding to make netdev_afxdp_tx_lock exactly one cache line long. */
    PADDED_MEMBERS(CACHE_LINE_SIZE,
        struct ovs_spin lock;
    );
};

#ifdef HAVE_XDP_NEED_WAKEUP
static inline void
xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem,
                        struct netdev *netdev, int fd)
{
    struct netdev_linux *dev = netdev_linux_cast(netdev);
    struct pollfd pfd;
    int ret;

    if (!dev->use_need_wakeup) {
        return;
    }

    if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
        pfd.fd = fd;
        pfd.events = POLLIN;

        ret = poll(&pfd, 1, 0);
        if (OVS_UNLIKELY(ret < 0)) {
            VLOG_WARN_RL(&rl, "%s: error polling rx fd: %s.",
                         netdev_get_name(netdev),
                         ovs_strerror(errno));
        }
    }
}

static inline bool
xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info)
{
    return xsk_ring_prod__needs_wakeup(&xsk_info->tx);
}

#else /* !HAVE_XDP_NEED_WAKEUP */
static inline void
xsk_rx_wakeup_if_needed(struct xsk_umem_info *umem OVS_UNUSED,
                        struct netdev *netdev OVS_UNUSED,
                        int fd OVS_UNUSED)
{
    /* Nothing. */
}

static inline bool
xsk_tx_need_wakeup(struct xsk_socket_info *xsk_info OVS_UNUSED)
{
    return true;
}
#endif /* HAVE_XDP_NEED_WAKEUP */

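/* A umem pool cannot be freed immediately when its socket is destroyed:
 * some frames may still sit in the kernel rings or in packets that OVS is
 * still processing.  xsk_destroy() therefore parks the pool on the
 * 'unused_pools' list, and the sweep below frees it only once all NUM_FRAMES
 * elements are accounted for again. */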
static void
netdev_afxdp_cleanup_unused_pool(struct unused_pool *pool)
{
    /* Free the packet buffer. */
    free_pagealign(pool->umem_info->buffer);

    /* Cleanup umem pool. */
    umem_pool_cleanup(&pool->umem_info->mpool);

    /* Cleanup metadata pool. */
    xpacket_pool_cleanup(&pool->umem_info->xpool);

    free(pool->umem_info);
}

static void
netdev_afxdp_sweep_unused_pools(void *aux OVS_UNUSED)
{
    struct unused_pool *pool, *next;
    unsigned int count;

    ovs_mutex_lock(&unused_pools_mutex);
    LIST_FOR_EACH_SAFE (pool, next, list_node, &unused_pools) {

        count = umem_pool_count(&pool->umem_info->mpool);
        ovs_assert(count + pool->lost_in_rings <= NUM_FRAMES);

        if (count + pool->lost_in_rings == NUM_FRAMES) {
            /* OVS doesn't use this memory pool anymore.  The kernel hasn't
             * used it since the xdp socket was closed.  So, it's safe to
             * free the pool now. */
            VLOG_DBG("Freeing umem pool at 0x%"PRIxPTR,
                     (uintptr_t) pool->umem_info);
            ovs_list_remove(&pool->list_node);
            netdev_afxdp_cleanup_unused_pool(pool);
            free(pool);
        }
    }
    ovs_mutex_unlock(&unused_pools_mutex);
}

static struct xsk_umem_info *
xsk_configure_umem(void *buffer, uint64_t size)
{
    struct xsk_umem_config uconfig;
    struct xsk_umem_info *umem;
    int ret;
    int i;

    umem = xzalloc(sizeof *umem);

    memset(&uconfig, 0, sizeof uconfig);
    uconfig.fill_size = PROD_NUM_DESCS;
    uconfig.comp_size = CONS_NUM_DESCS;
    uconfig.frame_size = FRAME_SIZE;
    uconfig.frame_headroom = OVS_XDP_HEADROOM;

    ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq,
                           &uconfig);
    if (ret) {
        VLOG_ERR("xsk_umem__create failed: %s.", ovs_strerror(errno));
        free(umem);
        return NULL;
    }

    umem->buffer = buffer;

    /* Set-up umem pool. */
    if (umem_pool_init(&umem->mpool, NUM_FRAMES) < 0) {
        VLOG_ERR("umem_pool_init failed");
        if (xsk_umem__delete(umem->umem)) {
            VLOG_ERR("xsk_umem__delete failed");
        }
        free(umem);
        return NULL;
    }

    for (i = NUM_FRAMES - 1; i >= 0; i--) {
        void *elem;

        elem = ALIGNED_CAST(void *, (char *)umem->buffer + i * FRAME_SIZE);
        umem_elem_push(&umem->mpool, elem);
    }

    /* Set-up metadata. */
    if (xpacket_pool_init(&umem->xpool, NUM_FRAMES) < 0) {
        VLOG_ERR("xpacket_pool_init failed");
        umem_pool_cleanup(&umem->mpool);
        if (xsk_umem__delete(umem->umem)) {
            VLOG_ERR("xsk_umem__delete failed");
        }
        free(umem);
        return NULL;
    }

    VLOG_DBG("%s: xpacket pool from %p to %p", __func__,
             umem->xpool.array,
             (char *)umem->xpool.array +
             NUM_FRAMES * sizeof(struct dp_packet_afxdp));

    for (i = NUM_FRAMES - 1; i >= 0; i--) {
        struct dp_packet_afxdp *xpacket;
        struct dp_packet *packet;

        xpacket = &umem->xpool.array[i];
        xpacket->mpool = &umem->mpool;

        packet = &xpacket->packet;
        packet->source = DPBUF_AFXDP;
    }

    return umem;
}

static struct xsk_socket_info *
xsk_configure_socket(struct xsk_umem_info *umem, uint32_t ifindex,
                     uint32_t queue_id, enum afxdp_mode mode,
                     bool use_need_wakeup, bool report_socket_failures)
{
    struct xsk_socket_config cfg;
    struct xsk_socket_info *xsk;
    char devname[IF_NAMESIZE];
    uint32_t idx = 0, prog_id;
    int ret;
    int i;

    xsk = xzalloc(sizeof *xsk);
    xsk->umem = umem;
    cfg.rx_size = CONS_NUM_DESCS;
    cfg.tx_size = PROD_NUM_DESCS;
    cfg.libbpf_flags = 0;
    cfg.bind_flags = xdp_modes[mode].bind_flags;
    cfg.xdp_flags = xdp_modes[mode].xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST;

#ifdef HAVE_XDP_NEED_WAKEUP
    if (use_need_wakeup) {
        cfg.bind_flags |= XDP_USE_NEED_WAKEUP;
    }
#endif

    if (if_indextoname(ifindex, devname) == NULL) {
        VLOG_ERR("ifindex %d to devname failed (%s)",
                 ifindex, ovs_strerror(errno));
        free(xsk);
        return NULL;
    }

    ret = xsk_socket__create(&xsk->xsk, devname, queue_id, umem->umem,
                             &xsk->rx, &xsk->tx, &cfg);
    if (ret) {
        VLOG(report_socket_failures ? VLL_ERR : VLL_DBG,
             "xsk_socket__create failed (%s) mode: %s, "
             "use-need-wakeup: %s, qid: %d",
             ovs_strerror(errno), xdp_modes[mode].name,
             use_need_wakeup ? "true" : "false", queue_id);
        free(xsk);
        return NULL;
    }

    /* Make sure the built-in AF_XDP program is loaded. */
    ret = bpf_get_link_xdp_id(ifindex, &prog_id, cfg.xdp_flags);
    if (ret || !prog_id) {
        if (ret) {
            VLOG_ERR("Get XDP prog ID failed (%s)", ovs_strerror(errno));
        } else {
            VLOG_ERR("No XDP program is loaded at ifindex %d", ifindex);
        }
        xsk_socket__delete(xsk->xsk);
        free(xsk);
        return NULL;
    }

    while (!xsk_ring_prod__reserve(&xsk->umem->fq,
                                   PROD_NUM_DESCS, &idx)) {
        VLOG_WARN_RL(&rl, "Retry xsk_ring_prod__reserve to FILL queue");
    }

    for (i = 0;
         i < PROD_NUM_DESCS * FRAME_SIZE;
         i += FRAME_SIZE) {
        void *elem;
        uint64_t addr;

        elem = umem_elem_pop(&xsk->umem->mpool);
        addr = UMEM2DESC(elem, xsk->umem->buffer);

        *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = addr;
    }

    xsk_ring_prod__submit(&xsk->umem->fq,
                          PROD_NUM_DESCS);
    return xsk;
}

static struct xsk_socket_info *
xsk_configure(int ifindex, int xdp_queue_id, enum afxdp_mode mode,
              bool use_need_wakeup, bool report_socket_failures)
{
    struct xsk_socket_info *xsk;
    struct xsk_umem_info *umem;
    void *bufs;

    netdev_afxdp_sweep_unused_pools(NULL);

    /* Umem memory region. */
    bufs = xmalloc_pagealign(NUM_FRAMES * FRAME_SIZE);
    memset(bufs, 0, NUM_FRAMES * FRAME_SIZE);

    /* Create AF_XDP socket. */
    umem = xsk_configure_umem(bufs, NUM_FRAMES * FRAME_SIZE);
    if (!umem) {
        free_pagealign(bufs);
        return NULL;
    }

    VLOG_DBG("Allocated umem pool at 0x%"PRIxPTR, (uintptr_t) umem);

    xsk = xsk_configure_socket(umem, ifindex, xdp_queue_id, mode,
                               use_need_wakeup, report_socket_failures);
    if (!xsk) {
        /* Clean up umem and xpacket pool. */
        if (xsk_umem__delete(umem->umem)) {
            VLOG_ERR("xsk_umem__delete failed.");
        }
        free_pagealign(bufs);
        umem_pool_cleanup(&umem->mpool);
        xpacket_pool_cleanup(&umem->xpool);
        free(umem);
    }
    return xsk;
}

static int
xsk_configure_queue(struct netdev_linux *dev, int ifindex, int queue_id,
                    enum afxdp_mode mode, bool report_socket_failures)
{
    struct xsk_socket_info *xsk_info;

    VLOG_DBG("%s: configuring queue: %d, mode: %s, use-need-wakeup: %s.",
             netdev_get_name(&dev->up), queue_id, xdp_modes[mode].name,
             dev->use_need_wakeup ? "true" : "false");
    xsk_info = xsk_configure(ifindex, queue_id, mode, dev->use_need_wakeup,
                             report_socket_failures);
    if (!xsk_info) {
        VLOG(report_socket_failures ? VLL_ERR : VLL_DBG,
             "%s: Failed to create AF_XDP socket on queue %d in %s mode.",
             netdev_get_name(&dev->up), queue_id, xdp_modes[mode].name);
        dev->xsks[queue_id] = NULL;
        return -1;
    }
    dev->xsks[queue_id] = xsk_info;
    atomic_init(&xsk_info->tx_dropped, 0);
    xsk_info->outstanding_tx = 0;
    xsk_info->available_rx = PROD_NUM_DESCS;
    return 0;
}

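/* Creates one AF_XDP socket per rx queue.  In best-effort mode the first
 * queue is used to probe which XDP mode the device actually supports, and
 * the remaining queues are configured with the mode that worked. */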
static int
xsk_configure_all(struct netdev *netdev)
{
    struct netdev_linux *dev = netdev_linux_cast(netdev);
    int i, ifindex, n_rxq, n_txq;
    int qid = 0;

    ifindex = linux_get_ifindex(netdev_get_name(netdev));

    ovs_assert(dev->xsks == NULL);
    ovs_assert(dev->tx_locks == NULL);

    n_rxq = netdev_n_rxq(netdev);
    dev->xsks = xcalloc(n_rxq, sizeof *dev->xsks);

    if (dev->xdp_mode == OVS_AF_XDP_MODE_BEST_EFFORT) {
        /* Try to configure the first queue with different modes to find
         * the most suitable one. */
        for (i = OVS_AF_XDP_MODE_NATIVE_ZC; i < OVS_AF_XDP_MODE_MAX; i++) {
            if (!xsk_configure_queue(dev, ifindex, qid, i,
                                     i == OVS_AF_XDP_MODE_MAX - 1)) {
                dev->xdp_mode_in_use = i;
                VLOG_INFO("%s: %s XDP mode will be in use.",
                          netdev_get_name(netdev), xdp_modes[i].name);
                break;
            }
        }
        if (i == OVS_AF_XDP_MODE_MAX) {
            VLOG_ERR("%s: Failed to detect suitable XDP mode.",
                     netdev_get_name(netdev));
            goto err;
        }
        qid++;
    } else {
        dev->xdp_mode_in_use = dev->xdp_mode;
    }

    /* Configure remaining queues. */
    for (; qid < n_rxq; qid++) {
        if (xsk_configure_queue(dev, ifindex, qid,
                                dev->xdp_mode_in_use, true)) {
            VLOG_ERR("%s: Failed to create AF_XDP socket on queue %d.",
                     netdev_get_name(netdev), qid);
            goto err;
        }
    }

    n_txq = netdev_n_txq(netdev);
    dev->tx_locks = xzalloc_cacheline(n_txq * sizeof *dev->tx_locks);

    for (i = 0; i < n_txq; i++) {
        ovs_spin_init(&dev->tx_locks[i].lock);
    }

    return 0;

err:
    xsk_destroy_all(netdev);
    return EINVAL;
}

static void
xsk_destroy(struct xsk_socket_info *xsk_info)
{
    struct xsk_umem *umem;
    struct unused_pool *pool;

    xsk_socket__delete(xsk_info->xsk);
    xsk_info->xsk = NULL;

    umem = xsk_info->umem->umem;
    if (xsk_umem__delete(umem)) {
        VLOG_ERR("xsk_umem__delete failed.");
    }

    pool = xzalloc(sizeof *pool);
    pool->umem_info = xsk_info->umem;
    pool->lost_in_rings = xsk_info->outstanding_tx + xsk_info->available_rx;

    ovs_mutex_lock(&unused_pools_mutex);
    ovs_list_push_back(&unused_pools, &pool->list_node);
    ovs_mutex_unlock(&unused_pools_mutex);

    free(xsk_info);

    netdev_afxdp_sweep_unused_pools(NULL);
}

static void
xsk_destroy_all(struct netdev *netdev)
{
    struct netdev_linux *dev = netdev_linux_cast(netdev);
    int i, ifindex;

    if (dev->xsks) {
        for (i = 0; i < netdev_n_rxq(netdev); i++) {
            if (dev->xsks[i]) {
                xsk_destroy(dev->xsks[i]);
                dev->xsks[i] = NULL;
                VLOG_DBG("%s: Destroyed xsk[%d].", netdev_get_name(netdev), i);
            }
        }

        free(dev->xsks);
        dev->xsks = NULL;
    }

    VLOG_INFO("%s: Removing xdp program.", netdev_get_name(netdev));
    ifindex = linux_get_ifindex(netdev_get_name(netdev));
    xsk_remove_xdp_program(ifindex, dev->xdp_mode_in_use);

    if (dev->tx_locks) {
        for (i = 0; i < netdev_n_txq(netdev); i++) {
            ovs_spin_destroy(&dev->tx_locks[i].lock);
        }
        free_cacheline(dev->tx_locks);
        dev->tx_locks = NULL;
    }
}

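/* Parses the AF_XDP specific interface options: 'n_rxq', 'xdp-mode' and
 * 'use-need-wakeup'.  If any of them changed, stores the requested values
 * and asks for a reconfiguration of the netdev. */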
int
netdev_afxdp_set_config(struct netdev *netdev, const struct smap *args,
                        char **errp OVS_UNUSED)
{
    struct netdev_linux *dev = netdev_linux_cast(netdev);
    const char *str_xdp_mode;
    enum afxdp_mode xdp_mode;
    bool need_wakeup;
    int new_n_rxq;

    ovs_mutex_lock(&dev->mutex);
    new_n_rxq = MAX(smap_get_int(args, "n_rxq", NR_QUEUE), 1);
    if (new_n_rxq > MAX_XSKQ) {
        ovs_mutex_unlock(&dev->mutex);
        VLOG_ERR("%s: Too big 'n_rxq' (%d > %d).",
                 netdev_get_name(netdev), new_n_rxq, MAX_XSKQ);
        return EINVAL;
    }

    str_xdp_mode = smap_get_def(args, "xdp-mode", "best-effort");
    for (xdp_mode = OVS_AF_XDP_MODE_BEST_EFFORT;
         xdp_mode < OVS_AF_XDP_MODE_MAX;
         xdp_mode++) {
        if (!strcasecmp(str_xdp_mode, xdp_modes[xdp_mode].name)) {
            break;
        }
    }
    if (xdp_mode == OVS_AF_XDP_MODE_MAX) {
        VLOG_ERR("%s: Incorrect xdp-mode (%s).",
                 netdev_get_name(netdev), str_xdp_mode);
        ovs_mutex_unlock(&dev->mutex);
        return EINVAL;
    }

    need_wakeup = smap_get_bool(args, "use-need-wakeup", NEED_WAKEUP_DEFAULT);
#ifndef HAVE_XDP_NEED_WAKEUP
    if (need_wakeup) {
        VLOG_WARN("XDP need_wakeup is not supported in libbpf.");
        need_wakeup = false;
    }
#endif

    if (dev->requested_n_rxq != new_n_rxq
        || dev->requested_xdp_mode != xdp_mode
        || dev->requested_need_wakeup != need_wakeup) {
        dev->requested_n_rxq = new_n_rxq;
        dev->requested_xdp_mode = xdp_mode;
        dev->requested_need_wakeup = need_wakeup;
        netdev_request_reconfigure(netdev);
    }
    ovs_mutex_unlock(&dev->mutex);
    return 0;
}

int
netdev_afxdp_get_config(const struct netdev *netdev, struct smap *args)
{
    struct netdev_linux *dev = netdev_linux_cast(netdev);

    ovs_mutex_lock(&dev->mutex);
    smap_add_format(args, "n_rxq", "%d", netdev->n_rxq);
    smap_add_format(args, "xdp-mode", "%s", xdp_modes[dev->xdp_mode].name);
    smap_add_format(args, "xdp-mode-in-use", "%s",
                    xdp_modes[dev->xdp_mode_in_use].name);
    smap_add_format(args, "use-need-wakeup", "%s",
                    dev->use_need_wakeup ? "true" : "false");
    ovs_mutex_unlock(&dev->mutex);
    return 0;
}

int
netdev_afxdp_reconfigure(struct netdev *netdev)
{
    struct netdev_linux *dev = netdev_linux_cast(netdev);
    struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
    int err = 0;

    ovs_mutex_lock(&dev->mutex);

    if (netdev->n_rxq == dev->requested_n_rxq
        && dev->xdp_mode == dev->requested_xdp_mode
        && dev->use_need_wakeup == dev->requested_need_wakeup
        && dev->xsks) {
        goto out;
    }

    xsk_destroy_all(netdev);

    netdev->n_rxq = dev->requested_n_rxq;
    netdev->n_txq = netdev->n_rxq;

    dev->xdp_mode = dev->requested_xdp_mode;
    VLOG_INFO("%s: Setting XDP mode to %s.", netdev_get_name(netdev),
              xdp_modes[dev->xdp_mode].name);

    if (setrlimit(RLIMIT_MEMLOCK, &r)) {
        VLOG_ERR("setrlimit(RLIMIT_MEMLOCK) failed: %s", ovs_strerror(errno));
    }
    dev->use_need_wakeup = dev->requested_need_wakeup;

    err = xsk_configure_all(netdev);
    if (err) {
        VLOG_ERR("%s: AF_XDP device reconfiguration failed.",
                 netdev_get_name(netdev));
    }
    netdev_change_seq_changed(netdev);
out:
    ovs_mutex_unlock(&dev->mutex);
    return err;
}

static void
xsk_remove_xdp_program(uint32_t ifindex, enum afxdp_mode mode)
{
    uint32_t flags = xdp_modes[mode].xdp_flags | XDP_FLAGS_UPDATE_IF_NOEXIST;
    uint32_t ret, prog_id = 0;

    /* Check whether XDP program is loaded. */
    ret = bpf_get_link_xdp_id(ifindex, &prog_id, flags);
    if (ret) {
        VLOG_ERR("Failed to get XDP prog id (%s)", ovs_strerror(errno));
        return;
    }

    if (!prog_id) {
        VLOG_INFO("No XDP program is loaded at ifindex %d", ifindex);
        return;
    }

    bpf_set_link_xdp_fd(ifindex, -1, flags);
}

void
signal_remove_xdp(struct netdev *netdev)
{
    struct netdev_linux *dev = netdev_linux_cast(netdev);
    int ifindex;

    ifindex = linux_get_ifindex(netdev_get_name(netdev));

    VLOG_WARN("Force removing xdp program.");
    xsk_remove_xdp_program(ifindex, dev->xdp_mode_in_use);
}

static struct dp_packet_afxdp *
dp_packet_cast_afxdp(const struct dp_packet *d)
{
    ovs_assert(d->source == DPBUF_AFXDP);
    return CONTAINER_OF(d, struct dp_packet_afxdp, packet);
}

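/* Replenishes the FILL queue with BATCH_SIZE free umem elements so that the
 * kernel always has buffers to place received frames into.  Bails out early
 * if the ring or the memory pool cannot accommodate a full batch. */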
static inline void
prepare_fill_queue(struct xsk_socket_info *xsk_info)
{
    struct xsk_umem_info *umem;
    void *elems[BATCH_SIZE];
    unsigned int idx_fq;
    int i, ret;

    umem = xsk_info->umem;

    if (xsk_prod_nb_free(&umem->fq, BATCH_SIZE) < BATCH_SIZE) {
        return;
    }

    ret = umem_elem_pop_n(&umem->mpool, BATCH_SIZE, elems);
    if (OVS_UNLIKELY(ret)) {
        return;
    }

    if (!xsk_ring_prod__reserve(&umem->fq, BATCH_SIZE, &idx_fq)) {
        umem_elem_push_n(&umem->mpool, BATCH_SIZE, elems);
        COVERAGE_INC(afxdp_fq_full);
        return;
    }

    for (i = 0; i < BATCH_SIZE; i++) {
        uint64_t index;
        void *elem;

        elem = elems[i];
        index = (uint64_t)((char *)elem - (char *)umem->buffer);
        ovs_assert((index & FRAME_SHIFT_MASK) == 0);
        *xsk_ring_prod__fill_addr(&umem->fq, idx_fq) = index;

        idx_fq++;
    }
    xsk_ring_prod__submit(&umem->fq, BATCH_SIZE);
    xsk_info->available_rx += BATCH_SIZE;
}

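/* Receives packets on rx queue 'rxq_': refills the FILL queue, peeks up to
 * BATCH_SIZE descriptors from the RX ring and wraps each received frame
 * into a DPBUF_AFXDP dp_packet without copying the payload. */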
int
netdev_afxdp_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet_batch *batch,
                      int *qfill)
{
    struct netdev_rxq_linux *rx = netdev_rxq_linux_cast(rxq_);
    struct netdev *netdev = rx->up.netdev;
    struct netdev_linux *dev = netdev_linux_cast(netdev);
    struct xsk_socket_info *xsk_info;
    struct xsk_umem_info *umem;
    uint32_t idx_rx = 0;
    int qid = rxq_->queue_id;
    unsigned int rcvd, i;

    xsk_info = dev->xsks[qid];
    if (!xsk_info || !xsk_info->xsk) {
        return EAGAIN;
    }

    prepare_fill_queue(xsk_info);

    umem = xsk_info->umem;
    rx->fd = xsk_socket__fd(xsk_info->xsk);

    rcvd = xsk_ring_cons__peek(&xsk_info->rx, BATCH_SIZE, &idx_rx);
    if (!rcvd) {
        xsk_rx_wakeup_if_needed(umem, netdev, rx->fd);
        return EAGAIN;
    }

    /* Setup a dp_packet batch from descriptors in RX queue. */
    for (i = 0; i < rcvd; i++) {
        struct dp_packet_afxdp *xpacket;
        const struct xdp_desc *desc;
        struct dp_packet *packet;
        uint64_t addr, index;
        uint32_t len;
        char *pkt;

        desc = xsk_ring_cons__rx_desc(&xsk_info->rx, idx_rx);
        addr = desc->addr;
        len = desc->len;

        pkt = xsk_umem__get_data(umem->buffer, addr);
        index = addr >> FRAME_SHIFT;
        xpacket = &umem->xpool.array[index];
        packet = &xpacket->packet;

        /* Initialize the struct dp_packet. */
        dp_packet_use_afxdp(packet, pkt,
                            FRAME_SIZE - FRAME_HEADROOM,
                            OVS_XDP_HEADROOM);
        dp_packet_set_size(packet, len);

        /* Add packet into batch, increase batch->count. */
        dp_packet_batch_add(batch, packet);

        idx_rx++;
    }
    /* Release the RX queue. */
    xsk_ring_cons__release(&xsk_info->rx, rcvd);
    xsk_info->available_rx -= rcvd;

    if (qfill) {
        /* TODO: return the number of remaining packets in the queue. */
        *qfill = 0;
    }
    return 0;
}

static inline int
kick_tx(struct xsk_socket_info *xsk_info, enum afxdp_mode mode,
        bool use_need_wakeup)
{
    int ret, retries;
    static const int KERNEL_TX_BATCH_SIZE = 16;

    if (use_need_wakeup && !xsk_tx_need_wakeup(xsk_info)) {
        return 0;
    }

    /* In all modes except native-with-zerocopy, packet transmission is
     * synchronous, and the kernel xmits only TX_BATCH_SIZE (16) packets for
     * a single sendmsg syscall.
     * So, we have to kick the kernel (n_packets / 16) times to be sure that
     * all packets are transmitted. */
    retries = (mode != OVS_AF_XDP_MODE_NATIVE_ZC)
              ? xsk_info->outstanding_tx / KERNEL_TX_BATCH_SIZE
              : 0;
kick_retry:
    /* This causes a system call into the kernel's xsk_sendmsg, and then into
     * xsk_generic_xmit (generic and native modes) or xsk_zc_xmit
     * (native-with-zerocopy mode). */
    ret = sendto(xsk_socket__fd(xsk_info->xsk), NULL, 0, MSG_DONTWAIT,
                 NULL, 0);
    if (ret < 0) {
        if (retries-- && errno == EAGAIN) {
            goto kick_retry;
        }
        if (errno == ENXIO || errno == ENOBUFS || errno == EOPNOTSUPP) {
            return errno;
        }
    }
    /* No error, or EBUSY, or too many retries on EAGAIN. */
    return 0;
}

void
free_afxdp_buf(struct dp_packet *p)
{
    struct dp_packet_afxdp *xpacket;
    uintptr_t addr;

    xpacket = dp_packet_cast_afxdp(p);
    if (xpacket->mpool) {
        void *base = dp_packet_base(p);

        addr = (uintptr_t)base & (~FRAME_SHIFT_MASK);
        umem_elem_push(xpacket->mpool, (void *)addr);
    }
}

static void
free_afxdp_buf_batch(struct dp_packet_batch *batch)
{
    struct dp_packet_afxdp *xpacket = NULL;
    struct dp_packet *packet;
    void *elems[BATCH_SIZE];
    uintptr_t addr;

    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
        void *base;

        xpacket = dp_packet_cast_afxdp(packet);
        base = dp_packet_base(packet);
        addr = (uintptr_t)base & (~FRAME_SHIFT_MASK);
        elems[i] = (void *)addr;
    }
    umem_elem_push_n(xpacket->mpool, dp_packet_batch_size(batch), elems);
    dp_packet_batch_init(batch);
}

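/* A batch can be returned to the memory pool in one operation only if every
 * packet in it is an AF_XDP packet and all of them come from the same umem
 * pool. */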
static inline bool
check_free_batch(struct dp_packet_batch *batch)
{
    struct umem_pool *first_mpool = NULL;
    struct dp_packet_afxdp *xpacket;
    struct dp_packet *packet;

    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
        if (packet->source != DPBUF_AFXDP) {
            return false;
        }
        xpacket = dp_packet_cast_afxdp(packet);
        if (i == 0) {
            first_mpool = xpacket->mpool;
            continue;
        }
        if (xpacket->mpool != first_mpool) {
            return false;
        }
    }
    /* All packets are DPBUF_AFXDP and from the same mpool. */
    return true;
}

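/* Reaps completed transmissions from the COMPLETION queue and returns their
 * umem elements to the memory pool.  A recycled entry is marked with
 * UINT64_MAX so that a descriptor reported more than once is not pushed
 * twice (counted as afxdp_cq_skip). */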
static inline void
afxdp_complete_tx(struct xsk_socket_info *xsk_info)
{
    void *elems_push[BATCH_SIZE];
    struct xsk_umem_info *umem;
    uint32_t idx_cq = 0;
    int tx_to_free = 0;
    int tx_done, j;

    umem = xsk_info->umem;
    tx_done = xsk_ring_cons__peek(&umem->cq, CONS_NUM_DESCS, &idx_cq);

    /* Recycle back to umem pool. */
    for (j = 0; j < tx_done; j++) {
        uint64_t *addr;
        void *elem;

        addr = (uint64_t *)xsk_ring_cons__comp_addr(&umem->cq, idx_cq++);
        if (*addr != UINT64_MAX) {
            elem = ALIGNED_CAST(void *, (char *)umem->buffer + *addr);
            elems_push[tx_to_free] = elem;
            *addr = UINT64_MAX; /* Mark as pushed. */
            tx_to_free++;
        } else {
            /* The elem has been pushed already. */
            COVERAGE_INC(afxdp_cq_skip);
        }

        if (tx_to_free == BATCH_SIZE || j == tx_done - 1) {
            umem_elem_push_n(&umem->mpool, tx_to_free, elems_push);
            xsk_info->outstanding_tx -= tx_to_free;
            tx_to_free = 0;
        }
    }

    if (tx_done > 0) {
        xsk_ring_cons__release(&umem->cq, tx_done);
    } else {
        COVERAGE_INC(afxdp_cq_empty);
    }
}

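/* Transmits a batch on tx queue 'qid': reaps previously completed
 * descriptors, pops fresh umem elements, copies each packet into its
 * element, posts the descriptors on the TX ring and kicks the kernel.
 * Packets that cannot be sent are accounted as tx_dropped. */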
static inline int
__netdev_afxdp_batch_send(struct netdev *netdev, int qid,
                          struct dp_packet_batch *batch)
{
    struct netdev_linux *dev = netdev_linux_cast(netdev);
    struct xsk_socket_info *xsk_info;
    void *elems_pop[BATCH_SIZE];
    struct xsk_umem_info *umem;
    struct dp_packet *packet;
    bool free_batch = false;
    unsigned long orig;
    uint32_t idx = 0;
    int error = 0;
    int ret;

    xsk_info = dev->xsks[qid];
    if (!xsk_info || !xsk_info->xsk) {
        goto out;
    }

    afxdp_complete_tx(xsk_info);

    free_batch = check_free_batch(batch);

    umem = xsk_info->umem;
    ret = umem_elem_pop_n(&umem->mpool, dp_packet_batch_size(batch),
                          elems_pop);
    if (OVS_UNLIKELY(ret)) {
        atomic_add_relaxed(&xsk_info->tx_dropped, dp_packet_batch_size(batch),
                           &orig);
        VLOG_WARN_RL(&rl, "%s: send failed due to exhausted memory pool.",
                     netdev_get_name(netdev));
        error = ENOMEM;
        goto out;
    }

    /* Make sure we have enough TX descs. */
    ret = xsk_ring_prod__reserve(&xsk_info->tx, dp_packet_batch_size(batch),
                                 &idx);
    if (OVS_UNLIKELY(ret == 0)) {
        umem_elem_push_n(&umem->mpool, dp_packet_batch_size(batch), elems_pop);
        atomic_add_relaxed(&xsk_info->tx_dropped, dp_packet_batch_size(batch),
                           &orig);
        COVERAGE_INC(afxdp_tx_full);
        afxdp_complete_tx(xsk_info);
        kick_tx(xsk_info, dev->xdp_mode_in_use, dev->use_need_wakeup);
        error = ENOMEM;
        goto out;
    }

    DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
        uint64_t index;
        void *elem;

        elem = elems_pop[i];
        /* Copy the packet into the umem element we just popped from the
         * umem pool.
         * TODO: avoid this copy if the packet and the popped umem element
         * are located in the same umem.
         */
        memcpy(elem, dp_packet_data(packet), dp_packet_size(packet));

        index = (uint64_t)((char *)elem - (char *)umem->buffer);
        xsk_ring_prod__tx_desc(&xsk_info->tx, idx + i)->addr = index;
        xsk_ring_prod__tx_desc(&xsk_info->tx, idx + i)->len
            = dp_packet_size(packet);
    }
    xsk_ring_prod__submit(&xsk_info->tx, dp_packet_batch_size(batch));
    xsk_info->outstanding_tx += dp_packet_batch_size(batch);

    ret = kick_tx(xsk_info, dev->xdp_mode_in_use, dev->use_need_wakeup);
    if (OVS_UNLIKELY(ret)) {
        VLOG_WARN_RL(&rl, "%s: error sending AF_XDP packet: %s.",
                     netdev_get_name(netdev), ovs_strerror(ret));
    }

out:
    if (free_batch) {
        free_afxdp_buf_batch(batch);
    } else {
        dp_packet_delete_batch(batch, true);
    }

    return error;
}

int
netdev_afxdp_batch_send(struct netdev *netdev, int qid,
                        struct dp_packet_batch *batch,
                        bool concurrent_txq)
{
    struct netdev_linux *dev;
    int ret;

    if (concurrent_txq) {
        dev = netdev_linux_cast(netdev);
        qid = qid % netdev_n_txq(netdev);

        ovs_spin_lock(&dev->tx_locks[qid].lock);
        ret = __netdev_afxdp_batch_send(netdev, qid, batch);
        ovs_spin_unlock(&dev->tx_locks[qid].lock);
    } else {
        ret = __netdev_afxdp_batch_send(netdev, qid, batch);
    }

    return ret;
}

int
netdev_afxdp_rxq_construct(struct netdev_rxq *rxq_ OVS_UNUSED)
{
    /* Done at reconfigure. */
    return 0;
}

void
netdev_afxdp_rxq_destruct(struct netdev_rxq *rxq_ OVS_UNUSED)
{
    /* Nothing. */
}

static int
libbpf_print(enum libbpf_print_level level,
             const char *format, va_list args)
{
    if (level == LIBBPF_WARN) {
        vlog_valist(&this_module, VLL_WARN, format, args);
    } else if (level == LIBBPF_INFO) {
        vlog_valist(&this_module, VLL_INFO, format, args);
    } else {
        vlog_valist(&this_module, VLL_DBG, format, args);
    }
    return 0;
}

int netdev_afxdp_init(void)
{
    libbpf_set_print(libbpf_print);
    return 0;
}

int
netdev_afxdp_construct(struct netdev *netdev)
{
    struct netdev_linux *dev = netdev_linux_cast(netdev);
    int ret;

    /* Configure common netdev-linux first. */
    ret = netdev_linux_construct(netdev);
    if (ret) {
        return ret;
    }

    /* Queues should not be used before the first reconfiguration. Clearing. */
    netdev->n_rxq = 0;
    netdev->n_txq = 0;
    dev->xdp_mode = OVS_AF_XDP_MODE_UNSPEC;
    dev->xdp_mode_in_use = OVS_AF_XDP_MODE_UNSPEC;

    dev->requested_n_rxq = NR_QUEUE;
    dev->requested_xdp_mode = OVS_AF_XDP_MODE_BEST_EFFORT;
    dev->requested_need_wakeup = NEED_WAKEUP_DEFAULT;

    dev->xsks = NULL;
    dev->tx_locks = NULL;

    netdev_request_reconfigure(netdev);
    return 0;
}

void
netdev_afxdp_destruct(struct netdev *netdev)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
    struct netdev_linux *dev = netdev_linux_cast(netdev);

    if (ovsthread_once_start(&once)) {
        fatal_signal_add_hook(netdev_afxdp_sweep_unused_pools,
                              NULL, NULL, true);
        ovsthread_once_done(&once);
    }

    /* Note: tc is by-passed when using drv-mode, but when using
     * skb-mode, we might need to clean up tc. */

    xsk_destroy_all(netdev);
    ovs_mutex_destroy(&dev->mutex);
}

int
netdev_afxdp_verify_mtu_size(const struct netdev *netdev OVS_UNUSED, int mtu)
{
    /*
     * If a device is used in skb mode, no driver-specific MTU size is
     * checked and any value is allowed, which can result in packet drops.
     * This check verifies the maximum supported value based on the
     * allocated buffer size and the additional headroom required.
     */
    if (mtu > (FRAME_SIZE - OVS_XDP_HEADROOM -
               XDP_PACKET_HEADROOM - VLAN_ETH_HEADER_LEN)) {
        return EINVAL;
    }

    return 0;
}

int
netdev_afxdp_get_custom_stats(const struct netdev *netdev,
                              struct netdev_custom_stats *custom_stats)
{
    struct netdev_linux *dev = netdev_linux_cast(netdev);
    struct xsk_socket_info *xsk_info;
    struct xdp_statistics stat;
    uint32_t i, c = 0;
    socklen_t optlen;

    ovs_mutex_lock(&dev->mutex);

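    /* X-macro: XDP_CSTATS expands once to count the per-queue counters
     * (N_XDP_CSTATS) and once below to emit an "xsk_queue_<i>_<name>"
     * entry for each counter of each queue. */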
#define XDP_CSTATS \
    XDP_CSTAT(rx_dropped) \
    XDP_CSTAT(rx_invalid_descs) \
    XDP_CSTAT(tx_invalid_descs)

#define XDP_CSTAT(NAME) + 1
    enum { N_XDP_CSTATS = XDP_CSTATS };
#undef XDP_CSTAT

    custom_stats->counters = xcalloc(netdev_n_rxq(netdev) * N_XDP_CSTATS,
                                     sizeof *custom_stats->counters);

    /* Account the stats for each xsk. */
    for (i = 0; i < netdev_n_rxq(netdev); i++) {
        xsk_info = dev->xsks[i];
        optlen = sizeof stat;

        if (xsk_info && !getsockopt(xsk_socket__fd(xsk_info->xsk), SOL_XDP,
                                    XDP_STATISTICS, &stat, &optlen)) {
#define XDP_CSTAT(NAME) \
            snprintf(custom_stats->counters[c].name, \
                     NETDEV_CUSTOM_STATS_NAME_SIZE, \
                     "xsk_queue_%d_" #NAME, i); \
            custom_stats->counters[c++].value = stat.NAME;
            XDP_CSTATS;
#undef XDP_CSTAT
        }
    }
    custom_stats->size = c;
    ovs_mutex_unlock(&dev->mutex);

    return 0;
}

int
netdev_afxdp_get_stats(const struct netdev *netdev,
                       struct netdev_stats *stats)
{
    struct netdev_linux *dev = netdev_linux_cast(netdev);
    struct xsk_socket_info *xsk_info;
    struct netdev_stats dev_stats;
    int error, i;

    ovs_mutex_lock(&dev->mutex);

    error = get_stats_via_netlink(netdev, &dev_stats);
    if (error) {
        VLOG_WARN_RL(&rl, "%s: Error getting AF_XDP statistics.",
                     netdev_get_name(netdev));
    } else {
        /* Use kernel netdev's packet and byte counts. */
        stats->rx_packets = dev_stats.rx_packets;
        stats->rx_bytes = dev_stats.rx_bytes;
        stats->tx_packets = dev_stats.tx_packets;
        stats->tx_bytes = dev_stats.tx_bytes;

        stats->rx_errors += dev_stats.rx_errors;
        stats->tx_errors += dev_stats.tx_errors;
        stats->rx_dropped += dev_stats.rx_dropped;
        stats->tx_dropped += dev_stats.tx_dropped;
        stats->multicast += dev_stats.multicast;
        stats->collisions += dev_stats.collisions;
        stats->rx_length_errors += dev_stats.rx_length_errors;
        stats->rx_over_errors += dev_stats.rx_over_errors;
        stats->rx_crc_errors += dev_stats.rx_crc_errors;
        stats->rx_frame_errors += dev_stats.rx_frame_errors;
        stats->rx_fifo_errors += dev_stats.rx_fifo_errors;
        stats->rx_missed_errors += dev_stats.rx_missed_errors;
        stats->tx_aborted_errors += dev_stats.tx_aborted_errors;
        stats->tx_carrier_errors += dev_stats.tx_carrier_errors;
        stats->tx_fifo_errors += dev_stats.tx_fifo_errors;
        stats->tx_heartbeat_errors += dev_stats.tx_heartbeat_errors;
        stats->tx_window_errors += dev_stats.tx_window_errors;

        /* Account for the drops in each xsk. */
        for (i = 0; i < netdev_n_rxq(netdev); i++) {
            xsk_info = dev->xsks[i];
            if (xsk_info) {
                uint64_t tx_dropped;

                atomic_read_relaxed(&xsk_info->tx_dropped, &tx_dropped);
                stats->tx_dropped += tx_dropped;
            }
        }
    }
    ovs_mutex_unlock(&dev->mutex);

    return error;
}